aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--MAINTAINERS16
-rw-r--r--drivers/infiniband/core/addr.c12
-rw-r--r--drivers/infiniband/core/cache.c724
-rw-r--r--drivers/infiniband/core/cm.c142
-rw-r--r--drivers/infiniband/core/cm_msgs.h7
-rw-r--r--drivers/infiniband/core/cma.c165
-rw-r--r--drivers/infiniband/core/device.c21
-rw-r--r--drivers/infiniband/core/mad.c99
-rw-r--r--drivers/infiniband/core/mad_priv.h7
-rw-r--r--drivers/infiniband/core/multicast.c40
-rw-r--r--drivers/infiniband/core/nldev.c16
-rw-r--r--drivers/infiniband/core/rdma_core.c347
-rw-r--r--drivers/infiniband/core/rdma_core.h34
-rw-r--r--drivers/infiniband/core/rw.c4
-rw-r--r--drivers/infiniband/core/sa_query.c138
-rw-r--r--drivers/infiniband/core/sysfs.c66
-rw-r--r--drivers/infiniband/core/umem.c62
-rw-r--r--drivers/infiniband/core/user_mad.c1
-rw-r--r--drivers/infiniband/core/uverbs.h18
-rw-r--r--drivers/infiniband/core/uverbs_cmd.c313
-rw-r--r--drivers/infiniband/core/uverbs_ioctl.c155
-rw-r--r--drivers/infiniband/core/uverbs_ioctl_merge.c10
-rw-r--r--drivers/infiniband/core/uverbs_main.c53
-rw-r--r--drivers/infiniband/core/uverbs_marshall.c2
-rw-r--r--drivers/infiniband/core/uverbs_std_types.c163
-rw-r--r--drivers/infiniband/core/uverbs_std_types_counters.c61
-rw-r--r--drivers/infiniband/core/uverbs_std_types_cq.c102
-rw-r--r--drivers/infiniband/core/uverbs_std_types_dm.c48
-rw-r--r--drivers/infiniband/core/uverbs_std_types_flow_action.c144
-rw-r--r--drivers/infiniband/core/uverbs_std_types_mr.c62
-rw-r--r--drivers/infiniband/core/verbs.c513
-rw-r--r--drivers/infiniband/hw/bnxt_re/ib_verbs.c108
-rw-r--r--drivers/infiniband/hw/bnxt_re/ib_verbs.h3
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_fp.c12
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_sp.c4
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_cq.c64
-rw-r--r--drivers/infiniband/hw/cxgb3/iwch_provider.c3
-rw-r--r--drivers/infiniband/hw/cxgb4/cm.c3
-rw-r--r--drivers/infiniband/hw/cxgb4/cq.c105
-rw-r--r--drivers/infiniband/hw/cxgb4/ev.c5
-rw-r--r--drivers/infiniband/hw/cxgb4/iw_cxgb4.h1
-rw-r--r--drivers/infiniband/hw/cxgb4/provider.c3
-rw-r--r--drivers/infiniband/hw/cxgb4/t4.h18
-rw-r--r--drivers/infiniband/hw/hfi1/chip.c205
-rw-r--r--drivers/infiniband/hw/hfi1/chip.h30
-rw-r--r--drivers/infiniband/hw/hfi1/driver.c63
-rw-r--r--drivers/infiniband/hw/hfi1/file_ops.c10
-rw-r--r--drivers/infiniband/hw/hfi1/hfi.h243
-rw-r--r--drivers/infiniband/hw/hfi1/init.c44
-rw-r--r--drivers/infiniband/hw/hfi1/pcie.c19
-rw-r--r--drivers/infiniband/hw/hfi1/pio.c14
-rw-r--r--drivers/infiniband/hw/hfi1/qp.c6
-rw-r--r--drivers/infiniband/hw/hfi1/qp.h24
-rw-r--r--drivers/infiniband/hw/hfi1/rc.c6
-rw-r--r--drivers/infiniband/hw/hfi1/ruc.c14
-rw-r--r--drivers/infiniband/hw/hfi1/sdma.c10
-rw-r--r--drivers/infiniband/hw/hfi1/verbs.c18
-rw-r--r--drivers/infiniband/hw/hfi1/vnic_main.c12
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_ah.c19
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_common.h9
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_device.h15
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v1.c406
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v1.h2
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v2.c336
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_hw_v2.h123
-rw-r--r--drivers/infiniband/hw/hns/hns_roce_main.c9
-rw-r--r--drivers/infiniband/hw/i40iw/Kconfig1
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_cm.c26
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_hw.c83
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_verbs.c5
-rw-r--r--drivers/infiniband/hw/mlx4/ah.c70
-rw-r--r--drivers/infiniband/hw/mlx4/mad.c22
-rw-r--r--drivers/infiniband/hw/mlx4/main.c41
-rw-r--r--drivers/infiniband/hw/mlx4/mlx4_ib.h8
-rw-r--r--drivers/infiniband/hw/mlx4/qp.c186
-rw-r--r--drivers/infiniband/hw/mlx5/Makefile1
-rw-r--r--drivers/infiniband/hw/mlx5/ah.c11
-rw-r--r--drivers/infiniband/hw/mlx5/cmd.c12
-rw-r--r--drivers/infiniband/hw/mlx5/cmd.h1
-rw-r--r--drivers/infiniband/hw/mlx5/cong.c9
-rw-r--r--drivers/infiniband/hw/mlx5/devx.c1107
-rw-r--r--drivers/infiniband/hw/mlx5/main.c293
-rw-r--r--drivers/infiniband/hw/mlx5/mlx5_ib.h34
-rw-r--r--drivers/infiniband/hw/mlx5/mr.c32
-rw-r--r--drivers/infiniband/hw/mlx5/qp.c226
-rw-r--r--drivers/infiniband/hw/mthca/mthca_av.c5
-rw-r--r--drivers/infiniband/hw/mthca/mthca_provider.c5
-rw-r--r--drivers/infiniband/hw/nes/nes.h2
-rw-r--r--drivers/infiniband/hw/nes/nes_cm.c8
-rw-r--r--drivers/infiniband/hw/nes/nes_hw.c6
-rw-r--r--drivers/infiniband/hw/nes/nes_verbs.c3
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_ah.c26
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_hw.c21
-rw-r--r--drivers/infiniband/hw/ocrdma/ocrdma_verbs.c22
-rw-r--r--drivers/infiniband/hw/qedr/qedr_roce_cm.c25
-rw-r--r--drivers/infiniband/hw/qedr/verbs.c30
-rw-r--r--drivers/infiniband/hw/qib/qib_verbs.c3
-rw-r--r--drivers/infiniband/hw/qib/qib_verbs.h3
-rw-r--r--drivers/infiniband/hw/usnic/usnic_ib_verbs.c2
-rw-r--r--drivers/infiniband/hw/usnic/usnic_uiom.c40
-rw-r--r--drivers/infiniband/hw/usnic/usnic_uiom.h5
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma.h5
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c49
-rw-r--r--drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c6
-rw-r--r--drivers/infiniband/sw/rdmavt/ah.c4
-rw-r--r--drivers/infiniband/sw/rdmavt/qp.c11
-rw-r--r--drivers/infiniband/sw/rxe/rxe.c3
-rw-r--r--drivers/infiniband/sw/rxe/rxe_av.c30
-rw-r--r--drivers/infiniband/sw/rxe/rxe_loc.h5
-rw-r--r--drivers/infiniband/sw/rxe/rxe_net.c67
-rw-r--r--drivers/infiniband/sw/rxe/rxe_param.h2
-rw-r--r--drivers/infiniband/sw/rxe/rxe_qp.c31
-rw-r--r--drivers/infiniband/sw/rxe/rxe_recv.c15
-rw-r--r--drivers/infiniband/sw/rxe/rxe_resp.c5
-rw-r--r--drivers/infiniband/sw/rxe/rxe_verbs.c44
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib.h16
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_cm.c37
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_fs.c6
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ib.c9
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_main.c26
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_multicast.c6
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_verbs.c8
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_vlan.c2
-rw-r--r--drivers/infiniband/ulp/iser/iscsi_iser.c16
-rw-r--r--drivers/infiniband/ulp/isert/ib_isert.c5
-rw-r--r--drivers/infiniband/ulp/srp/ib_srp.c5
-rw-r--r--drivers/infiniband/ulp/srpt/ib_srpt.c49
-rw-r--r--drivers/infiniband/ulp/srpt/ib_srpt.h4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/cmd.c5
-rw-r--r--drivers/nvme/target/rdma.c2
-rw-r--r--fs/cifs/smbdirect.c13
-rw-r--r--include/linux/idr.h11
-rw-r--r--include/linux/mlx5/mlx5_ifc.h4
-rw-r--r--include/rdma/ib_addr.h2
-rw-r--r--include/rdma/ib_cache.h58
-rw-r--r--include/rdma/ib_cm.h3
-rw-r--r--include/rdma/ib_mad.h33
-rw-r--r--include/rdma/ib_sa.h49
-rw-r--r--include/rdma/ib_verbs.h179
-rw-r--r--include/rdma/opa_addr.h2
-rw-r--r--include/rdma/rdmavt_qp.h30
-rw-r--r--include/rdma/uverbs_ioctl.h307
-rw-r--r--include/rdma/uverbs_named_ioctl.h106
-rw-r--r--include/rdma/uverbs_std_types.h46
-rw-r--r--include/rdma/uverbs_types.h24
-rw-r--r--include/uapi/rdma/cxgb4-abi.h12
-rw-r--r--include/uapi/rdma/ib_user_ioctl_cmds.h7
-rw-r--r--include/uapi/rdma/ib_user_ioctl_verbs.h58
-rw-r--r--include/uapi/rdma/ib_user_verbs.h5
-rw-r--r--include/uapi/rdma/mlx5-abi.h6
-rw-r--r--include/uapi/rdma/mlx5_user_ioctl_cmds.h73
-rw-r--r--net/core/secure_seq.c1
-rw-r--r--net/rds/ib.c2
-rw-r--r--net/smc/smc_core.c20
-rw-r--r--net/smc/smc_ib.c25
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c2
-rw-r--r--net/sunrpc/xprtrdma/verbs.c2
157 files changed, 6026 insertions, 3412 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 9d5eeff51b5f..1e53fe99eb63 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3506,7 +3506,6 @@ F: drivers/net/ethernet/cisco/enic/
CISCO VIC LOW LATENCY NIC DRIVER
M: Christian Benvenuti <[email protected]>
-M: Dave Goodell <[email protected]>
S: Supported
F: drivers/infiniband/hw/usnic/
@@ -7562,9 +7561,8 @@ S: Maintained
F: drivers/firmware/iscsi_ibft*
ISCSI EXTENSIONS FOR RDMA (ISER) INITIATOR
-M: Or Gerlitz <[email protected]>
M: Sagi Grimberg <[email protected]>
-M: Roi Dayan <[email protected]>
+M: Max Gurtovoy <[email protected]>
S: Supported
W: http://www.openfabrics.org
@@ -12621,15 +12619,21 @@ S: Maintained
F: drivers/scsi/sr*
SCSI RDMA PROTOCOL (SRP) INITIATOR
-M: Bart Van Assche <[email protected]>
+M: Bart Van Assche <[email protected]>
S: Supported
-W: http://www.openfabrics.org
Q: http://patchwork.kernel.org/project/linux-rdma/list/
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/dad/srp-initiator.git
F: drivers/infiniband/ulp/srp/
F: include/scsi/srp.h
+SCSI RDMA PROTOCOL (SRP) TARGET
+M: Bart Van Assche <[email protected]>
+S: Supported
+Q: http://patchwork.kernel.org/project/linux-rdma/list/
+F: drivers/infiniband/ulp/srpt/
+
SCSI SG DRIVER
M: Doug Gilbert <[email protected]>
diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 4f32c4062fb6..1b817fdb97a4 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -315,19 +315,17 @@ static int dst_fetch_ha(const struct dst_entry *dst,
int ret = 0;
n = dst_neigh_lookup(dst, daddr);
+ if (!n)
+ return -ENODATA;
- rcu_read_lock();
- if (!n || !(n->nud_state & NUD_VALID)) {
- if (n)
- neigh_event_send(n, NULL);
+ if (!(n->nud_state & NUD_VALID)) {
+ neigh_event_send(n, NULL);
ret = -ENODATA;
} else {
rdma_copy_addr(dev_addr, dst->dev, n->ha);
}
- rcu_read_unlock();
- if (n)
- neigh_release(n);
+ neigh_release(n);
return ret;
}
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index 81d66f56e38f..0bee1f4b914e 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -66,20 +66,28 @@ enum gid_attr_find_mask {
GID_ATTR_FIND_MASK_GID_TYPE = 1UL << 3,
};
-enum gid_table_entry_props {
- GID_TABLE_ENTRY_INVALID = 1UL << 0,
- GID_TABLE_ENTRY_DEFAULT = 1UL << 1,
+enum gid_table_entry_state {
+ GID_TABLE_ENTRY_INVALID = 1,
+ GID_TABLE_ENTRY_VALID = 2,
+ /*
+ * Indicates that entry is pending to be removed, there may
+ * be active users of this GID entry.
+ * When last user of the GID entry releases reference to it,
+ * GID entry is detached from the table.
+ */
+ GID_TABLE_ENTRY_PENDING_DEL = 3,
};
struct ib_gid_table_entry {
- unsigned long props;
- union ib_gid gid;
- struct ib_gid_attr attr;
- void *context;
+ struct kref kref;
+ struct work_struct del_work;
+ struct ib_gid_attr attr;
+ void *context;
+ enum gid_table_entry_state state;
};
struct ib_gid_table {
- int sz;
+ int sz;
/* In RoCE, adding a GID to the table requires:
* (a) Find if this GID is already exists.
* (b) Find a free space.
@@ -91,13 +99,16 @@ struct ib_gid_table {
*
**/
/* Any writer to data_vec must hold this lock and the write side of
- * rwlock. readers must hold only rwlock. All writers must be in a
+ * rwlock. Readers must hold only rwlock. All writers must be in a
* sleepable context.
*/
- struct mutex lock;
- /* rwlock protects data_vec[ix]->props. */
- rwlock_t rwlock;
- struct ib_gid_table_entry *data_vec;
+ struct mutex lock;
+ /* rwlock protects data_vec[ix]->state and entry pointer.
+ */
+ rwlock_t rwlock;
+ struct ib_gid_table_entry **data_vec;
+ /* bit field, each bit indicates the index of default GID */
+ u32 default_gid_indices;
};
static void dispatch_gid_change_event(struct ib_device *ib_dev, u8 port)
@@ -135,6 +146,19 @@ bool rdma_is_zero_gid(const union ib_gid *gid)
}
EXPORT_SYMBOL(rdma_is_zero_gid);
+/** is_gid_index_default - Check if a given index belongs to
+ * reserved default GIDs or not.
+ * @table: GID table pointer
+ * @index: Index to check in GID table
+ * Returns true if index is one of the reserved default GID index otherwise
+ * returns false.
+ */
+static bool is_gid_index_default(const struct ib_gid_table *table,
+ unsigned int index)
+{
+ return index < 32 && (BIT(index) & table->default_gid_indices);
+}
+
int ib_cache_gid_parse_type_str(const char *buf)
{
unsigned int i;
@@ -164,26 +188,136 @@ static struct ib_gid_table *rdma_gid_table(struct ib_device *device, u8 port)
return device->cache.ports[port - rdma_start_port(device)].gid;
}
-static void del_roce_gid(struct ib_device *device, u8 port_num,
- struct ib_gid_table *table, int ix)
+static bool is_gid_entry_free(const struct ib_gid_table_entry *entry)
+{
+ return !entry;
+}
+
+static bool is_gid_entry_valid(const struct ib_gid_table_entry *entry)
+{
+ return entry && entry->state == GID_TABLE_ENTRY_VALID;
+}
+
+static void schedule_free_gid(struct kref *kref)
{
+ struct ib_gid_table_entry *entry =
+ container_of(kref, struct ib_gid_table_entry, kref);
+
+ queue_work(ib_wq, &entry->del_work);
+}
+
+static void free_gid_entry_locked(struct ib_gid_table_entry *entry)
+{
+ struct ib_device *device = entry->attr.device;
+ u8 port_num = entry->attr.port_num;
+ struct ib_gid_table *table = rdma_gid_table(device, port_num);
+
pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__,
- device->name, port_num, ix,
- table->data_vec[ix].gid.raw);
+ device->name, port_num, entry->attr.index,
+ entry->attr.gid.raw);
+
+ if (rdma_cap_roce_gid_table(device, port_num) &&
+ entry->state != GID_TABLE_ENTRY_INVALID)
+ device->del_gid(&entry->attr, &entry->context);
+
+ write_lock_irq(&table->rwlock);
- if (rdma_cap_roce_gid_table(device, port_num))
- device->del_gid(&table->data_vec[ix].attr,
- &table->data_vec[ix].context);
- dev_put(table->data_vec[ix].attr.ndev);
+ /*
+ * The only way to avoid overwriting NULL in table is
+ * by comparing if it is same entry in table or not!
+ * If new entry in table is added by the time we free here,
+ * don't overwrite the table entry.
+ */
+ if (entry == table->data_vec[entry->attr.index])
+ table->data_vec[entry->attr.index] = NULL;
+ /* Now this index is ready to be allocated */
+ write_unlock_irq(&table->rwlock);
+
+ if (entry->attr.ndev)
+ dev_put(entry->attr.ndev);
+ kfree(entry);
}
-static int add_roce_gid(struct ib_gid_table *table,
- const union ib_gid *gid,
- const struct ib_gid_attr *attr)
+static void free_gid_entry(struct kref *kref)
+{
+ struct ib_gid_table_entry *entry =
+ container_of(kref, struct ib_gid_table_entry, kref);
+
+ free_gid_entry_locked(entry);
+}
+
+/**
+ * free_gid_work - Release reference to the GID entry
+ * @work: Work structure to refer to GID entry which needs to be
+ * deleted.
+ *
+ * free_gid_work() frees the entry from the HCA's hardware table
+ * if provider supports it. It releases reference to netdevice.
+ */
+static void free_gid_work(struct work_struct *work)
+{
+ struct ib_gid_table_entry *entry =
+ container_of(work, struct ib_gid_table_entry, del_work);
+ struct ib_device *device = entry->attr.device;
+ u8 port_num = entry->attr.port_num;
+ struct ib_gid_table *table = rdma_gid_table(device, port_num);
+
+ mutex_lock(&table->lock);
+ free_gid_entry_locked(entry);
+ mutex_unlock(&table->lock);
+}
+
+static struct ib_gid_table_entry *
+alloc_gid_entry(const struct ib_gid_attr *attr)
{
struct ib_gid_table_entry *entry;
- int ix = attr->index;
- int ret = 0;
+
+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry)
+ return NULL;
+ kref_init(&entry->kref);
+ memcpy(&entry->attr, attr, sizeof(*attr));
+ if (entry->attr.ndev)
+ dev_hold(entry->attr.ndev);
+ INIT_WORK(&entry->del_work, free_gid_work);
+ entry->state = GID_TABLE_ENTRY_INVALID;
+ return entry;
+}
+
+static void store_gid_entry(struct ib_gid_table *table,
+ struct ib_gid_table_entry *entry)
+{
+ entry->state = GID_TABLE_ENTRY_VALID;
+
+ pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__,
+ entry->attr.device->name, entry->attr.port_num,
+ entry->attr.index, entry->attr.gid.raw);
+
+ lockdep_assert_held(&table->lock);
+ write_lock_irq(&table->rwlock);
+ table->data_vec[entry->attr.index] = entry;
+ write_unlock_irq(&table->rwlock);
+}
+
+static void get_gid_entry(struct ib_gid_table_entry *entry)
+{
+ kref_get(&entry->kref);
+}
+
+static void put_gid_entry(struct ib_gid_table_entry *entry)
+{
+ kref_put(&entry->kref, schedule_free_gid);
+}
+
+static void put_gid_entry_locked(struct ib_gid_table_entry *entry)
+{
+ kref_put(&entry->kref, free_gid_entry);
+}
+
+static int add_roce_gid(struct ib_gid_table_entry *entry)
+{
+ const struct ib_gid_attr *attr = &entry->attr;
+ int ret;
if (!attr->ndev) {
pr_err("%s NULL netdev device=%s port=%d index=%d\n",
@@ -191,38 +325,22 @@ static int add_roce_gid(struct ib_gid_table *table,
attr->index);
return -EINVAL;
}
-
- entry = &table->data_vec[ix];
- if ((entry->props & GID_TABLE_ENTRY_INVALID) == 0) {
- WARN(1, "GID table corruption device=%s port=%d index=%d\n",
- attr->device->name, attr->port_num,
- attr->index);
- return -EINVAL;
- }
-
if (rdma_cap_roce_gid_table(attr->device, attr->port_num)) {
- ret = attr->device->add_gid(gid, attr, &entry->context);
+ ret = attr->device->add_gid(attr, &entry->context);
if (ret) {
pr_err("%s GID add failed device=%s port=%d index=%d\n",
__func__, attr->device->name, attr->port_num,
attr->index);
- goto add_err;
+ return ret;
}
}
- dev_hold(attr->ndev);
-
-add_err:
- if (!ret)
- pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__,
- attr->device->name, attr->port_num, ix, gid->raw);
- return ret;
+ return 0;
}
/**
* add_modify_gid - Add or modify GID table entry
*
* @table: GID table in which GID to be added or modified
- * @gid: GID content
* @attr: Attributes of the GID
*
* Returns 0 on success or appropriate error code. It accepts zero
@@ -230,34 +348,42 @@ add_err:
* GID. However such zero GIDs are not added to the cache.
*/
static int add_modify_gid(struct ib_gid_table *table,
- const union ib_gid *gid,
const struct ib_gid_attr *attr)
{
- int ret;
+ struct ib_gid_table_entry *entry;
+ int ret = 0;
+
+ /*
+ * Invalidate any old entry in the table to make it safe to write to
+ * this index.
+ */
+ if (is_gid_entry_valid(table->data_vec[attr->index]))
+ put_gid_entry(table->data_vec[attr->index]);
+
+ /*
+ * Some HCA's report multiple GID entries with only one valid GID, and
+ * leave other unused entries as the zero GID. Convert zero GIDs to
+ * empty table entries instead of storing them.
+ */
+ if (rdma_is_zero_gid(&attr->gid))
+ return 0;
+
+ entry = alloc_gid_entry(attr);
+ if (!entry)
+ return -ENOMEM;
if (rdma_protocol_roce(attr->device, attr->port_num)) {
- ret = add_roce_gid(table, gid, attr);
+ ret = add_roce_gid(entry);
if (ret)
- return ret;
- } else {
- /*
- * Some HCA's report multiple GID entries with only one
- * valid GID, but remaining as zero GID.
- * So ignore such behavior for IB link layer and don't
- * fail the call, but don't add such entry to GID cache.
- */
- if (rdma_is_zero_gid(gid))
- return 0;
+ goto done;
}
- lockdep_assert_held(&table->lock);
- memcpy(&table->data_vec[attr->index].gid, gid, sizeof(*gid));
- memcpy(&table->data_vec[attr->index].attr, attr, sizeof(*attr));
-
- write_lock_irq(&table->rwlock);
- table->data_vec[attr->index].props &= ~GID_TABLE_ENTRY_INVALID;
- write_unlock_irq(&table->rwlock);
+ store_gid_entry(table, entry);
return 0;
+
+done:
+ put_gid_entry(entry);
+ return ret;
}
/**
@@ -272,16 +398,25 @@ static int add_modify_gid(struct ib_gid_table *table,
static void del_gid(struct ib_device *ib_dev, u8 port,
struct ib_gid_table *table, int ix)
{
+ struct ib_gid_table_entry *entry;
+
lockdep_assert_held(&table->lock);
+
+ pr_debug("%s device=%s port=%d index=%d gid %pI6\n", __func__,
+ ib_dev->name, port, ix,
+ table->data_vec[ix]->attr.gid.raw);
+
write_lock_irq(&table->rwlock);
- table->data_vec[ix].props |= GID_TABLE_ENTRY_INVALID;
+ entry = table->data_vec[ix];
+ entry->state = GID_TABLE_ENTRY_PENDING_DEL;
+ /*
+ * For non RoCE protocol, GID entry slot is ready to use.
+ */
+ if (!rdma_protocol_roce(ib_dev, port))
+ table->data_vec[ix] = NULL;
write_unlock_irq(&table->rwlock);
- if (rdma_protocol_roce(ib_dev, port))
- del_roce_gid(ib_dev, port, table, ix);
- memset(&table->data_vec[ix].gid, 0, sizeof(table->data_vec[ix].gid));
- memset(&table->data_vec[ix].attr, 0, sizeof(table->data_vec[ix].attr));
- table->data_vec[ix].context = NULL;
+ put_gid_entry_locked(entry);
}
/* rwlock should be read locked, or lock should be held */
@@ -294,8 +429,8 @@ static int find_gid(struct ib_gid_table *table, const union ib_gid *gid,
int empty = pempty ? -1 : 0;
while (i < table->sz && (found < 0 || empty < 0)) {
- struct ib_gid_table_entry *data = &table->data_vec[i];
- struct ib_gid_attr *attr = &data->attr;
+ struct ib_gid_table_entry *data = table->data_vec[i];
+ struct ib_gid_attr *attr;
int curr_index = i;
i++;
@@ -306,9 +441,9 @@ static int find_gid(struct ib_gid_table *table, const union ib_gid *gid,
* so lookup free slot only if requested.
*/
if (pempty && empty < 0) {
- if (data->props & GID_TABLE_ENTRY_INVALID &&
- (default_gid ==
- !!(data->props & GID_TABLE_ENTRY_DEFAULT))) {
+ if (is_gid_entry_free(data) &&
+ default_gid ==
+ is_gid_index_default(table, curr_index)) {
/*
* Found an invalid (free) entry; allocate it.
* If default GID is requested, then our
@@ -323,22 +458,23 @@ static int find_gid(struct ib_gid_table *table, const union ib_gid *gid,
/*
* Additionally find_gid() is used to find valid entry during
- * lookup operation, where validity needs to be checked. So
- * find the empty entry first to continue to search for a free
- * slot and ignore its INVALID flag.
+ * lookup operation; so ignore the entries which are marked as
+ * pending for removal and the entries which are marked as
+ * invalid.
*/
- if (data->props & GID_TABLE_ENTRY_INVALID)
+ if (!is_gid_entry_valid(data))
continue;
if (found >= 0)
continue;
+ attr = &data->attr;
if (mask & GID_ATTR_FIND_MASK_GID_TYPE &&
attr->gid_type != val->gid_type)
continue;
if (mask & GID_ATTR_FIND_MASK_GID &&
- memcmp(gid, &data->gid, sizeof(*gid)))
+ memcmp(gid, &data->attr.gid, sizeof(*gid)))
continue;
if (mask & GID_ATTR_FIND_MASK_NETDEV &&
@@ -346,8 +482,7 @@ static int find_gid(struct ib_gid_table *table, const union ib_gid *gid,
continue;
if (mask & GID_ATTR_FIND_MASK_DEFAULT &&
- !!(data->props & GID_TABLE_ENTRY_DEFAULT) !=
- default_gid)
+ is_gid_index_default(table, curr_index) != default_gid)
continue;
found = curr_index;
@@ -396,7 +531,8 @@ static int __ib_cache_gid_add(struct ib_device *ib_dev, u8 port,
attr->device = ib_dev;
attr->index = empty;
attr->port_num = port;
- ret = add_modify_gid(table, gid, attr);
+ attr->gid = *gid;
+ ret = add_modify_gid(table, attr);
if (!ret)
dispatch_gid_change_event(ib_dev, port);
@@ -492,7 +628,8 @@ int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
mutex_lock(&table->lock);
for (ix = 0; ix < table->sz; ix++) {
- if (table->data_vec[ix].attr.ndev == ndev) {
+ if (is_gid_entry_valid(table->data_vec[ix]) &&
+ table->data_vec[ix]->attr.ndev == ndev) {
del_gid(ib_dev, port, table, ix);
deleted = true;
}
@@ -506,103 +643,37 @@ int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
return 0;
}
-static int __ib_cache_gid_get(struct ib_device *ib_dev, u8 port, int index,
- union ib_gid *gid, struct ib_gid_attr *attr)
-{
- struct ib_gid_table *table;
-
- table = rdma_gid_table(ib_dev, port);
-
- if (index < 0 || index >= table->sz)
- return -EINVAL;
-
- if (table->data_vec[index].props & GID_TABLE_ENTRY_INVALID)
- return -EINVAL;
-
- memcpy(gid, &table->data_vec[index].gid, sizeof(*gid));
- if (attr) {
- memcpy(attr, &table->data_vec[index].attr, sizeof(*attr));
- if (attr->ndev)
- dev_hold(attr->ndev);
- }
-
- return 0;
-}
-
-static int _ib_cache_gid_table_find(struct ib_device *ib_dev,
- const union ib_gid *gid,
- const struct ib_gid_attr *val,
- unsigned long mask,
- u8 *port, u16 *index)
-{
- struct ib_gid_table *table;
- u8 p;
- int local_index;
- unsigned long flags;
-
- for (p = 0; p < ib_dev->phys_port_cnt; p++) {
- table = ib_dev->cache.ports[p].gid;
- read_lock_irqsave(&table->rwlock, flags);
- local_index = find_gid(table, gid, val, false, mask, NULL);
- if (local_index >= 0) {
- if (index)
- *index = local_index;
- if (port)
- *port = p + rdma_start_port(ib_dev);
- read_unlock_irqrestore(&table->rwlock, flags);
- return 0;
- }
- read_unlock_irqrestore(&table->rwlock, flags);
- }
-
- return -ENOENT;
-}
-
-static int ib_cache_gid_find(struct ib_device *ib_dev,
- const union ib_gid *gid,
- enum ib_gid_type gid_type,
- struct net_device *ndev, u8 *port,
- u16 *index)
-{
- unsigned long mask = GID_ATTR_FIND_MASK_GID |
- GID_ATTR_FIND_MASK_GID_TYPE;
- struct ib_gid_attr gid_attr_val = {.ndev = ndev, .gid_type = gid_type};
-
- if (ndev)
- mask |= GID_ATTR_FIND_MASK_NETDEV;
-
- return _ib_cache_gid_table_find(ib_dev, gid, &gid_attr_val,
- mask, port, index);
-}
-
/**
- * ib_find_cached_gid_by_port - Returns the GID table index where a specified
- * GID value occurs. It searches for the specified GID value in the local
- * software cache.
+ * rdma_find_gid_by_port - Returns the GID entry attributes when it finds
+ * a valid GID entry for given search parameters. It searches for the specified
+ * GID value in the local software cache.
* @device: The device to query.
* @gid: The GID value to search for.
* @gid_type: The GID type to search for.
* @port_num: The port number of the device where the GID value should be
* searched.
- * @ndev: In RoCE, the net device of the device. Null means ignore.
- * @index: The index into the cached GID table where the GID was found. This
- * parameter may be NULL.
+ * @ndev: In RoCE, the net device of the device. NULL means ignore.
+ *
+ * Returns sgid attributes if the GID is found with valid reference or
+ * returns ERR_PTR for the error.
+ * The caller must invoke rdma_put_gid_attr() to release the reference.
*/
-int ib_find_cached_gid_by_port(struct ib_device *ib_dev,
- const union ib_gid *gid,
- enum ib_gid_type gid_type,
- u8 port, struct net_device *ndev,
- u16 *index)
+const struct ib_gid_attr *
+rdma_find_gid_by_port(struct ib_device *ib_dev,
+ const union ib_gid *gid,
+ enum ib_gid_type gid_type,
+ u8 port, struct net_device *ndev)
{
int local_index;
struct ib_gid_table *table;
unsigned long mask = GID_ATTR_FIND_MASK_GID |
GID_ATTR_FIND_MASK_GID_TYPE;
struct ib_gid_attr val = {.ndev = ndev, .gid_type = gid_type};
+ const struct ib_gid_attr *attr;
unsigned long flags;
if (!rdma_is_port_valid(ib_dev, port))
- return -ENOENT;
+ return ERR_PTR(-ENOENT);
table = rdma_gid_table(ib_dev, port);
@@ -612,89 +683,73 @@ int ib_find_cached_gid_by_port(struct ib_device *ib_dev,
read_lock_irqsave(&table->rwlock, flags);
local_index = find_gid(table, gid, &val, false, mask, NULL);
if (local_index >= 0) {
- if (index)
- *index = local_index;
+ get_gid_entry(table->data_vec[local_index]);
+ attr = &table->data_vec[local_index]->attr;
read_unlock_irqrestore(&table->rwlock, flags);
- return 0;
+ return attr;
}
read_unlock_irqrestore(&table->rwlock, flags);
- return -ENOENT;
+ return ERR_PTR(-ENOENT);
}
-EXPORT_SYMBOL(ib_find_cached_gid_by_port);
+EXPORT_SYMBOL(rdma_find_gid_by_port);
/**
- * ib_cache_gid_find_by_filter - Returns the GID table index where a specified
- * GID value occurs
+ * rdma_find_gid_by_filter - Returns the GID table attribute where a
+ * specified GID value occurs
* @device: The device to query.
* @gid: The GID value to search for.
- * @port_num: The port number of the device where the GID value could be
+ * @port: The port number of the device where the GID value could be
* searched.
* @filter: The filter function is executed on any matching GID in the table.
* If the filter function returns true, the corresponding index is returned,
* otherwise, we continue searching the GID table. It's guaranteed that
* while filter is executed, ndev field is valid and the structure won't
* change. filter is executed in an atomic context. filter must not be NULL.
- * @index: The index into the cached GID table where the GID was found. This
- * parameter may be NULL.
*
- * ib_cache_gid_find_by_filter() searches for the specified GID value
+ * rdma_find_gid_by_filter() searches for the specified GID value
* of which the filter function returns true in the port's GID table.
- * This function is only supported on RoCE ports.
*
*/
-static int ib_cache_gid_find_by_filter(struct ib_device *ib_dev,
- const union ib_gid *gid,
- u8 port,
- bool (*filter)(const union ib_gid *,
- const struct ib_gid_attr *,
- void *),
- void *context,
- u16 *index)
+const struct ib_gid_attr *rdma_find_gid_by_filter(
+ struct ib_device *ib_dev, const union ib_gid *gid, u8 port,
+ bool (*filter)(const union ib_gid *gid, const struct ib_gid_attr *,
+ void *),
+ void *context)
{
+ const struct ib_gid_attr *res = ERR_PTR(-ENOENT);
struct ib_gid_table *table;
- unsigned int i;
unsigned long flags;
- bool found = false;
-
+ unsigned int i;
- if (!rdma_is_port_valid(ib_dev, port) ||
- !rdma_protocol_roce(ib_dev, port))
- return -EPROTONOSUPPORT;
+ if (!rdma_is_port_valid(ib_dev, port))
+ return ERR_PTR(-EINVAL);
table = rdma_gid_table(ib_dev, port);
read_lock_irqsave(&table->rwlock, flags);
for (i = 0; i < table->sz; i++) {
- struct ib_gid_attr attr;
+ struct ib_gid_table_entry *entry = table->data_vec[i];
- if (table->data_vec[i].props & GID_TABLE_ENTRY_INVALID)
+ if (!is_gid_entry_valid(entry))
continue;
- if (memcmp(gid, &table->data_vec[i].gid, sizeof(*gid)))
+ if (memcmp(gid, &entry->attr.gid, sizeof(*gid)))
continue;
- memcpy(&attr, &table->data_vec[i].attr, sizeof(attr));
-
- if (filter(gid, &attr, context)) {
- found = true;
- if (index)
- *index = i;
+ if (filter(gid, &entry->attr, context)) {
+ get_gid_entry(entry);
+ res = &entry->attr;
break;
}
}
read_unlock_irqrestore(&table->rwlock, flags);
-
- if (!found)
- return -ENOENT;
- return 0;
+ return res;
}
static struct ib_gid_table *alloc_gid_table(int sz)
{
- struct ib_gid_table *table =
- kzalloc(sizeof(struct ib_gid_table), GFP_KERNEL);
- int i;
+ struct ib_gid_table *table = kzalloc(sizeof(*table), GFP_KERNEL);
if (!table)
return NULL;
@@ -707,12 +762,6 @@ static struct ib_gid_table *alloc_gid_table(int sz)
table->sz = sz;
rwlock_init(&table->rwlock);
-
- /* Mark all entries as invalid so that allocator can allocate
- * one of the invalid (free) entry.
- */
- for (i = 0; i < sz; i++)
- table->data_vec[i].props |= GID_TABLE_ENTRY_INVALID;
return table;
err_free_table:
@@ -720,12 +769,30 @@ err_free_table:
return NULL;
}
-static void release_gid_table(struct ib_gid_table *table)
+static void release_gid_table(struct ib_device *device, u8 port,
+ struct ib_gid_table *table)
{
- if (table) {
- kfree(table->data_vec);
- kfree(table);
+ bool leak = false;
+ int i;
+
+ if (!table)
+ return;
+
+ for (i = 0; i < table->sz; i++) {
+ if (is_gid_entry_free(table->data_vec[i]))
+ continue;
+ if (kref_read(&table->data_vec[i]->kref) > 1) {
+ pr_err("GID entry ref leak for %s (index %d) ref=%d\n",
+ device->name, i,
+ kref_read(&table->data_vec[i]->kref));
+ leak = true;
+ }
}
+ if (leak)
+ return;
+
+ kfree(table->data_vec);
+ kfree(table);
}
static void cleanup_gid_table_port(struct ib_device *ib_dev, u8 port,
@@ -739,7 +806,7 @@ static void cleanup_gid_table_port(struct ib_device *ib_dev, u8 port,
mutex_lock(&table->lock);
for (i = 0; i < table->sz; ++i) {
- if (!rdma_is_zero_gid(&table->data_vec[i].gid)) {
+ if (is_gid_entry_valid(table->data_vec[i])) {
del_gid(ib_dev, port, table, i);
deleted = true;
}
@@ -757,12 +824,9 @@ void ib_cache_gid_set_default_gid(struct ib_device *ib_dev, u8 port,
{
union ib_gid gid = { };
struct ib_gid_attr gid_attr;
- struct ib_gid_table *table;
unsigned int gid_type;
unsigned long mask;
- table = rdma_gid_table(ib_dev, port);
-
mask = GID_ATTR_FIND_MASK_GID_TYPE |
GID_ATTR_FIND_MASK_DEFAULT |
GID_ATTR_FIND_MASK_NETDEV;
@@ -792,19 +856,12 @@ static void gid_table_reserve_default(struct ib_device *ib_dev, u8 port,
unsigned int i;
unsigned long roce_gid_type_mask;
unsigned int num_default_gids;
- unsigned int current_gid = 0;
roce_gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
num_default_gids = hweight_long(roce_gid_type_mask);
- for (i = 0; i < num_default_gids && i < table->sz; i++) {
- struct ib_gid_table_entry *entry = &table->data_vec[i];
-
- entry->props |= GID_TABLE_ENTRY_DEFAULT;
- current_gid = find_next_bit(&roce_gid_type_mask,
- BITS_PER_LONG,
- current_gid);
- entry->attr.gid_type = current_gid++;
- }
+ /* Reserve starting indices for default GIDs */
+ for (i = 0; i < num_default_gids && i < table->sz; i++)
+ table->default_gid_indices |= BIT(i);
}
@@ -815,7 +872,7 @@ static void gid_table_release_one(struct ib_device *ib_dev)
for (port = 0; port < ib_dev->phys_port_cnt; port++) {
table = ib_dev->cache.ports[port].gid;
- release_gid_table(table);
+ release_gid_table(ib_dev, port, table);
ib_dev->cache.ports[port].gid = NULL;
}
}
@@ -869,69 +926,94 @@ static int gid_table_setup_one(struct ib_device *ib_dev)
return err;
}
-int ib_get_cached_gid(struct ib_device *device,
- u8 port_num,
- int index,
- union ib_gid *gid,
- struct ib_gid_attr *gid_attr)
+/**
+ * rdma_query_gid - Read the GID content from the GID software cache
+ * @device: Device to query the GID
+ * @port_num: Port number of the device
+ * @index: Index of the GID table entry to read
+ * @gid: Pointer to GID where to store the entry's GID
+ *
+ * rdma_query_gid() only reads the GID entry content for requested device,
+ * port and index. It reads for IB, RoCE and iWarp link layers. It doesn't
+ * hold any reference to the GID table entry in the HCA or software cache.
+ *
+ * Returns 0 on success or appropriate error code.
+ *
+ */
+int rdma_query_gid(struct ib_device *device, u8 port_num,
+ int index, union ib_gid *gid)
{
- int res;
- unsigned long flags;
struct ib_gid_table *table;
+ unsigned long flags;
+ int res = -EINVAL;
if (!rdma_is_port_valid(device, port_num))
return -EINVAL;
table = rdma_gid_table(device, port_num);
read_lock_irqsave(&table->rwlock, flags);
- res = __ib_cache_gid_get(device, port_num, index, gid, gid_attr);
- read_unlock_irqrestore(&table->rwlock, flags);
+ if (index < 0 || index >= table->sz ||
+ !is_gid_entry_valid(table->data_vec[index]))
+ goto done;
+
+ memcpy(gid, &table->data_vec[index]->attr.gid, sizeof(*gid));
+ res = 0;
+
+done:
+ read_unlock_irqrestore(&table->rwlock, flags);
return res;
}
-EXPORT_SYMBOL(ib_get_cached_gid);
+EXPORT_SYMBOL(rdma_query_gid);
/**
- * ib_find_cached_gid - Returns the port number and GID table index where
- * a specified GID value occurs.
+ * rdma_find_gid - Returns SGID attributes if the matching GID is found.
* @device: The device to query.
* @gid: The GID value to search for.
* @gid_type: The GID type to search for.
* @ndev: In RoCE, the net device of the device. NULL means ignore.
- * @port_num: The port number of the device where the GID value was found.
- * @index: The index into the cached GID table where the GID was found. This
- * parameter may be NULL.
*
- * ib_find_cached_gid() searches for the specified GID value in
- * the local software cache.
+ * rdma_find_gid() searches for the specified GID value in the software cache.
+ *
+ * Returns GID attributes if a valid GID is found or returns ERR_PTR for the
+ * error. The caller must invoke rdma_put_gid_attr() to release the reference.
+ *
*/
-int ib_find_cached_gid(struct ib_device *device,
- const union ib_gid *gid,
- enum ib_gid_type gid_type,
- struct net_device *ndev,
- u8 *port_num,
- u16 *index)
-{
- return ib_cache_gid_find(device, gid, gid_type, ndev, port_num, index);
-}
-EXPORT_SYMBOL(ib_find_cached_gid);
-
-int ib_find_gid_by_filter(struct ib_device *device,
- const union ib_gid *gid,
- u8 port_num,
- bool (*filter)(const union ib_gid *gid,
- const struct ib_gid_attr *,
- void *),
- void *context, u16 *index)
+const struct ib_gid_attr *rdma_find_gid(struct ib_device *device,
+ const union ib_gid *gid,
+ enum ib_gid_type gid_type,
+ struct net_device *ndev)
{
- /* Only RoCE GID table supports filter function */
- if (!rdma_protocol_roce(device, port_num) && filter)
- return -EPROTONOSUPPORT;
+ unsigned long mask = GID_ATTR_FIND_MASK_GID |
+ GID_ATTR_FIND_MASK_GID_TYPE;
+ struct ib_gid_attr gid_attr_val = {.ndev = ndev, .gid_type = gid_type};
+ u8 p;
+
+ if (ndev)
+ mask |= GID_ATTR_FIND_MASK_NETDEV;
+
+ for (p = 0; p < device->phys_port_cnt; p++) {
+ struct ib_gid_table *table;
+ unsigned long flags;
+ int index;
+
+ table = device->cache.ports[p].gid;
+ read_lock_irqsave(&table->rwlock, flags);
+ index = find_gid(table, gid, &gid_attr_val, false, mask, NULL);
+ if (index >= 0) {
+ const struct ib_gid_attr *attr;
+
+ get_gid_entry(table->data_vec[index]);
+ attr = &table->data_vec[index]->attr;
+ read_unlock_irqrestore(&table->rwlock, flags);
+ return attr;
+ }
+ read_unlock_irqrestore(&table->rwlock, flags);
+ }
- return ib_cache_gid_find_by_filter(device, gid,
- port_num, filter,
- context, index);
+ return ERR_PTR(-ENOENT);
}
+EXPORT_SYMBOL(rdma_find_gid);
int ib_get_cached_pkey(struct ib_device *device,
u8 port_num,
@@ -1089,12 +1171,92 @@ int ib_get_cached_port_state(struct ib_device *device,
}
EXPORT_SYMBOL(ib_get_cached_port_state);
+/**
+ * rdma_get_gid_attr - Returns GID attributes for a port of a device
+ * at a requested gid_index, if a valid GID entry exists.
+ * @device: The device to query.
+ * @port_num: The port number on the device where the GID value
+ * is to be queried.
+ * @index: Index of the GID table entry whose attributes are to
+ * be queried.
+ *
+ * rdma_get_gid_attr() acquires reference count of gid attributes from the
+ * cached GID table. Caller must invoke rdma_put_gid_attr() to release
+ * reference to gid attribute regardless of link layer.
+ *
+ * Returns pointer to valid gid attribute or ERR_PTR for the appropriate error
+ * code.
+ */
+const struct ib_gid_attr *
+rdma_get_gid_attr(struct ib_device *device, u8 port_num, int index)
+{
+ const struct ib_gid_attr *attr = ERR_PTR(-EINVAL);
+ struct ib_gid_table *table;
+ unsigned long flags;
+
+ if (!rdma_is_port_valid(device, port_num))
+ return ERR_PTR(-EINVAL);
+
+ table = rdma_gid_table(device, port_num);
+ if (index < 0 || index >= table->sz)
+ return ERR_PTR(-EINVAL);
+
+ read_lock_irqsave(&table->rwlock, flags);
+ if (!is_gid_entry_valid(table->data_vec[index]))
+ goto done;
+
+ get_gid_entry(table->data_vec[index]);
+ attr = &table->data_vec[index]->attr;
+done:
+ read_unlock_irqrestore(&table->rwlock, flags);
+ return attr;
+}
+EXPORT_SYMBOL(rdma_get_gid_attr);
+
+/**
+ * rdma_put_gid_attr - Release reference to the GID attribute
+ * @attr: Pointer to the GID attribute whose reference
+ * needs to be released.
+ *
+ * rdma_put_gid_attr() must be used to release reference whose
+ * reference is acquired using rdma_get_gid_attr() or any APIs
+ * which returns a pointer to the ib_gid_attr regardless of link layer
+ * of IB or RoCE.
+ *
+ */
+void rdma_put_gid_attr(const struct ib_gid_attr *attr)
+{
+ struct ib_gid_table_entry *entry =
+ container_of(attr, struct ib_gid_table_entry, attr);
+
+ put_gid_entry(entry);
+}
+EXPORT_SYMBOL(rdma_put_gid_attr);
+
+/**
+ * rdma_hold_gid_attr - Get reference to existing GID attribute
+ *
+ * @attr: Pointer to the GID attribute whose reference
+ * needs to be taken.
+ *
+ * Increase the reference count to a GID attribute to keep it from being
+ * freed. Callers are required to already be holding a reference to attribute.
+ *
+ */
+void rdma_hold_gid_attr(const struct ib_gid_attr *attr)
+{
+ struct ib_gid_table_entry *entry =
+ container_of(attr, struct ib_gid_table_entry, attr);
+
+ get_gid_entry(entry);
+}
+EXPORT_SYMBOL(rdma_hold_gid_attr);
+
static int config_non_roce_gid_cache(struct ib_device *device,
u8 port, int gid_tbl_len)
{
struct ib_gid_attr gid_attr = {};
struct ib_gid_table *table;
- union ib_gid gid;
int ret = 0;
int i;
@@ -1106,14 +1268,14 @@ static int config_non_roce_gid_cache(struct ib_device *device,
for (i = 0; i < gid_tbl_len; ++i) {
if (!device->query_gid)
continue;
- ret = device->query_gid(device, port, i, &gid);
+ ret = device->query_gid(device, port, i, &gid_attr.gid);
if (ret) {
pr_warn("query_gid failed (%d) for %s (index %d)\n",
ret, device->name, i);
goto err;
}
gid_attr.index = i;
- add_modify_gid(table, &gid, &gid_attr);
+ add_modify_gid(table, &gid_attr);
}
err:
mutex_unlock(&table->lock);
@@ -1128,13 +1290,10 @@ static void ib_cache_update(struct ib_device *device,
struct ib_pkey_cache *pkey_cache = NULL, *old_pkey_cache;
int i;
int ret;
- struct ib_gid_table *table;
if (!rdma_is_port_valid(device, port))
return;
- table = rdma_gid_table(device, port);
-
tprops = kmalloc(sizeof *tprops, GFP_KERNEL);
if (!tprops)
return;
@@ -1296,4 +1455,9 @@ void ib_cache_cleanup_one(struct ib_device *device)
ib_unregister_event_handler(&device->cache.event_handler);
flush_workqueue(ib_wq);
gid_table_cleanup_one(device);
+
+ /*
+ * Flush the wq second time for any pending GID delete work.
+ */
+ flush_workqueue(ib_wq);
}
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 27a7b0a2e27a..4724cb09b69d 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -474,7 +474,7 @@ static int cm_init_av_for_lap(struct cm_port *port, struct ib_wc *wc,
if (ret)
return ret;
- memcpy(&av->ah_attr, &new_ah_attr, sizeof(new_ah_attr));
+ rdma_move_ah_attr(&av->ah_attr, &new_ah_attr);
return 0;
}
@@ -508,31 +508,50 @@ static int add_cm_id_to_port_list(struct cm_id_private *cm_id_priv,
return ret;
}
-static struct cm_port *get_cm_port_from_path(struct sa_path_rec *path)
+static struct cm_port *
+get_cm_port_from_path(struct sa_path_rec *path, const struct ib_gid_attr *attr)
{
struct cm_device *cm_dev;
struct cm_port *port = NULL;
unsigned long flags;
- u8 p;
- struct net_device *ndev = ib_get_ndev_from_path(path);
-
- read_lock_irqsave(&cm.device_lock, flags);
- list_for_each_entry(cm_dev, &cm.device_list, list) {
- if (!ib_find_cached_gid(cm_dev->ib_device, &path->sgid,
- sa_conv_pathrec_to_gid_type(path),
- ndev, &p, NULL)) {
- port = cm_dev->port[p - 1];
- break;
+
+ if (attr) {
+ read_lock_irqsave(&cm.device_lock, flags);
+ list_for_each_entry(cm_dev, &cm.device_list, list) {
+ if (cm_dev->ib_device == attr->device) {
+ port = cm_dev->port[attr->port_num - 1];
+ break;
+ }
+ }
+ read_unlock_irqrestore(&cm.device_lock, flags);
+ } else {
+ /* SGID attribute can be NULL in following
+ * conditions.
+ * (a) Alternative path
+ * (b) IB link layer without GRH
+ * (c) LAP send messages
+ */
+ read_lock_irqsave(&cm.device_lock, flags);
+ list_for_each_entry(cm_dev, &cm.device_list, list) {
+ attr = rdma_find_gid(cm_dev->ib_device,
+ &path->sgid,
+ sa_conv_pathrec_to_gid_type(path),
+ NULL);
+ if (!IS_ERR(attr)) {
+ port = cm_dev->port[attr->port_num - 1];
+ break;
+ }
}
+ read_unlock_irqrestore(&cm.device_lock, flags);
+ if (port)
+ rdma_put_gid_attr(attr);
}
- read_unlock_irqrestore(&cm.device_lock, flags);
-
- if (ndev)
- dev_put(ndev);
return port;
}
-static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av,
+static int cm_init_av_by_path(struct sa_path_rec *path,
+ const struct ib_gid_attr *sgid_attr,
+ struct cm_av *av,
struct cm_id_private *cm_id_priv)
{
struct rdma_ah_attr new_ah_attr;
@@ -540,7 +559,7 @@ static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av,
struct cm_port *port;
int ret;
- port = get_cm_port_from_path(path);
+ port = get_cm_port_from_path(path, sgid_attr);
if (!port)
return -EINVAL;
cm_dev = port->cm_dev;
@@ -554,22 +573,26 @@ static int cm_init_av_by_path(struct sa_path_rec *path, struct cm_av *av,
/*
* av->ah_attr might be initialized based on wc or during
- * request processing time. So initialize a new ah_attr on stack.
+ * request processing time which might have reference to sgid_attr.
+ * So initialize a new ah_attr on stack.
* If initialization fails, old ah_attr is used for sending any
* responses. If initialization is successful, than new ah_attr
- * is used by overwriting the old one.
+ * is used by overwriting the old one. So that right ah_attr
+ * can be used to return an error response.
*/
ret = ib_init_ah_attr_from_path(cm_dev->ib_device, port->port_num, path,
- &new_ah_attr);
+ &new_ah_attr, sgid_attr);
if (ret)
return ret;
av->timeout = path->packet_life_time + 1;
ret = add_cm_id_to_port_list(cm_id_priv, av, port);
- if (ret)
+ if (ret) {
+ rdma_destroy_ah_attr(&new_ah_attr);
return ret;
- memcpy(&av->ah_attr, &new_ah_attr, sizeof(new_ah_attr));
+ }
+ rdma_move_ah_attr(&av->ah_attr, &new_ah_attr);
return 0;
}
@@ -1091,6 +1114,9 @@ retest:
wait_for_completion(&cm_id_priv->comp);
while ((work = cm_dequeue_work(cm_id_priv)) != NULL)
cm_free_work(work);
+
+ rdma_destroy_ah_attr(&cm_id_priv->av.ah_attr);
+ rdma_destroy_ah_attr(&cm_id_priv->alt_av.ah_attr);
kfree(cm_id_priv->private_data);
kfree(cm_id_priv);
}
@@ -1230,14 +1256,12 @@ new_id:
}
EXPORT_SYMBOL(ib_cm_insert_listen);
-static __be64 cm_form_tid(struct cm_id_private *cm_id_priv,
- enum cm_msg_sequence msg_seq)
+static __be64 cm_form_tid(struct cm_id_private *cm_id_priv)
{
u64 hi_tid, low_tid;
hi_tid = ((u64) cm_id_priv->av.port->mad_agent->hi_tid) << 32;
- low_tid = (u64) ((__force u32)cm_id_priv->id.local_id |
- (msg_seq << 30));
+ low_tid = (u64)cm_id_priv->id.local_id;
return cpu_to_be64(hi_tid | low_tid);
}
@@ -1265,7 +1289,7 @@ static void cm_format_req(struct cm_req_msg *req_msg,
pri_path->opa.slid);
cm_format_mad_hdr(&req_msg->hdr, CM_REQ_ATTR_ID,
- cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_REQ));
+ cm_form_tid(cm_id_priv));
req_msg->local_comm_id = cm_id_priv->id.local_id;
req_msg->service_id = param->service_id;
@@ -1413,12 +1437,13 @@ int ib_send_cm_req(struct ib_cm_id *cm_id,
goto out;
}
- ret = cm_init_av_by_path(param->primary_path, &cm_id_priv->av,
+ ret = cm_init_av_by_path(param->primary_path,
+ param->ppath_sgid_attr, &cm_id_priv->av,
cm_id_priv);
if (ret)
goto error1;
if (param->alternate_path) {
- ret = cm_init_av_by_path(param->alternate_path,
+ ret = cm_init_av_by_path(param->alternate_path, NULL,
&cm_id_priv->alt_av, cm_id_priv);
if (ret)
goto error1;
@@ -1646,7 +1671,7 @@ static void cm_opa_to_ib_sgid(struct cm_work *work,
(ib_is_opa_gid(&path->sgid))) {
union ib_gid sgid;
- if (ib_get_cached_gid(dev, port_num, 0, &sgid, NULL)) {
+ if (rdma_query_gid(dev, port_num, 0, &sgid)) {
dev_warn(&dev->dev,
"Error updating sgid in CM request\n");
return;
@@ -1914,9 +1939,8 @@ static int cm_req_handler(struct cm_work *work)
struct ib_cm_id *cm_id;
struct cm_id_private *cm_id_priv, *listen_cm_id_priv;
struct cm_req_msg *req_msg;
- union ib_gid gid;
- struct ib_gid_attr gid_attr;
const struct ib_global_route *grh;
+ const struct ib_gid_attr *gid_attr;
int ret;
req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
@@ -1961,24 +1985,13 @@ static int cm_req_handler(struct cm_work *work)
if (cm_req_has_alt_path(req_msg))
memset(&work->path[1], 0, sizeof(work->path[1]));
grh = rdma_ah_read_grh(&cm_id_priv->av.ah_attr);
- ret = ib_get_cached_gid(work->port->cm_dev->ib_device,
- work->port->port_num,
- grh->sgid_index,
- &gid, &gid_attr);
- if (ret) {
- ib_send_cm_rej(cm_id, IB_CM_REJ_UNSUPPORTED, NULL, 0, NULL, 0);
- goto rejected;
- }
+ gid_attr = grh->sgid_attr;
- if (gid_attr.ndev) {
+ if (gid_attr && gid_attr->ndev) {
work->path[0].rec_type =
- sa_conv_gid_to_pathrec_type(gid_attr.gid_type);
- sa_path_set_ifindex(&work->path[0],
- gid_attr.ndev->ifindex);
- sa_path_set_ndev(&work->path[0],
- dev_net(gid_attr.ndev));
- dev_put(gid_attr.ndev);
+ sa_conv_gid_to_pathrec_type(gid_attr->gid_type);
} else {
+ /* If no GID attribute or ndev is null, it is not RoCE. */
cm_path_set_rec_type(work->port->cm_dev->ib_device,
work->port->port_num,
&work->path[0],
@@ -1992,15 +2005,14 @@ static int cm_req_handler(struct cm_work *work)
sa_path_set_dmac(&work->path[0],
cm_id_priv->av.ah_attr.roce.dmac);
work->path[0].hop_limit = grh->hop_limit;
- ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av,
+ ret = cm_init_av_by_path(&work->path[0], gid_attr, &cm_id_priv->av,
cm_id_priv);
if (ret) {
int err;
- err = ib_get_cached_gid(work->port->cm_dev->ib_device,
- work->port->port_num, 0,
- &work->path[0].sgid,
- NULL);
+ err = rdma_query_gid(work->port->cm_dev->ib_device,
+ work->port->port_num, 0,
+ &work->path[0].sgid);
if (err)
ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID,
NULL, 0, NULL, 0);
@@ -2012,8 +2024,8 @@ static int cm_req_handler(struct cm_work *work)
goto rejected;
}
if (cm_req_has_alt_path(req_msg)) {
- ret = cm_init_av_by_path(&work->path[1], &cm_id_priv->alt_av,
- cm_id_priv);
+ ret = cm_init_av_by_path(&work->path[1], NULL,
+ &cm_id_priv->alt_av, cm_id_priv);
if (ret) {
ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_ALT_GID,
&work->path[0].sgid,
@@ -2451,7 +2463,7 @@ static void cm_format_dreq(struct cm_dreq_msg *dreq_msg,
u8 private_data_len)
{
cm_format_mad_hdr(&dreq_msg->hdr, CM_DREQ_ATTR_ID,
- cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_DREQ));
+ cm_form_tid(cm_id_priv));
dreq_msg->local_comm_id = cm_id_priv->id.local_id;
dreq_msg->remote_comm_id = cm_id_priv->id.remote_id;
cm_dreq_set_remote_qpn(dreq_msg, cm_id_priv->remote_qpn);
@@ -3082,7 +3094,7 @@ static void cm_format_lap(struct cm_lap_msg *lap_msg,
alt_ext = opa_is_extended_lid(alternate_path->opa.dlid,
alternate_path->opa.slid);
cm_format_mad_hdr(&lap_msg->hdr, CM_LAP_ATTR_ID,
- cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_LAP));
+ cm_form_tid(cm_id_priv));
lap_msg->local_comm_id = cm_id_priv->id.local_id;
lap_msg->remote_comm_id = cm_id_priv->id.remote_id;
cm_lap_set_remote_qpn(lap_msg, cm_id_priv->remote_qpn);
@@ -3136,7 +3148,7 @@ int ib_send_cm_lap(struct ib_cm_id *cm_id,
goto out;
}
- ret = cm_init_av_by_path(alternate_path, &cm_id_priv->alt_av,
+ ret = cm_init_av_by_path(alternate_path, NULL, &cm_id_priv->alt_av,
cm_id_priv);
if (ret)
goto out;
@@ -3279,7 +3291,7 @@ static int cm_lap_handler(struct cm_work *work)
if (ret)
goto unlock;
- cm_init_av_by_path(param->alternate_path, &cm_id_priv->alt_av,
+ cm_init_av_by_path(param->alternate_path, NULL, &cm_id_priv->alt_av,
cm_id_priv);
cm_id_priv->id.lap_state = IB_CM_LAP_RCVD;
cm_id_priv->tid = lap_msg->hdr.tid;
@@ -3458,7 +3470,7 @@ static void cm_format_sidr_req(struct cm_sidr_req_msg *sidr_req_msg,
struct ib_cm_sidr_req_param *param)
{
cm_format_mad_hdr(&sidr_req_msg->hdr, CM_SIDR_REQ_ATTR_ID,
- cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_SIDR));
+ cm_form_tid(cm_id_priv));
sidr_req_msg->request_id = cm_id_priv->id.local_id;
sidr_req_msg->pkey = param->path->pkey;
sidr_req_msg->service_id = param->service_id;
@@ -3481,7 +3493,9 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id,
return -EINVAL;
cm_id_priv = container_of(cm_id, struct cm_id_private, id);
- ret = cm_init_av_by_path(param->path, &cm_id_priv->av, cm_id_priv);
+ ret = cm_init_av_by_path(param->path, param->sgid_attr,
+ &cm_id_priv->av,
+ cm_id_priv);
if (ret)
goto out;
@@ -3665,7 +3679,8 @@ error: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
}
EXPORT_SYMBOL(ib_send_cm_sidr_rep);
-static void cm_format_sidr_rep_event(struct cm_work *work)
+static void cm_format_sidr_rep_event(struct cm_work *work,
+ const struct cm_id_private *cm_id_priv)
{
struct cm_sidr_rep_msg *sidr_rep_msg;
struct ib_cm_sidr_rep_event_param *param;
@@ -3678,6 +3693,7 @@ static void cm_format_sidr_rep_event(struct cm_work *work)
param->qpn = be32_to_cpu(cm_sidr_rep_get_qpn(sidr_rep_msg));
param->info = &sidr_rep_msg->info;
param->info_len = sidr_rep_msg->info_length;
+ param->sgid_attr = cm_id_priv->av.ah_attr.grh.sgid_attr;
work->cm_event.private_data = &sidr_rep_msg->private_data;
}
@@ -3701,7 +3717,7 @@ static int cm_sidr_rep_handler(struct cm_work *work)
ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
spin_unlock_irq(&cm_id_priv->lock);
- cm_format_sidr_rep_event(work);
+ cm_format_sidr_rep_event(work, cm_id_priv);
cm_process_work(cm_id_priv, work);
return 0;
out:
diff --git a/drivers/infiniband/core/cm_msgs.h b/drivers/infiniband/core/cm_msgs.h
index 8b76f0ef965e..476d4309576d 100644
--- a/drivers/infiniband/core/cm_msgs.h
+++ b/drivers/infiniband/core/cm_msgs.h
@@ -44,13 +44,6 @@
#define IB_CM_CLASS_VERSION 2 /* IB specification 1.2 */
-enum cm_msg_sequence {
- CM_MSG_SEQUENCE_REQ,
- CM_MSG_SEQUENCE_LAP,
- CM_MSG_SEQUENCE_DREQ,
- CM_MSG_SEQUENCE_SIDR
-};
-
struct cm_req_msg {
struct ib_mad_hdr hdr;
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index bff10ab141b0..f2bf997b62cd 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -366,7 +366,6 @@ struct cma_multicast {
void *context;
struct sockaddr_storage addr;
struct kref mcref;
- bool igmp_joined;
u8 join_state;
};
@@ -603,46 +602,54 @@ static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_a
return ret;
}
-static inline int cma_validate_port(struct ib_device *device, u8 port,
- enum ib_gid_type gid_type,
- union ib_gid *gid,
- struct rdma_id_private *id_priv)
+static const struct ib_gid_attr *
+cma_validate_port(struct ib_device *device, u8 port,
+ enum ib_gid_type gid_type,
+ union ib_gid *gid,
+ struct rdma_id_private *id_priv)
{
struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
int bound_if_index = dev_addr->bound_dev_if;
+ const struct ib_gid_attr *sgid_attr;
int dev_type = dev_addr->dev_type;
struct net_device *ndev = NULL;
- int ret = -ENODEV;
if ((dev_type == ARPHRD_INFINIBAND) && !rdma_protocol_ib(device, port))
- return ret;
+ return ERR_PTR(-ENODEV);
if ((dev_type != ARPHRD_INFINIBAND) && rdma_protocol_ib(device, port))
- return ret;
+ return ERR_PTR(-ENODEV);
if (dev_type == ARPHRD_ETHER && rdma_protocol_roce(device, port)) {
ndev = dev_get_by_index(dev_addr->net, bound_if_index);
if (!ndev)
- return ret;
+ return ERR_PTR(-ENODEV);
} else {
gid_type = IB_GID_TYPE_IB;
}
- ret = ib_find_cached_gid_by_port(device, gid, gid_type, port,
- ndev, NULL);
-
+ sgid_attr = rdma_find_gid_by_port(device, gid, gid_type, port, ndev);
if (ndev)
dev_put(ndev);
+ return sgid_attr;
+}
- return ret;
+static void cma_bind_sgid_attr(struct rdma_id_private *id_priv,
+ const struct ib_gid_attr *sgid_attr)
+{
+ WARN_ON(id_priv->id.route.addr.dev_addr.sgid_attr);
+ id_priv->id.route.addr.dev_addr.sgid_attr = sgid_attr;
}
static int cma_acquire_dev(struct rdma_id_private *id_priv,
struct rdma_id_private *listen_id_priv)
{
struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
+ const struct ib_gid_attr *sgid_attr;
struct cma_device *cma_dev;
union ib_gid gid, iboe_gid, *gidp;
+ enum ib_gid_type gid_type;
+ enum ib_gid_type default_type;
int ret = -ENODEV;
u8 port;
@@ -662,14 +669,15 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv,
port = listen_id_priv->id.port_num;
gidp = rdma_protocol_roce(cma_dev->device, port) ?
&iboe_gid : &gid;
-
- ret = cma_validate_port(cma_dev->device, port,
- rdma_protocol_ib(cma_dev->device, port) ?
- IB_GID_TYPE_IB :
- listen_id_priv->gid_type, gidp,
- id_priv);
- if (!ret) {
+ gid_type = rdma_protocol_ib(cma_dev->device, port) ?
+ IB_GID_TYPE_IB :
+ listen_id_priv->gid_type;
+ sgid_attr = cma_validate_port(cma_dev->device, port,
+ gid_type, gidp, id_priv);
+ if (!IS_ERR(sgid_attr)) {
id_priv->id.port_num = port;
+ cma_bind_sgid_attr(id_priv, sgid_attr);
+ ret = 0;
goto out;
}
}
@@ -683,14 +691,16 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv,
gidp = rdma_protocol_roce(cma_dev->device, port) ?
&iboe_gid : &gid;
-
- ret = cma_validate_port(cma_dev->device, port,
- rdma_protocol_ib(cma_dev->device, port) ?
- IB_GID_TYPE_IB :
- cma_dev->default_gid_type[port - 1],
- gidp, id_priv);
- if (!ret) {
+ default_type = cma_dev->default_gid_type[port - 1];
+ gid_type =
+ rdma_protocol_ib(cma_dev->device, port) ?
+ IB_GID_TYPE_IB : default_type;
+ sgid_attr = cma_validate_port(cma_dev->device, port,
+ gid_type, gidp, id_priv);
+ if (!IS_ERR(sgid_attr)) {
id_priv->id.port_num = port;
+ cma_bind_sgid_attr(id_priv, sgid_attr);
+ ret = 0;
goto out;
}
}
@@ -732,8 +742,8 @@ static int cma_resolve_ib_dev(struct rdma_id_private *id_priv)
if (ib_get_cached_port_state(cur_dev->device, p, &port_state))
continue;
- for (i = 0; !ib_get_cached_gid(cur_dev->device, p, i,
- &gid, NULL);
+ for (i = 0; !rdma_query_gid(cur_dev->device,
+ p, i, &gid);
i++) {
if (!memcmp(&gid, dgid, sizeof(gid))) {
cma_dev = cur_dev;
@@ -1629,6 +1639,21 @@ static void cma_release_port(struct rdma_id_private *id_priv)
mutex_unlock(&lock);
}
+static void cma_leave_roce_mc_group(struct rdma_id_private *id_priv,
+ struct cma_multicast *mc)
+{
+ struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
+ struct net_device *ndev = NULL;
+
+ if (dev_addr->bound_dev_if)
+ ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if);
+ if (ndev) {
+ cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid, false);
+ dev_put(ndev);
+ }
+ kref_put(&mc->mcref, release_mc);
+}
+
static void cma_leave_mc_groups(struct rdma_id_private *id_priv)
{
struct cma_multicast *mc;
@@ -1642,22 +1667,7 @@ static void cma_leave_mc_groups(struct rdma_id_private *id_priv)
ib_sa_free_multicast(mc->multicast.ib);
kfree(mc);
} else {
- if (mc->igmp_joined) {
- struct rdma_dev_addr *dev_addr =
- &id_priv->id.route.addr.dev_addr;
- struct net_device *ndev = NULL;
-
- if (dev_addr->bound_dev_if)
- ndev = dev_get_by_index(&init_net,
- dev_addr->bound_dev_if);
- if (ndev) {
- cma_igmp_send(ndev,
- &mc->multicast.ib->rec.mgid,
- false);
- dev_put(ndev);
- }
- }
- kref_put(&mc->mcref, release_mc);
+ cma_leave_roce_mc_group(id_priv, mc);
}
}
}
@@ -1699,6 +1709,10 @@ void rdma_destroy_id(struct rdma_cm_id *id)
cma_deref_id(id_priv->id.context);
kfree(id_priv->id.route.path_rec);
+
+ if (id_priv->id.route.addr.dev_addr.sgid_attr)
+ rdma_put_gid_attr(id_priv->id.route.addr.dev_addr.sgid_attr);
+
put_net(id_priv->id.route.addr.dev_addr.net);
kfree(id_priv);
}
@@ -2561,8 +2575,6 @@ cma_iboe_set_path_rec_l2_fields(struct rdma_id_private *id_priv)
route->path_rec->rec_type = sa_conv_gid_to_pathrec_type(gid_type);
route->path_rec->roce.route_resolved = true;
- sa_path_set_ndev(route->path_rec, addr->dev_addr.net);
- sa_path_set_ifindex(route->path_rec, ndev->ifindex);
sa_path_set_dmac(route->path_rec, addr->dev_addr.dst_dev_addr);
return ndev;
}
@@ -2791,7 +2803,7 @@ static int cma_bind_loopback(struct rdma_id_private *id_priv)
p = 1;
port_found:
- ret = ib_get_cached_gid(cma_dev->device, p, 0, &gid, NULL);
+ ret = rdma_query_gid(cma_dev->device, p, 0, &gid);
if (ret)
goto out;
@@ -3488,7 +3500,8 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
ib_init_ah_attr_from_path(id_priv->id.device,
id_priv->id.port_num,
id_priv->id.route.path_rec,
- &event.param.ud.ah_attr);
+ &event.param.ud.ah_attr,
+ rep->sgid_attr);
event.param.ud.qp_num = rep->qpn;
event.param.ud.qkey = rep->qkey;
event.event = RDMA_CM_EVENT_ESTABLISHED;
@@ -3501,6 +3514,8 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
}
ret = id_priv->id.event_handler(&id_priv->id, &event);
+
+ rdma_destroy_ah_attr(&event.param.ud.ah_attr);
if (ret) {
/* Destroy the CM ID by returning a non-zero value. */
id_priv->cm_id.ib = NULL;
@@ -3557,6 +3572,7 @@ static int cma_resolve_ib_udp(struct rdma_id_private *id_priv,
id_priv->cm_id.ib = id;
req.path = id_priv->id.route.path_rec;
+ req.sgid_attr = id_priv->id.route.addr.dev_addr.sgid_attr;
req.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv));
req.timeout_ms = 1 << (CMA_CM_RESPONSE_TIMEOUT - 8);
req.max_cm_retries = CMA_MAX_CM_RETRIES;
@@ -3618,6 +3634,8 @@ static int cma_connect_ib(struct rdma_id_private *id_priv,
if (route->num_paths == 2)
req.alternate_path = &route->path_rec[1];
+ req.ppath_sgid_attr = id_priv->id.route.addr.dev_addr.sgid_attr;
+ /* Alternate path SGID attribute currently unsupported */
req.service_id = rdma_get_service_id(&id_priv->id, cma_dst_addr(id_priv));
req.qp_num = id_priv->qp_num;
req.qp_type = id_priv->id.qp_type;
@@ -3981,6 +3999,8 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
event.event = RDMA_CM_EVENT_MULTICAST_ERROR;
ret = id_priv->id.event_handler(&id_priv->id, &event);
+
+ rdma_destroy_ah_attr(&event.param.ud.ah_attr);
if (ret) {
cma_exch(id_priv, RDMA_CM_DESTROYING);
mutex_unlock(&id_priv->handler_mutex);
@@ -4168,8 +4188,6 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
if (!send_only) {
err = cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid,
true);
- if (!err)
- mc->igmp_joined = true;
}
}
} else {
@@ -4221,26 +4239,29 @@ int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
memcpy(&mc->addr, addr, rdma_addr_size(addr));
mc->context = context;
mc->id_priv = id_priv;
- mc->igmp_joined = false;
mc->join_state = join_state;
- spin_lock(&id_priv->lock);
- list_add(&mc->list, &id_priv->mc_list);
- spin_unlock(&id_priv->lock);
if (rdma_protocol_roce(id->device, id->port_num)) {
kref_init(&mc->mcref);
ret = cma_iboe_join_multicast(id_priv, mc);
- } else if (rdma_cap_ib_mcast(id->device, id->port_num))
+ if (ret)
+ goto out_err;
+ } else if (rdma_cap_ib_mcast(id->device, id->port_num)) {
ret = cma_join_ib_multicast(id_priv, mc);
- else
+ if (ret)
+ goto out_err;
+ } else {
ret = -ENOSYS;
-
- if (ret) {
- spin_lock_irq(&id_priv->lock);
- list_del(&mc->list);
- spin_unlock_irq(&id_priv->lock);
- kfree(mc);
+ goto out_err;
}
+
+ spin_lock(&id_priv->lock);
+ list_add(&mc->list, &id_priv->mc_list);
+ spin_unlock(&id_priv->lock);
+
+ return 0;
+out_err:
+ kfree(mc);
return ret;
}
EXPORT_SYMBOL(rdma_join_multicast);
@@ -4268,23 +4289,7 @@ void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)
ib_sa_free_multicast(mc->multicast.ib);
kfree(mc);
} else if (rdma_protocol_roce(id->device, id->port_num)) {
- if (mc->igmp_joined) {
- struct rdma_dev_addr *dev_addr =
- &id->route.addr.dev_addr;
- struct net_device *ndev = NULL;
-
- if (dev_addr->bound_dev_if)
- ndev = dev_get_by_index(dev_addr->net,
- dev_addr->bound_dev_if);
- if (ndev) {
- cma_igmp_send(ndev,
- &mc->multicast.ib->rec.mgid,
- false);
- dev_put(ndev);
- }
- mc->igmp_joined = false;
- }
- kref_put(&mc->mcref, release_mc);
+ cma_leave_roce_mc_group(id_priv, mc);
}
return;
}
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 6fa4c59dc7a7..b8144f194777 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -862,25 +862,6 @@ int ib_query_port(struct ib_device *device,
EXPORT_SYMBOL(ib_query_port);
/**
- * ib_query_gid - Get GID table entry
- * @device:Device to query
- * @port_num:Port number to query
- * @index:GID table index to query
- * @gid:Returned GID
- * @attr: Returned GID attributes related to this GID index (only in RoCE).
- * NULL means ignore.
- *
- * ib_query_gid() fetches the specified GID table entry from the cache.
- */
-int ib_query_gid(struct ib_device *device,
- u8 port_num, int index, union ib_gid *gid,
- struct ib_gid_attr *attr)
-{
- return ib_get_cached_gid(device, port_num, index, gid, attr);
-}
-EXPORT_SYMBOL(ib_query_gid);
-
-/**
* ib_enum_roce_netdev - enumerate all RoCE ports
* @ib_dev : IB device we want to query
* @filter: Should we call the callback?
@@ -1057,7 +1038,7 @@ int ib_find_gid(struct ib_device *device, union ib_gid *gid,
continue;
for (i = 0; i < device->port_immutable[port].gid_tbl_len; ++i) {
- ret = ib_query_gid(device, port, i, &tmp_gid, NULL);
+ ret = rdma_query_gid(device, port, i, &tmp_gid);
if (ret)
return ret;
if (!memcmp(&tmp_gid, gid, sizeof *gid)) {
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index f742ae7a768b..34e9b2768324 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -38,6 +38,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/dma-mapping.h>
+#include <linux/idr.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/security.h>
@@ -58,8 +59,13 @@ MODULE_PARM_DESC(send_queue_size, "Size of send queue in number of work requests
module_param_named(recv_queue_size, mad_recvq_size, int, 0444);
MODULE_PARM_DESC(recv_queue_size, "Size of receive queue in number of work requests");
+/*
+ * The mlx4 driver uses the top byte to distinguish which virtual function
+ * generated the MAD, so we must avoid using it.
+ */
+#define AGENT_ID_LIMIT (1 << 24)
+static DEFINE_IDR(ib_mad_clients);
static struct list_head ib_mad_port_list;
-static atomic_t ib_mad_client_id = ATOMIC_INIT(0);
/* Port list lock */
static DEFINE_SPINLOCK(ib_mad_port_list_lock);
@@ -190,6 +196,8 @@ EXPORT_SYMBOL(ib_response_mad);
/*
* ib_register_mad_agent - Register to send/receive MADs
+ *
+ * Context: Process context.
*/
struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
u8 port_num,
@@ -210,7 +218,6 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
struct ib_mad_mgmt_vendor_class *vendor_class;
struct ib_mad_mgmt_method_table *method;
int ret2, qpn;
- unsigned long flags;
u8 mgmt_class, vclass;
/* Validate parameters */
@@ -376,13 +383,24 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
goto error4;
}
- spin_lock_irqsave(&port_priv->reg_lock, flags);
- mad_agent_priv->agent.hi_tid = atomic_inc_return(&ib_mad_client_id);
+ idr_preload(GFP_KERNEL);
+ idr_lock(&ib_mad_clients);
+ ret2 = idr_alloc_cyclic(&ib_mad_clients, mad_agent_priv, 0,
+ AGENT_ID_LIMIT, GFP_ATOMIC);
+ idr_unlock(&ib_mad_clients);
+ idr_preload_end();
+
+ if (ret2 < 0) {
+ ret = ERR_PTR(ret2);
+ goto error5;
+ }
+ mad_agent_priv->agent.hi_tid = ret2;
/*
* Make sure MAD registration (if supplied)
* is non overlapping with any existing ones
*/
+ spin_lock_irq(&port_priv->reg_lock);
if (mad_reg_req) {
mgmt_class = convert_mgmt_class(mad_reg_req->mgmt_class);
if (!is_vendor_class(mgmt_class)) {
@@ -393,7 +411,7 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
if (method) {
if (method_in_use(&method,
mad_reg_req))
- goto error5;
+ goto error6;
}
}
ret2 = add_nonoui_reg_req(mad_reg_req, mad_agent_priv,
@@ -409,24 +427,25 @@ struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
if (is_vendor_method_in_use(
vendor_class,
mad_reg_req))
- goto error5;
+ goto error6;
}
}
ret2 = add_oui_reg_req(mad_reg_req, mad_agent_priv);
}
if (ret2) {
ret = ERR_PTR(ret2);
- goto error5;
+ goto error6;
}
}
-
- /* Add mad agent into port's agent list */
- list_add_tail(&mad_agent_priv->agent_list, &port_priv->agent_list);
- spin_unlock_irqrestore(&port_priv->reg_lock, flags);
+ spin_unlock_irq(&port_priv->reg_lock);
return &mad_agent_priv->agent;
+error6:
+ spin_unlock_irq(&port_priv->reg_lock);
+ idr_lock(&ib_mad_clients);
+ idr_remove(&ib_mad_clients, mad_agent_priv->agent.hi_tid);
+ idr_unlock(&ib_mad_clients);
error5:
- spin_unlock_irqrestore(&port_priv->reg_lock, flags);
ib_mad_agent_security_cleanup(&mad_agent_priv->agent);
error4:
kfree(reg_req);
@@ -575,7 +594,6 @@ static inline void deref_snoop_agent(struct ib_mad_snoop_private *mad_snoop_priv
static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
{
struct ib_mad_port_private *port_priv;
- unsigned long flags;
/* Note that we could still be handling received MADs */
@@ -587,10 +605,12 @@ static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
port_priv = mad_agent_priv->qp_info->port_priv;
cancel_delayed_work(&mad_agent_priv->timed_work);
- spin_lock_irqsave(&port_priv->reg_lock, flags);
+ spin_lock_irq(&port_priv->reg_lock);
remove_mad_reg_req(mad_agent_priv);
- list_del(&mad_agent_priv->agent_list);
- spin_unlock_irqrestore(&port_priv->reg_lock, flags);
+ spin_unlock_irq(&port_priv->reg_lock);
+ idr_lock(&ib_mad_clients);
+ idr_remove(&ib_mad_clients, mad_agent_priv->agent.hi_tid);
+ idr_unlock(&ib_mad_clients);
flush_workqueue(port_priv->wq);
ib_cancel_rmpp_recvs(mad_agent_priv);
@@ -601,7 +621,7 @@ static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
ib_mad_agent_security_cleanup(&mad_agent_priv->agent);
kfree(mad_agent_priv->reg_req);
- kfree(mad_agent_priv);
+ kfree_rcu(mad_agent_priv, rcu);
}
static void unregister_mad_snoop(struct ib_mad_snoop_private *mad_snoop_priv)
@@ -625,6 +645,8 @@ static void unregister_mad_snoop(struct ib_mad_snoop_private *mad_snoop_priv)
/*
* ib_unregister_mad_agent - Unregisters a client from using MAD services
+ *
+ * Context: Process context.
*/
void ib_unregister_mad_agent(struct ib_mad_agent *mad_agent)
{
@@ -1720,22 +1742,19 @@ find_mad_agent(struct ib_mad_port_private *port_priv,
struct ib_mad_agent_private *mad_agent = NULL;
unsigned long flags;
- spin_lock_irqsave(&port_priv->reg_lock, flags);
if (ib_response_mad(mad_hdr)) {
u32 hi_tid;
- struct ib_mad_agent_private *entry;
/*
* Routing is based on high 32 bits of transaction ID
* of MAD.
*/
hi_tid = be64_to_cpu(mad_hdr->tid) >> 32;
- list_for_each_entry(entry, &port_priv->agent_list, agent_list) {
- if (entry->agent.hi_tid == hi_tid) {
- mad_agent = entry;
- break;
- }
- }
+ rcu_read_lock();
+ mad_agent = idr_find(&ib_mad_clients, hi_tid);
+ if (mad_agent && !atomic_inc_not_zero(&mad_agent->refcount))
+ mad_agent = NULL;
+ rcu_read_unlock();
} else {
struct ib_mad_mgmt_class_table *class;
struct ib_mad_mgmt_method_table *method;
@@ -1744,6 +1763,7 @@ find_mad_agent(struct ib_mad_port_private *port_priv,
const struct ib_vendor_mad *vendor_mad;
int index;
+ spin_lock_irqsave(&port_priv->reg_lock, flags);
/*
* Routing is based on version, class, and method
* For "newer" vendor MADs, also based on OUI
@@ -1783,20 +1803,19 @@ find_mad_agent(struct ib_mad_port_private *port_priv,
~IB_MGMT_METHOD_RESP];
}
}
+ if (mad_agent)
+ atomic_inc(&mad_agent->refcount);
+out:
+ spin_unlock_irqrestore(&port_priv->reg_lock, flags);
}
- if (mad_agent) {
- if (mad_agent->agent.recv_handler)
- atomic_inc(&mad_agent->refcount);
- else {
- dev_notice(&port_priv->device->dev,
- "No receive handler for client %p on port %d\n",
- &mad_agent->agent, port_priv->port_num);
- mad_agent = NULL;
- }
+ if (mad_agent && !mad_agent->agent.recv_handler) {
+ dev_notice(&port_priv->device->dev,
+ "No receive handler for client %p on port %d\n",
+ &mad_agent->agent, port_priv->port_num);
+ deref_mad_agent(mad_agent);
+ mad_agent = NULL;
}
-out:
- spin_unlock_irqrestore(&port_priv->reg_lock, flags);
return mad_agent;
}
@@ -1896,8 +1915,8 @@ static inline int rcv_has_same_gid(const struct ib_mad_agent_private *mad_agent_
const struct ib_global_route *grh =
rdma_ah_read_grh(&attr);
- if (ib_get_cached_gid(device, port_num,
- grh->sgid_index, &sgid, NULL))
+ if (rdma_query_gid(device, port_num,
+ grh->sgid_index, &sgid))
return 0;
return !memcmp(sgid.raw, rwc->recv_buf.grh->dgid.raw,
16);
@@ -3159,7 +3178,6 @@ static int ib_mad_port_open(struct ib_device *device,
port_priv->device = device;
port_priv->port_num = port_num;
spin_lock_init(&port_priv->reg_lock);
- INIT_LIST_HEAD(&port_priv->agent_list);
init_mad_qp(port_priv, &port_priv->qp_info[0]);
init_mad_qp(port_priv, &port_priv->qp_info[1]);
@@ -3338,6 +3356,9 @@ int ib_mad_init(void)
INIT_LIST_HEAD(&ib_mad_port_list);
+ /* Client ID 0 is used for snoop-only clients */
+ idr_alloc(&ib_mad_clients, NULL, 0, 0, GFP_KERNEL);
+
if (ib_register_client(&mad_client)) {
pr_err("Couldn't register ib_mad client\n");
return -EINVAL;
diff --git a/drivers/infiniband/core/mad_priv.h b/drivers/infiniband/core/mad_priv.h
index 28669f6419e1..d84ae1671898 100644
--- a/drivers/infiniband/core/mad_priv.h
+++ b/drivers/infiniband/core/mad_priv.h
@@ -89,7 +89,6 @@ struct ib_rmpp_segment {
};
struct ib_mad_agent_private {
- struct list_head agent_list;
struct ib_mad_agent agent;
struct ib_mad_reg_req *reg_req;
struct ib_mad_qp_info *qp_info;
@@ -105,7 +104,10 @@ struct ib_mad_agent_private {
struct list_head rmpp_list;
atomic_t refcount;
- struct completion comp;
+ union {
+ struct completion comp;
+ struct rcu_head rcu;
+ };
};
struct ib_mad_snoop_private {
@@ -203,7 +205,6 @@ struct ib_mad_port_private {
spinlock_t reg_lock;
struct ib_mad_mgmt_version_table version[MAX_MGMT_VERSION];
- struct list_head agent_list;
struct workqueue_struct *wq;
struct ib_mad_qp_info qp_info[IB_MAD_QPS_CORE];
};
diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c
index 6c48f4193dda..d50ff70bb24b 100644
--- a/drivers/infiniband/core/multicast.c
+++ b/drivers/infiniband/core/multicast.c
@@ -716,14 +716,28 @@ int ib_sa_get_mcmember_rec(struct ib_device *device, u8 port_num,
}
EXPORT_SYMBOL(ib_sa_get_mcmember_rec);
+/**
+ * ib_init_ah_from_mcmember - Initialize AH attribute from multicast
+ * member record and gid of the device.
+ * @device: RDMA device
+ * @port_num: Port of the rdma device to consider
+ * @ndev: Optional netdevice, applicable only for RoCE
+ * @gid_type: GID type to consider
+ * @ah_attr: AH attribute to fillup on successful completion
+ *
+ * ib_init_ah_from_mcmember() initializes AH attribute based on multicast
+ * member record and other device properties. On success the caller is
+ * responsible to call rdma_destroy_ah_attr on the ah_attr. Returns 0 on
+ * success or appropriate error code.
+ *
+ */
int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num,
struct ib_sa_mcmember_rec *rec,
struct net_device *ndev,
enum ib_gid_type gid_type,
struct rdma_ah_attr *ah_attr)
{
- int ret;
- u16 gid_index;
+ const struct ib_gid_attr *sgid_attr;
/* GID table is not based on the netdevice for IB link layer,
* so ignore ndev during search.
@@ -733,26 +747,22 @@ int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num,
else if (!rdma_protocol_roce(device, port_num))
return -EINVAL;
- ret = ib_find_cached_gid_by_port(device, &rec->port_gid,
- gid_type, port_num,
- ndev,
- &gid_index);
- if (ret)
- return ret;
+ sgid_attr = rdma_find_gid_by_port(device, &rec->port_gid,
+ gid_type, port_num, ndev);
+ if (IS_ERR(sgid_attr))
+ return PTR_ERR(sgid_attr);
- memset(ah_attr, 0, sizeof *ah_attr);
+ memset(ah_attr, 0, sizeof(*ah_attr));
ah_attr->type = rdma_ah_find_type(device, port_num);
rdma_ah_set_dlid(ah_attr, be16_to_cpu(rec->mlid));
rdma_ah_set_sl(ah_attr, rec->sl);
rdma_ah_set_port_num(ah_attr, port_num);
rdma_ah_set_static_rate(ah_attr, rec->rate);
-
- rdma_ah_set_grh(ah_attr, &rec->mgid,
- be32_to_cpu(rec->flow_label),
- (u8)gid_index,
- rec->hop_limit,
- rec->traffic_class);
+ rdma_move_grh_sgid_attr(ah_attr, &rec->mgid,
+ be32_to_cpu(rec->flow_label),
+ rec->hop_limit, rec->traffic_class,
+ sgid_attr);
return 0;
}
EXPORT_SYMBOL(ib_init_ah_from_mcmember);
diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index 340c7bea45ab..0385ab438320 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -237,15 +237,15 @@ static int fill_port_info(struct sk_buff *msg,
if (ret)
return ret;
- BUILD_BUG_ON(sizeof(attr.port_cap_flags) > sizeof(u64));
- if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS,
- (u64)attr.port_cap_flags, RDMA_NLDEV_ATTR_PAD))
- return -EMSGSIZE;
- if (rdma_protocol_ib(device, port) &&
- nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SUBNET_PREFIX,
- attr.subnet_prefix, RDMA_NLDEV_ATTR_PAD))
- return -EMSGSIZE;
if (rdma_protocol_ib(device, port)) {
+ BUILD_BUG_ON(sizeof(attr.port_cap_flags) > sizeof(u64));
+ if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS,
+ (u64)attr.port_cap_flags,
+ RDMA_NLDEV_ATTR_PAD))
+ return -EMSGSIZE;
+ if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SUBNET_PREFIX,
+ attr.subnet_prefix, RDMA_NLDEV_ATTR_PAD))
+ return -EMSGSIZE;
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_LID, attr.lid))
return -EMSGSIZE;
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_SM_LID, attr.sm_lid))
diff --git a/drivers/infiniband/core/rdma_core.c b/drivers/infiniband/core/rdma_core.c
index a6e904973ba8..847c6a2f1346 100644
--- a/drivers/infiniband/core/rdma_core.c
+++ b/drivers/infiniband/core/rdma_core.c
@@ -52,10 +52,10 @@ int uverbs_ns_idx(u16 *id, unsigned int ns_count)
return ret;
}
-const struct uverbs_object_spec *uverbs_get_object(const struct ib_device *ibdev,
+const struct uverbs_object_spec *uverbs_get_object(struct ib_uverbs_file *ufile,
uint16_t object)
{
- const struct uverbs_root_spec *object_hash = ibdev->specs_root;
+ const struct uverbs_root_spec *object_hash = ufile->device->specs_root;
const struct uverbs_object_spec_hash *objects;
int ret = uverbs_ns_idx(&object, object_hash->num_buckets);
@@ -128,7 +128,29 @@ static int uverbs_try_lock_object(struct ib_uobject *uobj, bool exclusive)
return atomic_cmpxchg(&uobj->usecnt, 0, -1) == 0 ? 0 : -EBUSY;
}
-static struct ib_uobject *alloc_uobj(struct ib_ucontext *context,
+/*
+ * Does both rdma_lookup_get_uobject() and rdma_remove_commit_uobject(), then
+ * returns success_res on success (negative errno on failure). For use by
+ * callers that do not need the uobj.
+ */
+int __uobj_perform_destroy(const struct uverbs_obj_type *type, int id,
+ struct ib_uverbs_file *ufile, int success_res)
+{
+ struct ib_uobject *uobj;
+ int ret;
+
+ uobj = rdma_lookup_get_uobject(type, ufile, id, true);
+ if (IS_ERR(uobj))
+ return PTR_ERR(uobj);
+
+ ret = rdma_remove_commit_uobject(uobj);
+ if (ret)
+ return ret;
+
+ return success_res;
+}
+
+static struct ib_uobject *alloc_uobj(struct ib_uverbs_file *ufile,
const struct uverbs_obj_type *type)
{
struct ib_uobject *uobj = kzalloc(type->obj_size, GFP_KERNEL);
@@ -139,7 +161,8 @@ static struct ib_uobject *alloc_uobj(struct ib_ucontext *context,
* user_handle should be filled by the handler,
* The object is added to the list in the commit stage.
*/
- uobj->context = context;
+ uobj->ufile = ufile;
+ uobj->context = ufile->ucontext;
uobj->type = type;
/*
* Allocated objects start out as write locked to deny any other
@@ -157,19 +180,19 @@ static int idr_add_uobj(struct ib_uobject *uobj)
int ret;
idr_preload(GFP_KERNEL);
- spin_lock(&uobj->context->ufile->idr_lock);
+ spin_lock(&uobj->ufile->idr_lock);
/*
* We start with allocating an idr pointing to NULL. This represents an
* object which isn't initialized yet. We'll replace it later on with
* the real object once we commit.
*/
- ret = idr_alloc(&uobj->context->ufile->idr, NULL, 0,
+ ret = idr_alloc(&uobj->ufile->idr, NULL, 0,
min_t(unsigned long, U32_MAX - 1, INT_MAX), GFP_NOWAIT);
if (ret >= 0)
uobj->id = ret;
- spin_unlock(&uobj->context->ufile->idr_lock);
+ spin_unlock(&uobj->ufile->idr_lock);
idr_preload_end();
return ret < 0 ? ret : 0;
@@ -181,21 +204,21 @@ static int idr_add_uobj(struct ib_uobject *uobj)
*/
static void uverbs_idr_remove_uobj(struct ib_uobject *uobj)
{
- spin_lock(&uobj->context->ufile->idr_lock);
- idr_remove(&uobj->context->ufile->idr, uobj->id);
- spin_unlock(&uobj->context->ufile->idr_lock);
+ spin_lock(&uobj->ufile->idr_lock);
+ idr_remove(&uobj->ufile->idr, uobj->id);
+ spin_unlock(&uobj->ufile->idr_lock);
}
/* Returns the ib_uobject or an error. The caller should check for IS_ERR. */
-static struct ib_uobject *lookup_get_idr_uobject(const struct uverbs_obj_type *type,
- struct ib_ucontext *ucontext,
- int id, bool exclusive)
+static struct ib_uobject *
+lookup_get_idr_uobject(const struct uverbs_obj_type *type,
+ struct ib_uverbs_file *ufile, int id, bool exclusive)
{
struct ib_uobject *uobj;
rcu_read_lock();
/* object won't be released as we're protected in rcu */
- uobj = idr_find(&ucontext->ufile->idr, id);
+ uobj = idr_find(&ufile->idr, id);
if (!uobj) {
uobj = ERR_PTR(-ENOENT);
goto free;
@@ -216,7 +239,7 @@ free:
}
static struct ib_uobject *lookup_get_fd_uobject(const struct uverbs_obj_type *type,
- struct ib_ucontext *ucontext,
+ struct ib_uverbs_file *ufile,
int id, bool exclusive)
{
struct file *f;
@@ -247,13 +270,13 @@ static struct ib_uobject *lookup_get_fd_uobject(const struct uverbs_obj_type *ty
}
struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_obj_type *type,
- struct ib_ucontext *ucontext,
- int id, bool exclusive)
+ struct ib_uverbs_file *ufile, int id,
+ bool exclusive)
{
struct ib_uobject *uobj;
int ret;
- uobj = type->type_class->lookup_get(type, ucontext, id, exclusive);
+ uobj = type->type_class->lookup_get(type, ufile, id, exclusive);
if (IS_ERR(uobj))
return uobj;
@@ -264,7 +287,7 @@ struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_obj_type *type,
ret = uverbs_try_lock_object(uobj, exclusive);
if (ret) {
- WARN(ucontext->cleanup_reason,
+ WARN(uobj->ufile->cleanup_reason,
"ib_uverbs: Trying to lookup_get while cleanup context\n");
goto free;
}
@@ -277,12 +300,12 @@ free:
}
static struct ib_uobject *alloc_begin_idr_uobject(const struct uverbs_obj_type *type,
- struct ib_ucontext *ucontext)
+ struct ib_uverbs_file *ufile)
{
int ret;
struct ib_uobject *uobj;
- uobj = alloc_uobj(ucontext, type);
+ uobj = alloc_uobj(ufile, type);
if (IS_ERR(uobj))
return uobj;
@@ -290,7 +313,7 @@ static struct ib_uobject *alloc_begin_idr_uobject(const struct uverbs_obj_type *
if (ret)
goto uobj_put;
- ret = ib_rdmacg_try_charge(&uobj->cg_obj, ucontext->device,
+ ret = ib_rdmacg_try_charge(&uobj->cg_obj, ufile->ucontext->device,
RDMACG_RESOURCE_HCA_OBJECT);
if (ret)
goto idr_remove;
@@ -305,29 +328,27 @@ uobj_put:
}
static struct ib_uobject *alloc_begin_fd_uobject(const struct uverbs_obj_type *type,
- struct ib_ucontext *ucontext)
+ struct ib_uverbs_file *ufile)
{
const struct uverbs_obj_fd_type *fd_type =
container_of(type, struct uverbs_obj_fd_type, type);
int new_fd;
struct ib_uobject *uobj;
- struct ib_uobject_file *uobj_file;
struct file *filp;
new_fd = get_unused_fd_flags(O_CLOEXEC);
if (new_fd < 0)
return ERR_PTR(new_fd);
- uobj = alloc_uobj(ucontext, type);
+ uobj = alloc_uobj(ufile, type);
if (IS_ERR(uobj)) {
put_unused_fd(new_fd);
return uobj;
}
- uobj_file = container_of(uobj, struct ib_uobject_file, uobj);
filp = anon_inode_getfile(fd_type->name,
fd_type->fops,
- uobj_file,
+ uobj,
fd_type->flags);
if (IS_ERR(filp)) {
put_unused_fd(new_fd);
@@ -335,19 +356,19 @@ static struct ib_uobject *alloc_begin_fd_uobject(const struct uverbs_obj_type *t
return (void *)filp;
}
- uobj_file->uobj.id = new_fd;
- uobj_file->uobj.object = filp;
- uobj_file->ufile = ucontext->ufile;
+ uobj->id = new_fd;
+ uobj->object = filp;
+ uobj->ufile = ufile;
INIT_LIST_HEAD(&uobj->list);
- kref_get(&uobj_file->ufile->ref);
+ kref_get(&ufile->ref);
return uobj;
}
struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_obj_type *type,
- struct ib_ucontext *ucontext)
+ struct ib_uverbs_file *ufile)
{
- return type->type_class->alloc_begin(type, ucontext);
+ return type->type_class->alloc_begin(type, ufile);
}
static int __must_check remove_commit_idr_uobject(struct ib_uobject *uobj,
@@ -360,9 +381,10 @@ static int __must_check remove_commit_idr_uobject(struct ib_uobject *uobj,
/*
* We can only fail gracefully if the user requested to destroy the
- * object. In the rest of the cases, just remove whatever you can.
+ * object or when a retry may be called upon an error.
+ * In the rest of the cases, just remove whatever you can.
*/
- if (why == RDMA_REMOVE_DESTROY && ret)
+ if (ib_is_destroy_retryable(ret, why, uobj))
return ret;
ib_rdmacg_uncharge(&uobj->cg_obj, uobj->context->device,
@@ -374,10 +396,8 @@ static int __must_check remove_commit_idr_uobject(struct ib_uobject *uobj,
static void alloc_abort_fd_uobject(struct ib_uobject *uobj)
{
- struct ib_uobject_file *uobj_file =
- container_of(uobj, struct ib_uobject_file, uobj);
struct file *filp = uobj->object;
- int id = uobj_file->uobj.id;
+ int id = uobj->id;
/* Unsuccessful NEW */
fput(filp);
@@ -389,11 +409,9 @@ static int __must_check remove_commit_fd_uobject(struct ib_uobject *uobj,
{
const struct uverbs_obj_fd_type *fd_type =
container_of(uobj->type, struct uverbs_obj_fd_type, type);
- struct ib_uobject_file *uobj_file =
- container_of(uobj, struct ib_uobject_file, uobj);
- int ret = fd_type->context_closed(uobj_file, why);
+ int ret = fd_type->context_closed(uobj, why);
- if (why == RDMA_REMOVE_DESTROY && ret)
+ if (ib_is_destroy_retryable(ret, why, uobj))
return ret;
if (why == RDMA_REMOVE_DURING_CLEANUP) {
@@ -401,7 +419,7 @@ static int __must_check remove_commit_fd_uobject(struct ib_uobject *uobj,
return ret;
}
- uobj_file->uobj.context = NULL;
+ uobj->context = NULL;
return ret;
}
@@ -418,18 +436,18 @@ static void assert_uverbs_usecnt(struct ib_uobject *uobj, bool exclusive)
static int __must_check _rdma_remove_commit_uobject(struct ib_uobject *uobj,
enum rdma_remove_reason why)
{
+ struct ib_uverbs_file *ufile = uobj->ufile;
int ret;
- struct ib_ucontext *ucontext = uobj->context;
ret = uobj->type->type_class->remove_commit(uobj, why);
- if (ret && why == RDMA_REMOVE_DESTROY) {
+ if (ib_is_destroy_retryable(ret, why, uobj)) {
/* We couldn't remove the object, so just unlock the uobject */
atomic_set(&uobj->usecnt, 0);
uobj->type->type_class->lookup_put(uobj, true);
} else {
- mutex_lock(&ucontext->uobjects_lock);
+ mutex_lock(&ufile->uobjects_lock);
list_del(&uobj->list);
- mutex_unlock(&ucontext->uobjects_lock);
+ mutex_unlock(&ufile->uobjects_lock);
/* put the ref we took when we created the object */
uverbs_uobject_put(uobj);
}
@@ -441,19 +459,19 @@ static int __must_check _rdma_remove_commit_uobject(struct ib_uobject *uobj,
int __must_check rdma_remove_commit_uobject(struct ib_uobject *uobj)
{
int ret;
- struct ib_ucontext *ucontext = uobj->context;
+ struct ib_uverbs_file *ufile = uobj->ufile;
/* put the ref count we took at lookup_get */
uverbs_uobject_put(uobj);
/* Cleanup is running. Calling this should have been impossible */
- if (!down_read_trylock(&ucontext->cleanup_rwsem)) {
+ if (!down_read_trylock(&ufile->cleanup_rwsem)) {
WARN(true, "ib_uverbs: Cleanup is running while removing an uobject\n");
return 0;
}
assert_uverbs_usecnt(uobj, true);
ret = _rdma_remove_commit_uobject(uobj, RDMA_REMOVE_DESTROY);
- up_read(&ucontext->cleanup_rwsem);
+ up_read(&ufile->cleanup_rwsem);
return ret;
}
@@ -473,10 +491,10 @@ static const struct uverbs_obj_type null_obj_type = {
int rdma_explicit_destroy(struct ib_uobject *uobject)
{
int ret;
- struct ib_ucontext *ucontext = uobject->context;
+ struct ib_uverbs_file *ufile = uobject->ufile;
/* Cleanup is running. Calling this should have been impossible */
- if (!down_read_trylock(&ucontext->cleanup_rwsem)) {
+ if (!down_read_trylock(&ufile->cleanup_rwsem)) {
WARN(true, "ib_uverbs: Cleanup is running while removing an uobject\n");
return 0;
}
@@ -489,38 +507,36 @@ int rdma_explicit_destroy(struct ib_uobject *uobject)
uobject->type = &null_obj_type;
out:
- up_read(&ucontext->cleanup_rwsem);
+ up_read(&ufile->cleanup_rwsem);
return ret;
}
static void alloc_commit_idr_uobject(struct ib_uobject *uobj)
{
- spin_lock(&uobj->context->ufile->idr_lock);
+ spin_lock(&uobj->ufile->idr_lock);
/*
* We already allocated this IDR with a NULL object, so
* this shouldn't fail.
*/
- WARN_ON(idr_replace(&uobj->context->ufile->idr,
- uobj, uobj->id));
- spin_unlock(&uobj->context->ufile->idr_lock);
+ WARN_ON(idr_replace(&uobj->ufile->idr, uobj, uobj->id));
+ spin_unlock(&uobj->ufile->idr_lock);
}
static void alloc_commit_fd_uobject(struct ib_uobject *uobj)
{
- struct ib_uobject_file *uobj_file =
- container_of(uobj, struct ib_uobject_file, uobj);
-
- fd_install(uobj_file->uobj.id, uobj->object);
+ fd_install(uobj->id, uobj->object);
/* This shouldn't be used anymore. Use the file object instead */
- uobj_file->uobj.id = 0;
+ uobj->id = 0;
/* Get another reference as we export this to the fops */
- uverbs_uobject_get(&uobj_file->uobj);
+ uverbs_uobject_get(uobj);
}
int rdma_alloc_commit_uobject(struct ib_uobject *uobj)
{
+ struct ib_uverbs_file *ufile = uobj->ufile;
+
/* Cleanup is running. Calling this should have been impossible */
- if (!down_read_trylock(&uobj->context->cleanup_rwsem)) {
+ if (!down_read_trylock(&ufile->cleanup_rwsem)) {
int ret;
WARN(true, "ib_uverbs: Cleanup is running while allocating an uobject\n");
@@ -536,12 +552,12 @@ int rdma_alloc_commit_uobject(struct ib_uobject *uobj)
assert_uverbs_usecnt(uobj, true);
atomic_set(&uobj->usecnt, 0);
- mutex_lock(&uobj->context->uobjects_lock);
- list_add(&uobj->list, &uobj->context->uobjects);
- mutex_unlock(&uobj->context->uobjects_lock);
+ mutex_lock(&ufile->uobjects_lock);
+ list_add(&uobj->list, &ufile->uobjects);
+ mutex_unlock(&ufile->uobjects_lock);
uobj->type->type_class->alloc_commit(uobj);
- up_read(&uobj->context->cleanup_rwsem);
+ up_read(&ufile->cleanup_rwsem);
return 0;
}
@@ -611,23 +627,22 @@ const struct uverbs_obj_type_class uverbs_idr_class = {
*/
.needs_kfree_rcu = true,
};
+EXPORT_SYMBOL(uverbs_idr_class);
-static void _uverbs_close_fd(struct ib_uobject_file *uobj_file)
+static void _uverbs_close_fd(struct ib_uobject *uobj)
{
- struct ib_ucontext *ucontext;
- struct ib_uverbs_file *ufile = uobj_file->ufile;
+ struct ib_uverbs_file *ufile = uobj->ufile;
int ret;
- mutex_lock(&uobj_file->ufile->cleanup_mutex);
+ mutex_lock(&ufile->cleanup_mutex);
/* uobject was either already cleaned up or is cleaned up right now anyway */
- if (!uobj_file->uobj.context ||
- !down_read_trylock(&uobj_file->uobj.context->cleanup_rwsem))
+ if (!uobj->context ||
+ !down_read_trylock(&ufile->cleanup_rwsem))
goto unlock;
- ucontext = uobj_file->uobj.context;
- ret = _rdma_remove_commit_uobject(&uobj_file->uobj, RDMA_REMOVE_CLOSE);
- up_read(&ucontext->cleanup_rwsem);
+ ret = _rdma_remove_commit_uobject(uobj, RDMA_REMOVE_CLOSE);
+ up_read(&ufile->cleanup_rwsem);
if (ret)
pr_warn("uverbs: unable to clean up uobject file in uverbs_close_fd.\n");
unlock:
@@ -636,78 +651,87 @@ unlock:
void uverbs_close_fd(struct file *f)
{
- struct ib_uobject_file *uobj_file = f->private_data;
- struct kref *uverbs_file_ref = &uobj_file->ufile->ref;
+ struct ib_uobject *uobj = f->private_data;
+ struct kref *uverbs_file_ref = &uobj->ufile->ref;
- _uverbs_close_fd(uobj_file);
- uverbs_uobject_put(&uobj_file->uobj);
+ _uverbs_close_fd(uobj);
+ uverbs_uobject_put(uobj);
kref_put(uverbs_file_ref, ib_uverbs_release_file);
}
-void uverbs_cleanup_ucontext(struct ib_ucontext *ucontext, bool device_removed)
+static int __uverbs_cleanup_ufile(struct ib_uverbs_file *ufile,
+ enum rdma_remove_reason reason)
{
- enum rdma_remove_reason reason = device_removed ?
- RDMA_REMOVE_DRIVER_REMOVE : RDMA_REMOVE_CLOSE;
- unsigned int cur_order = 0;
+ struct ib_uobject *obj, *next_obj;
+ int ret = -EINVAL;
+ int err = 0;
- ucontext->cleanup_reason = reason;
/*
- * Waits for all remove_commit and alloc_commit to finish. Logically, We
- * want to hold this forever as the context is going to be destroyed,
- * but we'll release it since it causes a "held lock freed" BUG message.
+ * This shouldn't run while executing other commands on this
+ * context. Thus, the only thing we should take care of is
+ * releasing a FD while traversing this list. The FD could be
+ * closed and released from the _release fop of this FD.
+ * In order to mitigate this, we add a lock.
+ * We take and release the lock per traversal in order to let
+ * other threads (which might still use the FDs) chance to run.
*/
- down_write(&ucontext->cleanup_rwsem);
-
- while (!list_empty(&ucontext->uobjects)) {
- struct ib_uobject *obj, *next_obj;
- unsigned int next_order = UINT_MAX;
-
+ mutex_lock(&ufile->uobjects_lock);
+ ufile->cleanup_reason = reason;
+ list_for_each_entry_safe(obj, next_obj, &ufile->uobjects, list) {
/*
- * This shouldn't run while executing other commands on this
- * context. Thus, the only thing we should take care of is
- * releasing a FD while traversing this list. The FD could be
- * closed and released from the _release fop of this FD.
- * In order to mitigate this, we add a lock.
- * We take and release the lock per order traversal in order
- * to let other threads (which might still use the FDs) chance
- * to run.
+ * if we hit this WARN_ON, that means we are
+ * racing with a lookup_get.
*/
- mutex_lock(&ucontext->uobjects_lock);
- list_for_each_entry_safe(obj, next_obj, &ucontext->uobjects,
- list) {
- if (obj->type->destroy_order == cur_order) {
- int ret;
-
- /*
- * if we hit this WARN_ON, that means we are
- * racing with a lookup_get.
- */
- WARN_ON(uverbs_try_lock_object(obj, true));
- ret = obj->type->type_class->remove_commit(obj,
- reason);
- list_del(&obj->list);
- if (ret)
- pr_warn("ib_uverbs: failed to remove uobject id %d order %u\n",
- obj->id, cur_order);
- /* put the ref we took when we created the object */
- uverbs_uobject_put(obj);
- } else {
- next_order = min(next_order,
- obj->type->destroy_order);
- }
+ WARN_ON(uverbs_try_lock_object(obj, true));
+ err = obj->type->type_class->remove_commit(obj, reason);
+
+ if (ib_is_destroy_retryable(err, reason, obj)) {
+ pr_debug("ib_uverbs: failed to remove uobject id %d err %d\n",
+ obj->id, err);
+ atomic_set(&obj->usecnt, 0);
+ continue;
}
- mutex_unlock(&ucontext->uobjects_lock);
- cur_order = next_order;
+
+ if (err)
+ pr_err("ib_uverbs: unable to remove uobject id %d err %d\n",
+ obj->id, err);
+
+ list_del(&obj->list);
+ /* put the ref we took when we created the object */
+ uverbs_uobject_put(obj);
+ ret = 0;
}
- up_write(&ucontext->cleanup_rwsem);
+ mutex_unlock(&ufile->uobjects_lock);
+ return ret;
}
-void uverbs_initialize_ucontext(struct ib_ucontext *ucontext)
+void uverbs_cleanup_ufile(struct ib_uverbs_file *ufile, bool device_removed)
{
- ucontext->cleanup_reason = 0;
- mutex_init(&ucontext->uobjects_lock);
- INIT_LIST_HEAD(&ucontext->uobjects);
- init_rwsem(&ucontext->cleanup_rwsem);
+ enum rdma_remove_reason reason = device_removed ?
+ RDMA_REMOVE_DRIVER_REMOVE :
+ RDMA_REMOVE_CLOSE;
+
+ /*
+ * Waits for all remove_commit and alloc_commit to finish. Logically, We
+ * want to hold this forever as the context is going to be destroyed,
+ * but we'll release it since it causes a "held lock freed" BUG message.
+ */
+ down_write(&ufile->cleanup_rwsem);
+ ufile->ucontext->cleanup_retryable = true;
+ while (!list_empty(&ufile->uobjects))
+ if (__uverbs_cleanup_ufile(ufile, reason)) {
+ /*
+ * No entry was cleaned-up successfully during this
+ * iteration
+ */
+ break;
+ }
+
+ ufile->ucontext->cleanup_retryable = false;
+ if (!list_empty(&ufile->uobjects))
+ __uverbs_cleanup_ufile(ufile, reason);
+
+ up_write(&ufile->cleanup_rwsem);
}
const struct uverbs_obj_type_class uverbs_fd_class = {
@@ -719,20 +743,21 @@ const struct uverbs_obj_type_class uverbs_fd_class = {
.remove_commit = remove_commit_fd_uobject,
.needs_kfree_rcu = false,
};
+EXPORT_SYMBOL(uverbs_fd_class);
-struct ib_uobject *uverbs_get_uobject_from_context(const struct uverbs_obj_type *type_attrs,
- struct ib_ucontext *ucontext,
- enum uverbs_obj_access access,
- int id)
+struct ib_uobject *
+uverbs_get_uobject_from_file(const struct uverbs_obj_type *type_attrs,
+ struct ib_uverbs_file *ufile,
+ enum uverbs_obj_access access, int id)
{
switch (access) {
case UVERBS_ACCESS_READ:
- return rdma_lookup_get_uobject(type_attrs, ucontext, id, false);
+ return rdma_lookup_get_uobject(type_attrs, ufile, id, false);
case UVERBS_ACCESS_DESTROY:
case UVERBS_ACCESS_WRITE:
- return rdma_lookup_get_uobject(type_attrs, ucontext, id, true);
+ return rdma_lookup_get_uobject(type_attrs, ufile, id, true);
case UVERBS_ACCESS_NEW:
- return rdma_alloc_begin_uobject(type_attrs, ucontext);
+ return rdma_alloc_begin_uobject(type_attrs, ufile);
default:
WARN_ON(true);
return ERR_PTR(-EOPNOTSUPP);
@@ -777,43 +802,3 @@ int uverbs_finalize_object(struct ib_uobject *uobj,
return ret;
}
-
-int uverbs_finalize_objects(struct uverbs_attr_bundle *attrs_bundle,
- struct uverbs_attr_spec_hash * const *spec_hash,
- size_t num,
- bool commit)
-{
- unsigned int i;
- int ret = 0;
-
- for (i = 0; i < num; i++) {
- struct uverbs_attr_bundle_hash *curr_bundle =
- &attrs_bundle->hash[i];
- const struct uverbs_attr_spec_hash *curr_spec_bucket =
- spec_hash[i];
- unsigned int j;
-
- for (j = 0; j < curr_bundle->num_attrs; j++) {
- struct uverbs_attr *attr;
- const struct uverbs_attr_spec *spec;
-
- if (!uverbs_attr_is_valid_in_hash(curr_bundle, j))
- continue;
-
- attr = &curr_bundle->attrs[j];
- spec = &curr_spec_bucket->attrs[j];
-
- if (spec->type == UVERBS_ATTR_TYPE_IDR ||
- spec->type == UVERBS_ATTR_TYPE_FD) {
- int current_ret;
-
- current_ret = uverbs_finalize_object(attr->obj_attr.uobject,
- spec->obj.access,
- commit);
- if (!ret)
- ret = current_ret;
- }
- }
- }
- return ret;
-}
diff --git a/drivers/infiniband/core/rdma_core.h b/drivers/infiniband/core/rdma_core.h
index 1efcf93238dd..1bba60e960c1 100644
--- a/drivers/infiniband/core/rdma_core.h
+++ b/drivers/infiniband/core/rdma_core.h
@@ -44,19 +44,12 @@
#include <linux/mutex.h>
int uverbs_ns_idx(u16 *id, unsigned int ns_count);
-const struct uverbs_object_spec *uverbs_get_object(const struct ib_device *ibdev,
+const struct uverbs_object_spec *uverbs_get_object(struct ib_uverbs_file *ufile,
uint16_t object);
const struct uverbs_method_spec *uverbs_get_method(const struct uverbs_object_spec *object,
uint16_t method);
-/*
- * These functions initialize the context and cleanups its uobjects.
- * The context has a list of objects which is protected by a mutex
- * on the context. initialize_ucontext should be called when we create
- * a context.
- * cleanup_ucontext removes all uobjects from the context and puts them.
- */
-void uverbs_cleanup_ucontext(struct ib_ucontext *ucontext, bool device_removed);
-void uverbs_initialize_ucontext(struct ib_ucontext *ucontext);
+
+void uverbs_cleanup_ufile(struct ib_uverbs_file *ufile, bool device_removed);
/*
* uverbs_uobject_get is called in order to increase the reference count on
@@ -82,7 +75,7 @@ void uverbs_uobject_put(struct ib_uobject *uobject);
void uverbs_close_fd(struct file *f);
/*
- * Get an ib_uobject that corresponds to the given id from ucontext, assuming
+ * Get an ib_uobject that corresponds to the given id from ufile, assuming
* the object is from the given type. Lock it to the required access when
* applicable.
* This function could create (access == NEW), destroy (access == DESTROY)
@@ -90,13 +83,11 @@ void uverbs_close_fd(struct file *f);
* The action will be finalized only when uverbs_finalize_object or
* uverbs_finalize_objects are called.
*/
-struct ib_uobject *uverbs_get_uobject_from_context(const struct uverbs_obj_type *type_attrs,
- struct ib_ucontext *ucontext,
- enum uverbs_obj_access access,
- int id);
-int uverbs_finalize_object(struct ib_uobject *uobj,
- enum uverbs_obj_access access,
- bool commit);
+struct ib_uobject *
+uverbs_get_uobject_from_file(const struct uverbs_obj_type *type_attrs,
+ struct ib_uverbs_file *ufile,
+ enum uverbs_obj_access access, int id);
+
/*
* Note that certain finalize stages could return a status:
* (a) alloc_commit could return a failure if the object is committed at the
@@ -112,9 +103,8 @@ int uverbs_finalize_object(struct ib_uobject *uobj,
* function. For example, this could happen when we couldn't destroy an
* object.
*/
-int uverbs_finalize_objects(struct uverbs_attr_bundle *attrs_bundle,
- struct uverbs_attr_spec_hash * const *spec_hash,
- size_t num,
- bool commit);
+int uverbs_finalize_object(struct ib_uobject *uobj,
+ enum uverbs_obj_access access,
+ bool commit);
#endif /* RDMA_CORE_H */
diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c
index c8963e91f92a..474d65297afc 100644
--- a/drivers/infiniband/core/rw.c
+++ b/drivers/infiniband/core/rw.c
@@ -87,7 +87,7 @@ static int rdma_rw_init_one_mr(struct ib_qp *qp, u8 port_num,
}
ret = ib_map_mr_sg(reg->mr, sg, nents, &offset, PAGE_SIZE);
- if (ret < nents) {
+ if (ret < 0 || ret < nents) {
ib_mr_pool_put(qp, &qp->rdma_mrs, reg->mr);
return -EINVAL;
}
@@ -325,7 +325,7 @@ out_unmap_sg:
EXPORT_SYMBOL(rdma_rw_ctx_init);
/**
- * rdma_rw_ctx_signature init - initialize a RW context with signature offload
+ * rdma_rw_ctx_signature_init - initialize a RW context with signature offload
* @ctx: context to initialize
* @qp: queue pair to operate on
* @port_num: port num to which the connection is bound
diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c
index a61ec7e33613..7b794a14d6e8 100644
--- a/drivers/infiniband/core/sa_query.c
+++ b/drivers/infiniband/core/sa_query.c
@@ -1227,20 +1227,10 @@ static u8 get_src_path_mask(struct ib_device *device, u8 port_num)
return src_path_mask;
}
-static int
-roce_resolve_route_from_path(struct ib_device *device, u8 port_num,
- struct sa_path_rec *rec)
+static int roce_resolve_route_from_path(struct sa_path_rec *rec,
+ const struct ib_gid_attr *attr)
{
- struct net_device *resolved_dev;
- struct net_device *ndev;
- struct net_device *idev;
- struct rdma_dev_addr dev_addr = {
- .bound_dev_if = ((sa_path_get_ifindex(rec) >= 0) ?
- sa_path_get_ifindex(rec) : 0),
- .net = sa_path_get_ndev(rec) ?
- sa_path_get_ndev(rec) :
- &init_net
- };
+ struct rdma_dev_addr dev_addr = {};
union {
struct sockaddr _sockaddr;
struct sockaddr_in _sockaddr_in;
@@ -1250,9 +1240,14 @@ roce_resolve_route_from_path(struct ib_device *device, u8 port_num,
if (rec->roce.route_resolved)
return 0;
+ if (!attr || !attr->ndev)
+ return -EINVAL;
- if (!device->get_netdev)
- return -EOPNOTSUPP;
+ dev_addr.bound_dev_if = attr->ndev->ifindex;
+ /* TODO: Use net from the ib_gid_attr once it is added to it,
+ * until than, limit itself to init_net.
+ */
+ dev_addr.net = &init_net;
rdma_gid2ip(&sgid_addr._sockaddr, &rec->sgid);
rdma_gid2ip(&dgid_addr._sockaddr, &rec->dgid);
@@ -1268,60 +1263,52 @@ roce_resolve_route_from_path(struct ib_device *device, u8 port_num,
rec->rec_type != SA_PATH_REC_TYPE_ROCE_V2)
return -EINVAL;
- idev = device->get_netdev(device, port_num);
- if (!idev)
- return -ENODEV;
-
- resolved_dev = dev_get_by_index(dev_addr.net,
- dev_addr.bound_dev_if);
- if (!resolved_dev) {
- ret = -ENODEV;
- goto done;
- }
- ndev = ib_get_ndev_from_path(rec);
- rcu_read_lock();
- if ((ndev && ndev != resolved_dev) ||
- (resolved_dev != idev &&
- !rdma_is_upper_dev_rcu(idev, resolved_dev)))
- ret = -EHOSTUNREACH;
- rcu_read_unlock();
- dev_put(resolved_dev);
- if (ndev)
- dev_put(ndev);
-done:
- dev_put(idev);
- if (!ret)
- rec->roce.route_resolved = true;
- return ret;
+ rec->roce.route_resolved = true;
+ return 0;
}
static int init_ah_attr_grh_fields(struct ib_device *device, u8 port_num,
struct sa_path_rec *rec,
- struct rdma_ah_attr *ah_attr)
+ struct rdma_ah_attr *ah_attr,
+ const struct ib_gid_attr *gid_attr)
{
enum ib_gid_type type = sa_conv_pathrec_to_gid_type(rec);
- struct net_device *ndev;
- u16 gid_index;
- int ret;
- ndev = ib_get_ndev_from_path(rec);
- ret = ib_find_cached_gid_by_port(device, &rec->sgid, type,
- port_num, ndev, &gid_index);
- if (ndev)
- dev_put(ndev);
- if (ret)
- return ret;
+ if (!gid_attr) {
+ gid_attr = rdma_find_gid_by_port(device, &rec->sgid, type,
+ port_num, NULL);
+ if (IS_ERR(gid_attr))
+ return PTR_ERR(gid_attr);
+ } else
+ rdma_hold_gid_attr(gid_attr);
- rdma_ah_set_grh(ah_attr, &rec->dgid,
- be32_to_cpu(rec->flow_label),
- gid_index, rec->hop_limit,
- rec->traffic_class);
+ rdma_move_grh_sgid_attr(ah_attr, &rec->dgid,
+ be32_to_cpu(rec->flow_label),
+ rec->hop_limit, rec->traffic_class,
+ gid_attr);
return 0;
}
+/**
+ * ib_init_ah_attr_from_path - Initialize address handle attributes based on
+ * an SA path record.
+ * @device: Device associated ah attributes initialization.
+ * @port_num: Port on the specified device.
+ * @rec: path record entry to use for ah attributes initialization.
+ * @ah_attr: address handle attributes to initialization from path record.
+ * @sgid_attr: SGID attribute to consider during initialization.
+ *
+ * When ib_init_ah_attr_from_path() returns success,
+ * (a) for IB link layer it optionally contains a reference to SGID attribute
+ * when GRH is present for IB link layer.
+ * (b) for RoCE link layer it contains a reference to SGID attribute.
+ * User must invoke rdma_destroy_ah_attr() to release reference to SGID
+ * attributes which are initialized using ib_init_ah_attr_from_path().
+ */
int ib_init_ah_attr_from_path(struct ib_device *device, u8 port_num,
struct sa_path_rec *rec,
- struct rdma_ah_attr *ah_attr)
+ struct rdma_ah_attr *ah_attr,
+ const struct ib_gid_attr *gid_attr)
{
int ret = 0;
@@ -1332,7 +1319,7 @@ int ib_init_ah_attr_from_path(struct ib_device *device, u8 port_num,
rdma_ah_set_static_rate(ah_attr, rec->rate);
if (sa_path_is_roce(rec)) {
- ret = roce_resolve_route_from_path(device, port_num, rec);
+ ret = roce_resolve_route_from_path(rec, gid_attr);
if (ret)
return ret;
@@ -1349,7 +1336,8 @@ int ib_init_ah_attr_from_path(struct ib_device *device, u8 port_num,
}
if (rec->hop_limit > 0 || sa_path_is_roce(rec))
- ret = init_ah_attr_grh_fields(device, port_num, rec, ah_attr);
+ ret = init_ah_attr_grh_fields(device, port_num,
+ rec, ah_attr, gid_attr);
return ret;
}
EXPORT_SYMBOL(ib_init_ah_attr_from_path);
@@ -1557,8 +1545,6 @@ static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
ARRAY_SIZE(path_rec_table),
mad->data, &rec);
rec.rec_type = SA_PATH_REC_TYPE_IB;
- sa_path_set_ndev(&rec, NULL);
- sa_path_set_ifindex(&rec, 0);
sa_path_set_dmac_zero(&rec);
if (query->conv_pr) {
@@ -2290,6 +2276,7 @@ static void update_sm_ah(struct work_struct *work)
struct ib_sa_sm_ah *new_ah;
struct ib_port_attr port_attr;
struct rdma_ah_attr ah_attr;
+ bool grh_required;
if (ib_query_port(port->agent->device, port->port_num, &port_attr)) {
pr_warn("Couldn't query port\n");
@@ -2314,16 +2301,27 @@ static void update_sm_ah(struct work_struct *work)
rdma_ah_set_dlid(&ah_attr, port_attr.sm_lid);
rdma_ah_set_sl(&ah_attr, port_attr.sm_sl);
rdma_ah_set_port_num(&ah_attr, port->port_num);
- if (port_attr.grh_required) {
- if (ah_attr.type == RDMA_AH_ATTR_TYPE_OPA) {
- rdma_ah_set_make_grd(&ah_attr, true);
- } else {
- rdma_ah_set_ah_flags(&ah_attr, IB_AH_GRH);
- rdma_ah_set_subnet_prefix(&ah_attr,
- cpu_to_be64(port_attr.subnet_prefix));
- rdma_ah_set_interface_id(&ah_attr,
- cpu_to_be64(IB_SA_WELL_KNOWN_GUID));
- }
+
+ grh_required = rdma_is_grh_required(port->agent->device,
+ port->port_num);
+
+ /*
+ * The OPA sm_lid of 0xFFFF needs special handling so that it can be
+ * differentiated from a permissive LID of 0xFFFF. We set the
+ * grh_required flag here so the SA can program the DGID in the
+ * address handle appropriately
+ */
+ if (ah_attr.type == RDMA_AH_ATTR_TYPE_OPA &&
+ (grh_required ||
+ port_attr.sm_lid == be16_to_cpu(IB_LID_PERMISSIVE)))
+ rdma_ah_set_make_grd(&ah_attr, true);
+
+ if (ah_attr.type == RDMA_AH_ATTR_TYPE_IB && grh_required) {
+ rdma_ah_set_ah_flags(&ah_attr, IB_AH_GRH);
+ rdma_ah_set_subnet_prefix(&ah_attr,
+ cpu_to_be64(port_attr.subnet_prefix));
+ rdma_ah_set_interface_id(&ah_attr,
+ cpu_to_be64(IB_SA_WELL_KNOWN_GUID));
}
new_ah->ah = rdma_create_ah(port->agent->qp->pd, &ah_attr);
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index 31c7efaf8e7a..7fd14ead7b37 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -42,6 +42,7 @@
#include <rdma/ib_mad.h>
#include <rdma/ib_pma.h>
+#include <rdma/ib_cache.h>
struct ib_port;
@@ -346,7 +347,7 @@ static struct attribute *port_default_attrs[] = {
NULL
};
-static size_t print_ndev(struct ib_gid_attr *gid_attr, char *buf)
+static size_t print_ndev(const struct ib_gid_attr *gid_attr, char *buf)
{
if (!gid_attr->ndev)
return -EINVAL;
@@ -354,33 +355,26 @@ static size_t print_ndev(struct ib_gid_attr *gid_attr, char *buf)
return sprintf(buf, "%s\n", gid_attr->ndev->name);
}
-static size_t print_gid_type(struct ib_gid_attr *gid_attr, char *buf)
+static size_t print_gid_type(const struct ib_gid_attr *gid_attr, char *buf)
{
return sprintf(buf, "%s\n", ib_cache_gid_type_str(gid_attr->gid_type));
}
-static ssize_t _show_port_gid_attr(struct ib_port *p,
- struct port_attribute *attr,
- char *buf,
- size_t (*print)(struct ib_gid_attr *gid_attr,
- char *buf))
+static ssize_t _show_port_gid_attr(
+ struct ib_port *p, struct port_attribute *attr, char *buf,
+ size_t (*print)(const struct ib_gid_attr *gid_attr, char *buf))
{
struct port_table_attribute *tab_attr =
container_of(attr, struct port_table_attribute, attr);
- union ib_gid gid;
- struct ib_gid_attr gid_attr = {};
+ const struct ib_gid_attr *gid_attr;
ssize_t ret;
- ret = ib_query_gid(p->ibdev, p->port_num, tab_attr->index, &gid,
- &gid_attr);
- if (ret)
- goto err;
+ gid_attr = rdma_get_gid_attr(p->ibdev, p->port_num, tab_attr->index);
+ if (IS_ERR(gid_attr))
+ return PTR_ERR(gid_attr);
- ret = print(&gid_attr, buf);
-
-err:
- if (gid_attr.ndev)
- dev_put(gid_attr.ndev);
+ ret = print(gid_attr, buf);
+ rdma_put_gid_attr(gid_attr);
return ret;
}
@@ -389,26 +383,28 @@ static ssize_t show_port_gid(struct ib_port *p, struct port_attribute *attr,
{
struct port_table_attribute *tab_attr =
container_of(attr, struct port_table_attribute, attr);
- union ib_gid *pgid;
- union ib_gid gid;
+ const struct ib_gid_attr *gid_attr;
ssize_t ret;
- ret = ib_query_gid(p->ibdev, p->port_num, tab_attr->index, &gid, NULL);
+ gid_attr = rdma_get_gid_attr(p->ibdev, p->port_num, tab_attr->index);
+ if (IS_ERR(gid_attr)) {
+ const union ib_gid zgid = {};
+
+ /* If reading GID fails, it is likely due to GID entry being
+ * empty (invalid) or reserved GID in the table. User space
+ * expects to read GID table entries as long as it given index
+ * is within GID table size. Administrative/debugging tool
+ * fails to query rest of the GID entries if it hits error
+ * while querying a GID of the given index. To avoid user
+ * space throwing such error on fail to read gid, return zero
+ * GID as before. This maintains backward compatibility.
+ */
+ return sprintf(buf, "%pI6\n", zgid.raw);
+ }
- /* If reading GID fails, it is likely due to GID entry being empty
- * (invalid) or reserved GID in the table.
- * User space expects to read GID table entries as long as it given
- * index is within GID table size.
- * Administrative/debugging tool fails to query rest of the GID entries
- * if it hits error while querying a GID of the given index.
- * To avoid user space throwing such error on fail to read gid, return
- * zero GID as before. This maintains backward compatibility.
- */
- if (ret)
- pgid = &zgid;
- else
- pgid = &gid;
- return sprintf(buf, "%pI6\n", pgid->raw);
+ ret = sprintf(buf, "%pI6\n", gid_attr->gid.raw);
+ rdma_put_gid_attr(gid_attr);
+ return ret;
}
static ssize_t show_port_gid_attr_ndev(struct ib_port *p,
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 54ab6335c48d..a41792dbae1f 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -84,7 +84,6 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
struct ib_umem *umem;
struct page **page_list;
struct vm_area_struct **vma_list;
- unsigned long locked;
unsigned long lock_limit;
unsigned long cur_base;
unsigned long npages;
@@ -92,7 +91,6 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
int i;
unsigned long dma_attrs = 0;
struct scatterlist *sg, *sg_list_start;
- int need_release = 0;
unsigned int gup_flags = FOLL_WRITE;
if (dmasync)
@@ -121,10 +119,8 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
if (access & IB_ACCESS_ON_DEMAND) {
ret = ib_umem_odp_get(context, umem, access);
- if (ret) {
- kfree(umem);
- return ERR_PTR(ret);
- }
+ if (ret)
+ goto umem_kfree;
return umem;
}
@@ -135,8 +131,8 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
page_list = (struct page **) __get_free_page(GFP_KERNEL);
if (!page_list) {
- kfree(umem);
- return ERR_PTR(-ENOMEM);
+ ret = -ENOMEM;
+ goto umem_kfree;
}
/*
@@ -149,41 +145,43 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
npages = ib_umem_num_pages(umem);
- down_write(&current->mm->mmap_sem);
-
- locked = npages + current->mm->pinned_vm;
lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
- if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
+ down_write(&current->mm->mmap_sem);
+ current->mm->pinned_vm += npages;
+ if ((current->mm->pinned_vm > lock_limit) && !capable(CAP_IPC_LOCK)) {
+ up_write(&current->mm->mmap_sem);
ret = -ENOMEM;
- goto out;
+ goto vma;
}
+ up_write(&current->mm->mmap_sem);
cur_base = addr & PAGE_MASK;
if (npages == 0 || npages > UINT_MAX) {
ret = -EINVAL;
- goto out;
+ goto vma;
}
ret = sg_alloc_table(&umem->sg_head, npages, GFP_KERNEL);
if (ret)
- goto out;
+ goto vma;
if (!umem->writable)
gup_flags |= FOLL_FORCE;
- need_release = 1;
sg_list_start = umem->sg_head.sgl;
+ down_read(&current->mm->mmap_sem);
while (npages) {
ret = get_user_pages_longterm(cur_base,
min_t(unsigned long, npages,
PAGE_SIZE / sizeof (struct page *)),
gup_flags, page_list, vma_list);
-
- if (ret < 0)
- goto out;
+ if (ret < 0) {
+ up_read(&current->mm->mmap_sem);
+ goto umem_release;
+ }
umem->npages += ret;
cur_base += ret * PAGE_SIZE;
@@ -199,6 +197,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
/* preparing for next loop */
sg_list_start = sg;
}
+ up_read(&current->mm->mmap_sem);
umem->nmap = ib_dma_map_sg_attrs(context->device,
umem->sg_head.sgl,
@@ -206,27 +205,28 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
DMA_BIDIRECTIONAL,
dma_attrs);
- if (umem->nmap <= 0) {
+ if (!umem->nmap) {
ret = -ENOMEM;
- goto out;
+ goto umem_release;
}
ret = 0;
+ goto out;
-out:
- if (ret < 0) {
- if (need_release)
- __ib_umem_release(context->device, umem, 0);
- kfree(umem);
- } else
- current->mm->pinned_vm = locked;
-
+umem_release:
+ __ib_umem_release(context->device, umem, 0);
+vma:
+ down_write(&current->mm->mmap_sem);
+ current->mm->pinned_vm -= ib_umem_num_pages(umem);
up_write(&current->mm->mmap_sem);
+out:
if (vma_list)
free_page((unsigned long) vma_list);
free_page((unsigned long) page_list);
-
- return ret < 0 ? ERR_PTR(ret) : umem;
+umem_kfree:
+ if (ret)
+ kfree(umem);
+ return ret ? ERR_PTR(ret) : umem;
}
EXPORT_SYMBOL(ib_umem_get);
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index bb98c9e4a7fd..c34a6852d691 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -268,6 +268,7 @@ static void recv_handler(struct ib_mad_agent *agent,
packet->mad.hdr.traffic_class = grh->traffic_class;
memcpy(packet->mad.hdr.gid, &grh->dgid, 16);
packet->mad.hdr.flow_label = cpu_to_be32(grh->flow_label);
+ rdma_destroy_ah_attr(&ah_attr);
}
if (queue_packet(file, agent, packet))
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index c0d40fc3a53a..d0a1a54275e5 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -130,7 +130,7 @@ struct ib_uverbs_async_event_file {
};
struct ib_uverbs_completion_event_file {
- struct ib_uobject_file uobj_file;
+ struct ib_uobject uobj;
struct ib_uverbs_event_queue ev_queue;
};
@@ -145,6 +145,14 @@ struct ib_uverbs_file {
struct list_head list;
int is_closed;
+ /* locking the uobjects_list */
+ struct mutex uobjects_lock;
+ struct list_head uobjects;
+
+ /* protects cleanup process from other actions */
+ struct rw_semaphore cleanup_rwsem;
+ enum rdma_remove_reason cleanup_reason;
+
struct idr idr;
/* spinlock protects write access to idr */
spinlock_t idr_lock;
@@ -196,7 +204,6 @@ struct ib_uwq_object {
struct ib_ucq_object {
struct ib_uobject uobject;
- struct ib_uverbs_file *uverbs_file;
struct list_head comp_list;
struct list_head async_list;
u32 comp_events_reported;
@@ -230,7 +237,7 @@ void ib_uverbs_wq_event_handler(struct ib_event *event, void *context_ptr);
void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr);
void ib_uverbs_event_handler(struct ib_event_handler *handler,
struct ib_event *event);
-int ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev, struct ib_xrcd *xrcd,
+int ib_uverbs_dealloc_xrcd(struct ib_uobject *uobject, struct ib_xrcd *xrcd,
enum rdma_remove_reason why);
int uverbs_dealloc_mw(struct ib_mw *mw);
@@ -238,12 +245,7 @@ void ib_uverbs_detach_umcast(struct ib_qp *qp,
struct ib_uqp_object *uobj);
void create_udata(struct uverbs_attr_bundle *ctx, struct ib_udata *udata);
-extern const struct uverbs_attr_def uverbs_uhw_compat_in;
-extern const struct uverbs_attr_def uverbs_uhw_compat_out;
long ib_uverbs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
-int uverbs_destroy_def_handler(struct ib_device *ib_dev,
- struct ib_uverbs_file *file,
- struct uverbs_attr_bundle *attrs);
struct ib_uverbs_flow_spec {
union {
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 3e90b6a1d9d2..bd6eefaecbd6 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -48,11 +48,10 @@
#include "core_priv.h"
static struct ib_uverbs_completion_event_file *
-ib_uverbs_lookup_comp_file(int fd, struct ib_ucontext *context)
+ib_uverbs_lookup_comp_file(int fd, struct ib_uverbs_file *ufile)
{
struct ib_uobject *uobj = uobj_get_read(UVERBS_OBJECT_COMP_CHANNEL,
- fd, context);
- struct ib_uobject_file *uobj_file;
+ fd, ufile);
if (IS_ERR(uobj))
return (void *)uobj;
@@ -60,9 +59,8 @@ ib_uverbs_lookup_comp_file(int fd, struct ib_ucontext *context)
uverbs_uobject_get(uobj);
uobj_put_read(uobj);
- uobj_file = container_of(uobj, struct ib_uobject_file, uobj);
- return container_of(uobj_file, struct ib_uverbs_completion_event_file,
- uobj_file);
+ return container_of(uobj, struct ib_uverbs_completion_event_file,
+ uobj);
}
ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
@@ -110,12 +108,12 @@ ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file,
ucontext->cg_obj = cg_obj;
/* ufile is required when some objects are released */
ucontext->ufile = file;
- uverbs_initialize_ucontext(ucontext);
rcu_read_lock();
ucontext->tgid = get_task_pid(current->group_leader, PIDTYPE_PID);
rcu_read_unlock();
ucontext->closing = 0;
+ ucontext->cleanup_retryable = false;
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
ucontext->umem_tree = RB_ROOT_CACHED;
@@ -189,7 +187,7 @@ static void copy_query_dev_fields(struct ib_uverbs_file *file,
resp->max_qp = attr->max_qp;
resp->max_qp_wr = attr->max_qp_wr;
resp->device_cap_flags = lower_32_bits(attr->device_cap_flags);
- resp->max_sge = attr->max_sge;
+ resp->max_sge = min(attr->max_send_sge, attr->max_recv_sge);
resp->max_sge_rd = attr->max_sge_rd;
resp->max_cq = attr->max_cq;
resp->max_cqe = attr->max_cqe;
@@ -243,6 +241,27 @@ ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file,
return in_len;
}
+/*
+ * ib_uverbs_query_port_resp.port_cap_flags started out as just a copy of the
+ * PortInfo CapabilityMask, but was extended with unique bits.
+ */
+static u32 make_port_cap_flags(const struct ib_port_attr *attr)
+{
+ u32 res;
+
+ /* All IBA CapabilityMask bits are passed through here, except bit 26,
+ * which is overridden with IP_BASED_GIDS. This is due to a historical
+ * mistake in the implementation of IP_BASED_GIDS. Otherwise all other
+ * bits match the IBA definition across all kernel versions.
+ */
+ res = attr->port_cap_flags & ~(u32)IB_UVERBS_PCF_IP_BASED_GIDS;
+
+ if (attr->ip_gids)
+ res |= IB_UVERBS_PCF_IP_BASED_GIDS;
+
+ return res;
+}
+
ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file,
struct ib_device *ib_dev,
const char __user *buf,
@@ -269,12 +288,15 @@ ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file,
resp.max_mtu = attr.max_mtu;
resp.active_mtu = attr.active_mtu;
resp.gid_tbl_len = attr.gid_tbl_len;
- resp.port_cap_flags = attr.port_cap_flags;
+ resp.port_cap_flags = make_port_cap_flags(&attr);
resp.max_msg_sz = attr.max_msg_sz;
resp.bad_pkey_cntr = attr.bad_pkey_cntr;
resp.qkey_viol_cntr = attr.qkey_viol_cntr;
resp.pkey_tbl_len = attr.pkey_tbl_len;
+ if (rdma_is_grh_required(ib_dev, cmd.port_num))
+ resp.flags |= IB_UVERBS_QPF_GRH_REQUIRED;
+
if (rdma_cap_opa_ah(ib_dev, cmd.port_num)) {
resp.lid = OPA_TO_IB_UCAST_LID(attr.lid);
resp.sm_lid = OPA_TO_IB_UCAST_LID(attr.sm_lid);
@@ -322,7 +344,7 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file,
in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
out_len - sizeof(resp));
- uobj = uobj_alloc(UVERBS_OBJECT_PD, file->ucontext);
+ uobj = uobj_alloc(UVERBS_OBJECT_PD, file);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
@@ -366,20 +388,12 @@ ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file,
int in_len, int out_len)
{
struct ib_uverbs_dealloc_pd cmd;
- struct ib_uobject *uobj;
- int ret;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- uobj = uobj_get_write(UVERBS_OBJECT_PD, cmd.pd_handle,
- file->ucontext);
- if (IS_ERR(uobj))
- return PTR_ERR(uobj);
-
- ret = uobj_remove_commit(uobj);
-
- return ret ?: in_len;
+ return uobj_perform_destroy(UVERBS_OBJECT_PD, cmd.pd_handle, file,
+ in_len);
}
struct xrcd_table_entry {
@@ -517,8 +531,7 @@ ssize_t ib_uverbs_open_xrcd(struct ib_uverbs_file *file,
}
}
- obj = (struct ib_uxrcd_object *)uobj_alloc(UVERBS_OBJECT_XRCD,
- file->ucontext);
+ obj = (struct ib_uxrcd_object *)uobj_alloc(UVERBS_OBJECT_XRCD, file);
if (IS_ERR(obj)) {
ret = PTR_ERR(obj);
goto err_tree_mutex_unlock;
@@ -596,27 +609,21 @@ ssize_t ib_uverbs_close_xrcd(struct ib_uverbs_file *file,
int out_len)
{
struct ib_uverbs_close_xrcd cmd;
- struct ib_uobject *uobj;
- int ret = 0;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- uobj = uobj_get_write(UVERBS_OBJECT_XRCD, cmd.xrcd_handle,
- file->ucontext);
- if (IS_ERR(uobj))
- return PTR_ERR(uobj);
-
- ret = uobj_remove_commit(uobj);
- return ret ?: in_len;
+ return uobj_perform_destroy(UVERBS_OBJECT_XRCD, cmd.xrcd_handle, file,
+ in_len);
}
-int ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev,
+int ib_uverbs_dealloc_xrcd(struct ib_uobject *uobject,
struct ib_xrcd *xrcd,
enum rdma_remove_reason why)
{
struct inode *inode;
int ret;
+ struct ib_uverbs_device *dev = uobject->context->ufile->device;
inode = xrcd->inode;
if (inode && !atomic_dec_and_test(&xrcd->usecnt))
@@ -624,9 +631,12 @@ int ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev,
ret = ib_dealloc_xrcd(xrcd);
- if (why == RDMA_REMOVE_DESTROY && ret)
+ if (ib_is_destroy_retryable(ret, why, uobject)) {
atomic_inc(&xrcd->usecnt);
- else if (inode)
+ return ret;
+ }
+
+ if (inode)
xrcd_table_delete(dev, inode);
return ret;
@@ -663,11 +673,11 @@ ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file,
if (ret)
return ret;
- uobj = uobj_alloc(UVERBS_OBJECT_MR, file->ucontext);
+ uobj = uobj_alloc(UVERBS_OBJECT_MR, file);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
- pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file->ucontext);
+ pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file);
if (!pd) {
ret = -EINVAL;
goto err_free;
@@ -759,8 +769,7 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
(cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK)))
return -EINVAL;
- uobj = uobj_get_write(UVERBS_OBJECT_MR, cmd.mr_handle,
- file->ucontext);
+ uobj = uobj_get_write(UVERBS_OBJECT_MR, cmd.mr_handle, file);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
@@ -778,7 +787,8 @@ ssize_t ib_uverbs_rereg_mr(struct ib_uverbs_file *file,
}
if (cmd.flags & IB_MR_REREG_PD) {
- pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file->ucontext);
+ pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle,
+ file);
if (!pd) {
ret = -EINVAL;
goto put_uobjs;
@@ -824,20 +834,12 @@ ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file,
int out_len)
{
struct ib_uverbs_dereg_mr cmd;
- struct ib_uobject *uobj;
- int ret = -EINVAL;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- uobj = uobj_get_write(UVERBS_OBJECT_MR, cmd.mr_handle,
- file->ucontext);
- if (IS_ERR(uobj))
- return PTR_ERR(uobj);
-
- ret = uobj_remove_commit(uobj);
-
- return ret ?: in_len;
+ return uobj_perform_destroy(UVERBS_OBJECT_MR, cmd.mr_handle, file,
+ in_len);
}
ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file,
@@ -859,11 +861,11 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof(cmd)))
return -EFAULT;
- uobj = uobj_alloc(UVERBS_OBJECT_MW, file->ucontext);
+ uobj = uobj_alloc(UVERBS_OBJECT_MW, file);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
- pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file->ucontext);
+ pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file);
if (!pd) {
ret = -EINVAL;
goto err_free;
@@ -916,19 +918,12 @@ ssize_t ib_uverbs_dealloc_mw(struct ib_uverbs_file *file,
int out_len)
{
struct ib_uverbs_dealloc_mw cmd;
- struct ib_uobject *uobj;
- int ret = -EINVAL;
if (copy_from_user(&cmd, buf, sizeof(cmd)))
return -EFAULT;
- uobj = uobj_get_write(UVERBS_OBJECT_MW, cmd.mw_handle,
- file->ucontext);
- if (IS_ERR(uobj))
- return PTR_ERR(uobj);
-
- ret = uobj_remove_commit(uobj);
- return ret ?: in_len;
+ return uobj_perform_destroy(UVERBS_OBJECT_MW, cmd.mw_handle, file,
+ in_len);
}
ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
@@ -947,14 +942,14 @@ ssize_t ib_uverbs_create_comp_channel(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- uobj = uobj_alloc(UVERBS_OBJECT_COMP_CHANNEL, file->ucontext);
+ uobj = uobj_alloc(UVERBS_OBJECT_COMP_CHANNEL, file);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
resp.fd = uobj->id;
ev_file = container_of(uobj, struct ib_uverbs_completion_event_file,
- uobj_file.uobj);
+ uobj);
ib_uverbs_init_event_queue(&ev_file->ev_queue);
if (copy_to_user(u64_to_user_ptr(cmd.response), &resp, sizeof resp)) {
@@ -992,14 +987,12 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
if (cmd->comp_vector >= file->device->num_comp_vectors)
return ERR_PTR(-EINVAL);
- obj = (struct ib_ucq_object *)uobj_alloc(UVERBS_OBJECT_CQ,
- file->ucontext);
+ obj = (struct ib_ucq_object *)uobj_alloc(UVERBS_OBJECT_CQ, file);
if (IS_ERR(obj))
return obj;
if (cmd->comp_channel >= 0) {
- ev_file = ib_uverbs_lookup_comp_file(cmd->comp_channel,
- file->ucontext);
+ ev_file = ib_uverbs_lookup_comp_file(cmd->comp_channel, file);
if (IS_ERR(ev_file)) {
ret = PTR_ERR(ev_file);
goto err;
@@ -1007,7 +1000,6 @@ static struct ib_ucq_object *create_cq(struct ib_uverbs_file *file,
}
obj->uobject.user_handle = cmd->user_handle;
- obj->uverbs_file = file;
obj->comp_events_reported = 0;
obj->async_events_reported = 0;
INIT_LIST_HEAD(&obj->comp_list);
@@ -1181,7 +1173,7 @@ ssize_t ib_uverbs_resize_cq(struct ib_uverbs_file *file,
in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
out_len - sizeof(resp));
- cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file->ucontext);
+ cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file);
if (!cq)
return -EINVAL;
@@ -1246,7 +1238,7 @@ ssize_t ib_uverbs_poll_cq(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file->ucontext);
+ cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file);
if (!cq)
return -EINVAL;
@@ -1293,7 +1285,7 @@ ssize_t ib_uverbs_req_notify_cq(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file->ucontext);
+ cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file);
if (!cq)
return -EINVAL;
@@ -1320,8 +1312,7 @@ ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- uobj = uobj_get_write(UVERBS_OBJECT_CQ, cmd.cq_handle,
- file->ucontext);
+ uobj = uobj_get_write(UVERBS_OBJECT_CQ, cmd.cq_handle, file);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
@@ -1379,8 +1370,7 @@ static int create_qp(struct ib_uverbs_file *file,
if (cmd->qp_type == IB_QPT_RAW_PACKET && !capable(CAP_NET_RAW))
return -EPERM;
- obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP,
- file->ucontext);
+ obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP, file);
if (IS_ERR(obj))
return PTR_ERR(obj);
obj->uxrcd = NULL;
@@ -1390,9 +1380,9 @@ static int create_qp(struct ib_uverbs_file *file,
if (cmd_sz >= offsetof(typeof(*cmd), rwq_ind_tbl_handle) +
sizeof(cmd->rwq_ind_tbl_handle) &&
(cmd->comp_mask & IB_UVERBS_CREATE_QP_MASK_IND_TABLE)) {
- ind_tbl = uobj_get_obj_read(rwq_ind_table, UVERBS_OBJECT_RWQ_IND_TBL,
- cmd->rwq_ind_tbl_handle,
- file->ucontext);
+ ind_tbl = uobj_get_obj_read(rwq_ind_table,
+ UVERBS_OBJECT_RWQ_IND_TBL,
+ cmd->rwq_ind_tbl_handle, file);
if (!ind_tbl) {
ret = -EINVAL;
goto err_put;
@@ -1418,7 +1408,7 @@ static int create_qp(struct ib_uverbs_file *file,
if (cmd->qp_type == IB_QPT_XRC_TGT) {
xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd->pd_handle,
- file->ucontext);
+ file);
if (IS_ERR(xrcd_uobj)) {
ret = -EINVAL;
@@ -1437,8 +1427,8 @@ static int create_qp(struct ib_uverbs_file *file,
cmd->max_recv_sge = 0;
} else {
if (cmd->is_srq) {
- srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd->srq_handle,
- file->ucontext);
+ srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ,
+ cmd->srq_handle, file);
if (!srq || srq->srq_type == IB_SRQT_XRC) {
ret = -EINVAL;
goto err_put;
@@ -1447,8 +1437,9 @@ static int create_qp(struct ib_uverbs_file *file,
if (!ind_tbl) {
if (cmd->recv_cq_handle != cmd->send_cq_handle) {
- rcq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd->recv_cq_handle,
- file->ucontext);
+ rcq = uobj_get_obj_read(
+ cq, UVERBS_OBJECT_CQ,
+ cmd->recv_cq_handle, file);
if (!rcq) {
ret = -EINVAL;
goto err_put;
@@ -1458,11 +1449,12 @@ static int create_qp(struct ib_uverbs_file *file,
}
if (has_sq)
- scq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd->send_cq_handle,
- file->ucontext);
+ scq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ,
+ cmd->send_cq_handle, file);
if (!ind_tbl)
rcq = rcq ?: scq;
- pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle, file->ucontext);
+ pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle,
+ file);
if (!pd || (!scq && has_sq)) {
ret = -EINVAL;
goto err_put;
@@ -1759,13 +1751,11 @@ ssize_t ib_uverbs_open_qp(struct ib_uverbs_file *file,
in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
out_len - sizeof(resp));
- obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP,
- file->ucontext);
+ obj = (struct ib_uqp_object *)uobj_alloc(UVERBS_OBJECT_QP, file);
if (IS_ERR(obj))
return PTR_ERR(obj);
- xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd.pd_handle,
- file->ucontext);
+ xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd.pd_handle, file);
if (IS_ERR(xrcd_uobj)) {
ret = -EINVAL;
goto err_put;
@@ -1867,7 +1857,7 @@ ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file,
goto out;
}
- qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext);
+ qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file);
if (!qp) {
ret = -EINVAL;
goto out;
@@ -1968,11 +1958,11 @@ static int modify_qp(struct ib_uverbs_file *file,
struct ib_qp *qp;
int ret;
- attr = kmalloc(sizeof *attr, GFP_KERNEL);
+ attr = kzalloc(sizeof(*attr), GFP_KERNEL);
if (!attr)
return -ENOMEM;
- qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd->base.qp_handle, file->ucontext);
+ qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd->base.qp_handle, file);
if (!qp) {
ret = -EINVAL;
goto out;
@@ -2127,8 +2117,7 @@ ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file,
memset(&resp, 0, sizeof resp);
- uobj = uobj_get_write(UVERBS_OBJECT_QP, cmd.qp_handle,
- file->ucontext);
+ uobj = uobj_get_write(UVERBS_OBJECT_QP, cmd.qp_handle, file);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
@@ -2193,7 +2182,7 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
if (!user_wr)
return -ENOMEM;
- qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext);
+ qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file);
if (!qp)
goto out;
@@ -2229,8 +2218,8 @@ ssize_t ib_uverbs_post_send(struct ib_uverbs_file *file,
goto out_put;
}
- ud->ah = uobj_get_obj_read(ah, UVERBS_OBJECT_AH, user_wr->wr.ud.ah,
- file->ucontext);
+ ud->ah = uobj_get_obj_read(ah, UVERBS_OBJECT_AH,
+ user_wr->wr.ud.ah, file);
if (!ud->ah) {
kfree(ud);
ret = -EINVAL;
@@ -2464,7 +2453,7 @@ ssize_t ib_uverbs_post_recv(struct ib_uverbs_file *file,
if (IS_ERR(wr))
return PTR_ERR(wr);
- qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext);
+ qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file);
if (!qp)
goto out;
@@ -2513,7 +2502,7 @@ ssize_t ib_uverbs_post_srq_recv(struct ib_uverbs_file *file,
if (IS_ERR(wr))
return PTR_ERR(wr);
- srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file->ucontext);
+ srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file);
if (!srq)
goto out;
@@ -2552,7 +2541,7 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
struct ib_uobject *uobj;
struct ib_pd *pd;
struct ib_ah *ah;
- struct rdma_ah_attr attr;
+ struct rdma_ah_attr attr = {};
int ret;
struct ib_udata udata;
@@ -2570,11 +2559,11 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file,
in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
out_len - sizeof(resp));
- uobj = uobj_alloc(UVERBS_OBJECT_AH, file->ucontext);
+ uobj = uobj_alloc(UVERBS_OBJECT_AH, file);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
- pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file->ucontext);
+ pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file);
if (!pd) {
ret = -EINVAL;
goto err;
@@ -2636,19 +2625,12 @@ ssize_t ib_uverbs_destroy_ah(struct ib_uverbs_file *file,
const char __user *buf, int in_len, int out_len)
{
struct ib_uverbs_destroy_ah cmd;
- struct ib_uobject *uobj;
- int ret;
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- uobj = uobj_get_write(UVERBS_OBJECT_AH, cmd.ah_handle,
- file->ucontext);
- if (IS_ERR(uobj))
- return PTR_ERR(uobj);
-
- ret = uobj_remove_commit(uobj);
- return ret ?: in_len;
+ return uobj_perform_destroy(UVERBS_OBJECT_AH, cmd.ah_handle, file,
+ in_len);
}
ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file,
@@ -2665,7 +2647,7 @@ ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext);
+ qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file);
if (!qp)
return -EINVAL;
@@ -2716,7 +2698,7 @@ ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext);
+ qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file);
if (!qp)
return -EINVAL;
@@ -2761,29 +2743,27 @@ static struct ib_uflow_resources *flow_resources_alloc(size_t num_specs)
resources = kzalloc(sizeof(*resources), GFP_KERNEL);
if (!resources)
- goto err_res;
+ return NULL;
+
+ if (!num_specs)
+ goto out;
resources->counters =
kcalloc(num_specs, sizeof(*resources->counters), GFP_KERNEL);
-
- if (!resources->counters)
- goto err_cnt;
-
resources->collection =
kcalloc(num_specs, sizeof(*resources->collection), GFP_KERNEL);
- if (!resources->collection)
- goto err_collection;
+ if (!resources->counters || !resources->collection)
+ goto err;
+out:
resources->max = num_specs;
-
return resources;
-err_collection:
+err:
kfree(resources->counters);
-err_cnt:
kfree(resources);
-err_res:
+
return NULL;
}
@@ -2826,7 +2806,7 @@ static void flow_resources_add(struct ib_uflow_resources *uflow_res,
uflow_res->num++;
}
-static int kern_spec_to_ib_spec_action(struct ib_ucontext *ucontext,
+static int kern_spec_to_ib_spec_action(struct ib_uverbs_file *ufile,
struct ib_uverbs_flow_spec *kern_spec,
union ib_flow_spec *ib_spec,
struct ib_uflow_resources *uflow_res)
@@ -2855,7 +2835,7 @@ static int kern_spec_to_ib_spec_action(struct ib_ucontext *ucontext,
ib_spec->action.act = uobj_get_obj_read(flow_action,
UVERBS_OBJECT_FLOW_ACTION,
kern_spec->action.handle,
- ucontext);
+ ufile);
if (!ib_spec->action.act)
return -EINVAL;
ib_spec->action.size =
@@ -2873,7 +2853,7 @@ static int kern_spec_to_ib_spec_action(struct ib_ucontext *ucontext,
uobj_get_obj_read(counters,
UVERBS_OBJECT_COUNTERS,
kern_spec->flow_count.handle,
- ucontext);
+ ufile);
if (!ib_spec->flow_count.counters)
return -EINVAL;
ib_spec->flow_count.size =
@@ -3042,9 +3022,6 @@ static int kern_spec_to_ib_spec_filter(struct ib_uverbs_flow_spec *kern_spec,
void *kern_spec_mask;
void *kern_spec_val;
- if (kern_spec->reserved)
- return -EINVAL;
-
kern_filter_sz = kern_spec_filter_sz(&kern_spec->hdr);
kern_spec_val = (void *)kern_spec +
@@ -3057,7 +3034,7 @@ static int kern_spec_to_ib_spec_filter(struct ib_uverbs_flow_spec *kern_spec,
kern_filter_sz, ib_spec);
}
-static int kern_spec_to_ib_spec(struct ib_ucontext *ucontext,
+static int kern_spec_to_ib_spec(struct ib_uverbs_file *ufile,
struct ib_uverbs_flow_spec *kern_spec,
union ib_flow_spec *ib_spec,
struct ib_uflow_resources *uflow_res)
@@ -3066,7 +3043,7 @@ static int kern_spec_to_ib_spec(struct ib_ucontext *ucontext,
return -EINVAL;
if (kern_spec->type >= IB_FLOW_SPEC_ACTION_TAG)
- return kern_spec_to_ib_spec_action(ucontext, kern_spec, ib_spec,
+ return kern_spec_to_ib_spec_action(ufile, kern_spec, ib_spec,
uflow_res);
else
return kern_spec_to_ib_spec_filter(kern_spec, ib_spec);
@@ -3109,18 +3086,17 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
if (cmd.comp_mask)
return -EOPNOTSUPP;
- obj = (struct ib_uwq_object *)uobj_alloc(UVERBS_OBJECT_WQ,
- file->ucontext);
+ obj = (struct ib_uwq_object *)uobj_alloc(UVERBS_OBJECT_WQ, file);
if (IS_ERR(obj))
return PTR_ERR(obj);
- pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file->ucontext);
+ pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd.pd_handle, file);
if (!pd) {
err = -EINVAL;
goto err_uobj;
}
- cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file->ucontext);
+ cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file);
if (!cq) {
err = -EINVAL;
goto err_put_pd;
@@ -3224,8 +3200,7 @@ int ib_uverbs_ex_destroy_wq(struct ib_uverbs_file *file,
return -EOPNOTSUPP;
resp.response_length = required_resp_len;
- uobj = uobj_get_write(UVERBS_OBJECT_WQ, cmd.wq_handle,
- file->ucontext);
+ uobj = uobj_get_write(UVERBS_OBJECT_WQ, cmd.wq_handle, file);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
@@ -3275,7 +3250,7 @@ int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file,
if (cmd.attr_mask > (IB_WQ_STATE | IB_WQ_CUR_STATE | IB_WQ_FLAGS))
return -EINVAL;
- wq = uobj_get_obj_read(wq, UVERBS_OBJECT_WQ, cmd.wq_handle, file->ucontext);
+ wq = uobj_get_obj_read(wq, UVERBS_OBJECT_WQ, cmd.wq_handle, file);
if (!wq)
return -EINVAL;
@@ -3369,8 +3344,8 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
for (num_read_wqs = 0; num_read_wqs < num_wq_handles;
num_read_wqs++) {
- wq = uobj_get_obj_read(wq, UVERBS_OBJECT_WQ, wqs_handles[num_read_wqs],
- file->ucontext);
+ wq = uobj_get_obj_read(wq, UVERBS_OBJECT_WQ,
+ wqs_handles[num_read_wqs], file);
if (!wq) {
err = -EINVAL;
goto put_wqs;
@@ -3379,7 +3354,7 @@ int ib_uverbs_ex_create_rwq_ind_table(struct ib_uverbs_file *file,
wqs[num_read_wqs] = wq;
}
- uobj = uobj_alloc(UVERBS_OBJECT_RWQ_IND_TBL, file->ucontext);
+ uobj = uobj_alloc(UVERBS_OBJECT_RWQ_IND_TBL, file);
if (IS_ERR(uobj)) {
err = PTR_ERR(uobj);
goto put_wqs;
@@ -3445,7 +3420,6 @@ int ib_uverbs_ex_destroy_rwq_ind_table(struct ib_uverbs_file *file,
struct ib_udata *uhw)
{
struct ib_uverbs_ex_destroy_rwq_ind_table cmd = {};
- struct ib_uobject *uobj;
int ret;
size_t required_cmd_sz;
@@ -3466,12 +3440,8 @@ int ib_uverbs_ex_destroy_rwq_ind_table(struct ib_uverbs_file *file,
if (cmd.comp_mask)
return -EOPNOTSUPP;
- uobj = uobj_get_write(UVERBS_OBJECT_RWQ_IND_TBL, cmd.ind_tbl_handle,
- file->ucontext);
- if (IS_ERR(uobj))
- return PTR_ERR(uobj);
-
- return uobj_remove_commit(uobj);
+ return uobj_perform_destroy(UVERBS_OBJECT_RWQ_IND_TBL,
+ cmd.ind_tbl_handle, file, 0);
}
int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
@@ -3547,18 +3517,23 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
kern_flow_attr = &cmd.flow_attr;
}
- uobj = uobj_alloc(UVERBS_OBJECT_FLOW, file->ucontext);
+ uobj = uobj_alloc(UVERBS_OBJECT_FLOW, file);
if (IS_ERR(uobj)) {
err = PTR_ERR(uobj);
goto err_free_attr;
}
- qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file->ucontext);
+ qp = uobj_get_obj_read(qp, UVERBS_OBJECT_QP, cmd.qp_handle, file);
if (!qp) {
err = -EINVAL;
goto err_uobj;
}
+ if (!qp->device->create_flow) {
+ err = -EOPNOTSUPP;
+ goto err_put;
+ }
+
flow_attr = kzalloc(struct_size(flow_attr, flows,
cmd.flow_attr.num_of_specs), GFP_KERNEL);
if (!flow_attr) {
@@ -3584,8 +3559,7 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
cmd.flow_attr.size > offsetof(struct ib_uverbs_flow_spec, reserved) &&
cmd.flow_attr.size >=
((struct ib_uverbs_flow_spec *)kern_spec)->size; i++) {
- err = kern_spec_to_ib_spec(file->ucontext, kern_spec, ib_spec,
- uflow_res);
+ err = kern_spec_to_ib_spec(file, kern_spec, ib_spec, uflow_res);
if (err)
goto err_free;
@@ -3631,7 +3605,8 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
kfree(kern_flow_attr);
return 0;
err_copy:
- ib_destroy_flow(flow_id);
+ if (!qp->device->destroy_flow(flow_id))
+ atomic_dec(&qp->usecnt);
err_free:
ib_uverbs_flow_resources_free(uflow_res);
err_free_flow_attr:
@@ -3652,7 +3627,6 @@ int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file,
struct ib_udata *uhw)
{
struct ib_uverbs_destroy_flow cmd;
- struct ib_uobject *uobj;
int ret;
if (ucore->inlen < sizeof(cmd))
@@ -3665,13 +3639,8 @@ int ib_uverbs_ex_destroy_flow(struct ib_uverbs_file *file,
if (cmd.comp_mask)
return -EINVAL;
- uobj = uobj_get_write(UVERBS_OBJECT_FLOW, cmd.flow_handle,
- file->ucontext);
- if (IS_ERR(uobj))
- return PTR_ERR(uobj);
-
- ret = uobj_remove_commit(uobj);
- return ret;
+ return uobj_perform_destroy(UVERBS_OBJECT_FLOW, cmd.flow_handle, file,
+ 0);
}
static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
@@ -3687,8 +3656,7 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
struct ib_srq_init_attr attr;
int ret;
- obj = (struct ib_usrq_object *)uobj_alloc(UVERBS_OBJECT_SRQ,
- file->ucontext);
+ obj = (struct ib_usrq_object *)uobj_alloc(UVERBS_OBJECT_SRQ, file);
if (IS_ERR(obj))
return PTR_ERR(obj);
@@ -3697,7 +3665,7 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
if (cmd->srq_type == IB_SRQT_XRC) {
xrcd_uobj = uobj_get_read(UVERBS_OBJECT_XRCD, cmd->xrcd_handle,
- file->ucontext);
+ file);
if (IS_ERR(xrcd_uobj)) {
ret = -EINVAL;
goto err;
@@ -3714,15 +3682,15 @@ static int __uverbs_create_xsrq(struct ib_uverbs_file *file,
}
if (ib_srq_has_cq(cmd->srq_type)) {
- attr.ext.cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd->cq_handle,
- file->ucontext);
+ attr.ext.cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ,
+ cmd->cq_handle, file);
if (!attr.ext.cq) {
ret = -EINVAL;
goto err_put_xrcd;
}
}
- pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle, file->ucontext);
+ pd = uobj_get_obj_read(pd, UVERBS_OBJECT_PD, cmd->pd_handle, file);
if (!pd) {
ret = -EINVAL;
goto err_put_cq;
@@ -3894,7 +3862,7 @@ ssize_t ib_uverbs_modify_srq(struct ib_uverbs_file *file,
ib_uverbs_init_udata(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd,
out_len);
- srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file->ucontext);
+ srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file);
if (!srq)
return -EINVAL;
@@ -3925,7 +3893,7 @@ ssize_t ib_uverbs_query_srq(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file->ucontext);
+ srq = uobj_get_obj_read(srq, UVERBS_OBJECT_SRQ, cmd.srq_handle, file);
if (!srq)
return -EINVAL;
@@ -3962,8 +3930,7 @@ ssize_t ib_uverbs_destroy_srq(struct ib_uverbs_file *file,
if (copy_from_user(&cmd, buf, sizeof cmd))
return -EFAULT;
- uobj = uobj_get_write(UVERBS_OBJECT_SRQ, cmd.srq_handle,
- file->ucontext);
+ uobj = uobj_get_write(UVERBS_OBJECT_SRQ, cmd.srq_handle, file);
if (IS_ERR(uobj))
return PTR_ERR(uobj);
@@ -4141,7 +4108,7 @@ int ib_uverbs_ex_modify_cq(struct ib_uverbs_file *file,
if (cmd.attr_mask > IB_CQ_MODERATE)
return -EOPNOTSUPP;
- cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file->ucontext);
+ cq = uobj_get_obj_read(cq, UVERBS_OBJECT_CQ, cmd.cq_handle, file);
if (!cq)
return -EINVAL;
diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c
index 8d32c4ae368c..d3bf82cfaa2b 100644
--- a/drivers/infiniband/core/uverbs_ioctl.c
+++ b/drivers/infiniband/core/uverbs_ioctl.c
@@ -46,8 +46,7 @@ static bool uverbs_is_attr_cleared(const struct ib_uverbs_attr *uattr,
0, uattr->len - len);
}
-static int uverbs_process_attr(struct ib_device *ibdev,
- struct ib_ucontext *ucontext,
+static int uverbs_process_attr(struct ib_uverbs_file *ufile,
const struct ib_uverbs_attr *uattr,
u16 attr_id,
const struct uverbs_attr_spec_hash *attr_spec_bucket,
@@ -78,13 +77,13 @@ static int uverbs_process_attr(struct ib_device *ibdev,
switch (spec->type) {
case UVERBS_ATTR_TYPE_ENUM_IN:
- if (uattr->attr_data.enum_data.elem_id >= spec->enum_def.num_elems)
+ if (uattr->attr_data.enum_data.elem_id >= spec->u.enum_def.num_elems)
return -EOPNOTSUPP;
if (uattr->attr_data.enum_data.reserved)
return -EINVAL;
- val_spec = &spec->enum_def.ids[uattr->attr_data.enum_data.elem_id];
+ val_spec = &spec->u2.enum_def.ids[uattr->attr_data.enum_data.elem_id];
/* Currently we only support PTR_IN based enums */
if (val_spec->type != UVERBS_ATTR_TYPE_PTR_IN)
@@ -98,25 +97,42 @@ static int uverbs_process_attr(struct ib_device *ibdev,
* longer struct will fail here if used with an old kernel and
* non-zero content, making ABI compat/discovery simpler.
*/
- if (uattr->len > val_spec->ptr.len &&
- val_spec->flags & UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO &&
- !uverbs_is_attr_cleared(uattr, val_spec->ptr.len))
+ if (uattr->len > val_spec->u.ptr.len &&
+ val_spec->zero_trailing &&
+ !uverbs_is_attr_cleared(uattr, val_spec->u.ptr.len))
return -EOPNOTSUPP;
/* fall through */
case UVERBS_ATTR_TYPE_PTR_OUT:
- if (uattr->len < val_spec->ptr.min_len ||
- (!(val_spec->flags & UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO) &&
- uattr->len > val_spec->ptr.len))
+ if (uattr->len < val_spec->u.ptr.min_len ||
+ (!val_spec->zero_trailing &&
+ uattr->len > val_spec->u.ptr.len))
return -EINVAL;
if (spec->type != UVERBS_ATTR_TYPE_ENUM_IN &&
uattr->attr_data.reserved)
return -EINVAL;
- e->ptr_attr.data = uattr->data;
e->ptr_attr.len = uattr->len;
e->ptr_attr.flags = uattr->flags;
+
+ if (val_spec->alloc_and_copy && !uverbs_attr_ptr_is_inline(e)) {
+ void *p;
+
+ p = kvmalloc(uattr->len, GFP_KERNEL);
+ if (!p)
+ return -ENOMEM;
+
+ e->ptr_attr.ptr = p;
+
+ if (copy_from_user(p, u64_to_user_ptr(uattr->data),
+ uattr->len)) {
+ kvfree(p);
+ return -EFAULT;
+ }
+ } else {
+ e->ptr_attr.data = uattr->data;
+ }
break;
case UVERBS_ATTR_TYPE_IDR:
@@ -127,26 +143,25 @@ static int uverbs_process_attr(struct ib_device *ibdev,
if (uattr->attr_data.reserved)
return -EINVAL;
- if (uattr->len != 0 || !ucontext || uattr->data > INT_MAX)
+ if (uattr->len != 0 || !ufile->ucontext ||
+ uattr->data > INT_MAX)
return -EINVAL;
o_attr = &e->obj_attr;
- object = uverbs_get_object(ibdev, spec->obj.obj_type);
+ object = uverbs_get_object(ufile, spec->u.obj.obj_type);
if (!object)
return -EINVAL;
- o_attr->type = object->type_attrs;
- o_attr->id = (int)uattr->data;
- o_attr->uobject = uverbs_get_uobject_from_context(
- o_attr->type,
- ucontext,
- spec->obj.access,
- o_attr->id);
+ o_attr->uobject = uverbs_get_uobject_from_file(
+ object->type_attrs,
+ ufile,
+ spec->u.obj.access,
+ (int)uattr->data);
if (IS_ERR(o_attr->uobject))
return PTR_ERR(o_attr->uobject);
- if (spec->obj.access == UVERBS_ACCESS_NEW) {
+ if (spec->u.obj.access == UVERBS_ACCESS_NEW) {
u64 id = o_attr->uobject->id;
/* Copy the allocated id to the user-space */
@@ -167,8 +182,53 @@ static int uverbs_process_attr(struct ib_device *ibdev,
return 0;
}
-static int uverbs_uattrs_process(struct ib_device *ibdev,
- struct ib_ucontext *ucontext,
+static int uverbs_finalize_attrs(struct uverbs_attr_bundle *attrs_bundle,
+ struct uverbs_attr_spec_hash *const *spec_hash,
+ size_t num, bool commit)
+{
+ unsigned int i;
+ int ret = 0;
+
+ for (i = 0; i < num; i++) {
+ struct uverbs_attr_bundle_hash *curr_bundle =
+ &attrs_bundle->hash[i];
+ const struct uverbs_attr_spec_hash *curr_spec_bucket =
+ spec_hash[i];
+ unsigned int j;
+
+ if (!curr_spec_bucket)
+ continue;
+
+ for (j = 0; j < curr_bundle->num_attrs; j++) {
+ struct uverbs_attr *attr;
+ const struct uverbs_attr_spec *spec;
+
+ if (!uverbs_attr_is_valid_in_hash(curr_bundle, j))
+ continue;
+
+ attr = &curr_bundle->attrs[j];
+ spec = &curr_spec_bucket->attrs[j];
+
+ if (spec->type == UVERBS_ATTR_TYPE_IDR ||
+ spec->type == UVERBS_ATTR_TYPE_FD) {
+ int current_ret;
+
+ current_ret = uverbs_finalize_object(
+ attr->obj_attr.uobject,
+ spec->u.obj.access, commit);
+ if (!ret)
+ ret = current_ret;
+ } else if (spec->type == UVERBS_ATTR_TYPE_PTR_IN &&
+ spec->alloc_and_copy &&
+ !uverbs_attr_ptr_is_inline(attr)) {
+ kvfree(attr->ptr_attr.ptr);
+ }
+ }
+ }
+ return ret;
+}
+
+static int uverbs_uattrs_process(struct ib_uverbs_file *ufile,
const struct ib_uverbs_attr *uattrs,
size_t num_uattrs,
const struct uverbs_method_spec *method,
@@ -185,12 +245,12 @@ static int uverbs_uattrs_process(struct ib_device *ibdev,
struct uverbs_attr_spec_hash *attr_spec_bucket;
ret = uverbs_ns_idx(&attr_id, method->num_buckets);
- if (ret < 0) {
+ if (ret < 0 || !method->attr_buckets[ret]) {
if (uattr->flags & UVERBS_ATTR_F_MANDATORY) {
- uverbs_finalize_objects(attr_bundle,
- method->attr_buckets,
- num_given_buckets,
- false);
+ uverbs_finalize_attrs(attr_bundle,
+ method->attr_buckets,
+ num_given_buckets,
+ false);
return ret;
}
continue;
@@ -204,14 +264,14 @@ static int uverbs_uattrs_process(struct ib_device *ibdev,
num_given_buckets = ret + 1;
attr_spec_bucket = method->attr_buckets[ret];
- ret = uverbs_process_attr(ibdev, ucontext, uattr, attr_id,
- attr_spec_bucket, &attr_bundle->hash[ret],
- uattr_ptr++);
+ ret = uverbs_process_attr(ufile, uattr, attr_id,
+ attr_spec_bucket,
+ &attr_bundle->hash[ret], uattr_ptr++);
if (ret) {
- uverbs_finalize_objects(attr_bundle,
- method->attr_buckets,
- num_given_buckets,
- false);
+ uverbs_finalize_attrs(attr_bundle,
+ method->attr_buckets,
+ num_given_buckets,
+ false);
return ret;
}
}
@@ -228,6 +288,9 @@ static int uverbs_validate_kernel_mandatory(const struct uverbs_method_spec *met
struct uverbs_attr_spec_hash *attr_spec_bucket =
method_spec->attr_buckets[i];
+ if (!attr_spec_bucket)
+ continue;
+
if (!bitmap_subset(attr_spec_bucket->mandatory_attrs_bitmask,
attr_bundle->hash[i].valid_bitmap,
attr_spec_bucket->num_attrs))
@@ -258,9 +321,8 @@ static int uverbs_handle_method(struct ib_uverbs_attr __user *uattr_ptr,
int finalize_ret;
int num_given_buckets;
- num_given_buckets = uverbs_uattrs_process(ibdev, ufile->ucontext, uattrs,
- num_uattrs, method_spec,
- attr_bundle, uattr_ptr);
+ num_given_buckets = uverbs_uattrs_process(
+ ufile, uattrs, num_uattrs, method_spec, attr_bundle, uattr_ptr);
if (num_given_buckets <= 0)
return -EINVAL;
@@ -271,10 +333,10 @@ static int uverbs_handle_method(struct ib_uverbs_attr __user *uattr_ptr,
ret = method_spec->handler(ibdev, ufile, attr_bundle);
cleanup:
- finalize_ret = uverbs_finalize_objects(attr_bundle,
- method_spec->attr_buckets,
- attr_bundle->num_buckets,
- !ret);
+ finalize_ret = uverbs_finalize_attrs(attr_bundle,
+ method_spec->attr_buckets,
+ attr_bundle->num_buckets,
+ !ret);
return ret ? ret : finalize_ret;
}
@@ -301,7 +363,7 @@ static long ib_uverbs_cmd_verbs(struct ib_device *ib_dev,
if (hdr->driver_id != ib_dev->driver_id)
return -EINVAL;
- object_spec = uverbs_get_object(ib_dev, hdr->object_id);
+ object_spec = uverbs_get_object(file, hdr->object_id);
if (!object_spec)
return -EPROTONOSUPPORT;
@@ -341,7 +403,12 @@ static long ib_uverbs_cmd_verbs(struct ib_device *ib_dev,
* filled at a later stage (uverbs_process_attr)
*/
for (i = 0; i < method_spec->num_buckets; i++) {
- unsigned int curr_num_attrs = method_spec->attr_buckets[i]->num_attrs;
+ unsigned int curr_num_attrs;
+
+ if (!method_spec->attr_buckets[i])
+ continue;
+
+ curr_num_attrs = method_spec->attr_buckets[i]->num_attrs;
ctx->uverbs_attr_bundle->hash[i].attrs = curr_attr;
curr_attr += curr_num_attrs;
diff --git a/drivers/infiniband/core/uverbs_ioctl_merge.c b/drivers/infiniband/core/uverbs_ioctl_merge.c
index 6ceb672c4d46..f81aa888ce5c 100644
--- a/drivers/infiniband/core/uverbs_ioctl_merge.c
+++ b/drivers/infiniband/core/uverbs_ioctl_merge.c
@@ -367,25 +367,25 @@ static struct uverbs_method_spec *build_method_with_attrs(const struct uverbs_me
memcpy(attr, &attr_defs[0]->attr, sizeof(*attr));
attr_obj_with_special_access = IS_ATTR_OBJECT(attr) &&
- (attr->obj.access == UVERBS_ACCESS_NEW ||
- attr->obj.access == UVERBS_ACCESS_DESTROY);
+ (attr->u.obj.access == UVERBS_ACCESS_NEW ||
+ attr->u.obj.access == UVERBS_ACCESS_DESTROY);
num_of_singularities += !!attr_obj_with_special_access;
if (WARN(num_of_singularities > 1,
"ib_uverbs: Method contains more than one object attr (%d) with new/destroy access\n",
min_id) ||
WARN(attr_obj_with_special_access &&
- !(attr->flags & UVERBS_ATTR_SPEC_F_MANDATORY),
+ !attr->mandatory,
"ib_uverbs: Tried to merge attr (%d) but it's an object with new/destroy access but isn't mandatory\n",
min_id) ||
WARN(IS_ATTR_OBJECT(attr) &&
- attr->flags & UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO,
+ attr->zero_trailing,
"ib_uverbs: Tried to merge attr (%d) but it's an object with min_sz flag\n",
min_id)) {
res = -EINVAL;
goto free;
}
- if (attr->flags & UVERBS_ATTR_SPEC_F_MANDATORY)
+ if (attr->mandatory)
set_bit(min_id, hash->mandatory_attrs_bitmask);
min_id++;
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 3ae2339dd27a..8425718bebbd 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -138,6 +138,12 @@ static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file,
static void ib_uverbs_add_one(struct ib_device *device);
static void ib_uverbs_remove_one(struct ib_device *device, void *client_data);
+struct ib_ucontext *ib_uverbs_get_ucontext(struct ib_uverbs_file *ufile)
+{
+ return ufile->ucontext;
+}
+EXPORT_SYMBOL(ib_uverbs_get_ucontext);
+
int uverbs_dealloc_mw(struct ib_mw *mw)
{
struct ib_pd *pd = mw->pd;
@@ -155,6 +161,7 @@ static void ib_uverbs_release_dev(struct kobject *kobj)
container_of(kobj, struct ib_uverbs_device, kobj);
cleanup_srcu_struct(&dev->disassociate_srcu);
+ uverbs_free_spec_tree(dev->specs_root);
kfree(dev);
}
@@ -184,7 +191,7 @@ void ib_uverbs_release_ucq(struct ib_uverbs_file *file,
}
spin_unlock_irq(&ev_file->ev_queue.lock);
- uverbs_uobject_put(&ev_file->uobj_file.uobj);
+ uverbs_uobject_put(&ev_file->uobj);
}
spin_lock_irq(&file->async_file->ev_queue.lock);
@@ -220,12 +227,13 @@ void ib_uverbs_detach_umcast(struct ib_qp *qp,
}
}
-static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
- struct ib_ucontext *context,
- bool device_removed)
+static int ib_uverbs_cleanup_ufile(struct ib_uverbs_file *file,
+ bool device_removed)
{
+ struct ib_ucontext *context = file->ucontext;
+
context->closing = 1;
- uverbs_cleanup_ucontext(context, device_removed);
+ uverbs_cleanup_ufile(file, device_removed);
put_pid(context->tgid);
ib_rdmacg_uncharge(&context->cg_obj, context->device,
@@ -338,7 +346,7 @@ static ssize_t ib_uverbs_comp_event_read(struct file *filp, char __user *buf,
filp->private_data;
return ib_uverbs_event_read(&comp_ev_file->ev_queue,
- comp_ev_file->uobj_file.ufile, filp,
+ comp_ev_file->uobj.ufile, filp,
buf, count, pos,
sizeof(struct ib_uverbs_comp_event_desc));
}
@@ -420,7 +428,9 @@ static int ib_uverbs_async_event_close(struct inode *inode, struct file *filp)
static int ib_uverbs_comp_event_close(struct inode *inode, struct file *filp)
{
- struct ib_uverbs_completion_event_file *file = filp->private_data;
+ struct ib_uobject *uobj = filp->private_data;
+ struct ib_uverbs_completion_event_file *file = container_of(
+ uobj, struct ib_uverbs_completion_event_file, uobj);
struct ib_uverbs_event *entry, *tmp;
spin_lock_irq(&file->ev_queue.lock);
@@ -528,7 +538,7 @@ void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr)
struct ib_ucq_object *uobj = container_of(event->element.cq->uobject,
struct ib_ucq_object, uobject);
- ib_uverbs_async_handler(uobj->uverbs_file, uobj->uobject.user_handle,
+ ib_uverbs_async_handler(uobj->uobject.ufile, uobj->uobject.user_handle,
event->event, &uobj->async_list,
&uobj->async_events_reported);
}
@@ -875,12 +885,14 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp)
file->device = dev;
spin_lock_init(&file->idr_lock);
idr_init(&file->idr);
- file->ucontext = NULL;
- file->async_file = NULL;
kref_init(&file->ref);
mutex_init(&file->mutex);
mutex_init(&file->cleanup_mutex);
+ mutex_init(&file->uobjects_lock);
+ INIT_LIST_HEAD(&file->uobjects);
+ init_rwsem(&file->cleanup_rwsem);
+
filp->private_data = file;
kobject_get(&dev->kobj);
list_add_tail(&file->list, &dev->uverbs_file_list);
@@ -907,7 +919,7 @@ static int ib_uverbs_close(struct inode *inode, struct file *filp)
mutex_lock(&file->cleanup_mutex);
if (file->ucontext) {
- ib_uverbs_cleanup_ucontext(file, file->ucontext, false);
+ ib_uverbs_cleanup_ufile(file, false);
file->ucontext = NULL;
}
mutex_unlock(&file->cleanup_mutex);
@@ -1061,7 +1073,7 @@ static void ib_uverbs_add_one(struct ib_device *device)
if (device_create_file(uverbs_dev->dev, &dev_attr_abi_version))
goto err_class;
- if (!device->specs_root) {
+ if (!device->driver_specs_root) {
const struct uverbs_object_tree_def *default_root[] = {
uverbs_default_get_objects()};
@@ -1069,8 +1081,13 @@ static void ib_uverbs_add_one(struct ib_device *device)
default_root);
if (IS_ERR(uverbs_dev->specs_root))
goto err_class;
-
- device->specs_root = uverbs_dev->specs_root;
+ } else {
+ uverbs_dev->specs_root = device->driver_specs_root;
+ /*
+ * Take responsibility to free the specs allocated by the
+ * driver.
+ */
+ device->driver_specs_root = NULL;
}
ib_set_client_data(device, &uverbs_client, uverbs_dev);
@@ -1160,7 +1177,7 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
mutex_unlock(&file->cleanup_mutex);
/* At this point ib_uverbs_close cannot be running
- * ib_uverbs_cleanup_ucontext
+ * ib_uverbs_cleanup_ufile
*/
if (ucontext) {
/* We must release the mutex before going ahead and
@@ -1172,7 +1189,7 @@ static void ib_uverbs_free_hw_resources(struct ib_uverbs_device *uverbs_dev,
ib_uverbs_event_handler(&file->event_handler, &event);
ib_uverbs_disassociate_ucontext(ucontext);
mutex_lock(&file->cleanup_mutex);
- ib_uverbs_cleanup_ucontext(file, ucontext, true);
+ ib_uverbs_cleanup_ufile(file, true);
mutex_unlock(&file->cleanup_mutex);
}
@@ -1235,10 +1252,6 @@ static void ib_uverbs_remove_one(struct ib_device *device, void *client_data)
ib_uverbs_comp_dev(uverbs_dev);
if (wait_clients)
wait_for_completion(&uverbs_dev->comp);
- if (uverbs_dev->specs_root) {
- uverbs_free_spec_tree(uverbs_dev->specs_root);
- device->specs_root = NULL;
- }
kobject_put(&uverbs_dev->kobj);
}
diff --git a/drivers/infiniband/core/uverbs_marshall.c b/drivers/infiniband/core/uverbs_marshall.c
index bb372b4713a4..b8d715c68ca4 100644
--- a/drivers/infiniband/core/uverbs_marshall.c
+++ b/drivers/infiniband/core/uverbs_marshall.c
@@ -211,7 +211,5 @@ void ib_copy_path_rec_from_user(struct sa_path_rec *dst,
/* TODO: No need to set this */
sa_path_set_dmac_zero(dst);
- sa_path_set_ndev(dst, NULL);
- sa_path_set_ifindex(dst, 0);
}
EXPORT_SYMBOL(ib_copy_path_rec_from_user);
diff --git a/drivers/infiniband/core/uverbs_std_types.c b/drivers/infiniband/core/uverbs_std_types.c
index b570acbd94af..718c8430d364 100644
--- a/drivers/infiniband/core/uverbs_std_types.c
+++ b/drivers/infiniband/core/uverbs_std_types.c
@@ -48,14 +48,17 @@ static int uverbs_free_ah(struct ib_uobject *uobject,
static int uverbs_free_flow(struct ib_uobject *uobject,
enum rdma_remove_reason why)
{
- int ret;
struct ib_flow *flow = (struct ib_flow *)uobject->object;
struct ib_uflow_object *uflow =
container_of(uobject, struct ib_uflow_object, uobject);
+ struct ib_qp *qp = flow->qp;
+ int ret;
- ret = ib_destroy_flow(flow);
- if (!ret)
+ ret = qp->device->destroy_flow(flow);
+ if (!ret) {
+ atomic_dec(&qp->usecnt);
ib_uverbs_flow_resources_free(uflow->resources);
+ }
return ret;
}
@@ -74,6 +77,13 @@ static int uverbs_free_qp(struct ib_uobject *uobject,
container_of(uobject, struct ib_uqp_object, uevent.uobject);
int ret;
+ /*
+ * If this is a user triggered destroy then do not allow destruction
+ * until the user cleans up all the mcast bindings. Unlike in other
+ * places we forcibly clean up the mcast attachments for !DESTROY
+ * because the mcast attaches are not ubojects and will not be
+ * destroyed by anything else during cleanup processing.
+ */
if (why == RDMA_REMOVE_DESTROY) {
if (!list_empty(&uqp->mcast_list))
return -EBUSY;
@@ -82,7 +92,7 @@ static int uverbs_free_qp(struct ib_uobject *uobject,
}
ret = ib_destroy_qp(qp);
- if (ret && why == RDMA_REMOVE_DESTROY)
+ if (ib_is_destroy_retryable(ret, why, uobject))
return ret;
if (uqp->uxrcd)
@@ -100,8 +110,10 @@ static int uverbs_free_rwq_ind_tbl(struct ib_uobject *uobject,
int ret;
ret = ib_destroy_rwq_ind_table(rwq_ind_tbl);
- if (!ret || why != RDMA_REMOVE_DESTROY)
- kfree(ind_tbl);
+ if (ib_is_destroy_retryable(ret, why, uobject))
+ return ret;
+
+ kfree(ind_tbl);
return ret;
}
@@ -114,8 +126,10 @@ static int uverbs_free_wq(struct ib_uobject *uobject,
int ret;
ret = ib_destroy_wq(wq);
- if (!ret || why != RDMA_REMOVE_DESTROY)
- ib_uverbs_release_uevent(uobject->context->ufile, &uwq->uevent);
+ if (ib_is_destroy_retryable(ret, why, uobject))
+ return ret;
+
+ ib_uverbs_release_uevent(uobject->context->ufile, &uwq->uevent);
return ret;
}
@@ -129,8 +143,7 @@ static int uverbs_free_srq(struct ib_uobject *uobject,
int ret;
ret = ib_destroy_srq(srq);
-
- if (ret && why == RDMA_REMOVE_DESTROY)
+ if (ib_is_destroy_retryable(ret, why, uobject))
return ret;
if (srq_type == IB_SRQT_XRC) {
@@ -152,12 +165,12 @@ static int uverbs_free_xrcd(struct ib_uobject *uobject,
container_of(uobject, struct ib_uxrcd_object, uobject);
int ret;
+ ret = ib_destroy_usecnt(&uxrcd->refcnt, why, uobject);
+ if (ret)
+ return ret;
+
mutex_lock(&uobject->context->ufile->device->xrcd_tree_mutex);
- if (why == RDMA_REMOVE_DESTROY && atomic_read(&uxrcd->refcnt))
- ret = -EBUSY;
- else
- ret = ib_uverbs_dealloc_xrcd(uobject->context->ufile->device,
- xrcd, why);
+ ret = ib_uverbs_dealloc_xrcd(uobject, xrcd, why);
mutex_unlock(&uobject->context->ufile->device->xrcd_tree_mutex);
return ret;
@@ -167,20 +180,22 @@ static int uverbs_free_pd(struct ib_uobject *uobject,
enum rdma_remove_reason why)
{
struct ib_pd *pd = uobject->object;
+ int ret;
- if (why == RDMA_REMOVE_DESTROY && atomic_read(&pd->usecnt))
- return -EBUSY;
+ ret = ib_destroy_usecnt(&pd->usecnt, why, uobject);
+ if (ret)
+ return ret;
ib_dealloc_pd((struct ib_pd *)uobject->object);
return 0;
}
-static int uverbs_hot_unplug_completion_event_file(struct ib_uobject_file *uobj_file,
+static int uverbs_hot_unplug_completion_event_file(struct ib_uobject *uobj,
enum rdma_remove_reason why)
{
struct ib_uverbs_completion_event_file *comp_event_file =
- container_of(uobj_file, struct ib_uverbs_completion_event_file,
- uobj_file);
+ container_of(uobj, struct ib_uverbs_completion_event_file,
+ uobj);
struct ib_uverbs_event_queue *event_queue = &comp_event_file->ev_queue;
spin_lock_irq(&event_queue->lock);
@@ -200,18 +215,7 @@ int uverbs_destroy_def_handler(struct ib_device *ib_dev,
{
return 0;
}
-
-/*
- * This spec is used in order to pass information to the hardware driver in a
- * legacy way. Every verb that could get driver specific data should get this
- * spec.
- */
-const struct uverbs_attr_def uverbs_uhw_compat_in =
- UVERBS_ATTR_PTR_IN_SZ(UVERBS_ATTR_UHW_IN, UVERBS_ATTR_SIZE(0, USHRT_MAX),
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO));
-const struct uverbs_attr_def uverbs_uhw_compat_out =
- UVERBS_ATTR_PTR_OUT_SZ(UVERBS_ATTR_UHW_OUT, UVERBS_ATTR_SIZE(0, USHRT_MAX),
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO));
+EXPORT_SYMBOL(uverbs_destroy_def_handler);
void create_udata(struct uverbs_attr_bundle *ctx, struct ib_udata *udata)
{
@@ -245,65 +249,68 @@ void create_udata(struct uverbs_attr_bundle *ctx, struct ib_udata *udata)
}
}
-DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_COMP_CHANNEL,
- &UVERBS_TYPE_ALLOC_FD(0,
- sizeof(struct ib_uverbs_completion_event_file),
- uverbs_hot_unplug_completion_event_file,
- &uverbs_event_fops,
- "[infinibandevent]", O_RDONLY));
+DECLARE_UVERBS_NAMED_OBJECT(
+ UVERBS_OBJECT_COMP_CHANNEL,
+ UVERBS_TYPE_ALLOC_FD(sizeof(struct ib_uverbs_completion_event_file),
+ uverbs_hot_unplug_completion_event_file,
+ &uverbs_event_fops,
+ "[infinibandevent]",
+ O_RDONLY));
-DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_QP,
- &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uqp_object), 0,
- uverbs_free_qp));
+DECLARE_UVERBS_NAMED_OBJECT(
+ UVERBS_OBJECT_QP,
+ UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uqp_object), uverbs_free_qp));
DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_MW,
- &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_mw));
+ UVERBS_TYPE_ALLOC_IDR(uverbs_free_mw));
-DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_SRQ,
- &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_usrq_object), 0,
- uverbs_free_srq));
+DECLARE_UVERBS_NAMED_OBJECT(
+ UVERBS_OBJECT_SRQ,
+ UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_usrq_object),
+ uverbs_free_srq));
DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_AH,
- &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_ah));
+ UVERBS_TYPE_ALLOC_IDR(uverbs_free_ah));
-DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_FLOW,
- &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uflow_object),
- 0, uverbs_free_flow));
+DECLARE_UVERBS_NAMED_OBJECT(
+ UVERBS_OBJECT_FLOW,
+ UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uflow_object),
+ uverbs_free_flow));
-DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_WQ,
- &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uwq_object), 0,
- uverbs_free_wq));
+DECLARE_UVERBS_NAMED_OBJECT(
+ UVERBS_OBJECT_WQ,
+ UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uwq_object), uverbs_free_wq));
DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_RWQ_IND_TBL,
- &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_rwq_ind_tbl));
+ UVERBS_TYPE_ALLOC_IDR(uverbs_free_rwq_ind_tbl));
-DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_XRCD,
- &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uxrcd_object), 0,
- uverbs_free_xrcd));
+DECLARE_UVERBS_NAMED_OBJECT(
+ UVERBS_OBJECT_XRCD,
+ UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uxrcd_object),
+ uverbs_free_xrcd));
DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_PD,
- /* 2 is used in order to free the PD after MRs */
- &UVERBS_TYPE_ALLOC_IDR(2, uverbs_free_pd));
-
-DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_DEVICE, NULL);
-
-static DECLARE_UVERBS_OBJECT_TREE(uverbs_default_objects,
- &UVERBS_OBJECT(UVERBS_OBJECT_DEVICE),
- &UVERBS_OBJECT(UVERBS_OBJECT_PD),
- &UVERBS_OBJECT(UVERBS_OBJECT_MR),
- &UVERBS_OBJECT(UVERBS_OBJECT_COMP_CHANNEL),
- &UVERBS_OBJECT(UVERBS_OBJECT_CQ),
- &UVERBS_OBJECT(UVERBS_OBJECT_QP),
- &UVERBS_OBJECT(UVERBS_OBJECT_AH),
- &UVERBS_OBJECT(UVERBS_OBJECT_MW),
- &UVERBS_OBJECT(UVERBS_OBJECT_SRQ),
- &UVERBS_OBJECT(UVERBS_OBJECT_FLOW),
- &UVERBS_OBJECT(UVERBS_OBJECT_WQ),
- &UVERBS_OBJECT(UVERBS_OBJECT_RWQ_IND_TBL),
- &UVERBS_OBJECT(UVERBS_OBJECT_XRCD),
- &UVERBS_OBJECT(UVERBS_OBJECT_FLOW_ACTION),
- &UVERBS_OBJECT(UVERBS_OBJECT_DM),
- &UVERBS_OBJECT(UVERBS_OBJECT_COUNTERS));
+ UVERBS_TYPE_ALLOC_IDR(uverbs_free_pd));
+
+DECLARE_UVERBS_GLOBAL_METHODS(UVERBS_OBJECT_DEVICE);
+
+DECLARE_UVERBS_OBJECT_TREE(uverbs_default_objects,
+ &UVERBS_OBJECT(UVERBS_OBJECT_DEVICE),
+ &UVERBS_OBJECT(UVERBS_OBJECT_PD),
+ &UVERBS_OBJECT(UVERBS_OBJECT_MR),
+ &UVERBS_OBJECT(UVERBS_OBJECT_COMP_CHANNEL),
+ &UVERBS_OBJECT(UVERBS_OBJECT_CQ),
+ &UVERBS_OBJECT(UVERBS_OBJECT_QP),
+ &UVERBS_OBJECT(UVERBS_OBJECT_AH),
+ &UVERBS_OBJECT(UVERBS_OBJECT_MW),
+ &UVERBS_OBJECT(UVERBS_OBJECT_SRQ),
+ &UVERBS_OBJECT(UVERBS_OBJECT_FLOW),
+ &UVERBS_OBJECT(UVERBS_OBJECT_WQ),
+ &UVERBS_OBJECT(UVERBS_OBJECT_RWQ_IND_TBL),
+ &UVERBS_OBJECT(UVERBS_OBJECT_XRCD),
+ &UVERBS_OBJECT(UVERBS_OBJECT_FLOW_ACTION),
+ &UVERBS_OBJECT(UVERBS_OBJECT_DM),
+ &UVERBS_OBJECT(UVERBS_OBJECT_COUNTERS));
const struct uverbs_object_tree_def *uverbs_default_get_objects(void)
{
diff --git a/drivers/infiniband/core/uverbs_std_types_counters.c b/drivers/infiniband/core/uverbs_std_types_counters.c
index 03b182a684a6..dfe59ad721f6 100644
--- a/drivers/infiniband/core/uverbs_std_types_counters.c
+++ b/drivers/infiniband/core/uverbs_std_types_counters.c
@@ -38,10 +38,11 @@ static int uverbs_free_counters(struct ib_uobject *uobject,
enum rdma_remove_reason why)
{
struct ib_counters *counters = uobject->object;
+ int ret;
- if (why == RDMA_REMOVE_DESTROY &&
- atomic_read(&counters->usecnt))
- return -EBUSY;
+ ret = ib_destroy_usecnt(&counters->usecnt, why, uobject);
+ if (ret)
+ return ret;
return counters->device->destroy_counters(counters);
}
@@ -123,35 +124,35 @@ err_read:
return ret;
}
-static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_COUNTERS_CREATE,
- &UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_COUNTERS_HANDLE,
- UVERBS_OBJECT_COUNTERS,
- UVERBS_ACCESS_NEW,
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
-
-static DECLARE_UVERBS_NAMED_METHOD_WITH_HANDLER(UVERBS_METHOD_COUNTERS_DESTROY,
- uverbs_destroy_def_handler,
- &UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_COUNTERS_HANDLE,
- UVERBS_OBJECT_COUNTERS,
- UVERBS_ACCESS_DESTROY,
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
-
-#define MAX_COUNTERS_BUFF_SIZE USHRT_MAX
-static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_COUNTERS_READ,
- &UVERBS_ATTR_IDR(UVERBS_ATTR_READ_COUNTERS_HANDLE,
- UVERBS_OBJECT_COUNTERS,
- UVERBS_ACCESS_READ,
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
- &UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_READ_COUNTERS_BUFF,
- UVERBS_ATTR_SIZE(0, MAX_COUNTERS_BUFF_SIZE),
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
- &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_READ_COUNTERS_FLAGS,
- UVERBS_ATTR_TYPE(__u32),
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_COUNTERS_CREATE,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_COUNTERS_HANDLE,
+ UVERBS_OBJECT_COUNTERS,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+ UVERBS_METHOD_COUNTERS_DESTROY,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_COUNTERS_HANDLE,
+ UVERBS_OBJECT_COUNTERS,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_COUNTERS_READ,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_READ_COUNTERS_HANDLE,
+ UVERBS_OBJECT_COUNTERS,
+ UVERBS_ACCESS_READ,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_READ_COUNTERS_BUFF,
+ UVERBS_ATTR_MIN_SIZE(0),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_READ_COUNTERS_FLAGS,
+ UVERBS_ATTR_TYPE(__u32),
+ UA_MANDATORY));
DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_COUNTERS,
- &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_counters),
+ UVERBS_TYPE_ALLOC_IDR(uverbs_free_counters),
&UVERBS_METHOD(UVERBS_METHOD_COUNTERS_CREATE),
&UVERBS_METHOD(UVERBS_METHOD_COUNTERS_DESTROY),
&UVERBS_METHOD(UVERBS_METHOD_COUNTERS_READ));
-
diff --git a/drivers/infiniband/core/uverbs_std_types_cq.c b/drivers/infiniband/core/uverbs_std_types_cq.c
index 3d293d01afea..5a6154345fa0 100644
--- a/drivers/infiniband/core/uverbs_std_types_cq.c
+++ b/drivers/infiniband/core/uverbs_std_types_cq.c
@@ -44,12 +44,16 @@ static int uverbs_free_cq(struct ib_uobject *uobject,
int ret;
ret = ib_destroy_cq(cq);
- if (!ret || why != RDMA_REMOVE_DESTROY)
- ib_uverbs_release_ucq(uobject->context->ufile, ev_queue ?
- container_of(ev_queue,
- struct ib_uverbs_completion_event_file,
- ev_queue) : NULL,
- ucq);
+ if (ib_is_destroy_retryable(ret, why, uobject))
+ return ret;
+
+ ib_uverbs_release_ucq(
+ uobject->context->ufile,
+ ev_queue ? container_of(ev_queue,
+ struct ib_uverbs_completion_event_file,
+ ev_queue) :
+ NULL,
+ ucq);
return ret;
}
@@ -57,7 +61,6 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(struct ib_device *ib_dev,
struct ib_uverbs_file *file,
struct uverbs_attr_bundle *attrs)
{
- struct ib_ucontext *ucontext = file->ucontext;
struct ib_ucq_object *obj;
struct ib_udata uhw;
int ret;
@@ -67,7 +70,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(struct ib_device *ib_dev,
struct ib_uverbs_completion_event_file *ev_file = NULL;
struct ib_uobject *ev_file_uobj;
- if (!(ib_dev->uverbs_cmd_mask & 1ULL << IB_USER_VERBS_CMD_CREATE_CQ))
+ if (!ib_dev->create_cq || !ib_dev->destroy_cq)
return -EOPNOTSUPP;
ret = uverbs_copy_from(&attr.comp_vector, attrs,
@@ -90,11 +93,11 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(struct ib_device *ib_dev,
if (!IS_ERR(ev_file_uobj)) {
ev_file = container_of(ev_file_uobj,
struct ib_uverbs_completion_event_file,
- uobj_file.uobj);
+ uobj);
uverbs_uobject_get(ev_file_uobj);
}
- if (attr.comp_vector >= ucontext->ufile->device->num_comp_vectors) {
+ if (attr.comp_vector >= file->device->num_comp_vectors) {
ret = -EINVAL;
goto err_event_file;
}
@@ -102,7 +105,6 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(struct ib_device *ib_dev,
obj = container_of(uverbs_attr_get_uobject(attrs,
UVERBS_ATTR_CREATE_CQ_HANDLE),
typeof(*obj), uobject);
- obj->uverbs_file = ucontext->ufile;
obj->comp_events_reported = 0;
obj->async_events_reported = 0;
INIT_LIST_HEAD(&obj->comp_list);
@@ -111,7 +113,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_CREATE)(struct ib_device *ib_dev,
/* Temporary, only until drivers get the new uverbs_attr_bundle */
create_udata(attrs, &uhw);
- cq = ib_dev->create_cq(ib_dev, &attr, ucontext, &uhw);
+ cq = ib_dev->create_cq(ib_dev, &attr, file->ucontext, &uhw);
if (IS_ERR(cq)) {
ret = PTR_ERR(cq);
goto err_event_file;
@@ -143,25 +145,30 @@ err_event_file:
return ret;
};
-static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_CQ_CREATE,
- &UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_CQ_HANDLE, UVERBS_OBJECT_CQ,
- UVERBS_ACCESS_NEW,
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
- &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_CQE,
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_CQ_CREATE,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_CQ_HANDLE,
+ UVERBS_OBJECT_CQ,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_CQE,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_USER_HANDLE,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY),
+ UVERBS_ATTR_FD(UVERBS_ATTR_CREATE_CQ_COMP_CHANNEL,
+ UVERBS_OBJECT_COMP_CHANNEL,
+ UVERBS_ACCESS_READ,
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_COMP_VECTOR,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_FLAGS, UVERBS_ATTR_TYPE(u32)),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CREATE_CQ_RESP_CQE,
UVERBS_ATTR_TYPE(u32),
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
- &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_USER_HANDLE,
- UVERBS_ATTR_TYPE(u64),
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
- &UVERBS_ATTR_FD(UVERBS_ATTR_CREATE_CQ_COMP_CHANNEL,
- UVERBS_OBJECT_COMP_CHANNEL,
- UVERBS_ACCESS_READ),
- &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_COMP_VECTOR, UVERBS_ATTR_TYPE(u32),
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
- &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_CREATE_CQ_FLAGS, UVERBS_ATTR_TYPE(u32)),
- &UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_CREATE_CQ_RESP_CQE, UVERBS_ATTR_TYPE(u32),
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
- &uverbs_uhw_compat_in, &uverbs_uhw_compat_out);
+ UA_MANDATORY),
+ UVERBS_ATTR_UHW());
static int UVERBS_HANDLER(UVERBS_METHOD_CQ_DESTROY)(struct ib_device *ib_dev,
struct ib_uverbs_file *file,
@@ -178,9 +185,6 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_DESTROY)(struct ib_device *ib_dev,
obj = container_of(uobj, struct ib_ucq_object, uobject);
- if (!(ib_dev->uverbs_cmd_mask & 1ULL << IB_USER_VERBS_CMD_DESTROY_CQ))
- return -EOPNOTSUPP;
-
ret = rdma_explicit_destroy(uobj);
if (ret)
return ret;
@@ -192,20 +196,22 @@ static int UVERBS_HANDLER(UVERBS_METHOD_CQ_DESTROY)(struct ib_device *ib_dev,
sizeof(resp));
}
-static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_CQ_DESTROY,
- &UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_CQ_HANDLE, UVERBS_OBJECT_CQ,
- UVERBS_ACCESS_DESTROY,
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
- &UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_DESTROY_CQ_RESP,
- UVERBS_ATTR_TYPE(struct ib_uverbs_destroy_cq_resp),
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
-
-DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_CQ,
- &UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_ucq_object), 0,
- uverbs_free_cq),
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_CQ_DESTROY,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_CQ_HANDLE,
+ UVERBS_OBJECT_CQ,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_DESTROY_CQ_RESP,
+ UVERBS_ATTR_TYPE(struct ib_uverbs_destroy_cq_resp),
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_OBJECT(
+ UVERBS_OBJECT_CQ,
+ UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_ucq_object), uverbs_free_cq),
+
#if IS_ENABLED(CONFIG_INFINIBAND_EXP_LEGACY_VERBS_NEW_UAPI)
- &UVERBS_METHOD(UVERBS_METHOD_CQ_CREATE),
- &UVERBS_METHOD(UVERBS_METHOD_CQ_DESTROY)
+ &UVERBS_METHOD(UVERBS_METHOD_CQ_CREATE),
+ &UVERBS_METHOD(UVERBS_METHOD_CQ_DESTROY)
#endif
- );
-
+);
diff --git a/drivers/infiniband/core/uverbs_std_types_dm.c b/drivers/infiniband/core/uverbs_std_types_dm.c
index 8b681575b615..9e148e322523 100644
--- a/drivers/infiniband/core/uverbs_std_types_dm.c
+++ b/drivers/infiniband/core/uverbs_std_types_dm.c
@@ -37,9 +37,11 @@ static int uverbs_free_dm(struct ib_uobject *uobject,
enum rdma_remove_reason why)
{
struct ib_dm *dm = uobject->object;
+ int ret;
- if (why == RDMA_REMOVE_DESTROY && atomic_read(&dm->usecnt))
- return -EBUSY;
+ ret = ib_destroy_usecnt(&dm->usecnt, why, uobject);
+ if (ret)
+ return ret;
return dm->device->dealloc_dm(dm);
}
@@ -48,7 +50,6 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_ALLOC)(struct ib_device *ib_dev,
struct ib_uverbs_file *file,
struct uverbs_attr_bundle *attrs)
{
- struct ib_ucontext *ucontext = file->ucontext;
struct ib_dm_alloc_attr attr = {};
struct ib_uobject *uobj;
struct ib_dm *dm;
@@ -69,7 +70,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_ALLOC)(struct ib_device *ib_dev,
uobj = uverbs_attr_get(attrs, UVERBS_ATTR_ALLOC_DM_HANDLE)->obj_attr.uobject;
- dm = ib_dev->alloc_dm(ib_dev, ucontext, &attr, attrs);
+ dm = ib_dev->alloc_dm(ib_dev, file->ucontext, &attr, attrs);
if (IS_ERR(dm))
return PTR_ERR(dm);
@@ -83,26 +84,27 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_ALLOC)(struct ib_device *ib_dev,
return 0;
}
-static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_DM_ALLOC,
- &UVERBS_ATTR_IDR(UVERBS_ATTR_ALLOC_DM_HANDLE, UVERBS_OBJECT_DM,
- UVERBS_ACCESS_NEW,
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
- &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_ALLOC_DM_LENGTH,
- UVERBS_ATTR_TYPE(u64),
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
- &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_ALLOC_DM_ALIGNMENT,
- UVERBS_ATTR_TYPE(u32),
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
-
-static DECLARE_UVERBS_NAMED_METHOD_WITH_HANDLER(UVERBS_METHOD_DM_FREE,
- uverbs_destroy_def_handler,
- &UVERBS_ATTR_IDR(UVERBS_ATTR_FREE_DM_HANDLE,
- UVERBS_OBJECT_DM,
- UVERBS_ACCESS_DESTROY,
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_DM_ALLOC,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_ALLOC_DM_HANDLE,
+ UVERBS_OBJECT_DM,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_ALLOC_DM_LENGTH,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_ALLOC_DM_ALIGNMENT,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+ UVERBS_METHOD_DM_FREE,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_FREE_DM_HANDLE,
+ UVERBS_OBJECT_DM,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY));
DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_DM,
- /* 1 is used in order to free the DM after MRs */
- &UVERBS_TYPE_ALLOC_IDR(1, uverbs_free_dm),
+ UVERBS_TYPE_ALLOC_IDR(uverbs_free_dm),
&UVERBS_METHOD(UVERBS_METHOD_DM_ALLOC),
&UVERBS_METHOD(UVERBS_METHOD_DM_FREE));
diff --git a/drivers/infiniband/core/uverbs_std_types_flow_action.c b/drivers/infiniband/core/uverbs_std_types_flow_action.c
index a7be51cf2e42..c753a34cd984 100644
--- a/drivers/infiniband/core/uverbs_std_types_flow_action.c
+++ b/drivers/infiniband/core/uverbs_std_types_flow_action.c
@@ -37,10 +37,11 @@ static int uverbs_free_flow_action(struct ib_uobject *uobject,
enum rdma_remove_reason why)
{
struct ib_flow_action *action = uobject->object;
+ int ret;
- if (why == RDMA_REMOVE_DESTROY &&
- atomic_read(&action->usecnt))
- return -EBUSY;
+ ret = ib_destroy_usecnt(&action->usecnt, why, uobject);
+ if (ret)
+ return ret;
return action->device->destroy_flow_action(action);
}
@@ -320,7 +321,8 @@ static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE)(struct ib_device
return ret;
/* No need to check as this attribute is marked as MANDATORY */
- uobj = uverbs_attr_get_uobject(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_HANDLE);
+ uobj = uverbs_attr_get_uobject(
+ attrs, UVERBS_ATTR_CREATE_FLOW_ACTION_ESP_HANDLE);
action = ib_dev->create_flow_action_esp(ib_dev, &esp_attr.hdr, attrs);
if (IS_ERR(action))
return PTR_ERR(action);
@@ -350,7 +352,8 @@ static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY)(struct ib_device
if (ret)
return ret;
- uobj = uverbs_attr_get_uobject(attrs, UVERBS_ATTR_FLOW_ACTION_ESP_HANDLE);
+ uobj = uverbs_attr_get_uobject(
+ attrs, UVERBS_ATTR_MODIFY_FLOW_ACTION_ESP_HANDLE);
action = uobj->object;
if (action->type != IB_FLOW_ACTION_ESP)
@@ -363,73 +366,84 @@ static int UVERBS_HANDLER(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY)(struct ib_device
static const struct uverbs_attr_spec uverbs_flow_action_esp_keymat[] = {
[IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM] = {
- { .ptr = {
- .type = UVERBS_ATTR_TYPE_PTR_IN,
- UVERBS_ATTR_TYPE(struct ib_uverbs_flow_action_esp_keymat_aes_gcm),
- .flags = UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO,
- } },
+ .type = UVERBS_ATTR_TYPE_PTR_IN,
+ UVERBS_ATTR_STRUCT(
+ struct ib_uverbs_flow_action_esp_keymat_aes_gcm,
+ aes_key),
},
};
static const struct uverbs_attr_spec uverbs_flow_action_esp_replay[] = {
[IB_UVERBS_FLOW_ACTION_ESP_REPLAY_NONE] = {
- { .ptr = {
- .type = UVERBS_ATTR_TYPE_PTR_IN,
- /* No need to specify any data */
- .len = 0,
- } }
+ .type = UVERBS_ATTR_TYPE_PTR_IN,
+ /* No need to specify any data */
+ UVERBS_ATTR_SIZE(0, 0),
},
[IB_UVERBS_FLOW_ACTION_ESP_REPLAY_BMP] = {
- { .ptr = {
- .type = UVERBS_ATTR_TYPE_PTR_IN,
- UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_replay_bmp, size),
- .flags = UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO,
- } }
+ .type = UVERBS_ATTR_TYPE_PTR_IN,
+ UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_replay_bmp,
+ size),
},
};
-static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE,
- &UVERBS_ATTR_IDR(UVERBS_ATTR_FLOW_ACTION_ESP_HANDLE, UVERBS_OBJECT_FLOW_ACTION,
- UVERBS_ACCESS_NEW,
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
- &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS,
- UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp, hard_limit_pkts),
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY |
- UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)),
- &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ESN, UVERBS_ATTR_TYPE(__u32)),
- &UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT,
- uverbs_flow_action_esp_keymat,
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
- &UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY,
- uverbs_flow_action_esp_replay),
- &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP,
- UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_encap, type)));
-
-static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY,
- &UVERBS_ATTR_IDR(UVERBS_ATTR_FLOW_ACTION_ESP_HANDLE, UVERBS_OBJECT_FLOW_ACTION,
- UVERBS_ACCESS_WRITE,
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
- &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS,
- UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp, hard_limit_pkts),
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO)),
- &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ESN, UVERBS_ATTR_TYPE(__u32)),
- &UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT,
- uverbs_flow_action_esp_keymat),
- &UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY,
- uverbs_flow_action_esp_replay),
- &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP,
- UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp_encap, type)));
-
-static DECLARE_UVERBS_NAMED_METHOD_WITH_HANDLER(UVERBS_METHOD_FLOW_ACTION_DESTROY,
- uverbs_destroy_def_handler,
- &UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_FLOW_ACTION_HANDLE,
- UVERBS_OBJECT_FLOW_ACTION,
- UVERBS_ACCESS_DESTROY,
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
-
-DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_FLOW_ACTION,
- &UVERBS_TYPE_ALLOC_IDR(0, uverbs_free_flow_action),
- &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE),
- &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_DESTROY),
- &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY));
-
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_FLOW_ACTION_ESP_CREATE,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_CREATE_FLOW_ACTION_ESP_HANDLE,
+ UVERBS_OBJECT_FLOW_ACTION,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS,
+ UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp,
+ hard_limit_pkts),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ESN,
+ UVERBS_ATTR_TYPE(__u32),
+ UA_OPTIONAL),
+ UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT,
+ uverbs_flow_action_esp_keymat,
+ UA_MANDATORY),
+ UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY,
+ uverbs_flow_action_esp_replay,
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_IN(
+ UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP,
+ UVERBS_ATTR_TYPE(struct ib_uverbs_flow_action_esp_encap),
+ UA_OPTIONAL));
+
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_MODIFY_FLOW_ACTION_ESP_HANDLE,
+ UVERBS_OBJECT_FLOW_ACTION,
+ UVERBS_ACCESS_WRITE,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS,
+ UVERBS_ATTR_STRUCT(struct ib_uverbs_flow_action_esp,
+ hard_limit_pkts),
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_FLOW_ACTION_ESP_ESN,
+ UVERBS_ATTR_TYPE(__u32),
+ UA_OPTIONAL),
+ UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT,
+ uverbs_flow_action_esp_keymat,
+ UA_OPTIONAL),
+ UVERBS_ATTR_ENUM_IN(UVERBS_ATTR_FLOW_ACTION_ESP_REPLAY,
+ uverbs_flow_action_esp_replay,
+ UA_OPTIONAL),
+ UVERBS_ATTR_PTR_IN(
+ UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP,
+ UVERBS_ATTR_TYPE(struct ib_uverbs_flow_action_esp_encap),
+ UA_OPTIONAL));
+
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+ UVERBS_METHOD_FLOW_ACTION_DESTROY,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_DESTROY_FLOW_ACTION_HANDLE,
+ UVERBS_OBJECT_FLOW_ACTION,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_OBJECT(
+ UVERBS_OBJECT_FLOW_ACTION,
+ UVERBS_TYPE_ALLOC_IDR(uverbs_free_flow_action),
+ &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_CREATE),
+ &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_DESTROY),
+ &UVERBS_METHOD(UVERBS_METHOD_FLOW_ACTION_ESP_MODIFY));
diff --git a/drivers/infiniband/core/uverbs_std_types_mr.c b/drivers/infiniband/core/uverbs_std_types_mr.c
index 68f7cadf088f..c1b9124d611e 100644
--- a/drivers/infiniband/core/uverbs_std_types_mr.c
+++ b/drivers/infiniband/core/uverbs_std_types_mr.c
@@ -115,33 +115,37 @@ err_dereg:
return ret;
}
-static DECLARE_UVERBS_NAMED_METHOD(UVERBS_METHOD_DM_MR_REG,
- &UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_HANDLE, UVERBS_OBJECT_MR,
- UVERBS_ACCESS_NEW,
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
- &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DM_MR_OFFSET,
- UVERBS_ATTR_TYPE(u64),
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
- &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DM_MR_LENGTH,
- UVERBS_ATTR_TYPE(u64),
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
- &UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_PD_HANDLE, UVERBS_OBJECT_PD,
- UVERBS_ACCESS_READ,
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
- &UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DM_MR_ACCESS_FLAGS,
+DECLARE_UVERBS_NAMED_METHOD(
+ UVERBS_METHOD_DM_MR_REG,
+ UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_HANDLE,
+ UVERBS_OBJECT_MR,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DM_MR_OFFSET,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DM_MR_LENGTH,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY),
+ UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_PD_HANDLE,
+ UVERBS_OBJECT_PD,
+ UVERBS_ACCESS_READ,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_DM_MR_ACCESS_FLAGS,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_DM_HANDLE,
+ UVERBS_OBJECT_DM,
+ UVERBS_ACCESS_READ,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_REG_DM_MR_RESP_LKEY,
UVERBS_ATTR_TYPE(u32),
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
- &UVERBS_ATTR_IDR(UVERBS_ATTR_REG_DM_MR_DM_HANDLE, UVERBS_OBJECT_DM,
- UVERBS_ACCESS_READ,
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
- &UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_REG_DM_MR_RESP_LKEY,
- UVERBS_ATTR_TYPE(u32),
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
- &UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_REG_DM_MR_RESP_RKEY,
- UVERBS_ATTR_TYPE(u32),
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
-
-DECLARE_UVERBS_NAMED_OBJECT(UVERBS_OBJECT_MR,
- /* 1 is used in order to free the MR after all the MWs */
- &UVERBS_TYPE_ALLOC_IDR(1, uverbs_free_mr),
- &UVERBS_METHOD(UVERBS_METHOD_DM_MR_REG));
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_REG_DM_MR_RESP_RKEY,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_OBJECT(
+ UVERBS_OBJECT_MR,
+ UVERBS_TYPE_ALLOC_IDR(uverbs_free_mr),
+ &UVERBS_METHOD(UVERBS_METHOD_DM_MR_REG));
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 0b56828c1319..b6ceb6fd6a67 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -326,6 +326,153 @@ EXPORT_SYMBOL(ib_dealloc_pd);
/* Address handles */
+/**
+ * rdma_copy_ah_attr - Copy rdma ah attribute from source to destination.
+ * @dest: Pointer to destination ah_attr. Contents of the destination
+ * pointer is assumed to be invalid and attribute are overwritten.
+ * @src: Pointer to source ah_attr.
+ */
+void rdma_copy_ah_attr(struct rdma_ah_attr *dest,
+ const struct rdma_ah_attr *src)
+{
+ *dest = *src;
+ if (dest->grh.sgid_attr)
+ rdma_hold_gid_attr(dest->grh.sgid_attr);
+}
+EXPORT_SYMBOL(rdma_copy_ah_attr);
+
+/**
+ * rdma_replace_ah_attr - Replace valid ah_attr with new new one.
+ * @old: Pointer to existing ah_attr which needs to be replaced.
+ * old is assumed to be valid or zero'd
+ * @new: Pointer to the new ah_attr.
+ *
+ * rdma_replace_ah_attr() first releases any reference in the old ah_attr if
+ * old the ah_attr is valid; after that it copies the new attribute and holds
+ * the reference to the replaced ah_attr.
+ */
+void rdma_replace_ah_attr(struct rdma_ah_attr *old,
+ const struct rdma_ah_attr *new)
+{
+ rdma_destroy_ah_attr(old);
+ *old = *new;
+ if (old->grh.sgid_attr)
+ rdma_hold_gid_attr(old->grh.sgid_attr);
+}
+EXPORT_SYMBOL(rdma_replace_ah_attr);
+
+/**
+ * rdma_move_ah_attr - Move ah_attr pointed by source to destination.
+ * @dest: Pointer to destination ah_attr to copy to.
+ * dest is assumed to be valid or zero'd
+ * @src: Pointer to the new ah_attr.
+ *
+ * rdma_move_ah_attr() first releases any reference in the destination ah_attr
+ * if it is valid. This also transfers ownership of internal references from
+ * src to dest, making src invalid in the process. No new reference of the src
+ * ah_attr is taken.
+ */
+void rdma_move_ah_attr(struct rdma_ah_attr *dest, struct rdma_ah_attr *src)
+{
+ rdma_destroy_ah_attr(dest);
+ *dest = *src;
+ src->grh.sgid_attr = NULL;
+}
+EXPORT_SYMBOL(rdma_move_ah_attr);
+
+/*
+ * Validate that the rdma_ah_attr is valid for the device before passing it
+ * off to the driver.
+ */
+static int rdma_check_ah_attr(struct ib_device *device,
+ struct rdma_ah_attr *ah_attr)
+{
+ if (!rdma_is_port_valid(device, ah_attr->port_num))
+ return -EINVAL;
+
+ if ((rdma_is_grh_required(device, ah_attr->port_num) ||
+ ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) &&
+ !(ah_attr->ah_flags & IB_AH_GRH))
+ return -EINVAL;
+
+ if (ah_attr->grh.sgid_attr) {
+ /*
+ * Make sure the passed sgid_attr is consistent with the
+ * parameters
+ */
+ if (ah_attr->grh.sgid_attr->index != ah_attr->grh.sgid_index ||
+ ah_attr->grh.sgid_attr->port_num != ah_attr->port_num)
+ return -EINVAL;
+ }
+ return 0;
+}
+
+/*
+ * If the ah requires a GRH then ensure that sgid_attr pointer is filled in.
+ * On success the caller is responsible to call rdma_unfill_sgid_attr().
+ */
+static int rdma_fill_sgid_attr(struct ib_device *device,
+ struct rdma_ah_attr *ah_attr,
+ const struct ib_gid_attr **old_sgid_attr)
+{
+ const struct ib_gid_attr *sgid_attr;
+ struct ib_global_route *grh;
+ int ret;
+
+ *old_sgid_attr = ah_attr->grh.sgid_attr;
+
+ ret = rdma_check_ah_attr(device, ah_attr);
+ if (ret)
+ return ret;
+
+ if (!(ah_attr->ah_flags & IB_AH_GRH))
+ return 0;
+
+ grh = rdma_ah_retrieve_grh(ah_attr);
+ if (grh->sgid_attr)
+ return 0;
+
+ sgid_attr =
+ rdma_get_gid_attr(device, ah_attr->port_num, grh->sgid_index);
+ if (IS_ERR(sgid_attr))
+ return PTR_ERR(sgid_attr);
+
+ /* Move ownerhip of the kref into the ah_attr */
+ grh->sgid_attr = sgid_attr;
+ return 0;
+}
+
+static void rdma_unfill_sgid_attr(struct rdma_ah_attr *ah_attr,
+ const struct ib_gid_attr *old_sgid_attr)
+{
+ /*
+ * Fill didn't change anything, the caller retains ownership of
+ * whatever it passed
+ */
+ if (ah_attr->grh.sgid_attr == old_sgid_attr)
+ return;
+
+ /*
+ * Otherwise, we need to undo what rdma_fill_sgid_attr so the caller
+ * doesn't see any change in the rdma_ah_attr. If we get here
+ * old_sgid_attr is NULL.
+ */
+ rdma_destroy_ah_attr(ah_attr);
+}
+
+static const struct ib_gid_attr *
+rdma_update_sgid_attr(struct rdma_ah_attr *ah_attr,
+ const struct ib_gid_attr *old_attr)
+{
+ if (old_attr)
+ rdma_put_gid_attr(old_attr);
+ if (ah_attr->ah_flags & IB_AH_GRH) {
+ rdma_hold_gid_attr(ah_attr->grh.sgid_attr);
+ return ah_attr->grh.sgid_attr;
+ }
+ return NULL;
+}
+
static struct ib_ah *_rdma_create_ah(struct ib_pd *pd,
struct rdma_ah_attr *ah_attr,
struct ib_udata *udata)
@@ -339,15 +486,38 @@ static struct ib_ah *_rdma_create_ah(struct ib_pd *pd,
ah->pd = pd;
ah->uobject = NULL;
ah->type = ah_attr->type;
+ ah->sgid_attr = rdma_update_sgid_attr(ah_attr, NULL);
+
atomic_inc(&pd->usecnt);
}
return ah;
}
+/**
+ * rdma_create_ah - Creates an address handle for the
+ * given address vector.
+ * @pd: The protection domain associated with the address handle.
+ * @ah_attr: The attributes of the address vector.
+ *
+ * It returns 0 on success and returns appropriate error code on error.
+ * The address handle is used to reference a local or global destination
+ * in all UD QP post sends.
+ */
struct ib_ah *rdma_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr)
{
- return _rdma_create_ah(pd, ah_attr, NULL);
+ const struct ib_gid_attr *old_sgid_attr;
+ struct ib_ah *ah;
+ int ret;
+
+ ret = rdma_fill_sgid_attr(pd->device, ah_attr, &old_sgid_attr);
+ if (ret)
+ return ERR_PTR(ret);
+
+ ah = _rdma_create_ah(pd, ah_attr, NULL);
+
+ rdma_unfill_sgid_attr(ah_attr, old_sgid_attr);
+ return ah;
}
EXPORT_SYMBOL(rdma_create_ah);
@@ -368,15 +538,27 @@ struct ib_ah *rdma_create_user_ah(struct ib_pd *pd,
struct rdma_ah_attr *ah_attr,
struct ib_udata *udata)
{
+ const struct ib_gid_attr *old_sgid_attr;
+ struct ib_ah *ah;
int err;
+ err = rdma_fill_sgid_attr(pd->device, ah_attr, &old_sgid_attr);
+ if (err)
+ return ERR_PTR(err);
+
if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) {
err = ib_resolve_eth_dmac(pd->device, ah_attr);
- if (err)
- return ERR_PTR(err);
+ if (err) {
+ ah = ERR_PTR(err);
+ goto out;
+ }
}
- return _rdma_create_ah(pd, ah_attr, udata);
+ ah = _rdma_create_ah(pd, ah_attr, udata);
+
+out:
+ rdma_unfill_sgid_attr(ah_attr, old_sgid_attr);
+ return ah;
}
EXPORT_SYMBOL(rdma_create_user_ah);
@@ -455,16 +637,16 @@ static bool find_gid_index(const union ib_gid *gid,
return true;
}
-static int get_sgid_index_from_eth(struct ib_device *device, u8 port_num,
- u16 vlan_id, const union ib_gid *sgid,
- enum ib_gid_type gid_type,
- u16 *gid_index)
+static const struct ib_gid_attr *
+get_sgid_attr_from_eth(struct ib_device *device, u8 port_num,
+ u16 vlan_id, const union ib_gid *sgid,
+ enum ib_gid_type gid_type)
{
struct find_gid_index_context context = {.vlan_id = vlan_id,
.gid_type = gid_type};
- return ib_find_gid_by_filter(device, sgid, port_num, find_gid_index,
- &context, gid_index);
+ return rdma_find_gid_by_filter(device, sgid, port_num, find_gid_index,
+ &context);
}
int ib_get_gids_from_rdma_hdr(const union rdma_network_hdr *hdr,
@@ -508,39 +690,24 @@ EXPORT_SYMBOL(ib_get_gids_from_rdma_hdr);
static int ib_resolve_unicast_gid_dmac(struct ib_device *device,
struct rdma_ah_attr *ah_attr)
{
- struct ib_gid_attr sgid_attr;
- struct ib_global_route *grh;
+ struct ib_global_route *grh = rdma_ah_retrieve_grh(ah_attr);
+ const struct ib_gid_attr *sgid_attr = grh->sgid_attr;
int hop_limit = 0xff;
- union ib_gid sgid;
- int ret;
-
- grh = rdma_ah_retrieve_grh(ah_attr);
-
- ret = ib_query_gid(device,
- rdma_ah_get_port_num(ah_attr),
- grh->sgid_index,
- &sgid, &sgid_attr);
- if (ret || !sgid_attr.ndev) {
- if (!ret)
- ret = -ENXIO;
- return ret;
- }
+ int ret = 0;
/* If destination is link local and source GID is RoCEv1,
* IP stack is not used.
*/
if (rdma_link_local_addr((struct in6_addr *)grh->dgid.raw) &&
- sgid_attr.gid_type == IB_GID_TYPE_ROCE) {
+ sgid_attr->gid_type == IB_GID_TYPE_ROCE) {
rdma_get_ll_mac((struct in6_addr *)grh->dgid.raw,
ah_attr->roce.dmac);
- goto done;
+ return ret;
}
- ret = rdma_addr_find_l2_eth_by_grh(&sgid, &grh->dgid,
+ ret = rdma_addr_find_l2_eth_by_grh(&sgid_attr->gid, &grh->dgid,
ah_attr->roce.dmac,
- sgid_attr.ndev, &hop_limit);
-done:
- dev_put(sgid_attr.ndev);
+ sgid_attr->ndev, &hop_limit);
grh->hop_limit = hop_limit;
return ret;
@@ -555,16 +722,18 @@ done:
* as sgid and, sgid is used as dgid because sgid contains destinations
* GID whom to respond to.
*
+ * On success the caller is responsible to call rdma_destroy_ah_attr on the
+ * attr.
*/
int ib_init_ah_attr_from_wc(struct ib_device *device, u8 port_num,
const struct ib_wc *wc, const struct ib_grh *grh,
struct rdma_ah_attr *ah_attr)
{
u32 flow_class;
- u16 gid_index;
int ret;
enum rdma_network_type net_type = RDMA_NETWORK_IB;
enum ib_gid_type gid_type = IB_GID_TYPE_IB;
+ const struct ib_gid_attr *sgid_attr;
int hoplimit = 0xff;
union ib_gid dgid;
union ib_gid sgid;
@@ -595,72 +764,141 @@ int ib_init_ah_attr_from_wc(struct ib_device *device, u8 port_num,
if (!(wc->wc_flags & IB_WC_GRH))
return -EPROTOTYPE;
- ret = get_sgid_index_from_eth(device, port_num,
- vlan_id, &dgid,
- gid_type, &gid_index);
- if (ret)
- return ret;
+ sgid_attr = get_sgid_attr_from_eth(device, port_num,
+ vlan_id, &dgid,
+ gid_type);
+ if (IS_ERR(sgid_attr))
+ return PTR_ERR(sgid_attr);
flow_class = be32_to_cpu(grh->version_tclass_flow);
- rdma_ah_set_grh(ah_attr, &sgid,
- flow_class & 0xFFFFF,
- (u8)gid_index, hoplimit,
- (flow_class >> 20) & 0xFF);
- return ib_resolve_unicast_gid_dmac(device, ah_attr);
+ rdma_move_grh_sgid_attr(ah_attr,
+ &sgid,
+ flow_class & 0xFFFFF,
+ hoplimit,
+ (flow_class >> 20) & 0xFF,
+ sgid_attr);
+
+ ret = ib_resolve_unicast_gid_dmac(device, ah_attr);
+ if (ret)
+ rdma_destroy_ah_attr(ah_attr);
+
+ return ret;
} else {
rdma_ah_set_dlid(ah_attr, wc->slid);
rdma_ah_set_path_bits(ah_attr, wc->dlid_path_bits);
- if (wc->wc_flags & IB_WC_GRH) {
- if (dgid.global.interface_id != cpu_to_be64(IB_SA_WELL_KNOWN_GUID)) {
- ret = ib_find_cached_gid_by_port(device, &dgid,
- IB_GID_TYPE_IB,
- port_num, NULL,
- &gid_index);
- if (ret)
- return ret;
- } else {
- gid_index = 0;
- }
+ if ((wc->wc_flags & IB_WC_GRH) == 0)
+ return 0;
- flow_class = be32_to_cpu(grh->version_tclass_flow);
- rdma_ah_set_grh(ah_attr, &sgid,
+ if (dgid.global.interface_id !=
+ cpu_to_be64(IB_SA_WELL_KNOWN_GUID)) {
+ sgid_attr = rdma_find_gid_by_port(
+ device, &dgid, IB_GID_TYPE_IB, port_num, NULL);
+ } else
+ sgid_attr = rdma_get_gid_attr(device, port_num, 0);
+
+ if (IS_ERR(sgid_attr))
+ return PTR_ERR(sgid_attr);
+ flow_class = be32_to_cpu(grh->version_tclass_flow);
+ rdma_move_grh_sgid_attr(ah_attr,
+ &sgid,
flow_class & 0xFFFFF,
- (u8)gid_index, hoplimit,
- (flow_class >> 20) & 0xFF);
- }
+ hoplimit,
+ (flow_class >> 20) & 0xFF,
+ sgid_attr);
+
return 0;
}
}
EXPORT_SYMBOL(ib_init_ah_attr_from_wc);
+/**
+ * rdma_move_grh_sgid_attr - Sets the sgid attribute of GRH, taking ownership
+ * of the reference
+ *
+ * @attr: Pointer to AH attribute structure
+ * @dgid: Destination GID
+ * @flow_label: Flow label
+ * @hop_limit: Hop limit
+ * @traffic_class: traffic class
+ * @sgid_attr: Pointer to SGID attribute
+ *
+ * This takes ownership of the sgid_attr reference. The caller must ensure
+ * rdma_destroy_ah_attr() is called before destroying the rdma_ah_attr after
+ * calling this function.
+ */
+void rdma_move_grh_sgid_attr(struct rdma_ah_attr *attr, union ib_gid *dgid,
+ u32 flow_label, u8 hop_limit, u8 traffic_class,
+ const struct ib_gid_attr *sgid_attr)
+{
+ rdma_ah_set_grh(attr, dgid, flow_label, sgid_attr->index, hop_limit,
+ traffic_class);
+ attr->grh.sgid_attr = sgid_attr;
+}
+EXPORT_SYMBOL(rdma_move_grh_sgid_attr);
+
+/**
+ * rdma_destroy_ah_attr - Release reference to SGID attribute of
+ * ah attribute.
+ * @ah_attr: Pointer to ah attribute
+ *
+ * Release reference to the SGID attribute of the ah attribute if it is
+ * non NULL. It is safe to call this multiple times, and safe to call it on
+ * a zero initialized ah_attr.
+ */
+void rdma_destroy_ah_attr(struct rdma_ah_attr *ah_attr)
+{
+ if (ah_attr->grh.sgid_attr) {
+ rdma_put_gid_attr(ah_attr->grh.sgid_attr);
+ ah_attr->grh.sgid_attr = NULL;
+ }
+}
+EXPORT_SYMBOL(rdma_destroy_ah_attr);
+
struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, const struct ib_wc *wc,
const struct ib_grh *grh, u8 port_num)
{
struct rdma_ah_attr ah_attr;
+ struct ib_ah *ah;
int ret;
ret = ib_init_ah_attr_from_wc(pd->device, port_num, wc, grh, &ah_attr);
if (ret)
return ERR_PTR(ret);
- return rdma_create_ah(pd, &ah_attr);
+ ah = rdma_create_ah(pd, &ah_attr);
+
+ rdma_destroy_ah_attr(&ah_attr);
+ return ah;
}
EXPORT_SYMBOL(ib_create_ah_from_wc);
int rdma_modify_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr)
{
+ const struct ib_gid_attr *old_sgid_attr;
+ int ret;
+
if (ah->type != ah_attr->type)
return -EINVAL;
- return ah->device->modify_ah ?
+ ret = rdma_fill_sgid_attr(ah->device, ah_attr, &old_sgid_attr);
+ if (ret)
+ return ret;
+
+ ret = ah->device->modify_ah ?
ah->device->modify_ah(ah, ah_attr) :
-EOPNOTSUPP;
+
+ ah->sgid_attr = rdma_update_sgid_attr(ah_attr, ah->sgid_attr);
+ rdma_unfill_sgid_attr(ah_attr, old_sgid_attr);
+ return ret;
}
EXPORT_SYMBOL(rdma_modify_ah);
int rdma_query_ah(struct ib_ah *ah, struct rdma_ah_attr *ah_attr)
{
+ ah_attr->grh.sgid_attr = NULL;
+
return ah->device->query_ah ?
ah->device->query_ah(ah, ah_attr) :
-EOPNOTSUPP;
@@ -669,13 +907,17 @@ EXPORT_SYMBOL(rdma_query_ah);
int rdma_destroy_ah(struct ib_ah *ah)
{
+ const struct ib_gid_attr *sgid_attr = ah->sgid_attr;
struct ib_pd *pd;
int ret;
pd = ah->pd;
ret = ah->device->destroy_ah(ah);
- if (!ret)
+ if (!ret) {
atomic_dec(&pd->usecnt);
+ if (sgid_attr)
+ rdma_put_gid_attr(sgid_attr);
+ }
return ret;
}
@@ -1290,16 +1532,19 @@ bool ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
}
EXPORT_SYMBOL(ib_modify_qp_is_ok);
+/**
+ * ib_resolve_eth_dmac - Resolve destination mac address
+ * @device: Device to consider
+ * @ah_attr: address handle attribute which describes the
+ * source and destination parameters
+ * ib_resolve_eth_dmac() resolves destination mac address and L3 hop limit It
+ * returns 0 on success or appropriate error code. It initializes the
+ * necessary ah_attr fields when call is successful.
+ */
static int ib_resolve_eth_dmac(struct ib_device *device,
struct rdma_ah_attr *ah_attr)
{
- int ret = 0;
- struct ib_global_route *grh;
-
- if (!rdma_is_port_valid(device, rdma_ah_get_port_num(ah_attr)))
- return -EINVAL;
-
- grh = rdma_ah_retrieve_grh(ah_attr);
+ int ret = 0;
if (rdma_is_multicast_addr((struct in6_addr *)ah_attr->grh.dgid.raw)) {
if (ipv6_addr_v4mapped((struct in6_addr *)ah_attr->grh.dgid.raw)) {
@@ -1317,6 +1562,14 @@ static int ib_resolve_eth_dmac(struct ib_device *device,
return ret;
}
+static bool is_qp_type_connected(const struct ib_qp *qp)
+{
+ return (qp->qp_type == IB_QPT_UC ||
+ qp->qp_type == IB_QPT_RC ||
+ qp->qp_type == IB_QPT_XRC_INI ||
+ qp->qp_type == IB_QPT_XRC_TGT);
+}
+
/**
* IB core internal function to perform QP attributes modification.
*/
@@ -1324,8 +1577,53 @@ static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr,
int attr_mask, struct ib_udata *udata)
{
u8 port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
+ const struct ib_gid_attr *old_sgid_attr_av;
+ const struct ib_gid_attr *old_sgid_attr_alt_av;
int ret;
+ if (attr_mask & IB_QP_AV) {
+ ret = rdma_fill_sgid_attr(qp->device, &attr->ah_attr,
+ &old_sgid_attr_av);
+ if (ret)
+ return ret;
+ }
+ if (attr_mask & IB_QP_ALT_PATH) {
+ /*
+ * FIXME: This does not track the migration state, so if the
+ * user loads a new alternate path after the HW has migrated
+ * from primary->alternate we will keep the wrong
+ * references. This is OK for IB because the reference
+ * counting does not serve any functional purpose.
+ */
+ ret = rdma_fill_sgid_attr(qp->device, &attr->alt_ah_attr,
+ &old_sgid_attr_alt_av);
+ if (ret)
+ goto out_av;
+
+ /*
+ * Today the core code can only handle alternate paths and APM
+ * for IB. Ban them in roce mode.
+ */
+ if (!(rdma_protocol_ib(qp->device,
+ attr->alt_ah_attr.port_num) &&
+ rdma_protocol_ib(qp->device, port))) {
+ ret = EINVAL;
+ goto out;
+ }
+ }
+
+ /*
+ * If the user provided the qp_attr then we have to resolve it. Kernel
+ * users have to provide already resolved rdma_ah_attr's
+ */
+ if (udata && (attr_mask & IB_QP_AV) &&
+ attr->ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE &&
+ is_qp_type_connected(qp)) {
+ ret = ib_resolve_eth_dmac(qp->device, &attr->ah_attr);
+ if (ret)
+ goto out;
+ }
+
if (rdma_ib_or_roce(qp->device, port)) {
if (attr_mask & IB_QP_RQ_PSN && attr->rq_psn & ~0xffffff) {
pr_warn("%s: %s rq_psn overflow, masking to 24 bits\n",
@@ -1341,20 +1639,27 @@ static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr,
}
ret = ib_security_modify_qp(qp, attr, attr_mask, udata);
- if (!ret && (attr_mask & IB_QP_PORT))
- qp->port = attr->port_num;
+ if (ret)
+ goto out;
+ if (attr_mask & IB_QP_PORT)
+ qp->port = attr->port_num;
+ if (attr_mask & IB_QP_AV)
+ qp->av_sgid_attr =
+ rdma_update_sgid_attr(&attr->ah_attr, qp->av_sgid_attr);
+ if (attr_mask & IB_QP_ALT_PATH)
+ qp->alt_path_sgid_attr = rdma_update_sgid_attr(
+ &attr->alt_ah_attr, qp->alt_path_sgid_attr);
+
+out:
+ if (attr_mask & IB_QP_ALT_PATH)
+ rdma_unfill_sgid_attr(&attr->alt_ah_attr, old_sgid_attr_alt_av);
+out_av:
+ if (attr_mask & IB_QP_AV)
+ rdma_unfill_sgid_attr(&attr->ah_attr, old_sgid_attr_av);
return ret;
}
-static bool is_qp_type_connected(const struct ib_qp *qp)
-{
- return (qp->qp_type == IB_QPT_UC ||
- qp->qp_type == IB_QPT_RC ||
- qp->qp_type == IB_QPT_XRC_INI ||
- qp->qp_type == IB_QPT_XRC_TGT);
-}
-
/**
* ib_modify_qp_with_udata - Modifies the attributes for the specified QP.
* @ib_qp: The QP to modify.
@@ -1369,17 +1674,7 @@ static bool is_qp_type_connected(const struct ib_qp *qp)
int ib_modify_qp_with_udata(struct ib_qp *ib_qp, struct ib_qp_attr *attr,
int attr_mask, struct ib_udata *udata)
{
- struct ib_qp *qp = ib_qp->real_qp;
- int ret;
-
- if (attr_mask & IB_QP_AV &&
- attr->ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE &&
- is_qp_type_connected(qp)) {
- ret = ib_resolve_eth_dmac(qp->device, &attr->ah_attr);
- if (ret)
- return ret;
- }
- return _ib_modify_qp(qp, attr, attr_mask, udata);
+ return _ib_modify_qp(ib_qp->real_qp, attr, attr_mask, udata);
}
EXPORT_SYMBOL(ib_modify_qp_with_udata);
@@ -1451,6 +1746,9 @@ int ib_query_qp(struct ib_qp *qp,
int qp_attr_mask,
struct ib_qp_init_attr *qp_init_attr)
{
+ qp_attr->ah_attr.grh.sgid_attr = NULL;
+ qp_attr->alt_ah_attr.grh.sgid_attr = NULL;
+
return qp->device->query_qp ?
qp->device->query_qp(qp->real_qp, qp_attr, qp_attr_mask, qp_init_attr) :
-EOPNOTSUPP;
@@ -1509,6 +1807,8 @@ static int __ib_destroy_shared_qp(struct ib_qp *qp)
int ib_destroy_qp(struct ib_qp *qp)
{
+ const struct ib_gid_attr *alt_path_sgid_attr = qp->alt_path_sgid_attr;
+ const struct ib_gid_attr *av_sgid_attr = qp->av_sgid_attr;
struct ib_pd *pd;
struct ib_cq *scq, *rcq;
struct ib_srq *srq;
@@ -1539,6 +1839,10 @@ int ib_destroy_qp(struct ib_qp *qp)
rdma_restrack_del(&qp->res);
ret = qp->device->destroy_qp(qp);
if (!ret) {
+ if (alt_path_sgid_attr)
+ rdma_put_gid_attr(alt_path_sgid_attr);
+ if (av_sgid_attr)
+ rdma_put_gid_attr(av_sgid_attr);
if (pd)
atomic_dec(&pd->usecnt);
if (scq)
@@ -1975,35 +2279,6 @@ int ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *rwq_ind_table)
}
EXPORT_SYMBOL(ib_destroy_rwq_ind_table);
-struct ib_flow *ib_create_flow(struct ib_qp *qp,
- struct ib_flow_attr *flow_attr,
- int domain)
-{
- struct ib_flow *flow_id;
- if (!qp->device->create_flow)
- return ERR_PTR(-EOPNOTSUPP);
-
- flow_id = qp->device->create_flow(qp, flow_attr, domain, NULL);
- if (!IS_ERR(flow_id)) {
- atomic_inc(&qp->usecnt);
- flow_id->qp = qp;
- }
- return flow_id;
-}
-EXPORT_SYMBOL(ib_create_flow);
-
-int ib_destroy_flow(struct ib_flow *flow_id)
-{
- int err;
- struct ib_qp *qp = flow_id->qp;
-
- err = qp->device->destroy_flow(flow_id);
- if (!err)
- atomic_dec(&qp->usecnt);
- return err;
-}
-EXPORT_SYMBOL(ib_destroy_flow);
-
int ib_check_mr_status(struct ib_mr *mr, u32 check_mask,
struct ib_mr_status *mr_status)
{
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index a76e206704d4..dd800d153aa2 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -166,7 +166,8 @@ int bnxt_re_query_device(struct ib_device *ibdev,
| IB_DEVICE_MEM_WINDOW
| IB_DEVICE_MEM_WINDOW_TYPE_2B
| IB_DEVICE_MEM_MGT_EXTENSIONS;
- ib_attr->max_sge = dev_attr->max_qp_sges;
+ ib_attr->max_send_sge = dev_attr->max_qp_sges;
+ ib_attr->max_recv_sge = dev_attr->max_qp_sges;
ib_attr->max_sge_rd = dev_attr->max_qp_sges;
ib_attr->max_cq = dev_attr->max_cq;
ib_attr->max_cqe = dev_attr->max_cq_wqes;
@@ -243,8 +244,8 @@ int bnxt_re_query_port(struct ib_device *ibdev, u8 port_num,
port_attr->gid_tbl_len = dev_attr->max_sgid;
port_attr->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_REINIT_SUP |
IB_PORT_DEVICE_MGMT_SUP |
- IB_PORT_VENDOR_CLASS_SUP |
- IB_PORT_IP_BASED_GIDS;
+ IB_PORT_VENDOR_CLASS_SUP;
+ port_attr->ip_gids = true;
port_attr->max_msg_sz = (u32)BNXT_RE_MAX_MR_SIZE_LOW;
port_attr->bad_pkey_cntr = 0;
@@ -364,8 +365,7 @@ int bnxt_re_del_gid(const struct ib_gid_attr *attr, void **context)
return rc;
}
-int bnxt_re_add_gid(const union ib_gid *gid,
- const struct ib_gid_attr *attr, void **context)
+int bnxt_re_add_gid(const struct ib_gid_attr *attr, void **context)
{
int rc;
u32 tbl_idx = 0;
@@ -377,7 +377,7 @@ int bnxt_re_add_gid(const union ib_gid *gid,
if ((attr->ndev) && is_vlan_dev(attr->ndev))
vlan_id = vlan_dev_vlan_id(attr->ndev);
- rc = bnxt_qplib_add_sgid(sgid_tbl, (struct bnxt_qplib_gid *)gid,
+ rc = bnxt_qplib_add_sgid(sgid_tbl, (struct bnxt_qplib_gid *)&attr->gid,
rdev->qplib_res.netdev->dev_addr,
vlan_id, true, &tbl_idx);
if (rc == -EALREADY) {
@@ -673,8 +673,6 @@ struct ib_ah *bnxt_re_create_ah(struct ib_pd *ib_pd,
int rc;
u8 nw_type;
- struct ib_gid_attr sgid_attr;
-
if (!(rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH)) {
dev_err(rdev_to_dev(rdev), "Failed to alloc AH: GRH not set");
return ERR_PTR(-EINVAL);
@@ -705,20 +703,11 @@ struct ib_ah *bnxt_re_create_ah(struct ib_pd *ib_pd,
grh->dgid.raw) &&
!rdma_link_local_addr((struct in6_addr *)
grh->dgid.raw)) {
- union ib_gid sgid;
+ const struct ib_gid_attr *sgid_attr;
- rc = ib_get_cached_gid(&rdev->ibdev, 1,
- grh->sgid_index, &sgid,
- &sgid_attr);
- if (rc) {
- dev_err(rdev_to_dev(rdev),
- "Failed to query gid at index %d",
- grh->sgid_index);
- goto fail;
- }
- dev_put(sgid_attr.ndev);
+ sgid_attr = grh->sgid_attr;
/* Get network header type for this GID */
- nw_type = ib_gid_to_network_type(sgid_attr.gid_type, &sgid);
+ nw_type = rdma_gid_attr_network_type(sgid_attr);
switch (nw_type) {
case RDMA_NETWORK_IPV4:
ah->qplib_ah.nw_type = CMDQ_CREATE_AH_TYPE_V2IPV4;
@@ -1599,9 +1588,6 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr;
enum ib_qp_state curr_qp_state, new_qp_state;
int rc, entries;
- int status;
- union ib_gid sgid;
- struct ib_gid_attr sgid_attr;
unsigned int flags;
u8 nw_type;
@@ -1668,6 +1654,7 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
if (qp_attr_mask & IB_QP_AV) {
const struct ib_global_route *grh =
rdma_ah_read_grh(&qp_attr->ah_attr);
+ const struct ib_gid_attr *sgid_attr;
qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_DGID |
CMDQ_MODIFY_QP_MODIFY_MASK_FLOW_LABEL |
@@ -1691,29 +1678,23 @@ int bnxt_re_modify_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
ether_addr_copy(qp->qplib_qp.ah.dmac,
qp_attr->ah_attr.roce.dmac);
- status = ib_get_cached_gid(&rdev->ibdev, 1,
- grh->sgid_index,
- &sgid, &sgid_attr);
- if (!status) {
- memcpy(qp->qplib_qp.smac, sgid_attr.ndev->dev_addr,
- ETH_ALEN);
- dev_put(sgid_attr.ndev);
- nw_type = ib_gid_to_network_type(sgid_attr.gid_type,
- &sgid);
- switch (nw_type) {
- case RDMA_NETWORK_IPV4:
- qp->qplib_qp.nw_type =
- CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV2_IPV4;
- break;
- case RDMA_NETWORK_IPV6:
- qp->qplib_qp.nw_type =
- CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV2_IPV6;
- break;
- default:
- qp->qplib_qp.nw_type =
- CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV1;
- break;
- }
+ sgid_attr = qp_attr->ah_attr.grh.sgid_attr;
+ memcpy(qp->qplib_qp.smac, sgid_attr->ndev->dev_addr,
+ ETH_ALEN);
+ nw_type = rdma_gid_attr_network_type(sgid_attr);
+ switch (nw_type) {
+ case RDMA_NETWORK_IPV4:
+ qp->qplib_qp.nw_type =
+ CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV2_IPV4;
+ break;
+ case RDMA_NETWORK_IPV6:
+ qp->qplib_qp.nw_type =
+ CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV2_IPV6;
+ break;
+ default:
+ qp->qplib_qp.nw_type =
+ CMDQ_MODIFY_QP_NETWORK_TYPE_ROCEV1;
+ break;
}
}
@@ -1899,15 +1880,13 @@ static int bnxt_re_build_qp1_send_v2(struct bnxt_re_qp *qp,
struct bnxt_qplib_swqe *wqe,
int payload_size)
{
- struct ib_device *ibdev = &qp->rdev->ibdev;
struct bnxt_re_ah *ah = container_of(ud_wr(wr)->ah, struct bnxt_re_ah,
ib_ah);
struct bnxt_qplib_ah *qplib_ah = &ah->qplib_ah;
+ const struct ib_gid_attr *sgid_attr = ah->ib_ah.sgid_attr;
struct bnxt_qplib_sge sge;
- union ib_gid sgid;
u8 nw_type;
u16 ether_type;
- struct ib_gid_attr sgid_attr;
union ib_gid dgid;
bool is_eth = false;
bool is_vlan = false;
@@ -1920,22 +1899,10 @@ static int bnxt_re_build_qp1_send_v2(struct bnxt_re_qp *qp,
memset(&qp->qp1_hdr, 0, sizeof(qp->qp1_hdr));
- rc = ib_get_cached_gid(ibdev, 1,
- qplib_ah->host_sgid_index, &sgid,
- &sgid_attr);
- if (rc) {
- dev_err(rdev_to_dev(qp->rdev),
- "Failed to query gid at index %d",
- qplib_ah->host_sgid_index);
- return rc;
- }
- if (sgid_attr.ndev) {
- if (is_vlan_dev(sgid_attr.ndev))
- vlan_id = vlan_dev_vlan_id(sgid_attr.ndev);
- dev_put(sgid_attr.ndev);
- }
+ if (is_vlan_dev(sgid_attr->ndev))
+ vlan_id = vlan_dev_vlan_id(sgid_attr->ndev);
/* Get network header type for this GID */
- nw_type = ib_gid_to_network_type(sgid_attr.gid_type, &sgid);
+ nw_type = rdma_gid_attr_network_type(sgid_attr);
switch (nw_type) {
case RDMA_NETWORK_IPV4:
nw_type = BNXT_RE_ROCEV2_IPV4_PACKET;
@@ -1948,9 +1915,9 @@ static int bnxt_re_build_qp1_send_v2(struct bnxt_re_qp *qp,
break;
}
memcpy(&dgid.raw, &qplib_ah->dgid, 16);
- is_udp = sgid_attr.gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP;
+ is_udp = sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP;
if (is_udp) {
- if (ipv6_addr_v4mapped((struct in6_addr *)&sgid)) {
+ if (ipv6_addr_v4mapped((struct in6_addr *)&sgid_attr->gid)) {
ip_version = 4;
ether_type = ETH_P_IP;
} else {
@@ -1983,9 +1950,10 @@ static int bnxt_re_build_qp1_send_v2(struct bnxt_re_qp *qp,
}
if (is_grh || (ip_version == 6)) {
- memcpy(qp->qp1_hdr.grh.source_gid.raw, sgid.raw, sizeof(sgid));
+ memcpy(qp->qp1_hdr.grh.source_gid.raw, sgid_attr->gid.raw,
+ sizeof(sgid_attr->gid));
memcpy(qp->qp1_hdr.grh.destination_gid.raw, qplib_ah->dgid.data,
- sizeof(sgid));
+ sizeof(sgid_attr->gid));
qp->qp1_hdr.grh.hop_limit = qplib_ah->hop_limit;
}
@@ -1995,7 +1963,7 @@ static int bnxt_re_build_qp1_send_v2(struct bnxt_re_qp *qp,
qp->qp1_hdr.ip4.frag_off = htons(IP_DF);
qp->qp1_hdr.ip4.ttl = qplib_ah->hop_limit;
- memcpy(&qp->qp1_hdr.ip4.saddr, sgid.raw + 12, 4);
+ memcpy(&qp->qp1_hdr.ip4.saddr, sgid_attr->gid.raw + 12, 4);
memcpy(&qp->qp1_hdr.ip4.daddr, qplib_ah->dgid.data + 12, 4);
qp->qp1_hdr.ip4.check = ib_ud_ip4_csum(&qp->qp1_hdr);
}
@@ -2441,7 +2409,7 @@ int bnxt_re_post_send(struct ib_qp *ib_qp, struct ib_send_wr *wr,
default:
break;
}
- /* Fall thru to build the wqe */
+ /* fall through */
case IB_WR_SEND_WITH_INV:
rc = bnxt_re_build_send_wqe(qp, wr, &wqe);
break;
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
index 5c6414cad4af..bd04d40d897a 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h
@@ -158,8 +158,7 @@ void bnxt_re_query_fw_str(struct ib_device *ibdev, char *str);
int bnxt_re_query_pkey(struct ib_device *ibdev, u8 port_num,
u16 index, u16 *pkey);
int bnxt_re_del_gid(const struct ib_gid_attr *attr, void **context);
-int bnxt_re_add_gid(const union ib_gid *gid,
- const struct ib_gid_attr *attr, void **context);
+int bnxt_re_add_gid(const struct ib_gid_attr *attr, void **context);
int bnxt_re_query_gid(struct ib_device *ibdev, u8 port_num,
int index, union ib_gid *gid);
enum rdma_link_layer bnxt_re_get_link_layer(struct ib_device *ibdev,
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
index 50d8f1fc98d5..e426b990c1dd 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
@@ -2354,7 +2354,7 @@ static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq,
srq = qp->srq;
if (!srq)
return -EINVAL;
- if (wr_id_idx > srq->hwq.max_elements) {
+ if (wr_id_idx >= srq->hwq.max_elements) {
dev_err(&cq->hwq.pdev->dev,
"QPLIB: FP: CQ Process RC ");
dev_err(&cq->hwq.pdev->dev,
@@ -2369,7 +2369,7 @@ static int bnxt_qplib_cq_process_res_rc(struct bnxt_qplib_cq *cq,
*pcqe = cqe;
} else {
rq = &qp->rq;
- if (wr_id_idx > rq->hwq.max_elements) {
+ if (wr_id_idx >= rq->hwq.max_elements) {
dev_err(&cq->hwq.pdev->dev,
"QPLIB: FP: CQ Process RC ");
dev_err(&cq->hwq.pdev->dev,
@@ -2437,7 +2437,7 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq,
if (!srq)
return -EINVAL;
- if (wr_id_idx > srq->hwq.max_elements) {
+ if (wr_id_idx >= srq->hwq.max_elements) {
dev_err(&cq->hwq.pdev->dev,
"QPLIB: FP: CQ Process UD ");
dev_err(&cq->hwq.pdev->dev,
@@ -2452,7 +2452,7 @@ static int bnxt_qplib_cq_process_res_ud(struct bnxt_qplib_cq *cq,
*pcqe = cqe;
} else {
rq = &qp->rq;
- if (wr_id_idx > rq->hwq.max_elements) {
+ if (wr_id_idx >= rq->hwq.max_elements) {
dev_err(&cq->hwq.pdev->dev,
"QPLIB: FP: CQ Process UD ");
dev_err(&cq->hwq.pdev->dev,
@@ -2546,7 +2546,7 @@ static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq,
"QPLIB: FP: SRQ used but not defined??");
return -EINVAL;
}
- if (wr_id_idx > srq->hwq.max_elements) {
+ if (wr_id_idx >= srq->hwq.max_elements) {
dev_err(&cq->hwq.pdev->dev,
"QPLIB: FP: CQ Process Raw/QP1 ");
dev_err(&cq->hwq.pdev->dev,
@@ -2561,7 +2561,7 @@ static int bnxt_qplib_cq_process_res_raweth_qp1(struct bnxt_qplib_cq *cq,
*pcqe = cqe;
} else {
rq = &qp->rq;
- if (wr_id_idx > rq->hwq.max_elements) {
+ if (wr_id_idx >= rq->hwq.max_elements) {
dev_err(&cq->hwq.pdev->dev,
"QPLIB: FP: CQ Process Raw/QP1 RQ wr_id ");
dev_err(&cq->hwq.pdev->dev,
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
index 2f3f32eaa1d5..4097f3fa25c5 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c
@@ -197,7 +197,7 @@ int bnxt_qplib_get_sgid(struct bnxt_qplib_res *res,
struct bnxt_qplib_sgid_tbl *sgid_tbl, int index,
struct bnxt_qplib_gid *gid)
{
- if (index > sgid_tbl->max) {
+ if (index >= sgid_tbl->max) {
dev_err(&res->pdev->dev,
"QPLIB: Index %d exceeded SGID table max (%d)",
index, sgid_tbl->max);
@@ -402,7 +402,7 @@ int bnxt_qplib_get_pkey(struct bnxt_qplib_res *res,
*pkey = 0xFFFF;
return 0;
}
- if (index > pkey_tbl->max) {
+ if (index >= pkey_tbl->max) {
dev_err(&res->pdev->dev,
"QPLIB: Index %d exceeded PKEY table max (%d)",
index, pkey_tbl->max);
diff --git a/drivers/infiniband/hw/cxgb3/iwch_cq.c b/drivers/infiniband/hw/cxgb3/iwch_cq.c
index 0a8542c20804..a098c0140580 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_cq.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_cq.c
@@ -32,38 +32,16 @@
#include "iwch_provider.h"
#include "iwch.h"
-/*
- * Get one cq entry from cxio and map it to openib.
- *
- * Returns:
- * 0 EMPTY;
- * 1 cqe returned
- * -EAGAIN caller must try again
- * any other -errno fatal error
- */
-static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp,
- struct ib_wc *wc)
+static int __iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp,
+ struct iwch_qp *qhp, struct ib_wc *wc)
{
- struct iwch_qp *qhp = NULL;
- struct t3_cqe cqe, *rd_cqe;
- struct t3_wq *wq;
+ struct t3_wq *wq = qhp ? &qhp->wq : NULL;
+ struct t3_cqe cqe;
u32 credit = 0;
u8 cqe_flushed;
u64 cookie;
int ret = 1;
- rd_cqe = cxio_next_cqe(&chp->cq);
-
- if (!rd_cqe)
- return 0;
-
- qhp = get_qhp(rhp, CQE_QPID(*rd_cqe));
- if (!qhp)
- wq = NULL;
- else {
- spin_lock(&qhp->lock);
- wq = &(qhp->wq);
- }
ret = cxio_poll_cq(wq, &(chp->cq), &cqe, &cqe_flushed, &cookie,
&credit);
if (t3a_device(chp->rhp) && credit) {
@@ -79,7 +57,7 @@ static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp,
ret = 1;
wc->wr_id = cookie;
- wc->qp = &qhp->ibqp;
+ wc->qp = qhp ? &qhp->ibqp : NULL;
wc->vendor_err = CQE_STATUS(cqe);
wc->wc_flags = 0;
@@ -182,8 +160,38 @@ static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp,
}
}
out:
- if (wq)
+ return ret;
+}
+
+/*
+ * Get one cq entry from cxio and map it to openib.
+ *
+ * Returns:
+ * 0 EMPTY;
+ * 1 cqe returned
+ * -EAGAIN caller must try again
+ * any other -errno fatal error
+ */
+static int iwch_poll_cq_one(struct iwch_dev *rhp, struct iwch_cq *chp,
+ struct ib_wc *wc)
+{
+ struct iwch_qp *qhp;
+ struct t3_cqe *rd_cqe;
+ int ret;
+
+ rd_cqe = cxio_next_cqe(&chp->cq);
+
+ if (!rd_cqe)
+ return 0;
+
+ qhp = get_qhp(rhp, CQE_QPID(*rd_cqe));
+ if (qhp) {
+ spin_lock(&qhp->lock);
+ ret = __iwch_poll_cq_one(rhp, chp, qhp, wc);
spin_unlock(&qhp->lock);
+ } else {
+ ret = __iwch_poll_cq_one(rhp, chp, NULL, wc);
+ }
return ret;
}
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index be097c6723c0..68bc2f9a532f 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -1103,7 +1103,8 @@ static int iwch_query_device(struct ib_device *ibdev, struct ib_device_attr *pro
props->max_mr_size = dev->attr.max_mr_size;
props->max_qp = dev->attr.max_qps;
props->max_qp_wr = dev->attr.max_wrs;
- props->max_sge = dev->attr.max_sge_per_wr;
+ props->max_send_sge = dev->attr.max_sge_per_wr;
+ props->max_recv_sge = dev->attr.max_sge_per_wr;
props->max_sge_rd = 1;
props->max_qp_rd_atom = dev->attr.max_rdma_reads_per_qp;
props->max_qp_init_rd_atom = dev->attr.max_rdma_reads_per_qp;
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 0912fa026327..77243f7e17d5 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -3444,9 +3444,6 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog)
}
insert_handle(dev, &dev->stid_idr, ep, ep->stid);
- memcpy(&ep->com.local_addr, &cm_id->m_local_addr,
- sizeof(ep->com.local_addr));
-
state_set(&ep->com, LISTEN);
if (ep->com.local_addr.ss_family == AF_INET)
err = create_server4(dev, ep);
diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c
index 2be2e1ac1b5f..a055f9f08e76 100644
--- a/drivers/infiniband/hw/cxgb4/cq.c
+++ b/drivers/infiniband/hw/cxgb4/cq.c
@@ -77,6 +77,10 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
int user = (uctx != &rdev->uctx);
int ret;
struct sk_buff *skb;
+ struct c4iw_ucontext *ucontext = NULL;
+
+ if (user)
+ ucontext = container_of(uctx, struct c4iw_ucontext, uctx);
cq->cqid = c4iw_get_cqid(rdev, uctx);
if (!cq->cqid) {
@@ -100,6 +104,16 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
dma_unmap_addr_set(cq, mapping, cq->dma_addr);
memset(cq->queue, 0, cq->memsize);
+ if (user && ucontext->is_32b_cqe) {
+ cq->qp_errp = &((struct t4_status_page *)
+ ((u8 *)cq->queue + (cq->size - 1) *
+ (sizeof(*cq->queue) / 2)))->qp_err;
+ } else {
+ cq->qp_errp = &((struct t4_status_page *)
+ ((u8 *)cq->queue + (cq->size - 1) *
+ sizeof(*cq->queue)))->qp_err;
+ }
+
/* build fw_ri_res_wr */
wr_len = sizeof *res_wr + sizeof *res;
@@ -132,7 +146,9 @@ static int create_cq(struct c4iw_rdev *rdev, struct t4_cq *cq,
FW_RI_RES_WR_IQPCIECH_V(2) |
FW_RI_RES_WR_IQINTCNTTHRESH_V(0) |
FW_RI_RES_WR_IQO_F |
- FW_RI_RES_WR_IQESIZE_V(1));
+ ((user && ucontext->is_32b_cqe) ?
+ FW_RI_RES_WR_IQESIZE_V(1) :
+ FW_RI_RES_WR_IQESIZE_V(2)));
res->u.cq.iqsize = cpu_to_be16(cq->size);
res->u.cq.iqaddr = cpu_to_be64(cq->dma_addr);
@@ -668,43 +684,22 @@ skip_cqe:
return ret;
}
-/*
- * Get one cq entry from c4iw and map it to openib.
- *
- * Returns:
- * 0 cqe returned
- * -ENODATA EMPTY;
- * -EAGAIN caller must try again
- * any other -errno fatal error
- */
-static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
+static int __c4iw_poll_cq_one(struct c4iw_cq *chp, struct c4iw_qp *qhp,
+ struct ib_wc *wc)
{
- struct c4iw_qp *qhp = NULL;
- struct t4_cqe uninitialized_var(cqe), *rd_cqe;
- struct t4_wq *wq;
+ struct t4_cqe uninitialized_var(cqe);
+ struct t4_wq *wq = qhp ? &qhp->wq : NULL;
u32 credit = 0;
u8 cqe_flushed;
u64 cookie = 0;
int ret;
- ret = t4_next_cqe(&chp->cq, &rd_cqe);
-
- if (ret)
- return ret;
-
- qhp = get_qhp(chp->rhp, CQE_QPID(rd_cqe));
- if (!qhp)
- wq = NULL;
- else {
- spin_lock(&qhp->lock);
- wq = &(qhp->wq);
- }
ret = poll_cq(wq, &(chp->cq), &cqe, &cqe_flushed, &cookie, &credit);
if (ret)
goto out;
wc->wr_id = cookie;
- wc->qp = &qhp->ibqp;
+ wc->qp = qhp ? &qhp->ibqp : NULL;
wc->vendor_err = CQE_STATUS(&cqe);
wc->wc_flags = 0;
@@ -819,8 +814,37 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
}
}
out:
- if (wq)
+ return ret;
+}
+
+/*
+ * Get one cq entry from c4iw and map it to openib.
+ *
+ * Returns:
+ * 0 cqe returned
+ * -ENODATA EMPTY;
+ * -EAGAIN caller must try again
+ * any other -errno fatal error
+ */
+static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
+{
+ struct c4iw_qp *qhp = NULL;
+ struct t4_cqe *rd_cqe;
+ int ret;
+
+ ret = t4_next_cqe(&chp->cq, &rd_cqe);
+
+ if (ret)
+ return ret;
+
+ qhp = get_qhp(chp->rhp, CQE_QPID(rd_cqe));
+ if (qhp) {
+ spin_lock(&qhp->lock);
+ ret = __c4iw_poll_cq_one(chp, qhp, wc);
spin_unlock(&qhp->lock);
+ } else {
+ ret = __c4iw_poll_cq_one(chp, NULL, wc);
+ }
return ret;
}
@@ -876,6 +900,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
int vector = attr->comp_vector;
struct c4iw_dev *rhp;
struct c4iw_cq *chp;
+ struct c4iw_create_cq ucmd;
struct c4iw_create_cq_resp uresp;
struct c4iw_ucontext *ucontext = NULL;
int ret, wr_len;
@@ -891,9 +916,16 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
if (vector >= rhp->rdev.lldi.nciq)
return ERR_PTR(-EINVAL);
+ if (ib_context) {
+ ucontext = to_c4iw_ucontext(ib_context);
+ if (udata->inlen < sizeof(ucmd))
+ ucontext->is_32b_cqe = 1;
+ }
+
chp = kzalloc(sizeof(*chp), GFP_KERNEL);
if (!chp)
return ERR_PTR(-ENOMEM);
+
chp->wr_waitp = c4iw_alloc_wr_wait(GFP_KERNEL);
if (!chp->wr_waitp) {
ret = -ENOMEM;
@@ -908,9 +940,6 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
goto err_free_wr_wait;
}
- if (ib_context)
- ucontext = to_c4iw_ucontext(ib_context);
-
/* account for the status page. */
entries++;
@@ -934,13 +963,15 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
if (hwentries < 64)
hwentries = 64;
- memsize = hwentries * sizeof *chp->cq.queue;
+ memsize = hwentries * ((ucontext && ucontext->is_32b_cqe) ?
+ (sizeof(*chp->cq.queue) / 2) : sizeof(*chp->cq.queue));
/*
* memsize must be a multiple of the page size if its a user cq.
*/
if (ucontext)
memsize = roundup(memsize, PAGE_SIZE);
+
chp->cq.size = hwentries;
chp->cq.memsize = memsize;
chp->cq.vector = vector;
@@ -971,6 +1002,7 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
if (!mm2)
goto err_free_mm;
+ memset(&uresp, 0, sizeof(uresp));
uresp.qid_mask = rhp->rdev.cqmask;
uresp.cqid = chp->cq.cqid;
uresp.size = chp->cq.size;
@@ -980,9 +1012,16 @@ struct ib_cq *c4iw_create_cq(struct ib_device *ibdev,
ucontext->key += PAGE_SIZE;
uresp.gts_key = ucontext->key;
ucontext->key += PAGE_SIZE;
+ /* communicate to the userspace that
+ * kernel driver supports 64B CQE
+ */
+ uresp.flags |= C4IW_64B_CQE;
+
spin_unlock(&ucontext->mmap_lock);
ret = ib_copy_to_udata(udata, &uresp,
- sizeof(uresp) - sizeof(uresp.reserved));
+ ucontext->is_32b_cqe ?
+ sizeof(uresp) - sizeof(uresp.flags) :
+ sizeof(uresp));
if (ret)
goto err_free_mm2;
diff --git a/drivers/infiniband/hw/cxgb4/ev.c b/drivers/infiniband/hw/cxgb4/ev.c
index 3e9d8b277ab9..8741d23168f3 100644
--- a/drivers/infiniband/hw/cxgb4/ev.c
+++ b/drivers/infiniband/hw/cxgb4/ev.c
@@ -70,9 +70,10 @@ static void dump_err_cqe(struct c4iw_dev *dev, struct t4_cqe *err_cqe)
CQE_STATUS(err_cqe), CQE_TYPE(err_cqe), ntohl(err_cqe->len),
CQE_WRID_HI(err_cqe), CQE_WRID_LOW(err_cqe));
- pr_debug("%016llx %016llx %016llx %016llx\n",
+ pr_debug("%016llx %016llx %016llx %016llx - %016llx %016llx %016llx %016llx\n",
be64_to_cpu(p[0]), be64_to_cpu(p[1]), be64_to_cpu(p[2]),
- be64_to_cpu(p[3]));
+ be64_to_cpu(p[3]), be64_to_cpu(p[4]), be64_to_cpu(p[5]),
+ be64_to_cpu(p[6]), be64_to_cpu(p[7]));
/*
* Ingress WRITE and READ_RESP errors provide
diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
index 870649ff049c..8866bf992316 100644
--- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
+++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h
@@ -566,6 +566,7 @@ struct c4iw_ucontext {
spinlock_t mmap_lock;
struct list_head mmaps;
struct kref kref;
+ bool is_32b_cqe;
};
static inline struct c4iw_ucontext *to_c4iw_ucontext(struct ib_ucontext *c)
diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c
index 1feade8bb4b3..61b8bdb9423d 100644
--- a/drivers/infiniband/hw/cxgb4/provider.c
+++ b/drivers/infiniband/hw/cxgb4/provider.c
@@ -343,7 +343,8 @@ static int c4iw_query_device(struct ib_device *ibdev, struct ib_device_attr *pro
props->max_mr_size = T4_MAX_MR_SIZE;
props->max_qp = dev->rdev.lldi.vr->qp.size / 2;
props->max_qp_wr = dev->rdev.hw_queue.t4_max_qp_depth;
- props->max_sge = T4_MAX_RECV_SGE;
+ props->max_send_sge = min(T4_MAX_SEND_SGE, T4_MAX_WRITE_SGE);
+ props->max_recv_sge = T4_MAX_RECV_SGE;
props->max_sge_rd = 1;
props->max_res_rd_atom = dev->rdev.lldi.max_ird_adapter;
props->max_qp_rd_atom = min(dev->rdev.lldi.max_ordird_qp,
diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h
index 8369c7c8de83..838a7dee48bd 100644
--- a/drivers/infiniband/hw/cxgb4/t4.h
+++ b/drivers/infiniband/hw/cxgb4/t4.h
@@ -179,9 +179,20 @@ struct t4_cqe {
__be32 wrid_hi;
__be32 wrid_low;
} gen;
+ struct {
+ __be32 stag;
+ __be32 msn;
+ __be32 reserved;
+ __be32 abs_rqe_idx;
+ } srcqe;
+ struct {
+ __be64 imm_data;
+ } imm_data_rcqe;
+
u64 drain_cookie;
+ __be64 flits[3];
} u;
- __be64 reserved;
+ __be64 reserved[3];
__be64 bits_type_ts;
};
@@ -565,6 +576,7 @@ struct t4_cq {
u16 cidx_inc;
u8 gen;
u8 error;
+ u8 *qp_errp;
unsigned long flags;
};
@@ -698,12 +710,12 @@ static inline int t4_next_cqe(struct t4_cq *cq, struct t4_cqe **cqe)
static inline int t4_cq_in_error(struct t4_cq *cq)
{
- return ((struct t4_status_page *)&cq->queue[cq->size])->qp_err;
+ return *cq->qp_errp;
}
static inline void t4_set_cq_in_error(struct t4_cq *cq)
{
- ((struct t4_status_page *)&cq->queue[cq->size])->qp_err = 1;
+ *cq->qp_errp = 1;
}
#endif
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index 6deb101cdd43..2c19bf772451 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -8143,8 +8143,15 @@ static void is_sdma_eng_int(struct hfi1_devdata *dd, unsigned int source)
}
}
-/*
+/**
+ * is_rcv_avail_int() - User receive context available IRQ handler
+ * @dd: valid dd
+ * @source: logical IRQ source (offset from IS_RCVAVAIL_START)
+ *
* RX block receive available interrupt. Source is < 160.
+ *
+ * This is the general interrupt handler for user (PSM) receive contexts,
+ * and can only be used for non-threaded IRQs.
*/
static void is_rcv_avail_int(struct hfi1_devdata *dd, unsigned int source)
{
@@ -8154,12 +8161,7 @@ static void is_rcv_avail_int(struct hfi1_devdata *dd, unsigned int source)
if (likely(source < dd->num_rcv_contexts)) {
rcd = hfi1_rcd_get_by_index(dd, source);
if (rcd) {
- /* Check for non-user contexts, including vnic */
- if (source < dd->first_dyn_alloc_ctxt || rcd->is_vnic)
- rcd->do_interrupt(rcd, 0);
- else
- handle_user_interrupt(rcd);
-
+ handle_user_interrupt(rcd);
hfi1_rcd_put(rcd);
return; /* OK */
}
@@ -8173,8 +8175,14 @@ static void is_rcv_avail_int(struct hfi1_devdata *dd, unsigned int source)
err_detail, source);
}
-/*
+/**
+ * is_rcv_urgent_int() - User receive context urgent IRQ handler
+ * @dd: valid dd
+ * @source: logical IRQ source (ofse from IS_RCVURGENT_START)
+ *
* RX block receive urgent interrupt. Source is < 160.
+ *
+ * NOTE: kernel receive contexts specifically do NOT enable this IRQ.
*/
static void is_rcv_urgent_int(struct hfi1_devdata *dd, unsigned int source)
{
@@ -8184,11 +8192,7 @@ static void is_rcv_urgent_int(struct hfi1_devdata *dd, unsigned int source)
if (likely(source < dd->num_rcv_contexts)) {
rcd = hfi1_rcd_get_by_index(dd, source);
if (rcd) {
- /* only pay attention to user urgent interrupts */
- if (source >= dd->first_dyn_alloc_ctxt &&
- !rcd->is_vnic)
- handle_user_interrupt(rcd);
-
+ handle_user_interrupt(rcd);
hfi1_rcd_put(rcd);
return; /* OK */
}
@@ -8260,9 +8264,14 @@ static void is_interrupt(struct hfi1_devdata *dd, unsigned int source)
dd_dev_err(dd, "invalid interrupt source %u\n", source);
}
-/*
- * General interrupt handler. This is able to correctly handle
- * all interrupts in case INTx is used.
+/**
+ * gerneral_interrupt() - General interrupt handler
+ * @irq: MSIx IRQ vector
+ * @data: hfi1 devdata
+ *
+ * This is able to correctly handle all non-threaded interrupts. Receive
+ * context DATA IRQs are threaded and are not supported by this handler.
+ *
*/
static irqreturn_t general_interrupt(int irq, void *data)
{
@@ -10130,7 +10139,7 @@ static void set_lidlmc(struct hfi1_pportdata *ppd)
(((lid & mask) & SEND_CTXT_CHECK_SLID_VALUE_MASK) <<
SEND_CTXT_CHECK_SLID_VALUE_SHIFT);
- for (i = 0; i < dd->chip_send_contexts; i++) {
+ for (i = 0; i < chip_send_contexts(dd); i++) {
hfi1_cdbg(LINKVERB, "SendContext[%d].SLID_CHECK = 0x%x",
i, (u32)sreg);
write_kctxt_csr(dd, i, SEND_CTXT_CHECK_SLID, sreg);
@@ -11857,7 +11866,7 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op,
* sequence numbers could land exactly on the same spot.
* E.g. a rcd restart before the receive header wrapped.
*/
- memset(rcd->rcvhdrq, 0, rcd->rcvhdrq_size);
+ memset(rcd->rcvhdrq, 0, rcvhdrq_size(rcd));
/* starting timeout */
rcd->rcvavail_timeout = dd->rcv_intr_timeout_csr;
@@ -11952,9 +11961,8 @@ void hfi1_rcvctrl(struct hfi1_devdata *dd, unsigned int op,
rcvctrl |= RCV_CTXT_CTRL_DONT_DROP_EGR_FULL_SMASK;
if (op & HFI1_RCVCTRL_NO_EGR_DROP_DIS)
rcvctrl &= ~RCV_CTXT_CTRL_DONT_DROP_EGR_FULL_SMASK;
- rcd->rcvctrl = rcvctrl;
hfi1_cdbg(RCVCTRL, "ctxt %d rcvctrl 0x%llx\n", ctxt, rcvctrl);
- write_kctxt_csr(dd, ctxt, RCV_CTXT_CTRL, rcd->rcvctrl);
+ write_kctxt_csr(dd, ctxt, RCV_CTXT_CTRL, rcvctrl);
/* work around sticky RcvCtxtStatus.BlockedRHQFull */
if (did_enable &&
@@ -12042,7 +12050,7 @@ u32 hfi1_read_cntrs(struct hfi1_devdata *dd, char **namep, u64 **cntrp)
} else if (entry->flags & CNTR_SDMA) {
hfi1_cdbg(CNTR,
"\t Per SDMA Engine\n");
- for (j = 0; j < dd->chip_sdma_engines;
+ for (j = 0; j < chip_sdma_engines(dd);
j++) {
val =
entry->rw_cntr(entry, dd, j,
@@ -12418,6 +12426,7 @@ static int init_cntrs(struct hfi1_devdata *dd)
struct hfi1_pportdata *ppd;
const char *bit_type_32 = ",32";
const int bit_type_32_sz = strlen(bit_type_32);
+ u32 sdma_engines = chip_sdma_engines(dd);
/* set up the stats timer; the add_timer is done at the end */
timer_setup(&dd->synth_stats_timer, update_synth_timer, 0);
@@ -12450,7 +12459,7 @@ static int init_cntrs(struct hfi1_devdata *dd)
}
} else if (dev_cntrs[i].flags & CNTR_SDMA) {
dev_cntrs[i].offset = dd->ndevcntrs;
- for (j = 0; j < dd->chip_sdma_engines; j++) {
+ for (j = 0; j < sdma_engines; j++) {
snprintf(name, C_MAX_NAME, "%s%d",
dev_cntrs[i].name, j);
sz += strlen(name);
@@ -12507,7 +12516,7 @@ static int init_cntrs(struct hfi1_devdata *dd)
*p++ = '\n';
}
} else if (dev_cntrs[i].flags & CNTR_SDMA) {
- for (j = 0; j < dd->chip_sdma_engines; j++) {
+ for (j = 0; j < sdma_engines; j++) {
snprintf(name, C_MAX_NAME, "%s%d",
dev_cntrs[i].name, j);
memcpy(p, name, strlen(name));
@@ -13020,9 +13029,9 @@ static void clear_all_interrupts(struct hfi1_devdata *dd)
write_csr(dd, SEND_PIO_ERR_CLEAR, ~(u64)0);
write_csr(dd, SEND_DMA_ERR_CLEAR, ~(u64)0);
write_csr(dd, SEND_EGRESS_ERR_CLEAR, ~(u64)0);
- for (i = 0; i < dd->chip_send_contexts; i++)
+ for (i = 0; i < chip_send_contexts(dd); i++)
write_kctxt_csr(dd, i, SEND_CTXT_ERR_CLEAR, ~(u64)0);
- for (i = 0; i < dd->chip_sdma_engines; i++)
+ for (i = 0; i < chip_sdma_engines(dd); i++)
write_kctxt_csr(dd, i, SEND_DMA_ENG_ERR_CLEAR, ~(u64)0);
write_csr(dd, DCC_ERR_FLG_CLR, ~(u64)0);
@@ -13030,48 +13039,30 @@ static void clear_all_interrupts(struct hfi1_devdata *dd)
write_csr(dd, DC_DC8051_ERR_CLR, ~(u64)0);
}
-/* Move to pcie.c? */
-static void disable_intx(struct pci_dev *pdev)
-{
- pci_intx(pdev, 0);
-}
-
/**
* hfi1_clean_up_interrupts() - Free all IRQ resources
* @dd: valid device data data structure
*
- * Free the MSI or INTx IRQs and assoicated PCI resources,
- * if they have been allocated.
+ * Free the MSIx and assoicated PCI resources, if they have been allocated.
*/
void hfi1_clean_up_interrupts(struct hfi1_devdata *dd)
{
int i;
+ struct hfi1_msix_entry *me = dd->msix_entries;
/* remove irqs - must happen before disabling/turning off */
- if (dd->num_msix_entries) {
- /* MSI-X */
- struct hfi1_msix_entry *me = dd->msix_entries;
-
- for (i = 0; i < dd->num_msix_entries; i++, me++) {
- if (!me->arg) /* => no irq, no affinity */
- continue;
- hfi1_put_irq_affinity(dd, me);
- pci_free_irq(dd->pcidev, i, me->arg);
- }
-
- /* clean structures */
- kfree(dd->msix_entries);
- dd->msix_entries = NULL;
- dd->num_msix_entries = 0;
- } else {
- /* INTx */
- if (dd->requested_intx_irq) {
- pci_free_irq(dd->pcidev, 0, dd);
- dd->requested_intx_irq = 0;
- }
- disable_intx(dd->pcidev);
+ for (i = 0; i < dd->num_msix_entries; i++, me++) {
+ if (!me->arg) /* => no irq, no affinity */
+ continue;
+ hfi1_put_irq_affinity(dd, me);
+ pci_free_irq(dd->pcidev, i, me->arg);
}
+ /* clean structures */
+ kfree(dd->msix_entries);
+ dd->msix_entries = NULL;
+ dd->num_msix_entries = 0;
+
pci_free_irq_vectors(dd->pcidev);
}
@@ -13121,20 +13112,6 @@ static void remap_sdma_interrupts(struct hfi1_devdata *dd,
msix_intr);
}
-static int request_intx_irq(struct hfi1_devdata *dd)
-{
- int ret;
-
- ret = pci_request_irq(dd->pcidev, 0, general_interrupt, NULL, dd,
- DRIVER_NAME "_%d", dd->unit);
- if (ret)
- dd_dev_err(dd, "unable to request INTx interrupt, err %d\n",
- ret);
- else
- dd->requested_intx_irq = 1;
- return ret;
-}
-
static int request_msix_irqs(struct hfi1_devdata *dd)
{
int first_general, last_general;
@@ -13253,11 +13230,6 @@ void hfi1_vnic_synchronize_irq(struct hfi1_devdata *dd)
{
int i;
- if (!dd->num_msix_entries) {
- synchronize_irq(pci_irq_vector(dd->pcidev, 0));
- return;
- }
-
for (i = 0; i < dd->vnic.num_ctxt; i++) {
struct hfi1_ctxtdata *rcd = dd->vnic.ctxt[i];
struct hfi1_msix_entry *me = &dd->msix_entries[rcd->msix_intr];
@@ -13346,7 +13318,6 @@ static int set_up_interrupts(struct hfi1_devdata *dd)
{
u32 total;
int ret, request;
- int single_interrupt = 0; /* we expect to have all the interrupts */
/*
* Interrupt count:
@@ -13363,17 +13334,6 @@ static int set_up_interrupts(struct hfi1_devdata *dd)
if (request < 0) {
ret = request;
goto fail;
- } else if (request == 0) {
- /* using INTx */
- /* dd->num_msix_entries already zero */
- single_interrupt = 1;
- dd_dev_err(dd, "MSI-X failed, using INTx interrupts\n");
- } else if (request < total) {
- /* using MSI-X, with reduced interrupts */
- dd_dev_err(dd, "reduced interrupt found, wanted %u, got %u\n",
- total, request);
- ret = -EINVAL;
- goto fail;
} else {
dd->msix_entries = kcalloc(total, sizeof(*dd->msix_entries),
GFP_KERNEL);
@@ -13394,10 +13354,7 @@ static int set_up_interrupts(struct hfi1_devdata *dd)
/* reset general handler mask, chip MSI-X mappings */
reset_interrupts(dd);
- if (single_interrupt)
- ret = request_intx_irq(dd);
- else
- ret = request_msix_irqs(dd);
+ ret = request_msix_irqs(dd);
if (ret)
goto fail;
@@ -13429,6 +13386,8 @@ static int set_up_context_variables(struct hfi1_devdata *dd)
int qos_rmt_count;
int user_rmt_reduced;
u32 n_usr_ctxts;
+ u32 send_contexts = chip_send_contexts(dd);
+ u32 rcv_contexts = chip_rcv_contexts(dd);
/*
* Kernel receive contexts:
@@ -13450,16 +13409,16 @@ static int set_up_context_variables(struct hfi1_devdata *dd)
* Every kernel receive context needs an ACK send context.
* one send context is allocated for each VL{0-7} and VL15
*/
- if (num_kernel_contexts > (dd->chip_send_contexts - num_vls - 1)) {
+ if (num_kernel_contexts > (send_contexts - num_vls - 1)) {
dd_dev_err(dd,
"Reducing # kernel rcv contexts to: %d, from %lu\n",
- (int)(dd->chip_send_contexts - num_vls - 1),
+ send_contexts - num_vls - 1,
num_kernel_contexts);
- num_kernel_contexts = dd->chip_send_contexts - num_vls - 1;
+ num_kernel_contexts = send_contexts - num_vls - 1;
}
/* Accommodate VNIC contexts if possible */
- if ((num_kernel_contexts + num_vnic_contexts) > dd->chip_rcv_contexts) {
+ if ((num_kernel_contexts + num_vnic_contexts) > rcv_contexts) {
dd_dev_err(dd, "No receive contexts available for VNIC\n");
num_vnic_contexts = 0;
}
@@ -13477,13 +13436,13 @@ static int set_up_context_variables(struct hfi1_devdata *dd)
/*
* Adjust the counts given a global max.
*/
- if (total_contexts + n_usr_ctxts > dd->chip_rcv_contexts) {
+ if (total_contexts + n_usr_ctxts > rcv_contexts) {
dd_dev_err(dd,
"Reducing # user receive contexts to: %d, from %u\n",
- (int)(dd->chip_rcv_contexts - total_contexts),
+ rcv_contexts - total_contexts,
n_usr_ctxts);
/* recalculate */
- n_usr_ctxts = dd->chip_rcv_contexts - total_contexts;
+ n_usr_ctxts = rcv_contexts - total_contexts;
}
/* each user context requires an entry in the RMT */
@@ -13509,7 +13468,7 @@ static int set_up_context_variables(struct hfi1_devdata *dd)
dd->freectxts = n_usr_ctxts;
dd_dev_info(dd,
"rcv contexts: chip %d, used %d (kernel %d, vnic %u, user %u)\n",
- (int)dd->chip_rcv_contexts,
+ rcv_contexts,
(int)dd->num_rcv_contexts,
(int)dd->n_krcv_queues,
dd->num_vnic_contexts,
@@ -13527,7 +13486,7 @@ static int set_up_context_variables(struct hfi1_devdata *dd)
* contexts.
*/
dd->rcv_entries.group_size = RCV_INCREMENT;
- ngroups = dd->chip_rcv_array_count / dd->rcv_entries.group_size;
+ ngroups = chip_rcv_array_count(dd) / dd->rcv_entries.group_size;
dd->rcv_entries.ngroups = ngroups / dd->num_rcv_contexts;
dd->rcv_entries.nctxt_extra = ngroups -
(dd->num_rcv_contexts * dd->rcv_entries.ngroups);
@@ -13552,7 +13511,7 @@ static int set_up_context_variables(struct hfi1_devdata *dd)
dd_dev_info(
dd,
"send contexts: chip %d, used %d (kernel %d, ack %d, user %d, vl15 %d)\n",
- dd->chip_send_contexts,
+ send_contexts,
dd->num_send_contexts,
dd->sc_sizes[SC_KERNEL].count,
dd->sc_sizes[SC_ACK].count,
@@ -13610,7 +13569,7 @@ static void write_uninitialized_csrs_and_memories(struct hfi1_devdata *dd)
write_csr(dd, CCE_INT_MAP + (8 * i), 0);
/* SendCtxtCreditReturnAddr */
- for (i = 0; i < dd->chip_send_contexts; i++)
+ for (i = 0; i < chip_send_contexts(dd); i++)
write_kctxt_csr(dd, i, SEND_CTXT_CREDIT_RETURN_ADDR, 0);
/* PIO Send buffers */
@@ -13623,7 +13582,7 @@ static void write_uninitialized_csrs_and_memories(struct hfi1_devdata *dd)
/* RcvHdrAddr */
/* RcvHdrTailAddr */
/* RcvTidFlowTable */
- for (i = 0; i < dd->chip_rcv_contexts; i++) {
+ for (i = 0; i < chip_rcv_contexts(dd); i++) {
write_kctxt_csr(dd, i, RCV_HDR_ADDR, 0);
write_kctxt_csr(dd, i, RCV_HDR_TAIL_ADDR, 0);
for (j = 0; j < RXE_NUM_TID_FLOWS; j++)
@@ -13631,7 +13590,7 @@ static void write_uninitialized_csrs_and_memories(struct hfi1_devdata *dd)
}
/* RcvArray */
- for (i = 0; i < dd->chip_rcv_array_count; i++)
+ for (i = 0; i < chip_rcv_array_count(dd); i++)
hfi1_put_tid(dd, i, PT_INVALID_FLUSH, 0, 0);
/* RcvQPMapTable */
@@ -13789,7 +13748,7 @@ static void reset_txe_csrs(struct hfi1_devdata *dd)
write_csr(dd, SEND_LOW_PRIORITY_LIST + (8 * i), 0);
for (i = 0; i < VL_ARB_HIGH_PRIO_TABLE_SIZE; i++)
write_csr(dd, SEND_HIGH_PRIORITY_LIST + (8 * i), 0);
- for (i = 0; i < dd->chip_send_contexts / NUM_CONTEXTS_PER_SET; i++)
+ for (i = 0; i < chip_send_contexts(dd) / NUM_CONTEXTS_PER_SET; i++)
write_csr(dd, SEND_CONTEXT_SET_CTRL + (8 * i), 0);
for (i = 0; i < TXE_NUM_32_BIT_COUNTER; i++)
write_csr(dd, SEND_COUNTER_ARRAY32 + (8 * i), 0);
@@ -13817,7 +13776,7 @@ static void reset_txe_csrs(struct hfi1_devdata *dd)
/*
* TXE Per-Context CSRs
*/
- for (i = 0; i < dd->chip_send_contexts; i++) {
+ for (i = 0; i < chip_send_contexts(dd); i++) {
write_kctxt_csr(dd, i, SEND_CTXT_CTRL, 0);
write_kctxt_csr(dd, i, SEND_CTXT_CREDIT_CTRL, 0);
write_kctxt_csr(dd, i, SEND_CTXT_CREDIT_RETURN_ADDR, 0);
@@ -13835,7 +13794,7 @@ static void reset_txe_csrs(struct hfi1_devdata *dd)
/*
* TXE Per-SDMA CSRs
*/
- for (i = 0; i < dd->chip_sdma_engines; i++) {
+ for (i = 0; i < chip_sdma_engines(dd); i++) {
write_kctxt_csr(dd, i, SEND_DMA_CTRL, 0);
/* SEND_DMA_STATUS read-only */
write_kctxt_csr(dd, i, SEND_DMA_BASE_ADDR, 0);
@@ -13968,7 +13927,7 @@ static void reset_rxe_csrs(struct hfi1_devdata *dd)
/*
* RXE Kernel and User Per-Context CSRs
*/
- for (i = 0; i < dd->chip_rcv_contexts; i++) {
+ for (i = 0; i < chip_rcv_contexts(dd); i++) {
/* kernel */
write_kctxt_csr(dd, i, RCV_CTXT_CTRL, 0);
/* RCV_CTXT_STATUS read-only */
@@ -14084,13 +14043,13 @@ static int init_chip(struct hfi1_devdata *dd)
/* disable send contexts and SDMA engines */
write_csr(dd, SEND_CTRL, 0);
- for (i = 0; i < dd->chip_send_contexts; i++)
+ for (i = 0; i < chip_send_contexts(dd); i++)
write_kctxt_csr(dd, i, SEND_CTXT_CTRL, 0);
- for (i = 0; i < dd->chip_sdma_engines; i++)
+ for (i = 0; i < chip_sdma_engines(dd); i++)
write_kctxt_csr(dd, i, SEND_DMA_CTRL, 0);
/* disable port (turn off RXE inbound traffic) and contexts */
write_csr(dd, RCV_CTRL, 0);
- for (i = 0; i < dd->chip_rcv_contexts; i++)
+ for (i = 0; i < chip_rcv_contexts(dd); i++)
write_csr(dd, RCV_CTXT_CTRL, 0);
/* mask all interrupt sources */
for (i = 0; i < CCE_NUM_INT_CSRS; i++)
@@ -14709,9 +14668,9 @@ static void init_txe(struct hfi1_devdata *dd)
write_csr(dd, SEND_EGRESS_ERR_MASK, ~0ull);
/* enable all per-context and per-SDMA engine errors */
- for (i = 0; i < dd->chip_send_contexts; i++)
+ for (i = 0; i < chip_send_contexts(dd); i++)
write_kctxt_csr(dd, i, SEND_CTXT_ERR_MASK, ~0ull);
- for (i = 0; i < dd->chip_sdma_engines; i++)
+ for (i = 0; i < chip_sdma_engines(dd); i++)
write_kctxt_csr(dd, i, SEND_DMA_ENG_ERR_MASK, ~0ull);
/* set the local CU to AU mapping */
@@ -14979,11 +14938,13 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
"Functional simulator"
};
struct pci_dev *parent = pdev->bus->self;
+ u32 sdma_engines;
dd = hfi1_alloc_devdata(pdev, NUM_IB_PORTS *
sizeof(struct hfi1_pportdata));
if (IS_ERR(dd))
goto bail;
+ sdma_engines = chip_sdma_engines(dd);
ppd = dd->pport;
for (i = 0; i < dd->num_pports; i++, ppd++) {
int vl;
@@ -15081,11 +15042,6 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
/* give a reasonable active value, will be set on link up */
dd->pport->link_speed_active = OPA_LINK_SPEED_25G;
- dd->chip_rcv_contexts = read_csr(dd, RCV_CONTEXTS);
- dd->chip_send_contexts = read_csr(dd, SEND_CONTEXTS);
- dd->chip_sdma_engines = read_csr(dd, SEND_DMA_ENGINES);
- dd->chip_pio_mem_size = read_csr(dd, SEND_PIO_MEM_SIZE);
- dd->chip_sdma_mem_size = read_csr(dd, SEND_DMA_MEM_SIZE);
/* fix up link widths for emulation _p */
ppd = dd->pport;
if (dd->icode == ICODE_FPGA_EMULATION && is_emulator_p(dd)) {
@@ -15096,11 +15052,11 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
OPA_LINK_WIDTH_1X;
}
/* insure num_vls isn't larger than number of sdma engines */
- if (HFI1_CAP_IS_KSET(SDMA) && num_vls > dd->chip_sdma_engines) {
+ if (HFI1_CAP_IS_KSET(SDMA) && num_vls > sdma_engines) {
dd_dev_err(dd, "num_vls %u too large, using %u VLs\n",
- num_vls, dd->chip_sdma_engines);
- num_vls = dd->chip_sdma_engines;
- ppd->vls_supported = dd->chip_sdma_engines;
+ num_vls, sdma_engines);
+ num_vls = sdma_engines;
+ ppd->vls_supported = sdma_engines;
ppd->vls_operational = ppd->vls_supported;
}
@@ -15216,13 +15172,6 @@ struct hfi1_devdata *hfi1_init_dd(struct pci_dev *pdev,
*/
aspm_init(dd);
- dd->rcvhdrsize = DEFAULT_RCVHDRSIZE;
- /*
- * rcd[0] is guaranteed to be valid by this point. Also, all
- * context are using the same value, as per the module parameter.
- */
- dd->rhf_offset = dd->rcd[0]->rcvhdrqentsize - sizeof(u64) / sizeof(u32);
-
ret = init_pervl_scs(dd);
if (ret)
goto bail_cleanup;
diff --git a/drivers/infiniband/hw/hfi1/chip.h b/drivers/infiniband/hw/hfi1/chip.h
index fdf389e46e19..36b04d6300e5 100644
--- a/drivers/infiniband/hw/hfi1/chip.h
+++ b/drivers/infiniband/hw/hfi1/chip.h
@@ -656,6 +656,36 @@ static inline void write_uctxt_csr(struct hfi1_devdata *dd, int ctxt,
write_csr(dd, offset0 + (0x1000 * ctxt), value);
}
+static inline u32 chip_rcv_contexts(struct hfi1_devdata *dd)
+{
+ return read_csr(dd, RCV_CONTEXTS);
+}
+
+static inline u32 chip_send_contexts(struct hfi1_devdata *dd)
+{
+ return read_csr(dd, SEND_CONTEXTS);
+}
+
+static inline u32 chip_sdma_engines(struct hfi1_devdata *dd)
+{
+ return read_csr(dd, SEND_DMA_ENGINES);
+}
+
+static inline u32 chip_pio_mem_size(struct hfi1_devdata *dd)
+{
+ return read_csr(dd, SEND_PIO_MEM_SIZE);
+}
+
+static inline u32 chip_sdma_mem_size(struct hfi1_devdata *dd)
+{
+ return read_csr(dd, SEND_DMA_MEM_SIZE);
+}
+
+static inline u32 chip_rcv_array_count(struct hfi1_devdata *dd)
+{
+ return read_csr(dd, RCV_ARRAY_CNT);
+}
+
u64 create_pbc(struct hfi1_pportdata *ppd, u64 flags, int srate_mbs, u32 vl,
u32 dw_len);
diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c
index 94dca95db04f..a41f85558312 100644
--- a/drivers/infiniband/hw/hfi1/driver.c
+++ b/drivers/infiniband/hw/hfi1/driver.c
@@ -208,25 +208,25 @@ static inline void *get_egrbuf(const struct hfi1_ctxtdata *rcd, u64 rhf,
(offset * RCV_BUF_BLOCK_SIZE));
}
-static inline void *hfi1_get_header(struct hfi1_devdata *dd,
+static inline void *hfi1_get_header(struct hfi1_ctxtdata *rcd,
__le32 *rhf_addr)
{
u32 offset = rhf_hdrq_offset(rhf_to_cpu(rhf_addr));
- return (void *)(rhf_addr - dd->rhf_offset + offset);
+ return (void *)(rhf_addr - rcd->rhf_offset + offset);
}
-static inline struct ib_header *hfi1_get_msgheader(struct hfi1_devdata *dd,
+static inline struct ib_header *hfi1_get_msgheader(struct hfi1_ctxtdata *rcd,
__le32 *rhf_addr)
{
- return (struct ib_header *)hfi1_get_header(dd, rhf_addr);
+ return (struct ib_header *)hfi1_get_header(rcd, rhf_addr);
}
static inline struct hfi1_16b_header
- *hfi1_get_16B_header(struct hfi1_devdata *dd,
+ *hfi1_get_16B_header(struct hfi1_ctxtdata *rcd,
__le32 *rhf_addr)
{
- return (struct hfi1_16b_header *)hfi1_get_header(dd, rhf_addr);
+ return (struct hfi1_16b_header *)hfi1_get_header(rcd, rhf_addr);
}
/*
@@ -591,13 +591,12 @@ static void __prescan_rxq(struct hfi1_packet *packet)
init_ps_mdata(&mdata, packet);
while (1) {
- struct hfi1_devdata *dd = rcd->dd;
struct hfi1_ibport *ibp = rcd_to_iport(rcd);
__le32 *rhf_addr = (__le32 *)rcd->rcvhdrq + mdata.ps_head +
- dd->rhf_offset;
+ packet->rcd->rhf_offset;
struct rvt_qp *qp;
struct ib_header *hdr;
- struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
+ struct rvt_dev_info *rdi = &rcd->dd->verbs_dev.rdi;
u64 rhf = rhf_to_cpu(rhf_addr);
u32 etype = rhf_rcv_type(rhf), qpn, bth1;
int is_ecn = 0;
@@ -612,7 +611,7 @@ static void __prescan_rxq(struct hfi1_packet *packet)
if (etype != RHF_RCV_TYPE_IB)
goto next;
- packet->hdr = hfi1_get_msgheader(dd, rhf_addr);
+ packet->hdr = hfi1_get_msgheader(packet->rcd, rhf_addr);
hdr = packet->hdr;
lnh = ib_get_lnh(hdr);
@@ -718,7 +717,7 @@ static noinline int skip_rcv_packet(struct hfi1_packet *packet, int thread)
ret = check_max_packet(packet, thread);
packet->rhf_addr = (__le32 *)packet->rcd->rcvhdrq + packet->rhqoff +
- packet->rcd->dd->rhf_offset;
+ packet->rcd->rhf_offset;
packet->rhf = rhf_to_cpu(packet->rhf_addr);
return ret;
@@ -757,7 +756,7 @@ static inline int process_rcv_packet(struct hfi1_packet *packet, int thread)
* crashing down. There is no need to eat another
* comparison in this performance critical code.
*/
- packet->rcd->dd->rhf_rcv_function_map[packet->etype](packet);
+ packet->rcd->rhf_rcv_function_map[packet->etype](packet);
packet->numpkt++;
/* Set up for the next packet */
@@ -768,7 +767,7 @@ static inline int process_rcv_packet(struct hfi1_packet *packet, int thread)
ret = check_max_packet(packet, thread);
packet->rhf_addr = (__le32 *)packet->rcd->rcvhdrq + packet->rhqoff +
- packet->rcd->dd->rhf_offset;
+ packet->rcd->rhf_offset;
packet->rhf = rhf_to_cpu(packet->rhf_addr);
return ret;
@@ -949,12 +948,12 @@ static inline int set_armed_to_active(struct hfi1_ctxtdata *rcd,
u8 sc = SC15_PACKET;
if (etype == RHF_RCV_TYPE_IB) {
- struct ib_header *hdr = hfi1_get_msgheader(packet->rcd->dd,
+ struct ib_header *hdr = hfi1_get_msgheader(packet->rcd,
packet->rhf_addr);
sc = hfi1_9B_get_sc5(hdr, packet->rhf);
} else if (etype == RHF_RCV_TYPE_BYPASS) {
struct hfi1_16b_header *hdr = hfi1_get_16B_header(
- packet->rcd->dd,
+ packet->rcd,
packet->rhf_addr);
sc = hfi1_16B_get_sc(hdr);
}
@@ -1034,7 +1033,7 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread)
packet.rhqoff += packet.rsize;
packet.rhf_addr = (__le32 *)rcd->rcvhdrq +
packet.rhqoff +
- dd->rhf_offset;
+ rcd->rhf_offset;
packet.rhf = rhf_to_cpu(packet.rhf_addr);
} else if (skip_pkt) {
@@ -1384,7 +1383,7 @@ bail:
static inline void hfi1_setup_ib_header(struct hfi1_packet *packet)
{
packet->hdr = (struct hfi1_ib_message_header *)
- hfi1_get_msgheader(packet->rcd->dd,
+ hfi1_get_msgheader(packet->rcd,
packet->rhf_addr);
packet->hlen = (u8 *)packet->rhf_addr - (u8 *)packet->hdr;
}
@@ -1485,7 +1484,7 @@ static int hfi1_setup_bypass_packet(struct hfi1_packet *packet)
u8 l4;
packet->hdr = (struct hfi1_16b_header *)
- hfi1_get_16B_header(packet->rcd->dd,
+ hfi1_get_16B_header(packet->rcd,
packet->rhf_addr);
l4 = hfi1_16B_get_l4(packet->hdr);
if (l4 == OPA_16B_L4_IB_LOCAL) {
@@ -1575,7 +1574,7 @@ void handle_eflags(struct hfi1_packet *packet)
* The following functions are called by the interrupt handler. They are type
* specific handlers for each packet type.
*/
-int process_receive_ib(struct hfi1_packet *packet)
+static int process_receive_ib(struct hfi1_packet *packet)
{
if (hfi1_setup_9B_packet(packet))
return RHF_RCV_CONTINUE;
@@ -1607,7 +1606,7 @@ static inline bool hfi1_is_vnic_packet(struct hfi1_packet *packet)
return false;
}
-int process_receive_bypass(struct hfi1_packet *packet)
+static int process_receive_bypass(struct hfi1_packet *packet)
{
struct hfi1_devdata *dd = packet->rcd->dd;
@@ -1649,7 +1648,7 @@ int process_receive_bypass(struct hfi1_packet *packet)
return RHF_RCV_CONTINUE;
}
-int process_receive_error(struct hfi1_packet *packet)
+static int process_receive_error(struct hfi1_packet *packet)
{
/* KHdrHCRCErr -- KDETH packet with a bad HCRC */
if (unlikely(
@@ -1668,7 +1667,7 @@ int process_receive_error(struct hfi1_packet *packet)
return RHF_RCV_CONTINUE;
}
-int kdeth_process_expected(struct hfi1_packet *packet)
+static int kdeth_process_expected(struct hfi1_packet *packet)
{
hfi1_setup_9B_packet(packet);
if (unlikely(hfi1_dbg_should_fault_rx(packet)))
@@ -1682,7 +1681,7 @@ int kdeth_process_expected(struct hfi1_packet *packet)
return RHF_RCV_CONTINUE;
}
-int kdeth_process_eager(struct hfi1_packet *packet)
+static int kdeth_process_eager(struct hfi1_packet *packet)
{
hfi1_setup_9B_packet(packet);
if (unlikely(hfi1_dbg_should_fault_rx(packet)))
@@ -1695,7 +1694,7 @@ int kdeth_process_eager(struct hfi1_packet *packet)
return RHF_RCV_CONTINUE;
}
-int process_receive_invalid(struct hfi1_packet *packet)
+static int process_receive_invalid(struct hfi1_packet *packet)
{
dd_dev_err(packet->rcd->dd, "Invalid packet type %d. Dropping\n",
rhf_rcv_type(packet->rhf));
@@ -1719,9 +1718,8 @@ void seqfile_dump_rcd(struct seq_file *s, struct hfi1_ctxtdata *rcd)
init_ps_mdata(&mdata, &packet);
while (1) {
- struct hfi1_devdata *dd = rcd->dd;
__le32 *rhf_addr = (__le32 *)rcd->rcvhdrq + mdata.ps_head +
- dd->rhf_offset;
+ rcd->rhf_offset;
struct ib_header *hdr;
u64 rhf = rhf_to_cpu(rhf_addr);
u32 etype = rhf_rcv_type(rhf), qpn;
@@ -1738,7 +1736,7 @@ void seqfile_dump_rcd(struct seq_file *s, struct hfi1_ctxtdata *rcd)
if (etype > RHF_RCV_TYPE_IB)
goto next;
- packet.hdr = hfi1_get_msgheader(dd, rhf_addr);
+ packet.hdr = hfi1_get_msgheader(rcd, rhf_addr);
hdr = packet.hdr;
lnh = be16_to_cpu(hdr->lrh[0]) & 3;
@@ -1760,3 +1758,14 @@ next:
update_ps_mdata(&mdata, rcd);
}
}
+
+const rhf_rcv_function_ptr normal_rhf_rcv_functions[] = {
+ [RHF_RCV_TYPE_EXPECTED] = kdeth_process_expected,
+ [RHF_RCV_TYPE_EAGER] = kdeth_process_eager,
+ [RHF_RCV_TYPE_IB] = process_receive_ib,
+ [RHF_RCV_TYPE_ERROR] = process_receive_error,
+ [RHF_RCV_TYPE_BYPASS] = process_receive_bypass,
+ [RHF_RCV_TYPE_INVALID5] = process_receive_invalid,
+ [RHF_RCV_TYPE_INVALID6] = process_receive_invalid,
+ [RHF_RCV_TYPE_INVALID7] = process_receive_invalid,
+};
diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c
index 0fc4aa9455c3..1fc75647e47b 100644
--- a/drivers/infiniband/hw/hfi1/file_ops.c
+++ b/drivers/infiniband/hw/hfi1/file_ops.c
@@ -411,7 +411,7 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
mapio = 1;
break;
case RCV_HDRQ:
- memlen = uctxt->rcvhdrq_size;
+ memlen = rcvhdrq_size(uctxt);
memvirt = uctxt->rcvhdrq;
break;
case RCV_EGRBUF: {
@@ -521,7 +521,7 @@ static int hfi1_file_mmap(struct file *fp, struct vm_area_struct *vma)
break;
case SUBCTXT_RCV_HDRQ:
memaddr = (u64)uctxt->subctxt_rcvhdr_base;
- memlen = uctxt->rcvhdrq_size * uctxt->subctxt_cnt;
+ memlen = rcvhdrq_size(uctxt) * uctxt->subctxt_cnt;
flags |= VM_IO | VM_DONTEXPAND;
vmf = 1;
break;
@@ -985,7 +985,11 @@ static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd,
* sub contexts.
* This has to be done here so the rest of the sub-contexts find the
* proper base context.
+ * NOTE: _set_bit() can be used here because the context creation is
+ * protected by the mutex (rather than the spin_lock), and will be the
+ * very first instance of this context.
*/
+ __set_bit(0, uctxt->in_use_ctxts);
if (uinfo->subctxt_cnt)
init_subctxts(uctxt, uinfo);
uctxt->userversion = uinfo->userversion;
@@ -1040,7 +1044,7 @@ static int setup_subctxt(struct hfi1_ctxtdata *uctxt)
return -ENOMEM;
/* We can take the size of the RcvHdr Queue from the master */
- uctxt->subctxt_rcvhdr_base = vmalloc_user(uctxt->rcvhdrq_size *
+ uctxt->subctxt_rcvhdr_base = vmalloc_user(rcvhdrq_size(uctxt) *
num_subctxts);
if (!uctxt->subctxt_rcvhdr_base) {
ret = -ENOMEM;
diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h
index 4ab8b5bfbed1..d9470317983f 100644
--- a/drivers/infiniband/hw/hfi1/hfi.h
+++ b/drivers/infiniband/hw/hfi1/hfi.h
@@ -169,12 +169,6 @@ extern const struct pci_error_handlers hfi1_pci_err_handler;
struct hfi1_opcode_stats_perctx;
struct ctxt_eager_bufs {
- ssize_t size; /* total size of eager buffers */
- u32 count; /* size of buffers array */
- u32 numbufs; /* number of buffers allocated */
- u32 alloced; /* number of rcvarray entries used */
- u32 rcvtid_size; /* size of each eager rcv tid */
- u32 threshold; /* head update threshold */
struct eager_buffer {
void *addr;
dma_addr_t dma;
@@ -184,6 +178,12 @@ struct ctxt_eager_bufs {
void *addr;
dma_addr_t dma;
} *rcvtids;
+ u32 size; /* total size of eager buffers */
+ u32 rcvtid_size; /* size of each eager rcv tid */
+ u16 count; /* size of buffers array */
+ u16 numbufs; /* number of buffers allocated */
+ u16 alloced; /* number of rcvarray entries used */
+ u16 threshold; /* head update threshold */
};
struct exp_tid_set {
@@ -191,43 +191,84 @@ struct exp_tid_set {
u32 count;
};
+typedef int (*rhf_rcv_function_ptr)(struct hfi1_packet *packet);
struct hfi1_ctxtdata {
- /* shadow the ctxt's RcvCtrl register */
- u64 rcvctrl;
/* rcvhdrq base, needs mmap before useful */
void *rcvhdrq;
/* kernel virtual address where hdrqtail is updated */
volatile __le64 *rcvhdrtail_kvaddr;
- /* when waiting for rcv or pioavail */
- wait_queue_head_t wait;
- /* rcvhdrq size (for freeing) */
- size_t rcvhdrq_size;
+ /* so functions that need physical port can get it easily */
+ struct hfi1_pportdata *ppd;
+ /* so file ops can get at unit */
+ struct hfi1_devdata *dd;
+ /* this receive context's assigned PIO ACK send context */
+ struct send_context *sc;
+ /* per context recv functions */
+ const rhf_rcv_function_ptr *rhf_rcv_function_map;
+ /*
+ * The interrupt handler for a particular receive context can vary
+ * throughout it's lifetime. This is not a lock protected data member so
+ * it must be updated atomically and the prev and new value must always
+ * be valid. Worst case is we process an extra interrupt and up to 64
+ * packets with the wrong interrupt handler.
+ */
+ int (*do_interrupt)(struct hfi1_ctxtdata *rcd, int threaded);
+ /* verbs rx_stats per rcd */
+ struct hfi1_opcode_stats_perctx *opstats;
+ /* clear interrupt mask */
+ u64 imask;
+ /* ctxt rcvhdrq head offset */
+ u32 head;
/* number of rcvhdrq entries */
u16 rcvhdrq_cnt;
+ u8 ireg; /* clear interrupt register */
+ /* receive packet sequence counter */
+ u8 seq_cnt;
/* size of each of the rcvhdrq entries */
- u16 rcvhdrqentsize;
+ u8 rcvhdrqentsize;
+ /* offset of RHF within receive header entry */
+ u8 rhf_offset;
+ /* dynamic receive available interrupt timeout */
+ u8 rcvavail_timeout;
+ /* Indicates that this is vnic context */
+ bool is_vnic;
+ /* vnic queue index this context is mapped to */
+ u8 vnic_q_idx;
+ /* Is ASPM interrupt supported for this context */
+ bool aspm_intr_supported;
+ /* ASPM state (enabled/disabled) for this context */
+ bool aspm_enabled;
+ /* Is ASPM processing enabled for this context (in intr context) */
+ bool aspm_intr_enable;
+ struct ctxt_eager_bufs egrbufs;
+ /* QPs waiting for context processing */
+ struct list_head qp_wait_list;
+ /* tid allocation lists */
+ struct exp_tid_set tid_group_list;
+ struct exp_tid_set tid_used_list;
+ struct exp_tid_set tid_full_list;
+
+ /* Timer for re-enabling ASPM if interrupt activity quiets down */
+ struct timer_list aspm_timer;
+ /* per-context configuration flags */
+ unsigned long flags;
+ /* array of tid_groups */
+ struct tid_group *groups;
/* mmap of hdrq, must fit in 44 bits */
dma_addr_t rcvhdrq_dma;
dma_addr_t rcvhdrqtailaddr_dma;
- struct ctxt_eager_bufs egrbufs;
- /* this receive context's assigned PIO ACK send context */
- struct send_context *sc;
-
- /* dynamic receive available interrupt timeout */
- u32 rcvavail_timeout;
+ /* Last interrupt timestamp */
+ ktime_t aspm_ts_last_intr;
+ /* Last timestamp at which we scheduled a timer for this context */
+ ktime_t aspm_ts_timer_sched;
+ /* Lock to serialize between intr, timer intr and user threads */
+ spinlock_t aspm_lock;
/* Reference count the base context usage */
struct kref kref;
-
- /* Device context index */
- u16 ctxt;
- /*
- * non-zero if ctxt can be shared, and defines the maximum number of
- * sub-contexts for this device context.
- */
- u16 subctxt_cnt;
- /* non-zero if ctxt is being shared. */
- u16 subctxt_id;
- u8 uuid[16];
+ /* numa node of this context */
+ int numa_id;
+ /* associated msix interrupt. */
+ s16 msix_intr;
/* job key */
u16 jkey;
/* number of RcvArray groups for this context. */
@@ -238,87 +279,59 @@ struct hfi1_ctxtdata {
u16 expected_count;
/* index of first expected TID entry. */
u16 expected_base;
- /* array of tid_groups */
- struct tid_group *groups;
-
- struct exp_tid_set tid_group_list;
- struct exp_tid_set tid_used_list;
- struct exp_tid_set tid_full_list;
+ /* Device context index */
+ u8 ctxt;
- /* lock protecting all Expected TID data of user contexts */
+ /* PSM Specific fields */
+ /* lock protecting all Expected TID data */
struct mutex exp_mutex;
- /* per-context configuration flags */
- unsigned long flags;
- /* per-context event flags for fileops/intr communication */
- unsigned long event_flags;
- /* total number of polled urgent packets */
- u32 urgent;
- /* saved total number of polled urgent packets for poll edge trigger */
- u32 urgent_poll;
+ /* when waiting for rcv or pioavail */
+ wait_queue_head_t wait;
+ /* uuid from PSM */
+ u8 uuid[16];
/* same size as task_struct .comm[], command that opened context */
char comm[TASK_COMM_LEN];
- /* so file ops can get at unit */
- struct hfi1_devdata *dd;
- /* so functions that need physical port can get it easily */
- struct hfi1_pportdata *ppd;
- /* associated msix interrupt */
- u32 msix_intr;
+ /* Bitmask of in use context(s) */
+ DECLARE_BITMAP(in_use_ctxts, HFI1_MAX_SHARED_CTXTS);
+ /* per-context event flags for fileops/intr communication */
+ unsigned long event_flags;
/* A page of memory for rcvhdrhead, rcvegrhead, rcvegrtail * N */
void *subctxt_uregbase;
/* An array of pages for the eager receive buffers * N */
void *subctxt_rcvegrbuf;
/* An array of pages for the eager header queue entries * N */
void *subctxt_rcvhdr_base;
- /* Bitmask of in use context(s) */
- DECLARE_BITMAP(in_use_ctxts, HFI1_MAX_SHARED_CTXTS);
- /* The version of the library which opened this ctxt */
- u32 userversion;
+ /* total number of polled urgent packets */
+ u32 urgent;
+ /* saved total number of polled urgent packets for poll edge trigger */
+ u32 urgent_poll;
/* Type of packets or conditions we want to poll for */
u16 poll_type;
- /* receive packet sequence counter */
- u8 seq_cnt;
- /* ctxt rcvhdrq head offset */
- u32 head;
- /* QPs waiting for context processing */
- struct list_head qp_wait_list;
- /* interrupt handling */
- u64 imask; /* clear interrupt mask */
- int ireg; /* clear interrupt register */
- int numa_id; /* numa node of this context */
- /* verbs rx_stats per rcd */
- struct hfi1_opcode_stats_perctx *opstats;
-
- /* Is ASPM interrupt supported for this context */
- bool aspm_intr_supported;
- /* ASPM state (enabled/disabled) for this context */
- bool aspm_enabled;
- /* Timer for re-enabling ASPM if interrupt activity quietens down */
- struct timer_list aspm_timer;
- /* Lock to serialize between intr, timer intr and user threads */
- spinlock_t aspm_lock;
- /* Is ASPM processing enabled for this context (in intr context) */
- bool aspm_intr_enable;
- /* Last interrupt timestamp */
- ktime_t aspm_ts_last_intr;
- /* Last timestamp at which we scheduled a timer for this context */
- ktime_t aspm_ts_timer_sched;
-
+ /* non-zero if ctxt is being shared. */
+ u16 subctxt_id;
+ /* The version of the library which opened this ctxt */
+ u32 userversion;
/*
- * The interrupt handler for a particular receive context can vary
- * throughout it's lifetime. This is not a lock protected data member so
- * it must be updated atomically and the prev and new value must always
- * be valid. Worst case is we process an extra interrupt and up to 64
- * packets with the wrong interrupt handler.
+ * non-zero if ctxt can be shared, and defines the maximum number of
+ * sub-contexts for this device context.
*/
- int (*do_interrupt)(struct hfi1_ctxtdata *rcd, int threaded);
-
- /* Indicates that this is vnic context */
- bool is_vnic;
+ u8 subctxt_cnt;
- /* vnic queue index this context is mapped to */
- u8 vnic_q_idx;
};
+/**
+ * rcvhdrq_size - return total size in bytes for header queue
+ * @rcd: the receive context
+ *
+ * rcvhdrqentsize is in DWs, so we have to convert to bytes
+ *
+ */
+static inline u32 rcvhdrq_size(struct hfi1_ctxtdata *rcd)
+{
+ return PAGE_ALIGN(rcd->rcvhdrq_cnt *
+ rcd->rcvhdrqentsize * sizeof(u32));
+}
+
/*
* Represents a single packet at a high level. Put commonly computed things in
* here so we do not have to keep doing them over and over. The rule of thumb is
@@ -897,12 +910,11 @@ struct hfi1_pportdata {
u64 vl_xmit_flit_cnt[C_VL_COUNT + 1];
};
-typedef int (*rhf_rcv_function_ptr)(struct hfi1_packet *packet);
-
typedef void (*opcode_handler)(struct hfi1_packet *packet);
typedef void (*hfi1_make_req)(struct rvt_qp *qp,
struct hfi1_pkt_state *ps,
struct rvt_swqe *wqe);
+extern const rhf_rcv_function_ptr normal_rhf_rcv_functions[];
/* return values for the RHF receive functions */
@@ -1046,8 +1058,6 @@ struct hfi1_devdata {
dma_addr_t sdma_pad_phys;
/* for deallocation */
size_t sdma_heads_size;
- /* number from the chip */
- u32 chip_sdma_engines;
/* num used */
u32 num_sdma;
/* array of engines sized by num_sdma */
@@ -1102,8 +1112,6 @@ struct hfi1_devdata {
/* base receive interrupt timeout, in CSR units */
u32 rcv_intr_timeout_csr;
- u32 freezelen; /* max length of freezemsg */
- u64 __iomem *egrtidbase;
spinlock_t sendctrl_lock; /* protect changes to SendCtrl */
spinlock_t rcvctrl_lock; /* protect changes to RcvCtrl */
spinlock_t uctxt_lock; /* protect rcd changes */
@@ -1130,25 +1138,6 @@ struct hfi1_devdata {
/* Base GUID for device (network order) */
u64 base_guid;
- /* these are the "32 bit" regs */
-
- /* value we put in kr_rcvhdrsize */
- u32 rcvhdrsize;
- /* number of receive contexts the chip supports */
- u32 chip_rcv_contexts;
- /* number of receive array entries */
- u32 chip_rcv_array_count;
- /* number of PIO send contexts the chip supports */
- u32 chip_send_contexts;
- /* number of bytes in the PIO memory buffer */
- u32 chip_pio_mem_size;
- /* number of bytes in the SDMA memory buffer */
- u32 chip_sdma_mem_size;
-
- /* size of each rcvegrbuffer */
- u32 rcvegrbufsize;
- /* log2 of above */
- u16 rcvegrbufsize_shift;
/* both sides of the PCIe link are gen3 capable */
u8 link_gen3_capable;
u8 dc_shutdown;
@@ -1221,9 +1210,6 @@ struct hfi1_devdata {
u32 num_msix_entries;
u32 first_dyn_msix_idx;
- /* INTx information */
- u32 requested_intx_irq; /* did we request one? */
-
/* general interrupt: mask of handled interrupts */
u64 gi_mask[CCE_NUM_INT_CSRS];
@@ -1289,8 +1275,6 @@ struct hfi1_devdata {
u64 sw_cce_err_status_aggregate;
/* Software counter that aggregates all bypass packet rcv errors */
u64 sw_rcv_bypass_packet_errors;
- /* receive interrupt function */
- rhf_rcv_function_ptr normal_rhf_rcv_functions[8];
/* Save the enabled LCB error bits */
u64 lcb_err_en;
@@ -1329,10 +1313,7 @@ struct hfi1_devdata {
/* seqlock for sc2vl */
seqlock_t sc2vl_lock ____cacheline_aligned_in_smp;
u64 sc2vl[4];
- /* receive interrupt functions */
- rhf_rcv_function_ptr *rhf_rcv_function_map;
u64 __percpu *rcv_limit;
- u16 rhf_offset; /* offset of RHF within receive header entry */
/* adding a new field here would make it part of this cacheline */
/* OUI comes from the HW. Used everywhere as 3 separate bytes. */
@@ -1471,7 +1452,7 @@ void hfi1_make_ud_req_16B(struct rvt_qp *qp,
/* calculate the current RHF address */
static inline __le32 *get_rhf_addr(struct hfi1_ctxtdata *rcd)
{
- return (__le32 *)rcd->rcvhdrq + rcd->head + rcd->dd->rhf_offset;
+ return (__le32 *)rcd->rcvhdrq + rcd->head + rcd->rhf_offset;
}
int hfi1_reset_device(int);
@@ -2021,12 +2002,6 @@ static inline void flush_wc(void)
}
void handle_eflags(struct hfi1_packet *packet);
-int process_receive_ib(struct hfi1_packet *packet);
-int process_receive_bypass(struct hfi1_packet *packet);
-int process_receive_error(struct hfi1_packet *packet);
-int kdeth_process_expected(struct hfi1_packet *packet);
-int kdeth_process_eager(struct hfi1_packet *packet);
-int process_receive_invalid(struct hfi1_packet *packet);
void seqfile_dump_rcd(struct seq_file *s, struct hfi1_ctxtdata *rcd);
/* global module parameter variables */
diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
index f110842b91f5..758d273c32cf 100644
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -364,9 +364,9 @@ int hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, int numa,
hfi1_exp_tid_group_init(rcd);
rcd->ppd = ppd;
rcd->dd = dd;
- __set_bit(0, rcd->in_use_ctxts);
rcd->numa_id = numa;
rcd->rcv_array_groups = dd->rcv_entries.ngroups;
+ rcd->rhf_rcv_function_map = normal_rhf_rcv_functions;
mutex_init(&rcd->exp_mutex);
@@ -404,6 +404,8 @@ int hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, int numa,
rcd->rcvhdrq_cnt = rcvhdrcnt;
rcd->rcvhdrqentsize = hfi1_hdrq_entsize;
+ rcd->rhf_offset =
+ rcd->rcvhdrqentsize - sizeof(u64) / sizeof(u32);
/*
* Simple Eager buffer allocation: we have already pre-allocated
* the number of RcvArray entry groups. Each ctxtdata structure
@@ -853,24 +855,6 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit)
struct hfi1_ctxtdata *rcd;
struct hfi1_pportdata *ppd;
- /* Set up recv low level handlers */
- dd->normal_rhf_rcv_functions[RHF_RCV_TYPE_EXPECTED] =
- kdeth_process_expected;
- dd->normal_rhf_rcv_functions[RHF_RCV_TYPE_EAGER] =
- kdeth_process_eager;
- dd->normal_rhf_rcv_functions[RHF_RCV_TYPE_IB] = process_receive_ib;
- dd->normal_rhf_rcv_functions[RHF_RCV_TYPE_ERROR] =
- process_receive_error;
- dd->normal_rhf_rcv_functions[RHF_RCV_TYPE_BYPASS] =
- process_receive_bypass;
- dd->normal_rhf_rcv_functions[RHF_RCV_TYPE_INVALID5] =
- process_receive_invalid;
- dd->normal_rhf_rcv_functions[RHF_RCV_TYPE_INVALID6] =
- process_receive_invalid;
- dd->normal_rhf_rcv_functions[RHF_RCV_TYPE_INVALID7] =
- process_receive_invalid;
- dd->rhf_rcv_function_map = dd->normal_rhf_rcv_functions;
-
/* Set up send low level handlers */
dd->process_pio_send = hfi1_verbs_send_pio;
dd->process_dma_send = hfi1_verbs_send_dma;
@@ -936,7 +920,7 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit)
}
/* Allocate enough memory for user event notification. */
- len = PAGE_ALIGN(dd->chip_rcv_contexts * HFI1_MAX_SHARED_CTXTS *
+ len = PAGE_ALIGN(chip_rcv_contexts(dd) * HFI1_MAX_SHARED_CTXTS *
sizeof(*dd->events));
dd->events = vmalloc_user(len);
if (!dd->events)
@@ -948,9 +932,6 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit)
dd->status = vmalloc_user(PAGE_SIZE);
if (!dd->status)
dd_dev_err(dd, "Failed to allocate dev status page\n");
- else
- dd->freezelen = PAGE_SIZE - (sizeof(*dd->status) -
- sizeof(dd->status->freezemsg));
for (pidx = 0; pidx < dd->num_pports; ++pidx) {
ppd = dd->pport + pidx;
if (dd->status)
@@ -1144,7 +1125,7 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
return;
if (rcd->rcvhdrq) {
- dma_free_coherent(&dd->pcidev->dev, rcd->rcvhdrq_size,
+ dma_free_coherent(&dd->pcidev->dev, rcvhdrq_size(rcd),
rcd->rcvhdrq, rcd->rcvhdrq_dma);
rcd->rcvhdrq = NULL;
if (rcd->rcvhdrtail_kvaddr) {
@@ -1855,12 +1836,7 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
if (!rcd->rcvhdrq) {
gfp_t gfp_flags;
- /*
- * rcvhdrqentsize is in DWs, so we have to convert to bytes
- * (* sizeof(u32)).
- */
- amt = PAGE_ALIGN(rcd->rcvhdrq_cnt * rcd->rcvhdrqentsize *
- sizeof(u32));
+ amt = rcvhdrq_size(rcd);
if (rcd->ctxt < dd->first_dyn_alloc_ctxt || rcd->is_vnic)
gfp_flags = GFP_KERNEL;
@@ -1885,8 +1861,6 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
if (!rcd->rcvhdrtail_kvaddr)
goto bail_free;
}
-
- rcd->rcvhdrq_size = amt;
}
/*
* These values are per-context:
@@ -1902,7 +1876,7 @@ int hfi1_create_rcvhdrq(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
& RCV_HDR_ENT_SIZE_ENT_SIZE_MASK)
<< RCV_HDR_ENT_SIZE_ENT_SIZE_SHIFT;
write_kctxt_csr(dd, rcd->ctxt, RCV_HDR_ENT_SIZE, reg);
- reg = (dd->rcvhdrsize & RCV_HDR_SIZE_HDR_SIZE_MASK)
+ reg = ((u64)DEFAULT_RCVHDRSIZE & RCV_HDR_SIZE_HDR_SIZE_MASK)
<< RCV_HDR_SIZE_HDR_SIZE_SHIFT;
write_kctxt_csr(dd, rcd->ctxt, RCV_HDR_SIZE, reg);
@@ -1938,9 +1912,9 @@ bail:
int hfi1_setup_eagerbufs(struct hfi1_ctxtdata *rcd)
{
struct hfi1_devdata *dd = rcd->dd;
- u32 max_entries, egrtop, alloced_bytes = 0, idx = 0;
+ u32 max_entries, egrtop, alloced_bytes = 0;
gfp_t gfp_flags;
- u16 order;
+ u16 order, idx = 0;
int ret = 0;
u16 round_mtu = roundup_pow_of_two(hfi1_max_mtu);
diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c
index 4d4371bf2c7c..de3ee606034c 100644
--- a/drivers/infiniband/hw/hfi1/pcie.c
+++ b/drivers/infiniband/hw/hfi1/pcie.c
@@ -157,6 +157,7 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev)
unsigned long len;
resource_size_t addr;
int ret = 0;
+ u32 rcv_array_count;
addr = pci_resource_start(pdev, 0);
len = pci_resource_len(pdev, 0);
@@ -186,9 +187,9 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev)
goto nomem;
}
- dd->chip_rcv_array_count = readq(dd->kregbase1 + RCV_ARRAY_CNT);
- dd_dev_info(dd, "RcvArray count: %u\n", dd->chip_rcv_array_count);
- dd->base2_start = RCV_ARRAY + dd->chip_rcv_array_count * 8;
+ rcv_array_count = readq(dd->kregbase1 + RCV_ARRAY_CNT);
+ dd_dev_info(dd, "RcvArray count: %u\n", rcv_array_count);
+ dd->base2_start = RCV_ARRAY + rcv_array_count * 8;
dd->kregbase2 = ioremap_nocache(
addr + dd->base2_start,
@@ -214,13 +215,13 @@ int hfi1_pcie_ddinit(struct hfi1_devdata *dd, struct pci_dev *pdev)
* to write an entire cacheline worth of entries in one shot.
*/
dd->rcvarray_wc = ioremap_wc(addr + RCV_ARRAY,
- dd->chip_rcv_array_count * 8);
+ rcv_array_count * 8);
if (!dd->rcvarray_wc) {
dd_dev_err(dd, "WC mapping of receive array failed\n");
goto nomem;
}
dd_dev_info(dd, "WC RcvArray: %p for %x\n",
- dd->rcvarray_wc, dd->chip_rcv_array_count * 8);
+ dd->rcvarray_wc, rcv_array_count * 8);
dd->flags |= HFI1_PRESENT; /* chip.c CSR routines now work */
return 0;
@@ -346,15 +347,13 @@ int pcie_speeds(struct hfi1_devdata *dd)
/*
* Returns:
* - actual number of interrupts allocated or
- * - 0 if fell back to INTx.
* - error
*/
int request_msix(struct hfi1_devdata *dd, u32 msireq)
{
int nvec;
- nvec = pci_alloc_irq_vectors(dd->pcidev, 1, msireq,
- PCI_IRQ_MSIX | PCI_IRQ_LEGACY);
+ nvec = pci_alloc_irq_vectors(dd->pcidev, msireq, msireq, PCI_IRQ_MSIX);
if (nvec < 0) {
dd_dev_err(dd, "pci_alloc_irq_vectors() failed: %d\n", nvec);
return nvec;
@@ -362,10 +361,6 @@ int request_msix(struct hfi1_devdata *dd, u32 msireq)
tune_pcie_caps(dd);
- /* check for legacy IRQ */
- if (nvec == 1 && !dd->pcidev->msix_enabled)
- return 0;
-
return nvec;
}
diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c
index 9cac15d10c4f..c2c1cba5b23b 100644
--- a/drivers/infiniband/hw/hfi1/pio.c
+++ b/drivers/infiniband/hw/hfi1/pio.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015-2017 Intel Corporation.
+ * Copyright(c) 2015-2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -226,7 +226,7 @@ static const char *sc_type_name(int index)
int init_sc_pools_and_sizes(struct hfi1_devdata *dd)
{
struct mem_pool_info mem_pool_info[NUM_SC_POOLS] = { { 0 } };
- int total_blocks = (dd->chip_pio_mem_size / PIO_BLOCK_SIZE) - 1;
+ int total_blocks = (chip_pio_mem_size(dd) / PIO_BLOCK_SIZE) - 1;
int total_contexts = 0;
int fixed_blocks;
int pool_blocks;
@@ -343,8 +343,8 @@ int init_sc_pools_and_sizes(struct hfi1_devdata *dd)
sc_type_name(i), count);
return -EINVAL;
}
- if (total_contexts + count > dd->chip_send_contexts)
- count = dd->chip_send_contexts - total_contexts;
+ if (total_contexts + count > chip_send_contexts(dd))
+ count = chip_send_contexts(dd) - total_contexts;
total_contexts += count;
@@ -507,7 +507,7 @@ static int sc_hw_alloc(struct hfi1_devdata *dd, int type, u32 *sw_index,
if (sci->type == type && sci->allocated == 0) {
sci->allocated = 1;
/* use a 1:1 mapping, but make them non-equal */
- context = dd->chip_send_contexts - index - 1;
+ context = chip_send_contexts(dd) - index - 1;
dd->hw_to_sw[context] = index;
*sw_index = index;
*hw_context = context;
@@ -1618,11 +1618,11 @@ static void sc_piobufavail(struct send_context *sc)
/* Wake up the most starved one first */
if (n)
hfi1_qp_wakeup(qps[max_idx],
- RVT_S_WAIT_PIO | RVT_S_WAIT_PIO_DRAIN);
+ RVT_S_WAIT_PIO | HFI1_S_WAIT_PIO_DRAIN);
for (i = 0; i < n; i++)
if (i != max_idx)
hfi1_qp_wakeup(qps[i],
- RVT_S_WAIT_PIO | RVT_S_WAIT_PIO_DRAIN);
+ RVT_S_WAIT_PIO | HFI1_S_WAIT_PIO_DRAIN);
}
/* translate a send credit update to a bit code of reasons */
diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c
index 1697d96151bd..9b1e84a6b1cc 100644
--- a/drivers/infiniband/hw/hfi1/qp.c
+++ b/drivers/infiniband/hw/hfi1/qp.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015 - 2017 Intel Corporation.
+ * Copyright(c) 2015 - 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -273,7 +273,7 @@ void hfi1_modify_qp(struct rvt_qp *qp, struct ib_qp_attr *attr,
if (attr_mask & IB_QP_PATH_MIG_STATE &&
attr->path_mig_state == IB_MIG_MIGRATED &&
qp->s_mig_state == IB_MIG_ARMED) {
- qp->s_flags |= RVT_S_AHG_CLEAR;
+ qp->s_flags |= HFI1_S_AHG_CLEAR;
priv->s_sc = ah_to_sc(ibqp->device, &qp->remote_ah_attr);
priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc);
priv->s_sendcontext = qp_to_send_context(qp, priv->s_sc);
@@ -717,7 +717,7 @@ void hfi1_migrate_qp(struct rvt_qp *qp)
qp->remote_ah_attr = qp->alt_ah_attr;
qp->port_num = rdma_ah_get_port_num(&qp->alt_ah_attr);
qp->s_pkey_index = qp->s_alt_pkey_index;
- qp->s_flags |= RVT_S_AHG_CLEAR;
+ qp->s_flags |= HFI1_S_AHG_CLEAR;
priv->s_sc = ah_to_sc(qp->ibqp.device, &qp->remote_ah_attr);
priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc);
qp_set_16b(qp);
diff --git a/drivers/infiniband/hw/hfi1/qp.h b/drivers/infiniband/hw/hfi1/qp.h
index b2d4cba8d15b..078cff7560b6 100644
--- a/drivers/infiniband/hw/hfi1/qp.h
+++ b/drivers/infiniband/hw/hfi1/qp.h
@@ -1,7 +1,7 @@
#ifndef _QP_H
#define _QP_H
/*
- * Copyright(c) 2015 - 2017 Intel Corporation.
+ * Copyright(c) 2015 - 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -70,6 +70,26 @@ static inline int hfi1_send_ok(struct rvt_qp *qp)
}
/*
+ * Driver specific s_flags starting at bit 31 down to HFI1_S_MIN_BIT_MASK
+ *
+ * HFI1_S_AHG_VALID - ahg header valid on chip
+ * HFI1_S_AHG_CLEAR - have send engine clear ahg state
+ * HFI1_S_WAIT_PIO_DRAIN - qp waiting for PIOs to drain
+ * HFI1_S_MIN_BIT_MASK - the lowest bit that can be used by hfi1
+ */
+#define HFI1_S_AHG_VALID 0x80000000
+#define HFI1_S_AHG_CLEAR 0x40000000
+#define HFI1_S_WAIT_PIO_DRAIN 0x20000000
+#define HFI1_S_MIN_BIT_MASK 0x01000000
+
+/*
+ * overload wait defines
+ */
+
+#define HFI1_S_ANY_WAIT_IO (RVT_S_ANY_WAIT_IO | HFI1_S_WAIT_PIO_DRAIN)
+#define HFI1_S_ANY_WAIT (HFI1_S_ANY_WAIT_IO | RVT_S_ANY_WAIT_SEND)
+
+/*
* free_ahg - clear ahg from QP
*/
static inline void clear_ahg(struct rvt_qp *qp)
@@ -77,7 +97,7 @@ static inline void clear_ahg(struct rvt_qp *qp)
struct hfi1_qp_priv *priv = qp->priv;
priv->s_ahg->ahgcount = 0;
- qp->s_flags &= ~(RVT_S_AHG_VALID | RVT_S_AHG_CLEAR);
+ qp->s_flags &= ~(HFI1_S_AHG_VALID | HFI1_S_AHG_CLEAR);
if (priv->s_sde && qp->s_ahgidx >= 0)
sdma_ahg_free(priv->s_sde, qp->s_ahgidx);
qp->s_ahgidx = -1;
diff --git a/drivers/infiniband/hw/hfi1/rc.c b/drivers/infiniband/hw/hfi1/rc.c
index 1a1a47ac53c6..1d31bd2fa91f 100644
--- a/drivers/infiniband/hw/hfi1/rc.c
+++ b/drivers/infiniband/hw/hfi1/rc.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015, 2016 Intel Corporation.
+ * Copyright(c) 2015 - 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -241,7 +241,7 @@ bail:
smp_wmb();
qp->s_flags &= ~(RVT_S_RESP_PENDING
| RVT_S_ACK_PENDING
- | RVT_S_AHG_VALID);
+ | HFI1_S_AHG_VALID);
return 0;
}
@@ -1024,7 +1024,7 @@ done:
if ((cmp_psn(qp->s_psn, qp->s_sending_hpsn) <= 0) &&
(cmp_psn(qp->s_sending_psn, qp->s_sending_hpsn) <= 0))
qp->s_flags |= RVT_S_WAIT_PSN;
- qp->s_flags &= ~RVT_S_AHG_VALID;
+ qp->s_flags &= ~HFI1_S_AHG_VALID;
}
/*
diff --git a/drivers/infiniband/hw/hfi1/ruc.c b/drivers/infiniband/hw/hfi1/ruc.c
index ef4c566e206f..5f56f3c1b4c4 100644
--- a/drivers/infiniband/hw/hfi1/ruc.c
+++ b/drivers/infiniband/hw/hfi1/ruc.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2015 - 2017 Intel Corporation.
+ * Copyright(c) 2015 - 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -194,7 +194,7 @@ static void ruc_loopback(struct rvt_qp *sqp)
spin_lock_irqsave(&sqp->s_lock, flags);
/* Return if we are already busy processing a work request. */
- if ((sqp->s_flags & (RVT_S_BUSY | RVT_S_ANY_WAIT)) ||
+ if ((sqp->s_flags & (RVT_S_BUSY | HFI1_S_ANY_WAIT)) ||
!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_OR_FLUSH_SEND))
goto unlock;
@@ -533,9 +533,9 @@ static inline void build_ahg(struct rvt_qp *qp, u32 npsn)
{
struct hfi1_qp_priv *priv = qp->priv;
- if (unlikely(qp->s_flags & RVT_S_AHG_CLEAR))
+ if (unlikely(qp->s_flags & HFI1_S_AHG_CLEAR))
clear_ahg(qp);
- if (!(qp->s_flags & RVT_S_AHG_VALID)) {
+ if (!(qp->s_flags & HFI1_S_AHG_VALID)) {
/* first middle that needs copy */
if (qp->s_ahgidx < 0)
qp->s_ahgidx = sdma_ahg_alloc(priv->s_sde);
@@ -544,7 +544,7 @@ static inline void build_ahg(struct rvt_qp *qp, u32 npsn)
priv->s_ahg->tx_flags |= SDMA_TXREQ_F_AHG_COPY;
/* save to protect a change in another thread */
priv->s_ahg->ahgidx = qp->s_ahgidx;
- qp->s_flags |= RVT_S_AHG_VALID;
+ qp->s_flags |= HFI1_S_AHG_VALID;
}
} else {
/* subsequent middle after valid */
@@ -650,7 +650,7 @@ static inline void hfi1_make_ruc_header_16B(struct rvt_qp *qp,
if (middle)
build_ahg(qp, bth2);
else
- qp->s_flags &= ~RVT_S_AHG_VALID;
+ qp->s_flags &= ~HFI1_S_AHG_VALID;
bth0 |= pkey;
bth0 |= extra_bytes << 20;
@@ -727,7 +727,7 @@ static inline void hfi1_make_ruc_header_9B(struct rvt_qp *qp,
if (middle)
build_ahg(qp, bth2);
else
- qp->s_flags &= ~RVT_S_AHG_VALID;
+ qp->s_flags &= ~HFI1_S_AHG_VALID;
bth0 |= pkey;
bth0 |= extra_bytes << 20;
diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c
index 7fb350b87b49..88e326d6cc49 100644
--- a/drivers/infiniband/hw/hfi1/sdma.c
+++ b/drivers/infiniband/hw/hfi1/sdma.c
@@ -1351,7 +1351,7 @@ int sdma_init(struct hfi1_devdata *dd, u8 port)
struct hfi1_pportdata *ppd = dd->pport + port;
u32 per_sdma_credits;
uint idle_cnt = sdma_idle_cnt;
- size_t num_engines = dd->chip_sdma_engines;
+ size_t num_engines = chip_sdma_engines(dd);
int ret = -ENOMEM;
if (!HFI1_CAP_IS_KSET(SDMA)) {
@@ -1360,18 +1360,18 @@ int sdma_init(struct hfi1_devdata *dd, u8 port)
}
if (mod_num_sdma &&
/* can't exceed chip support */
- mod_num_sdma <= dd->chip_sdma_engines &&
+ mod_num_sdma <= chip_sdma_engines(dd) &&
/* count must be >= vls */
mod_num_sdma >= num_vls)
num_engines = mod_num_sdma;
dd_dev_info(dd, "SDMA mod_num_sdma: %u\n", mod_num_sdma);
- dd_dev_info(dd, "SDMA chip_sdma_engines: %u\n", dd->chip_sdma_engines);
+ dd_dev_info(dd, "SDMA chip_sdma_engines: %u\n", chip_sdma_engines(dd));
dd_dev_info(dd, "SDMA chip_sdma_mem_size: %u\n",
- dd->chip_sdma_mem_size);
+ chip_sdma_mem_size(dd));
per_sdma_credits =
- dd->chip_sdma_mem_size / (num_engines * SDMA_BLOCK_SIZE);
+ chip_sdma_mem_size(dd) / (num_engines * SDMA_BLOCK_SIZE);
/* set up freeze waitqueue */
init_waitqueue_head(&dd->sdma_unfreeze_wq);
diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c
index 08991874c0e2..13374c727b14 100644
--- a/drivers/infiniband/hw/hfi1/verbs.c
+++ b/drivers/infiniband/hw/hfi1/verbs.c
@@ -1007,7 +1007,7 @@ static int pio_wait(struct rvt_qp *qp,
int was_empty;
dev->n_piowait += !!(flag & RVT_S_WAIT_PIO);
- dev->n_piodrain += !!(flag & RVT_S_WAIT_PIO_DRAIN);
+ dev->n_piodrain += !!(flag & HFI1_S_WAIT_PIO_DRAIN);
qp->s_flags |= flag;
was_empty = list_empty(&sc->piowait);
iowait_queue(ps->pkts_sent, &priv->s_iowait,
@@ -1376,7 +1376,7 @@ int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
return pio_wait(qp,
ps->s_txreq->psc,
ps,
- RVT_S_WAIT_PIO_DRAIN);
+ HFI1_S_WAIT_PIO_DRAIN);
return sr(qp, ps, 0);
}
@@ -1410,7 +1410,8 @@ static void hfi1_fill_device_attr(struct hfi1_devdata *dd)
rdi->dparms.props.max_fast_reg_page_list_len = UINT_MAX;
rdi->dparms.props.max_qp = hfi1_max_qps;
rdi->dparms.props.max_qp_wr = hfi1_max_qp_wrs;
- rdi->dparms.props.max_sge = hfi1_max_sges;
+ rdi->dparms.props.max_send_sge = hfi1_max_sges;
+ rdi->dparms.props.max_recv_sge = hfi1_max_sges;
rdi->dparms.props.max_sge_rd = hfi1_max_sges;
rdi->dparms.props.max_cq = hfi1_max_cqs;
rdi->dparms.props.max_ah = hfi1_max_ahs;
@@ -1497,15 +1498,6 @@ static int query_port(struct rvt_dev_info *rdi, u8 port_num,
props->active_mtu = !valid_ib_mtu(ppd->ibmtu) ? props->max_mtu :
mtu_to_enum(ppd->ibmtu, IB_MTU_4096);
- /*
- * sm_lid of 0xFFFF needs special handling so that it can
- * be differentiated from a permissve LID of 0xFFFF.
- * We set the grh_required flag here so the SA can program
- * the DGID in the address handle appropriately
- */
- if (props->sm_lid == be16_to_cpu(IB_LID_PERMISSIVE))
- props->grh_required = true;
-
return 0;
}
@@ -1892,7 +1884,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd)
ibdev->process_mad = hfi1_process_mad;
ibdev->get_dev_fw_str = hfi1_get_dev_fw_str;
- strncpy(ibdev->node_desc, init_utsname()->nodename,
+ strlcpy(ibdev->node_desc, init_utsname()->nodename,
sizeof(ibdev->node_desc));
/*
diff --git a/drivers/infiniband/hw/hfi1/vnic_main.c b/drivers/infiniband/hw/hfi1/vnic_main.c
index 5d65582fe4d9..ba160f99cf8e 100644
--- a/drivers/infiniband/hw/hfi1/vnic_main.c
+++ b/drivers/infiniband/hw/hfi1/vnic_main.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2017 Intel Corporation.
+ * Copyright(c) 2017 - 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -120,8 +120,7 @@ static int allocate_vnic_ctxt(struct hfi1_devdata *dd,
uctxt->seq_cnt = 1;
uctxt->is_vnic = true;
- if (dd->num_msix_entries)
- hfi1_set_vnic_msix_info(uctxt);
+ hfi1_set_vnic_msix_info(uctxt);
hfi1_stats.sps_ctxts++;
dd_dev_dbg(dd, "created vnic context %d\n", uctxt->ctxt);
@@ -136,8 +135,7 @@ static void deallocate_vnic_ctxt(struct hfi1_devdata *dd,
dd_dev_dbg(dd, "closing vnic context %d\n", uctxt->ctxt);
flush_wc();
- if (dd->num_msix_entries)
- hfi1_reset_vnic_msix_info(uctxt);
+ hfi1_reset_vnic_msix_info(uctxt);
/*
* Disable receive context and interrupt available, reset all
@@ -818,14 +816,14 @@ struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device,
size = sizeof(struct opa_vnic_rdma_netdev) + sizeof(*vinfo);
netdev = alloc_netdev_mqs(size, name, name_assign_type, setup,
- dd->chip_sdma_engines, dd->num_vnic_contexts);
+ chip_sdma_engines(dd), dd->num_vnic_contexts);
if (!netdev)
return ERR_PTR(-ENOMEM);
rn = netdev_priv(netdev);
vinfo = opa_vnic_dev_priv(netdev);
vinfo->dd = dd;
- vinfo->num_tx_q = dd->chip_sdma_engines;
+ vinfo->num_tx_q = chip_sdma_engines(dd);
vinfo->num_rx_q = dd->num_vnic_contexts;
vinfo->netdev = netdev;
rn->free_rdma_netdev = hfi1_vnic_free_rn;
diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c
index d74928621559..14efa3b9adb2 100644
--- a/drivers/infiniband/hw/hns/hns_roce_ah.c
+++ b/drivers/infiniband/hw/hns/hns_roce_ah.c
@@ -44,13 +44,11 @@ struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd,
struct ib_udata *udata)
{
struct hns_roce_dev *hr_dev = to_hr_dev(ibpd->device);
+ const struct ib_gid_attr *gid_attr;
struct device *dev = hr_dev->dev;
- struct ib_gid_attr gid_attr;
struct hns_roce_ah *ah;
u16 vlan_tag = 0xffff;
const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr);
- union ib_gid sgid;
- int ret;
ah = kzalloc(sizeof(*ah), GFP_ATOMIC);
if (!ah)
@@ -59,18 +57,9 @@ struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd,
/* Get mac address */
memcpy(ah->av.mac, ah_attr->roce.dmac, ETH_ALEN);
- /* Get source gid */
- ret = ib_get_cached_gid(ibpd->device, rdma_ah_get_port_num(ah_attr),
- grh->sgid_index, &sgid, &gid_attr);
- if (ret) {
- dev_err(dev, "get sgid failed! ret = %d\n", ret);
- kfree(ah);
- return ERR_PTR(ret);
- }
-
- if (is_vlan_dev(gid_attr.ndev))
- vlan_tag = vlan_dev_vlan_id(gid_attr.ndev);
- dev_put(gid_attr.ndev);
+ gid_attr = ah_attr->grh.sgid_attr;
+ if (is_vlan_dev(gid_attr->ndev))
+ vlan_tag = vlan_dev_vlan_id(gid_attr->ndev);
if (vlan_tag < 0x1000)
vlan_tag |= (rdma_ah_get_sl(ah_attr) &
diff --git a/drivers/infiniband/hw/hns/hns_roce_common.h b/drivers/infiniband/hw/hns/hns_roce_common.h
index 319cb74aebaf..93d4b4ec002d 100644
--- a/drivers/infiniband/hw/hns/hns_roce_common.h
+++ b/drivers/infiniband/hw/hns/hns_roce_common.h
@@ -382,15 +382,6 @@
#define ROCEE_VF_EQ_DB_CFG0_REG 0x238
#define ROCEE_VF_EQ_DB_CFG1_REG 0x23C
-#define ROCEE_VF_SMAC_CFG0_REG 0x12000
-#define ROCEE_VF_SMAC_CFG1_REG 0x12004
-
-#define ROCEE_VF_SGID_CFG0_REG 0x10000
-#define ROCEE_VF_SGID_CFG1_REG 0x10004
-#define ROCEE_VF_SGID_CFG2_REG 0x10008
-#define ROCEE_VF_SGID_CFG3_REG 0x1000c
-#define ROCEE_VF_SGID_CFG4_REG 0x10010
-
#define ROCEE_VF_ABN_INT_CFG_REG 0x13000
#define ROCEE_VF_ABN_INT_ST_REG 0x13004
#define ROCEE_VF_ABN_INT_EN_REG 0x13008
diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index 31221d506d9a..a595e72f243e 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -579,22 +579,22 @@ struct hns_roce_ceqe {
};
struct hns_roce_aeqe {
- u32 asyn;
+ __le32 asyn;
union {
struct {
- u32 qp;
+ __le32 qp;
u32 rsv0;
u32 rsv1;
} qp_event;
struct {
- u32 cq;
+ __le32 cq;
u32 rsv0;
u32 rsv1;
} cq_event;
struct {
- u32 ceqe;
+ __le32 ceqe;
u32 rsv0;
u32 rsv1;
} ce_event;
@@ -720,6 +720,9 @@ struct hns_roce_caps {
u32 eqe_ba_pg_sz;
u32 eqe_buf_pg_sz;
u32 eqe_hop_num;
+ u32 sl_num;
+ u32 tsq_buf_pg_sz;
+ u32 tpq_buf_pg_sz;
u32 chunk_sz; /* chunk size in non multihop mode*/
u64 flags;
};
@@ -736,7 +739,7 @@ struct hns_roce_hw {
u16 token, int event);
int (*chk_mbox)(struct hns_roce_dev *hr_dev, unsigned long timeout);
int (*set_gid)(struct hns_roce_dev *hr_dev, u8 port, int gid_index,
- union ib_gid *gid, const struct ib_gid_attr *attr);
+ const union ib_gid *gid, const struct ib_gid_attr *attr);
int (*set_mac)(struct hns_roce_dev *hr_dev, u8 phy_port, u8 *addr);
void (*set_mtu)(struct hns_roce_dev *hr_dev, u8 phy_port,
enum ib_mtu mtu);
@@ -864,7 +867,7 @@ static inline struct hns_roce_sqp *hr_to_hr_sqp(struct hns_roce_qp *hr_qp)
return container_of(hr_qp, struct hns_roce_sqp, hr_qp);
}
-static inline void hns_roce_write64_k(__be32 val[2], void __iomem *dest)
+static inline void hns_roce_write64_k(__le32 val[2], void __iomem *dest)
{
__raw_writeq(*(u64 *) val, dest);
}
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
index 8013d69c5ac4..783d28dd3ca4 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c
@@ -175,10 +175,10 @@ static int hns_roce_v1_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
UD_SEND_WQE_U32_36_FLOW_LABEL_M,
UD_SEND_WQE_U32_36_FLOW_LABEL_S, 0);
roce_set_field(ud_sq_wqe->u32_36,
- UD_SEND_WQE_U32_36_PRIORITY_M,
- UD_SEND_WQE_U32_36_PRIORITY_S,
- ah->av.sl_tclass_flowlabel >>
- HNS_ROCE_SL_SHIFT);
+ UD_SEND_WQE_U32_36_PRIORITY_M,
+ UD_SEND_WQE_U32_36_PRIORITY_S,
+ le32_to_cpu(ah->av.sl_tclass_flowlabel) >>
+ HNS_ROCE_SL_SHIFT);
roce_set_field(ud_sq_wqe->u32_36,
UD_SEND_WQE_U32_36_SGID_INDEX_M,
UD_SEND_WQE_U32_36_SGID_INDEX_S,
@@ -333,7 +333,7 @@ out:
doorbell[0] = le32_to_cpu(sq_db.u32_4);
doorbell[1] = le32_to_cpu(sq_db.u32_8);
- hns_roce_write64_k(doorbell, qp->sq.db_reg_l);
+ hns_roce_write64_k((__le32 *)doorbell, qp->sq.db_reg_l);
qp->sq_next_wqe = ind;
}
@@ -349,7 +349,7 @@ static int hns_roce_v1_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
int nreq = 0;
int ind = 0;
int i = 0;
- u32 reg_val = 0;
+ u32 reg_val;
unsigned long flags = 0;
struct hns_roce_rq_wqe_ctrl *ctrl = NULL;
struct hns_roce_wqe_data_seg *scat = NULL;
@@ -402,14 +402,18 @@ out:
wmb();
if (ibqp->qp_type == IB_QPT_GSI) {
+ __le32 tmp;
+
/* SW update GSI rq header */
reg_val = roce_read(to_hr_dev(ibqp->device),
ROCEE_QP1C_CFG3_0_REG +
QP1C_CFGN_OFFSET * hr_qp->phy_port);
- roce_set_field(reg_val,
+ tmp = cpu_to_le32(reg_val);
+ roce_set_field(tmp,
ROCEE_QP1C_CFG3_0_ROCEE_QP1C_RQ_HEAD_M,
ROCEE_QP1C_CFG3_0_ROCEE_QP1C_RQ_HEAD_S,
hr_qp->rq.head);
+ reg_val = le32_to_cpu(tmp);
roce_write(to_hr_dev(ibqp->device),
ROCEE_QP1C_CFG3_0_REG +
QP1C_CFGN_OFFSET * hr_qp->phy_port, reg_val);
@@ -430,7 +434,8 @@ out:
doorbell[0] = le32_to_cpu(rq_db.u32_4);
doorbell[1] = le32_to_cpu(rq_db.u32_8);
- hns_roce_write64_k(doorbell, hr_qp->rq.db_reg_l);
+ hns_roce_write64_k((__le32 *)doorbell,
+ hr_qp->rq.db_reg_l);
}
}
spin_unlock_irqrestore(&hr_qp->rq.lock, flags);
@@ -441,51 +446,63 @@ out:
static void hns_roce_set_db_event_mode(struct hns_roce_dev *hr_dev,
int sdb_mode, int odb_mode)
{
+ __le32 tmp;
u32 val;
val = roce_read(hr_dev, ROCEE_GLB_CFG_REG);
- roce_set_bit(val, ROCEE_GLB_CFG_ROCEE_DB_SQ_MODE_S, sdb_mode);
- roce_set_bit(val, ROCEE_GLB_CFG_ROCEE_DB_OTH_MODE_S, odb_mode);
+ tmp = cpu_to_le32(val);
+ roce_set_bit(tmp, ROCEE_GLB_CFG_ROCEE_DB_SQ_MODE_S, sdb_mode);
+ roce_set_bit(tmp, ROCEE_GLB_CFG_ROCEE_DB_OTH_MODE_S, odb_mode);
+ val = le32_to_cpu(tmp);
roce_write(hr_dev, ROCEE_GLB_CFG_REG, val);
}
static void hns_roce_set_db_ext_mode(struct hns_roce_dev *hr_dev, u32 sdb_mode,
u32 odb_mode)
{
+ __le32 tmp;
u32 val;
/* Configure SDB/ODB extend mode */
val = roce_read(hr_dev, ROCEE_GLB_CFG_REG);
- roce_set_bit(val, ROCEE_GLB_CFG_SQ_EXT_DB_MODE_S, sdb_mode);
- roce_set_bit(val, ROCEE_GLB_CFG_OTH_EXT_DB_MODE_S, odb_mode);
+ tmp = cpu_to_le32(val);
+ roce_set_bit(tmp, ROCEE_GLB_CFG_SQ_EXT_DB_MODE_S, sdb_mode);
+ roce_set_bit(tmp, ROCEE_GLB_CFG_OTH_EXT_DB_MODE_S, odb_mode);
+ val = le32_to_cpu(tmp);
roce_write(hr_dev, ROCEE_GLB_CFG_REG, val);
}
static void hns_roce_set_sdb(struct hns_roce_dev *hr_dev, u32 sdb_alept,
u32 sdb_alful)
{
+ __le32 tmp;
u32 val;
/* Configure SDB */
val = roce_read(hr_dev, ROCEE_DB_SQ_WL_REG);
- roce_set_field(val, ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_M,
+ tmp = cpu_to_le32(val);
+ roce_set_field(tmp, ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_M,
ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_S, sdb_alful);
- roce_set_field(val, ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_EMPTY_M,
+ roce_set_field(tmp, ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_EMPTY_M,
ROCEE_DB_SQ_WL_ROCEE_DB_SQ_WL_EMPTY_S, sdb_alept);
+ val = le32_to_cpu(tmp);
roce_write(hr_dev, ROCEE_DB_SQ_WL_REG, val);
}
static void hns_roce_set_odb(struct hns_roce_dev *hr_dev, u32 odb_alept,
u32 odb_alful)
{
+ __le32 tmp;
u32 val;
/* Configure ODB */
val = roce_read(hr_dev, ROCEE_DB_OTHERS_WL_REG);
- roce_set_field(val, ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_M,
+ tmp = cpu_to_le32(val);
+ roce_set_field(tmp, ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_M,
ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_S, odb_alful);
- roce_set_field(val, ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_EMPTY_M,
+ roce_set_field(tmp, ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_EMPTY_M,
ROCEE_DB_OTHERS_WL_ROCEE_DB_OTH_WL_EMPTY_S, odb_alept);
+ val = le32_to_cpu(tmp);
roce_write(hr_dev, ROCEE_DB_OTHERS_WL_REG, val);
}
@@ -496,6 +513,7 @@ static void hns_roce_set_sdb_ext(struct hns_roce_dev *hr_dev, u32 ext_sdb_alept,
struct hns_roce_v1_priv *priv;
struct hns_roce_db_table *db;
dma_addr_t sdb_dma_addr;
+ __le32 tmp;
u32 val;
priv = (struct hns_roce_v1_priv *)hr_dev->priv;
@@ -511,7 +529,8 @@ static void hns_roce_set_sdb_ext(struct hns_roce_dev *hr_dev, u32 ext_sdb_alept,
/* Configure extend SDB depth */
val = roce_read(hr_dev, ROCEE_EXT_DB_SQ_H_REG);
- roce_set_field(val, ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_SHIFT_M,
+ tmp = cpu_to_le32(val);
+ roce_set_field(tmp, ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_SHIFT_M,
ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_SHIFT_S,
db->ext_db->esdb_dep);
/*
@@ -519,8 +538,9 @@ static void hns_roce_set_sdb_ext(struct hns_roce_dev *hr_dev, u32 ext_sdb_alept,
* using 4K page, and shift more 32 because of
* caculating the high 32 bit value evaluated to hardware.
*/
- roce_set_field(val, ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_BA_H_M,
+ roce_set_field(tmp, ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_BA_H_M,
ROCEE_EXT_DB_SQ_H_EXT_DB_SQ_BA_H_S, sdb_dma_addr >> 44);
+ val = le32_to_cpu(tmp);
roce_write(hr_dev, ROCEE_EXT_DB_SQ_H_REG, val);
dev_dbg(dev, "ext SDB depth: 0x%x\n", db->ext_db->esdb_dep);
@@ -535,6 +555,7 @@ static void hns_roce_set_odb_ext(struct hns_roce_dev *hr_dev, u32 ext_odb_alept,
struct hns_roce_v1_priv *priv;
struct hns_roce_db_table *db;
dma_addr_t odb_dma_addr;
+ __le32 tmp;
u32 val;
priv = (struct hns_roce_v1_priv *)hr_dev->priv;
@@ -550,12 +571,14 @@ static void hns_roce_set_odb_ext(struct hns_roce_dev *hr_dev, u32 ext_odb_alept,
/* Configure extend ODB depth */
val = roce_read(hr_dev, ROCEE_EXT_DB_OTH_H_REG);
- roce_set_field(val, ROCEE_EXT_DB_OTH_H_EXT_DB_OTH_SHIFT_M,
+ tmp = cpu_to_le32(val);
+ roce_set_field(tmp, ROCEE_EXT_DB_OTH_H_EXT_DB_OTH_SHIFT_M,
ROCEE_EXT_DB_OTH_H_EXT_DB_OTH_SHIFT_S,
db->ext_db->eodb_dep);
- roce_set_field(val, ROCEE_EXT_DB_SQ_H_EXT_DB_OTH_BA_H_M,
+ roce_set_field(tmp, ROCEE_EXT_DB_SQ_H_EXT_DB_OTH_BA_H_M,
ROCEE_EXT_DB_SQ_H_EXT_DB_OTH_BA_H_S,
db->ext_db->eodb_dep);
+ val = le32_to_cpu(tmp);
roce_write(hr_dev, ROCEE_EXT_DB_OTH_H_REG, val);
dev_dbg(dev, "ext ODB depth: 0x%x\n", db->ext_db->eodb_dep);
@@ -1161,9 +1184,10 @@ static void hns_roce_db_free(struct hns_roce_dev *hr_dev)
static int hns_roce_raq_init(struct hns_roce_dev *hr_dev)
{
int ret;
+ u32 val;
+ __le32 tmp;
int raq_shift = 0;
dma_addr_t addr;
- u32 val;
struct hns_roce_v1_priv *priv;
struct hns_roce_raq_table *raq;
struct device *dev = &hr_dev->pdev->dev;
@@ -1189,46 +1213,54 @@ static int hns_roce_raq_init(struct hns_roce_dev *hr_dev)
/* Configure raq_shift */
raq_shift = ilog2(HNS_ROCE_V1_RAQ_SIZE / HNS_ROCE_V1_RAQ_ENTRY);
val = roce_read(hr_dev, ROCEE_EXT_RAQ_H_REG);
- roce_set_field(val, ROCEE_EXT_RAQ_H_EXT_RAQ_SHIFT_M,
+ tmp = cpu_to_le32(val);
+ roce_set_field(tmp, ROCEE_EXT_RAQ_H_EXT_RAQ_SHIFT_M,
ROCEE_EXT_RAQ_H_EXT_RAQ_SHIFT_S, raq_shift);
/*
* 44 = 32 + 12, When evaluating addr to hardware, shift 12 because of
* using 4K page, and shift more 32 because of
* caculating the high 32 bit value evaluated to hardware.
*/
- roce_set_field(val, ROCEE_EXT_RAQ_H_EXT_RAQ_BA_H_M,
+ roce_set_field(tmp, ROCEE_EXT_RAQ_H_EXT_RAQ_BA_H_M,
ROCEE_EXT_RAQ_H_EXT_RAQ_BA_H_S,
raq->e_raq_buf->map >> 44);
+ val = le32_to_cpu(tmp);
roce_write(hr_dev, ROCEE_EXT_RAQ_H_REG, val);
dev_dbg(dev, "Configure raq_shift 0x%x.\n", val);
/* Configure raq threshold */
val = roce_read(hr_dev, ROCEE_RAQ_WL_REG);
- roce_set_field(val, ROCEE_RAQ_WL_ROCEE_RAQ_WL_M,
+ tmp = cpu_to_le32(val);
+ roce_set_field(tmp, ROCEE_RAQ_WL_ROCEE_RAQ_WL_M,
ROCEE_RAQ_WL_ROCEE_RAQ_WL_S,
HNS_ROCE_V1_EXT_RAQ_WF);
+ val = le32_to_cpu(tmp);
roce_write(hr_dev, ROCEE_RAQ_WL_REG, val);
dev_dbg(dev, "Configure raq_wl 0x%x.\n", val);
/* Enable extend raq */
val = roce_read(hr_dev, ROCEE_WRMS_POL_TIME_INTERVAL_REG);
- roce_set_field(val,
+ tmp = cpu_to_le32(val);
+ roce_set_field(tmp,
ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_POL_TIME_INTERVAL_M,
ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_POL_TIME_INTERVAL_S,
POL_TIME_INTERVAL_VAL);
- roce_set_bit(val, ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_EXT_RAQ_MODE, 1);
- roce_set_field(val,
+ roce_set_bit(tmp, ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_EXT_RAQ_MODE, 1);
+ roce_set_field(tmp,
ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_RAQ_TIMEOUT_CHK_CFG_M,
ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_RAQ_TIMEOUT_CHK_CFG_S,
2);
- roce_set_bit(val,
+ roce_set_bit(tmp,
ROCEE_WRMS_POL_TIME_INTERVAL_WRMS_RAQ_TIMEOUT_CHK_EN_S, 1);
+ val = le32_to_cpu(tmp);
roce_write(hr_dev, ROCEE_WRMS_POL_TIME_INTERVAL_REG, val);
dev_dbg(dev, "Configure WrmsPolTimeInterval 0x%x.\n", val);
/* Enable raq drop */
val = roce_read(hr_dev, ROCEE_GLB_CFG_REG);
- roce_set_bit(val, ROCEE_GLB_CFG_TRP_RAQ_DROP_EN_S, 1);
+ tmp = cpu_to_le32(val);
+ roce_set_bit(tmp, ROCEE_GLB_CFG_TRP_RAQ_DROP_EN_S, 1);
+ val = le32_to_cpu(tmp);
roce_write(hr_dev, ROCEE_GLB_CFG_REG, val);
dev_dbg(dev, "Configure GlbCfg = 0x%x.\n", val);
@@ -1255,20 +1287,25 @@ static void hns_roce_raq_free(struct hns_roce_dev *hr_dev)
static void hns_roce_port_enable(struct hns_roce_dev *hr_dev, int enable_flag)
{
+ __le32 tmp;
u32 val;
if (enable_flag) {
val = roce_read(hr_dev, ROCEE_GLB_CFG_REG);
/* Open all ports */
- roce_set_field(val, ROCEE_GLB_CFG_ROCEE_PORT_ST_M,
+ tmp = cpu_to_le32(val);
+ roce_set_field(tmp, ROCEE_GLB_CFG_ROCEE_PORT_ST_M,
ROCEE_GLB_CFG_ROCEE_PORT_ST_S,
ALL_PORT_VAL_OPEN);
+ val = le32_to_cpu(tmp);
roce_write(hr_dev, ROCEE_GLB_CFG_REG, val);
} else {
val = roce_read(hr_dev, ROCEE_GLB_CFG_REG);
/* Close all ports */
- roce_set_field(val, ROCEE_GLB_CFG_ROCEE_PORT_ST_M,
+ tmp = cpu_to_le32(val);
+ roce_set_field(tmp, ROCEE_GLB_CFG_ROCEE_PORT_ST_M,
ROCEE_GLB_CFG_ROCEE_PORT_ST_S, 0x0);
+ val = le32_to_cpu(tmp);
roce_write(hr_dev, ROCEE_GLB_CFG_REG, val);
}
}
@@ -1498,13 +1535,11 @@ static int hns_roce_v1_profile(struct hns_roce_dev *hr_dev)
int i = 0;
struct hns_roce_caps *caps = &hr_dev->caps;
- hr_dev->vendor_id = le32_to_cpu(roce_read(hr_dev, ROCEE_VENDOR_ID_REG));
- hr_dev->vendor_part_id = le32_to_cpu(roce_read(hr_dev,
- ROCEE_VENDOR_PART_ID_REG));
- hr_dev->sys_image_guid = le32_to_cpu(roce_read(hr_dev,
- ROCEE_SYS_IMAGE_GUID_L_REG)) |
- ((u64)le32_to_cpu(roce_read(hr_dev,
- ROCEE_SYS_IMAGE_GUID_H_REG)) << 32);
+ hr_dev->vendor_id = roce_read(hr_dev, ROCEE_VENDOR_ID_REG);
+ hr_dev->vendor_part_id = roce_read(hr_dev, ROCEE_VENDOR_PART_ID_REG);
+ hr_dev->sys_image_guid = roce_read(hr_dev, ROCEE_SYS_IMAGE_GUID_L_REG) |
+ ((u64)roce_read(hr_dev,
+ ROCEE_SYS_IMAGE_GUID_H_REG) << 32);
hr_dev->hw_rev = HNS_ROCE_HW_VER1;
caps->num_qps = HNS_ROCE_V1_MAX_QP_NUM;
@@ -1557,8 +1592,7 @@ static int hns_roce_v1_profile(struct hns_roce_dev *hr_dev)
caps->ceqe_depth = HNS_ROCE_V1_COMP_EQE_NUM;
caps->aeqe_depth = HNS_ROCE_V1_ASYNC_EQE_NUM;
- caps->local_ca_ack_delay = le32_to_cpu(roce_read(hr_dev,
- ROCEE_ACK_DELAY_REG));
+ caps->local_ca_ack_delay = roce_read(hr_dev, ROCEE_ACK_DELAY_REG);
caps->max_mtu = IB_MTU_2048;
return 0;
@@ -1568,21 +1602,25 @@ static int hns_roce_v1_init(struct hns_roce_dev *hr_dev)
{
int ret;
u32 val;
+ __le32 tmp;
struct device *dev = &hr_dev->pdev->dev;
/* DMAE user config */
val = roce_read(hr_dev, ROCEE_DMAE_USER_CFG1_REG);
- roce_set_field(val, ROCEE_DMAE_USER_CFG1_ROCEE_CACHE_TB_CFG_M,
+ tmp = cpu_to_le32(val);
+ roce_set_field(tmp, ROCEE_DMAE_USER_CFG1_ROCEE_CACHE_TB_CFG_M,
ROCEE_DMAE_USER_CFG1_ROCEE_CACHE_TB_CFG_S, 0xf);
- roce_set_field(val, ROCEE_DMAE_USER_CFG1_ROCEE_STREAM_ID_TB_CFG_M,
+ roce_set_field(tmp, ROCEE_DMAE_USER_CFG1_ROCEE_STREAM_ID_TB_CFG_M,
ROCEE_DMAE_USER_CFG1_ROCEE_STREAM_ID_TB_CFG_S,
1 << PAGES_SHIFT_16);
+ val = le32_to_cpu(tmp);
roce_write(hr_dev, ROCEE_DMAE_USER_CFG1_REG, val);
val = roce_read(hr_dev, ROCEE_DMAE_USER_CFG2_REG);
- roce_set_field(val, ROCEE_DMAE_USER_CFG2_ROCEE_CACHE_PKT_CFG_M,
+ tmp = cpu_to_le32(val);
+ roce_set_field(tmp, ROCEE_DMAE_USER_CFG2_ROCEE_CACHE_PKT_CFG_M,
ROCEE_DMAE_USER_CFG2_ROCEE_CACHE_PKT_CFG_S, 0xf);
- roce_set_field(val, ROCEE_DMAE_USER_CFG2_ROCEE_STREAM_ID_PKT_CFG_M,
+ roce_set_field(tmp, ROCEE_DMAE_USER_CFG2_ROCEE_STREAM_ID_PKT_CFG_M,
ROCEE_DMAE_USER_CFG2_ROCEE_STREAM_ID_PKT_CFG_S,
1 << PAGES_SHIFT_16);
@@ -1668,6 +1706,7 @@ static int hns_roce_v1_post_mbox(struct hns_roce_dev *hr_dev, u64 in_param,
u32 __iomem *hcr = (u32 __iomem *)(hr_dev->reg_base + ROCEE_MB1_REG);
unsigned long end;
u32 val = 0;
+ __le32 tmp;
end = msecs_to_jiffies(GO_BIT_TIMEOUT_MSECS) + jiffies;
while (hns_roce_v1_cmd_pending(hr_dev)) {
@@ -1679,15 +1718,17 @@ static int hns_roce_v1_post_mbox(struct hns_roce_dev *hr_dev, u64 in_param,
cond_resched();
}
- roce_set_field(val, ROCEE_MB6_ROCEE_MB_CMD_M, ROCEE_MB6_ROCEE_MB_CMD_S,
+ tmp = cpu_to_le32(val);
+ roce_set_field(tmp, ROCEE_MB6_ROCEE_MB_CMD_M, ROCEE_MB6_ROCEE_MB_CMD_S,
op);
- roce_set_field(val, ROCEE_MB6_ROCEE_MB_CMD_MDF_M,
+ roce_set_field(tmp, ROCEE_MB6_ROCEE_MB_CMD_MDF_M,
ROCEE_MB6_ROCEE_MB_CMD_MDF_S, op_modifier);
- roce_set_bit(val, ROCEE_MB6_ROCEE_MB_EVENT_S, event);
- roce_set_bit(val, ROCEE_MB6_ROCEE_MB_HW_RUN_S, 1);
- roce_set_field(val, ROCEE_MB6_ROCEE_MB_TOKEN_M,
+ roce_set_bit(tmp, ROCEE_MB6_ROCEE_MB_EVENT_S, event);
+ roce_set_bit(tmp, ROCEE_MB6_ROCEE_MB_HW_RUN_S, 1);
+ roce_set_field(tmp, ROCEE_MB6_ROCEE_MB_TOKEN_M,
ROCEE_MB6_ROCEE_MB_TOKEN_S, token);
+ val = le32_to_cpu(tmp);
writeq(in_param, hcr + 0);
writeq(out_param, hcr + 2);
writel(in_modifier, hcr + 4);
@@ -1717,7 +1758,7 @@ static int hns_roce_v1_chk_mbox(struct hns_roce_dev *hr_dev,
return -ETIMEDOUT;
}
- status = le32_to_cpu((__force __be32)
+ status = le32_to_cpu((__force __le32)
__raw_readl(hcr + HCR_STATUS_OFFSET));
if ((status & STATUS_MASK) != 0x1) {
dev_err(hr_dev->dev, "mailbox status 0x%x!\n", status);
@@ -1728,7 +1769,7 @@ static int hns_roce_v1_chk_mbox(struct hns_roce_dev *hr_dev,
}
static int hns_roce_v1_set_gid(struct hns_roce_dev *hr_dev, u8 port,
- int gid_index, union ib_gid *gid,
+ int gid_index, const union ib_gid *gid,
const struct ib_gid_attr *attr)
{
u32 *p = NULL;
@@ -1760,6 +1801,7 @@ static int hns_roce_v1_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port,
{
u32 reg_smac_l;
u16 reg_smac_h;
+ __le32 tmp;
u16 *p_h;
u32 *p;
u32 val;
@@ -1784,10 +1826,12 @@ static int hns_roce_v1_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port,
val = roce_read(hr_dev,
ROCEE_SMAC_H_0_REG + phy_port * PHY_PORT_OFFSET);
+ tmp = cpu_to_le32(val);
p_h = (u16 *)(&addr[4]);
reg_smac_h = *p_h;
- roce_set_field(val, ROCEE_SMAC_H_ROCEE_SMAC_H_M,
+ roce_set_field(tmp, ROCEE_SMAC_H_ROCEE_SMAC_H_M,
ROCEE_SMAC_H_ROCEE_SMAC_H_S, reg_smac_h);
+ val = le32_to_cpu(tmp);
roce_write(hr_dev, ROCEE_SMAC_H_0_REG + phy_port * PHY_PORT_OFFSET,
val);
@@ -1797,12 +1841,15 @@ static int hns_roce_v1_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port,
static void hns_roce_v1_set_mtu(struct hns_roce_dev *hr_dev, u8 phy_port,
enum ib_mtu mtu)
{
+ __le32 tmp;
u32 val;
val = roce_read(hr_dev,
ROCEE_SMAC_H_0_REG + phy_port * PHY_PORT_OFFSET);
- roce_set_field(val, ROCEE_SMAC_H_ROCEE_PORT_MTU_M,
+ tmp = cpu_to_le32(val);
+ roce_set_field(tmp, ROCEE_SMAC_H_ROCEE_PORT_MTU_M,
ROCEE_SMAC_H_ROCEE_PORT_MTU_S, mtu);
+ val = le32_to_cpu(tmp);
roce_write(hr_dev, ROCEE_SMAC_H_0_REG + phy_port * PHY_PORT_OFFSET,
val);
}
@@ -1848,9 +1895,9 @@ static int hns_roce_v1_write_mtpt(void *mb_buf, struct hns_roce_mr *mr,
roce_set_field(mpt_entry->mpt_byte_12, MPT_BYTE_12_MW_BIND_COUNTER_M,
MPT_BYTE_12_MW_BIND_COUNTER_S, 0);
- mpt_entry->virt_addr_l = (u32)mr->iova;
- mpt_entry->virt_addr_h = (u32)(mr->iova >> 32);
- mpt_entry->length = (u32)mr->size;
+ mpt_entry->virt_addr_l = cpu_to_le32((u32)mr->iova);
+ mpt_entry->virt_addr_h = cpu_to_le32((u32)(mr->iova >> 32));
+ mpt_entry->length = cpu_to_le32((u32)mr->size);
roce_set_field(mpt_entry->mpt_byte_28, MPT_BYTE_28_PD_M,
MPT_BYTE_28_PD_S, mr->pd);
@@ -1885,64 +1932,59 @@ static int hns_roce_v1_write_mtpt(void *mb_buf, struct hns_roce_mr *mr,
roce_set_field(mpt_entry->mpt_byte_36,
MPT_BYTE_36_PA0_H_M,
MPT_BYTE_36_PA0_H_S,
- cpu_to_le32((u32)(pages[i] >> PAGES_SHIFT_32)));
+ (u32)(pages[i] >> PAGES_SHIFT_32));
break;
case 1:
roce_set_field(mpt_entry->mpt_byte_36,
MPT_BYTE_36_PA1_L_M,
- MPT_BYTE_36_PA1_L_S,
- cpu_to_le32((u32)(pages[i])));
+ MPT_BYTE_36_PA1_L_S, (u32)(pages[i]));
roce_set_field(mpt_entry->mpt_byte_40,
MPT_BYTE_40_PA1_H_M,
MPT_BYTE_40_PA1_H_S,
- cpu_to_le32((u32)(pages[i] >> PAGES_SHIFT_24)));
+ (u32)(pages[i] >> PAGES_SHIFT_24));
break;
case 2:
roce_set_field(mpt_entry->mpt_byte_40,
MPT_BYTE_40_PA2_L_M,
- MPT_BYTE_40_PA2_L_S,
- cpu_to_le32((u32)(pages[i])));
+ MPT_BYTE_40_PA2_L_S, (u32)(pages[i]));
roce_set_field(mpt_entry->mpt_byte_44,
MPT_BYTE_44_PA2_H_M,
MPT_BYTE_44_PA2_H_S,
- cpu_to_le32((u32)(pages[i] >> PAGES_SHIFT_16)));
+ (u32)(pages[i] >> PAGES_SHIFT_16));
break;
case 3:
roce_set_field(mpt_entry->mpt_byte_44,
MPT_BYTE_44_PA3_L_M,
- MPT_BYTE_44_PA3_L_S,
- cpu_to_le32((u32)(pages[i])));
+ MPT_BYTE_44_PA3_L_S, (u32)(pages[i]));
roce_set_field(mpt_entry->mpt_byte_48,
MPT_BYTE_48_PA3_H_M,
MPT_BYTE_48_PA3_H_S,
- cpu_to_le32((u32)(pages[i] >> PAGES_SHIFT_8)));
+ (u32)(pages[i] >> PAGES_SHIFT_8));
break;
case 4:
mpt_entry->pa4_l = cpu_to_le32((u32)(pages[i]));
roce_set_field(mpt_entry->mpt_byte_56,
MPT_BYTE_56_PA4_H_M,
MPT_BYTE_56_PA4_H_S,
- cpu_to_le32((u32)(pages[i] >> PAGES_SHIFT_32)));
+ (u32)(pages[i] >> PAGES_SHIFT_32));
break;
case 5:
roce_set_field(mpt_entry->mpt_byte_56,
MPT_BYTE_56_PA5_L_M,
- MPT_BYTE_56_PA5_L_S,
- cpu_to_le32((u32)(pages[i])));
+ MPT_BYTE_56_PA5_L_S, (u32)(pages[i]));
roce_set_field(mpt_entry->mpt_byte_60,
MPT_BYTE_60_PA5_H_M,
MPT_BYTE_60_PA5_H_S,
- cpu_to_le32((u32)(pages[i] >> PAGES_SHIFT_24)));
+ (u32)(pages[i] >> PAGES_SHIFT_24));
break;
case 6:
roce_set_field(mpt_entry->mpt_byte_60,
MPT_BYTE_60_PA6_L_M,
- MPT_BYTE_60_PA6_L_S,
- cpu_to_le32((u32)(pages[i])));
+ MPT_BYTE_60_PA6_L_S, (u32)(pages[i]));
roce_set_field(mpt_entry->mpt_byte_64,
MPT_BYTE_64_PA6_H_M,
MPT_BYTE_64_PA6_H_S,
- cpu_to_le32((u32)(pages[i] >> PAGES_SHIFT_16)));
+ (u32)(pages[i] >> PAGES_SHIFT_16));
break;
default:
break;
@@ -1951,7 +1993,7 @@ static int hns_roce_v1_write_mtpt(void *mb_buf, struct hns_roce_mr *mr,
free_page((unsigned long) pages);
- mpt_entry->pbl_addr_l = (u32)(mr->pbl_dma_addr);
+ mpt_entry->pbl_addr_l = cpu_to_le32((u32)(mr->pbl_dma_addr));
roce_set_field(mpt_entry->mpt_byte_12, MPT_BYTE_12_PBL_ADDR_H_M,
MPT_BYTE_12_PBL_ADDR_H_S,
@@ -1982,9 +2024,9 @@ static struct hns_roce_cqe *next_cqe_sw(struct hns_roce_cq *hr_cq)
static void hns_roce_v1_cq_set_ci(struct hns_roce_cq *hr_cq, u32 cons_index)
{
- u32 doorbell[2];
+ __le32 doorbell[2];
- doorbell[0] = cons_index & ((hr_cq->cq_depth << 1) - 1);
+ doorbell[0] = cpu_to_le32(cons_index & ((hr_cq->cq_depth << 1) - 1));
doorbell[1] = 0;
roce_set_bit(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_HW_SYNS_S, 1);
roce_set_field(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_M,
@@ -2081,10 +2123,8 @@ static void hns_roce_v1_write_cqc(struct hns_roce_dev *hr_dev,
CQ_CONTEXT_CQC_BYTE_4_CQC_STATE_S, CQ_STATE_VALID);
roce_set_field(cq_context->cqc_byte_4, CQ_CONTEXT_CQC_BYTE_4_CQN_M,
CQ_CONTEXT_CQC_BYTE_4_CQN_S, hr_cq->cqn);
- cq_context->cqc_byte_4 = cpu_to_le32(cq_context->cqc_byte_4);
- cq_context->cq_bt_l = (u32)dma_handle;
- cq_context->cq_bt_l = cpu_to_le32(cq_context->cq_bt_l);
+ cq_context->cq_bt_l = cpu_to_le32((u32)dma_handle);
roce_set_field(cq_context->cqc_byte_12,
CQ_CONTEXT_CQC_BYTE_12_CQ_BT_H_M,
@@ -2096,15 +2136,12 @@ static void hns_roce_v1_write_cqc(struct hns_roce_dev *hr_dev,
ilog2((unsigned int)nent));
roce_set_field(cq_context->cqc_byte_12, CQ_CONTEXT_CQC_BYTE_12_CEQN_M,
CQ_CONTEXT_CQC_BYTE_12_CEQN_S, vector);
- cq_context->cqc_byte_12 = cpu_to_le32(cq_context->cqc_byte_12);
- cq_context->cur_cqe_ba0_l = (u32)(mtts[0]);
- cq_context->cur_cqe_ba0_l = cpu_to_le32(cq_context->cur_cqe_ba0_l);
+ cq_context->cur_cqe_ba0_l = cpu_to_le32((u32)(mtts[0]));
roce_set_field(cq_context->cqc_byte_20,
CQ_CONTEXT_CQC_BYTE_20_CUR_CQE_BA0_H_M,
- CQ_CONTEXT_CQC_BYTE_20_CUR_CQE_BA0_H_S,
- cpu_to_le32((mtts[0]) >> 32));
+ CQ_CONTEXT_CQC_BYTE_20_CUR_CQE_BA0_H_S, (mtts[0]) >> 32);
/* Dedicated hardware, directly set 0 */
roce_set_field(cq_context->cqc_byte_20,
CQ_CONTEXT_CQC_BYTE_20_CQ_CUR_INDEX_M,
@@ -2118,9 +2155,8 @@ static void hns_roce_v1_write_cqc(struct hns_roce_dev *hr_dev,
CQ_CONTEXT_CQC_BYTE_20_CQE_TPTR_ADDR_H_M,
CQ_CONTEXT_CQC_BYTE_20_CQE_TPTR_ADDR_H_S,
tptr_dma_addr >> 44);
- cq_context->cqc_byte_20 = cpu_to_le32(cq_context->cqc_byte_20);
- cq_context->cqe_tptr_addr_l = (u32)(tptr_dma_addr >> 12);
+ cq_context->cqe_tptr_addr_l = cpu_to_le32((u32)(tptr_dma_addr >> 12));
roce_set_field(cq_context->cqc_byte_32,
CQ_CONTEXT_CQC_BYTE_32_CUR_CQE_BA1_H_M,
@@ -2138,7 +2174,6 @@ static void hns_roce_v1_write_cqc(struct hns_roce_dev *hr_dev,
roce_set_field(cq_context->cqc_byte_32,
CQ_CONTEXT_CQC_BYTE_32_CQ_CONS_IDX_M,
CQ_CONTEXT_CQC_BYTE_32_CQ_CONS_IDX_S, 0);
- cq_context->cqc_byte_32 = cpu_to_le32(cq_context->cqc_byte_32);
}
static int hns_roce_v1_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
@@ -2151,7 +2186,7 @@ static int hns_roce_v1_req_notify_cq(struct ib_cq *ibcq,
{
struct hns_roce_cq *hr_cq = to_hr_cq(ibcq);
u32 notification_flag;
- u32 doorbell[2];
+ __le32 doorbell[2];
notification_flag = (flags & IB_CQ_SOLICITED_MASK) ==
IB_CQ_SOLICITED ? CQ_DB_REQ_NOT : CQ_DB_REQ_NOT_SOL;
@@ -2159,7 +2194,8 @@ static int hns_roce_v1_req_notify_cq(struct ib_cq *ibcq,
* flags = 0; Notification Flag = 1, next
* flags = 1; Notification Flag = 0, solocited
*/
- doorbell[0] = hr_cq->cons_index & ((hr_cq->cq_depth << 1) - 1);
+ doorbell[0] =
+ cpu_to_le32(hr_cq->cons_index & ((hr_cq->cq_depth << 1) - 1));
roce_set_bit(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_HW_SYNS_S, 1);
roce_set_field(doorbell[1], ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_M,
ROCEE_DB_OTHERS_H_ROCEE_DB_OTH_CMD_S, 3);
@@ -2416,7 +2452,7 @@ static int hns_roce_v1_clear_hem(struct hns_roce_dev *hr_dev,
struct device *dev = &hr_dev->pdev->dev;
struct hns_roce_v1_priv *priv;
unsigned long end = 0, flags = 0;
- uint32_t bt_cmd_val[2] = {0};
+ __le32 bt_cmd_val[2] = {0};
void __iomem *bt_cmd;
u64 bt_ba = 0;
@@ -2468,7 +2504,7 @@ static int hns_roce_v1_clear_hem(struct hns_roce_dev *hr_dev,
msleep(HW_SYNC_SLEEP_TIME_INTERVAL);
}
- bt_cmd_val[0] = (uint32_t)bt_ba;
+ bt_cmd_val[0] = (__le32)bt_ba;
roce_set_field(bt_cmd_val[1], ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_M,
ROCEE_BT_CMD_H_ROCEE_BT_CMD_BA_H_S, bt_ba >> 32);
hns_roce_write64_k(bt_cmd_val, hr_dev->reg_base + ROCEE_BT_CMD_L_REG);
@@ -2569,10 +2605,11 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
struct hns_roce_sqp_context *context;
struct device *dev = &hr_dev->pdev->dev;
dma_addr_t dma_handle = 0;
+ u32 __iomem *addr;
int rq_pa_start;
+ __le32 tmp;
u32 reg_val;
u64 *mtts;
- u32 __iomem *addr;
context = kzalloc(sizeof(*context), GFP_KERNEL);
if (!context)
@@ -2598,7 +2635,7 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
roce_set_field(context->qp1c_bytes_4, QP1C_BYTES_4_PD_M,
QP1C_BYTES_4_PD_S, to_hr_pd(ibqp->pd)->pdn);
- context->sq_rq_bt_l = (u32)(dma_handle);
+ context->sq_rq_bt_l = cpu_to_le32((u32)(dma_handle));
roce_set_field(context->qp1c_bytes_12,
QP1C_BYTES_12_SQ_RQ_BT_H_M,
QP1C_BYTES_12_SQ_RQ_BT_H_S,
@@ -2610,7 +2647,7 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
QP1C_BYTES_16_PORT_NUM_S, hr_qp->phy_port);
roce_set_bit(context->qp1c_bytes_16,
QP1C_BYTES_16_SIGNALING_TYPE_S,
- hr_qp->sq_signal_bits);
+ le32_to_cpu(hr_qp->sq_signal_bits));
roce_set_bit(context->qp1c_bytes_16, QP1C_BYTES_16_RQ_BA_FLG_S,
1);
roce_set_bit(context->qp1c_bytes_16, QP1C_BYTES_16_SQ_BA_FLG_S,
@@ -2624,7 +2661,8 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
QP1C_BYTES_20_PKEY_IDX_S, attr->pkey_index);
rq_pa_start = (u32)hr_qp->rq.offset / PAGE_SIZE;
- context->cur_rq_wqe_ba_l = (u32)(mtts[rq_pa_start]);
+ context->cur_rq_wqe_ba_l =
+ cpu_to_le32((u32)(mtts[rq_pa_start]));
roce_set_field(context->qp1c_bytes_28,
QP1C_BYTES_28_CUR_RQ_WQE_BA_H_M,
@@ -2643,7 +2681,7 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
QP1C_BYTES_32_TX_CQ_NUM_S,
to_hr_cq(ibqp->send_cq)->cqn);
- context->cur_sq_wqe_ba_l = (u32)mtts[0];
+ context->cur_sq_wqe_ba_l = cpu_to_le32((u32)mtts[0]);
roce_set_field(context->qp1c_bytes_40,
QP1C_BYTES_40_CUR_SQ_WQE_BA_H_M,
@@ -2658,23 +2696,25 @@ static int hns_roce_v1_m_sqp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
ROCEE_QP1C_CFG0_0_REG +
hr_qp->phy_port * sizeof(*context));
- writel(context->qp1c_bytes_4, addr);
- writel(context->sq_rq_bt_l, addr + 1);
- writel(context->qp1c_bytes_12, addr + 2);
- writel(context->qp1c_bytes_16, addr + 3);
- writel(context->qp1c_bytes_20, addr + 4);
- writel(context->cur_rq_wqe_ba_l, addr + 5);
- writel(context->qp1c_bytes_28, addr + 6);
- writel(context->qp1c_bytes_32, addr + 7);
- writel(context->cur_sq_wqe_ba_l, addr + 8);
- writel(context->qp1c_bytes_40, addr + 9);
+ writel(le32_to_cpu(context->qp1c_bytes_4), addr);
+ writel(le32_to_cpu(context->sq_rq_bt_l), addr + 1);
+ writel(le32_to_cpu(context->qp1c_bytes_12), addr + 2);
+ writel(le32_to_cpu(context->qp1c_bytes_16), addr + 3);
+ writel(le32_to_cpu(context->qp1c_bytes_20), addr + 4);
+ writel(le32_to_cpu(context->cur_rq_wqe_ba_l), addr + 5);
+ writel(le32_to_cpu(context->qp1c_bytes_28), addr + 6);
+ writel(le32_to_cpu(context->qp1c_bytes_32), addr + 7);
+ writel(le32_to_cpu(context->cur_sq_wqe_ba_l), addr + 8);
+ writel(le32_to_cpu(context->qp1c_bytes_40), addr + 9);
}
/* Modify QP1C status */
reg_val = roce_read(hr_dev, ROCEE_QP1C_CFG0_0_REG +
hr_qp->phy_port * sizeof(*context));
- roce_set_field(reg_val, ROCEE_QP1C_CFG0_0_ROCEE_QP1C_QP_ST_M,
+ tmp = cpu_to_le32(reg_val);
+ roce_set_field(tmp, ROCEE_QP1C_CFG0_0_ROCEE_QP1C_QP_ST_M,
ROCEE_QP1C_CFG0_0_ROCEE_QP1C_QP_ST_S, new_state);
+ reg_val = le32_to_cpu(tmp);
roce_write(hr_dev, ROCEE_QP1C_CFG0_0_REG +
hr_qp->phy_port * sizeof(*context), reg_val);
@@ -2712,7 +2752,7 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
dma_addr_t dma_handle_2 = 0;
dma_addr_t dma_handle = 0;
- uint32_t doorbell[2] = {0};
+ __le32 doorbell[2] = {0};
int rq_pa_start = 0;
u64 *mtts_2 = NULL;
int ret = -EINVAL;
@@ -2887,7 +2927,7 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
dmac = (u8 *)attr->ah_attr.roce.dmac;
- context->sq_rq_bt_l = (u32)(dma_handle);
+ context->sq_rq_bt_l = cpu_to_le32((u32)(dma_handle));
roce_set_field(context->qpc_bytes_24,
QP_CONTEXT_QPC_BYTES_24_SQ_RQ_BT_H_M,
QP_CONTEXT_QPC_BYTES_24_SQ_RQ_BT_H_S,
@@ -2899,7 +2939,7 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
QP_CONTEXT_QPC_BYTES_24_MINIMUM_RNR_NAK_TIMER_M,
QP_CONTEXT_QPC_BYTES_24_MINIMUM_RNR_NAK_TIMER_S,
attr->min_rnr_timer);
- context->irrl_ba_l = (u32)(dma_handle_2);
+ context->irrl_ba_l = cpu_to_le32((u32)(dma_handle_2));
roce_set_field(context->qpc_bytes_32,
QP_CONTEXT_QPC_BYTES_32_IRRL_BA_H_M,
QP_CONTEXT_QPC_BYTES_32_IRRL_BA_H_S,
@@ -2913,7 +2953,7 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
1);
roce_set_bit(context->qpc_bytes_32,
QP_CONTEXT_QPC_BYTE_32_SIGNALING_TYPE_S,
- hr_qp->sq_signal_bits);
+ le32_to_cpu(hr_qp->sq_signal_bits));
port = (attr_mask & IB_QP_PORT) ? (attr->port_num - 1) :
hr_qp->port;
@@ -2991,7 +3031,8 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
QP_CONTEXT_QPC_BYTES_68_RQ_CUR_INDEX_S, 0);
rq_pa_start = (u32)hr_qp->rq.offset / PAGE_SIZE;
- context->cur_rq_wqe_ba_l = (u32)(mtts[rq_pa_start]);
+ context->cur_rq_wqe_ba_l =
+ cpu_to_le32((u32)(mtts[rq_pa_start]));
roce_set_field(context->qpc_bytes_76,
QP_CONTEXT_QPC_BYTES_76_CUR_RQ_WQE_BA_H_M,
@@ -3071,7 +3112,7 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
goto out;
}
- context->rx_cur_sq_wqe_ba_l = (u32)(mtts[0]);
+ context->rx_cur_sq_wqe_ba_l = cpu_to_le32((u32)(mtts[0]));
roce_set_field(context->qpc_bytes_120,
QP_CONTEXT_QPC_BYTES_120_RX_CUR_SQ_WQE_BA_H_M,
@@ -3219,7 +3260,7 @@ static int hns_roce_v1_m_qp(struct ib_qp *ibqp, const struct ib_qp_attr *attr,
QP_CONTEXT_QPC_BYTES_180_SQ_HEAD_M,
QP_CONTEXT_QPC_BYTES_180_SQ_HEAD_S, 0);
- context->tx_cur_sq_wqe_ba_l = (u32)(mtts[0]);
+ context->tx_cur_sq_wqe_ba_l = cpu_to_le32((u32)(mtts[0]));
roce_set_field(context->qpc_bytes_188,
QP_CONTEXT_QPC_BYTES_188_TX_CUR_SQ_WQE_BA_H_M,
@@ -3386,16 +3427,16 @@ static int hns_roce_v1_q_sqp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
addr = ROCEE_QP1C_CFG0_0_REG +
hr_qp->port * sizeof(struct hns_roce_sqp_context);
- context.qp1c_bytes_4 = roce_read(hr_dev, addr);
- context.sq_rq_bt_l = roce_read(hr_dev, addr + 1);
- context.qp1c_bytes_12 = roce_read(hr_dev, addr + 2);
- context.qp1c_bytes_16 = roce_read(hr_dev, addr + 3);
- context.qp1c_bytes_20 = roce_read(hr_dev, addr + 4);
- context.cur_rq_wqe_ba_l = roce_read(hr_dev, addr + 5);
- context.qp1c_bytes_28 = roce_read(hr_dev, addr + 6);
- context.qp1c_bytes_32 = roce_read(hr_dev, addr + 7);
- context.cur_sq_wqe_ba_l = roce_read(hr_dev, addr + 8);
- context.qp1c_bytes_40 = roce_read(hr_dev, addr + 9);
+ context.qp1c_bytes_4 = cpu_to_le32(roce_read(hr_dev, addr));
+ context.sq_rq_bt_l = cpu_to_le32(roce_read(hr_dev, addr + 1));
+ context.qp1c_bytes_12 = cpu_to_le32(roce_read(hr_dev, addr + 2));
+ context.qp1c_bytes_16 = cpu_to_le32(roce_read(hr_dev, addr + 3));
+ context.qp1c_bytes_20 = cpu_to_le32(roce_read(hr_dev, addr + 4));
+ context.cur_rq_wqe_ba_l = cpu_to_le32(roce_read(hr_dev, addr + 5));
+ context.qp1c_bytes_28 = cpu_to_le32(roce_read(hr_dev, addr + 6));
+ context.qp1c_bytes_32 = cpu_to_le32(roce_read(hr_dev, addr + 7));
+ context.cur_sq_wqe_ba_l = cpu_to_le32(roce_read(hr_dev, addr + 8));
+ context.qp1c_bytes_40 = cpu_to_le32(roce_read(hr_dev, addr + 9));
hr_qp->state = roce_get_field(context.qp1c_bytes_4,
QP1C_BYTES_4_QP_STATE_M,
@@ -3557,7 +3598,7 @@ static int hns_roce_v1_q_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
qp_attr->retry_cnt = roce_get_field(context->qpc_bytes_148,
QP_CONTEXT_QPC_BYTES_148_RETRY_COUNT_M,
QP_CONTEXT_QPC_BYTES_148_RETRY_COUNT_S);
- qp_attr->rnr_retry = context->rnr_retry;
+ qp_attr->rnr_retry = (u8)context->rnr_retry;
done:
qp_attr->cur_qp_state = qp_attr->qp_state;
@@ -3595,42 +3636,47 @@ static void hns_roce_check_sdb_status(struct hns_roce_dev *hr_dev,
u32 *old_send, u32 *old_retry,
u32 *tsp_st, u32 *success_flags)
{
+ __le32 *old_send_tmp, *old_retry_tmp;
u32 sdb_retry_cnt;
u32 sdb_send_ptr;
u32 cur_cnt, old_cnt;
+ __le32 tmp, tmp1;
u32 send_ptr;
sdb_send_ptr = roce_read(hr_dev, ROCEE_SDB_SEND_PTR_REG);
sdb_retry_cnt = roce_read(hr_dev, ROCEE_SDB_RETRY_CNT_REG);
- cur_cnt = roce_get_field(sdb_send_ptr,
- ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M,
+ tmp = cpu_to_le32(sdb_send_ptr);
+ tmp1 = cpu_to_le32(sdb_retry_cnt);
+ cur_cnt = roce_get_field(tmp, ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M,
ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S) +
- roce_get_field(sdb_retry_cnt,
- ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_M,
+ roce_get_field(tmp1, ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_M,
ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_S);
+
+ old_send_tmp = (__le32 *)old_send;
+ old_retry_tmp = (__le32 *)old_retry;
if (!roce_get_bit(*tsp_st, ROCEE_CNT_CLR_CE_CNT_CLR_CE_S)) {
- old_cnt = roce_get_field(*old_send,
+ old_cnt = roce_get_field(*old_send_tmp,
ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M,
ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S) +
- roce_get_field(*old_retry,
+ roce_get_field(*old_retry_tmp,
ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_M,
ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_S);
if (cur_cnt - old_cnt > SDB_ST_CMP_VAL)
*success_flags = 1;
} else {
- old_cnt = roce_get_field(*old_send,
+ old_cnt = roce_get_field(*old_send_tmp,
ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M,
ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S);
if (cur_cnt - old_cnt > SDB_ST_CMP_VAL) {
*success_flags = 1;
} else {
- send_ptr = roce_get_field(*old_send,
+ send_ptr = roce_get_field(*old_send_tmp,
ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M,
ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S) +
- roce_get_field(sdb_retry_cnt,
+ roce_get_field(tmp1,
ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_M,
ROCEE_SDB_RETRY_CNT_SDB_RETRY_CT_S);
- roce_set_field(*old_send,
+ roce_set_field(*old_send_tmp,
ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M,
ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S,
send_ptr);
@@ -3646,11 +3692,14 @@ static int check_qp_db_process_status(struct hns_roce_dev *hr_dev,
{
struct device *dev = &hr_dev->pdev->dev;
u32 sdb_send_ptr, old_send;
+ __le32 sdb_issue_ptr_tmp;
+ __le32 sdb_send_ptr_tmp;
u32 success_flags = 0;
unsigned long end;
u32 old_retry;
u32 inv_cnt;
u32 tsp_st;
+ __le32 tmp;
if (*wait_stage > HNS_ROCE_V1_DB_STAGE2 ||
*wait_stage < HNS_ROCE_V1_DB_STAGE1) {
@@ -3679,10 +3728,12 @@ static int check_qp_db_process_status(struct hns_roce_dev *hr_dev,
ROCEE_SDB_SEND_PTR_REG);
}
- if (roce_get_field(sdb_issue_ptr,
+ sdb_send_ptr_tmp = cpu_to_le32(sdb_send_ptr);
+ sdb_issue_ptr_tmp = cpu_to_le32(sdb_issue_ptr);
+ if (roce_get_field(sdb_issue_ptr_tmp,
ROCEE_SDB_ISSUE_PTR_SDB_ISSUE_PTR_M,
ROCEE_SDB_ISSUE_PTR_SDB_ISSUE_PTR_S) ==
- roce_get_field(sdb_send_ptr,
+ roce_get_field(sdb_send_ptr_tmp,
ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_M,
ROCEE_SDB_SEND_PTR_SDB_SEND_PTR_S)) {
old_send = roce_read(hr_dev, ROCEE_SDB_SEND_PTR_REG);
@@ -3690,7 +3741,8 @@ static int check_qp_db_process_status(struct hns_roce_dev *hr_dev,
do {
tsp_st = roce_read(hr_dev, ROCEE_TSP_BP_ST_REG);
- if (roce_get_bit(tsp_st,
+ tmp = cpu_to_le32(tsp_st);
+ if (roce_get_bit(tmp,
ROCEE_TSP_BP_ST_QH_FIFO_ENTRY_S) == 1) {
*wait_stage = HNS_ROCE_V1_DB_WAIT_OK;
return 0;
@@ -3699,8 +3751,9 @@ static int check_qp_db_process_status(struct hns_roce_dev *hr_dev,
if (!time_before(jiffies, end)) {
dev_dbg(dev, "QP(0x%lx) db process stage1 timeout when send ptr equals issue ptr.\n"
"issue 0x%x send 0x%x.\n",
- hr_qp->qpn, sdb_issue_ptr,
- sdb_send_ptr);
+ hr_qp->qpn,
+ le32_to_cpu(sdb_issue_ptr_tmp),
+ le32_to_cpu(sdb_send_ptr_tmp));
return 0;
}
@@ -4102,9 +4155,9 @@ static void hns_roce_v1_cq_err_handle(struct hns_roce_dev *hr_dev,
struct device *dev = &hr_dev->pdev->dev;
u32 cqn;
- cqn = le32_to_cpu(roce_get_field(aeqe->event.cq_event.cq,
+ cqn = roce_get_field(aeqe->event.cq_event.cq,
HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_M,
- HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_S));
+ HNS_ROCE_AEQE_EVENT_CQ_EVENT_CQ_CQN_S);
switch (event_type) {
case HNS_ROCE_EVENT_TYPE_CQ_ACCESS_ERROR:
@@ -4340,6 +4393,7 @@ static irqreturn_t hns_roce_v1_msix_interrupt_abn(int irq, void *dev_id)
u32 aeshift_val;
u32 ceshift_val;
u32 cemask_val;
+ __le32 tmp;
int i;
/*
@@ -4348,30 +4402,34 @@ static irqreturn_t hns_roce_v1_msix_interrupt_abn(int irq, void *dev_id)
* interrupt, mask irq, clear irq, cancel mask operation
*/
aeshift_val = roce_read(hr_dev, ROCEE_CAEP_AEQC_AEQE_SHIFT_REG);
+ tmp = cpu_to_le32(aeshift_val);
/* AEQE overflow */
- if (roce_get_bit(aeshift_val,
+ if (roce_get_bit(tmp,
ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQ_ALM_OVF_INT_ST_S) == 1) {
dev_warn(dev, "AEQ overflow!\n");
/* Set mask */
caepaemask_val = roce_read(hr_dev, ROCEE_CAEP_AE_MASK_REG);
- roce_set_bit(caepaemask_val,
- ROCEE_CAEP_AE_MASK_CAEP_AEQ_ALM_OVF_MASK_S,
+ tmp = cpu_to_le32(caepaemask_val);
+ roce_set_bit(tmp, ROCEE_CAEP_AE_MASK_CAEP_AEQ_ALM_OVF_MASK_S,
HNS_ROCE_INT_MASK_ENABLE);
+ caepaemask_val = le32_to_cpu(tmp);
roce_write(hr_dev, ROCEE_CAEP_AE_MASK_REG, caepaemask_val);
/* Clear int state(INT_WC : write 1 clear) */
caepaest_val = roce_read(hr_dev, ROCEE_CAEP_AE_ST_REG);
- roce_set_bit(caepaest_val,
- ROCEE_CAEP_AE_ST_CAEP_AEQ_ALM_OVF_S, 1);
+ tmp = cpu_to_le32(caepaest_val);
+ roce_set_bit(tmp, ROCEE_CAEP_AE_ST_CAEP_AEQ_ALM_OVF_S, 1);
+ caepaest_val = le32_to_cpu(tmp);
roce_write(hr_dev, ROCEE_CAEP_AE_ST_REG, caepaest_val);
/* Clear mask */
caepaemask_val = roce_read(hr_dev, ROCEE_CAEP_AE_MASK_REG);
- roce_set_bit(caepaemask_val,
- ROCEE_CAEP_AE_MASK_CAEP_AEQ_ALM_OVF_MASK_S,
+ tmp = cpu_to_le32(caepaemask_val);
+ roce_set_bit(tmp, ROCEE_CAEP_AE_MASK_CAEP_AEQ_ALM_OVF_MASK_S,
HNS_ROCE_INT_MASK_DISABLE);
+ caepaemask_val = le32_to_cpu(tmp);
roce_write(hr_dev, ROCEE_CAEP_AE_MASK_REG, caepaemask_val);
}
@@ -4379,8 +4437,9 @@ static irqreturn_t hns_roce_v1_msix_interrupt_abn(int irq, void *dev_id)
for (i = 0; i < hr_dev->caps.num_comp_vectors; i++) {
ceshift_val = roce_read(hr_dev, ROCEE_CAEP_CEQC_SHIFT_0_REG +
i * CEQ_REG_OFFSET);
+ tmp = cpu_to_le32(ceshift_val);
- if (roce_get_bit(ceshift_val,
+ if (roce_get_bit(tmp,
ROCEE_CAEP_CEQC_SHIFT_CAEP_CEQ_ALM_OVF_INT_ST_S) == 1) {
dev_warn(dev, "CEQ[%d] almost overflow!\n", i);
int_work++;
@@ -4389,9 +4448,11 @@ static irqreturn_t hns_roce_v1_msix_interrupt_abn(int irq, void *dev_id)
cemask_val = roce_read(hr_dev,
ROCEE_CAEP_CE_IRQ_MASK_0_REG +
i * CEQ_REG_OFFSET);
- roce_set_bit(cemask_val,
+ tmp = cpu_to_le32(cemask_val);
+ roce_set_bit(tmp,
ROCEE_CAEP_CE_IRQ_MASK_CAEP_CEQ_ALM_OVF_MASK_S,
HNS_ROCE_INT_MASK_ENABLE);
+ cemask_val = le32_to_cpu(tmp);
roce_write(hr_dev, ROCEE_CAEP_CE_IRQ_MASK_0_REG +
i * CEQ_REG_OFFSET, cemask_val);
@@ -4399,9 +4460,11 @@ static irqreturn_t hns_roce_v1_msix_interrupt_abn(int irq, void *dev_id)
cealmovf_val = roce_read(hr_dev,
ROCEE_CAEP_CEQ_ALM_OVF_0_REG +
i * CEQ_REG_OFFSET);
- roce_set_bit(cealmovf_val,
+ tmp = cpu_to_le32(cealmovf_val);
+ roce_set_bit(tmp,
ROCEE_CAEP_CEQ_ALM_OVF_CAEP_CEQ_ALM_OVF_S,
1);
+ cealmovf_val = le32_to_cpu(tmp);
roce_write(hr_dev, ROCEE_CAEP_CEQ_ALM_OVF_0_REG +
i * CEQ_REG_OFFSET, cealmovf_val);
@@ -4409,9 +4472,11 @@ static irqreturn_t hns_roce_v1_msix_interrupt_abn(int irq, void *dev_id)
cemask_val = roce_read(hr_dev,
ROCEE_CAEP_CE_IRQ_MASK_0_REG +
i * CEQ_REG_OFFSET);
- roce_set_bit(cemask_val,
+ tmp = cpu_to_le32(cemask_val);
+ roce_set_bit(tmp,
ROCEE_CAEP_CE_IRQ_MASK_CAEP_CEQ_ALM_OVF_MASK_S,
HNS_ROCE_INT_MASK_DISABLE);
+ cemask_val = le32_to_cpu(tmp);
roce_write(hr_dev, ROCEE_CAEP_CE_IRQ_MASK_0_REG +
i * CEQ_REG_OFFSET, cemask_val);
}
@@ -4435,13 +4500,16 @@ static void hns_roce_v1_int_mask_enable(struct hns_roce_dev *hr_dev)
{
u32 aemask_val;
int masken = 0;
+ __le32 tmp;
int i;
/* AEQ INT */
aemask_val = roce_read(hr_dev, ROCEE_CAEP_AE_MASK_REG);
- roce_set_bit(aemask_val, ROCEE_CAEP_AE_MASK_CAEP_AEQ_ALM_OVF_MASK_S,
+ tmp = cpu_to_le32(aemask_val);
+ roce_set_bit(tmp, ROCEE_CAEP_AE_MASK_CAEP_AEQ_ALM_OVF_MASK_S,
masken);
- roce_set_bit(aemask_val, ROCEE_CAEP_AE_MASK_CAEP_AE_IRQ_MASK_S, masken);
+ roce_set_bit(tmp, ROCEE_CAEP_AE_MASK_CAEP_AE_IRQ_MASK_S, masken);
+ aemask_val = le32_to_cpu(tmp);
roce_write(hr_dev, ROCEE_CAEP_AE_MASK_REG, aemask_val);
/* CEQ INT */
@@ -4473,20 +4541,24 @@ static void hns_roce_v1_enable_eq(struct hns_roce_dev *hr_dev, int eq_num,
int enable_flag)
{
void __iomem *eqc = hr_dev->eq_table.eqc_base[eq_num];
+ __le32 tmp;
u32 val;
val = readl(eqc);
+ tmp = cpu_to_le32(val);
if (enable_flag)
- roce_set_field(val,
+ roce_set_field(tmp,
ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_M,
ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_S,
HNS_ROCE_EQ_STAT_VALID);
else
- roce_set_field(val,
+ roce_set_field(tmp,
ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_M,
ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_S,
HNS_ROCE_EQ_STAT_INVALID);
+
+ val = le32_to_cpu(tmp);
writel(val, eqc);
}
@@ -4499,6 +4571,9 @@ static int hns_roce_v1_create_eq(struct hns_roce_dev *hr_dev,
u32 eqconsindx_val = 0;
u32 eqcuridx_val = 0;
u32 eqshift_val = 0;
+ __le32 tmp2 = 0;
+ __le32 tmp1 = 0;
+ __le32 tmp = 0;
int num_bas;
int ret;
int i;
@@ -4530,14 +4605,13 @@ static int hns_roce_v1_create_eq(struct hns_roce_dev *hr_dev,
memset(eq->buf_list[i].buf, 0, HNS_ROCE_BA_SIZE);
}
eq->cons_index = 0;
- roce_set_field(eqshift_val,
- ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_M,
+ roce_set_field(tmp, ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_M,
ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_STATE_S,
HNS_ROCE_EQ_STAT_INVALID);
- roce_set_field(eqshift_val,
- ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_AEQE_SHIFT_M,
+ roce_set_field(tmp, ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_AEQE_SHIFT_M,
ROCEE_CAEP_AEQC_AEQE_SHIFT_CAEP_AEQC_AEQE_SHIFT_S,
eq->log_entries);
+ eqshift_val = le32_to_cpu(tmp);
writel(eqshift_val, eqc);
/* Configure eq extended address 12~44bit */
@@ -4549,18 +4623,18 @@ static int hns_roce_v1_create_eq(struct hns_roce_dev *hr_dev,
* using 4K page, and shift more 32 because of
* caculating the high 32 bit value evaluated to hardware.
*/
- roce_set_field(eqcuridx_val, ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQ_BT_H_M,
+ roce_set_field(tmp1, ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQ_BT_H_M,
ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQ_BT_H_S,
eq->buf_list[0].map >> 44);
- roce_set_field(eqcuridx_val,
- ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQE_CUR_IDX_M,
+ roce_set_field(tmp1, ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQE_CUR_IDX_M,
ROCEE_CAEP_AEQE_CUR_IDX_CAEP_AEQE_CUR_IDX_S, 0);
+ eqcuridx_val = le32_to_cpu(tmp1);
writel(eqcuridx_val, eqc + 8);
/* Configure eq consumer index */
- roce_set_field(eqconsindx_val,
- ROCEE_CAEP_AEQE_CONS_IDX_CAEP_AEQE_CONS_IDX_M,
+ roce_set_field(tmp2, ROCEE_CAEP_AEQE_CONS_IDX_CAEP_AEQE_CONS_IDX_M,
ROCEE_CAEP_AEQE_CONS_IDX_CAEP_AEQE_CONS_IDX_S, 0);
+ eqconsindx_val = le32_to_cpu(tmp2);
writel(eqconsindx_val, eqc + 0xc);
return 0;
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h
index e9a2717ea7cd..66440147d9eb 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.h
@@ -260,7 +260,7 @@ struct hns_roce_cqe {
__le32 cqe_byte_4;
union {
__le32 r_key;
- __be32 immediate_data;
+ __le32 immediate_data;
};
__le32 byte_cnt;
__le32 cqe_byte_16;
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index a6e11be0ea0f..951d839f1392 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -172,7 +172,6 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
struct hns_roce_v2_ud_send_wqe *ud_sq_wqe;
struct hns_roce_v2_rc_send_wqe *rc_sq_wqe;
struct hns_roce_qp *qp = to_hr_qp(ibqp);
- struct hns_roce_v2_wqe_data_seg *dseg;
struct device *dev = hr_dev->dev;
struct hns_roce_v2_db sq_db;
unsigned int sge_ind = 0;
@@ -485,7 +484,6 @@ static int hns_roce_v2_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
}
wqe += sizeof(struct hns_roce_v2_rc_send_wqe);
- dseg = wqe;
ret = set_rwqe_data_seg(ibqp, wr, rc_sq_wqe, wqe,
&sge_ind, bad_wr);
@@ -925,7 +923,8 @@ static int hns_roce_config_global_param(struct hns_roce_dev *hr_dev)
static int hns_roce_query_pf_resource(struct hns_roce_dev *hr_dev)
{
struct hns_roce_cmq_desc desc[2];
- struct hns_roce_pf_res *res;
+ struct hns_roce_pf_res_a *req_a;
+ struct hns_roce_pf_res_b *req_b;
int ret;
int i;
@@ -943,21 +942,26 @@ static int hns_roce_query_pf_resource(struct hns_roce_dev *hr_dev)
if (ret)
return ret;
- res = (struct hns_roce_pf_res *)desc[0].data;
+ req_a = (struct hns_roce_pf_res_a *)desc[0].data;
+ req_b = (struct hns_roce_pf_res_b *)desc[1].data;
- hr_dev->caps.qpc_bt_num = roce_get_field(res->qpc_bt_idx_num,
+ hr_dev->caps.qpc_bt_num = roce_get_field(req_a->qpc_bt_idx_num,
PF_RES_DATA_1_PF_QPC_BT_NUM_M,
PF_RES_DATA_1_PF_QPC_BT_NUM_S);
- hr_dev->caps.srqc_bt_num = roce_get_field(res->srqc_bt_idx_num,
+ hr_dev->caps.srqc_bt_num = roce_get_field(req_a->srqc_bt_idx_num,
PF_RES_DATA_2_PF_SRQC_BT_NUM_M,
PF_RES_DATA_2_PF_SRQC_BT_NUM_S);
- hr_dev->caps.cqc_bt_num = roce_get_field(res->cqc_bt_idx_num,
+ hr_dev->caps.cqc_bt_num = roce_get_field(req_a->cqc_bt_idx_num,
PF_RES_DATA_3_PF_CQC_BT_NUM_M,
PF_RES_DATA_3_PF_CQC_BT_NUM_S);
- hr_dev->caps.mpt_bt_num = roce_get_field(res->mpt_bt_idx_num,
+ hr_dev->caps.mpt_bt_num = roce_get_field(req_a->mpt_bt_idx_num,
PF_RES_DATA_4_PF_MPT_BT_NUM_M,
PF_RES_DATA_4_PF_MPT_BT_NUM_S);
+ hr_dev->caps.sl_num = roce_get_field(req_b->qid_idx_sl_num,
+ PF_RES_DATA_3_PF_SL_NUM_M,
+ PF_RES_DATA_3_PF_SL_NUM_S);
+
return 0;
}
@@ -1203,6 +1207,7 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
caps->eqe_ba_pg_sz = 0;
caps->eqe_buf_pg_sz = 0;
caps->eqe_hop_num = HNS_ROCE_EQE_HOP_NUM;
+ caps->tsq_buf_pg_sz = 0;
caps->chunk_sz = HNS_ROCE_V2_TABLE_CHUNK_SIZE;
caps->flags = HNS_ROCE_CAP_FLAG_REREG_MR |
@@ -1224,6 +1229,228 @@ static int hns_roce_v2_profile(struct hns_roce_dev *hr_dev)
return ret;
}
+static int hns_roce_config_link_table(struct hns_roce_dev *hr_dev,
+ enum hns_roce_link_table_type type)
+{
+ struct hns_roce_cmq_desc desc[2];
+ struct hns_roce_cfg_llm_a *req_a =
+ (struct hns_roce_cfg_llm_a *)desc[0].data;
+ struct hns_roce_cfg_llm_b *req_b =
+ (struct hns_roce_cfg_llm_b *)desc[1].data;
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+ struct hns_roce_link_table *link_tbl;
+ struct hns_roce_link_table_entry *entry;
+ enum hns_roce_opcode_type opcode;
+ u32 page_num;
+ int i;
+
+ switch (type) {
+ case TSQ_LINK_TABLE:
+ link_tbl = &priv->tsq;
+ opcode = HNS_ROCE_OPC_CFG_EXT_LLM;
+ break;
+ case TPQ_LINK_TABLE:
+ link_tbl = &priv->tpq;
+ opcode = HNS_ROCE_OPC_CFG_TMOUT_LLM;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ page_num = link_tbl->npages;
+ entry = link_tbl->table.buf;
+ memset(req_a, 0, sizeof(*req_a));
+ memset(req_b, 0, sizeof(*req_b));
+
+ for (i = 0; i < 2; i++) {
+ hns_roce_cmq_setup_basic_desc(&desc[i], opcode, false);
+
+ if (i == 0)
+ desc[i].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
+ else
+ desc[i].flag &= ~cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT);
+
+ if (i == 0) {
+ req_a->base_addr_l = link_tbl->table.map & 0xffffffff;
+ req_a->base_addr_h = (link_tbl->table.map >> 32) &
+ 0xffffffff;
+ roce_set_field(req_a->depth_pgsz_init_en,
+ CFG_LLM_QUE_DEPTH_M,
+ CFG_LLM_QUE_DEPTH_S,
+ link_tbl->npages);
+ roce_set_field(req_a->depth_pgsz_init_en,
+ CFG_LLM_QUE_PGSZ_M,
+ CFG_LLM_QUE_PGSZ_S,
+ link_tbl->pg_sz);
+ req_a->head_ba_l = entry[0].blk_ba0;
+ req_a->head_ba_h_nxtptr = entry[0].blk_ba1_nxt_ptr;
+ roce_set_field(req_a->head_ptr,
+ CFG_LLM_HEAD_PTR_M,
+ CFG_LLM_HEAD_PTR_S, 0);
+ } else {
+ req_b->tail_ba_l = entry[page_num - 1].blk_ba0;
+ roce_set_field(req_b->tail_ba_h,
+ CFG_LLM_TAIL_BA_H_M,
+ CFG_LLM_TAIL_BA_H_S,
+ entry[page_num - 1].blk_ba1_nxt_ptr &
+ HNS_ROCE_LINK_TABLE_BA1_M);
+ roce_set_field(req_b->tail_ptr,
+ CFG_LLM_TAIL_PTR_M,
+ CFG_LLM_TAIL_PTR_S,
+ (entry[page_num - 2].blk_ba1_nxt_ptr &
+ HNS_ROCE_LINK_TABLE_NXT_PTR_M) >>
+ HNS_ROCE_LINK_TABLE_NXT_PTR_S);
+ }
+ }
+ roce_set_field(req_a->depth_pgsz_init_en,
+ CFG_LLM_INIT_EN_M, CFG_LLM_INIT_EN_S, 1);
+
+ return hns_roce_cmq_send(hr_dev, desc, 2);
+}
+
+static int hns_roce_init_link_table(struct hns_roce_dev *hr_dev,
+ enum hns_roce_link_table_type type)
+{
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+ struct hns_roce_link_table *link_tbl;
+ struct hns_roce_link_table_entry *entry;
+ struct device *dev = hr_dev->dev;
+ u32 buf_chk_sz;
+ dma_addr_t t;
+ int func_num = 1;
+ int pg_num_a;
+ int pg_num_b;
+ int pg_num;
+ int size;
+ int i;
+
+ switch (type) {
+ case TSQ_LINK_TABLE:
+ link_tbl = &priv->tsq;
+ buf_chk_sz = 1 << (hr_dev->caps.tsq_buf_pg_sz + PAGE_SHIFT);
+ pg_num_a = hr_dev->caps.num_qps * 8 / buf_chk_sz;
+ pg_num_b = hr_dev->caps.sl_num * 4 + 2;
+ break;
+ case TPQ_LINK_TABLE:
+ link_tbl = &priv->tpq;
+ buf_chk_sz = 1 << (hr_dev->caps.tpq_buf_pg_sz + PAGE_SHIFT);
+ pg_num_a = hr_dev->caps.num_cqs * 4 / buf_chk_sz;
+ pg_num_b = 2 * 4 * func_num + 2;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ pg_num = max(pg_num_a, pg_num_b);
+ size = pg_num * sizeof(struct hns_roce_link_table_entry);
+
+ link_tbl->table.buf = dma_alloc_coherent(dev, size,
+ &link_tbl->table.map,
+ GFP_KERNEL);
+ if (!link_tbl->table.buf)
+ goto out;
+
+ link_tbl->pg_list = kcalloc(pg_num, sizeof(*link_tbl->pg_list),
+ GFP_KERNEL);
+ if (!link_tbl->pg_list)
+ goto err_kcalloc_failed;
+
+ entry = link_tbl->table.buf;
+ for (i = 0; i < pg_num; ++i) {
+ link_tbl->pg_list[i].buf = dma_alloc_coherent(dev, buf_chk_sz,
+ &t, GFP_KERNEL);
+ if (!link_tbl->pg_list[i].buf)
+ goto err_alloc_buf_failed;
+
+ link_tbl->pg_list[i].map = t;
+ memset(link_tbl->pg_list[i].buf, 0, buf_chk_sz);
+
+ entry[i].blk_ba0 = (t >> 12) & 0xffffffff;
+ roce_set_field(entry[i].blk_ba1_nxt_ptr,
+ HNS_ROCE_LINK_TABLE_BA1_M,
+ HNS_ROCE_LINK_TABLE_BA1_S,
+ t >> 44);
+
+ if (i < (pg_num - 1))
+ roce_set_field(entry[i].blk_ba1_nxt_ptr,
+ HNS_ROCE_LINK_TABLE_NXT_PTR_M,
+ HNS_ROCE_LINK_TABLE_NXT_PTR_S,
+ i + 1);
+ }
+ link_tbl->npages = pg_num;
+ link_tbl->pg_sz = buf_chk_sz;
+
+ return hns_roce_config_link_table(hr_dev, type);
+
+err_alloc_buf_failed:
+ for (i -= 1; i >= 0; i--)
+ dma_free_coherent(dev, buf_chk_sz,
+ link_tbl->pg_list[i].buf,
+ link_tbl->pg_list[i].map);
+ kfree(link_tbl->pg_list);
+
+err_kcalloc_failed:
+ dma_free_coherent(dev, size, link_tbl->table.buf,
+ link_tbl->table.map);
+
+out:
+ return -ENOMEM;
+}
+
+static void hns_roce_free_link_table(struct hns_roce_dev *hr_dev,
+ struct hns_roce_link_table *link_tbl)
+{
+ struct device *dev = hr_dev->dev;
+ int size;
+ int i;
+
+ size = link_tbl->npages * sizeof(struct hns_roce_link_table_entry);
+
+ for (i = 0; i < link_tbl->npages; ++i)
+ if (link_tbl->pg_list[i].buf)
+ dma_free_coherent(dev, link_tbl->pg_sz,
+ link_tbl->pg_list[i].buf,
+ link_tbl->pg_list[i].map);
+ kfree(link_tbl->pg_list);
+
+ dma_free_coherent(dev, size, link_tbl->table.buf,
+ link_tbl->table.map);
+}
+
+static int hns_roce_v2_init(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+ int ret;
+
+ /* TSQ includes SQ doorbell and ack doorbell */
+ ret = hns_roce_init_link_table(hr_dev, TSQ_LINK_TABLE);
+ if (ret) {
+ dev_err(hr_dev->dev, "TSQ init failed, ret = %d.\n", ret);
+ return ret;
+ }
+
+ ret = hns_roce_init_link_table(hr_dev, TPQ_LINK_TABLE);
+ if (ret) {
+ dev_err(hr_dev->dev, "TPQ init failed, ret = %d.\n", ret);
+ goto err_tpq_init_failed;
+ }
+
+ return 0;
+
+err_tpq_init_failed:
+ hns_roce_free_link_table(hr_dev, &priv->tsq);
+
+ return ret;
+}
+
+static void hns_roce_v2_exit(struct hns_roce_dev *hr_dev)
+{
+ struct hns_roce_v2_priv *priv = hr_dev->priv;
+
+ hns_roce_free_link_table(hr_dev, &priv->tpq);
+ hns_roce_free_link_table(hr_dev, &priv->tsq);
+}
+
static int hns_roce_v2_cmd_pending(struct hns_roce_dev *hr_dev)
{
u32 status = readl(hr_dev->reg_base + ROCEE_VF_MB_STATUS_REG);
@@ -1307,13 +1534,45 @@ static int hns_roce_v2_chk_mbox(struct hns_roce_dev *hr_dev,
return 0;
}
+static int hns_roce_config_sgid_table(struct hns_roce_dev *hr_dev,
+ int gid_index, const union ib_gid *gid,
+ enum hns_roce_sgid_type sgid_type)
+{
+ struct hns_roce_cmq_desc desc;
+ struct hns_roce_cfg_sgid_tb *sgid_tb =
+ (struct hns_roce_cfg_sgid_tb *)desc.data;
+ u32 *p;
+
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_SGID_TB, false);
+
+ roce_set_field(sgid_tb->table_idx_rsv,
+ CFG_SGID_TB_TABLE_IDX_M,
+ CFG_SGID_TB_TABLE_IDX_S, gid_index);
+ roce_set_field(sgid_tb->vf_sgid_type_rsv,
+ CFG_SGID_TB_VF_SGID_TYPE_M,
+ CFG_SGID_TB_VF_SGID_TYPE_S, sgid_type);
+
+ p = (u32 *)&gid->raw[0];
+ sgid_tb->vf_sgid_l = cpu_to_le32(*p);
+
+ p = (u32 *)&gid->raw[4];
+ sgid_tb->vf_sgid_ml = cpu_to_le32(*p);
+
+ p = (u32 *)&gid->raw[8];
+ sgid_tb->vf_sgid_mh = cpu_to_le32(*p);
+
+ p = (u32 *)&gid->raw[0xc];
+ sgid_tb->vf_sgid_h = cpu_to_le32(*p);
+
+ return hns_roce_cmq_send(hr_dev, &desc, 1);
+}
+
static int hns_roce_v2_set_gid(struct hns_roce_dev *hr_dev, u8 port,
- int gid_index, union ib_gid *gid,
+ int gid_index, const union ib_gid *gid,
const struct ib_gid_attr *attr)
{
enum hns_roce_sgid_type sgid_type = GID_TYPE_FLAG_ROCE_V1;
- u32 *p;
- u32 val;
+ int ret;
if (!gid || !attr)
return -EINVAL;
@@ -1328,49 +1587,37 @@ static int hns_roce_v2_set_gid(struct hns_roce_dev *hr_dev, u8 port,
sgid_type = GID_TYPE_FLAG_ROCE_V2_IPV6;
}
- p = (u32 *)&gid->raw[0];
- roce_raw_write(*p, hr_dev->reg_base + ROCEE_VF_SGID_CFG0_REG +
- 0x20 * gid_index);
-
- p = (u32 *)&gid->raw[4];
- roce_raw_write(*p, hr_dev->reg_base + ROCEE_VF_SGID_CFG1_REG +
- 0x20 * gid_index);
-
- p = (u32 *)&gid->raw[8];
- roce_raw_write(*p, hr_dev->reg_base + ROCEE_VF_SGID_CFG2_REG +
- 0x20 * gid_index);
-
- p = (u32 *)&gid->raw[0xc];
- roce_raw_write(*p, hr_dev->reg_base + ROCEE_VF_SGID_CFG3_REG +
- 0x20 * gid_index);
-
- val = roce_read(hr_dev, ROCEE_VF_SGID_CFG4_REG + 0x20 * gid_index);
- roce_set_field(val, ROCEE_VF_SGID_CFG4_SGID_TYPE_M,
- ROCEE_VF_SGID_CFG4_SGID_TYPE_S, sgid_type);
-
- roce_write(hr_dev, ROCEE_VF_SGID_CFG4_REG + 0x20 * gid_index, val);
+ ret = hns_roce_config_sgid_table(hr_dev, gid_index, gid, sgid_type);
+ if (ret)
+ dev_err(hr_dev->dev, "Configure sgid table failed(%d)!\n", ret);
- return 0;
+ return ret;
}
static int hns_roce_v2_set_mac(struct hns_roce_dev *hr_dev, u8 phy_port,
u8 *addr)
{
+ struct hns_roce_cmq_desc desc;
+ struct hns_roce_cfg_smac_tb *smac_tb =
+ (struct hns_roce_cfg_smac_tb *)desc.data;
u16 reg_smac_h;
u32 reg_smac_l;
- u32 val;
+
+ hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_CFG_SMAC_TB, false);
reg_smac_l = *(u32 *)(&addr[0]);
- roce_raw_write(reg_smac_l, hr_dev->reg_base + ROCEE_VF_SMAC_CFG0_REG +
- 0x08 * phy_port);
- val = roce_read(hr_dev, ROCEE_VF_SMAC_CFG1_REG + 0x08 * phy_port);
+ reg_smac_h = *(u16 *)(&addr[4]);
- reg_smac_h = *(u16 *)(&addr[4]);
- roce_set_field(val, ROCEE_VF_SMAC_CFG1_VF_SMAC_H_M,
- ROCEE_VF_SMAC_CFG1_VF_SMAC_H_S, reg_smac_h);
- roce_write(hr_dev, ROCEE_VF_SMAC_CFG1_REG + 0x08 * phy_port, val);
+ memset(smac_tb, 0, sizeof(*smac_tb));
+ roce_set_field(smac_tb->tb_idx_rsv,
+ CFG_SMAC_TB_IDX_M,
+ CFG_SMAC_TB_IDX_S, phy_port);
+ roce_set_field(smac_tb->vf_smac_h_rsv,
+ CFG_SMAC_TB_VF_SMAC_H_M,
+ CFG_SMAC_TB_VF_SMAC_H_S, reg_smac_h);
+ smac_tb->vf_smac_l = reg_smac_l;
- return 0;
+ return hns_roce_cmq_send(hr_dev, &desc, 1);
}
static int hns_roce_v2_write_mtpt(void *mb_buf, struct hns_roce_mr *mr,
@@ -4052,15 +4299,12 @@ static void hns_roce_mhop_free_eq(struct hns_roce_dev *hr_dev,
u32 bt_chk_sz;
u32 mhop_num;
int eqe_alloc;
- int ba_num;
int i = 0;
int j = 0;
mhop_num = hr_dev->caps.eqe_hop_num;
buf_chk_sz = 1 << (hr_dev->caps.eqe_buf_pg_sz + PAGE_SHIFT);
bt_chk_sz = 1 << (hr_dev->caps.eqe_ba_pg_sz + PAGE_SHIFT);
- ba_num = (PAGE_ALIGN(eq->entries * eq->eqe_size) + buf_chk_sz - 1) /
- buf_chk_sz;
/* hop_num = 0 */
if (mhop_num == HNS_ROCE_HOP_NUM_0) {
@@ -4725,6 +4969,8 @@ static const struct hns_roce_hw hns_roce_hw_v2 = {
.cmq_init = hns_roce_v2_cmq_init,
.cmq_exit = hns_roce_v2_cmq_exit,
.hw_profile = hns_roce_v2_profile,
+ .hw_init = hns_roce_v2_init,
+ .hw_exit = hns_roce_v2_exit,
.post_mbox = hns_roce_v2_post_mbox,
.chk_mbox = hns_roce_v2_chk_mbox,
.set_gid = hns_roce_v2_set_gid,
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
index d47675f365c7..df95b3515c94 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
@@ -203,6 +203,10 @@ enum hns_roce_opcode_type {
HNS_ROCE_OPC_ALLOC_PF_RES = 0x8004,
HNS_ROCE_OPC_QUERY_PF_RES = 0x8400,
HNS_ROCE_OPC_ALLOC_VF_RES = 0x8401,
+ HNS_ROCE_OPC_CFG_EXT_LLM = 0x8403,
+ HNS_ROCE_OPC_CFG_TMOUT_LLM = 0x8404,
+ HNS_ROCE_OPC_CFG_SGID_TB = 0x8500,
+ HNS_ROCE_OPC_CFG_SMAC_TB = 0x8501,
HNS_ROCE_OPC_CFG_BT_ATTR = 0x8506,
};
@@ -1061,6 +1065,40 @@ struct hns_roce_query_version {
__le32 rsv[5];
};
+struct hns_roce_cfg_llm_a {
+ __le32 base_addr_l;
+ __le32 base_addr_h;
+ __le32 depth_pgsz_init_en;
+ __le32 head_ba_l;
+ __le32 head_ba_h_nxtptr;
+ __le32 head_ptr;
+};
+
+#define CFG_LLM_QUE_DEPTH_S 0
+#define CFG_LLM_QUE_DEPTH_M GENMASK(12, 0)
+
+#define CFG_LLM_QUE_PGSZ_S 16
+#define CFG_LLM_QUE_PGSZ_M GENMASK(19, 16)
+
+#define CFG_LLM_INIT_EN_S 20
+#define CFG_LLM_INIT_EN_M GENMASK(20, 20)
+
+#define CFG_LLM_HEAD_PTR_S 0
+#define CFG_LLM_HEAD_PTR_M GENMASK(11, 0)
+
+struct hns_roce_cfg_llm_b {
+ __le32 tail_ba_l;
+ __le32 tail_ba_h;
+ __le32 tail_ptr;
+ __le32 rsv[3];
+};
+
+#define CFG_LLM_TAIL_BA_H_S 0
+#define CFG_LLM_TAIL_BA_H_M GENMASK(19, 0)
+
+#define CFG_LLM_TAIL_PTR_S 0
+#define CFG_LLM_TAIL_PTR_M GENMASK(11, 0)
+
struct hns_roce_cfg_global_param {
__le32 time_cfg_udp_port;
__le32 rsv[5];
@@ -1072,7 +1110,7 @@ struct hns_roce_cfg_global_param {
#define CFG_GLOBAL_PARAM_DATA_0_ROCEE_UDP_PORT_S 16
#define CFG_GLOBAL_PARAM_DATA_0_ROCEE_UDP_PORT_M GENMASK(31, 16)
-struct hns_roce_pf_res {
+struct hns_roce_pf_res_a {
__le32 rsv;
__le32 qpc_bt_idx_num;
__le32 srqc_bt_idx_num;
@@ -1111,6 +1149,32 @@ struct hns_roce_pf_res {
#define PF_RES_DATA_5_PF_EQC_BT_NUM_S 16
#define PF_RES_DATA_5_PF_EQC_BT_NUM_M GENMASK(25, 16)
+struct hns_roce_pf_res_b {
+ __le32 rsv0;
+ __le32 smac_idx_num;
+ __le32 sgid_idx_num;
+ __le32 qid_idx_sl_num;
+ __le32 rsv[2];
+};
+
+#define PF_RES_DATA_1_PF_SMAC_IDX_S 0
+#define PF_RES_DATA_1_PF_SMAC_IDX_M GENMASK(7, 0)
+
+#define PF_RES_DATA_1_PF_SMAC_NUM_S 8
+#define PF_RES_DATA_1_PF_SMAC_NUM_M GENMASK(16, 8)
+
+#define PF_RES_DATA_2_PF_SGID_IDX_S 0
+#define PF_RES_DATA_2_PF_SGID_IDX_M GENMASK(7, 0)
+
+#define PF_RES_DATA_2_PF_SGID_NUM_S 8
+#define PF_RES_DATA_2_PF_SGID_NUM_M GENMASK(16, 8)
+
+#define PF_RES_DATA_3_PF_QID_IDX_S 0
+#define PF_RES_DATA_3_PF_QID_IDX_M GENMASK(9, 0)
+
+#define PF_RES_DATA_3_PF_SL_NUM_S 16
+#define PF_RES_DATA_3_PF_SL_NUM_M GENMASK(26, 16)
+
struct hns_roce_vf_res_a {
__le32 vf_id;
__le32 vf_qpc_bt_idx_num;
@@ -1179,13 +1243,6 @@ struct hns_roce_vf_res_b {
#define VF_RES_B_DATA_3_VF_SL_NUM_S 16
#define VF_RES_B_DATA_3_VF_SL_NUM_M GENMASK(19, 16)
-/* Reg field definition */
-#define ROCEE_VF_SMAC_CFG1_VF_SMAC_H_S 0
-#define ROCEE_VF_SMAC_CFG1_VF_SMAC_H_M GENMASK(15, 0)
-
-#define ROCEE_VF_SGID_CFG4_SGID_TYPE_S 0
-#define ROCEE_VF_SGID_CFG4_SGID_TYPE_M GENMASK(1, 0)
-
struct hns_roce_cfg_bt_attr {
__le32 vf_qpc_cfg;
__le32 vf_srqc_cfg;
@@ -1230,6 +1287,32 @@ struct hns_roce_cfg_bt_attr {
#define CFG_BT_ATTR_DATA_3_VF_MPT_HOPNUM_S 8
#define CFG_BT_ATTR_DATA_3_VF_MPT_HOPNUM_M GENMASK(9, 8)
+struct hns_roce_cfg_sgid_tb {
+ __le32 table_idx_rsv;
+ __le32 vf_sgid_l;
+ __le32 vf_sgid_ml;
+ __le32 vf_sgid_mh;
+ __le32 vf_sgid_h;
+ __le32 vf_sgid_type_rsv;
+};
+#define CFG_SGID_TB_TABLE_IDX_S 0
+#define CFG_SGID_TB_TABLE_IDX_M GENMASK(7, 0)
+
+#define CFG_SGID_TB_VF_SGID_TYPE_S 0
+#define CFG_SGID_TB_VF_SGID_TYPE_M GENMASK(1, 0)
+
+struct hns_roce_cfg_smac_tb {
+ __le32 tb_idx_rsv;
+ __le32 vf_smac_l;
+ __le32 vf_smac_h_rsv;
+ __le32 rsv[3];
+};
+#define CFG_SMAC_TB_IDX_S 0
+#define CFG_SMAC_TB_IDX_M GENMASK(7, 0)
+
+#define CFG_SMAC_TB_VF_SMAC_H_S 0
+#define CFG_SMAC_TB_VF_SMAC_H_M GENMASK(15, 0)
+
struct hns_roce_cmq_desc {
__le16 opcode;
__le16 flag;
@@ -1276,8 +1359,32 @@ struct hns_roce_v2_cmq {
u16 last_status;
};
+enum hns_roce_link_table_type {
+ TSQ_LINK_TABLE,
+ TPQ_LINK_TABLE,
+};
+
+struct hns_roce_link_table {
+ struct hns_roce_buf_list table;
+ struct hns_roce_buf_list *pg_list;
+ u32 npages;
+ u32 pg_sz;
+};
+
+struct hns_roce_link_table_entry {
+ u32 blk_ba0;
+ u32 blk_ba1_nxt_ptr;
+};
+#define HNS_ROCE_LINK_TABLE_BA1_S 0
+#define HNS_ROCE_LINK_TABLE_BA1_M GENMASK(19, 0)
+
+#define HNS_ROCE_LINK_TABLE_NXT_PTR_S 20
+#define HNS_ROCE_LINK_TABLE_NXT_PTR_M GENMASK(31, 20)
+
struct hns_roce_v2_priv {
struct hns_roce_v2_cmq cmq;
+ struct hns_roce_link_table tsq;
+ struct hns_roce_link_table tpq;
};
struct hns_roce_eq_context {
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index 21b901cfa2d6..850032de8676 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -74,8 +74,7 @@ static int hns_roce_set_mac(struct hns_roce_dev *hr_dev, u8 port, u8 *addr)
return hr_dev->hw->set_mac(hr_dev, phy_port, addr);
}
-static int hns_roce_add_gid(const union ib_gid *gid,
- const struct ib_gid_attr *attr, void **context)
+static int hns_roce_add_gid(const struct ib_gid_attr *attr, void **context)
{
struct hns_roce_dev *hr_dev = to_hr_dev(attr->device);
u8 port = attr->port_num - 1;
@@ -87,8 +86,7 @@ static int hns_roce_add_gid(const union ib_gid *gid,
spin_lock_irqsave(&hr_dev->iboe.lock, flags);
- ret = hr_dev->hw->set_gid(hr_dev, port, attr->index,
- (union ib_gid *)gid, attr);
+ ret = hr_dev->hw->set_gid(hr_dev, port, attr->index, &attr->gid, attr);
spin_unlock_irqrestore(&hr_dev->iboe.lock, flags);
@@ -208,7 +206,8 @@ static int hns_roce_query_device(struct ib_device *ib_dev,
props->max_qp_wr = hr_dev->caps.max_wqes;
props->device_cap_flags = IB_DEVICE_PORT_ACTIVE_EVENT |
IB_DEVICE_RC_RNR_NAK_GEN;
- props->max_sge = max(hr_dev->caps.max_sq_sg, hr_dev->caps.max_rq_sg);
+ props->max_send_sge = hr_dev->caps.max_sq_sg;
+ props->max_recv_sge = hr_dev->caps.max_rq_sg;
props->max_sge_rd = 1;
props->max_cq = hr_dev->caps.num_cqs;
props->max_cqe = hr_dev->caps.max_cqes;
diff --git a/drivers/infiniband/hw/i40iw/Kconfig b/drivers/infiniband/hw/i40iw/Kconfig
index 2962979c06e9..d867ef1ac72a 100644
--- a/drivers/infiniband/hw/i40iw/Kconfig
+++ b/drivers/infiniband/hw/i40iw/Kconfig
@@ -1,6 +1,7 @@
config INFINIBAND_I40IW
tristate "Intel(R) Ethernet X722 iWARP Driver"
depends on INET && I40E
+ depends on IPV6 || !IPV6
depends on PCI
select GENERIC_ALLOCATOR
---help---
diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c
index 7b2655128b9f..423818a7d333 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_cm.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c
@@ -57,6 +57,7 @@
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/ip_fib.h>
+#include <net/secure_seq.h>
#include <net/tcp.h>
#include <asm/checksum.h>
@@ -2164,7 +2165,6 @@ static struct i40iw_cm_node *i40iw_make_cm_node(
struct i40iw_cm_listener *listener)
{
struct i40iw_cm_node *cm_node;
- struct timespec ts;
int oldarpindex;
int arpindex;
struct net_device *netdev = iwdev->netdev;
@@ -2214,10 +2214,26 @@ static struct i40iw_cm_node *i40iw_make_cm_node(
cm_node->tcp_cntxt.rcv_wscale = I40IW_CM_DEFAULT_RCV_WND_SCALE;
cm_node->tcp_cntxt.rcv_wnd =
I40IW_CM_DEFAULT_RCV_WND_SCALED >> I40IW_CM_DEFAULT_RCV_WND_SCALE;
- ts = current_kernel_time();
- cm_node->tcp_cntxt.loc_seq_num = ts.tv_nsec;
- cm_node->tcp_cntxt.mss = (cm_node->ipv4) ? (iwdev->vsi.mtu - I40IW_MTU_TO_MSS_IPV4) :
- (iwdev->vsi.mtu - I40IW_MTU_TO_MSS_IPV6);
+ if (cm_node->ipv4) {
+ cm_node->tcp_cntxt.loc_seq_num = secure_tcp_seq(htonl(cm_node->loc_addr[0]),
+ htonl(cm_node->rem_addr[0]),
+ htons(cm_node->loc_port),
+ htons(cm_node->rem_port));
+ cm_node->tcp_cntxt.mss = iwdev->vsi.mtu - I40IW_MTU_TO_MSS_IPV4;
+ } else if (IS_ENABLED(CONFIG_IPV6)) {
+ __be32 loc[4] = {
+ htonl(cm_node->loc_addr[0]), htonl(cm_node->loc_addr[1]),
+ htonl(cm_node->loc_addr[2]), htonl(cm_node->loc_addr[3])
+ };
+ __be32 rem[4] = {
+ htonl(cm_node->rem_addr[0]), htonl(cm_node->rem_addr[1]),
+ htonl(cm_node->rem_addr[2]), htonl(cm_node->rem_addr[3])
+ };
+ cm_node->tcp_cntxt.loc_seq_num = secure_tcpv6_seq(loc, rem,
+ htons(cm_node->loc_port),
+ htons(cm_node->rem_port));
+ cm_node->tcp_cntxt.mss = iwdev->vsi.mtu - I40IW_MTU_TO_MSS_IPV6;
+ }
cm_node->iwdev = iwdev;
cm_node->dev = &iwdev->sc_dev;
diff --git a/drivers/infiniband/hw/i40iw/i40iw_hw.c b/drivers/infiniband/hw/i40iw/i40iw_hw.c
index 2836c5420d60..55a1fbf0e670 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_hw.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_hw.c
@@ -435,45 +435,24 @@ void i40iw_process_aeq(struct i40iw_device *iwdev)
}
/**
- * i40iw_manage_apbvt - add or delete tcp port
+ * i40iw_cqp_manage_abvpt_cmd - send cqp command manage abpvt
* @iwdev: iwarp device
* @accel_local_port: port for apbvt
* @add_port: add or delete port
*/
-int i40iw_manage_apbvt(struct i40iw_device *iwdev, u16 accel_local_port, bool add_port)
+static enum i40iw_status_code
+i40iw_cqp_manage_abvpt_cmd(struct i40iw_device *iwdev,
+ u16 accel_local_port,
+ bool add_port)
{
struct i40iw_apbvt_info *info;
struct i40iw_cqp_request *cqp_request;
struct cqp_commands_info *cqp_info;
- unsigned long flags;
- struct i40iw_cm_core *cm_core = &iwdev->cm_core;
- enum i40iw_status_code status = 0;
- bool in_use;
-
- /* apbvt_lock is held across CQP delete APBVT OP (non-waiting) to
- * protect against race where add APBVT CQP can race ahead of the delete
- * APBVT for same port.
- */
- spin_lock_irqsave(&cm_core->apbvt_lock, flags);
-
- if (!add_port) {
- in_use = i40iw_port_in_use(cm_core, accel_local_port);
- if (in_use)
- goto exit;
- clear_bit(accel_local_port, cm_core->ports_in_use);
- } else {
- in_use = test_and_set_bit(accel_local_port,
- cm_core->ports_in_use);
- spin_unlock_irqrestore(&cm_core->apbvt_lock, flags);
- if (in_use)
- return 0;
- }
+ enum i40iw_status_code status;
cqp_request = i40iw_get_cqp_request(&iwdev->cqp, add_port);
- if (!cqp_request) {
- status = -ENOMEM;
- goto exit;
- }
+ if (!cqp_request)
+ return I40IW_ERR_NO_MEMORY;
cqp_info = &cqp_request->info;
info = &cqp_info->in.u.manage_apbvt_entry.info;
@@ -489,14 +468,54 @@ int i40iw_manage_apbvt(struct i40iw_device *iwdev, u16 accel_local_port, bool ad
status = i40iw_handle_cqp_op(iwdev, cqp_request);
if (status)
i40iw_pr_err("CQP-OP Manage APBVT entry fail");
-exit:
- if (!add_port)
- spin_unlock_irqrestore(&cm_core->apbvt_lock, flags);
return status;
}
/**
+ * i40iw_manage_apbvt - add or delete tcp port
+ * @iwdev: iwarp device
+ * @accel_local_port: port for apbvt
+ * @add_port: add or delete port
+ */
+enum i40iw_status_code i40iw_manage_apbvt(struct i40iw_device *iwdev,
+ u16 accel_local_port,
+ bool add_port)
+{
+ struct i40iw_cm_core *cm_core = &iwdev->cm_core;
+ enum i40iw_status_code status;
+ unsigned long flags;
+ bool in_use;
+
+ /* apbvt_lock is held across CQP delete APBVT OP (non-waiting) to
+ * protect against race where add APBVT CQP can race ahead of the delete
+ * APBVT for same port.
+ */
+ if (add_port) {
+ spin_lock_irqsave(&cm_core->apbvt_lock, flags);
+ in_use = __test_and_set_bit(accel_local_port,
+ cm_core->ports_in_use);
+ spin_unlock_irqrestore(&cm_core->apbvt_lock, flags);
+ if (in_use)
+ return 0;
+ return i40iw_cqp_manage_abvpt_cmd(iwdev, accel_local_port,
+ true);
+ } else {
+ spin_lock_irqsave(&cm_core->apbvt_lock, flags);
+ in_use = i40iw_port_in_use(cm_core, accel_local_port);
+ if (in_use) {
+ spin_unlock_irqrestore(&cm_core->apbvt_lock, flags);
+ return 0;
+ }
+ __clear_bit(accel_local_port, cm_core->ports_in_use);
+ status = i40iw_cqp_manage_abvpt_cmd(iwdev, accel_local_port,
+ false);
+ spin_unlock_irqrestore(&cm_core->apbvt_lock, flags);
+ return status;
+ }
+}
+
+/**
* i40iw_manage_arp_cache - manage hw arp cache
* @iwdev: iwarp device
* @mac_addr: mac address ptr
diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
index 68679ad4c6da..7d85414742ff 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
@@ -71,7 +71,8 @@ static int i40iw_query_device(struct ib_device *ibdev,
props->max_mr_size = I40IW_MAX_OUTBOUND_MESSAGE_SIZE;
props->max_qp = iwdev->max_qp - iwdev->used_qps;
props->max_qp_wr = I40IW_MAX_QP_WRS;
- props->max_sge = I40IW_MAX_WQ_FRAGMENT_COUNT;
+ props->max_send_sge = I40IW_MAX_WQ_FRAGMENT_COUNT;
+ props->max_recv_sge = I40IW_MAX_WQ_FRAGMENT_COUNT;
props->max_cq = iwdev->max_cq - iwdev->used_cqs;
props->max_cqe = iwdev->max_cqe;
props->max_mr = iwdev->max_mr - iwdev->used_mrs;
@@ -1409,6 +1410,7 @@ static void i40iw_set_hugetlb_values(u64 addr, struct i40iw_mr *iwmr)
struct vm_area_struct *vma;
struct hstate *h;
+ down_read(&current->mm->mmap_sem);
vma = find_vma(current->mm, addr);
if (vma && is_vm_hugetlb_page(vma)) {
h = hstate_vma(vma);
@@ -1417,6 +1419,7 @@ static void i40iw_set_hugetlb_values(u64 addr, struct i40iw_mr *iwmr)
iwmr->page_msk = huge_page_mask(h);
}
}
+ up_read(&current->mm->mmap_sem);
}
/**
diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c
index 9345d5b546d1..e9e3a6f390db 100644
--- a/drivers/infiniband/hw/mlx4/ah.c
+++ b/drivers/infiniband/hw/mlx4/ah.c
@@ -82,12 +82,11 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd,
struct mlx4_ib_ah *ah)
{
struct mlx4_ib_dev *ibdev = to_mdev(pd->device);
+ const struct ib_gid_attr *gid_attr;
struct mlx4_dev *dev = ibdev->dev;
int is_mcast = 0;
struct in6_addr in6;
u16 vlan_tag = 0xffff;
- union ib_gid sgid;
- struct ib_gid_attr gid_attr;
const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr);
int ret;
@@ -96,25 +95,30 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd,
is_mcast = 1;
memcpy(ah->av.eth.mac, ah_attr->roce.dmac, ETH_ALEN);
- ret = ib_get_cached_gid(pd->device, rdma_ah_get_port_num(ah_attr),
- grh->sgid_index, &sgid, &gid_attr);
- if (ret)
- return ERR_PTR(ret);
eth_zero_addr(ah->av.eth.s_mac);
- if (is_vlan_dev(gid_attr.ndev))
- vlan_tag = vlan_dev_vlan_id(gid_attr.ndev);
- memcpy(ah->av.eth.s_mac, gid_attr.ndev->dev_addr, ETH_ALEN);
- dev_put(gid_attr.ndev);
+
+ /*
+ * If sgid_attr is NULL we are being called by mlx4_ib_create_ah_slave
+ * and we are directly creating an AV for a slave's gid_index.
+ */
+ gid_attr = ah_attr->grh.sgid_attr;
+ if (gid_attr) {
+ if (is_vlan_dev(gid_attr->ndev))
+ vlan_tag = vlan_dev_vlan_id(gid_attr->ndev);
+ memcpy(ah->av.eth.s_mac, gid_attr->ndev->dev_addr, ETH_ALEN);
+ ret = mlx4_ib_gid_index_to_real_index(ibdev, gid_attr);
+ if (ret < 0)
+ return ERR_PTR(ret);
+ ah->av.eth.gid_index = ret;
+ } else {
+ /* mlx4_ib_create_ah_slave fills in the s_mac and the vlan */
+ ah->av.eth.gid_index = ah_attr->grh.sgid_index;
+ }
+
if (vlan_tag < 0x1000)
vlan_tag |= (rdma_ah_get_sl(ah_attr) & 7) << 13;
ah->av.eth.port_pd = cpu_to_be32(to_mpd(pd)->pdn |
(rdma_ah_get_port_num(ah_attr) << 24));
- ret = mlx4_ib_gid_index_to_real_index(ibdev,
- rdma_ah_get_port_num(ah_attr),
- grh->sgid_index);
- if (ret < 0)
- return ERR_PTR(ret);
- ah->av.eth.gid_index = ret;
ah->av.eth.vlan = cpu_to_be16(vlan_tag);
ah->av.eth.hop_limit = grh->hop_limit;
if (rdma_ah_get_static_rate(ah_attr)) {
@@ -173,6 +177,40 @@ struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr,
return create_ib_ah(pd, ah_attr, ah); /* never fails */
}
+/* AH's created via this call must be free'd by mlx4_ib_destroy_ah. */
+struct ib_ah *mlx4_ib_create_ah_slave(struct ib_pd *pd,
+ struct rdma_ah_attr *ah_attr,
+ int slave_sgid_index, u8 *s_mac,
+ u16 vlan_tag)
+{
+ struct rdma_ah_attr slave_attr = *ah_attr;
+ struct mlx4_ib_ah *mah;
+ struct ib_ah *ah;
+
+ slave_attr.grh.sgid_attr = NULL;
+ slave_attr.grh.sgid_index = slave_sgid_index;
+ ah = mlx4_ib_create_ah(pd, &slave_attr, NULL);
+ if (IS_ERR(ah))
+ return ah;
+
+ ah->device = pd->device;
+ ah->pd = pd;
+ ah->type = ah_attr->type;
+ mah = to_mah(ah);
+
+ /* get rid of force-loopback bit */
+ mah->av.ib.port_pd &= cpu_to_be32(0x7FFFFFFF);
+
+ if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE)
+ memcpy(mah->av.eth.s_mac, s_mac, 6);
+
+ if (vlan_tag < 0x1000)
+ vlan_tag |= (rdma_ah_get_sl(ah_attr) & 7) << 13;
+ mah->av.eth.vlan = cpu_to_be16(vlan_tag);
+
+ return ah;
+}
+
int mlx4_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr)
{
struct mlx4_ib_ah *ah = to_mah(ibah);
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index 90a3e2642c2e..8d730a69793d 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -1367,13 +1367,10 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
struct mlx4_mad_snd_buf *sqp_mad;
struct ib_ah *ah;
struct ib_qp *send_qp = NULL;
- struct ib_global_route *grh;
unsigned wire_tx_ix = 0;
int ret = 0;
u16 wire_pkey_ix;
int src_qpnum;
- u8 sgid_index;
-
sqp_ctx = dev->sriov.sqps[port-1];
@@ -1394,16 +1391,11 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
send_qp = sqp->qp;
/* create ah */
- grh = rdma_ah_retrieve_grh(attr);
- sgid_index = grh->sgid_index;
- grh->sgid_index = 0;
- ah = rdma_create_ah(sqp_ctx->pd, attr);
+ ah = mlx4_ib_create_ah_slave(sqp_ctx->pd, attr,
+ rdma_ah_retrieve_grh(attr)->sgid_index,
+ s_mac, vlan_id);
if (IS_ERR(ah))
return -ENOMEM;
- grh->sgid_index = sgid_index;
- to_mah(ah)->av.ib.gid_index = sgid_index;
- /* get rid of force-loopback bit */
- to_mah(ah)->av.ib.port_pd &= cpu_to_be32(0x7FFFFFFF);
spin_lock(&sqp->tx_lock);
if (sqp->tx_ix_head - sqp->tx_ix_tail >=
(MLX4_NUM_TUNNEL_BUFS - 1))
@@ -1445,12 +1437,6 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
wr.wr.num_sge = 1;
wr.wr.opcode = IB_WR_SEND;
wr.wr.send_flags = IB_SEND_SIGNALED;
- if (s_mac)
- memcpy(to_mah(ah)->av.eth.s_mac, s_mac, 6);
- if (vlan_id < 0x1000)
- vlan_id |= (rdma_ah_get_sl(attr) & 7) << 13;
- to_mah(ah)->av.eth.vlan = cpu_to_be16(vlan_id);
-
ret = ib_post_send(send_qp, &wr.wr, &bad_wr);
if (!ret)
@@ -1461,7 +1447,7 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
spin_unlock(&sqp->tx_lock);
sqp->tx_ring[wire_tx_ix].ah = NULL;
out:
- rdma_destroy_ah(ah);
+ mlx4_ib_destroy_ah(ah);
return ret;
}
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 4ec519afc45b..ca0f1ee26091 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -246,9 +246,7 @@ static int mlx4_ib_update_gids(struct gid_entry *gids,
return mlx4_ib_update_gids_v1(gids, ibdev, port_num);
}
-static int mlx4_ib_add_gid(const union ib_gid *gid,
- const struct ib_gid_attr *attr,
- void **context)
+static int mlx4_ib_add_gid(const struct ib_gid_attr *attr, void **context)
{
struct mlx4_ib_dev *ibdev = to_mdev(attr->device);
struct mlx4_ib_iboe *iboe = &ibdev->iboe;
@@ -271,8 +269,9 @@ static int mlx4_ib_add_gid(const union ib_gid *gid,
port_gid_table = &iboe->gids[attr->port_num - 1];
spin_lock_bh(&iboe->lock);
for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i) {
- if (!memcmp(&port_gid_table->gids[i].gid, gid, sizeof(*gid)) &&
- (port_gid_table->gids[i].gid_type == attr->gid_type)) {
+ if (!memcmp(&port_gid_table->gids[i].gid,
+ &attr->gid, sizeof(attr->gid)) &&
+ port_gid_table->gids[i].gid_type == attr->gid_type) {
found = i;
break;
}
@@ -289,7 +288,8 @@ static int mlx4_ib_add_gid(const union ib_gid *gid,
ret = -ENOMEM;
} else {
*context = port_gid_table->gids[free].ctx;
- memcpy(&port_gid_table->gids[free].gid, gid, sizeof(*gid));
+ memcpy(&port_gid_table->gids[free].gid,
+ &attr->gid, sizeof(attr->gid));
port_gid_table->gids[free].gid_type = attr->gid_type;
port_gid_table->gids[free].ctx->real_index = free;
port_gid_table->gids[free].ctx->refcount = 1;
@@ -380,17 +380,15 @@ static int mlx4_ib_del_gid(const struct ib_gid_attr *attr, void **context)
}
int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev,
- u8 port_num, int index)
+ const struct ib_gid_attr *attr)
{
struct mlx4_ib_iboe *iboe = &ibdev->iboe;
struct gid_cache_context *ctx = NULL;
- union ib_gid gid;
struct mlx4_port_gid_table *port_gid_table;
int real_index = -EINVAL;
int i;
- int ret;
unsigned long flags;
- struct ib_gid_attr attr;
+ u8 port_num = attr->port_num;
if (port_num > MLX4_MAX_PORTS)
return -EINVAL;
@@ -399,21 +397,15 @@ int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev,
port_num = 1;
if (!rdma_cap_roce_gid_table(&ibdev->ib_dev, port_num))
- return index;
-
- ret = ib_get_cached_gid(&ibdev->ib_dev, port_num, index, &gid, &attr);
- if (ret)
- return ret;
-
- if (attr.ndev)
- dev_put(attr.ndev);
+ return attr->index;
spin_lock_irqsave(&iboe->lock, flags);
port_gid_table = &iboe->gids[port_num - 1];
for (i = 0; i < MLX4_MAX_PORT_GIDS; ++i)
- if (!memcmp(&port_gid_table->gids[i].gid, &gid, sizeof(gid)) &&
- attr.gid_type == port_gid_table->gids[i].gid_type) {
+ if (!memcmp(&port_gid_table->gids[i].gid,
+ &attr->gid, sizeof(attr->gid)) &&
+ attr->gid_type == port_gid_table->gids[i].gid_type) {
ctx = port_gid_table->gids[i].ctx;
break;
}
@@ -525,8 +517,8 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
props->page_size_cap = dev->dev->caps.page_size_cap;
props->max_qp = dev->dev->quotas.qp;
props->max_qp_wr = dev->dev->caps.max_wqes - MLX4_IB_SQ_MAX_SPARE;
- props->max_sge = min(dev->dev->caps.max_sq_sg,
- dev->dev->caps.max_rq_sg);
+ props->max_send_sge = dev->dev->caps.max_sq_sg;
+ props->max_recv_sge = dev->dev->caps.max_rq_sg;
props->max_sge_rd = MLX4_MAX_SGE_RD;
props->max_cq = dev->dev->quotas.cq;
props->max_cqe = dev->dev->caps.max_cqes;
@@ -770,7 +762,8 @@ static int eth_link_query_port(struct ib_device *ibdev, u8 port,
IB_WIDTH_4X : IB_WIDTH_1X;
props->active_speed = (((u8 *)mailbox->buf)[5] == 0x20 /*56Gb*/) ?
IB_SPEED_FDR : IB_SPEED_QDR;
- props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_IP_BASED_GIDS;
+ props->port_cap_flags = IB_PORT_CM_SUP;
+ props->ip_gids = true;
props->gid_tbl_len = mdev->dev->caps.gid_table_len[port];
props->max_msg_sz = mdev->dev->caps.max_msg_sz;
props->pkey_tbl_len = 1;
@@ -2709,6 +2702,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
ibdev->ib_dev.modify_qp = mlx4_ib_modify_qp;
ibdev->ib_dev.query_qp = mlx4_ib_query_qp;
ibdev->ib_dev.destroy_qp = mlx4_ib_destroy_qp;
+ ibdev->ib_dev.drain_sq = mlx4_ib_drain_sq;
+ ibdev->ib_dev.drain_rq = mlx4_ib_drain_rq;
ibdev->ib_dev.post_send = mlx4_ib_post_send;
ibdev->ib_dev.post_recv = mlx4_ib_post_recv;
ibdev->ib_dev.create_cq = mlx4_ib_create_cq;
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index 7b1429917aba..1a0fad30633b 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -760,6 +760,10 @@ void mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq);
struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct rdma_ah_attr *ah_attr,
struct ib_udata *udata);
+struct ib_ah *mlx4_ib_create_ah_slave(struct ib_pd *pd,
+ struct rdma_ah_attr *ah_attr,
+ int slave_sgid_index, u8 *s_mac,
+ u16 vlan_tag);
int mlx4_ib_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr);
int mlx4_ib_destroy_ah(struct ib_ah *ah);
@@ -778,6 +782,8 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
struct ib_qp_init_attr *init_attr,
struct ib_udata *udata);
int mlx4_ib_destroy_qp(struct ib_qp *qp);
+void mlx4_ib_drain_sq(struct ib_qp *qp);
+void mlx4_ib_drain_rq(struct ib_qp *qp);
int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int attr_mask, struct ib_udata *udata);
int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask,
@@ -900,7 +906,7 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags,
int mr_access_flags, struct ib_pd *pd,
struct ib_udata *udata);
int mlx4_ib_gid_index_to_real_index(struct mlx4_ib_dev *ibdev,
- u8 port_num, int index);
+ const struct ib_gid_attr *attr);
void mlx4_sched_ib_sl2vl_update_work(struct mlx4_ib_dev *ibdev,
int port);
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 3b8045fd23ed..408e720fd923 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -1859,8 +1859,7 @@ static int _mlx4_set_path(struct mlx4_ib_dev *dev,
if (rdma_ah_get_ah_flags(ah) & IB_AH_GRH) {
const struct ib_global_route *grh = rdma_ah_read_grh(ah);
int real_sgid_index =
- mlx4_ib_gid_index_to_real_index(dev, port,
- grh->sgid_index);
+ mlx4_ib_gid_index_to_real_index(dev, grh->sgid_attr);
if (real_sgid_index < 0)
return real_sgid_index;
@@ -2176,6 +2175,7 @@ static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type,
{
struct ib_uobject *ibuobject;
struct ib_srq *ibsrq;
+ const struct ib_gid_attr *gid_attr = NULL;
struct ib_rwq_ind_table *rwq_ind_tbl;
enum ib_qp_type qp_type;
struct mlx4_ib_dev *dev;
@@ -2356,29 +2356,17 @@ static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type,
if (attr_mask & IB_QP_AV) {
u8 port_num = mlx4_is_bonded(dev->dev) ? 1 :
attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
- union ib_gid gid;
- struct ib_gid_attr gid_attr = {.gid_type = IB_GID_TYPE_IB};
u16 vlan = 0xffff;
u8 smac[ETH_ALEN];
- int status = 0;
int is_eth =
rdma_cap_eth_ah(&dev->ib_dev, port_num) &&
rdma_ah_get_ah_flags(&attr->ah_attr) & IB_AH_GRH;
if (is_eth) {
- int index =
- rdma_ah_read_grh(&attr->ah_attr)->sgid_index;
-
- status = ib_get_cached_gid(&dev->ib_dev, port_num,
- index, &gid, &gid_attr);
- if (!status) {
- vlan = rdma_vlan_dev_vlan_id(gid_attr.ndev);
- memcpy(smac, gid_attr.ndev->dev_addr, ETH_ALEN);
- dev_put(gid_attr.ndev);
- }
+ gid_attr = attr->ah_attr.grh.sgid_attr;
+ vlan = rdma_vlan_dev_vlan_id(gid_attr->ndev);
+ memcpy(smac, gid_attr->ndev->dev_addr, ETH_ALEN);
}
- if (status)
- goto out;
if (mlx4_set_path(dev, attr, attr_mask, qp, &context->pri_path,
port_num, vlan, smac))
@@ -2389,7 +2377,7 @@ static int __mlx4_ib_modify_qp(void *src, enum mlx4_ib_source_type src_type,
if (is_eth &&
(cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR)) {
- u8 qpc_roce_mode = gid_type_to_qpc(gid_attr.gid_type);
+ u8 qpc_roce_mode = gid_type_to_qpc(gid_attr->gid_type);
if (qpc_roce_mode == MLX4_QPC_ROCE_MODE_UNDEFINED) {
err = -EINVAL;
@@ -3181,10 +3169,8 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1].
guid_cache[ah->av.ib.gid_index];
} else {
- ib_get_cached_gid(ib_dev,
- be32_to_cpu(ah->av.ib.port_pd) >> 24,
- ah->av.ib.gid_index,
- &sqp->ud_header.grh.source_gid, NULL);
+ sqp->ud_header.grh.source_gid =
+ ah->ibah.sgid_attr->gid;
}
}
memcpy(sqp->ud_header.grh.destination_gid.raw,
@@ -3582,8 +3568,8 @@ static void add_zero_len_inline(void *wqe)
inl->byte_count = cpu_to_be32(1 << 31);
}
-int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
- struct ib_send_wr **bad_wr)
+static int _mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
+ struct ib_send_wr **bad_wr, bool drain)
{
struct mlx4_ib_qp *qp = to_mqp(ibqp);
void *wqe;
@@ -3623,7 +3609,8 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
}
spin_lock_irqsave(&qp->sq.lock, flags);
- if (mdev->dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) {
+ if (mdev->dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR &&
+ !drain) {
err = -EIO;
*bad_wr = wr;
nreq = 0;
@@ -3913,8 +3900,14 @@ out:
return err;
}
-int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
- struct ib_recv_wr **bad_wr)
+int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
+ struct ib_send_wr **bad_wr)
+{
+ return _mlx4_ib_post_send(ibqp, wr, bad_wr, false);
+}
+
+static int _mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
+ struct ib_recv_wr **bad_wr, bool drain)
{
struct mlx4_ib_qp *qp = to_mqp(ibqp);
struct mlx4_wqe_data_seg *scat;
@@ -3929,7 +3922,8 @@ int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
max_gs = qp->rq.max_gs;
spin_lock_irqsave(&qp->rq.lock, flags);
- if (mdev->dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) {
+ if (mdev->dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR &&
+ !drain) {
err = -EIO;
*bad_wr = wr;
nreq = 0;
@@ -4000,6 +3994,12 @@ out:
return err;
}
+int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
+ struct ib_recv_wr **bad_wr)
+{
+ return _mlx4_ib_post_recv(ibqp, wr, bad_wr, false);
+}
+
static inline enum ib_qp_state to_ib_qp_state(enum mlx4_qp_state mlx4_state)
{
switch (mlx4_state) {
@@ -4047,9 +4047,9 @@ static void to_rdma_ah_attr(struct mlx4_ib_dev *ibdev,
u8 port_num = path->sched_queue & 0x40 ? 2 : 1;
memset(ah_attr, 0, sizeof(*ah_attr));
- ah_attr->type = rdma_ah_find_type(&ibdev->ib_dev, port_num);
if (port_num == 0 || port_num > dev->caps.num_ports)
return;
+ ah_attr->type = rdma_ah_find_type(&ibdev->ib_dev, port_num);
if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE)
rdma_ah_set_sl(ah_attr, ((path->sched_queue >> 3) & 0x7) |
@@ -4465,3 +4465,131 @@ int mlx4_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl)
kfree(ib_rwq_ind_tbl);
return 0;
}
+
+struct mlx4_ib_drain_cqe {
+ struct ib_cqe cqe;
+ struct completion done;
+};
+
+static void mlx4_ib_drain_qp_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+ struct mlx4_ib_drain_cqe *cqe = container_of(wc->wr_cqe,
+ struct mlx4_ib_drain_cqe,
+ cqe);
+
+ complete(&cqe->done);
+}
+
+/* This function returns only once the drained WR was completed */
+static void handle_drain_completion(struct ib_cq *cq,
+ struct mlx4_ib_drain_cqe *sdrain,
+ struct mlx4_ib_dev *dev)
+{
+ struct mlx4_dev *mdev = dev->dev;
+
+ if (cq->poll_ctx == IB_POLL_DIRECT) {
+ while (wait_for_completion_timeout(&sdrain->done, HZ / 10) <= 0)
+ ib_process_cq_direct(cq, -1);
+ return;
+ }
+
+ if (mdev->persist->state == MLX4_DEVICE_STATE_INTERNAL_ERROR) {
+ struct mlx4_ib_cq *mcq = to_mcq(cq);
+ bool triggered = false;
+ unsigned long flags;
+
+ spin_lock_irqsave(&dev->reset_flow_resource_lock, flags);
+ /* Make sure that the CQ handler won't run if wasn't run yet */
+ if (!mcq->mcq.reset_notify_added)
+ mcq->mcq.reset_notify_added = 1;
+ else
+ triggered = true;
+ spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags);
+
+ if (triggered) {
+ /* Wait for any scheduled/running task to be ended */
+ switch (cq->poll_ctx) {
+ case IB_POLL_SOFTIRQ:
+ irq_poll_disable(&cq->iop);
+ irq_poll_enable(&cq->iop);
+ break;
+ case IB_POLL_WORKQUEUE:
+ cancel_work_sync(&cq->work);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ }
+ }
+
+ /* Run the CQ handler - this makes sure that the drain WR will
+ * be processed if wasn't processed yet.
+ */
+ mcq->mcq.comp(&mcq->mcq);
+ }
+
+ wait_for_completion(&sdrain->done);
+}
+
+void mlx4_ib_drain_sq(struct ib_qp *qp)
+{
+ struct ib_cq *cq = qp->send_cq;
+ struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
+ struct mlx4_ib_drain_cqe sdrain;
+ struct ib_send_wr *bad_swr;
+ struct ib_rdma_wr swr = {
+ .wr = {
+ .next = NULL,
+ { .wr_cqe = &sdrain.cqe, },
+ .opcode = IB_WR_RDMA_WRITE,
+ },
+ };
+ int ret;
+ struct mlx4_ib_dev *dev = to_mdev(qp->device);
+ struct mlx4_dev *mdev = dev->dev;
+
+ ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
+ if (ret && mdev->persist->state != MLX4_DEVICE_STATE_INTERNAL_ERROR) {
+ WARN_ONCE(ret, "failed to drain send queue: %d\n", ret);
+ return;
+ }
+
+ sdrain.cqe.done = mlx4_ib_drain_qp_done;
+ init_completion(&sdrain.done);
+
+ ret = _mlx4_ib_post_send(qp, &swr.wr, &bad_swr, true);
+ if (ret) {
+ WARN_ONCE(ret, "failed to drain send queue: %d\n", ret);
+ return;
+ }
+
+ handle_drain_completion(cq, &sdrain, dev);
+}
+
+void mlx4_ib_drain_rq(struct ib_qp *qp)
+{
+ struct ib_cq *cq = qp->recv_cq;
+ struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
+ struct mlx4_ib_drain_cqe rdrain;
+ struct ib_recv_wr rwr = {}, *bad_rwr;
+ int ret;
+ struct mlx4_ib_dev *dev = to_mdev(qp->device);
+ struct mlx4_dev *mdev = dev->dev;
+
+ ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
+ if (ret && mdev->persist->state != MLX4_DEVICE_STATE_INTERNAL_ERROR) {
+ WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret);
+ return;
+ }
+
+ rwr.wr_cqe = &rdrain.cqe;
+ rdrain.cqe.done = mlx4_ib_drain_qp_done;
+ init_completion(&rdrain.done);
+
+ ret = _mlx4_ib_post_recv(qp, &rwr, &bad_rwr, true);
+ if (ret) {
+ WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret);
+ return;
+ }
+
+ handle_drain_completion(cq, &rdrain, dev);
+}
diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile
index d42b922bede8..577e4c418bae 100644
--- a/drivers/infiniband/hw/mlx5/Makefile
+++ b/drivers/infiniband/hw/mlx5/Makefile
@@ -3,3 +3,4 @@ obj-$(CONFIG_MLX5_INFINIBAND) += mlx5_ib.o
mlx5_ib-y := main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o ib_virt.o cmd.o cong.o
mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o
mlx5_ib-$(CONFIG_MLX5_ESWITCH) += ib_rep.o
+mlx5_ib-$(CONFIG_INFINIBAND_USER_ACCESS) += devx.o
diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c
index e6bde32a83f3..ffd03bf1a71e 100644
--- a/drivers/infiniband/hw/mlx5/ah.c
+++ b/drivers/infiniband/hw/mlx5/ah.c
@@ -37,7 +37,6 @@ static struct ib_ah *create_ib_ah(struct mlx5_ib_dev *dev,
struct rdma_ah_attr *ah_attr)
{
enum ib_gid_type gid_type;
- int err;
if (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) {
const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr);
@@ -53,18 +52,12 @@ static struct ib_ah *create_ib_ah(struct mlx5_ib_dev *dev,
ah->av.stat_rate_sl = (rdma_ah_get_static_rate(ah_attr) << 4);
if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) {
- err = mlx5_get_roce_gid_type(dev, ah_attr->port_num,
- ah_attr->grh.sgid_index,
- &gid_type);
- if (err)
- return ERR_PTR(err);
+ gid_type = ah_attr->grh.sgid_attr->gid_type;
memcpy(ah->av.rmac, ah_attr->roce.dmac,
sizeof(ah_attr->roce.dmac));
ah->av.udp_sport =
- mlx5_get_roce_udp_sport(dev,
- rdma_ah_get_port_num(ah_attr),
- rdma_ah_read_grh(ah_attr)->sgid_index);
+ mlx5_get_roce_udp_sport(dev, ah_attr->grh.sgid_attr);
ah->av.stat_rate_sl |= (rdma_ah_get_sl(ah_attr) & 0x7) << 1;
if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
#define MLX5_ECN_ENABLED BIT(1)
diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c
index ccc0b5d06a7d..c84fef9a8a08 100644
--- a/drivers/infiniband/hw/mlx5/cmd.c
+++ b/drivers/infiniband/hw/mlx5/cmd.c
@@ -185,3 +185,15 @@ int mlx5_cmd_dealloc_memic(struct mlx5_memic *memic, u64 addr, u64 length)
return err;
}
+
+int mlx5_cmd_query_ext_ppcnt_counters(struct mlx5_core_dev *dev, void *out)
+{
+ u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {};
+ int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
+
+ MLX5_SET(ppcnt_reg, in, local_port, 1);
+
+ MLX5_SET(ppcnt_reg, in, grp, MLX5_ETHERNET_EXTENDED_COUNTERS_GROUP);
+ return mlx5_core_access_reg(dev, in, sz, out, sz, MLX5_REG_PPCNT,
+ 0, 0);
+}
diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h
index 98ea4648c655..88cbb1c41703 100644
--- a/drivers/infiniband/hw/mlx5/cmd.h
+++ b/drivers/infiniband/hw/mlx5/cmd.h
@@ -41,6 +41,7 @@ int mlx5_cmd_dump_fill_mkey(struct mlx5_core_dev *dev, u32 *mkey);
int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey);
int mlx5_cmd_query_cong_params(struct mlx5_core_dev *dev, int cong_point,
void *out, int out_size);
+int mlx5_cmd_query_ext_ppcnt_counters(struct mlx5_core_dev *dev, void *out);
int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *mdev,
void *in, int in_size);
int mlx5_cmd_alloc_memic(struct mlx5_memic *memic, phys_addr_t *addr,
diff --git a/drivers/infiniband/hw/mlx5/cong.c b/drivers/infiniband/hw/mlx5/cong.c
index 985fa2637390..7e4e358a4fd8 100644
--- a/drivers/infiniband/hw/mlx5/cong.c
+++ b/drivers/infiniband/hw/mlx5/cong.c
@@ -359,9 +359,6 @@ static ssize_t get_param(struct file *filp, char __user *buf, size_t count,
int ret;
char lbuf[11];
- if (*pos)
- return 0;
-
ret = mlx5_ib_get_cc_params(param->dev, param->port_num, offset, &var);
if (ret)
return ret;
@@ -370,11 +367,7 @@ static ssize_t get_param(struct file *filp, char __user *buf, size_t count,
if (ret < 0)
return ret;
- if (copy_to_user(buf, lbuf, ret))
- return -EFAULT;
-
- *pos += ret;
- return ret;
+ return simple_read_from_buffer(buf, count, pos, lbuf, ret);
}
static const struct file_operations dbg_cc_fops = {
diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
new file mode 100644
index 000000000000..7f9d73b03421
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -0,0 +1,1107 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved.
+ */
+
+#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/uverbs_types.h>
+#include <rdma/uverbs_ioctl.h>
+#include <rdma/mlx5_user_ioctl_cmds.h>
+#include <rdma/ib_umem.h>
+#include <linux/mlx5/driver.h>
+#include <linux/mlx5/fs.h>
+#include "mlx5_ib.h"
+
+#define UVERBS_MODULE_NAME mlx5_ib
+#include <rdma/uverbs_named_ioctl.h>
+
+#define MLX5_MAX_DESTROY_INBOX_SIZE_DW MLX5_ST_SZ_DW(delete_fte_in)
+struct devx_obj {
+ struct mlx5_core_dev *mdev;
+ u32 obj_id;
+ u32 dinlen; /* destroy inbox length */
+ u32 dinbox[MLX5_MAX_DESTROY_INBOX_SIZE_DW];
+};
+
+struct devx_umem {
+ struct mlx5_core_dev *mdev;
+ struct ib_umem *umem;
+ u32 page_offset;
+ int page_shift;
+ int ncont;
+ u32 dinlen;
+ u32 dinbox[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)];
+};
+
+struct devx_umem_reg_cmd {
+ void *in;
+ u32 inlen;
+ u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
+};
+
+static struct mlx5_ib_ucontext *devx_ufile2uctx(struct ib_uverbs_file *file)
+{
+ return to_mucontext(ib_uverbs_get_ucontext(file));
+}
+
+int mlx5_ib_devx_create(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *context)
+{
+ u32 in[MLX5_ST_SZ_DW(create_uctx_in)] = {0};
+ u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0};
+ u64 general_obj_types;
+ void *hdr;
+ int err;
+
+ hdr = MLX5_ADDR_OF(create_uctx_in, in, hdr);
+
+ general_obj_types = MLX5_CAP_GEN_64(dev->mdev, general_obj_types);
+ if (!(general_obj_types & MLX5_GENERAL_OBJ_TYPES_CAP_UCTX) ||
+ !(general_obj_types & MLX5_GENERAL_OBJ_TYPES_CAP_UMEM))
+ return -EINVAL;
+
+ if (!capable(CAP_NET_RAW))
+ return -EPERM;
+
+ MLX5_SET(general_obj_in_cmd_hdr, hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
+ MLX5_SET(general_obj_in_cmd_hdr, hdr, obj_type, MLX5_OBJ_TYPE_UCTX);
+
+ err = mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
+ if (err)
+ return err;
+
+ context->devx_uid = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
+ return 0;
+}
+
+void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_ucontext *context)
+{
+ u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {0};
+ u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {0};
+
+ MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_UCTX);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, context->devx_uid);
+
+ mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out));
+}
+
+static int devx_is_valid_obj_id(struct devx_obj *obj, const void *in)
+{
+ u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
+ u32 obj_id;
+
+ switch (opcode) {
+ case MLX5_CMD_OP_MODIFY_GENERAL_OBJECT:
+ case MLX5_CMD_OP_QUERY_GENERAL_OBJECT:
+ obj_id = MLX5_GET(general_obj_in_cmd_hdr, in, obj_id);
+ break;
+ case MLX5_CMD_OP_QUERY_MKEY:
+ obj_id = MLX5_GET(query_mkey_in, in, mkey_index);
+ break;
+ case MLX5_CMD_OP_QUERY_CQ:
+ obj_id = MLX5_GET(query_cq_in, in, cqn);
+ break;
+ case MLX5_CMD_OP_MODIFY_CQ:
+ obj_id = MLX5_GET(modify_cq_in, in, cqn);
+ break;
+ case MLX5_CMD_OP_QUERY_SQ:
+ obj_id = MLX5_GET(query_sq_in, in, sqn);
+ break;
+ case MLX5_CMD_OP_MODIFY_SQ:
+ obj_id = MLX5_GET(modify_sq_in, in, sqn);
+ break;
+ case MLX5_CMD_OP_QUERY_RQ:
+ obj_id = MLX5_GET(query_rq_in, in, rqn);
+ break;
+ case MLX5_CMD_OP_MODIFY_RQ:
+ obj_id = MLX5_GET(modify_rq_in, in, rqn);
+ break;
+ case MLX5_CMD_OP_QUERY_RMP:
+ obj_id = MLX5_GET(query_rmp_in, in, rmpn);
+ break;
+ case MLX5_CMD_OP_MODIFY_RMP:
+ obj_id = MLX5_GET(modify_rmp_in, in, rmpn);
+ break;
+ case MLX5_CMD_OP_QUERY_RQT:
+ obj_id = MLX5_GET(query_rqt_in, in, rqtn);
+ break;
+ case MLX5_CMD_OP_MODIFY_RQT:
+ obj_id = MLX5_GET(modify_rqt_in, in, rqtn);
+ break;
+ case MLX5_CMD_OP_QUERY_TIR:
+ obj_id = MLX5_GET(query_tir_in, in, tirn);
+ break;
+ case MLX5_CMD_OP_MODIFY_TIR:
+ obj_id = MLX5_GET(modify_tir_in, in, tirn);
+ break;
+ case MLX5_CMD_OP_QUERY_TIS:
+ obj_id = MLX5_GET(query_tis_in, in, tisn);
+ break;
+ case MLX5_CMD_OP_MODIFY_TIS:
+ obj_id = MLX5_GET(modify_tis_in, in, tisn);
+ break;
+ case MLX5_CMD_OP_QUERY_FLOW_TABLE:
+ obj_id = MLX5_GET(query_flow_table_in, in, table_id);
+ break;
+ case MLX5_CMD_OP_MODIFY_FLOW_TABLE:
+ obj_id = MLX5_GET(modify_flow_table_in, in, table_id);
+ break;
+ case MLX5_CMD_OP_QUERY_FLOW_GROUP:
+ obj_id = MLX5_GET(query_flow_group_in, in, group_id);
+ break;
+ case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY:
+ obj_id = MLX5_GET(query_fte_in, in, flow_index);
+ break;
+ case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
+ obj_id = MLX5_GET(set_fte_in, in, flow_index);
+ break;
+ case MLX5_CMD_OP_QUERY_Q_COUNTER:
+ obj_id = MLX5_GET(query_q_counter_in, in, counter_set_id);
+ break;
+ case MLX5_CMD_OP_QUERY_FLOW_COUNTER:
+ obj_id = MLX5_GET(query_flow_counter_in, in, flow_counter_id);
+ break;
+ case MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT:
+ obj_id = MLX5_GET(general_obj_in_cmd_hdr, in, obj_id);
+ break;
+ case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT:
+ obj_id = MLX5_GET(query_scheduling_element_in, in,
+ scheduling_element_id);
+ break;
+ case MLX5_CMD_OP_MODIFY_SCHEDULING_ELEMENT:
+ obj_id = MLX5_GET(modify_scheduling_element_in, in,
+ scheduling_element_id);
+ break;
+ case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT:
+ obj_id = MLX5_GET(add_vxlan_udp_dport_in, in, vxlan_udp_port);
+ break;
+ case MLX5_CMD_OP_QUERY_L2_TABLE_ENTRY:
+ obj_id = MLX5_GET(query_l2_table_entry_in, in, table_index);
+ break;
+ case MLX5_CMD_OP_SET_L2_TABLE_ENTRY:
+ obj_id = MLX5_GET(set_l2_table_entry_in, in, table_index);
+ break;
+ case MLX5_CMD_OP_QUERY_QP:
+ obj_id = MLX5_GET(query_qp_in, in, qpn);
+ break;
+ case MLX5_CMD_OP_RST2INIT_QP:
+ obj_id = MLX5_GET(rst2init_qp_in, in, qpn);
+ break;
+ case MLX5_CMD_OP_INIT2RTR_QP:
+ obj_id = MLX5_GET(init2rtr_qp_in, in, qpn);
+ break;
+ case MLX5_CMD_OP_RTR2RTS_QP:
+ obj_id = MLX5_GET(rtr2rts_qp_in, in, qpn);
+ break;
+ case MLX5_CMD_OP_RTS2RTS_QP:
+ obj_id = MLX5_GET(rts2rts_qp_in, in, qpn);
+ break;
+ case MLX5_CMD_OP_SQERR2RTS_QP:
+ obj_id = MLX5_GET(sqerr2rts_qp_in, in, qpn);
+ break;
+ case MLX5_CMD_OP_2ERR_QP:
+ obj_id = MLX5_GET(qp_2err_in, in, qpn);
+ break;
+ case MLX5_CMD_OP_2RST_QP:
+ obj_id = MLX5_GET(qp_2rst_in, in, qpn);
+ break;
+ case MLX5_CMD_OP_QUERY_DCT:
+ obj_id = MLX5_GET(query_dct_in, in, dctn);
+ break;
+ case MLX5_CMD_OP_QUERY_XRQ:
+ obj_id = MLX5_GET(query_xrq_in, in, xrqn);
+ break;
+ case MLX5_CMD_OP_QUERY_XRC_SRQ:
+ obj_id = MLX5_GET(query_xrc_srq_in, in, xrc_srqn);
+ break;
+ case MLX5_CMD_OP_ARM_XRC_SRQ:
+ obj_id = MLX5_GET(arm_xrc_srq_in, in, xrc_srqn);
+ break;
+ case MLX5_CMD_OP_QUERY_SRQ:
+ obj_id = MLX5_GET(query_srq_in, in, srqn);
+ break;
+ case MLX5_CMD_OP_ARM_RQ:
+ obj_id = MLX5_GET(arm_rq_in, in, srq_number);
+ break;
+ case MLX5_CMD_OP_DRAIN_DCT:
+ case MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION:
+ obj_id = MLX5_GET(drain_dct_in, in, dctn);
+ break;
+ case MLX5_CMD_OP_ARM_XRQ:
+ obj_id = MLX5_GET(arm_xrq_in, in, xrqn);
+ break;
+ default:
+ return false;
+ }
+
+ if (obj_id == obj->obj_id)
+ return true;
+
+ return false;
+}
+
+static bool devx_is_obj_create_cmd(const void *in)
+{
+ u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
+
+ switch (opcode) {
+ case MLX5_CMD_OP_CREATE_GENERAL_OBJECT:
+ case MLX5_CMD_OP_CREATE_MKEY:
+ case MLX5_CMD_OP_CREATE_CQ:
+ case MLX5_CMD_OP_ALLOC_PD:
+ case MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN:
+ case MLX5_CMD_OP_CREATE_RMP:
+ case MLX5_CMD_OP_CREATE_SQ:
+ case MLX5_CMD_OP_CREATE_RQ:
+ case MLX5_CMD_OP_CREATE_RQT:
+ case MLX5_CMD_OP_CREATE_TIR:
+ case MLX5_CMD_OP_CREATE_TIS:
+ case MLX5_CMD_OP_ALLOC_Q_COUNTER:
+ case MLX5_CMD_OP_CREATE_FLOW_TABLE:
+ case MLX5_CMD_OP_CREATE_FLOW_GROUP:
+ case MLX5_CMD_OP_ALLOC_FLOW_COUNTER:
+ case MLX5_CMD_OP_ALLOC_ENCAP_HEADER:
+ case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT:
+ case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT:
+ case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT:
+ case MLX5_CMD_OP_SET_L2_TABLE_ENTRY:
+ case MLX5_CMD_OP_CREATE_QP:
+ case MLX5_CMD_OP_CREATE_SRQ:
+ case MLX5_CMD_OP_CREATE_XRC_SRQ:
+ case MLX5_CMD_OP_CREATE_DCT:
+ case MLX5_CMD_OP_CREATE_XRQ:
+ case MLX5_CMD_OP_ATTACH_TO_MCG:
+ case MLX5_CMD_OP_ALLOC_XRCD:
+ return true;
+ case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
+ {
+ u16 op_mod = MLX5_GET(set_fte_in, in, op_mod);
+ if (op_mod == 0)
+ return true;
+ return false;
+ }
+ default:
+ return false;
+ }
+}
+
+static bool devx_is_obj_modify_cmd(const void *in)
+{
+ u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
+
+ switch (opcode) {
+ case MLX5_CMD_OP_MODIFY_GENERAL_OBJECT:
+ case MLX5_CMD_OP_MODIFY_CQ:
+ case MLX5_CMD_OP_MODIFY_RMP:
+ case MLX5_CMD_OP_MODIFY_SQ:
+ case MLX5_CMD_OP_MODIFY_RQ:
+ case MLX5_CMD_OP_MODIFY_RQT:
+ case MLX5_CMD_OP_MODIFY_TIR:
+ case MLX5_CMD_OP_MODIFY_TIS:
+ case MLX5_CMD_OP_MODIFY_FLOW_TABLE:
+ case MLX5_CMD_OP_MODIFY_SCHEDULING_ELEMENT:
+ case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT:
+ case MLX5_CMD_OP_SET_L2_TABLE_ENTRY:
+ case MLX5_CMD_OP_RST2INIT_QP:
+ case MLX5_CMD_OP_INIT2RTR_QP:
+ case MLX5_CMD_OP_RTR2RTS_QP:
+ case MLX5_CMD_OP_RTS2RTS_QP:
+ case MLX5_CMD_OP_SQERR2RTS_QP:
+ case MLX5_CMD_OP_2ERR_QP:
+ case MLX5_CMD_OP_2RST_QP:
+ case MLX5_CMD_OP_ARM_XRC_SRQ:
+ case MLX5_CMD_OP_ARM_RQ:
+ case MLX5_CMD_OP_DRAIN_DCT:
+ case MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION:
+ case MLX5_CMD_OP_ARM_XRQ:
+ return true;
+ case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
+ {
+ u16 op_mod = MLX5_GET(set_fte_in, in, op_mod);
+
+ if (op_mod == 1)
+ return true;
+ return false;
+ }
+ default:
+ return false;
+ }
+}
+
+static bool devx_is_obj_query_cmd(const void *in)
+{
+ u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
+
+ switch (opcode) {
+ case MLX5_CMD_OP_QUERY_GENERAL_OBJECT:
+ case MLX5_CMD_OP_QUERY_MKEY:
+ case MLX5_CMD_OP_QUERY_CQ:
+ case MLX5_CMD_OP_QUERY_RMP:
+ case MLX5_CMD_OP_QUERY_SQ:
+ case MLX5_CMD_OP_QUERY_RQ:
+ case MLX5_CMD_OP_QUERY_RQT:
+ case MLX5_CMD_OP_QUERY_TIR:
+ case MLX5_CMD_OP_QUERY_TIS:
+ case MLX5_CMD_OP_QUERY_Q_COUNTER:
+ case MLX5_CMD_OP_QUERY_FLOW_TABLE:
+ case MLX5_CMD_OP_QUERY_FLOW_GROUP:
+ case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY:
+ case MLX5_CMD_OP_QUERY_FLOW_COUNTER:
+ case MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT:
+ case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT:
+ case MLX5_CMD_OP_QUERY_L2_TABLE_ENTRY:
+ case MLX5_CMD_OP_QUERY_QP:
+ case MLX5_CMD_OP_QUERY_SRQ:
+ case MLX5_CMD_OP_QUERY_XRC_SRQ:
+ case MLX5_CMD_OP_QUERY_DCT:
+ case MLX5_CMD_OP_QUERY_XRQ:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static bool devx_is_general_cmd(void *in)
+{
+ u16 opcode = MLX5_GET(general_obj_in_cmd_hdr, in, opcode);
+
+ switch (opcode) {
+ case MLX5_CMD_OP_QUERY_HCA_CAP:
+ case MLX5_CMD_OP_QUERY_VPORT_STATE:
+ case MLX5_CMD_OP_QUERY_ADAPTER:
+ case MLX5_CMD_OP_QUERY_ISSI:
+ case MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT:
+ case MLX5_CMD_OP_QUERY_ROCE_ADDRESS:
+ case MLX5_CMD_OP_QUERY_VNIC_ENV:
+ case MLX5_CMD_OP_QUERY_VPORT_COUNTER:
+ case MLX5_CMD_OP_GET_DROPPED_PACKET_LOG:
+ case MLX5_CMD_OP_NOP:
+ case MLX5_CMD_OP_QUERY_CONG_STATUS:
+ case MLX5_CMD_OP_QUERY_CONG_PARAMS:
+ case MLX5_CMD_OP_QUERY_CONG_STATISTICS:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_EQN)(struct ib_device *ib_dev,
+ struct ib_uverbs_file *file,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_ib_dev *dev = to_mdev(ib_dev);
+ int user_vector;
+ int dev_eqn;
+ unsigned int irqn;
+ int err;
+
+ if (uverbs_copy_from(&user_vector, attrs,
+ MLX5_IB_ATTR_DEVX_QUERY_EQN_USER_VEC))
+ return -EFAULT;
+
+ err = mlx5_vector2eqn(dev->mdev, user_vector, &dev_eqn, &irqn);
+ if (err < 0)
+ return err;
+
+ if (uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_QUERY_EQN_DEV_EQN,
+ &dev_eqn, sizeof(dev_eqn)))
+ return -EFAULT;
+
+ return 0;
+}
+
+/*
+ *Security note:
+ * The hardware protection mechanism works like this: Each device object that
+ * is subject to UAR doorbells (QP/SQ/CQ) gets a UAR ID (called uar_page in
+ * the device specification manual) upon its creation. Then upon doorbell,
+ * hardware fetches the object context for which the doorbell was rang, and
+ * validates that the UAR through which the DB was rang matches the UAR ID
+ * of the object.
+ * If no match the doorbell is silently ignored by the hardware. Of course,
+ * the user cannot ring a doorbell on a UAR that was not mapped to it.
+ * Now in devx, as the devx kernel does not manipulate the QP/SQ/CQ command
+ * mailboxes (except tagging them with UID), we expose to the user its UAR
+ * ID, so it can embed it in these objects in the expected specification
+ * format. So the only thing the user can do is hurt itself by creating a
+ * QP/SQ/CQ with a UAR ID other than his, and then in this case other users
+ * may ring a doorbell on its objects.
+ * The consequence of that will be that another user can schedule a QP/SQ
+ * of the buggy user for execution (just insert it to the hardware schedule
+ * queue or arm its CQ for event generation), no further harm is expected.
+ */
+static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_QUERY_UAR)(struct ib_device *ib_dev,
+ struct ib_uverbs_file *file,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_ib_ucontext *c = devx_ufile2uctx(file);
+ u32 user_idx;
+ s32 dev_idx;
+
+ if (uverbs_copy_from(&user_idx, attrs,
+ MLX5_IB_ATTR_DEVX_QUERY_UAR_USER_IDX))
+ return -EFAULT;
+
+ dev_idx = bfregn_to_uar_index(to_mdev(ib_dev),
+ &c->bfregi, user_idx, true);
+ if (dev_idx < 0)
+ return dev_idx;
+
+ if (uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_QUERY_UAR_DEV_IDX,
+ &dev_idx, sizeof(dev_idx)))
+ return -EFAULT;
+
+ return 0;
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OTHER)(struct ib_device *ib_dev,
+ struct ib_uverbs_file *file,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_ib_ucontext *c = devx_ufile2uctx(file);
+ struct mlx5_ib_dev *dev = to_mdev(ib_dev);
+ void *cmd_in = uverbs_attr_get_alloced_ptr(
+ attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_IN);
+ int cmd_out_len = uverbs_attr_get_len(attrs,
+ MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT);
+ void *cmd_out;
+ int err;
+
+ if (!c->devx_uid)
+ return -EPERM;
+
+ /* Only white list of some general HCA commands are allowed for this method. */
+ if (!devx_is_general_cmd(cmd_in))
+ return -EINVAL;
+
+ cmd_out = kvzalloc(cmd_out_len, GFP_KERNEL);
+ if (!cmd_out)
+ return -ENOMEM;
+
+ MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, c->devx_uid);
+ err = mlx5_cmd_exec(dev->mdev, cmd_in,
+ uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_IN),
+ cmd_out, cmd_out_len);
+ if (err)
+ goto other_cmd_free;
+
+ err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT, cmd_out, cmd_out_len);
+
+other_cmd_free:
+ kvfree(cmd_out);
+ return err;
+}
+
+static void devx_obj_build_destroy_cmd(void *in, void *out, void *din,
+ u32 *dinlen,
+ u32 *obj_id)
+{
+ u16 obj_type = MLX5_GET(general_obj_in_cmd_hdr, in, obj_type);
+ u16 uid = MLX5_GET(general_obj_in_cmd_hdr, in, uid);
+
+ *obj_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
+ *dinlen = MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr);
+
+ MLX5_SET(general_obj_in_cmd_hdr, din, obj_id, *obj_id);
+ MLX5_SET(general_obj_in_cmd_hdr, din, uid, uid);
+
+ switch (MLX5_GET(general_obj_in_cmd_hdr, in, opcode)) {
+ case MLX5_CMD_OP_CREATE_GENERAL_OBJECT:
+ MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
+ MLX5_SET(general_obj_in_cmd_hdr, din, obj_type, obj_type);
+ break;
+
+ case MLX5_CMD_OP_CREATE_MKEY:
+ MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_MKEY);
+ break;
+ case MLX5_CMD_OP_CREATE_CQ:
+ MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_CQ);
+ break;
+ case MLX5_CMD_OP_ALLOC_PD:
+ MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DEALLOC_PD);
+ break;
+ case MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN:
+ MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+ MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN);
+ break;
+ case MLX5_CMD_OP_CREATE_RMP:
+ MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_RMP);
+ break;
+ case MLX5_CMD_OP_CREATE_SQ:
+ MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_SQ);
+ break;
+ case MLX5_CMD_OP_CREATE_RQ:
+ MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_RQ);
+ break;
+ case MLX5_CMD_OP_CREATE_RQT:
+ MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_RQT);
+ break;
+ case MLX5_CMD_OP_CREATE_TIR:
+ MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_TIR);
+ break;
+ case MLX5_CMD_OP_CREATE_TIS:
+ MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_TIS);
+ break;
+ case MLX5_CMD_OP_ALLOC_Q_COUNTER:
+ MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+ MLX5_CMD_OP_DEALLOC_Q_COUNTER);
+ break;
+ case MLX5_CMD_OP_CREATE_FLOW_TABLE:
+ *dinlen = MLX5_ST_SZ_BYTES(destroy_flow_table_in);
+ *obj_id = MLX5_GET(create_flow_table_out, out, table_id);
+ MLX5_SET(destroy_flow_table_in, din, other_vport,
+ MLX5_GET(create_flow_table_in, in, other_vport));
+ MLX5_SET(destroy_flow_table_in, din, vport_number,
+ MLX5_GET(create_flow_table_in, in, vport_number));
+ MLX5_SET(destroy_flow_table_in, din, table_type,
+ MLX5_GET(create_flow_table_in, in, table_type));
+ MLX5_SET(destroy_flow_table_in, din, table_id, *obj_id);
+ MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+ MLX5_CMD_OP_DESTROY_FLOW_TABLE);
+ break;
+ case MLX5_CMD_OP_CREATE_FLOW_GROUP:
+ *dinlen = MLX5_ST_SZ_BYTES(destroy_flow_group_in);
+ *obj_id = MLX5_GET(create_flow_group_out, out, group_id);
+ MLX5_SET(destroy_flow_group_in, din, other_vport,
+ MLX5_GET(create_flow_group_in, in, other_vport));
+ MLX5_SET(destroy_flow_group_in, din, vport_number,
+ MLX5_GET(create_flow_group_in, in, vport_number));
+ MLX5_SET(destroy_flow_group_in, din, table_type,
+ MLX5_GET(create_flow_group_in, in, table_type));
+ MLX5_SET(destroy_flow_group_in, din, table_id,
+ MLX5_GET(create_flow_group_in, in, table_id));
+ MLX5_SET(destroy_flow_group_in, din, group_id, *obj_id);
+ MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+ MLX5_CMD_OP_DESTROY_FLOW_GROUP);
+ break;
+ case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
+ *dinlen = MLX5_ST_SZ_BYTES(delete_fte_in);
+ *obj_id = MLX5_GET(set_fte_in, in, flow_index);
+ MLX5_SET(delete_fte_in, din, other_vport,
+ MLX5_GET(set_fte_in, in, other_vport));
+ MLX5_SET(delete_fte_in, din, vport_number,
+ MLX5_GET(set_fte_in, in, vport_number));
+ MLX5_SET(delete_fte_in, din, table_type,
+ MLX5_GET(set_fte_in, in, table_type));
+ MLX5_SET(delete_fte_in, din, table_id,
+ MLX5_GET(set_fte_in, in, table_id));
+ MLX5_SET(delete_fte_in, din, flow_index, *obj_id);
+ MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+ MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY);
+ break;
+ case MLX5_CMD_OP_ALLOC_FLOW_COUNTER:
+ MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+ MLX5_CMD_OP_DEALLOC_FLOW_COUNTER);
+ break;
+ case MLX5_CMD_OP_ALLOC_ENCAP_HEADER:
+ MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+ MLX5_CMD_OP_DEALLOC_ENCAP_HEADER);
+ break;
+ case MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT:
+ MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+ MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT);
+ break;
+ case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT:
+ *dinlen = MLX5_ST_SZ_BYTES(destroy_scheduling_element_in);
+ *obj_id = MLX5_GET(create_scheduling_element_out, out,
+ scheduling_element_id);
+ MLX5_SET(destroy_scheduling_element_in, din,
+ scheduling_hierarchy,
+ MLX5_GET(create_scheduling_element_in, in,
+ scheduling_hierarchy));
+ MLX5_SET(destroy_scheduling_element_in, din,
+ scheduling_element_id, *obj_id);
+ MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+ MLX5_CMD_OP_DESTROY_SCHEDULING_ELEMENT);
+ break;
+ case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT:
+ *dinlen = MLX5_ST_SZ_BYTES(delete_vxlan_udp_dport_in);
+ *obj_id = MLX5_GET(add_vxlan_udp_dport_in, in, vxlan_udp_port);
+ MLX5_SET(delete_vxlan_udp_dport_in, din, vxlan_udp_port, *obj_id);
+ MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+ MLX5_CMD_OP_DELETE_VXLAN_UDP_DPORT);
+ break;
+ case MLX5_CMD_OP_SET_L2_TABLE_ENTRY:
+ *dinlen = MLX5_ST_SZ_BYTES(delete_l2_table_entry_in);
+ *obj_id = MLX5_GET(set_l2_table_entry_in, in, table_index);
+ MLX5_SET(delete_l2_table_entry_in, din, table_index, *obj_id);
+ MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+ MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY);
+ break;
+ case MLX5_CMD_OP_CREATE_QP:
+ MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_QP);
+ break;
+ case MLX5_CMD_OP_CREATE_SRQ:
+ MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_SRQ);
+ break;
+ case MLX5_CMD_OP_CREATE_XRC_SRQ:
+ MLX5_SET(general_obj_in_cmd_hdr, din, opcode,
+ MLX5_CMD_OP_DESTROY_XRC_SRQ);
+ break;
+ case MLX5_CMD_OP_CREATE_DCT:
+ MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_DCT);
+ break;
+ case MLX5_CMD_OP_CREATE_XRQ:
+ MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_XRQ);
+ break;
+ case MLX5_CMD_OP_ATTACH_TO_MCG:
+ *dinlen = MLX5_ST_SZ_BYTES(detach_from_mcg_in);
+ MLX5_SET(detach_from_mcg_in, din, qpn,
+ MLX5_GET(attach_to_mcg_in, in, qpn));
+ memcpy(MLX5_ADDR_OF(detach_from_mcg_in, din, multicast_gid),
+ MLX5_ADDR_OF(attach_to_mcg_in, in, multicast_gid),
+ MLX5_FLD_SZ_BYTES(attach_to_mcg_in, multicast_gid));
+ MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DETACH_FROM_MCG);
+ break;
+ case MLX5_CMD_OP_ALLOC_XRCD:
+ MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DEALLOC_XRCD);
+ break;
+ default:
+ /* The entry must match to one of the devx_is_obj_create_cmd */
+ WARN_ON(true);
+ break;
+ }
+}
+
+static int devx_obj_cleanup(struct ib_uobject *uobject,
+ enum rdma_remove_reason why)
+{
+ u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
+ struct devx_obj *obj = uobject->object;
+ int ret;
+
+ ret = mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, out, sizeof(out));
+ if (ib_is_destroy_retryable(ret, why, uobject))
+ return ret;
+
+ kfree(obj);
+ return ret;
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(struct ib_device *ib_dev,
+ struct ib_uverbs_file *file,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_ib_ucontext *c = devx_ufile2uctx(file);
+ struct mlx5_ib_dev *dev = to_mdev(ib_dev);
+ void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN);
+ int cmd_out_len = uverbs_attr_get_len(attrs,
+ MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT);
+ void *cmd_out;
+ struct ib_uobject *uobj;
+ struct devx_obj *obj;
+ int err;
+
+ if (!c->devx_uid)
+ return -EPERM;
+
+ if (!devx_is_obj_create_cmd(cmd_in))
+ return -EINVAL;
+
+ obj = kzalloc(sizeof(struct devx_obj), GFP_KERNEL);
+ if (!obj)
+ return -ENOMEM;
+
+ cmd_out = kvzalloc(cmd_out_len, GFP_KERNEL);
+ if (!cmd_out) {
+ err = -ENOMEM;
+ goto obj_free;
+ }
+
+ MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, c->devx_uid);
+ err = mlx5_cmd_exec(dev->mdev, cmd_in,
+ uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN),
+ cmd_out, cmd_out_len);
+ if (err)
+ goto cmd_free;
+
+ uobj = uverbs_attr_get_uobject(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_HANDLE);
+ uobj->object = obj;
+ obj->mdev = dev->mdev;
+ devx_obj_build_destroy_cmd(cmd_in, cmd_out, obj->dinbox, &obj->dinlen, &obj->obj_id);
+ WARN_ON(obj->dinlen > MLX5_MAX_DESTROY_INBOX_SIZE_DW * sizeof(u32));
+
+ err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT, cmd_out, cmd_out_len);
+ if (err)
+ goto cmd_free;
+
+ kvfree(cmd_out);
+ return 0;
+
+cmd_free:
+ kvfree(cmd_out);
+obj_free:
+ kfree(obj);
+ return err;
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_MODIFY)(struct ib_device *ib_dev,
+ struct ib_uverbs_file *file,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_ib_ucontext *c = devx_ufile2uctx(file);
+ struct mlx5_ib_dev *dev = to_mdev(ib_dev);
+ void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN);
+ int cmd_out_len = uverbs_attr_get_len(attrs,
+ MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT);
+ struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs,
+ MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE);
+ void *cmd_out;
+ int err;
+
+ if (!c->devx_uid)
+ return -EPERM;
+
+ if (!devx_is_obj_modify_cmd(cmd_in))
+ return -EINVAL;
+
+ if (!devx_is_valid_obj_id(uobj->object, cmd_in))
+ return -EINVAL;
+
+ cmd_out = kvzalloc(cmd_out_len, GFP_KERNEL);
+ if (!cmd_out)
+ return -ENOMEM;
+
+ MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, c->devx_uid);
+ err = mlx5_cmd_exec(dev->mdev, cmd_in,
+ uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN),
+ cmd_out, cmd_out_len);
+ if (err)
+ goto other_cmd_free;
+
+ err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT,
+ cmd_out, cmd_out_len);
+
+other_cmd_free:
+ kvfree(cmd_out);
+ return err;
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_QUERY)(struct ib_device *ib_dev,
+ struct ib_uverbs_file *file,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_ib_ucontext *c = devx_ufile2uctx(file);
+ struct mlx5_ib_dev *dev = to_mdev(ib_dev);
+ void *cmd_in = uverbs_attr_get_alloced_ptr(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN);
+ int cmd_out_len = uverbs_attr_get_len(attrs,
+ MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT);
+ struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs,
+ MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE);
+ void *cmd_out;
+ int err;
+
+ if (!c->devx_uid)
+ return -EPERM;
+
+ if (!devx_is_obj_query_cmd(cmd_in))
+ return -EINVAL;
+
+ if (!devx_is_valid_obj_id(uobj->object, cmd_in))
+ return -EINVAL;
+
+ cmd_out = kvzalloc(cmd_out_len, GFP_KERNEL);
+ if (!cmd_out)
+ return -ENOMEM;
+
+ MLX5_SET(general_obj_in_cmd_hdr, cmd_in, uid, c->devx_uid);
+ err = mlx5_cmd_exec(dev->mdev, cmd_in,
+ uverbs_attr_get_len(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN),
+ cmd_out, cmd_out_len);
+ if (err)
+ goto other_cmd_free;
+
+ err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT, cmd_out, cmd_out_len);
+
+other_cmd_free:
+ kvfree(cmd_out);
+ return err;
+}
+
+static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext,
+ struct uverbs_attr_bundle *attrs,
+ struct devx_umem *obj)
+{
+ u64 addr;
+ size_t size;
+ int access;
+ int npages;
+ int err;
+ u32 page_mask;
+
+ if (uverbs_copy_from(&addr, attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_ADDR) ||
+ uverbs_copy_from(&size, attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_LEN) ||
+ uverbs_copy_from(&access, attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS))
+ return -EFAULT;
+
+ err = ib_check_mr_access(access);
+ if (err)
+ return err;
+
+ obj->umem = ib_umem_get(ucontext, addr, size, access, 0);
+ if (IS_ERR(obj->umem))
+ return PTR_ERR(obj->umem);
+
+ mlx5_ib_cont_pages(obj->umem, obj->umem->address,
+ MLX5_MKEY_PAGE_SHIFT_MASK, &npages,
+ &obj->page_shift, &obj->ncont, NULL);
+
+ if (!npages) {
+ ib_umem_release(obj->umem);
+ return -EINVAL;
+ }
+
+ page_mask = (1 << obj->page_shift) - 1;
+ obj->page_offset = obj->umem->address & page_mask;
+
+ return 0;
+}
+
+static int devx_umem_reg_cmd_alloc(struct devx_umem *obj,
+ struct devx_umem_reg_cmd *cmd)
+{
+ cmd->inlen = MLX5_ST_SZ_BYTES(create_umem_in) +
+ (MLX5_ST_SZ_BYTES(mtt) * obj->ncont);
+ cmd->in = kvzalloc(cmd->inlen, GFP_KERNEL);
+ return cmd->in ? 0 : -ENOMEM;
+}
+
+static void devx_umem_reg_cmd_free(struct devx_umem_reg_cmd *cmd)
+{
+ kvfree(cmd->in);
+}
+
+static void devx_umem_reg_cmd_build(struct mlx5_ib_dev *dev,
+ struct devx_umem *obj,
+ struct devx_umem_reg_cmd *cmd)
+{
+ void *umem;
+ __be64 *mtt;
+
+ umem = MLX5_ADDR_OF(create_umem_in, cmd->in, umem);
+ mtt = (__be64 *)MLX5_ADDR_OF(umem, umem, mtt);
+
+ MLX5_SET(general_obj_in_cmd_hdr, cmd->in, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
+ MLX5_SET(general_obj_in_cmd_hdr, cmd->in, obj_type, MLX5_OBJ_TYPE_UMEM);
+ MLX5_SET64(umem, umem, num_of_mtt, obj->ncont);
+ MLX5_SET(umem, umem, log_page_size, obj->page_shift -
+ MLX5_ADAPTER_PAGE_SHIFT);
+ MLX5_SET(umem, umem, page_offset, obj->page_offset);
+ mlx5_ib_populate_pas(dev, obj->umem, obj->page_shift, mtt,
+ (obj->umem->writable ? MLX5_IB_MTT_WRITE : 0) |
+ MLX5_IB_MTT_READ);
+}
+
+static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_UMEM_REG)(struct ib_device *ib_dev,
+ struct ib_uverbs_file *file,
+ struct uverbs_attr_bundle *attrs)
+{
+ struct mlx5_ib_ucontext *c = devx_ufile2uctx(file);
+ struct mlx5_ib_dev *dev = to_mdev(ib_dev);
+ struct devx_umem_reg_cmd cmd;
+ struct devx_umem *obj;
+ struct ib_uobject *uobj;
+ u32 obj_id;
+ int err;
+
+ if (!c->devx_uid)
+ return -EPERM;
+
+ uobj = uverbs_attr_get_uobject(attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_HANDLE);
+ obj = kzalloc(sizeof(struct devx_umem), GFP_KERNEL);
+ if (!obj)
+ return -ENOMEM;
+
+ err = devx_umem_get(dev, &c->ibucontext, attrs, obj);
+ if (err)
+ goto err_obj_free;
+
+ err = devx_umem_reg_cmd_alloc(obj, &cmd);
+ if (err)
+ goto err_umem_release;
+
+ devx_umem_reg_cmd_build(dev, obj, &cmd);
+
+ MLX5_SET(general_obj_in_cmd_hdr, cmd.in, uid, c->devx_uid);
+ err = mlx5_cmd_exec(dev->mdev, cmd.in, cmd.inlen, cmd.out,
+ sizeof(cmd.out));
+ if (err)
+ goto err_umem_reg_cmd_free;
+
+ obj->mdev = dev->mdev;
+ uobj->object = obj;
+ devx_obj_build_destroy_cmd(cmd.in, cmd.out, obj->dinbox, &obj->dinlen, &obj_id);
+ err = uverbs_copy_to(attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_OUT_ID, &obj_id, sizeof(obj_id));
+ if (err)
+ goto err_umem_destroy;
+
+ devx_umem_reg_cmd_free(&cmd);
+
+ return 0;
+
+err_umem_destroy:
+ mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, cmd.out, sizeof(cmd.out));
+err_umem_reg_cmd_free:
+ devx_umem_reg_cmd_free(&cmd);
+err_umem_release:
+ ib_umem_release(obj->umem);
+err_obj_free:
+ kfree(obj);
+ return err;
+}
+
+static int devx_umem_cleanup(struct ib_uobject *uobject,
+ enum rdma_remove_reason why)
+{
+ struct devx_umem *obj = uobject->object;
+ u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
+ int err;
+
+ err = mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, out, sizeof(out));
+ if (ib_is_destroy_retryable(err, why, uobject))
+ return err;
+
+ ib_umem_release(obj->umem);
+ kfree(obj);
+ return 0;
+}
+
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_DEVX_UMEM_REG,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_UMEM_REG_HANDLE,
+ MLX5_IB_OBJECT_DEVX_UMEM,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_ADDR,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_LEN,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_UMEM_REG_OUT_ID,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+ MLX5_IB_METHOD_DEVX_UMEM_DEREG,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_UMEM_DEREG_HANDLE,
+ MLX5_IB_OBJECT_DEVX_UMEM,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_DEVX_QUERY_EQN,
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_QUERY_EQN_USER_VEC,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_QUERY_EQN_DEV_EQN,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_DEVX_QUERY_UAR,
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_QUERY_UAR_USER_IDX,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_DEVX_QUERY_UAR_DEV_IDX,
+ UVERBS_ATTR_TYPE(u32),
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_DEVX_OTHER,
+ UVERBS_ATTR_PTR_IN(
+ MLX5_IB_ATTR_DEVX_OTHER_CMD_IN,
+ UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
+ UA_MANDATORY,
+ UA_ALLOC_AND_COPY),
+ UVERBS_ATTR_PTR_OUT(
+ MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT,
+ UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)),
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_DEVX_OBJ_CREATE,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_CREATE_HANDLE,
+ MLX5_IB_OBJECT_DEVX_OBJ,
+ UVERBS_ACCESS_NEW,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(
+ MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN,
+ UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
+ UA_MANDATORY,
+ UA_ALLOC_AND_COPY),
+ UVERBS_ATTR_PTR_OUT(
+ MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT,
+ UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)),
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD_DESTROY(
+ MLX5_IB_METHOD_DEVX_OBJ_DESTROY,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_DESTROY_HANDLE,
+ MLX5_IB_OBJECT_DEVX_OBJ,
+ UVERBS_ACCESS_DESTROY,
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_DEVX_OBJ_MODIFY,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE,
+ MLX5_IB_OBJECT_DEVX_OBJ,
+ UVERBS_ACCESS_WRITE,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(
+ MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN,
+ UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
+ UA_MANDATORY,
+ UA_ALLOC_AND_COPY),
+ UVERBS_ATTR_PTR_OUT(
+ MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT,
+ UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)),
+ UA_MANDATORY));
+
+DECLARE_UVERBS_NAMED_METHOD(
+ MLX5_IB_METHOD_DEVX_OBJ_QUERY,
+ UVERBS_ATTR_IDR(MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE,
+ MLX5_IB_OBJECT_DEVX_OBJ,
+ UVERBS_ACCESS_READ,
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_IN(
+ MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN,
+ UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_in_cmd_hdr)),
+ UA_MANDATORY,
+ UA_ALLOC_AND_COPY),
+ UVERBS_ATTR_PTR_OUT(
+ MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT,
+ UVERBS_ATTR_MIN_SIZE(MLX5_ST_SZ_BYTES(general_obj_out_cmd_hdr)),
+ UA_MANDATORY));
+
+DECLARE_UVERBS_GLOBAL_METHODS(MLX5_IB_OBJECT_DEVX,
+ &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OTHER),
+ &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_QUERY_UAR),
+ &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_QUERY_EQN));
+
+DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_OBJ,
+ UVERBS_TYPE_ALLOC_IDR(devx_obj_cleanup),
+ &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_CREATE),
+ &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_DESTROY),
+ &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_MODIFY),
+ &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_OBJ_QUERY));
+
+DECLARE_UVERBS_NAMED_OBJECT(MLX5_IB_OBJECT_DEVX_UMEM,
+ UVERBS_TYPE_ALLOC_IDR(devx_umem_cleanup),
+ &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_UMEM_REG),
+ &UVERBS_METHOD(MLX5_IB_METHOD_DEVX_UMEM_DEREG));
+
+DECLARE_UVERBS_OBJECT_TREE(devx_objects,
+ &UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX),
+ &UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX_OBJ),
+ &UVERBS_OBJECT(MLX5_IB_OBJECT_DEVX_UMEM));
+
+const struct uverbs_object_tree_def *mlx5_ib_get_devx_tree(void)
+{
+ return &devx_objects;
+}
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index e52dd21519b4..d4d894e9f942 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -419,8 +419,8 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
translate_eth_proto_oper(eth_prot_oper, &props->active_speed,
&props->active_width);
- props->port_cap_flags |= IB_PORT_CM_SUP;
- props->port_cap_flags |= IB_PORT_IP_BASED_GIDS;
+ props->port_cap_flags |= IB_PORT_CM_SUP;
+ props->ip_gids = true;
props->gid_tbl_len = MLX5_CAP_ROCE(dev->mdev,
roce_address_table_size);
@@ -510,12 +510,11 @@ static int set_roce_addr(struct mlx5_ib_dev *dev, u8 port_num,
vlan_id, port_num);
}
-static int mlx5_ib_add_gid(const union ib_gid *gid,
- const struct ib_gid_attr *attr,
+static int mlx5_ib_add_gid(const struct ib_gid_attr *attr,
__always_unused void **context)
{
return set_roce_addr(to_mdev(attr->device), attr->port_num,
- attr->index, gid, attr);
+ attr->index, &attr->gid, attr);
}
static int mlx5_ib_del_gid(const struct ib_gid_attr *attr,
@@ -525,41 +524,15 @@ static int mlx5_ib_del_gid(const struct ib_gid_attr *attr,
attr->index, NULL, NULL);
}
-__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num,
- int index)
+__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev,
+ const struct ib_gid_attr *attr)
{
- struct ib_gid_attr attr;
- union ib_gid gid;
-
- if (ib_get_cached_gid(&dev->ib_dev, port_num, index, &gid, &attr))
- return 0;
-
- dev_put(attr.ndev);
-
- if (attr.gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP)
+ if (attr->gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP)
return 0;
return cpu_to_be16(MLX5_CAP_ROCE(dev->mdev, r_roce_min_src_udp_port));
}
-int mlx5_get_roce_gid_type(struct mlx5_ib_dev *dev, u8 port_num,
- int index, enum ib_gid_type *gid_type)
-{
- struct ib_gid_attr attr;
- union ib_gid gid;
- int ret;
-
- ret = ib_get_cached_gid(&dev->ib_dev, port_num, index, &gid, &attr);
- if (ret)
- return ret;
-
- dev_put(attr.ndev);
-
- *gid_type = attr.gid_type;
-
- return 0;
-}
-
static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev)
{
if (MLX5_CAP_GEN(dev->mdev, port_type) == MLX5_CAP_PORT_TYPE_IB)
@@ -915,7 +888,8 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
max_sq_sg = (max_sq_desc - sizeof(struct mlx5_wqe_ctrl_seg) -
sizeof(struct mlx5_wqe_raddr_seg)) /
sizeof(struct mlx5_wqe_data_seg);
- props->max_sge = min(max_rq_sg, max_sq_sg);
+ props->max_send_sge = max_sq_sg;
+ props->max_recv_sge = max_rq_sg;
props->max_sge_rd = MLX5_MAX_SGE_RD;
props->max_cq = 1 << MLX5_CAP_GEN(mdev, log_max_cq);
props->max_cqe = (1 << MLX5_CAP_GEN(mdev, log_max_cq_sz)) - 1;
@@ -1246,7 +1220,6 @@ static int mlx5_query_hca_port(struct ib_device *ibdev, u8 port,
props->qkey_viol_cntr = rep->qkey_violation_counter;
props->subnet_timeout = rep->subnet_timeout;
props->init_type_reply = rep->init_type_reply;
- props->grh_required = rep->grh_required;
err = mlx5_query_port_link_width_oper(mdev, &ib_link_width_oper, port);
if (err)
@@ -1585,31 +1558,26 @@ error:
return err;
}
-static int deallocate_uars(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *context)
+static void deallocate_uars(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_ucontext *context)
{
struct mlx5_bfreg_info *bfregi;
- int err;
int i;
bfregi = &context->bfregi;
- for (i = 0; i < bfregi->num_sys_pages; i++) {
+ for (i = 0; i < bfregi->num_sys_pages; i++)
if (i < bfregi->num_static_sys_pages ||
- bfregi->sys_pages[i] != MLX5_IB_INVALID_UAR_INDEX) {
- err = mlx5_cmd_free_uar(dev->mdev, bfregi->sys_pages[i]);
- if (err) {
- mlx5_ib_warn(dev, "failed to free uar %d, err=%d\n", i, err);
- return err;
- }
- }
- }
-
- return 0;
+ bfregi->sys_pages[i] != MLX5_IB_INVALID_UAR_INDEX)
+ mlx5_cmd_free_uar(dev->mdev, bfregi->sys_pages[i]);
}
static int mlx5_ib_alloc_transport_domain(struct mlx5_ib_dev *dev, u32 *tdn)
{
int err;
+ if (!MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
+ return 0;
+
err = mlx5_core_alloc_transport_domain(dev->mdev, tdn);
if (err)
return err;
@@ -1631,6 +1599,9 @@ static int mlx5_ib_alloc_transport_domain(struct mlx5_ib_dev *dev, u32 *tdn)
static void mlx5_ib_dealloc_transport_domain(struct mlx5_ib_dev *dev, u32 tdn)
{
+ if (!MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
+ return;
+
mlx5_core_dealloc_transport_domain(dev->mdev, tdn);
if ((MLX5_CAP_GEN(dev->mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) ||
@@ -1660,6 +1631,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
int err;
size_t min_req_v2 = offsetof(struct mlx5_ib_alloc_ucontext_req_v2,
max_cqe_version);
+ u32 dump_fill_mkey;
bool lib_uar_4k;
if (!dev->ib_active)
@@ -1676,8 +1648,8 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
if (err)
return ERR_PTR(err);
- if (req.flags)
- return ERR_PTR(-EINVAL);
+ if (req.flags & ~MLX5_IB_ALLOC_UCTX_DEVX)
+ return ERR_PTR(-EOPNOTSUPP);
if (req.comp_mask || req.reserved0 || req.reserved1 || req.reserved2)
return ERR_PTR(-EOPNOTSUPP);
@@ -1755,10 +1727,26 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
context->ibucontext.invalidate_range = &mlx5_ib_invalidate_range;
#endif
- if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain)) {
- err = mlx5_ib_alloc_transport_domain(dev, &context->tdn);
+ err = mlx5_ib_alloc_transport_domain(dev, &context->tdn);
+ if (err)
+ goto out_uars;
+
+ if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) {
+ /* Block DEVX on Infiniband as of SELinux */
+ if (mlx5_ib_port_link_layer(ibdev, 1) != IB_LINK_LAYER_ETHERNET) {
+ err = -EPERM;
+ goto out_td;
+ }
+
+ err = mlx5_ib_devx_create(dev, context);
+ if (err)
+ goto out_td;
+ }
+
+ if (MLX5_CAP_GEN(dev->mdev, dump_fill_mkey)) {
+ err = mlx5_cmd_dump_fill_mkey(dev->mdev, &dump_fill_mkey);
if (err)
- goto out_uars;
+ goto out_mdev;
}
INIT_LIST_HEAD(&context->vma_private_list);
@@ -1819,9 +1807,18 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
resp.response_length += sizeof(resp.num_dyn_bfregs);
}
+ if (field_avail(typeof(resp), dump_fill_mkey, udata->outlen)) {
+ if (MLX5_CAP_GEN(dev->mdev, dump_fill_mkey)) {
+ resp.dump_fill_mkey = dump_fill_mkey;
+ resp.comp_mask |=
+ MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_DUMP_FILL_MKEY;
+ }
+ resp.response_length += sizeof(resp.dump_fill_mkey);
+ }
+
err = ib_copy_to_udata(udata, &resp, resp.response_length);
if (err)
- goto out_td;
+ goto out_mdev;
bfregi->ver = ver;
bfregi->num_low_latency_bfregs = req.num_low_latency_bfregs;
@@ -1831,9 +1828,11 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
return &context->ibucontext;
+out_mdev:
+ if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX)
+ mlx5_ib_devx_destroy(dev, context);
out_td:
- if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
- mlx5_ib_dealloc_transport_domain(dev, context->tdn);
+ mlx5_ib_dealloc_transport_domain(dev, context->tdn);
out_uars:
deallocate_uars(dev, context);
@@ -1856,9 +1855,11 @@ static int mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
struct mlx5_bfreg_info *bfregi;
+ if (context->devx_uid)
+ mlx5_ib_devx_destroy(dev, context);
+
bfregi = &context->bfregi;
- if (MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
- mlx5_ib_dealloc_transport_domain(dev, context->tdn);
+ mlx5_ib_dealloc_transport_domain(dev, context->tdn);
deallocate_uars(dev, context);
kfree(bfregi->sys_pages);
@@ -2040,7 +2041,7 @@ static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd,
struct mlx5_bfreg_info *bfregi = &context->bfregi;
int err;
unsigned long idx;
- phys_addr_t pfn, pa;
+ phys_addr_t pfn;
pgprot_t prot;
u32 bfreg_dyn_idx = 0;
u32 uar_index;
@@ -2131,8 +2132,6 @@ static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd,
goto err;
}
- pa = pfn << PAGE_SHIFT;
-
err = mlx5_ib_set_vma_data(vma, context);
if (err)
goto err;
@@ -2699,7 +2698,7 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
IPPROTO_GRE);
MLX5_SET(fte_match_set_misc, misc_params_c, gre_protocol,
- 0xffff);
+ ntohs(ib_spec->gre.mask.protocol));
MLX5_SET(fte_match_set_misc, misc_params_v, gre_protocol,
ntohs(ib_spec->gre.val.protocol));
@@ -4460,7 +4459,8 @@ static void destroy_dev_resources(struct mlx5_ib_resources *devr)
cancel_work_sync(&devr->ports[port].pkey_change_work);
}
-static u32 get_core_cap_flags(struct ib_device *ibdev)
+static u32 get_core_cap_flags(struct ib_device *ibdev,
+ struct mlx5_hca_vport_context *rep)
{
struct mlx5_ib_dev *dev = to_mdev(ibdev);
enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, 1);
@@ -4469,11 +4469,14 @@ static u32 get_core_cap_flags(struct ib_device *ibdev)
bool raw_support = !mlx5_core_mp_enabled(dev->mdev);
u32 ret = 0;
+ if (rep->grh_required)
+ ret |= RDMA_CORE_CAP_IB_GRH_REQUIRED;
+
if (ll == IB_LINK_LAYER_INFINIBAND)
- return RDMA_CORE_PORT_IBA_IB;
+ return ret | RDMA_CORE_PORT_IBA_IB;
if (raw_support)
- ret = RDMA_CORE_PORT_RAW_PACKET;
+ ret |= RDMA_CORE_PORT_RAW_PACKET;
if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV4_CAP))
return ret;
@@ -4496,17 +4499,23 @@ static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num,
struct ib_port_attr attr;
struct mlx5_ib_dev *dev = to_mdev(ibdev);
enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, port_num);
+ struct mlx5_hca_vport_context rep = {0};
int err;
- immutable->core_cap_flags = get_core_cap_flags(ibdev);
-
err = ib_query_port(ibdev, port_num, &attr);
if (err)
return err;
+ if (ll == IB_LINK_LAYER_INFINIBAND) {
+ err = mlx5_query_hca_vport_context(dev->mdev, 0, port_num, 0,
+ &rep);
+ if (err)
+ return err;
+ }
+
immutable->pkey_tbl_len = attr.pkey_tbl_len;
immutable->gid_tbl_len = attr.gid_tbl_len;
- immutable->core_cap_flags = get_core_cap_flags(ibdev);
+ immutable->core_cap_flags = get_core_cap_flags(ibdev, &rep);
if ((ll == IB_LINK_LAYER_INFINIBAND) || MLX5_CAP_GEN(dev->mdev, roce))
immutable->max_mad_size = IB_MGMT_MAD_SIZE;
@@ -4604,7 +4613,7 @@ static void mlx5_remove_netdev_notifier(struct mlx5_ib_dev *dev, u8 port_num)
}
}
-static int mlx5_enable_eth(struct mlx5_ib_dev *dev, u8 port_num)
+static int mlx5_enable_eth(struct mlx5_ib_dev *dev)
{
int err;
@@ -4683,12 +4692,21 @@ static const struct mlx5_ib_counter extended_err_cnts[] = {
INIT_Q_COUNTER(req_cqe_flush_error),
};
+#define INIT_EXT_PPCNT_COUNTER(_name) \
+ { .name = #_name, .offset = \
+ MLX5_BYTE_OFF(ppcnt_reg, \
+ counter_set.eth_extended_cntrs_grp_data_layout._name##_high)}
+
+static const struct mlx5_ib_counter ext_ppcnt_cnts[] = {
+ INIT_EXT_PPCNT_COUNTER(rx_icrc_encapsulated),
+};
+
static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev)
{
int i;
for (i = 0; i < dev->num_ports; i++) {
- if (dev->port[i].cnts.set_id)
+ if (dev->port[i].cnts.set_id_valid)
mlx5_core_dealloc_q_counter(dev->mdev,
dev->port[i].cnts.set_id);
kfree(dev->port[i].cnts.names);
@@ -4718,7 +4736,10 @@ static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev,
cnts->num_cong_counters = ARRAY_SIZE(cong_cnts);
num_counters += ARRAY_SIZE(cong_cnts);
}
-
+ if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
+ cnts->num_ext_ppcnt_counters = ARRAY_SIZE(ext_ppcnt_cnts);
+ num_counters += ARRAY_SIZE(ext_ppcnt_cnts);
+ }
cnts->names = kcalloc(num_counters, sizeof(cnts->names), GFP_KERNEL);
if (!cnts->names)
return -ENOMEM;
@@ -4775,6 +4796,13 @@ static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev,
offsets[j] = cong_cnts[i].offset;
}
}
+
+ if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
+ for (i = 0; i < ARRAY_SIZE(ext_ppcnt_cnts); i++, j++) {
+ names[j] = ext_ppcnt_cnts[i].name;
+ offsets[j] = ext_ppcnt_cnts[i].offset;
+ }
+ }
}
static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev)
@@ -4820,7 +4848,8 @@ static struct rdma_hw_stats *mlx5_ib_alloc_hw_stats(struct ib_device *ibdev,
return rdma_alloc_hw_stats_struct(port->cnts.names,
port->cnts.num_q_counters +
- port->cnts.num_cong_counters,
+ port->cnts.num_cong_counters +
+ port->cnts.num_ext_ppcnt_counters,
RDMA_HW_STATS_DEFAULT_LIFESPAN);
}
@@ -4853,6 +4882,34 @@ free:
return ret;
}
+static int mlx5_ib_query_ext_ppcnt_counters(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_port *port,
+ struct rdma_hw_stats *stats)
+{
+ int offset = port->cnts.num_q_counters + port->cnts.num_cong_counters;
+ int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
+ int ret, i;
+ void *out;
+
+ out = kvzalloc(sz, GFP_KERNEL);
+ if (!out)
+ return -ENOMEM;
+
+ ret = mlx5_cmd_query_ext_ppcnt_counters(dev->mdev, out);
+ if (ret)
+ goto free;
+
+ for (i = 0; i < port->cnts.num_ext_ppcnt_counters; i++) {
+ stats->value[i + offset] =
+ be64_to_cpup((__be64 *)(out +
+ port->cnts.offsets[i + offset]));
+ }
+
+free:
+ kvfree(out);
+ return ret;
+}
+
static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
struct rdma_hw_stats *stats,
u8 port_num, int index)
@@ -4866,13 +4923,21 @@ static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
if (!stats)
return -EINVAL;
- num_counters = port->cnts.num_q_counters + port->cnts.num_cong_counters;
+ num_counters = port->cnts.num_q_counters +
+ port->cnts.num_cong_counters +
+ port->cnts.num_ext_ppcnt_counters;
/* q_counters are per IB device, query the master mdev */
ret = mlx5_ib_query_q_counters(dev->mdev, port, stats);
if (ret)
return ret;
+ if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
+ ret = mlx5_ib_query_ext_ppcnt_counters(dev, port, stats);
+ if (ret)
+ return ret;
+ }
+
if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
mdev = mlx5_ib_get_native_port_mdev(dev, port_num,
&mdev_port_num);
@@ -5257,22 +5322,26 @@ static void mlx5_ib_cleanup_multiport_master(struct mlx5_ib_dev *dev)
mlx5_nic_vport_disable_roce(dev->mdev);
}
-ADD_UVERBS_ATTRIBUTES_SIMPLE(mlx5_ib_dm, UVERBS_OBJECT_DM,
- UVERBS_METHOD_DM_ALLOC,
- &UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET,
- UVERBS_ATTR_TYPE(u64),
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)),
- &UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX,
- UVERBS_ATTR_TYPE(u16),
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
-
-ADD_UVERBS_ATTRIBUTES_SIMPLE(mlx5_ib_flow_action, UVERBS_OBJECT_FLOW_ACTION,
- UVERBS_METHOD_FLOW_ACTION_ESP_CREATE,
- &UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS,
- UVERBS_ATTR_TYPE(u64),
- UA_FLAGS(UVERBS_ATTR_SPEC_F_MANDATORY)));
-
-#define NUM_TREES 2
+ADD_UVERBS_ATTRIBUTES_SIMPLE(
+ mlx5_ib_dm,
+ UVERBS_OBJECT_DM,
+ UVERBS_METHOD_DM_ALLOC,
+ UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY),
+ UVERBS_ATTR_PTR_OUT(MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX,
+ UVERBS_ATTR_TYPE(u16),
+ UA_MANDATORY));
+
+ADD_UVERBS_ATTRIBUTES_SIMPLE(
+ mlx5_ib_flow_action,
+ UVERBS_OBJECT_FLOW_ACTION,
+ UVERBS_METHOD_FLOW_ACTION_ESP_CREATE,
+ UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS,
+ UVERBS_ATTR_TYPE(u64),
+ UA_MANDATORY));
+
+#define NUM_TREES 3
static int populate_specs_root(struct mlx5_ib_dev *dev)
{
const struct uverbs_object_tree_def *default_root[NUM_TREES + 1] = {
@@ -5287,15 +5356,20 @@ static int populate_specs_root(struct mlx5_ib_dev *dev)
!WARN_ON(num_trees >= ARRAY_SIZE(default_root)))
default_root[num_trees++] = &mlx5_ib_dm;
- dev->ib_dev.specs_root =
+ if (MLX5_CAP_GEN_64(dev->mdev, general_obj_types) &
+ MLX5_GENERAL_OBJ_TYPES_CAP_UCTX &&
+ !WARN_ON(num_trees >= ARRAY_SIZE(default_root)))
+ default_root[num_trees++] = mlx5_ib_get_devx_tree();
+
+ dev->ib_dev.driver_specs_root =
uverbs_alloc_spec_tree(num_trees, default_root);
- return PTR_ERR_OR_ZERO(dev->ib_dev.specs_root);
+ return PTR_ERR_OR_ZERO(dev->ib_dev.driver_specs_root);
}
static void depopulate_specs_root(struct mlx5_ib_dev *dev)
{
- uverbs_free_spec_tree(dev->ib_dev.specs_root);
+ uverbs_free_spec_tree(dev->ib_dev.driver_specs_root);
}
static int mlx5_ib_read_counters(struct ib_counters *counters,
@@ -5546,6 +5620,8 @@ int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
dev->ib_dev.modify_qp = mlx5_ib_modify_qp;
dev->ib_dev.query_qp = mlx5_ib_query_qp;
dev->ib_dev.destroy_qp = mlx5_ib_destroy_qp;
+ dev->ib_dev.drain_sq = mlx5_ib_drain_sq;
+ dev->ib_dev.drain_rq = mlx5_ib_drain_rq;
dev->ib_dev.post_send = mlx5_ib_post_send;
dev->ib_dev.post_recv = mlx5_ib_post_recv;
dev->ib_dev.create_cq = mlx5_ib_create_cq;
@@ -5643,9 +5719,9 @@ int mlx5_ib_stage_rep_non_default_cb(struct mlx5_ib_dev *dev)
return 0;
}
-static int mlx5_ib_stage_common_roce_init(struct mlx5_ib_dev *dev,
- u8 port_num)
+static int mlx5_ib_stage_common_roce_init(struct mlx5_ib_dev *dev)
{
+ u8 port_num;
int i;
for (i = 0; i < dev->num_ports; i++) {
@@ -5668,6 +5744,8 @@ static int mlx5_ib_stage_common_roce_init(struct mlx5_ib_dev *dev,
(1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) |
(1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL);
+ port_num = mlx5_core_native_port_num(dev->mdev) - 1;
+
return mlx5_add_netdev_notifier(dev, port_num);
}
@@ -5684,14 +5762,12 @@ int mlx5_ib_stage_rep_roce_init(struct mlx5_ib_dev *dev)
enum rdma_link_layer ll;
int port_type_cap;
int err = 0;
- u8 port_num;
- port_num = mlx5_core_native_port_num(dev->mdev) - 1;
port_type_cap = MLX5_CAP_GEN(mdev, port_type);
ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
if (ll == IB_LINK_LAYER_ETHERNET)
- err = mlx5_ib_stage_common_roce_init(dev, port_num);
+ err = mlx5_ib_stage_common_roce_init(dev);
return err;
}
@@ -5706,19 +5782,17 @@ static int mlx5_ib_stage_roce_init(struct mlx5_ib_dev *dev)
struct mlx5_core_dev *mdev = dev->mdev;
enum rdma_link_layer ll;
int port_type_cap;
- u8 port_num;
int err;
- port_num = mlx5_core_native_port_num(dev->mdev) - 1;
port_type_cap = MLX5_CAP_GEN(mdev, port_type);
ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
if (ll == IB_LINK_LAYER_ETHERNET) {
- err = mlx5_ib_stage_common_roce_init(dev, port_num);
+ err = mlx5_ib_stage_common_roce_init(dev);
if (err)
return err;
- err = mlx5_enable_eth(dev, port_num);
+ err = mlx5_enable_eth(dev);
if (err)
goto cleanup;
}
@@ -5735,9 +5809,7 @@ static void mlx5_ib_stage_roce_cleanup(struct mlx5_ib_dev *dev)
struct mlx5_core_dev *mdev = dev->mdev;
enum rdma_link_layer ll;
int port_type_cap;
- u8 port_num;
- port_num = mlx5_core_native_port_num(dev->mdev) - 1;
port_type_cap = MLX5_CAP_GEN(mdev, port_type);
ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
@@ -5909,8 +5981,6 @@ void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
ib_dealloc_device((struct ib_device *)dev);
}
-static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev, u8 port_num);
-
void *__mlx5_ib_add(struct mlx5_ib_dev *dev,
const struct mlx5_ib_profile *profile)
{
@@ -6040,7 +6110,7 @@ static const struct mlx5_ib_profile nic_rep_profile = {
mlx5_ib_stage_rep_reg_cleanup),
};
-static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev, u8 port_num)
+static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev)
{
struct mlx5_ib_multiport_info *mpi;
struct mlx5_ib_dev *dev;
@@ -6074,8 +6144,6 @@ static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev, u8 port_num)
if (!bound) {
list_add_tail(&mpi->list, &mlx5_ib_unaffiliated_port_list);
dev_dbg(&mdev->pdev->dev, "no suitable IB device found to bind to, added to unaffiliated list.\n");
- } else {
- mlx5_ib_dbg(dev, "bound port %u\n", port_num + 1);
}
mutex_unlock(&mlx5_ib_multiport_mutex);
@@ -6093,11 +6161,8 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
port_type_cap = MLX5_CAP_GEN(mdev, port_type);
ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
- if (mlx5_core_is_mp_slave(mdev) && ll == IB_LINK_LAYER_ETHERNET) {
- u8 port_num = mlx5_core_native_port_num(mdev) - 1;
-
- return mlx5_ib_add_slave_port(mdev, port_num);
- }
+ if (mlx5_core_is_mp_slave(mdev) && ll == IB_LINK_LAYER_ETHERNET)
+ return mlx5_ib_add_slave_port(mdev);
dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev));
if (!dev)
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index d89c8fe626f6..04a5d82c9cf3 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -78,12 +78,6 @@ enum {
MLX5_REQ_SCAT_DATA64_CQE = 0x22,
};
-enum mlx5_ib_latency_class {
- MLX5_IB_LATENCY_CLASS_LOW,
- MLX5_IB_LATENCY_CLASS_MEDIUM,
- MLX5_IB_LATENCY_CLASS_HIGH,
-};
-
enum mlx5_ib_mad_ifc_flags {
MLX5_MAD_IFC_IGNORE_MKEY = 1,
MLX5_MAD_IFC_IGNORE_BKEY = 2,
@@ -143,6 +137,7 @@ struct mlx5_ib_ucontext {
u64 lib_caps;
DECLARE_BITMAP(dm_pages, MLX5_MAX_MEMIC_PAGES);
+ u16 devx_uid;
};
static inline struct mlx5_ib_ucontext *to_mucontext(struct ib_ucontext *ibucontext)
@@ -665,6 +660,7 @@ struct mlx5_ib_counters {
size_t *offsets;
u32 num_q_counters;
u32 num_cong_counters;
+ u32 num_ext_ppcnt_counters;
u16 set_id;
bool set_id_valid;
};
@@ -1014,6 +1010,8 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask,
struct ib_qp_init_attr *qp_init_attr);
int mlx5_ib_destroy_qp(struct ib_qp *qp);
+void mlx5_ib_drain_sq(struct ib_qp *qp);
+void mlx5_ib_drain_rq(struct ib_qp *qp);
int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
struct ib_send_wr **bad_wr);
int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
@@ -1183,10 +1181,8 @@ int mlx5_ib_get_vf_stats(struct ib_device *device, int vf,
int mlx5_ib_set_vf_guid(struct ib_device *device, int vf, u8 port,
u64 guid, int type);
-__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num,
- int index);
-int mlx5_get_roce_gid_type(struct mlx5_ib_dev *dev, u8 port_num,
- int index, enum ib_gid_type *gid_type);
+__be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev,
+ const struct ib_gid_attr *attr);
void mlx5_ib_cleanup_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num);
int mlx5_ib_init_cong_debugfs(struct mlx5_ib_dev *dev, u8 port_num);
@@ -1217,6 +1213,21 @@ struct mlx5_core_dev *mlx5_ib_get_native_port_mdev(struct mlx5_ib_dev *dev,
void mlx5_ib_put_native_port_mdev(struct mlx5_ib_dev *dev,
u8 port_num);
+#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
+int mlx5_ib_devx_create(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_ucontext *context);
+void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_ucontext *context);
+const struct uverbs_object_tree_def *mlx5_ib_get_devx_tree(void);
+#else
+static inline int
+mlx5_ib_devx_create(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_ucontext *context) { return -EOPNOTSUPP; };
+static inline void mlx5_ib_devx_destroy(struct mlx5_ib_dev *dev,
+ struct mlx5_ib_ucontext *context) {}
+static inline const struct uverbs_object_tree_def *
+mlx5_ib_get_devx_tree(void) { return NULL; }
+#endif
static inline void init_query_mad(struct ib_smp *mad)
{
mad->base_version = 1;
@@ -1318,4 +1329,7 @@ static inline int get_num_static_uars(struct mlx5_ib_dev *dev,
unsigned long mlx5_ib_get_xlt_emergency_page(void);
void mlx5_ib_put_xlt_emergency_page(void);
+int bfregn_to_uar_index(struct mlx5_ib_dev *dev,
+ struct mlx5_bfreg_info *bfregi, u32 bfregn,
+ bool dyn_bfreg);
#endif /* MLX5_IB_H */
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 90a9c461cedc..308456d28afb 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -271,16 +271,16 @@ static ssize_t size_write(struct file *filp, const char __user *buf,
{
struct mlx5_cache_ent *ent = filp->private_data;
struct mlx5_ib_dev *dev = ent->dev;
- char lbuf[20];
+ char lbuf[20] = {0};
u32 var;
int err;
int c;
- if (copy_from_user(lbuf, buf, sizeof(lbuf)))
+ count = min(count, sizeof(lbuf) - 1);
+ if (copy_from_user(lbuf, buf, count))
return -EFAULT;
c = order2idx(dev, ent->order);
- lbuf[sizeof(lbuf) - 1] = 0;
if (sscanf(lbuf, "%u", &var) != 1)
return -EINVAL;
@@ -310,19 +310,11 @@ static ssize_t size_read(struct file *filp, char __user *buf, size_t count,
char lbuf[20];
int err;
- if (*pos)
- return 0;
-
err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->size);
if (err < 0)
return err;
- if (copy_to_user(buf, lbuf, err))
- return -EFAULT;
-
- *pos += err;
-
- return err;
+ return simple_read_from_buffer(buf, count, pos, lbuf, err);
}
static const struct file_operations size_fops = {
@@ -337,16 +329,16 @@ static ssize_t limit_write(struct file *filp, const char __user *buf,
{
struct mlx5_cache_ent *ent = filp->private_data;
struct mlx5_ib_dev *dev = ent->dev;
- char lbuf[20];
+ char lbuf[20] = {0};
u32 var;
int err;
int c;
- if (copy_from_user(lbuf, buf, sizeof(lbuf)))
+ count = min(count, sizeof(lbuf) - 1);
+ if (copy_from_user(lbuf, buf, count))
return -EFAULT;
c = order2idx(dev, ent->order);
- lbuf[sizeof(lbuf) - 1] = 0;
if (sscanf(lbuf, "%u", &var) != 1)
return -EINVAL;
@@ -372,19 +364,11 @@ static ssize_t limit_read(struct file *filp, char __user *buf, size_t count,
char lbuf[20];
int err;
- if (*pos)
- return 0;
-
err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->limit);
if (err < 0)
return err;
- if (copy_to_user(buf, lbuf, err))
- return -EFAULT;
-
- *pos += err;
-
- return err;
+ return simple_read_from_buffer(buf, count, pos, lbuf, err);
}
static const struct file_operations limit_fops = {
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index a4f1f638509f..d4414015b64f 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -563,32 +563,21 @@ static int alloc_med_class_bfreg(struct mlx5_ib_dev *dev,
}
static int alloc_bfreg(struct mlx5_ib_dev *dev,
- struct mlx5_bfreg_info *bfregi,
- enum mlx5_ib_latency_class lat)
+ struct mlx5_bfreg_info *bfregi)
{
- int bfregn = -EINVAL;
+ int bfregn = -ENOMEM;
mutex_lock(&bfregi->lock);
- switch (lat) {
- case MLX5_IB_LATENCY_CLASS_LOW:
+ if (bfregi->ver >= 2) {
+ bfregn = alloc_high_class_bfreg(dev, bfregi);
+ if (bfregn < 0)
+ bfregn = alloc_med_class_bfreg(dev, bfregi);
+ }
+
+ if (bfregn < 0) {
BUILD_BUG_ON(NUM_NON_BLUE_FLAME_BFREGS != 1);
bfregn = 0;
bfregi->count[bfregn]++;
- break;
-
- case MLX5_IB_LATENCY_CLASS_MEDIUM:
- if (bfregi->ver < 2)
- bfregn = -ENOMEM;
- else
- bfregn = alloc_med_class_bfreg(dev, bfregi);
- break;
-
- case MLX5_IB_LATENCY_CLASS_HIGH:
- if (bfregi->ver < 2)
- bfregn = -ENOMEM;
- else
- bfregn = alloc_high_class_bfreg(dev, bfregi);
- break;
}
mutex_unlock(&bfregi->lock);
@@ -641,13 +630,13 @@ static void mlx5_ib_lock_cqs(struct mlx5_ib_cq *send_cq,
static void mlx5_ib_unlock_cqs(struct mlx5_ib_cq *send_cq,
struct mlx5_ib_cq *recv_cq);
-static int bfregn_to_uar_index(struct mlx5_ib_dev *dev,
- struct mlx5_bfreg_info *bfregi, int bfregn,
- bool dyn_bfreg)
+int bfregn_to_uar_index(struct mlx5_ib_dev *dev,
+ struct mlx5_bfreg_info *bfregi, u32 bfregn,
+ bool dyn_bfreg)
{
- int bfregs_per_sys_page;
- int index_of_sys_page;
- int offset;
+ unsigned int bfregs_per_sys_page;
+ u32 index_of_sys_page;
+ u32 offset;
bfregs_per_sys_page = get_uars_per_sys_page(dev, bfregi->lib_uar_4k) *
MLX5_NON_FP_BFREGS_PER_UAR;
@@ -655,6 +644,10 @@ static int bfregn_to_uar_index(struct mlx5_ib_dev *dev,
if (dyn_bfreg) {
index_of_sys_page += bfregi->num_static_sys_pages;
+
+ if (index_of_sys_page >= bfregi->num_sys_pages)
+ return -EINVAL;
+
if (bfregn > bfregi->num_dyn_bfregs ||
bfregi->sys_pages[index_of_sys_page] == MLX5_IB_INVALID_UAR_INDEX) {
mlx5_ib_dbg(dev, "Invalid dynamic uar index\n");
@@ -819,21 +812,9 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
bfregn = MLX5_CROSS_CHANNEL_BFREG;
}
else {
- bfregn = alloc_bfreg(dev, &context->bfregi, MLX5_IB_LATENCY_CLASS_HIGH);
- if (bfregn < 0) {
- mlx5_ib_dbg(dev, "failed to allocate low latency BFREG\n");
- mlx5_ib_dbg(dev, "reverting to medium latency\n");
- bfregn = alloc_bfreg(dev, &context->bfregi, MLX5_IB_LATENCY_CLASS_MEDIUM);
- if (bfregn < 0) {
- mlx5_ib_dbg(dev, "failed to allocate medium latency BFREG\n");
- mlx5_ib_dbg(dev, "reverting to high latency\n");
- bfregn = alloc_bfreg(dev, &context->bfregi, MLX5_IB_LATENCY_CLASS_LOW);
- if (bfregn < 0) {
- mlx5_ib_warn(dev, "bfreg allocation failed\n");
- return bfregn;
- }
- }
- }
+ bfregn = alloc_bfreg(dev, &context->bfregi);
+ if (bfregn < 0)
+ return bfregn;
}
mlx5_ib_dbg(dev, "bfregn 0x%x, uar_index 0x%x\n", bfregn, uar_index);
@@ -2555,18 +2536,16 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
if (ah->type == RDMA_AH_ATTR_TYPE_ROCE) {
if (!(ah_flags & IB_AH_GRH))
return -EINVAL;
- err = mlx5_get_roce_gid_type(dev, port, grh->sgid_index,
- &gid_type);
- if (err)
- return err;
+
memcpy(path->rmac, ah->roce.dmac, sizeof(ah->roce.dmac));
if (qp->ibqp.qp_type == IB_QPT_RC ||
qp->ibqp.qp_type == IB_QPT_UC ||
qp->ibqp.qp_type == IB_QPT_XRC_INI ||
qp->ibqp.qp_type == IB_QPT_XRC_TGT)
- path->udp_sport = mlx5_get_roce_udp_sport(dev, port,
- grh->sgid_index);
+ path->udp_sport =
+ mlx5_get_roce_udp_sport(dev, ah->grh.sgid_attr);
path->dci_cfi_prio_sl = (sl & 0x7) << 4;
+ gid_type = ah->grh.sgid_attr->gid_type;
if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
path->ecn_dscp = (grh->traffic_class >> 2) & 0x3f;
} else {
@@ -4360,9 +4339,8 @@ static void finish_wqe(struct mlx5_ib_qp *qp,
qp->sq.w_list[idx].next = qp->sq.cur_post;
}
-
-int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
- struct ib_send_wr **bad_wr)
+static int _mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
+ struct ib_send_wr **bad_wr, bool drain)
{
struct mlx5_wqe_ctrl_seg *ctrl = NULL; /* compiler warning */
struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
@@ -4393,7 +4371,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
spin_lock_irqsave(&qp->sq.lock, flags);
- if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
+ if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR && !drain) {
err = -EIO;
*bad_wr = wr;
nreq = 0;
@@ -4690,13 +4668,19 @@ out:
return err;
}
+int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
+ struct ib_send_wr **bad_wr)
+{
+ return _mlx5_ib_post_send(ibqp, wr, bad_wr, false);
+}
+
static void set_sig_seg(struct mlx5_rwqe_sig *sig, int size)
{
sig->signature = calc_sig(sig, size);
}
-int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
- struct ib_recv_wr **bad_wr)
+static int _mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
+ struct ib_recv_wr **bad_wr, bool drain)
{
struct mlx5_ib_qp *qp = to_mqp(ibqp);
struct mlx5_wqe_data_seg *scat;
@@ -4714,7 +4698,7 @@ int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
spin_lock_irqsave(&qp->rq.lock, flags);
- if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
+ if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR && !drain) {
err = -EIO;
*bad_wr = wr;
nreq = 0;
@@ -4776,6 +4760,12 @@ out:
return err;
}
+int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
+ struct ib_recv_wr **bad_wr)
+{
+ return _mlx5_ib_post_recv(ibqp, wr, bad_wr, false);
+}
+
static inline enum ib_qp_state to_ib_qp_state(enum mlx5_qp_state mlx5_state)
{
switch (mlx5_state) {
@@ -5697,3 +5687,131 @@ out:
kvfree(in);
return err;
}
+
+struct mlx5_ib_drain_cqe {
+ struct ib_cqe cqe;
+ struct completion done;
+};
+
+static void mlx5_ib_drain_qp_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+ struct mlx5_ib_drain_cqe *cqe = container_of(wc->wr_cqe,
+ struct mlx5_ib_drain_cqe,
+ cqe);
+
+ complete(&cqe->done);
+}
+
+/* This function returns only once the drained WR was completed */
+static void handle_drain_completion(struct ib_cq *cq,
+ struct mlx5_ib_drain_cqe *sdrain,
+ struct mlx5_ib_dev *dev)
+{
+ struct mlx5_core_dev *mdev = dev->mdev;
+
+ if (cq->poll_ctx == IB_POLL_DIRECT) {
+ while (wait_for_completion_timeout(&sdrain->done, HZ / 10) <= 0)
+ ib_process_cq_direct(cq, -1);
+ return;
+ }
+
+ if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
+ struct mlx5_ib_cq *mcq = to_mcq(cq);
+ bool triggered = false;
+ unsigned long flags;
+
+ spin_lock_irqsave(&dev->reset_flow_resource_lock, flags);
+ /* Make sure that the CQ handler won't run if wasn't run yet */
+ if (!mcq->mcq.reset_notify_added)
+ mcq->mcq.reset_notify_added = 1;
+ else
+ triggered = true;
+ spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags);
+
+ if (triggered) {
+ /* Wait for any scheduled/running task to be ended */
+ switch (cq->poll_ctx) {
+ case IB_POLL_SOFTIRQ:
+ irq_poll_disable(&cq->iop);
+ irq_poll_enable(&cq->iop);
+ break;
+ case IB_POLL_WORKQUEUE:
+ cancel_work_sync(&cq->work);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ }
+ }
+
+ /* Run the CQ handler - this makes sure that the drain WR will
+ * be processed if wasn't processed yet.
+ */
+ mcq->mcq.comp(&mcq->mcq);
+ }
+
+ wait_for_completion(&sdrain->done);
+}
+
+void mlx5_ib_drain_sq(struct ib_qp *qp)
+{
+ struct ib_cq *cq = qp->send_cq;
+ struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
+ struct mlx5_ib_drain_cqe sdrain;
+ struct ib_send_wr *bad_swr;
+ struct ib_rdma_wr swr = {
+ .wr = {
+ .next = NULL,
+ { .wr_cqe = &sdrain.cqe, },
+ .opcode = IB_WR_RDMA_WRITE,
+ },
+ };
+ int ret;
+ struct mlx5_ib_dev *dev = to_mdev(qp->device);
+ struct mlx5_core_dev *mdev = dev->mdev;
+
+ ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
+ if (ret && mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) {
+ WARN_ONCE(ret, "failed to drain send queue: %d\n", ret);
+ return;
+ }
+
+ sdrain.cqe.done = mlx5_ib_drain_qp_done;
+ init_completion(&sdrain.done);
+
+ ret = _mlx5_ib_post_send(qp, &swr.wr, &bad_swr, true);
+ if (ret) {
+ WARN_ONCE(ret, "failed to drain send queue: %d\n", ret);
+ return;
+ }
+
+ handle_drain_completion(cq, &sdrain, dev);
+}
+
+void mlx5_ib_drain_rq(struct ib_qp *qp)
+{
+ struct ib_cq *cq = qp->recv_cq;
+ struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
+ struct mlx5_ib_drain_cqe rdrain;
+ struct ib_recv_wr rwr = {}, *bad_rwr;
+ int ret;
+ struct mlx5_ib_dev *dev = to_mdev(qp->device);
+ struct mlx5_core_dev *mdev = dev->mdev;
+
+ ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
+ if (ret && mdev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) {
+ WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret);
+ return;
+ }
+
+ rwr.wr_cqe = &rdrain.cqe;
+ rdrain.cqe.done = mlx5_ib_drain_qp_done;
+ init_completion(&rdrain.done);
+
+ ret = _mlx5_ib_post_recv(qp, &rwr, &bad_rwr, true);
+ if (ret) {
+ WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret);
+ return;
+ }
+
+ handle_drain_completion(cq, &rdrain, dev);
+}
diff --git a/drivers/infiniband/hw/mthca/mthca_av.c b/drivers/infiniband/hw/mthca/mthca_av.c
index e7f6223e9c60..0823c0bc7e73 100644
--- a/drivers/infiniband/hw/mthca/mthca_av.c
+++ b/drivers/infiniband/hw/mthca/mthca_av.c
@@ -281,10 +281,7 @@ int mthca_read_ah(struct mthca_dev *dev, struct mthca_ah *ah,
header->grh.flow_label =
ah->av->sl_tclass_flowlabel & cpu_to_be32(0xfffff);
header->grh.hop_limit = ah->av->hop_limit;
- ib_get_cached_gid(&dev->ib_dev,
- be32_to_cpu(ah->av->port_pd) >> 24,
- ah->av->gid_index % dev->limits.gid_table_len,
- &header->grh.source_gid, NULL);
+ header->grh.source_gid = ah->ibah.sgid_attr->gid;
memcpy(header->grh.destination_gid.raw,
ah->av->dgid, 16);
}
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index 541f237965c7..20febafc1fdd 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -96,8 +96,9 @@ static int mthca_query_device(struct ib_device *ibdev, struct ib_device_attr *pr
props->page_size_cap = mdev->limits.page_size_cap;
props->max_qp = mdev->limits.num_qps - mdev->limits.reserved_qps;
props->max_qp_wr = mdev->limits.max_wqes;
- props->max_sge = mdev->limits.max_sg;
- props->max_sge_rd = props->max_sge;
+ props->max_send_sge = mdev->limits.max_sg;
+ props->max_recv_sge = mdev->limits.max_sg;
+ props->max_sge_rd = mdev->limits.max_sg;
props->max_cq = mdev->limits.num_cqs - mdev->limits.reserved_cqs;
props->max_cqe = mdev->limits.max_cqes;
props->max_mr = mdev->limits.num_mpts - mdev->limits.reserved_mrws;
diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h
index 00c27291dc26..bedaa02749fb 100644
--- a/drivers/infiniband/hw/nes/nes.h
+++ b/drivers/infiniband/hw/nes/nes.h
@@ -159,7 +159,7 @@ do { \
#define NES_EVENT_TIMEOUT 1200000
#else
-#define nes_debug(level, fmt, args...)
+#define nes_debug(level, fmt, args...) no_printk(fmt, ##args)
#define assert(expr) do {} while (0)
#define NES_EVENT_TIMEOUT 100000
diff --git a/drivers/infiniband/hw/nes/nes_cm.c b/drivers/infiniband/hw/nes/nes_cm.c
index 6cdfbf8c5674..2b67ace5b614 100644
--- a/drivers/infiniband/hw/nes/nes_cm.c
+++ b/drivers/infiniband/hw/nes/nes_cm.c
@@ -58,6 +58,7 @@
#include <net/neighbour.h>
#include <net/route.h>
#include <net/ip_fib.h>
+#include <net/secure_seq.h>
#include <net/tcp.h>
#include <linux/fcntl.h>
@@ -1445,7 +1446,6 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core,
struct nes_cm_listener *listener)
{
struct nes_cm_node *cm_node;
- struct timespec ts;
int oldarpindex = 0;
int arpindex = 0;
struct nes_device *nesdev;
@@ -1496,8 +1496,10 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core,
cm_node->tcp_cntxt.rcv_wscale = NES_CM_DEFAULT_RCV_WND_SCALE;
cm_node->tcp_cntxt.rcv_wnd = NES_CM_DEFAULT_RCV_WND_SCALED >>
NES_CM_DEFAULT_RCV_WND_SCALE;
- ts = current_kernel_time();
- cm_node->tcp_cntxt.loc_seq_num = htonl(ts.tv_nsec);
+ cm_node->tcp_cntxt.loc_seq_num = secure_tcp_seq(htonl(cm_node->loc_addr),
+ htonl(cm_node->rem_addr),
+ htons(cm_node->loc_port),
+ htons(cm_node->rem_port));
cm_node->tcp_cntxt.mss = nesvnic->max_frame_size - sizeof(struct iphdr) -
sizeof(struct tcphdr) - ETH_HLEN - VLAN_HLEN;
cm_node->tcp_cntxt.rcv_nxt = 0;
diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c
index 18a7de1c3923..bd0675d8f298 100644
--- a/drivers/infiniband/hw/nes/nes_hw.c
+++ b/drivers/infiniband/hw/nes/nes_hw.c
@@ -70,8 +70,7 @@ static void nes_process_mac_intr(struct nes_device *nesdev, u32 mac_number);
static unsigned int nes_reset_adapter_ne020(struct nes_device *nesdev, u8 *OneG_Mode);
static void nes_terminate_start_timer(struct nes_qp *nesqp);
-#ifdef CONFIG_INFINIBAND_NES_DEBUG
-static unsigned char *nes_iwarp_state_str[] = {
+static const char *const nes_iwarp_state_str[] = {
"Non-Existent",
"Idle",
"RTS",
@@ -82,7 +81,7 @@ static unsigned char *nes_iwarp_state_str[] = {
"RSVD2",
};
-static unsigned char *nes_tcp_state_str[] = {
+static const char *const nes_tcp_state_str[] = {
"Non-Existent",
"Closed",
"Listen",
@@ -100,7 +99,6 @@ static unsigned char *nes_tcp_state_str[] = {
"RSVD3",
"RSVD4",
};
-#endif
static inline void print_ip(struct nes_cm_node *cm_node)
{
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index 32f26556c808..82b8f9630ee8 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -436,7 +436,8 @@ static int nes_query_device(struct ib_device *ibdev, struct ib_device_attr *prop
props->max_mr_size = 0x80000000;
props->max_qp = nesibdev->max_qp;
props->max_qp_wr = nesdev->nesadapter->max_qp_wr - 2;
- props->max_sge = nesdev->nesadapter->max_sge;
+ props->max_send_sge = nesdev->nesadapter->max_sge;
+ props->max_recv_sge = nesdev->nesadapter->max_sge;
props->max_cq = nesibdev->max_cq;
props->max_cqe = nesdev->nesadapter->max_cqe;
props->max_mr = nesibdev->max_mr;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
index 3897b64532e1..8cc9459a9f9b 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
@@ -71,7 +71,7 @@ static u16 ocrdma_hdr_type_to_proto_num(int devid, u8 hdr_type)
}
static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah,
- struct rdma_ah_attr *attr, union ib_gid *sgid,
+ struct rdma_ah_attr *attr, const union ib_gid *sgid,
int pdid, bool *isvlan, u16 vlan_tag)
{
int status;
@@ -164,17 +164,14 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr,
struct ocrdma_ah *ah;
bool isvlan = false;
u16 vlan_tag = 0xffff;
- struct ib_gid_attr sgid_attr;
+ const struct ib_gid_attr *sgid_attr;
struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
- const struct ib_global_route *grh;
- union ib_gid sgid;
if ((attr->type != RDMA_AH_ATTR_TYPE_ROCE) ||
!(rdma_ah_get_ah_flags(attr) & IB_AH_GRH))
return ERR_PTR(-EINVAL);
- grh = rdma_ah_read_grh(attr);
if (atomic_cmpxchg(&dev->update_sl, 1, 0))
ocrdma_init_service_level(dev);
@@ -186,20 +183,15 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr,
if (status)
goto av_err;
- status = ib_get_cached_gid(&dev->ibdev, 1, grh->sgid_index, &sgid,
- &sgid_attr);
- if (status) {
- pr_err("%s(): Failed to query sgid, status = %d\n",
- __func__, status);
- goto av_conf_err;
- }
- if (is_vlan_dev(sgid_attr.ndev))
- vlan_tag = vlan_dev_vlan_id(sgid_attr.ndev);
- dev_put(sgid_attr.ndev);
+ sgid_attr = attr->grh.sgid_attr;
+ if (is_vlan_dev(sgid_attr->ndev))
+ vlan_tag = vlan_dev_vlan_id(sgid_attr->ndev);
+
/* Get network header type for this GID */
- ah->hdr_type = ib_gid_to_network_type(sgid_attr.gid_type, &sgid);
+ ah->hdr_type = rdma_gid_attr_network_type(sgid_attr);
- status = set_av_attr(dev, ah, attr, &sgid, pd->id, &isvlan, vlan_tag);
+ status = set_av_attr(dev, ah, attr, &sgid_attr->gid, pd->id,
+ &isvlan, vlan_tag);
if (status)
goto av_conf_err;
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
index 6c136e5017fe..c6c87cba943b 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c
@@ -2494,8 +2494,7 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp,
{
int status;
struct rdma_ah_attr *ah_attr = &attrs->ah_attr;
- union ib_gid sgid;
- struct ib_gid_attr sgid_attr;
+ const struct ib_gid_attr *sgid_attr;
u32 vlan_id = 0xFFFF;
u8 mac_addr[6], hdr_type;
union {
@@ -2525,25 +2524,23 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp,
memcpy(&cmd->params.dgid[0], &grh->dgid.raw[0],
sizeof(cmd->params.dgid));
- status = ib_get_cached_gid(&dev->ibdev, 1, grh->sgid_index,
- &sgid, &sgid_attr);
- if (!status) {
- vlan_id = rdma_vlan_dev_vlan_id(sgid_attr.ndev);
- memcpy(mac_addr, sgid_attr.ndev->dev_addr, ETH_ALEN);
- dev_put(sgid_attr.ndev);
- }
+ sgid_attr = ah_attr->grh.sgid_attr;
+ vlan_id = rdma_vlan_dev_vlan_id(sgid_attr->ndev);
+ memcpy(mac_addr, sgid_attr->ndev->dev_addr, ETH_ALEN);
qp->sgid_idx = grh->sgid_index;
- memcpy(&cmd->params.sgid[0], &sgid.raw[0], sizeof(cmd->params.sgid));
+ memcpy(&cmd->params.sgid[0], &sgid_attr->gid.raw[0],
+ sizeof(cmd->params.sgid));
status = ocrdma_resolve_dmac(dev, ah_attr, &mac_addr[0]);
if (status)
return status;
+
cmd->params.dmac_b0_to_b3 = mac_addr[0] | (mac_addr[1] << 8) |
(mac_addr[2] << 16) | (mac_addr[3] << 24);
- hdr_type = ib_gid_to_network_type(sgid_attr.gid_type, &sgid);
+ hdr_type = rdma_gid_attr_network_type(sgid_attr);
if (hdr_type == RDMA_NETWORK_IPV4) {
- rdma_gid2ip(&sgid_addr._sockaddr, &sgid);
+ rdma_gid2ip(&sgid_addr._sockaddr, &sgid_attr->gid);
rdma_gid2ip(&dgid_addr._sockaddr, &grh->dgid);
memcpy(&cmd->params.dgid[0],
&dgid_addr._sockaddr_in.sin_addr.s_addr, 4);
diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index 82e20fc32890..86b22f6b7271 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
@@ -89,7 +89,8 @@ int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr,
IB_DEVICE_SYS_IMAGE_GUID |
IB_DEVICE_LOCAL_DMA_LKEY |
IB_DEVICE_MEM_MGT_EXTENSIONS;
- attr->max_sge = min(dev->attr.max_send_sge, dev->attr.max_recv_sge);
+ attr->max_send_sge = dev->attr.max_send_sge;
+ attr->max_recv_sge = dev->attr.max_recv_sge;
attr->max_sge_rd = dev->attr.max_rdma_sge;
attr->max_cq = dev->attr.max_cq;
attr->max_cqe = dev->attr.max_cqe;
@@ -196,11 +197,10 @@ int ocrdma_query_port(struct ib_device *ibdev,
props->sm_lid = 0;
props->sm_sl = 0;
props->state = port_state;
- props->port_cap_flags =
- IB_PORT_CM_SUP |
- IB_PORT_REINIT_SUP |
- IB_PORT_DEVICE_MGMT_SUP | IB_PORT_VENDOR_CLASS_SUP |
- IB_PORT_IP_BASED_GIDS;
+ props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_REINIT_SUP |
+ IB_PORT_DEVICE_MGMT_SUP |
+ IB_PORT_VENDOR_CLASS_SUP;
+ props->ip_gids = true;
props->gid_tbl_len = OCRDMA_MAX_SGID;
props->pkey_tbl_len = 1;
props->bad_pkey_cntr = 0;
@@ -1774,13 +1774,13 @@ int ocrdma_destroy_qp(struct ib_qp *ibqp)
* protect against proessing in-flight CQEs for this QP.
*/
spin_lock_irqsave(&qp->sq_cq->cq_lock, flags);
- if (qp->rq_cq && (qp->rq_cq != qp->sq_cq))
+ if (qp->rq_cq && (qp->rq_cq != qp->sq_cq)) {
spin_lock(&qp->rq_cq->cq_lock);
-
- ocrdma_del_qpn_map(dev, qp);
-
- if (qp->rq_cq && (qp->rq_cq != qp->sq_cq))
+ ocrdma_del_qpn_map(dev, qp);
spin_unlock(&qp->rq_cq->cq_lock);
+ } else {
+ ocrdma_del_qpn_map(dev, qp);
+ }
spin_unlock_irqrestore(&qp->sq_cq->cq_lock, flags);
if (!pd->uctx) {
diff --git a/drivers/infiniband/hw/qedr/qedr_roce_cm.c b/drivers/infiniband/hw/qedr/qedr_roce_cm.c
index 0f14e687bb91..2e1f352c037d 100644
--- a/drivers/infiniband/hw/qedr/qedr_roce_cm.c
+++ b/drivers/infiniband/hw/qedr/qedr_roce_cm.c
@@ -387,11 +387,10 @@ static inline int qedr_gsi_build_header(struct qedr_dev *dev,
bool has_vlan = false, has_grh_ipv6 = true;
struct rdma_ah_attr *ah_attr = &get_qedr_ah(ud_wr(swr)->ah)->attr;
const struct ib_global_route *grh = rdma_ah_read_grh(ah_attr);
- union ib_gid sgid;
+ const struct ib_gid_attr *sgid_attr = grh->sgid_attr;
int send_size = 0;
u16 vlan_id = 0;
u16 ether_type;
- struct ib_gid_attr sgid_attr;
int rc;
int ip_ver = 0;
@@ -402,28 +401,16 @@ static inline int qedr_gsi_build_header(struct qedr_dev *dev,
for (i = 0; i < swr->num_sge; ++i)
send_size += swr->sg_list[i].length;
- rc = ib_get_cached_gid(qp->ibqp.device, rdma_ah_get_port_num(ah_attr),
- grh->sgid_index, &sgid, &sgid_attr);
- if (rc) {
- DP_ERR(dev,
- "gsi post send: failed to get cached GID (port=%d, ix=%d)\n",
- rdma_ah_get_port_num(ah_attr),
- grh->sgid_index);
- return rc;
- }
-
- vlan_id = rdma_vlan_dev_vlan_id(sgid_attr.ndev);
+ vlan_id = rdma_vlan_dev_vlan_id(sgid_attr->ndev);
if (vlan_id < VLAN_CFI_MASK)
has_vlan = true;
- dev_put(sgid_attr.ndev);
-
- has_udp = (sgid_attr.gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP);
+ has_udp = (sgid_attr->gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP);
if (!has_udp) {
/* RoCE v1 */
ether_type = ETH_P_IBOE;
*roce_mode = ROCE_V1;
- } else if (ipv6_addr_v4mapped((struct in6_addr *)&sgid)) {
+ } else if (ipv6_addr_v4mapped((struct in6_addr *)&sgid_attr->gid)) {
/* RoCE v2 IPv4 */
ip_ver = 4;
ether_type = ETH_P_IP;
@@ -471,7 +458,7 @@ static inline int qedr_gsi_build_header(struct qedr_dev *dev,
udh->grh.flow_label = grh->flow_label;
udh->grh.hop_limit = grh->hop_limit;
udh->grh.destination_gid = grh->dgid;
- memcpy(&udh->grh.source_gid.raw, &sgid.raw,
+ memcpy(&udh->grh.source_gid.raw, sgid_attr->gid.raw,
sizeof(udh->grh.source_gid.raw));
} else {
/* IPv4 header */
@@ -482,7 +469,7 @@ static inline int qedr_gsi_build_header(struct qedr_dev *dev,
udh->ip4.frag_off = htons(IP_DF);
udh->ip4.ttl = grh->hop_limit;
- ipv4_addr = qedr_get_ipv4_from_gid(sgid.raw);
+ ipv4_addr = qedr_get_ipv4_from_gid(sgid_attr->gid.raw);
udh->ip4.saddr = ipv4_addr;
ipv4_addr = qedr_get_ipv4_from_gid(grh->dgid.raw);
udh->ip4.daddr = ipv4_addr;
diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
index f7ac8fc9b531..b82c5d5fb0e3 100644
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -112,7 +112,8 @@ int qedr_query_device(struct ib_device *ibdev,
IB_DEVICE_RC_RNR_NAK_GEN |
IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_MGT_EXTENSIONS;
- attr->max_sge = qattr->max_sge;
+ attr->max_send_sge = qattr->max_sge;
+ attr->max_recv_sge = qattr->max_sge;
attr->max_sge_rd = qattr->max_sge;
attr->max_cq = qattr->max_cq;
attr->max_cqe = qattr->max_cqe;
@@ -224,7 +225,7 @@ int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr)
attr->lmc = 0;
attr->sm_lid = 0;
attr->sm_sl = 0;
- attr->port_cap_flags = IB_PORT_IP_BASED_GIDS;
+ attr->ip_gids = true;
if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
attr->gid_tbl_len = 1;
attr->pkey_tbl_len = 1;
@@ -1075,27 +1076,19 @@ static inline int get_gid_info_from_table(struct ib_qp *ibqp,
struct qed_rdma_modify_qp_in_params
*qp_params)
{
+ const struct ib_gid_attr *gid_attr;
enum rdma_network_type nw_type;
- struct ib_gid_attr gid_attr;
const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr);
- union ib_gid gid;
u32 ipv4_addr;
- int rc = 0;
int i;
- rc = ib_get_cached_gid(ibqp->device,
- rdma_ah_get_port_num(&attr->ah_attr),
- grh->sgid_index, &gid, &gid_attr);
- if (rc)
- return rc;
-
- qp_params->vlan_id = rdma_vlan_dev_vlan_id(gid_attr.ndev);
+ gid_attr = grh->sgid_attr;
+ qp_params->vlan_id = rdma_vlan_dev_vlan_id(gid_attr->ndev);
- dev_put(gid_attr.ndev);
- nw_type = ib_gid_to_network_type(gid_attr.gid_type, &gid);
+ nw_type = rdma_gid_attr_network_type(gid_attr);
switch (nw_type) {
case RDMA_NETWORK_IPV6:
- memcpy(&qp_params->sgid.bytes[0], &gid.raw[0],
+ memcpy(&qp_params->sgid.bytes[0], &gid_attr->gid.raw[0],
sizeof(qp_params->sgid));
memcpy(&qp_params->dgid.bytes[0],
&grh->dgid,
@@ -1105,7 +1098,7 @@ static inline int get_gid_info_from_table(struct ib_qp *ibqp,
QED_ROCE_MODIFY_QP_VALID_ROCE_MODE, 1);
break;
case RDMA_NETWORK_IB:
- memcpy(&qp_params->sgid.bytes[0], &gid.raw[0],
+ memcpy(&qp_params->sgid.bytes[0], &gid_attr->gid.raw[0],
sizeof(qp_params->sgid));
memcpy(&qp_params->dgid.bytes[0],
&grh->dgid,
@@ -1115,7 +1108,7 @@ static inline int get_gid_info_from_table(struct ib_qp *ibqp,
case RDMA_NETWORK_IPV4:
memset(&qp_params->sgid, 0, sizeof(qp_params->sgid));
memset(&qp_params->dgid, 0, sizeof(qp_params->dgid));
- ipv4_addr = qedr_get_ipv4_from_gid(gid.raw);
+ ipv4_addr = qedr_get_ipv4_from_gid(gid_attr->gid.raw);
qp_params->sgid.ipv4_addr = ipv4_addr;
ipv4_addr =
qedr_get_ipv4_from_gid(grh->dgid.raw);
@@ -2302,7 +2295,7 @@ struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct rdma_ah_attr *attr,
if (!ah)
return ERR_PTR(-ENOMEM);
- ah->attr = *attr;
+ rdma_copy_ah_attr(&ah->attr, attr);
return &ah->ibah;
}
@@ -2311,6 +2304,7 @@ int qedr_destroy_ah(struct ib_ah *ibah)
{
struct qedr_ah *ah = get_qedr_ah(ibah);
+ rdma_destroy_ah_attr(&ah->attr);
kfree(ah);
return 0;
}
diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c
index 14b4057a2b8f..41babbc0db58 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.c
+++ b/drivers/infiniband/hw/qib/qib_verbs.c
@@ -1489,7 +1489,8 @@ static void qib_fill_device_attr(struct qib_devdata *dd)
rdi->dparms.props.max_mr_size = ~0ULL;
rdi->dparms.props.max_qp = ib_qib_max_qps;
rdi->dparms.props.max_qp_wr = ib_qib_max_qp_wrs;
- rdi->dparms.props.max_sge = ib_qib_max_sges;
+ rdi->dparms.props.max_send_sge = ib_qib_max_sges;
+ rdi->dparms.props.max_recv_sge = ib_qib_max_sges;
rdi->dparms.props.max_sge_rd = ib_qib_max_sges;
rdi->dparms.props.max_cq = ib_qib_max_cqs;
rdi->dparms.props.max_cqe = ib_qib_max_cqes;
diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h
index f9a46768a19a..e72562a8959a 100644
--- a/drivers/infiniband/hw/qib/qib_verbs.h
+++ b/drivers/infiniband/hw/qib/qib_verbs.h
@@ -78,9 +78,6 @@ struct qib_verbs_txreq;
#define QIB_VENDOR_IPG cpu_to_be16(0xFFA0)
-/* XXX Should be defined in ib_verbs.h enum ib_port_cap_flags */
-#define IB_PORT_OTHER_LOCAL_CHANGES_SUP (1 << 26)
-
#define IB_DEFAULT_GID_PREFIX cpu_to_be64(0xfe80000000000000ULL)
/* Values for set/get portinfo VLCap OperationalVLs */
diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
index a688a5669168..9524524fade4 100644
--- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
+++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c
@@ -666,7 +666,7 @@ int usnic_ib_dereg_mr(struct ib_mr *ibmr)
usnic_dbg("va 0x%lx length 0x%zx\n", mr->umem->va, mr->umem->length);
- usnic_uiom_reg_release(mr->umem, ibmr->pd->uobject->context->closing);
+ usnic_uiom_reg_release(mr->umem, ibmr->uobject->context);
kfree(mr);
return 0;
}
diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c
index 4381c0a9a873..9dd39daa602b 100644
--- a/drivers/infiniband/hw/usnic/usnic_uiom.c
+++ b/drivers/infiniband/hw/usnic/usnic_uiom.c
@@ -41,6 +41,7 @@
#include <linux/workqueue.h>
#include <linux/list.h>
#include <linux/pci.h>
+#include <rdma/ib_verbs.h>
#include "usnic_log.h"
#include "usnic_uiom.h"
@@ -88,7 +89,7 @@ static void usnic_uiom_put_pages(struct list_head *chunk_list, int dirty)
for_each_sg(chunk->page_list, sg, chunk->nents, i) {
page = sg_page(sg);
pa = sg_phys(sg);
- if (dirty)
+ if (!PageDirty(page) && dirty)
set_page_dirty_lock(page);
put_page(page);
usnic_dbg("pa: %pa\n", &pa);
@@ -114,6 +115,16 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable,
dma_addr_t pa;
unsigned int gup_flags;
+ /*
+ * If the combination of the addr and size requested for this memory
+ * region causes an integer overflow, return error.
+ */
+ if (((addr + size) < addr) || PAGE_ALIGN(addr + size) < (addr + size))
+ return -EINVAL;
+
+ if (!size)
+ return -EINVAL;
+
if (!can_do_mlock())
return -EPERM;
@@ -127,7 +138,7 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable,
down_write(&current->mm->mmap_sem);
- locked = npages + current->mm->locked_vm;
+ locked = npages + current->mm->pinned_vm;
lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
@@ -143,7 +154,7 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable,
ret = 0;
while (npages) {
- ret = get_user_pages(cur_base,
+ ret = get_user_pages_longterm(cur_base,
min_t(unsigned long, npages,
PAGE_SIZE / sizeof(struct page *)),
gup_flags, page_list, NULL);
@@ -186,7 +197,7 @@ out:
if (ret < 0)
usnic_uiom_put_pages(chunk_list, 0);
else
- current->mm->locked_vm = locked;
+ current->mm->pinned_vm = locked;
up_write(&current->mm->mmap_sem);
free_page((unsigned long) page_list);
@@ -420,18 +431,22 @@ out_free_uiomr:
return ERR_PTR(err);
}
-void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr, int closing)
+void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr,
+ struct ib_ucontext *ucontext)
{
+ struct task_struct *task;
struct mm_struct *mm;
unsigned long diff;
__usnic_uiom_reg_release(uiomr->pd, uiomr, 1);
- mm = get_task_mm(current);
- if (!mm) {
- kfree(uiomr);
- return;
- }
+ task = get_pid_task(ucontext->tgid, PIDTYPE_PID);
+ if (!task)
+ goto out;
+ mm = get_task_mm(task);
+ put_task_struct(task);
+ if (!mm)
+ goto out;
diff = PAGE_ALIGN(uiomr->length + uiomr->offset) >> PAGE_SHIFT;
@@ -443,7 +458,7 @@ void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr, int closing)
* up here and not be able to take the mmap_sem. In that case
* we defer the vm_locked accounting to the system workqueue.
*/
- if (closing) {
+ if (ucontext->closing) {
if (!down_write_trylock(&mm->mmap_sem)) {
INIT_WORK(&uiomr->work, usnic_uiom_reg_account);
uiomr->mm = mm;
@@ -455,9 +470,10 @@ void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr, int closing)
} else
down_write(&mm->mmap_sem);
- current->mm->locked_vm -= diff;
+ mm->pinned_vm -= diff;
up_write(&mm->mmap_sem);
mmput(mm);
+out:
kfree(uiomr);
}
diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.h b/drivers/infiniband/hw/usnic/usnic_uiom.h
index 431efe4143f4..8c096acff123 100644
--- a/drivers/infiniband/hw/usnic/usnic_uiom.h
+++ b/drivers/infiniband/hw/usnic/usnic_uiom.h
@@ -39,6 +39,8 @@
#include "usnic_uiom_interval_tree.h"
+struct ib_ucontext;
+
#define USNIC_UIOM_READ (1)
#define USNIC_UIOM_WRITE (2)
@@ -89,7 +91,8 @@ void usnic_uiom_free_dev_list(struct device **devs);
struct usnic_uiom_reg *usnic_uiom_reg_get(struct usnic_uiom_pd *pd,
unsigned long addr, size_t size,
int access, int dmasync);
-void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr, int closing);
+void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr,
+ struct ib_ucontext *ucontext);
int usnic_uiom_init(char *drv_name);
void usnic_uiom_fini(void);
#endif /* USNIC_UIOM_H_ */
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h
index 44cb1cfba417..42b8685c997e 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma.h
@@ -378,11 +378,6 @@ static inline enum ib_port_speed pvrdma_port_speed_to_ib(
return (enum ib_port_speed)speed;
}
-static inline int pvrdma_qp_attr_mask_to_ib(int attr_mask)
-{
- return attr_mask;
-}
-
static inline int ib_qp_attr_mask_to_pvrdma(int attr_mask)
{
return attr_mask & PVRDMA_MASK(PVRDMA_QP_ATTR_MASK_MAX);
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
index 0be33a81bbe6..912933549dfb 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
@@ -62,9 +62,7 @@ static DEFINE_MUTEX(pvrdma_device_list_lock);
static LIST_HEAD(pvrdma_device_list);
static struct workqueue_struct *event_wq;
-static int pvrdma_add_gid(const union ib_gid *gid,
- const struct ib_gid_attr *attr,
- void **context);
+static int pvrdma_add_gid(const struct ib_gid_attr *attr, void **context);
static int pvrdma_del_gid(const struct ib_gid_attr *attr, void **context);
static ssize_t show_hca(struct device *device, struct device_attribute *attr,
@@ -650,13 +648,11 @@ static int pvrdma_add_gid_at_index(struct pvrdma_dev *dev,
return 0;
}
-static int pvrdma_add_gid(const union ib_gid *gid,
- const struct ib_gid_attr *attr,
- void **context)
+static int pvrdma_add_gid(const struct ib_gid_attr *attr, void **context)
{
struct pvrdma_dev *dev = to_vdev(attr->device);
- return pvrdma_add_gid_at_index(dev, gid,
+ return pvrdma_add_gid_at_index(dev, &attr->gid,
ib_gid_type_to_pvrdma(attr->gid_type),
attr->index);
}
@@ -699,8 +695,12 @@ static int pvrdma_del_gid(const struct ib_gid_attr *attr, void **context)
}
static void pvrdma_netdevice_event_handle(struct pvrdma_dev *dev,
+ struct net_device *ndev,
unsigned long event)
{
+ struct pci_dev *pdev_net;
+ unsigned int slot;
+
switch (event) {
case NETDEV_REBOOT:
case NETDEV_DOWN:
@@ -718,6 +718,24 @@ static void pvrdma_netdevice_event_handle(struct pvrdma_dev *dev,
else
pvrdma_dispatch_event(dev, 1, IB_EVENT_PORT_ACTIVE);
break;
+ case NETDEV_UNREGISTER:
+ dev_put(dev->netdev);
+ dev->netdev = NULL;
+ break;
+ case NETDEV_REGISTER:
+ /* vmxnet3 will have same bus, slot. But func will be 0 */
+ slot = PCI_SLOT(dev->pdev->devfn);
+ pdev_net = pci_get_slot(dev->pdev->bus,
+ PCI_DEVFN(slot, 0));
+ if ((dev->netdev == NULL) &&
+ (pci_get_drvdata(pdev_net) == ndev)) {
+ /* this is our netdev */
+ dev->netdev = ndev;
+ dev_hold(ndev);
+ }
+ pci_dev_put(pdev_net);
+ break;
+
default:
dev_dbg(&dev->pdev->dev, "ignore netdevice event %ld on %s\n",
event, dev->ib_dev.name);
@@ -734,8 +752,11 @@ static void pvrdma_netdevice_event_work(struct work_struct *work)
mutex_lock(&pvrdma_device_list_lock);
list_for_each_entry(dev, &pvrdma_device_list, device_link) {
- if (dev->netdev == netdev_work->event_netdev) {
- pvrdma_netdevice_event_handle(dev, netdev_work->event);
+ if ((netdev_work->event == NETDEV_REGISTER) ||
+ (dev->netdev == netdev_work->event_netdev)) {
+ pvrdma_netdevice_event_handle(dev,
+ netdev_work->event_netdev,
+ netdev_work->event);
break;
}
}
@@ -968,6 +989,7 @@ static int pvrdma_pci_probe(struct pci_dev *pdev,
ret = -ENODEV;
goto err_free_cq_ring;
}
+ dev_hold(dev->netdev);
dev_info(&pdev->dev, "paired device to %s\n", dev->netdev->name);
@@ -1040,6 +1062,10 @@ err_free_intrs:
pvrdma_free_irq(dev);
pci_free_irq_vectors(pdev);
err_free_cq_ring:
+ if (dev->netdev) {
+ dev_put(dev->netdev);
+ dev->netdev = NULL;
+ }
pvrdma_page_dir_cleanup(dev, &dev->cq_pdir);
err_free_async_ring:
pvrdma_page_dir_cleanup(dev, &dev->async_pdir);
@@ -1079,6 +1105,11 @@ static void pvrdma_pci_remove(struct pci_dev *pdev)
flush_workqueue(event_wq);
+ if (dev->netdev) {
+ dev_put(dev->netdev);
+ dev->netdev = NULL;
+ }
+
/* Unregister ib device */
ib_unregister_device(&dev->ib_dev);
diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c
index a51463cd2f37..b65d10b0a875 100644
--- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c
+++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c
@@ -82,7 +82,8 @@ int pvrdma_query_device(struct ib_device *ibdev,
props->max_qp = dev->dsr->caps.max_qp;
props->max_qp_wr = dev->dsr->caps.max_qp_wr;
props->device_cap_flags = dev->dsr->caps.device_cap_flags;
- props->max_sge = dev->dsr->caps.max_sge;
+ props->max_send_sge = dev->dsr->caps.max_sge;
+ props->max_recv_sge = dev->dsr->caps.max_sge;
props->max_sge_rd = PVRDMA_GET_CAP(dev, dev->dsr->caps.max_sge,
dev->dsr->caps.max_sge_rd);
props->max_srq = dev->dsr->caps.max_srq;
@@ -154,7 +155,8 @@ int pvrdma_query_port(struct ib_device *ibdev, u8 port,
props->gid_tbl_len = resp->attrs.gid_tbl_len;
props->port_cap_flags =
pvrdma_port_cap_flags_to_ib(resp->attrs.port_cap_flags);
- props->port_cap_flags |= IB_PORT_CM_SUP | IB_PORT_IP_BASED_GIDS;
+ props->port_cap_flags |= IB_PORT_CM_SUP;
+ props->ip_gids = true;
props->max_msg_sz = resp->attrs.max_msg_sz;
props->bad_pkey_cntr = resp->attrs.bad_pkey_cntr;
props->qkey_viol_cntr = resp->attrs.qkey_viol_cntr;
diff --git a/drivers/infiniband/sw/rdmavt/ah.c b/drivers/infiniband/sw/rdmavt/ah.c
index ba3639a0d77c..89ec0f64abfc 100644
--- a/drivers/infiniband/sw/rdmavt/ah.c
+++ b/drivers/infiniband/sw/rdmavt/ah.c
@@ -120,7 +120,8 @@ struct ib_ah *rvt_create_ah(struct ib_pd *pd,
dev->n_ahs_allocated++;
spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
- ah->attr = *ah_attr;
+ rdma_copy_ah_attr(&ah->attr, ah_attr);
+
atomic_set(&ah->refcount, 0);
if (dev->driver_f.notify_new_ah)
@@ -148,6 +149,7 @@ int rvt_destroy_ah(struct ib_ah *ibah)
dev->n_ahs_allocated--;
spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
+ rdma_destroy_ah_attr(&ah->attr);
kfree(ah);
return 0;
diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index 41183bd665ca..d29e3c943399 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -780,14 +780,15 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
if (!rdi)
return ERR_PTR(-EINVAL);
- if (init_attr->cap.max_send_sge > rdi->dparms.props.max_sge ||
+ if (init_attr->cap.max_send_sge > rdi->dparms.props.max_send_sge ||
init_attr->cap.max_send_wr > rdi->dparms.props.max_qp_wr ||
init_attr->create_flags)
return ERR_PTR(-EINVAL);
/* Check receive queue parameters if no SRQ is specified. */
if (!init_attr->srq) {
- if (init_attr->cap.max_recv_sge > rdi->dparms.props.max_sge ||
+ if (init_attr->cap.max_recv_sge >
+ rdi->dparms.props.max_recv_sge ||
init_attr->cap.max_recv_wr > rdi->dparms.props.max_qp_wr)
return ERR_PTR(-EINVAL);
@@ -1336,13 +1337,13 @@ int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
qp->qp_access_flags = attr->qp_access_flags;
if (attr_mask & IB_QP_AV) {
- qp->remote_ah_attr = attr->ah_attr;
+ rdma_replace_ah_attr(&qp->remote_ah_attr, &attr->ah_attr);
qp->s_srate = rdma_ah_get_static_rate(&attr->ah_attr);
qp->srate_mbps = ib_rate_to_mbps(qp->s_srate);
}
if (attr_mask & IB_QP_ALT_PATH) {
- qp->alt_ah_attr = attr->alt_ah_attr;
+ rdma_replace_ah_attr(&qp->alt_ah_attr, &attr->alt_ah_attr);
qp->s_alt_pkey_index = attr->alt_pkey_index;
}
@@ -1459,6 +1460,8 @@ int rvt_destroy_qp(struct ib_qp *ibqp)
vfree(qp->s_wq);
rdi->driver_f.qp_priv_free(rdi, qp);
kfree(qp->s_ack_queue);
+ rdma_destroy_ah_attr(&qp->remote_ah_attr);
+ rdma_destroy_ah_attr(&qp->alt_ah_attr);
kfree(qp);
return 0;
}
diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c
index 7121e1b1eb89..10999fa69281 100644
--- a/drivers/infiniband/sw/rxe/rxe.c
+++ b/drivers/infiniband/sw/rxe/rxe.c
@@ -91,7 +91,8 @@ static void rxe_init_device_param(struct rxe_dev *rxe)
rxe->attr.max_qp = RXE_MAX_QP;
rxe->attr.max_qp_wr = RXE_MAX_QP_WR;
rxe->attr.device_cap_flags = RXE_DEVICE_CAP_FLAGS;
- rxe->attr.max_sge = RXE_MAX_SGE;
+ rxe->attr.max_send_sge = RXE_MAX_SGE;
+ rxe->attr.max_recv_sge = RXE_MAX_SGE;
rxe->attr.max_sge_rd = RXE_MAX_SGE_RD;
rxe->attr.max_cq = RXE_MAX_CQ;
rxe->attr.max_cqe = (1 << RXE_MAX_LOG_CQE) - 1;
diff --git a/drivers/infiniband/sw/rxe/rxe_av.c b/drivers/infiniband/sw/rxe/rxe_av.c
index 7f1ae364088a..26fe8d7dbc55 100644
--- a/drivers/infiniband/sw/rxe/rxe_av.c
+++ b/drivers/infiniband/sw/rxe/rxe_av.c
@@ -55,29 +55,41 @@ int rxe_av_chk_attr(struct rxe_dev *rxe, struct rdma_ah_attr *attr)
void rxe_av_from_attr(u8 port_num, struct rxe_av *av,
struct rdma_ah_attr *attr)
{
+ const struct ib_global_route *grh = rdma_ah_read_grh(attr);
+
memset(av, 0, sizeof(*av));
- memcpy(&av->grh, rdma_ah_read_grh(attr),
- sizeof(*rdma_ah_read_grh(attr)));
+ memcpy(av->grh.dgid.raw, grh->dgid.raw, sizeof(grh->dgid.raw));
+ av->grh.flow_label = grh->flow_label;
+ av->grh.sgid_index = grh->sgid_index;
+ av->grh.hop_limit = grh->hop_limit;
+ av->grh.traffic_class = grh->traffic_class;
av->port_num = port_num;
}
void rxe_av_to_attr(struct rxe_av *av, struct rdma_ah_attr *attr)
{
+ struct ib_global_route *grh = rdma_ah_retrieve_grh(attr);
+
attr->type = RDMA_AH_ATTR_TYPE_ROCE;
- memcpy(rdma_ah_retrieve_grh(attr), &av->grh, sizeof(av->grh));
+
+ memcpy(grh->dgid.raw, av->grh.dgid.raw, sizeof(av->grh.dgid.raw));
+ grh->flow_label = av->grh.flow_label;
+ grh->sgid_index = av->grh.sgid_index;
+ grh->hop_limit = av->grh.hop_limit;
+ grh->traffic_class = av->grh.traffic_class;
+
rdma_ah_set_ah_flags(attr, IB_AH_GRH);
rdma_ah_set_port_num(attr, av->port_num);
}
-void rxe_av_fill_ip_info(struct rxe_av *av,
- struct rdma_ah_attr *attr,
- struct ib_gid_attr *sgid_attr,
- union ib_gid *sgid)
+void rxe_av_fill_ip_info(struct rxe_av *av, struct rdma_ah_attr *attr)
{
- rdma_gid2ip((struct sockaddr *)&av->sgid_addr, sgid);
+ const struct ib_gid_attr *sgid_attr = attr->grh.sgid_attr;
+
+ rdma_gid2ip((struct sockaddr *)&av->sgid_addr, &sgid_attr->gid);
rdma_gid2ip((struct sockaddr *)&av->dgid_addr,
&rdma_ah_read_grh(attr)->dgid);
- av->network_type = ib_gid_to_network_type(sgid_attr->gid_type, sgid);
+ av->network_type = rdma_gid_attr_network_type(sgid_attr);
}
struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt)
diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index a51ece596c43..87d14f7ef21b 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -43,10 +43,7 @@ void rxe_av_from_attr(u8 port_num, struct rxe_av *av,
void rxe_av_to_attr(struct rxe_av *av, struct rdma_ah_attr *attr);
-void rxe_av_fill_ip_info(struct rxe_av *av,
- struct rdma_ah_attr *attr,
- struct ib_gid_attr *sgid_attr,
- union ib_gid *sgid);
+void rxe_av_fill_ip_info(struct rxe_av *av, struct rdma_ah_attr *attr);
struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt);
diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
index 59ec6d918ed4..8094cbaa54a9 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -182,39 +182,19 @@ static struct dst_entry *rxe_find_route6(struct net_device *ndev,
#endif
-/*
- * Derive the net_device from the av.
- * For physical devices, this will just return rxe->ndev.
- * But for VLAN devices, it will return the vlan dev.
- * Caller should dev_put() the returned net_device.
- */
-static struct net_device *rxe_netdev_from_av(struct rxe_dev *rxe,
- int port_num,
- struct rxe_av *av)
-{
- union ib_gid gid;
- struct ib_gid_attr attr;
- struct net_device *ndev = rxe->ndev;
-
- if (ib_get_cached_gid(&rxe->ib_dev, port_num, av->grh.sgid_index,
- &gid, &attr) == 0 &&
- attr.ndev && attr.ndev != ndev)
- ndev = attr.ndev;
- else
- /* Only to ensure that caller may call dev_put() */
- dev_hold(ndev);
-
- return ndev;
-}
-
static struct dst_entry *rxe_find_route(struct rxe_dev *rxe,
struct rxe_qp *qp,
struct rxe_av *av)
{
+ const struct ib_gid_attr *attr;
struct dst_entry *dst = NULL;
struct net_device *ndev;
- ndev = rxe_netdev_from_av(rxe, qp->attr.port_num, av);
+ attr = rdma_get_gid_attr(&rxe->ib_dev, qp->attr.port_num,
+ av->grh.sgid_index);
+ if (IS_ERR(attr))
+ return NULL;
+ ndev = attr->ndev;
if (qp_type(qp) == IB_QPT_RC)
dst = sk_dst_get(qp->sk->sk);
@@ -243,9 +223,13 @@ static struct dst_entry *rxe_find_route(struct rxe_dev *rxe,
rt6_get_cookie((struct rt6_info *)dst);
#endif
}
- }
- dev_put(ndev);
+ if (dst && (qp_type(qp) == IB_QPT_RC)) {
+ dst_hold(dst);
+ sk_dst_set(qp->sk->sk, dst);
+ }
+ }
+ rdma_put_gid_attr(attr);
return dst;
}
@@ -418,11 +402,7 @@ static int prepare4(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
prepare_ipv4_hdr(dst, skb, saddr->s_addr, daddr->s_addr, IPPROTO_UDP,
av->grh.traffic_class, av->grh.hop_limit, df, xnet);
- if (qp_type(qp) == IB_QPT_RC)
- sk_dst_set(qp->sk->sk, dst);
- else
- dst_release(dst);
-
+ dst_release(dst);
return 0;
}
@@ -450,11 +430,7 @@ static int prepare6(struct rxe_dev *rxe, struct rxe_pkt_info *pkt,
av->grh.traffic_class,
av->grh.hop_limit);
- if (qp_type(qp) == IB_QPT_RC)
- sk_dst_set(qp->sk->sk, dst);
- else
- dst_release(dst);
-
+ dst_release(dst);
return 0;
}
@@ -536,9 +512,13 @@ struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av,
unsigned int hdr_len;
struct sk_buff *skb;
struct net_device *ndev;
+ const struct ib_gid_attr *attr;
const int port_num = 1;
- ndev = rxe_netdev_from_av(rxe, port_num, av);
+ attr = rdma_get_gid_attr(&rxe->ib_dev, port_num, av->grh.sgid_index);
+ if (IS_ERR(attr))
+ return NULL;
+ ndev = attr->ndev;
if (av->network_type == RDMA_NETWORK_IPV4)
hdr_len = ETH_HLEN + sizeof(struct udphdr) +
@@ -550,10 +530,8 @@ struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av,
skb = alloc_skb(paylen + hdr_len + LL_RESERVED_SPACE(ndev),
GFP_ATOMIC);
- if (unlikely(!skb)) {
- dev_put(ndev);
- return NULL;
- }
+ if (unlikely(!skb))
+ goto out;
skb_reserve(skb, hdr_len + LL_RESERVED_SPACE(rxe->ndev));
@@ -568,7 +546,8 @@ struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av,
pkt->hdr = skb_put_zero(skb, paylen);
pkt->mask |= RXE_GRH_MASK;
- dev_put(ndev);
+out:
+ rdma_put_gid_attr(attr);
return skb;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_param.h b/drivers/infiniband/sw/rxe/rxe_param.h
index 1b596fbbe251..4555510d86c4 100644
--- a/drivers/infiniband/sw/rxe/rxe_param.h
+++ b/drivers/infiniband/sw/rxe/rxe_param.h
@@ -83,7 +83,7 @@ enum rxe_device_param {
RXE_MAX_SGE_RD = 32,
RXE_MAX_CQ = 16384,
RXE_MAX_LOG_CQE = 15,
- RXE_MAX_MR = 2 * 1024,
+ RXE_MAX_MR = 256 * 1024,
RXE_MAX_PD = 0x7ffc,
RXE_MAX_QP_RD_ATOM = 128,
RXE_MAX_EE_RD_ATOM = 0,
diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c
index b9f7aa1114b2..c58452daffc7 100644
--- a/drivers/infiniband/sw/rxe/rxe_qp.c
+++ b/drivers/infiniband/sw/rxe/rxe_qp.c
@@ -49,9 +49,9 @@ static int rxe_qp_chk_cap(struct rxe_dev *rxe, struct ib_qp_cap *cap,
goto err1;
}
- if (cap->max_send_sge > rxe->attr.max_sge) {
+ if (cap->max_send_sge > rxe->attr.max_send_sge) {
pr_warn("invalid send sge = %d > %d\n",
- cap->max_send_sge, rxe->attr.max_sge);
+ cap->max_send_sge, rxe->attr.max_send_sge);
goto err1;
}
@@ -62,9 +62,9 @@ static int rxe_qp_chk_cap(struct rxe_dev *rxe, struct ib_qp_cap *cap,
goto err1;
}
- if (cap->max_recv_sge > rxe->attr.max_sge) {
+ if (cap->max_recv_sge > rxe->attr.max_recv_sge) {
pr_warn("invalid recv sge = %d > %d\n",
- cap->max_recv_sge, rxe->attr.max_sge);
+ cap->max_recv_sge, rxe->attr.max_recv_sge);
goto err1;
}
}
@@ -580,9 +580,6 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask,
struct ib_udata *udata)
{
int err;
- struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
- union ib_gid sgid;
- struct ib_gid_attr sgid_attr;
if (mask & IB_QP_MAX_QP_RD_ATOMIC) {
int max_rd_atomic = __roundup_pow_of_two(attr->max_rd_atomic);
@@ -623,30 +620,14 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask,
qp->attr.qkey = attr->qkey;
if (mask & IB_QP_AV) {
- ib_get_cached_gid(&rxe->ib_dev, 1,
- rdma_ah_read_grh(&attr->ah_attr)->sgid_index,
- &sgid, &sgid_attr);
rxe_av_from_attr(attr->port_num, &qp->pri_av, &attr->ah_attr);
- rxe_av_fill_ip_info(&qp->pri_av, &attr->ah_attr,
- &sgid_attr, &sgid);
- if (sgid_attr.ndev)
- dev_put(sgid_attr.ndev);
+ rxe_av_fill_ip_info(&qp->pri_av, &attr->ah_attr);
}
if (mask & IB_QP_ALT_PATH) {
- u8 sgid_index =
- rdma_ah_read_grh(&attr->alt_ah_attr)->sgid_index;
-
- ib_get_cached_gid(&rxe->ib_dev, 1, sgid_index,
- &sgid, &sgid_attr);
-
rxe_av_from_attr(attr->alt_port_num, &qp->alt_av,
&attr->alt_ah_attr);
- rxe_av_fill_ip_info(&qp->alt_av, &attr->alt_ah_attr,
- &sgid_attr, &sgid);
- if (sgid_attr.ndev)
- dev_put(sgid_attr.ndev);
-
+ rxe_av_fill_ip_info(&qp->alt_av, &attr->alt_ah_attr);
qp->attr.alt_port_num = attr->alt_port_num;
qp->attr.alt_pkey_index = attr->alt_pkey_index;
qp->attr.alt_timeout = attr->alt_timeout;
diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c
index dfba44a40f0b..cc5cfd156758 100644
--- a/drivers/infiniband/sw/rxe/rxe_recv.c
+++ b/drivers/infiniband/sw/rxe/rxe_recv.c
@@ -256,8 +256,7 @@ static int hdr_check(struct rxe_pkt_info *pkt)
return 0;
err2:
- if (qp)
- rxe_drop_ref(qp);
+ rxe_drop_ref(qp);
err1:
return -EINVAL;
}
@@ -328,6 +327,7 @@ err1:
static int rxe_match_dgid(struct rxe_dev *rxe, struct sk_buff *skb)
{
+ const struct ib_gid_attr *gid_attr;
union ib_gid dgid;
union ib_gid *pdgid;
@@ -339,9 +339,14 @@ static int rxe_match_dgid(struct rxe_dev *rxe, struct sk_buff *skb)
pdgid = (union ib_gid *)&ipv6_hdr(skb)->daddr;
}
- return ib_find_cached_gid_by_port(&rxe->ib_dev, pdgid,
- IB_GID_TYPE_ROCE_UDP_ENCAP,
- 1, skb->dev, NULL);
+ gid_attr = rdma_find_gid_by_port(&rxe->ib_dev, pdgid,
+ IB_GID_TYPE_ROCE_UDP_ENCAP,
+ 1, skb->dev);
+ if (IS_ERR(gid_attr))
+ return PTR_ERR(gid_attr);
+
+ rdma_put_gid_attr(gid_attr);
+ return 0;
}
/* rxe_rcv is called from the interface driver */
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index 5b57de30dee4..aa5833318372 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -884,6 +884,11 @@ static enum resp_states do_complete(struct rxe_qp *qp,
else
wc->network_hdr_type = RDMA_NETWORK_IPV6;
+ if (is_vlan_dev(skb->dev)) {
+ wc->wc_flags |= IB_WC_WITH_VLAN;
+ wc->vlan_id = vlan_dev_vlan_id(skb->dev);
+ }
+
if (pkt->mask & RXE_IMMDT_MASK) {
wc->wc_flags |= IB_WC_WITH_IMM;
wc->ex.imm_data = immdt_imm(pkt);
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index 9deafc3aa6af..1188e163204d 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -222,25 +222,11 @@ static int rxe_dealloc_pd(struct ib_pd *ibpd)
return 0;
}
-static int rxe_init_av(struct rxe_dev *rxe, struct rdma_ah_attr *attr,
- struct rxe_av *av)
+static void rxe_init_av(struct rxe_dev *rxe, struct rdma_ah_attr *attr,
+ struct rxe_av *av)
{
- int err;
- union ib_gid sgid;
- struct ib_gid_attr sgid_attr;
-
- err = ib_get_cached_gid(&rxe->ib_dev, rdma_ah_get_port_num(attr),
- rdma_ah_read_grh(attr)->sgid_index, &sgid,
- &sgid_attr);
- if (err) {
- pr_err("Failed to query sgid. err = %d\n", err);
- return err;
- }
-
rxe_av_from_attr(rdma_ah_get_port_num(attr), av, attr);
- rxe_av_fill_ip_info(av, attr, &sgid_attr, &sgid);
- dev_put(sgid_attr.ndev);
- return 0;
+ rxe_av_fill_ip_info(av, attr);
}
static struct ib_ah *rxe_create_ah(struct ib_pd *ibpd,
@@ -255,28 +241,17 @@ static struct ib_ah *rxe_create_ah(struct ib_pd *ibpd,
err = rxe_av_chk_attr(rxe, attr);
if (err)
- goto err1;
+ return ERR_PTR(err);
ah = rxe_alloc(&rxe->ah_pool);
- if (!ah) {
- err = -ENOMEM;
- goto err1;
- }
+ if (!ah)
+ return ERR_PTR(-ENOMEM);
rxe_add_ref(pd);
ah->pd = pd;
- err = rxe_init_av(rxe, attr, &ah->av);
- if (err)
- goto err2;
-
+ rxe_init_av(rxe, attr, &ah->av);
return &ah->ibah;
-
-err2:
- rxe_drop_ref(pd);
- rxe_drop_ref(ah);
-err1:
- return ERR_PTR(err);
}
static int rxe_modify_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr)
@@ -289,10 +264,7 @@ static int rxe_modify_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr)
if (err)
return err;
- err = rxe_init_av(rxe, attr, &ah->av);
- if (err)
- return err;
-
+ rxe_init_av(rxe, attr, &ah->av);
return 0;
}
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index a50b062ed13e..e255a7e5a4c3 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -252,11 +252,11 @@ struct ipoib_cm_tx {
struct ipoib_neigh *neigh;
struct ipoib_path *path;
struct ipoib_tx_buf *tx_ring;
- unsigned tx_head;
- unsigned tx_tail;
+ unsigned int tx_head;
+ unsigned int tx_tail;
unsigned long flags;
u32 mtu;
- unsigned max_send_sge;
+ unsigned int max_send_sge;
};
struct ipoib_cm_rx_buf {
@@ -373,8 +373,8 @@ struct ipoib_dev_priv {
struct ipoib_rx_buf *rx_ring;
struct ipoib_tx_buf *tx_ring;
- unsigned tx_head;
- unsigned tx_tail;
+ unsigned int tx_head;
+ unsigned int tx_tail;
struct ib_sge tx_sge[MAX_SKB_FRAGS + 1];
struct ib_ud_wr tx_wr;
struct ib_wc send_wc[MAX_SEND_CQE];
@@ -404,7 +404,7 @@ struct ipoib_dev_priv {
#endif
u64 hca_caps;
struct ipoib_ethtool_st ethtool;
- unsigned max_send_sge;
+ unsigned int max_send_sge;
bool sm_fullmember_sendonly_support;
const struct net_device_ops *rn_ops;
};
@@ -414,7 +414,7 @@ struct ipoib_ah {
struct ib_ah *ah;
struct list_head list;
struct kref ref;
- unsigned last_send;
+ unsigned int last_send;
int valid;
};
@@ -729,7 +729,7 @@ void ipoib_cm_dev_stop(struct net_device *dev)
static inline
int ipoib_cm_dev_init(struct net_device *dev)
{
- return -ENOSYS;
+ return -EOPNOTSUPP;
}
static inline
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 6535d9beb24d..582f199887b0 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -275,7 +275,7 @@ static struct ib_qp *ipoib_cm_create_rx_qp(struct net_device *dev,
static int ipoib_cm_modify_rx_qp(struct net_device *dev,
struct ib_cm_id *cm_id, struct ib_qp *qp,
- unsigned psn)
+ unsigned int psn)
{
struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ib_qp_attr qp_attr;
@@ -363,7 +363,7 @@ static int ipoib_cm_nonsrq_init_rx(struct net_device *dev, struct ib_cm_id *cm_i
if (!rx->rx_ring)
return -ENOMEM;
- t = kmalloc(sizeof *t, GFP_KERNEL);
+ t = kmalloc(sizeof(*t), GFP_KERNEL);
if (!t) {
ret = -ENOMEM;
goto err_free_1;
@@ -422,7 +422,7 @@ err_free_1:
static int ipoib_cm_send_rep(struct net_device *dev, struct ib_cm_id *cm_id,
struct ib_qp *qp, struct ib_cm_req_event_param *req,
- unsigned psn)
+ unsigned int psn)
{
struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_cm_data data = {};
@@ -432,7 +432,7 @@ static int ipoib_cm_send_rep(struct net_device *dev, struct ib_cm_id *cm_id,
data.mtu = cpu_to_be32(IPOIB_CM_BUF_SIZE);
rep.private_data = &data;
- rep.private_data_len = sizeof data;
+ rep.private_data_len = sizeof(data);
rep.flow_control = 0;
rep.rnr_retry_count = req->rnr_retry_count;
rep.srq = ipoib_cm_has_srq(dev);
@@ -446,11 +446,11 @@ static int ipoib_cm_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even
struct net_device *dev = cm_id->context;
struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_cm_rx *p;
- unsigned psn;
+ unsigned int psn;
int ret;
ipoib_dbg(priv, "REQ arrived\n");
- p = kzalloc(sizeof *p, GFP_KERNEL);
+ p = kzalloc(sizeof(*p), GFP_KERNEL);
if (!p)
return -ENOMEM;
p->dev = dev;
@@ -547,7 +547,7 @@ static void skb_put_frags(struct sk_buff *skb, unsigned int hdr_space,
0, PAGE_SIZE);
--skb_shinfo(skb)->nr_frags;
} else {
- size = min(length, (unsigned) PAGE_SIZE);
+ size = min_t(unsigned int, length, PAGE_SIZE);
skb_frag_size_set(frag, size);
skb->data_len += size;
@@ -641,8 +641,9 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
}
}
- frags = PAGE_ALIGN(wc->byte_len - min(wc->byte_len,
- (unsigned)IPOIB_CM_HEAD_SIZE)) / PAGE_SIZE;
+ frags = PAGE_ALIGN(wc->byte_len -
+ min_t(u32, wc->byte_len, IPOIB_CM_HEAD_SIZE)) /
+ PAGE_SIZE;
newskb = ipoib_cm_alloc_rx_skb(dev, rx_ring, wr_id, frags,
mapping, GFP_ATOMIC);
@@ -657,7 +658,7 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
}
ipoib_cm_dma_unmap_rx(priv, frags, rx_ring[wr_id].mapping);
- memcpy(rx_ring[wr_id].mapping, mapping, (frags + 1) * sizeof *mapping);
+ memcpy(rx_ring[wr_id].mapping, mapping, (frags + 1) * sizeof(*mapping));
ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n",
wc->byte_len, wc->slid);
@@ -712,7 +713,7 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_
struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_tx_buf *tx_req;
int rc;
- unsigned usable_sge = tx->max_send_sge - !!skb_headlen(skb);
+ unsigned int usable_sge = tx->max_send_sge - !!skb_headlen(skb);
if (unlikely(skb->len > tx->mtu)) {
ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n",
@@ -1068,8 +1069,8 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_
struct ib_qp *tx_qp;
if (dev->features & NETIF_F_SG)
- attr.cap.max_send_sge =
- min_t(u32, priv->ca->attrs.max_sge, MAX_SKB_FRAGS + 1);
+ attr.cap.max_send_sge = min_t(u32, priv->ca->attrs.max_send_sge,
+ MAX_SKB_FRAGS + 1);
tx_qp = ib_create_qp(priv->pd, &attr);
tx->max_send_sge = attr.cap.max_send_sge;
@@ -1094,7 +1095,7 @@ static int ipoib_cm_send_req(struct net_device *dev,
req.qp_num = qp->qp_num;
req.qp_type = qp->qp_type;
req.private_data = &data;
- req.private_data_len = sizeof data;
+ req.private_data_len = sizeof(data);
req.flow_control = 0;
req.starting_psn = 0; /* FIXME */
@@ -1152,7 +1153,7 @@ static int ipoib_cm_tx_init(struct ipoib_cm_tx *p, u32 qpn,
ret = -ENOMEM;
goto err_tx;
}
- memset(p->tx_ring, 0, ipoib_sendq_size * sizeof *p->tx_ring);
+ memset(p->tx_ring, 0, ipoib_sendq_size * sizeof(*p->tx_ring));
p->qp = ipoib_cm_create_tx_qp(p->dev, p);
memalloc_noio_restore(noio_flag);
@@ -1305,7 +1306,7 @@ struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path
struct ipoib_dev_priv *priv = ipoib_priv(dev);
struct ipoib_cm_tx *tx;
- tx = kzalloc(sizeof *tx, GFP_ATOMIC);
+ tx = kzalloc(sizeof(*tx), GFP_ATOMIC);
if (!tx)
return NULL;
@@ -1370,7 +1371,7 @@ static void ipoib_cm_tx_start(struct work_struct *work)
neigh->daddr + QPN_AND_OPTIONS_OFFSET);
goto free_neigh;
}
- memcpy(&pathrec, &p->path->pathrec, sizeof pathrec);
+ memcpy(&pathrec, &p->path->pathrec, sizeof(pathrec));
spin_unlock_irqrestore(&priv->lock, flags);
netif_tx_unlock_bh(dev);
@@ -1428,7 +1429,7 @@ static void ipoib_cm_skb_reap(struct work_struct *work)
struct net_device *dev = priv->dev;
struct sk_buff *skb;
unsigned long flags;
- unsigned mtu = priv->mcast_mtu;
+ unsigned int mtu = priv->mcast_mtu;
netif_tx_lock_bh(dev);
spin_lock_irqsave(&priv->lock, flags);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_fs.c b/drivers/infiniband/ulp/ipoib/ipoib_fs.c
index ea302b054601..178488028734 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_fs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_fs.c
@@ -262,15 +262,15 @@ static const struct file_operations ipoib_path_fops = {
void ipoib_create_debug_files(struct net_device *dev)
{
struct ipoib_dev_priv *priv = ipoib_priv(dev);
- char name[IFNAMSIZ + sizeof "_path"];
+ char name[IFNAMSIZ + sizeof("_path")];
- snprintf(name, sizeof name, "%s_mcg", dev->name);
+ snprintf(name, sizeof(name), "%s_mcg", dev->name);
priv->mcg_dentry = debugfs_create_file(name, S_IFREG | S_IRUGO,
ipoib_root, dev, &ipoib_mcg_fops);
if (!priv->mcg_dentry)
ipoib_warn(priv, "failed to create mcg debug file\n");
- snprintf(name, sizeof name, "%s_path", dev->name);
+ snprintf(name, sizeof(name), "%s_path", dev->name);
priv->path_dentry = debugfs_create_file(name, S_IFREG | S_IRUGO,
ipoib_root, dev, &ipoib_path_fops);
if (!priv->path_dentry)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index f47f9ace1f48..5f5d42bad2ea 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -40,6 +40,7 @@
#include <linux/ip.h>
#include <linux/tcp.h>
+#include <rdma/ib_cache.h>
#include "ipoib.h"
@@ -57,7 +58,7 @@ struct ipoib_ah *ipoib_create_ah(struct net_device *dev,
struct ipoib_ah *ah;
struct ib_ah *vah;
- ah = kmalloc(sizeof *ah, GFP_KERNEL);
+ ah = kmalloc(sizeof(*ah), GFP_KERNEL);
if (!ah)
return ERR_PTR(-ENOMEM);
@@ -202,7 +203,7 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
}
memcpy(mapping, priv->rx_ring[wr_id].mapping,
- IPOIB_UD_RX_SG * sizeof *mapping);
+ IPOIB_UD_RX_SG * sizeof(*mapping));
/*
* If we can't allocate a new RX buffer, dump
@@ -568,7 +569,7 @@ int ipoib_send(struct net_device *dev, struct sk_buff *skb,
struct ipoib_tx_buf *tx_req;
int hlen, rc;
void *phead;
- unsigned usable_sge = priv->max_send_sge - !!skb_headlen(skb);
+ unsigned int usable_sge = priv->max_send_sge - !!skb_headlen(skb);
if (skb_is_gso(skb)) {
hlen = skb_transport_offset(skb) + tcp_hdrlen(skb);
@@ -1069,7 +1070,7 @@ static bool ipoib_dev_addr_changed_valid(struct ipoib_dev_priv *priv)
bool ret = false;
netdev_gid = (union ib_gid *)(priv->dev->dev_addr + 4);
- if (ib_query_gid(priv->ca, priv->port, 0, &gid0, NULL))
+ if (rdma_query_gid(priv->ca, priv->port, 0, &gid0))
return false;
netif_addr_lock_bh(priv->dev);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 26cde95bc0f3..012c9e3970ac 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -634,7 +634,7 @@ struct ipoib_path_iter *ipoib_path_iter_init(struct net_device *dev)
{
struct ipoib_path_iter *iter;
- iter = kmalloc(sizeof *iter, GFP_KERNEL);
+ iter = kmalloc(sizeof(*iter), GFP_KERNEL);
if (!iter)
return NULL;
@@ -770,8 +770,10 @@ static void path_rec_completion(int status,
struct rdma_ah_attr av;
if (!ib_init_ah_attr_from_path(priv->ca, priv->port,
- pathrec, &av))
+ pathrec, &av, NULL)) {
ah = ipoib_create_ah(dev, priv->pd, &av);
+ rdma_destroy_ah_attr(&av);
+ }
}
spin_lock_irqsave(&priv->lock, flags);
@@ -883,7 +885,7 @@ static struct ipoib_path *path_rec_create(struct net_device *dev, void *gid)
if (!priv->broadcast)
return NULL;
- path = kzalloc(sizeof *path, GFP_ATOMIC);
+ path = kzalloc(sizeof(*path), GFP_ATOMIC);
if (!path)
return NULL;
@@ -1199,11 +1201,13 @@ static void ipoib_timeout(struct net_device *dev)
static int ipoib_hard_header(struct sk_buff *skb,
struct net_device *dev,
unsigned short type,
- const void *daddr, const void *saddr, unsigned len)
+ const void *daddr,
+ const void *saddr,
+ unsigned int len)
{
struct ipoib_header *header;
- header = skb_push(skb, sizeof *header);
+ header = skb_push(skb, sizeof(*header));
header->proto = htons(type);
header->reserved = 0;
@@ -1371,7 +1375,7 @@ static struct ipoib_neigh *ipoib_neigh_ctor(u8 *daddr,
{
struct ipoib_neigh *neigh;
- neigh = kzalloc(sizeof *neigh, GFP_ATOMIC);
+ neigh = kzalloc(sizeof(*neigh), GFP_ATOMIC);
if (!neigh)
return NULL;
@@ -1526,7 +1530,7 @@ static int ipoib_neigh_hash_init(struct ipoib_dev_priv *priv)
return -ENOMEM;
set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
size = roundup_pow_of_two(arp_tbl.gc_thresh3);
- buckets = kcalloc(size, sizeof(*buckets), GFP_KERNEL);
+ buckets = kvcalloc(size, sizeof(*buckets), GFP_KERNEL);
if (!buckets) {
kfree(htbl);
return -ENOMEM;
@@ -1554,7 +1558,7 @@ static void neigh_hash_free_rcu(struct rcu_head *head)
struct ipoib_neigh __rcu **buckets = htbl->buckets;
struct ipoib_neigh_table *ntbl = htbl->ntbl;
- kfree(buckets);
+ kvfree(buckets);
kfree(htbl);
complete(&ntbl->deleted);
}
@@ -2287,9 +2291,9 @@ static struct net_device *ipoib_add_port(const char *format,
priv->dev->broadcast[8] = priv->pkey >> 8;
priv->dev->broadcast[9] = priv->pkey & 0xff;
- result = ib_query_gid(hca, port, 0, &priv->local_gid, NULL);
+ result = rdma_query_gid(hca, port, 0, &priv->local_gid);
if (result) {
- pr_warn("%s: ib_query_gid port %d failed (ret = %d)\n",
+ pr_warn("%s: rdma_query_gid port %d failed (ret = %d)\n",
hca->name, port, result);
goto device_init_failed;
}
@@ -2362,7 +2366,7 @@ static void ipoib_add_one(struct ib_device *device)
int p;
int count = 0;
- dev_list = kmalloc(sizeof *dev_list, GFP_KERNEL);
+ dev_list = kmalloc(sizeof(*dev_list), GFP_KERNEL);
if (!dev_list)
return;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index 6709328d90f8..f696ea49c97a 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -140,7 +140,7 @@ static struct ipoib_mcast *ipoib_mcast_alloc(struct net_device *dev,
{
struct ipoib_mcast *mcast;
- mcast = kzalloc(sizeof *mcast, can_sleep ? GFP_KERNEL : GFP_ATOMIC);
+ mcast = kzalloc(sizeof(*mcast), can_sleep ? GFP_KERNEL : GFP_ATOMIC);
if (!mcast)
return NULL;
@@ -917,7 +917,7 @@ void ipoib_mcast_restart_task(struct work_struct *work)
if (!ipoib_mcast_addr_is_valid(ha->addr, dev->broadcast))
continue;
- memcpy(mgid.raw, ha->addr + 4, sizeof mgid);
+ memcpy(mgid.raw, ha->addr + 4, sizeof(mgid));
mcast = __ipoib_mcast_find(dev, &mgid);
if (!mcast || test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
@@ -997,7 +997,7 @@ struct ipoib_mcast_iter *ipoib_mcast_iter_init(struct net_device *dev)
{
struct ipoib_mcast_iter *iter;
- iter = kmalloc(sizeof *iter, GFP_KERNEL);
+ iter = kmalloc(sizeof(*iter), GFP_KERNEL);
if (!iter)
return NULL;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
index 984a88096f39..9f36ca786df8 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -52,7 +52,7 @@ int ipoib_mcast_attach(struct net_device *dev, struct ib_device *hca,
if (set_qkey) {
ret = -ENOMEM;
- qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL);
+ qp_attr = kmalloc(sizeof(*qp_attr), GFP_KERNEL);
if (!qp_attr)
goto out;
@@ -147,7 +147,7 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
.cap = {
.max_send_wr = ipoib_sendq_size,
.max_recv_wr = ipoib_recvq_size,
- .max_send_sge = min_t(u32, priv->ca->attrs.max_sge,
+ .max_send_sge = min_t(u32, priv->ca->attrs.max_send_sge,
MAX_SKB_FRAGS + 1),
.max_recv_sge = IPOIB_UD_RX_SG
},
@@ -168,8 +168,8 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
else
size += ipoib_recvq_size * ipoib_max_conn_qp;
} else
- if (ret != -ENOSYS)
- return -ENODEV;
+ if (ret != -EOPNOTSUPP)
+ return ret;
req_vec = (priv->port - 1) * 2;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
index 55a9b71ed05a..b067ad5e4c7e 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
@@ -130,7 +130,7 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
if (test_bit(IPOIB_FLAG_GOING_DOWN, &ppriv->flags))
return -EPERM;
- snprintf(intf_name, sizeof intf_name, "%s.%04x",
+ snprintf(intf_name, sizeof(intf_name), "%s.%04x",
ppriv->dev->name, pkey);
if (!mutex_trylock(&ppriv->sysfs_mutex))
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
index 9a6434c31db2..3fecd87c9f2b 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -610,12 +610,10 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep,
uint32_t initial_cmdsn)
{
struct iscsi_cls_session *cls_session;
- struct iscsi_session *session;
struct Scsi_Host *shost;
struct iser_conn *iser_conn = NULL;
struct ib_conn *ib_conn;
u32 max_fr_sectors;
- u16 max_cmds;
shost = iscsi_host_alloc(&iscsi_iser_sht, 0, 0);
if (!shost)
@@ -633,8 +631,8 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep,
*/
if (ep) {
iser_conn = ep->dd_data;
- max_cmds = iser_conn->max_cmds;
shost->sg_tablesize = iser_conn->scsi_sg_tablesize;
+ shost->can_queue = min_t(u16, cmds_max, iser_conn->max_cmds);
mutex_lock(&iser_conn->state_mutex);
if (iser_conn->state != ISER_CONN_UP) {
@@ -660,7 +658,7 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep,
}
mutex_unlock(&iser_conn->state_mutex);
} else {
- max_cmds = ISER_DEF_XMIT_CMDS_MAX;
+ shost->can_queue = min_t(u16, cmds_max, ISER_DEF_XMIT_CMDS_MAX);
if (iscsi_host_add(shost, NULL))
goto free_host;
}
@@ -676,21 +674,13 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep,
iser_warn("max_sectors was reduced from %u to %u\n",
iser_max_sectors, shost->max_sectors);
- if (cmds_max > max_cmds) {
- iser_info("cmds_max changed from %u to %u\n",
- cmds_max, max_cmds);
- cmds_max = max_cmds;
- }
-
cls_session = iscsi_session_setup(&iscsi_iser_transport, shost,
- cmds_max, 0,
+ shost->can_queue, 0,
sizeof(struct iscsi_iser_task),
initial_cmdsn, 0);
if (!cls_session)
goto remove_host;
- session = cls_session->dd_data;
- shost->can_queue = session->scsi_cmds_max;
return cls_session;
remove_host:
diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
index cccbcf0eb035..7e056f3c82a0 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c
@@ -136,7 +136,7 @@ isert_create_qp(struct isert_conn *isert_conn,
attr.cap.max_send_wr = ISERT_QP_MAX_REQ_DTOS + 1;
attr.cap.max_recv_wr = ISERT_QP_MAX_RECV_DTOS + 1;
attr.cap.max_rdma_ctxs = ISCSI_DEF_XMIT_CMDS_MAX;
- attr.cap.max_send_sge = device->ib_device->attrs.max_sge;
+ attr.cap.max_send_sge = device->ib_device->attrs.max_send_sge;
attr.cap.max_recv_sge = 1;
attr.sq_sig_type = IB_SIGNAL_REQ_WR;
attr.qp_type = IB_QPT_RC;
@@ -299,7 +299,8 @@ isert_create_device_ib_res(struct isert_device *device)
struct ib_device *ib_dev = device->ib_device;
int ret;
- isert_dbg("devattr->max_sge: %d\n", ib_dev->attrs.max_sge);
+ isert_dbg("devattr->max_send_sge: %d devattr->max_recv_sge %d\n",
+ ib_dev->attrs.max_send_sge, ib_dev->attrs.max_recv_sge);
isert_dbg("devattr->max_sge_rd: %d\n", ib_dev->attrs.max_sge_rd);
ret = isert_alloc_comps(device);
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 9786b24b956f..117dc1082e58 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -57,13 +57,10 @@
#define DRV_NAME "ib_srp"
#define PFX DRV_NAME ": "
-#define DRV_VERSION "2.0"
-#define DRV_RELDATE "July 26, 2015"
MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol initiator");
MODULE_LICENSE("Dual BSD/GPL");
-MODULE_INFO(release_date, DRV_RELDATE);
#if !defined(CONFIG_DYNAMIC_DEBUG)
#define DEFINE_DYNAMIC_DEBUG_METADATA(name, fmt)
@@ -3843,7 +3840,7 @@ static ssize_t srp_create_target(struct device *dev,
INIT_WORK(&target->tl_err_work, srp_tl_err_work);
INIT_WORK(&target->remove_work, srp_remove_work);
spin_lock_init(&target->lock);
- ret = ib_query_gid(ibdev, host->port, 0, &target->sgid, NULL);
+ ret = rdma_query_gid(ibdev, host->port, 0, &target->sgid);
if (ret)
goto out;
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index 3081c629a7f7..8bd7373cb828 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -575,8 +575,7 @@ static int srpt_refresh_port(struct srpt_port *sport)
sport->sm_lid = port_attr.sm_lid;
sport->lid = port_attr.lid;
- ret = ib_query_gid(sport->sdev->device, sport->port, 0, &sport->gid,
- NULL);
+ ret = rdma_query_gid(sport->sdev->device, sport->port, 0, &sport->gid);
if (ret)
goto err_query_port;
@@ -720,7 +719,7 @@ static struct srpt_ioctx **srpt_alloc_ioctx_ring(struct srpt_device *sdev,
WARN_ON(ioctx_size != sizeof(struct srpt_recv_ioctx)
&& ioctx_size != sizeof(struct srpt_send_ioctx));
- ring = kmalloc_array(ring_size, sizeof(ring[0]), GFP_KERNEL);
+ ring = kvmalloc_array(ring_size, sizeof(ring[0]), GFP_KERNEL);
if (!ring)
goto out;
for (i = 0; i < ring_size; ++i) {
@@ -734,7 +733,7 @@ static struct srpt_ioctx **srpt_alloc_ioctx_ring(struct srpt_device *sdev,
err:
while (--i >= 0)
srpt_free_ioctx(sdev, ring[i], dma_size, dir);
- kfree(ring);
+ kvfree(ring);
ring = NULL;
out:
return ring;
@@ -759,7 +758,7 @@ static void srpt_free_ioctx_ring(struct srpt_ioctx **ioctx_ring,
for (i = 0; i < ring_size; ++i)
srpt_free_ioctx(sdev, ioctx_ring[i], dma_size, dir);
- kfree(ioctx_ring);
+ kvfree(ioctx_ring);
}
/**
@@ -1754,13 +1753,15 @@ retry:
*/
qp_init->cap.max_send_wr = min(sq_size / 2, attrs->max_qp_wr);
qp_init->cap.max_rdma_ctxs = sq_size / 2;
- qp_init->cap.max_send_sge = min(attrs->max_sge, SRPT_MAX_SG_PER_WQE);
+ qp_init->cap.max_send_sge = min(attrs->max_send_sge,
+ SRPT_MAX_SG_PER_WQE);
qp_init->port_num = ch->sport->port;
if (sdev->use_srq) {
qp_init->srq = sdev->srq;
} else {
qp_init->cap.max_recv_wr = ch->rq_size;
- qp_init->cap.max_recv_sge = qp_init->cap.max_send_sge;
+ qp_init->cap.max_recv_sge = min(attrs->max_recv_sge,
+ SRPT_MAX_SG_PER_WQE);
}
if (ch->using_rdma_cm) {
@@ -1833,8 +1834,7 @@ static bool srpt_close_ch(struct srpt_rdma_ch *ch)
int ret;
if (!srpt_set_ch_state(ch, CH_DRAINING)) {
- pr_debug("%s-%d: already closed\n", ch->sess_name,
- ch->qp->qp_num);
+ pr_debug("%s: already closed\n", ch->sess_name);
return false;
}
@@ -1940,8 +1940,8 @@ static void __srpt_close_all_ch(struct srpt_port *sport)
list_for_each_entry(nexus, &sport->nexus_list, entry) {
list_for_each_entry(ch, &nexus->ch_list, list) {
if (srpt_disconnect_ch(ch) >= 0)
- pr_info("Closing channel %s-%d because target %s_%d has been disabled\n",
- ch->sess_name, ch->qp->qp_num,
+ pr_info("Closing channel %s because target %s_%d has been disabled\n",
+ ch->sess_name,
sport->sdev->device->name, sport->port);
srpt_close_ch(ch);
}
@@ -2087,7 +2087,7 @@ static int srpt_cm_req_recv(struct srpt_device *const sdev,
struct rdma_conn_param rdma_cm;
struct ib_cm_rep_param ib_cm;
} *rep_param = NULL;
- struct srpt_rdma_ch *ch;
+ struct srpt_rdma_ch *ch = NULL;
char i_port_id[36];
u32 it_iu_len;
int i, ret;
@@ -2234,13 +2234,15 @@ static int srpt_cm_req_recv(struct srpt_device *const sdev,
TARGET_PROT_NORMAL,
i_port_id + 2, ch, NULL);
if (IS_ERR_OR_NULL(ch->sess)) {
+ WARN_ON_ONCE(ch->sess == NULL);
ret = PTR_ERR(ch->sess);
+ ch->sess = NULL;
pr_info("Rejected login for initiator %s: ret = %d.\n",
ch->sess_name, ret);
rej->reason = cpu_to_be32(ret == -ENOMEM ?
SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES :
SRP_LOGIN_REJ_CHANNEL_LIMIT_REACHED);
- goto reject;
+ goto destroy_ib;
}
mutex_lock(&sport->mutex);
@@ -2279,7 +2281,7 @@ static int srpt_cm_req_recv(struct srpt_device *const sdev,
rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
pr_err("rejected SRP_LOGIN_REQ because enabling RTR failed (error code = %d)\n",
ret);
- goto destroy_ib;
+ goto reject;
}
pr_debug("Establish connection sess=%p name=%s ch=%p\n", ch->sess,
@@ -2358,8 +2360,11 @@ free_ring:
srpt_free_ioctx_ring((struct srpt_ioctx **)ch->ioctx_ring,
ch->sport->sdev, ch->rq_size,
ch->max_rsp_size, DMA_TO_DEVICE);
+
free_ch:
- if (ib_cm_id)
+ if (rdma_cm_id)
+ rdma_cm_id->context = NULL;
+ else
ib_cm_id->context = NULL;
kfree(ch);
ch = NULL;
@@ -2379,6 +2384,15 @@ reject:
ib_send_cm_rej(ib_cm_id, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0,
rej, sizeof(*rej));
+ if (ch && ch->sess) {
+ srpt_close_ch(ch);
+ /*
+ * Tell the caller not to free cm_id since
+ * srpt_release_channel_work() will do that.
+ */
+ ret = 0;
+ }
+
out:
kfree(rep_param);
kfree(rsp);
@@ -2969,7 +2983,8 @@ static void srpt_add_one(struct ib_device *device)
pr_debug("device = %p\n", device);
- sdev = kzalloc(sizeof(*sdev), GFP_KERNEL);
+ sdev = kzalloc(struct_size(sdev, port, device->phys_port_cnt),
+ GFP_KERNEL);
if (!sdev)
goto err;
@@ -3023,8 +3038,6 @@ static void srpt_add_one(struct ib_device *device)
srpt_event_handler);
ib_register_event_handler(&sdev->event_handler);
- WARN_ON(sdev->device->phys_port_cnt > ARRAY_SIZE(sdev->port));
-
for (i = 1; i <= sdev->device->phys_port_cnt; i++) {
sport = &sdev->port[i - 1];
INIT_LIST_HEAD(&sport->nexus_list);
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h
index 2361483476a0..444dfd7281b5 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.h
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.h
@@ -396,9 +396,9 @@ struct srpt_port {
* @sdev_mutex: Serializes use_srq changes.
* @use_srq: Whether or not to use SRQ.
* @ioctx_ring: Per-HCA SRQ.
- * @port: Information about the ports owned by this HCA.
* @event_handler: Per-HCA asynchronous IB event handler.
* @list: Node in srpt_dev_list.
+ * @port: Information about the ports owned by this HCA.
*/
struct srpt_device {
struct ib_device *device;
@@ -410,9 +410,9 @@ struct srpt_device {
struct mutex sdev_mutex;
bool use_srq;
struct srpt_recv_ioctx **ioctx_ring;
- struct srpt_port port[2];
struct ib_event_handler event_handler;
struct list_head list;
+ struct srpt_port port[];
};
#endif /* IB_SRPT_H */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index 041c18faea46..381dbfa6a68e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -433,6 +433,8 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
case MLX5_CMD_OP_FPGA_QUERY_QP:
case MLX5_CMD_OP_FPGA_QUERY_QP_COUNTERS:
case MLX5_CMD_OP_CREATE_GENERAL_OBJECT:
+ case MLX5_CMD_OP_MODIFY_GENERAL_OBJECT:
+ case MLX5_CMD_OP_QUERY_GENERAL_OBJECT:
*status = MLX5_DRIVER_STATUS_ABORTED;
*synd = MLX5_DRIVER_SYND;
return -EIO;
@@ -612,6 +614,9 @@ const char *mlx5_command_str(int command)
MLX5_COMMAND_STR_CASE(ARM_XRQ);
MLX5_COMMAND_STR_CASE(CREATE_GENERAL_OBJECT);
MLX5_COMMAND_STR_CASE(DESTROY_GENERAL_OBJECT);
+ MLX5_COMMAND_STR_CASE(MODIFY_GENERAL_OBJECT);
+ MLX5_COMMAND_STR_CASE(QUERY_GENERAL_OBJECT);
+ MLX5_COMMAND_STR_CASE(QUERY_MODIFY_HEADER_CONTEXT);
default: return "unknown command opcode";
}
}
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index 52e0c5d579a7..0d7f3d603f1d 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -874,7 +874,7 @@ static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue)
qp_attr.cap.max_send_wr = queue->send_queue_size + 1;
qp_attr.cap.max_rdma_ctxs = queue->send_queue_size;
qp_attr.cap.max_send_sge = max(ndev->device->attrs.max_sge_rd,
- ndev->device->attrs.max_sge);
+ ndev->device->attrs.max_send_sge);
if (ndev->srq) {
qp_attr.srq = ndev->srq;
diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c
index e459c97151b3..c5a1cddd8856 100644
--- a/fs/cifs/smbdirect.c
+++ b/fs/cifs/smbdirect.c
@@ -1661,9 +1661,16 @@ static struct smbd_connection *_smbd_get_connection(
info->max_receive_size = smbd_max_receive_size;
info->keep_alive_interval = smbd_keep_alive_interval;
- if (info->id->device->attrs.max_sge < SMBDIRECT_MAX_SGE) {
- log_rdma_event(ERR, "warning: device max_sge = %d too small\n",
- info->id->device->attrs.max_sge);
+ if (info->id->device->attrs.max_send_sge < SMBDIRECT_MAX_SGE) {
+ log_rdma_event(ERR,
+ "warning: device max_send_sge = %d too small\n",
+ info->id->device->attrs.max_send_sge);
+ log_rdma_event(ERR, "Queue Pair creation may fail\n");
+ }
+ if (info->id->device->attrs.max_recv_sge < SMBDIRECT_MAX_SGE) {
+ log_rdma_event(ERR,
+ "warning: device max_recv_sge = %d too small\n",
+ info->id->device->attrs.max_recv_sge);
log_rdma_event(ERR, "Queue Pair creation may fail\n");
}
diff --git a/include/linux/idr.h b/include/linux/idr.h
index e856f4e0ab35..3e8215b2c371 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -98,6 +98,17 @@ static inline void idr_set_cursor(struct idr *idr, unsigned int val)
* period).
*/
+#define idr_lock(idr) xa_lock(&(idr)->idr_rt)
+#define idr_unlock(idr) xa_unlock(&(idr)->idr_rt)
+#define idr_lock_bh(idr) xa_lock_bh(&(idr)->idr_rt)
+#define idr_unlock_bh(idr) xa_unlock_bh(&(idr)->idr_rt)
+#define idr_lock_irq(idr) xa_lock_irq(&(idr)->idr_rt)
+#define idr_unlock_irq(idr) xa_unlock_irq(&(idr)->idr_rt)
+#define idr_lock_irqsave(idr, flags) \
+ xa_lock_irqsave(&(idr)->idr_rt, flags)
+#define idr_unlock_irqrestore(idr, flags) \
+ xa_unlock_irqrestore(&(idr)->idr_rt, flags)
+
void idr_preload(gfp_t gfp_mask);
int idr_alloc(struct idr *, void *ptr, int start, int end, gfp_t);
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index c14b81559505..5e04e2053fd7 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -82,6 +82,7 @@ enum {
enum {
MLX5_OBJ_TYPE_UCTX = 0x0004,
+ MLX5_OBJ_TYPE_UMEM = 0x0005,
};
enum {
@@ -246,12 +247,15 @@ enum {
MLX5_CMD_OP_DEALLOC_ENCAP_HEADER = 0x93e,
MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT = 0x940,
MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT = 0x941,
+ MLX5_CMD_OP_QUERY_MODIFY_HEADER_CONTEXT = 0x942,
MLX5_CMD_OP_FPGA_CREATE_QP = 0x960,
MLX5_CMD_OP_FPGA_MODIFY_QP = 0x961,
MLX5_CMD_OP_FPGA_QUERY_QP = 0x962,
MLX5_CMD_OP_FPGA_DESTROY_QP = 0x963,
MLX5_CMD_OP_FPGA_QUERY_QP_COUNTERS = 0x964,
MLX5_CMD_OP_CREATE_GENERAL_OBJECT = 0xa00,
+ MLX5_CMD_OP_MODIFY_GENERAL_OBJECT = 0xa01,
+ MLX5_CMD_OP_QUERY_GENERAL_OBJECT = 0xa02,
MLX5_CMD_OP_DESTROY_GENERAL_OBJECT = 0xa03,
MLX5_CMD_OP_MAX
};
diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h
index c2c8b1fdeead..715394f6d18a 100644
--- a/include/rdma/ib_addr.h
+++ b/include/rdma/ib_addr.h
@@ -58,6 +58,7 @@
* @bound_dev_if: An optional device interface index.
* @transport: The transport type used.
* @net: Network namespace containing the bound_dev_if net_dev.
+ * @sgid_attr: GID attribute to use for identified SGID
*/
struct rdma_dev_addr {
unsigned char src_dev_addr[MAX_ADDR_LEN];
@@ -67,6 +68,7 @@ struct rdma_dev_addr {
int bound_dev_if;
enum rdma_transport_type transport;
struct net *net;
+ const struct ib_gid_attr *sgid_attr;
enum rdma_network_type network;
int hoplimit;
};
diff --git a/include/rdma/ib_cache.h b/include/rdma/ib_cache.h
index a5f249828115..1108d4220276 100644
--- a/include/rdma/ib_cache.h
+++ b/include/rdma/ib_cache.h
@@ -37,45 +37,23 @@
#include <rdma/ib_verbs.h>
-/**
- * ib_get_cached_gid - Returns a cached GID table entry
- * @device: The device to query.
- * @port_num: The port number of the device to query.
- * @index: The index into the cached GID table to query.
- * @gid: The GID value found at the specified index.
- * @attr: The GID attribute found at the specified index (only in RoCE).
- * NULL means ignore (output parameter).
- *
- * ib_get_cached_gid() fetches the specified GID table entry stored in
- * the local software cache.
- */
-int ib_get_cached_gid(struct ib_device *device,
- u8 port_num,
- int index,
- union ib_gid *gid,
- struct ib_gid_attr *attr);
-
-int ib_find_cached_gid(struct ib_device *device,
- const union ib_gid *gid,
- enum ib_gid_type gid_type,
- struct net_device *ndev,
- u8 *port_num,
- u16 *index);
-
-int ib_find_cached_gid_by_port(struct ib_device *device,
- const union ib_gid *gid,
- enum ib_gid_type gid_type,
- u8 port_num,
- struct net_device *ndev,
- u16 *index);
+int rdma_query_gid(struct ib_device *device, u8 port_num, int index,
+ union ib_gid *gid);
+const struct ib_gid_attr *rdma_find_gid(struct ib_device *device,
+ const union ib_gid *gid,
+ enum ib_gid_type gid_type,
+ struct net_device *ndev);
+const struct ib_gid_attr *rdma_find_gid_by_port(struct ib_device *ib_dev,
+ const union ib_gid *gid,
+ enum ib_gid_type gid_type,
+ u8 port,
+ struct net_device *ndev);
+const struct ib_gid_attr *rdma_find_gid_by_filter(
+ struct ib_device *device, const union ib_gid *gid, u8 port_num,
+ bool (*filter)(const union ib_gid *gid, const struct ib_gid_attr *,
+ void *),
+ void *context);
-int ib_find_gid_by_filter(struct ib_device *device,
- const union ib_gid *gid,
- u8 port_num,
- bool (*filter)(const union ib_gid *gid,
- const struct ib_gid_attr *,
- void *),
- void *context, u16 *index);
/**
* ib_get_cached_pkey - Returns a cached PKey table entry
* @device: The device to query.
@@ -150,4 +128,8 @@ int ib_get_cached_port_state(struct ib_device *device,
enum ib_port_state *port_active);
bool rdma_is_zero_gid(const union ib_gid *gid);
+const struct ib_gid_attr *rdma_get_gid_attr(struct ib_device *device,
+ u8 port_num, int index);
+void rdma_put_gid_attr(const struct ib_gid_attr *attr);
+void rdma_hold_gid_attr(const struct ib_gid_attr *attr);
#endif /* _IB_CACHE_H */
diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h
index 7979cb04f529..c98d603c0b63 100644
--- a/include/rdma/ib_cm.h
+++ b/include/rdma/ib_cm.h
@@ -246,6 +246,7 @@ struct ib_cm_sidr_rep_event_param {
u32 qkey;
u32 qpn;
void *info;
+ const struct ib_gid_attr *sgid_attr;
u8 info_len;
};
@@ -365,6 +366,7 @@ struct ib_cm_id *ib_cm_insert_listen(struct ib_device *device,
struct ib_cm_req_param {
struct sa_path_rec *primary_path;
struct sa_path_rec *alternate_path;
+ const struct ib_gid_attr *ppath_sgid_attr;
__be64 service_id;
u32 qp_num;
enum ib_qp_type qp_type;
@@ -566,6 +568,7 @@ int ib_send_cm_apr(struct ib_cm_id *cm_id,
struct ib_cm_sidr_req_param {
struct sa_path_rec *path;
+ const struct ib_gid_attr *sgid_attr;
__be64 service_id;
int timeout_ms;
const void *private_data;
diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h
index 2f4f1768ded4..f6ba366051c7 100644
--- a/include/rdma/ib_mad.h
+++ b/include/rdma/ib_mad.h
@@ -262,6 +262,39 @@ struct ib_class_port_info {
__be32 trap_qkey;
};
+/* PortInfo CapabilityMask */
+enum ib_port_capability_mask_bits {
+ IB_PORT_SM = 1 << 1,
+ IB_PORT_NOTICE_SUP = 1 << 2,
+ IB_PORT_TRAP_SUP = 1 << 3,
+ IB_PORT_OPT_IPD_SUP = 1 << 4,
+ IB_PORT_AUTO_MIGR_SUP = 1 << 5,
+ IB_PORT_SL_MAP_SUP = 1 << 6,
+ IB_PORT_MKEY_NVRAM = 1 << 7,
+ IB_PORT_PKEY_NVRAM = 1 << 8,
+ IB_PORT_LED_INFO_SUP = 1 << 9,
+ IB_PORT_SM_DISABLED = 1 << 10,
+ IB_PORT_SYS_IMAGE_GUID_SUP = 1 << 11,
+ IB_PORT_PKEY_SW_EXT_PORT_TRAP_SUP = 1 << 12,
+ IB_PORT_EXTENDED_SPEEDS_SUP = 1 << 14,
+ IB_PORT_CM_SUP = 1 << 16,
+ IB_PORT_SNMP_TUNNEL_SUP = 1 << 17,
+ IB_PORT_REINIT_SUP = 1 << 18,
+ IB_PORT_DEVICE_MGMT_SUP = 1 << 19,
+ IB_PORT_VENDOR_CLASS_SUP = 1 << 20,
+ IB_PORT_DR_NOTICE_SUP = 1 << 21,
+ IB_PORT_CAP_MASK_NOTICE_SUP = 1 << 22,
+ IB_PORT_BOOT_MGMT_SUP = 1 << 23,
+ IB_PORT_LINK_LATENCY_SUP = 1 << 24,
+ IB_PORT_CLIENT_REG_SUP = 1 << 25,
+ IB_PORT_OTHER_LOCAL_CHANGES_SUP = 1 << 26,
+ IB_PORT_LINK_SPEED_WIDTH_TABLE_SUP = 1 << 27,
+ IB_PORT_VENDOR_SPECIFIC_MADS_TABLE_SUP = 1 << 28,
+ IB_PORT_MCAST_PKEY_TRAP_SUPPRESSION_SUP = 1 << 29,
+ IB_PORT_MCAST_FDB_TOP_SUP = 1 << 30,
+ IB_PORT_HIERARCHY_INFO_SUP = 1ULL << 31,
+};
+
#define OPA_CLASS_PORT_INFO_PR_SUPPORT BIT(26)
struct opa_class_port_info {
diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h
index bacb144f7780..b6ddf2a1b9d8 100644
--- a/include/rdma/ib_sa.h
+++ b/include/rdma/ib_sa.h
@@ -172,12 +172,7 @@ struct sa_path_rec_ib {
*/
struct sa_path_rec_roce {
bool route_resolved;
- u8 dmac[ETH_ALEN];
- /* ignored in IB */
- int ifindex;
- /* ignored in IB */
- struct net *net;
-
+ u8 dmac[ETH_ALEN];
};
struct sa_path_rec_opa {
@@ -556,13 +551,10 @@ int ib_init_ah_from_mcmember(struct ib_device *device, u8 port_num,
enum ib_gid_type gid_type,
struct rdma_ah_attr *ah_attr);
-/**
- * ib_init_ah_attr_from_path - Initialize address handle attributes based on
- * an SA path record.
- */
int ib_init_ah_attr_from_path(struct ib_device *device, u8 port_num,
struct sa_path_rec *rec,
- struct rdma_ah_attr *ah_attr);
+ struct rdma_ah_attr *ah_attr,
+ const struct ib_gid_attr *sgid_attr);
/**
* ib_sa_pack_path - Conert a path record from struct ib_sa_path_rec
@@ -667,45 +659,10 @@ static inline void sa_path_set_dmac_zero(struct sa_path_rec *rec)
eth_zero_addr(rec->roce.dmac);
}
-static inline void sa_path_set_ifindex(struct sa_path_rec *rec, int ifindex)
-{
- if (sa_path_is_roce(rec))
- rec->roce.ifindex = ifindex;
-}
-
-static inline void sa_path_set_ndev(struct sa_path_rec *rec, struct net *net)
-{
- if (sa_path_is_roce(rec))
- rec->roce.net = net;
-}
-
static inline u8 *sa_path_get_dmac(struct sa_path_rec *rec)
{
if (sa_path_is_roce(rec))
return rec->roce.dmac;
return NULL;
}
-
-static inline int sa_path_get_ifindex(struct sa_path_rec *rec)
-{
- if (sa_path_is_roce(rec))
- return rec->roce.ifindex;
- return 0;
-}
-
-static inline struct net *sa_path_get_ndev(struct sa_path_rec *rec)
-{
- if (sa_path_is_roce(rec))
- return rec->roce.net;
- return NULL;
-}
-
-static inline struct net_device *ib_get_ndev_from_path(struct sa_path_rec *rec)
-{
- return sa_path_get_ndev(rec) ?
- dev_get_by_index(sa_path_get_ndev(rec),
- sa_path_get_ifindex(rec))
- : NULL;
-}
-
#endif /* IB_SA_H */
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 4c6241bc2039..08348e53082c 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -94,6 +94,7 @@ enum ib_gid_type {
struct ib_gid_attr {
struct net_device *ndev;
struct ib_device *device;
+ union ib_gid gid;
enum ib_gid_type gid_type;
u16 index;
u8 port_num;
@@ -148,13 +149,13 @@ static inline enum ib_gid_type ib_network_to_gid_type(enum rdma_network_type net
return IB_GID_TYPE_IB;
}
-static inline enum rdma_network_type ib_gid_to_network_type(enum ib_gid_type gid_type,
- union ib_gid *gid)
+static inline enum rdma_network_type
+rdma_gid_attr_network_type(const struct ib_gid_attr *attr)
{
- if (gid_type == IB_GID_TYPE_IB)
+ if (attr->gid_type == IB_GID_TYPE_IB)
return RDMA_NETWORK_IB;
- if (ipv6_addr_v4mapped((struct in6_addr *)gid))
+ if (ipv6_addr_v4mapped((struct in6_addr *)&attr->gid))
return RDMA_NETWORK_IPV4;
else
return RDMA_NETWORK_IPV6;
@@ -344,7 +345,8 @@ struct ib_device_attr {
int max_qp;
int max_qp_wr;
u64 device_cap_flags;
- int max_sge;
+ int max_send_sge;
+ int max_recv_sge;
int max_sge_rd;
int max_cq;
int max_cqe;
@@ -430,33 +432,6 @@ enum ib_port_state {
IB_PORT_ACTIVE_DEFER = 5
};
-enum ib_port_cap_flags {
- IB_PORT_SM = 1 << 1,
- IB_PORT_NOTICE_SUP = 1 << 2,
- IB_PORT_TRAP_SUP = 1 << 3,
- IB_PORT_OPT_IPD_SUP = 1 << 4,
- IB_PORT_AUTO_MIGR_SUP = 1 << 5,
- IB_PORT_SL_MAP_SUP = 1 << 6,
- IB_PORT_MKEY_NVRAM = 1 << 7,
- IB_PORT_PKEY_NVRAM = 1 << 8,
- IB_PORT_LED_INFO_SUP = 1 << 9,
- IB_PORT_SM_DISABLED = 1 << 10,
- IB_PORT_SYS_IMAGE_GUID_SUP = 1 << 11,
- IB_PORT_PKEY_SW_EXT_PORT_TRAP_SUP = 1 << 12,
- IB_PORT_EXTENDED_SPEEDS_SUP = 1 << 14,
- IB_PORT_CM_SUP = 1 << 16,
- IB_PORT_SNMP_TUNNEL_SUP = 1 << 17,
- IB_PORT_REINIT_SUP = 1 << 18,
- IB_PORT_DEVICE_MGMT_SUP = 1 << 19,
- IB_PORT_VENDOR_CLASS_SUP = 1 << 20,
- IB_PORT_DR_NOTICE_SUP = 1 << 21,
- IB_PORT_CAP_MASK_NOTICE_SUP = 1 << 22,
- IB_PORT_BOOT_MGMT_SUP = 1 << 23,
- IB_PORT_LINK_LATENCY_SUP = 1 << 24,
- IB_PORT_CLIENT_REG_SUP = 1 << 25,
- IB_PORT_IP_BASED_GIDS = 1 << 26,
-};
-
enum ib_port_width {
IB_WIDTH_1X = 1,
IB_WIDTH_4X = 2,
@@ -554,6 +529,7 @@ static inline struct rdma_hw_stats *rdma_alloc_hw_stats_struct(
#define RDMA_CORE_CAP_AF_IB 0x00001000
#define RDMA_CORE_CAP_ETH_AH 0x00002000
#define RDMA_CORE_CAP_OPA_AH 0x00004000
+#define RDMA_CORE_CAP_IB_GRH_REQUIRED 0x00008000
/* Protocol 0xFFF00000 */
#define RDMA_CORE_CAP_PROT_IB 0x00100000
@@ -563,6 +539,10 @@ static inline struct rdma_hw_stats *rdma_alloc_hw_stats_struct(
#define RDMA_CORE_CAP_PROT_RAW_PACKET 0x01000000
#define RDMA_CORE_CAP_PROT_USNIC 0x02000000
+#define RDMA_CORE_PORT_IB_GRH_REQUIRED (RDMA_CORE_CAP_IB_GRH_REQUIRED \
+ | RDMA_CORE_CAP_PROT_ROCE \
+ | RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP)
+
#define RDMA_CORE_PORT_IBA_IB (RDMA_CORE_CAP_PROT_IB \
| RDMA_CORE_CAP_IB_MAD \
| RDMA_CORE_CAP_IB_SMI \
@@ -595,6 +575,8 @@ struct ib_port_attr {
enum ib_mtu max_mtu;
enum ib_mtu active_mtu;
int gid_tbl_len;
+ unsigned int ip_gids:1;
+ /* This is the value from PortInfo CapabilityMask, defined by IBA */
u32 port_cap_flags;
u32 max_msg_sz;
u32 bad_pkey_cntr;
@@ -610,7 +592,6 @@ struct ib_port_attr {
u8 active_width;
u8 active_speed;
u8 phys_state;
- bool grh_required;
};
enum ib_device_modify_flags {
@@ -689,6 +670,7 @@ struct ib_event_handler {
} while (0)
struct ib_global_route {
+ const struct ib_gid_attr *sgid_attr;
union ib_gid dgid;
u32 flow_label;
u8 sgid_index;
@@ -1443,14 +1425,16 @@ struct ib_recv_wr {
};
enum ib_access_flags {
- IB_ACCESS_LOCAL_WRITE = 1,
- IB_ACCESS_REMOTE_WRITE = (1<<1),
- IB_ACCESS_REMOTE_READ = (1<<2),
- IB_ACCESS_REMOTE_ATOMIC = (1<<3),
- IB_ACCESS_MW_BIND = (1<<4),
- IB_ZERO_BASED = (1<<5),
- IB_ACCESS_ON_DEMAND = (1<<6),
- IB_ACCESS_HUGETLB = (1<<7),
+ IB_ACCESS_LOCAL_WRITE = IB_UVERBS_ACCESS_LOCAL_WRITE,
+ IB_ACCESS_REMOTE_WRITE = IB_UVERBS_ACCESS_REMOTE_WRITE,
+ IB_ACCESS_REMOTE_READ = IB_UVERBS_ACCESS_REMOTE_READ,
+ IB_ACCESS_REMOTE_ATOMIC = IB_UVERBS_ACCESS_REMOTE_ATOMIC,
+ IB_ACCESS_MW_BIND = IB_UVERBS_ACCESS_MW_BIND,
+ IB_ZERO_BASED = IB_UVERBS_ACCESS_ZERO_BASED,
+ IB_ACCESS_ON_DEMAND = IB_UVERBS_ACCESS_ON_DEMAND,
+ IB_ACCESS_HUGETLB = IB_UVERBS_ACCESS_HUGETLB,
+
+ IB_ACCESS_SUPPORTED = ((IB_ACCESS_HUGETLB << 1) - 1)
};
/*
@@ -1473,7 +1457,10 @@ struct ib_fmr_attr {
struct ib_umem;
enum rdma_remove_reason {
- /* Userspace requested uobject deletion. Call could fail */
+ /*
+ * Userspace requested uobject deletion or initial try
+ * to remove uobject via cleanup. Call could fail
+ */
RDMA_REMOVE_DESTROY,
/* Context deletion. This call should delete the actual object itself */
RDMA_REMOVE_CLOSE,
@@ -1494,12 +1481,7 @@ struct ib_ucontext {
struct ib_uverbs_file *ufile;
int closing;
- /* locking the uobjects_list */
- struct mutex uobjects_lock;
- struct list_head uobjects;
- /* protects cleanup process from other actions */
- struct rw_semaphore cleanup_rwsem;
- enum rdma_remove_reason cleanup_reason;
+ bool cleanup_retryable;
struct pid *tgid;
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
@@ -1524,6 +1506,9 @@ struct ib_ucontext {
struct ib_uobject {
u64 user_handle; /* handle given to us by userspace */
+ /* ufile & ucontext owning this object */
+ struct ib_uverbs_file *ufile;
+ /* FIXME, save memory: ufile->context == context */
struct ib_ucontext *context; /* associated user context */
void *object; /* containing object */
struct list_head list; /* link to context's list */
@@ -1536,12 +1521,6 @@ struct ib_uobject {
const struct uverbs_obj_type *type;
};
-struct ib_uobject_file {
- struct ib_uobject uobj;
- /* ufile contains the lock between context release and file close */
- struct ib_uverbs_file *ufile;
-};
-
struct ib_udata {
const void __user *inbuf;
void __user *outbuf;
@@ -1578,6 +1557,7 @@ struct ib_ah {
struct ib_device *device;
struct ib_pd *pd;
struct ib_uobject *uobject;
+ const struct ib_gid_attr *sgid_attr;
enum rdma_ah_attr_type type;
};
@@ -1776,6 +1756,9 @@ struct ib_qp {
struct ib_uobject *uobject;
void (*event_handler)(struct ib_event *, void *);
void *qp_context;
+ /* sgid_attrs associated with the AV's */
+ const struct ib_gid_attr *av_sgid_attr;
+ const struct ib_gid_attr *alt_path_sgid_attr;
u32 qp_num;
u32 max_write_sge;
u32 max_read_sge;
@@ -2242,11 +2225,6 @@ struct ib_counters {
atomic_t usecnt;
};
-enum ib_read_counters_flags {
- /* prefer read values from driver cache */
- IB_READ_COUNTERS_ATTR_PREFER_CACHED = 1 << 0,
-};
-
struct ib_counters_read_attr {
u64 *counters_buff;
u32 ncounters;
@@ -2341,8 +2319,7 @@ struct ib_device {
* concurrently for different ports. This function is only called when
* roce_gid_table is used.
*/
- int (*add_gid)(const union ib_gid *gid,
- const struct ib_gid_attr *attr,
+ int (*add_gid)(const struct ib_gid_attr *attr,
void **context);
/* When calling del_gid, the HW vendor's driver should delete the
* gid of device @device at gid index gid_index of port port_num
@@ -2592,7 +2569,7 @@ struct ib_device {
const struct cpumask *(*get_vector_affinity)(struct ib_device *ibdev,
int comp_vector);
- struct uverbs_root_spec *specs_root;
+ struct uverbs_root_spec *driver_specs_root;
enum rdma_driver_id driver_id;
};
@@ -2679,6 +2656,46 @@ static inline bool ib_is_udata_cleared(struct ib_udata *udata,
}
/**
+ * ib_is_destroy_retryable - Check whether the uobject destruction
+ * is retryable.
+ * @ret: The initial destruction return code
+ * @why: remove reason
+ * @uobj: The uobject that is destroyed
+ *
+ * This function is a helper function that IB layer and low-level drivers
+ * can use to consider whether the destruction of the given uobject is
+ * retry-able.
+ * It checks the original return code, if it wasn't success the destruction
+ * is retryable according to the ucontext state (i.e. cleanup_retryable) and
+ * the remove reason. (i.e. why).
+ * Must be called with the object locked for destroy.
+ */
+static inline bool ib_is_destroy_retryable(int ret, enum rdma_remove_reason why,
+ struct ib_uobject *uobj)
+{
+ return ret && (why == RDMA_REMOVE_DESTROY ||
+ uobj->context->cleanup_retryable);
+}
+
+/**
+ * ib_destroy_usecnt - Called during destruction to check the usecnt
+ * @usecnt: The usecnt atomic
+ * @why: remove reason
+ * @uobj: The uobject that is destroyed
+ *
+ * Non-zero usecnts will block destruction unless destruction was triggered by
+ * a ucontext cleanup.
+ */
+static inline int ib_destroy_usecnt(atomic_t *usecnt,
+ enum rdma_remove_reason why,
+ struct ib_uobject *uobj)
+{
+ if (atomic_read(usecnt) && ib_is_destroy_retryable(-EBUSY, why, uobj))
+ return -EBUSY;
+ return 0;
+}
+
+/**
* ib_modify_qp_is_ok - Check that the supplied attribute mask
* contains all required attributes and no attributes not allowed for
* the given QP state transition.
@@ -2755,6 +2772,13 @@ static inline int rdma_is_port_valid(const struct ib_device *device,
port <= rdma_end_port(device));
}
+static inline bool rdma_is_grh_required(const struct ib_device *device,
+ u8 port_num)
+{
+ return device->port_immutable[port_num].core_cap_flags &
+ RDMA_CORE_PORT_IB_GRH_REQUIRED;
+}
+
static inline bool rdma_protocol_ib(const struct ib_device *device, u8 port_num)
{
return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_IB;
@@ -3046,10 +3070,6 @@ static inline bool rdma_cap_read_inv(struct ib_device *dev, u32 port_num)
return rdma_protocol_iwarp(dev, port_num);
}
-int ib_query_gid(struct ib_device *device,
- u8 port_num, int index, union ib_gid *gid,
- struct ib_gid_attr *attr);
-
int ib_set_vf_link_state(struct ib_device *device, int vf, u8 port,
int state);
int ib_get_vf_config(struct ib_device *device, int vf, u8 port,
@@ -3148,6 +3168,13 @@ int ib_get_rdma_header_version(const union rdma_network_hdr *hdr);
* ignored unless the work completion indicates that the GRH is valid.
* @ah_attr: Returned attributes that can be used when creating an address
* handle for replying to the message.
+ * When ib_init_ah_attr_from_wc() returns success,
+ * (a) for IB link layer it optionally contains a reference to SGID attribute
+ * when GRH is present for IB link layer.
+ * (b) for RoCE link layer it contains a reference to SGID attribute.
+ * User must invoke rdma_cleanup_ah_attr_gid_attr() to release reference to SGID
+ * attributes which are initialized using ib_init_ah_attr_from_wc().
+ *
*/
int ib_init_ah_attr_from_wc(struct ib_device *device, u8 port_num,
const struct ib_wc *wc, const struct ib_grh *grh,
@@ -3798,10 +3825,6 @@ struct ib_xrcd *__ib_alloc_xrcd(struct ib_device *device, const char *caller);
*/
int ib_dealloc_xrcd(struct ib_xrcd *xrcd);
-struct ib_flow *ib_create_flow(struct ib_qp *qp,
- struct ib_flow_attr *flow_attr, int domain);
-int ib_destroy_flow(struct ib_flow *flow_id);
-
static inline int ib_check_mr_access(int flags)
{
/*
@@ -4030,8 +4053,19 @@ static inline void rdma_ah_set_grh(struct rdma_ah_attr *attr,
grh->sgid_index = sgid_index;
grh->hop_limit = hop_limit;
grh->traffic_class = traffic_class;
+ grh->sgid_attr = NULL;
}
+void rdma_destroy_ah_attr(struct rdma_ah_attr *ah_attr);
+void rdma_move_grh_sgid_attr(struct rdma_ah_attr *attr, union ib_gid *dgid,
+ u32 flow_label, u8 hop_limit, u8 traffic_class,
+ const struct ib_gid_attr *sgid_attr);
+void rdma_copy_ah_attr(struct rdma_ah_attr *dest,
+ const struct rdma_ah_attr *src);
+void rdma_replace_ah_attr(struct rdma_ah_attr *old,
+ const struct rdma_ah_attr *new);
+void rdma_move_ah_attr(struct rdma_ah_attr *dest, struct rdma_ah_attr *src);
+
/**
* rdma_ah_find_type - Return address handle type.
*
@@ -4107,4 +4141,9 @@ ib_get_vector_affinity(struct ib_device *device, int comp_vector)
*/
void rdma_roce_rescan_device(struct ib_device *ibdev);
+struct ib_ucontext *ib_uverbs_get_ucontext(struct ib_uverbs_file *ufile);
+
+int uverbs_destroy_def_handler(struct ib_device *ib_dev,
+ struct ib_uverbs_file *file,
+ struct uverbs_attr_bundle *attrs);
#endif /* IB_VERBS_H */
diff --git a/include/rdma/opa_addr.h b/include/rdma/opa_addr.h
index 2bbb7a67e643..66d4393d339c 100644
--- a/include/rdma/opa_addr.h
+++ b/include/rdma/opa_addr.h
@@ -120,7 +120,7 @@ static inline bool rdma_is_valid_unicast_lid(struct rdma_ah_attr *attr)
if (attr->type == RDMA_AH_ATTR_TYPE_IB) {
if (!rdma_ah_get_dlid(attr) ||
rdma_ah_get_dlid(attr) >=
- be32_to_cpu(IB_MULTICAST_LID_BASE))
+ be16_to_cpu(IB_MULTICAST_LID_BASE))
return false;
} else if (attr->type == RDMA_AH_ATTR_TYPE_OPA) {
if (!rdma_ah_get_dlid(attr) ||
diff --git a/include/rdma/rdmavt_qp.h b/include/rdma/rdmavt_qp.h
index 1145a4c154b2..927f6d5b6d0f 100644
--- a/include/rdma/rdmavt_qp.h
+++ b/include/rdma/rdmavt_qp.h
@@ -2,7 +2,7 @@
#define DEF_RDMAVT_INCQP_H
/*
- * Copyright(c) 2016, 2017 Intel Corporation.
+ * Copyright(c) 2016 - 2018 Intel Corporation.
*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
@@ -91,6 +91,7 @@
* RVT_S_WAIT_ACK - waiting for an ACK packet before sending more requests
* RVT_S_SEND_ONE - send one packet, request ACK, then wait for ACK
* RVT_S_ECN - a BECN was queued to the send engine
+ * RVT_S_MAX_BIT_MASK - The max bit that can be used by rdmavt
*/
#define RVT_S_SIGNAL_REQ_WR 0x0001
#define RVT_S_BUSY 0x0002
@@ -103,23 +104,26 @@
#define RVT_S_WAIT_SSN_CREDIT 0x0100
#define RVT_S_WAIT_DMA 0x0200
#define RVT_S_WAIT_PIO 0x0400
-#define RVT_S_WAIT_PIO_DRAIN 0x0800
-#define RVT_S_WAIT_TX 0x1000
-#define RVT_S_WAIT_DMA_DESC 0x2000
-#define RVT_S_WAIT_KMEM 0x4000
-#define RVT_S_WAIT_PSN 0x8000
-#define RVT_S_WAIT_ACK 0x10000
-#define RVT_S_SEND_ONE 0x20000
-#define RVT_S_UNLIMITED_CREDIT 0x40000
-#define RVT_S_AHG_VALID 0x80000
-#define RVT_S_AHG_CLEAR 0x100000
-#define RVT_S_ECN 0x200000
+#define RVT_S_WAIT_TX 0x0800
+#define RVT_S_WAIT_DMA_DESC 0x1000
+#define RVT_S_WAIT_KMEM 0x2000
+#define RVT_S_WAIT_PSN 0x4000
+#define RVT_S_WAIT_ACK 0x8000
+#define RVT_S_SEND_ONE 0x10000
+#define RVT_S_UNLIMITED_CREDIT 0x20000
+#define RVT_S_ECN 0x40000
+#define RVT_S_MAX_BIT_MASK 0x800000
+
+/*
+ * Drivers should use s_flags starting with bit 31 down to the bit next to
+ * RVT_S_MAX_BIT_MASK
+ */
/*
* Wait flags that would prevent any packet type from being sent.
*/
#define RVT_S_ANY_WAIT_IO \
- (RVT_S_WAIT_PIO | RVT_S_WAIT_PIO_DRAIN | RVT_S_WAIT_TX | \
+ (RVT_S_WAIT_PIO | RVT_S_WAIT_TX | \
RVT_S_WAIT_DMA_DESC | RVT_S_WAIT_KMEM)
/*
diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h
index bd6bba3a6e04..017ccf75890c 100644
--- a/include/rdma/uverbs_ioctl.h
+++ b/include/rdma/uverbs_ioctl.h
@@ -61,54 +61,57 @@ enum uverbs_obj_access {
UVERBS_ACCESS_DESTROY
};
-enum {
- UVERBS_ATTR_SPEC_F_MANDATORY = 1U << 0,
- /* Support extending attributes by length, validate all unknown size == zero */
- UVERBS_ATTR_SPEC_F_MIN_SZ_OR_ZERO = 1U << 1,
-};
-
/* Specification of a single attribute inside the ioctl message */
+/* good size 16 */
struct uverbs_attr_spec {
+ u8 type;
+
+ /*
+ * Support extending attributes by length. Allow the user to provide
+ * more bytes than ptr.len, but check that everything after is zero'd
+ * by the user.
+ */
+ u8 zero_trailing:1;
+ /*
+ * Valid only for PTR_IN. Allocate and copy the data inside
+ * the parser
+ */
+ u8 alloc_and_copy:1;
+ u8 mandatory:1;
+
union {
- /* Header shared by all following union members - to reduce space. */
- struct {
- enum uverbs_attr_type type;
- /* Combination of bits from enum UVERBS_ATTR_SPEC_F_XXXX */
- u8 flags;
- };
struct {
- enum uverbs_attr_type type;
- /* Combination of bits from enum UVERBS_ATTR_SPEC_F_XXXX */
- u8 flags;
/* Current known size to kernel */
- u16 len;
+ u16 len;
/* User isn't allowed to provide something < min_len */
- u16 min_len;
+ u16 min_len;
} ptr;
+
struct {
- enum uverbs_attr_type type;
- /* Combination of bits from enum UVERBS_ATTR_SPEC_F_XXXX */
- u8 flags;
/*
* higher bits mean the namespace and lower bits mean
* the type id within the namespace.
*/
- u16 obj_type;
- u8 access;
+ u16 obj_type;
+ u8 access;
} obj;
+
+ struct {
+ u8 num_elems;
+ } enum_def;
+ } u;
+
+ /* This weird split of the enum lets us remove some padding */
+ union {
struct {
- enum uverbs_attr_type type;
- /* Combination of bits from enum UVERBS_ATTR_SPEC_F_XXXX */
- u8 flags;
- u8 num_elems;
/*
* The enum attribute can select one of the attributes
* contained in the ids array. Currently only PTR_IN
* attributes are supported in the ids array.
*/
- const struct uverbs_attr_spec *ids;
+ const struct uverbs_attr_spec *ids;
} enum_def;
- };
+ } u2;
};
struct uverbs_attr_spec_hash {
@@ -192,130 +195,112 @@ struct uverbs_object_tree_def {
const struct uverbs_object_def * const (*objects)[];
};
-#define UA_FLAGS(_flags) .flags = _flags
-#define __UVERBS_ATTR0(_id, _type, _fld, _attr, ...) \
- ((const struct uverbs_attr_def) \
- {.id = _id, .attr = {{._fld = {.type = _type, _attr, .flags = 0, } }, } })
-#define __UVERBS_ATTR1(_id, _type, _fld, _attr, _extra1, ...) \
- ((const struct uverbs_attr_def) \
- {.id = _id, .attr = {{._fld = {.type = _type, _attr, _extra1 } },} })
-#define __UVERBS_ATTR2(_id, _type, _fld, _attr, _extra1, _extra2) \
- ((const struct uverbs_attr_def) \
- {.id = _id, .attr = {{._fld = {.type = _type, _attr, _extra1, _extra2 } },} })
-#define __UVERBS_ATTR(_id, _type, _fld, _attr, _extra1, _extra2, _n, ...) \
- __UVERBS_ATTR##_n(_id, _type, _fld, _attr, _extra1, _extra2)
+/*
+ * =======================================
+ * Attribute Specifications
+ * =======================================
+ */
-#define UVERBS_ATTR_TYPE(_type) \
- .min_len = sizeof(_type), .len = sizeof(_type)
-#define UVERBS_ATTR_STRUCT(_type, _last) \
- .min_len = ((uintptr_t)(&((_type *)0)->_last + 1)), .len = sizeof(_type)
#define UVERBS_ATTR_SIZE(_min_len, _len) \
- .min_len = _min_len, .len = _len
+ .u.ptr.min_len = _min_len, .u.ptr.len = _len
+
+/*
+ * Specifies a uapi structure that cannot be extended. The user must always
+ * supply the whole structure and nothing more. The structure must be declared
+ * in a header under include/uapi/rdma.
+ */
+#define UVERBS_ATTR_TYPE(_type) \
+ .u.ptr.min_len = sizeof(_type), .u.ptr.len = sizeof(_type)
+/*
+ * Specifies a uapi structure where the user must provide at least up to
+ * member 'last'. Anything after last and up until the end of the structure
+ * can be non-zero, anything longer than the end of the structure must be
+ * zero. The structure must be declared in a header under include/uapi/rdma.
+ */
+#define UVERBS_ATTR_STRUCT(_type, _last) \
+ .zero_trailing = 1, \
+ UVERBS_ATTR_SIZE(((uintptr_t)(&((_type *)0)->_last + 1)), \
+ sizeof(_type))
+/*
+ * Specifies at least min_len bytes must be passed in, but the amount can be
+ * larger, up to the protocol maximum size. No check for zeroing is done.
+ */
+#define UVERBS_ATTR_MIN_SIZE(_min_len) UVERBS_ATTR_SIZE(_min_len, USHRT_MAX)
+
+/* Must be used in the '...' of any UVERBS_ATTR */
+#define UA_ALLOC_AND_COPY .alloc_and_copy = 1
+#define UA_MANDATORY .mandatory = 1
+#define UA_OPTIONAL .mandatory = 0
+
+#define UVERBS_ATTR_IDR(_attr_id, _idr_type, _access, ...) \
+ (&(const struct uverbs_attr_def){ \
+ .id = _attr_id, \
+ .attr = { .type = UVERBS_ATTR_TYPE_IDR, \
+ .u.obj.obj_type = _idr_type, \
+ .u.obj.access = _access, \
+ __VA_ARGS__ } })
+
+#define UVERBS_ATTR_FD(_attr_id, _fd_type, _access, ...) \
+ (&(const struct uverbs_attr_def){ \
+ .id = (_attr_id) + \
+ BUILD_BUG_ON_ZERO((_access) != UVERBS_ACCESS_NEW && \
+ (_access) != UVERBS_ACCESS_READ), \
+ .attr = { .type = UVERBS_ATTR_TYPE_FD, \
+ .u.obj.obj_type = _fd_type, \
+ .u.obj.access = _access, \
+ __VA_ARGS__ } })
+
+#define UVERBS_ATTR_PTR_IN(_attr_id, _type, ...) \
+ (&(const struct uverbs_attr_def){ \
+ .id = _attr_id, \
+ .attr = { .type = UVERBS_ATTR_TYPE_PTR_IN, \
+ _type, \
+ __VA_ARGS__ } })
+
+#define UVERBS_ATTR_PTR_OUT(_attr_id, _type, ...) \
+ (&(const struct uverbs_attr_def){ \
+ .id = _attr_id, \
+ .attr = { .type = UVERBS_ATTR_TYPE_PTR_OUT, \
+ _type, \
+ __VA_ARGS__ } })
+
+/* _enum_arry should be a 'static const union uverbs_attr_spec[]' */
+#define UVERBS_ATTR_ENUM_IN(_attr_id, _enum_arr, ...) \
+ (&(const struct uverbs_attr_def){ \
+ .id = _attr_id, \
+ .attr = { .type = UVERBS_ATTR_TYPE_ENUM_IN, \
+ .u2.enum_def.ids = _enum_arr, \
+ .u.enum_def.num_elems = ARRAY_SIZE(_enum_arr), \
+ __VA_ARGS__ }, \
+ })
/*
- * In new compiler, UVERBS_ATTR could be simplified by declaring it as
- * [_id] = {.type = _type, .len = _len, ##__VA_ARGS__}
- * But since we support older compilers too, we need the more complex code.
+ * This spec is used in order to pass information to the hardware driver in a
+ * legacy way. Every verb that could get driver specific data should get this
+ * spec.
*/
-#define UVERBS_ATTR(_id, _type, _fld, _attr, ...) \
- __UVERBS_ATTR(_id, _type, _fld, _attr, ##__VA_ARGS__, 2, 1, 0)
-#define UVERBS_ATTR_PTR_IN_SZ(_id, _len, ...) \
- UVERBS_ATTR(_id, UVERBS_ATTR_TYPE_PTR_IN, ptr, _len, ##__VA_ARGS__)
-/* If sizeof(_type) <= sizeof(u64), this will be inlined rather than a pointer */
-#define UVERBS_ATTR_PTR_IN(_id, _type, ...) \
- UVERBS_ATTR_PTR_IN_SZ(_id, _type, ##__VA_ARGS__)
-#define UVERBS_ATTR_PTR_OUT_SZ(_id, _len, ...) \
- UVERBS_ATTR(_id, UVERBS_ATTR_TYPE_PTR_OUT, ptr, _len, ##__VA_ARGS__)
-#define UVERBS_ATTR_PTR_OUT(_id, _type, ...) \
- UVERBS_ATTR_PTR_OUT_SZ(_id, _type, ##__VA_ARGS__)
-#define UVERBS_ATTR_ENUM_IN(_id, _enum_arr, ...) \
- UVERBS_ATTR(_id, UVERBS_ATTR_TYPE_ENUM_IN, enum_def, \
- .ids = (_enum_arr), \
- .num_elems = ARRAY_SIZE(_enum_arr), ##__VA_ARGS__)
+#define UVERBS_ATTR_UHW() \
+ UVERBS_ATTR_PTR_IN(UVERBS_ATTR_UHW_IN, \
+ UVERBS_ATTR_MIN_SIZE(0), \
+ UA_OPTIONAL), \
+ UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_UHW_OUT, \
+ UVERBS_ATTR_MIN_SIZE(0), \
+ UA_OPTIONAL)
/*
- * In new compiler, UVERBS_ATTR_IDR (and FD) could be simplified by declaring
- * it as
- * {.id = _id, \
- * .attr {.type = __obj_class, \
- * .obj = {.obj_type = _idr_type, \
- * .access = _access \
- * }, ##__VA_ARGS__ } }
- * But since we support older compilers too, we need the more complex code.
+ * =======================================
+ * Declaration helpers
+ * =======================================
*/
-#define ___UVERBS_ATTR_OBJ0(_id, _obj_class, _obj_type, _access, ...)\
- ((const struct uverbs_attr_def) \
- {.id = _id, \
- .attr = { {.obj = {.type = _obj_class, .obj_type = _obj_type, \
- .access = _access, .flags = 0 } }, } })
-#define ___UVERBS_ATTR_OBJ1(_id, _obj_class, _obj_type, _access, _flags)\
- ((const struct uverbs_attr_def) \
- {.id = _id, \
- .attr = { {.obj = {.type = _obj_class, .obj_type = _obj_type, \
- .access = _access, _flags} }, } })
-#define ___UVERBS_ATTR_OBJ(_id, _obj_class, _obj_type, _access, _flags, \
- _n, ...) \
- ___UVERBS_ATTR_OBJ##_n(_id, _obj_class, _obj_type, _access, _flags)
-#define __UVERBS_ATTR_OBJ(_id, _obj_class, _obj_type, _access, ...) \
- ___UVERBS_ATTR_OBJ(_id, _obj_class, _obj_type, _access, \
- ##__VA_ARGS__, 1, 0)
-#define UVERBS_ATTR_IDR(_id, _idr_type, _access, ...) \
- __UVERBS_ATTR_OBJ(_id, UVERBS_ATTR_TYPE_IDR, _idr_type, _access,\
- ##__VA_ARGS__)
-#define UVERBS_ATTR_FD(_id, _fd_type, _access, ...) \
- __UVERBS_ATTR_OBJ(_id, UVERBS_ATTR_TYPE_FD, _fd_type, \
- (_access) + BUILD_BUG_ON_ZERO( \
- (_access) != UVERBS_ACCESS_NEW && \
- (_access) != UVERBS_ACCESS_READ), \
- ##__VA_ARGS__)
-#define DECLARE_UVERBS_ATTR_SPEC(_name, ...) \
- const struct uverbs_attr_def _name = __VA_ARGS__
-
-#define DECLARE_UVERBS_ENUM(_name, ...) \
- const struct uverbs_enum_spec _name = { \
- .len = ARRAY_SIZE(((struct uverbs_attr_spec[]){__VA_ARGS__})),\
- .ids = {__VA_ARGS__}, \
+
+#define DECLARE_UVERBS_OBJECT_TREE(_name, ...) \
+ static const struct uverbs_object_def *const _name##_ptr[] = { \
+ __VA_ARGS__, \
+ }; \
+ static const struct uverbs_object_tree_def _name = { \
+ .num_objects = ARRAY_SIZE(_name##_ptr), \
+ .objects = &_name##_ptr, \
}
-#define _UVERBS_METHOD_ATTRS_SZ(...) \
- (sizeof((const struct uverbs_attr_def * const []){__VA_ARGS__}) /\
- sizeof(const struct uverbs_attr_def *))
-#define _UVERBS_METHOD(_id, _handler, _flags, ...) \
- ((const struct uverbs_method_def) { \
- .id = _id, \
- .flags = _flags, \
- .handler = _handler, \
- .num_attrs = _UVERBS_METHOD_ATTRS_SZ(__VA_ARGS__), \
- .attrs = &(const struct uverbs_attr_def * const []){__VA_ARGS__} })
-#define DECLARE_UVERBS_METHOD(_name, _id, _handler, ...) \
- const struct uverbs_method_def _name = \
- _UVERBS_METHOD(_id, _handler, 0, ##__VA_ARGS__)
-#define DECLARE_UVERBS_CTX_METHOD(_name, _id, _handler, _flags, ...) \
- const struct uverbs_method_def _name = \
- _UVERBS_METHOD(_id, _handler, \
- UVERBS_ACTION_FLAG_CREATE_ROOT, \
- ##__VA_ARGS__)
-#define _UVERBS_OBJECT_METHODS_SZ(...) \
- (sizeof((const struct uverbs_method_def * const []){__VA_ARGS__}) / \
- sizeof(const struct uverbs_method_def *))
-#define _UVERBS_OBJECT(_id, _type_attrs, ...) \
- ((const struct uverbs_object_def) { \
- .id = _id, \
- .type_attrs = _type_attrs, \
- .num_methods = _UVERBS_OBJECT_METHODS_SZ(__VA_ARGS__), \
- .methods = &(const struct uverbs_method_def * const []){__VA_ARGS__} })
-#define DECLARE_UVERBS_OBJECT(_name, _id, _type_attrs, ...) \
- const struct uverbs_object_def _name = \
- _UVERBS_OBJECT(_id, _type_attrs, ##__VA_ARGS__)
-#define _UVERBS_TREE_OBJECTS_SZ(...) \
- (sizeof((const struct uverbs_object_def * const []){__VA_ARGS__}) / \
- sizeof(const struct uverbs_object_def *))
-#define _UVERBS_OBJECT_TREE(...) \
- ((const struct uverbs_object_tree_def) { \
- .num_objects = _UVERBS_TREE_OBJECTS_SZ(__VA_ARGS__), \
- .objects = &(const struct uverbs_object_def * const []){__VA_ARGS__} })
-#define DECLARE_UVERBS_OBJECT_TREE(_name, ...) \
- const struct uverbs_object_tree_def _name = \
- _UVERBS_OBJECT_TREE(__VA_ARGS__)
/* =================================================
* Parsing infrastructure
@@ -323,7 +308,14 @@ struct uverbs_object_tree_def {
*/
struct uverbs_ptr_attr {
- u64 data;
+ /*
+ * If UVERBS_ATTR_SPEC_F_ALLOC_AND_COPY is set then the 'ptr' is
+ * used.
+ */
+ union {
+ void *ptr;
+ u64 data;
+ };
u16 len;
/* Combination of bits from enum UVERBS_ATTR_F_XXXX */
u16 flags;
@@ -331,11 +323,7 @@ struct uverbs_ptr_attr {
};
struct uverbs_obj_attr {
- /* pointer to the kernel descriptor -> type, access, etc */
- const struct uverbs_obj_type *type;
struct ib_uobject *uobject;
- /* fd or id in idr of this object */
- int id;
};
struct uverbs_attr {
@@ -431,6 +419,17 @@ static inline struct ib_uobject *uverbs_attr_get_uobject(const struct uverbs_att
return attr->obj_attr.uobject;
}
+static inline int
+uverbs_attr_get_len(const struct uverbs_attr_bundle *attrs_bundle, u16 idx)
+{
+ const struct uverbs_attr *attr = uverbs_attr_get(attrs_bundle, idx);
+
+ if (IS_ERR(attr))
+ return PTR_ERR(attr);
+
+ return attr->ptr_attr.len;
+}
+
static inline int uverbs_copy_to(const struct uverbs_attr_bundle *attrs_bundle,
size_t idx, const void *from, size_t size)
{
@@ -457,6 +456,18 @@ static inline bool uverbs_attr_ptr_is_inline(const struct uverbs_attr *attr)
return attr->ptr_attr.len <= sizeof(attr->ptr_attr.data);
}
+static inline void *uverbs_attr_get_alloced_ptr(
+ const struct uverbs_attr_bundle *attrs_bundle, u16 idx)
+{
+ const struct uverbs_attr *attr = uverbs_attr_get(attrs_bundle, idx);
+
+ if (IS_ERR(attr))
+ return (void *)attr;
+
+ return uverbs_attr_ptr_is_inline(attr) ? (void *)&attr->ptr_attr.data :
+ attr->ptr_attr.ptr;
+}
+
static inline int _uverbs_copy_from(void *to,
const struct uverbs_attr_bundle *attrs_bundle,
size_t idx,
diff --git a/include/rdma/uverbs_named_ioctl.h b/include/rdma/uverbs_named_ioctl.h
index c5bb4ebdb0b0..2eb1767042af 100644
--- a/include/rdma/uverbs_named_ioctl.h
+++ b/include/rdma/uverbs_named_ioctl.h
@@ -43,48 +43,84 @@
#define _UVERBS_NAME(x, y) _UVERBS_PASTE(x, y)
#define UVERBS_METHOD(id) _UVERBS_NAME(UVERBS_MODULE_NAME, _method_##id)
#define UVERBS_HANDLER(id) _UVERBS_NAME(UVERBS_MODULE_NAME, _handler_##id)
+#define UVERBS_OBJECT(id) _UVERBS_NAME(UVERBS_MOUDLE_NAME, _object_##id)
-#define DECLARE_UVERBS_NAMED_METHOD(id, ...) \
- DECLARE_UVERBS_METHOD(UVERBS_METHOD(id), id, UVERBS_HANDLER(id), ##__VA_ARGS__)
+/* These are static so they do not need to be qualified */
+#define UVERBS_METHOD_ATTRS(method_id) _method_attrs_##method_id
+#define UVERBS_OBJECT_METHODS(object_id) _object_methods_##object_id
-#define DECLARE_UVERBS_NAMED_METHOD_WITH_HANDLER(id, handler, ...) \
- DECLARE_UVERBS_METHOD(UVERBS_METHOD(id), id, handler, ##__VA_ARGS__)
+#define DECLARE_UVERBS_NAMED_METHOD(_method_id, ...) \
+ static const struct uverbs_attr_def *const UVERBS_METHOD_ATTRS( \
+ _method_id)[] = { __VA_ARGS__ }; \
+ static const struct uverbs_method_def UVERBS_METHOD(_method_id) = { \
+ .id = _method_id, \
+ .handler = UVERBS_HANDLER(_method_id), \
+ .num_attrs = ARRAY_SIZE(UVERBS_METHOD_ATTRS(_method_id)), \
+ .attrs = &UVERBS_METHOD_ATTRS(_method_id), \
+ }
-#define DECLARE_UVERBS_NAMED_METHOD_NO_OVERRIDE(id, handler, ...) \
- DECLARE_UVERBS_METHOD(UVERBS_METHOD(id), id, NULL, ##__VA_ARGS__)
-
-#define DECLARE_UVERBS_NAMED_OBJECT(id, ...) \
- DECLARE_UVERBS_OBJECT(UVERBS_OBJECT(id), id, ##__VA_ARGS__)
-
-#define _UVERBS_COMP_NAME(x, y, z) _UVERBS_NAME(_UVERBS_NAME(x, y), z)
+/* Create a standard destroy method using the default handler. The handle_attr
+ * argument must be the attribute specifying the handle to destroy, the
+ * default handler does not support any other attributes.
+ */
+#define DECLARE_UVERBS_NAMED_METHOD_DESTROY(_method_id, _handle_attr) \
+ static const struct uverbs_attr_def *const UVERBS_METHOD_ATTRS( \
+ _method_id)[] = { _handle_attr }; \
+ static const struct uverbs_method_def UVERBS_METHOD(_method_id) = { \
+ .id = _method_id, \
+ .handler = uverbs_destroy_def_handler, \
+ .num_attrs = ARRAY_SIZE(UVERBS_METHOD_ATTRS(_method_id)), \
+ .attrs = &UVERBS_METHOD_ATTRS(_method_id), \
+ }
-#define UVERBS_NO_OVERRIDE NULL
+#define DECLARE_UVERBS_NAMED_OBJECT(_object_id, _type_attrs, ...) \
+ static const struct uverbs_method_def *const UVERBS_OBJECT_METHODS( \
+ _object_id)[] = { __VA_ARGS__ }; \
+ const struct uverbs_object_def UVERBS_OBJECT(_object_id) = { \
+ .id = _object_id, \
+ .type_attrs = &_type_attrs, \
+ .num_methods = ARRAY_SIZE(UVERBS_OBJECT_METHODS(_object_id)), \
+ .methods = &UVERBS_OBJECT_METHODS(_object_id) \
+ }
-/* This declares a parsing tree with one object and one method. This is usually
- * used for merging driver attributes to the common attributes. The driver has
- * a chance to override the handler and type attrs of the original object.
- * The __VA_ARGS__ just contains a list of attributes.
+/*
+ * Declare global methods. These still have a unique object_id because we
+ * identify all uapi methods with a (object,method) tuple. However, they have
+ * no type pointer.
*/
-#define ADD_UVERBS_ATTRIBUTES(_name, _object, _method, _type_attrs, _handler, ...) \
-static DECLARE_UVERBS_METHOD(_UVERBS_COMP_NAME(UVERBS_MODULE_NAME, \
- _method_, _name), \
- _method, _handler, ##__VA_ARGS__); \
- \
-static DECLARE_UVERBS_OBJECT(_UVERBS_COMP_NAME(UVERBS_MODULE_NAME, \
- _object_, _name), \
- _object, _type_attrs, \
- &_UVERBS_COMP_NAME(UVERBS_MODULE_NAME, \
- _method_, _name)); \
- \
-static DECLARE_UVERBS_OBJECT_TREE(_name, \
- &_UVERBS_COMP_NAME(UVERBS_MODULE_NAME, \
- _object_, _name))
+#define DECLARE_UVERBS_GLOBAL_METHODS(_object_id, ...) \
+ static const struct uverbs_method_def *const UVERBS_OBJECT_METHODS( \
+ _object_id)[] = { __VA_ARGS__ }; \
+ const struct uverbs_object_def UVERBS_OBJECT(_object_id) = { \
+ .id = _object_id, \
+ .num_methods = ARRAY_SIZE(UVERBS_OBJECT_METHODS(_object_id)), \
+ .methods = &UVERBS_OBJECT_METHODS(_object_id) \
+ }
-/* A very common use case is that the driver doesn't override the handler and
- * type_attrs. Therefore, we provide a simplified macro for this common case.
+/* Used by drivers to declare a complete parsing tree for a single method that
+ * differs only in having additional driver specific attributes.
*/
-#define ADD_UVERBS_ATTRIBUTES_SIMPLE(_name, _object, _method, ...) \
- ADD_UVERBS_ATTRIBUTES(_name, _object, _method, UVERBS_NO_OVERRIDE, \
- UVERBS_NO_OVERRIDE, ##__VA_ARGS__)
+#define ADD_UVERBS_ATTRIBUTES_SIMPLE(_name, _object_id, _method_id, ...) \
+ static const struct uverbs_attr_def *const UVERBS_METHOD_ATTRS( \
+ _method_id)[] = { __VA_ARGS__ }; \
+ static const struct uverbs_method_def UVERBS_METHOD(_method_id) = { \
+ .id = _method_id, \
+ .num_attrs = ARRAY_SIZE(UVERBS_METHOD_ATTRS(_method_id)), \
+ .attrs = &UVERBS_METHOD_ATTRS(_method_id), \
+ }; \
+ static const struct uverbs_method_def *const UVERBS_OBJECT_METHODS( \
+ _object_id)[] = { &UVERBS_METHOD(_method_id) }; \
+ static const struct uverbs_object_def _name##_struct = { \
+ .id = _object_id, \
+ .num_methods = 1, \
+ .methods = &UVERBS_OBJECT_METHODS(_object_id) \
+ }; \
+ static const struct uverbs_object_def *const _name##_ptrs[] = { \
+ &_name##_struct, \
+ }; \
+ static const struct uverbs_object_tree_def _name = { \
+ .num_objects = 1, \
+ .objects = &_name##_ptrs, \
+ }
#endif
diff --git a/include/rdma/uverbs_std_types.h b/include/rdma/uverbs_std_types.h
index 9d56cdb84655..3e3f108f0912 100644
--- a/include/rdma/uverbs_std_types.h
+++ b/include/rdma/uverbs_std_types.h
@@ -37,8 +37,6 @@
#include <rdma/uverbs_ioctl.h>
#include <rdma/ib_user_ioctl_verbs.h>
-#define UVERBS_OBJECT(id) uverbs_object_##id
-
#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
const struct uverbs_object_tree_def *uverbs_default_get_objects(void);
#else
@@ -50,28 +48,37 @@ static inline const struct uverbs_object_tree_def *uverbs_default_get_objects(vo
static inline struct ib_uobject *__uobj_get(const struct uverbs_obj_type *type,
bool write,
- struct ib_ucontext *ucontext,
+ struct ib_uverbs_file *ufile,
int id)
{
- return rdma_lookup_get_uobject(type, ucontext, id, write);
+ return rdma_lookup_get_uobject(type, ufile, id, write);
}
#define uobj_get_type(_object) UVERBS_OBJECT(_object).type_attrs
-#define uobj_get_read(_type, _id, _ucontext) \
- __uobj_get(uobj_get_type(_type), false, _ucontext, _id)
+#define uobj_get_read(_type, _id, _ufile) \
+ __uobj_get(uobj_get_type(_type), false, _ufile, _id)
+
+static inline void *_uobj_get_obj_read(const struct uverbs_obj_type *type,
+ int id, struct ib_uverbs_file *ufile)
+{
+ struct ib_uobject *uobj = __uobj_get(type, false, ufile, id);
+
+ if (IS_ERR(uobj))
+ return NULL;
+ return uobj->object;
+}
+#define uobj_get_obj_read(_object, _type, _id, _ufile) \
+ ((struct ib_##_object *)_uobj_get_obj_read(uobj_get_type(_type), _id, \
+ _ufile))
-#define uobj_get_obj_read(_object, _type, _id, _ucontext) \
-({ \
- struct ib_uobject *__uobj = \
- __uobj_get(uobj_get_type(_type), \
- false, _ucontext, _id); \
- \
- (struct ib_##_object *)(IS_ERR(__uobj) ? NULL : __uobj->object);\
-})
+#define uobj_get_write(_type, _id, _ufile) \
+ __uobj_get(uobj_get_type(_type), true, _ufile, _id)
-#define uobj_get_write(_type, _id, _ucontext) \
- __uobj_get(uobj_get_type(_type), true, _ucontext, _id)
+int __uobj_perform_destroy(const struct uverbs_obj_type *type, int id,
+ struct ib_uverbs_file *ufile, int success_res);
+#define uobj_perform_destroy(_type, _id, _ufile, _success_res) \
+ __uobj_perform_destroy(uobj_get_type(_type), _id, _ufile, _success_res)
static inline void uobj_put_read(struct ib_uobject *uobj)
{
@@ -102,13 +109,12 @@ static inline void uobj_alloc_abort(struct ib_uobject *uobj)
}
static inline struct ib_uobject *__uobj_alloc(const struct uverbs_obj_type *type,
- struct ib_ucontext *ucontext)
+ struct ib_uverbs_file *ufile)
{
- return rdma_alloc_begin_uobject(type, ucontext);
+ return rdma_alloc_begin_uobject(type, ufile);
}
-#define uobj_alloc(_type, ucontext) \
- __uobj_alloc(uobj_get_type(_type), ucontext)
+#define uobj_alloc(_type, _ufile) __uobj_alloc(uobj_get_type(_type), _ufile)
#endif
diff --git a/include/rdma/uverbs_types.h b/include/rdma/uverbs_types.h
index cc04ec65588d..e2fc9db466d3 100644
--- a/include/rdma/uverbs_types.h
+++ b/include/rdma/uverbs_types.h
@@ -72,12 +72,12 @@ struct uverbs_obj_type_class {
* reset flow).
*/
struct ib_uobject *(*alloc_begin)(const struct uverbs_obj_type *type,
- struct ib_ucontext *ucontext);
+ struct ib_uverbs_file *ufile);
void (*alloc_commit)(struct ib_uobject *uobj);
void (*alloc_abort)(struct ib_uobject *uobj);
struct ib_uobject *(*lookup_get)(const struct uverbs_obj_type *type,
- struct ib_ucontext *ucontext, int id,
+ struct ib_uverbs_file *ufile, int id,
bool exclusive);
void (*lookup_put)(struct ib_uobject *uobj, bool exclusive);
/*
@@ -93,7 +93,6 @@ struct uverbs_obj_type_class {
struct uverbs_obj_type {
const struct uverbs_obj_type_class * const type_class;
size_t obj_size;
- unsigned int destroy_order;
};
/*
@@ -121,11 +120,11 @@ struct uverbs_obj_idr_type {
};
struct ib_uobject *rdma_lookup_get_uobject(const struct uverbs_obj_type *type,
- struct ib_ucontext *ucontext,
+ struct ib_uverbs_file *ufile,
int id, bool exclusive);
void rdma_lookup_put_uobject(struct ib_uobject *uobj, bool exclusive);
struct ib_uobject *rdma_alloc_begin_uobject(const struct uverbs_obj_type *type,
- struct ib_ucontext *ucontext);
+ struct ib_uverbs_file *ufile);
void rdma_alloc_abort_uobject(struct ib_uobject *uobj);
int __must_check rdma_remove_commit_uobject(struct ib_uobject *uobj);
int rdma_alloc_commit_uobject(struct ib_uobject *uobj);
@@ -140,7 +139,7 @@ struct uverbs_obj_fd_type {
* the driver is removed or the process terminated.
*/
struct uverbs_obj_type type;
- int (*context_closed)(struct ib_uobject_file *uobj_file,
+ int (*context_closed)(struct ib_uobject *uobj,
enum rdma_remove_reason why);
const struct file_operations *fops;
const char *name;
@@ -152,30 +151,29 @@ extern const struct uverbs_obj_type_class uverbs_fd_class;
#define UVERBS_BUILD_BUG_ON(cond) (sizeof(char[1 - 2 * !!(cond)]) - \
sizeof(char))
-#define UVERBS_TYPE_ALLOC_FD(_order, _obj_size, _context_closed, _fops, _name, _flags)\
+#define UVERBS_TYPE_ALLOC_FD(_obj_size, _context_closed, _fops, _name, _flags)\
((&((const struct uverbs_obj_fd_type) \
{.type = { \
- .destroy_order = _order, \
.type_class = &uverbs_fd_class, \
.obj_size = (_obj_size) + \
- UVERBS_BUILD_BUG_ON((_obj_size) < sizeof(struct ib_uobject_file)), \
+ UVERBS_BUILD_BUG_ON((_obj_size) < \
+ sizeof(struct ib_uobject)), \
}, \
.context_closed = _context_closed, \
.fops = _fops, \
.name = _name, \
.flags = _flags}))->type)
-#define UVERBS_TYPE_ALLOC_IDR_SZ(_size, _order, _destroy_object) \
+#define UVERBS_TYPE_ALLOC_IDR_SZ(_size, _destroy_object) \
((&((const struct uverbs_obj_idr_type) \
{.type = { \
- .destroy_order = _order, \
.type_class = &uverbs_idr_class, \
.obj_size = (_size) + \
UVERBS_BUILD_BUG_ON((_size) < \
sizeof(struct ib_uobject)) \
}, \
.destroy_object = _destroy_object,}))->type)
-#define UVERBS_TYPE_ALLOC_IDR(_order, _destroy_object) \
- UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uobject), _order, \
+#define UVERBS_TYPE_ALLOC_IDR(_destroy_object) \
+ UVERBS_TYPE_ALLOC_IDR_SZ(sizeof(struct ib_uobject), \
_destroy_object)
#endif
diff --git a/include/uapi/rdma/cxgb4-abi.h b/include/uapi/rdma/cxgb4-abi.h
index a159ba8dcf8f..65c9eacd3ffb 100644
--- a/include/uapi/rdma/cxgb4-abi.h
+++ b/include/uapi/rdma/cxgb4-abi.h
@@ -44,6 +44,16 @@
* In particular do not use pointer types -- pass pointers in __aligned_u64
* instead.
*/
+
+enum {
+ C4IW_64B_CQE = (1 << 0)
+};
+
+struct c4iw_create_cq {
+ __u32 flags;
+ __u32 reserved;
+};
+
struct c4iw_create_cq_resp {
__aligned_u64 key;
__aligned_u64 gts_key;
@@ -51,7 +61,7 @@ struct c4iw_create_cq_resp {
__u32 cqid;
__u32 size;
__u32 qid_mask;
- __u32 reserved; /* explicit padding (optional for i386) */
+ __u32 flags;
};
enum {
diff --git a/include/uapi/rdma/ib_user_ioctl_cmds.h b/include/uapi/rdma/ib_user_ioctl_cmds.h
index 888ac5975a6c..2c881aaf05c2 100644
--- a/include/uapi/rdma/ib_user_ioctl_cmds.h
+++ b/include/uapi/rdma/ib_user_ioctl_cmds.h
@@ -79,7 +79,7 @@ enum uverbs_attrs_destroy_cq_cmd_attr_ids {
};
enum uverbs_attrs_create_flow_action_esp {
- UVERBS_ATTR_FLOW_ACTION_ESP_HANDLE,
+ UVERBS_ATTR_CREATE_FLOW_ACTION_ESP_HANDLE,
UVERBS_ATTR_FLOW_ACTION_ESP_ATTRS,
UVERBS_ATTR_FLOW_ACTION_ESP_ESN,
UVERBS_ATTR_FLOW_ACTION_ESP_KEYMAT,
@@ -87,6 +87,11 @@ enum uverbs_attrs_create_flow_action_esp {
UVERBS_ATTR_FLOW_ACTION_ESP_ENCAP,
};
+enum uverbs_attrs_modify_flow_action_esp {
+ UVERBS_ATTR_MODIFY_FLOW_ACTION_ESP_HANDLE =
+ UVERBS_ATTR_CREATE_FLOW_ACTION_ESP_HANDLE,
+};
+
enum uverbs_attrs_destroy_flow_action_esp {
UVERBS_ATTR_DESTROY_FLOW_ACTION_HANDLE,
};
diff --git a/include/uapi/rdma/ib_user_ioctl_verbs.h b/include/uapi/rdma/ib_user_ioctl_verbs.h
index 625545d862d7..6cdf192070a2 100644
--- a/include/uapi/rdma/ib_user_ioctl_verbs.h
+++ b/include/uapi/rdma/ib_user_ioctl_verbs.h
@@ -40,6 +40,59 @@
#define RDMA_UAPI_PTR(_type, _name) __aligned_u64 _name
#endif
+enum ib_uverbs_access_flags {
+ IB_UVERBS_ACCESS_LOCAL_WRITE = 1 << 0,
+ IB_UVERBS_ACCESS_REMOTE_WRITE = 1 << 1,
+ IB_UVERBS_ACCESS_REMOTE_READ = 1 << 2,
+ IB_UVERBS_ACCESS_REMOTE_ATOMIC = 1 << 3,
+ IB_UVERBS_ACCESS_MW_BIND = 1 << 4,
+ IB_UVERBS_ACCESS_ZERO_BASED = 1 << 5,
+ IB_UVERBS_ACCESS_ON_DEMAND = 1 << 6,
+ IB_UVERBS_ACCESS_HUGETLB = 1 << 7,
+};
+
+enum ib_uverbs_query_port_cap_flags {
+ IB_UVERBS_PCF_SM = 1 << 1,
+ IB_UVERBS_PCF_NOTICE_SUP = 1 << 2,
+ IB_UVERBS_PCF_TRAP_SUP = 1 << 3,
+ IB_UVERBS_PCF_OPT_IPD_SUP = 1 << 4,
+ IB_UVERBS_PCF_AUTO_MIGR_SUP = 1 << 5,
+ IB_UVERBS_PCF_SL_MAP_SUP = 1 << 6,
+ IB_UVERBS_PCF_MKEY_NVRAM = 1 << 7,
+ IB_UVERBS_PCF_PKEY_NVRAM = 1 << 8,
+ IB_UVERBS_PCF_LED_INFO_SUP = 1 << 9,
+ IB_UVERBS_PCF_SM_DISABLED = 1 << 10,
+ IB_UVERBS_PCF_SYS_IMAGE_GUID_SUP = 1 << 11,
+ IB_UVERBS_PCF_PKEY_SW_EXT_PORT_TRAP_SUP = 1 << 12,
+ IB_UVERBS_PCF_EXTENDED_SPEEDS_SUP = 1 << 14,
+ IB_UVERBS_PCF_CM_SUP = 1 << 16,
+ IB_UVERBS_PCF_SNMP_TUNNEL_SUP = 1 << 17,
+ IB_UVERBS_PCF_REINIT_SUP = 1 << 18,
+ IB_UVERBS_PCF_DEVICE_MGMT_SUP = 1 << 19,
+ IB_UVERBS_PCF_VENDOR_CLASS_SUP = 1 << 20,
+ IB_UVERBS_PCF_DR_NOTICE_SUP = 1 << 21,
+ IB_UVERBS_PCF_CAP_MASK_NOTICE_SUP = 1 << 22,
+ IB_UVERBS_PCF_BOOT_MGMT_SUP = 1 << 23,
+ IB_UVERBS_PCF_LINK_LATENCY_SUP = 1 << 24,
+ IB_UVERBS_PCF_CLIENT_REG_SUP = 1 << 25,
+ /*
+ * IsOtherLocalChangesNoticeSupported is aliased by IP_BASED_GIDS and
+ * is inaccessible
+ */
+ IB_UVERBS_PCF_LINK_SPEED_WIDTH_TABLE_SUP = 1 << 27,
+ IB_UVERBS_PCF_VENDOR_SPECIFIC_MADS_TABLE_SUP = 1 << 28,
+ IB_UVERBS_PCF_MCAST_PKEY_TRAP_SUPPRESSION_SUP = 1 << 29,
+ IB_UVERBS_PCF_MCAST_FDB_TOP_SUP = 1 << 30,
+ IB_UVERBS_PCF_HIERARCHY_INFO_SUP = 1ULL << 31,
+
+ /* NOTE this is an internal flag, not an IBA flag */
+ IB_UVERBS_PCF_IP_BASED_GIDS = 1 << 26,
+};
+
+enum ib_uverbs_query_port_flags {
+ IB_UVERBS_QPF_GRH_REQUIRED = 1 << 0,
+};
+
enum ib_uverbs_flow_action_esp_keymat {
IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM,
};
@@ -99,4 +152,9 @@ struct ib_uverbs_flow_action_esp {
__aligned_u64 hard_limit_pkts;
};
+enum ib_uverbs_read_counters_flags {
+ /* prefer read values from driver cache */
+ IB_UVERBS_READ_COUNTERS_PREFER_CACHED = 1 << 0,
+};
+
#endif
diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h
index 4f9991de8e3a..25a16760de2a 100644
--- a/include/uapi/rdma/ib_user_verbs.h
+++ b/include/uapi/rdma/ib_user_verbs.h
@@ -279,7 +279,7 @@ struct ib_uverbs_query_port {
};
struct ib_uverbs_query_port_resp {
- __u32 port_cap_flags;
+ __u32 port_cap_flags; /* see ib_uverbs_query_port_cap_flags */
__u32 max_msg_sz;
__u32 bad_pkey_cntr;
__u32 qkey_viol_cntr;
@@ -299,7 +299,8 @@ struct ib_uverbs_query_port_resp {
__u8 active_speed;
__u8 phys_state;
__u8 link_layer;
- __u8 reserved[2];
+ __u8 flags; /* see ib_uverbs_query_port_flags */
+ __u8 reserved;
};
struct ib_uverbs_alloc_pd {
diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h
index 8daec1fa49cf..addbb9c4529e 100644
--- a/include/uapi/rdma/mlx5-abi.h
+++ b/include/uapi/rdma/mlx5-abi.h
@@ -76,6 +76,9 @@ enum mlx5_lib_caps {
MLX5_LIB_CAP_4K_UAR = (__u64)1 << 0,
};
+enum mlx5_ib_alloc_uctx_v2_flags {
+ MLX5_IB_ALLOC_UCTX_DEVX = 1 << 0,
+};
struct mlx5_ib_alloc_ucontext_req_v2 {
__u32 total_num_bfregs;
__u32 num_low_latency_bfregs;
@@ -90,6 +93,7 @@ struct mlx5_ib_alloc_ucontext_req_v2 {
enum mlx5_ib_alloc_ucontext_resp_mask {
MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET = 1UL << 0,
+ MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_DUMP_FILL_MKEY = 1UL << 1,
};
enum mlx5_user_cmds_supp_uhw {
@@ -138,7 +142,7 @@ struct mlx5_ib_alloc_ucontext_resp {
__u32 log_uar_size;
__u32 num_uars_per_page;
__u32 num_dyn_bfregs;
- __u32 reserved3;
+ __u32 dump_fill_mkey;
};
struct mlx5_ib_alloc_pd_resp {
diff --git a/include/uapi/rdma/mlx5_user_ioctl_cmds.h b/include/uapi/rdma/mlx5_user_ioctl_cmds.h
index f7d685ef2d1f..1a05bb4b0b34 100644
--- a/include/uapi/rdma/mlx5_user_ioctl_cmds.h
+++ b/include/uapi/rdma/mlx5_user_ioctl_cmds.h
@@ -45,4 +45,77 @@ enum mlx5_ib_alloc_dm_attrs {
MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX,
};
+enum mlx5_ib_devx_methods {
+ MLX5_IB_METHOD_DEVX_OTHER = (1U << UVERBS_ID_NS_SHIFT),
+ MLX5_IB_METHOD_DEVX_QUERY_UAR,
+ MLX5_IB_METHOD_DEVX_QUERY_EQN,
+};
+
+enum mlx5_ib_devx_other_attrs {
+ MLX5_IB_ATTR_DEVX_OTHER_CMD_IN = (1U << UVERBS_ID_NS_SHIFT),
+ MLX5_IB_ATTR_DEVX_OTHER_CMD_OUT,
+};
+
+enum mlx5_ib_devx_obj_create_attrs {
+ MLX5_IB_ATTR_DEVX_OBJ_CREATE_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+ MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_IN,
+ MLX5_IB_ATTR_DEVX_OBJ_CREATE_CMD_OUT,
+};
+
+enum mlx5_ib_devx_query_uar_attrs {
+ MLX5_IB_ATTR_DEVX_QUERY_UAR_USER_IDX = (1U << UVERBS_ID_NS_SHIFT),
+ MLX5_IB_ATTR_DEVX_QUERY_UAR_DEV_IDX,
+};
+
+enum mlx5_ib_devx_obj_destroy_attrs {
+ MLX5_IB_ATTR_DEVX_OBJ_DESTROY_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+};
+
+enum mlx5_ib_devx_obj_modify_attrs {
+ MLX5_IB_ATTR_DEVX_OBJ_MODIFY_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+ MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_IN,
+ MLX5_IB_ATTR_DEVX_OBJ_MODIFY_CMD_OUT,
+};
+
+enum mlx5_ib_devx_obj_query_attrs {
+ MLX5_IB_ATTR_DEVX_OBJ_QUERY_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+ MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_IN,
+ MLX5_IB_ATTR_DEVX_OBJ_QUERY_CMD_OUT,
+};
+
+enum mlx5_ib_devx_query_eqn_attrs {
+ MLX5_IB_ATTR_DEVX_QUERY_EQN_USER_VEC = (1U << UVERBS_ID_NS_SHIFT),
+ MLX5_IB_ATTR_DEVX_QUERY_EQN_DEV_EQN,
+};
+
+enum mlx5_ib_devx_obj_methods {
+ MLX5_IB_METHOD_DEVX_OBJ_CREATE = (1U << UVERBS_ID_NS_SHIFT),
+ MLX5_IB_METHOD_DEVX_OBJ_DESTROY,
+ MLX5_IB_METHOD_DEVX_OBJ_MODIFY,
+ MLX5_IB_METHOD_DEVX_OBJ_QUERY,
+};
+
+enum mlx5_ib_devx_umem_reg_attrs {
+ MLX5_IB_ATTR_DEVX_UMEM_REG_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+ MLX5_IB_ATTR_DEVX_UMEM_REG_ADDR,
+ MLX5_IB_ATTR_DEVX_UMEM_REG_LEN,
+ MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS,
+ MLX5_IB_ATTR_DEVX_UMEM_REG_OUT_ID,
+};
+
+enum mlx5_ib_devx_umem_dereg_attrs {
+ MLX5_IB_ATTR_DEVX_UMEM_DEREG_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+};
+
+enum mlx5_ib_devx_umem_methods {
+ MLX5_IB_METHOD_DEVX_UMEM_REG = (1U << UVERBS_ID_NS_SHIFT),
+ MLX5_IB_METHOD_DEVX_UMEM_DEREG,
+};
+
+enum mlx5_ib_devx_objects {
+ MLX5_IB_OBJECT_DEVX = (1U << UVERBS_ID_NS_SHIFT),
+ MLX5_IB_OBJECT_DEVX_OBJ,
+ MLX5_IB_OBJECT_DEVX_UMEM,
+};
+
#endif
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
index 7232274de334..af6ad467ed61 100644
--- a/net/core/secure_seq.c
+++ b/net/core/secure_seq.c
@@ -140,6 +140,7 @@ u32 secure_tcp_seq(__be32 saddr, __be32 daddr,
&net_secret);
return seq_scale(hash);
}
+EXPORT_SYMBOL_GPL(secure_tcp_seq);
u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport)
{
diff --git a/net/rds/ib.c b/net/rds/ib.c
index b6ad38e48f62..683b55d4e2b0 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -143,7 +143,7 @@ static void rds_ib_add_one(struct ib_device *device)
INIT_WORK(&rds_ibdev->free_work, rds_ib_dev_free);
rds_ibdev->max_wrs = device->attrs.max_qp_wr;
- rds_ibdev->max_sge = min(device->attrs.max_sge, RDS_IB_MAX_SGE);
+ rds_ibdev->max_sge = min(device->attrs.max_send_sge, RDS_IB_MAX_SGE);
has_fr = (device->attrs.device_cap_flags &
IB_DEVICE_MEM_MGT_EXTENSIONS);
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index add82b0266f3..d99a75f75e42 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -16,6 +16,7 @@
#include <net/tcp.h>
#include <net/sock.h>
#include <rdma/ib_verbs.h>
+#include <rdma/ib_cache.h>
#include "smc.h"
#include "smc_clc.h"
@@ -450,8 +451,7 @@ out:
static int smc_link_determine_gid(struct smc_link_group *lgr)
{
struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];
- struct ib_gid_attr gattr;
- union ib_gid gid;
+ const struct ib_gid_attr *gattr;
int i;
if (!lgr->vlan_id) {
@@ -461,18 +461,18 @@ static int smc_link_determine_gid(struct smc_link_group *lgr)
for (i = 0; i < lnk->smcibdev->pattr[lnk->ibport - 1].gid_tbl_len;
i++) {
- if (ib_query_gid(lnk->smcibdev->ibdev, lnk->ibport, i, &gid,
- &gattr))
+ gattr = rdma_get_gid_attr(lnk->smcibdev->ibdev, lnk->ibport, i);
+ if (IS_ERR(gattr))
continue;
- if (gattr.ndev) {
- if (is_vlan_dev(gattr.ndev) &&
- vlan_dev_vlan_id(gattr.ndev) == lgr->vlan_id) {
- lnk->gid = gid;
- dev_put(gattr.ndev);
+ if (gattr->ndev) {
+ if (is_vlan_dev(gattr->ndev) &&
+ vlan_dev_vlan_id(gattr->ndev) == lgr->vlan_id) {
+ lnk->gid = gattr->gid;
+ rdma_put_gid_attr(gattr);
return 0;
}
- dev_put(gattr.ndev);
}
+ rdma_put_gid_attr(gattr);
}
return -ENODEV;
}
diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index 0eed7ab9f28b..74f29f814ec1 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -16,6 +16,7 @@
#include <linux/workqueue.h>
#include <linux/scatterlist.h>
#include <rdma/ib_verbs.h>
+#include <rdma/ib_cache.h>
#include "smc_pnet.h"
#include "smc_ib.h"
@@ -372,17 +373,21 @@ void smc_ib_buf_unmap_sg(struct smc_ib_device *smcibdev,
static int smc_ib_fill_gid_and_mac(struct smc_ib_device *smcibdev, u8 ibport)
{
- struct ib_gid_attr gattr;
- int rc;
-
- rc = ib_query_gid(smcibdev->ibdev, ibport, 0,
- &smcibdev->gid[ibport - 1], &gattr);
- if (rc || !gattr.ndev)
- return -ENODEV;
+ const struct ib_gid_attr *gattr;
+ int rc = 0;
- memcpy(smcibdev->mac[ibport - 1], gattr.ndev->dev_addr, ETH_ALEN);
- dev_put(gattr.ndev);
- return 0;
+ gattr = rdma_get_gid_attr(smcibdev->ibdev, ibport, 0);
+ if (IS_ERR(gattr))
+ return PTR_ERR(gattr);
+ if (!gattr->ndev) {
+ rc = -ENODEV;
+ goto done;
+ }
+ smcibdev->gid[ibport - 1] = gattr->gid;
+ memcpy(smcibdev->mac[ibport - 1], gattr->ndev->dev_addr, ETH_ALEN);
+done:
+ rdma_put_gid_attr(gattr);
+ return rc;
}
/* Create an identifier unique for this instance of SMC-R.
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index e9535a66bab0..547b2cdf1427 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -476,7 +476,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
/* Qualify the transport resource defaults with the
* capabilities of this particular device */
- newxprt->sc_max_send_sges = dev->attrs.max_sge;
+ newxprt->sc_max_send_sges = dev->attrs.max_send_sge;
/* transport hdr, head iovec, one page list entry, tail iovec */
if (newxprt->sc_max_send_sges < 4) {
pr_err("svcrdma: too few Send SGEs available (%d)\n",
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 16161a36dc73..112a15abc4a4 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -508,7 +508,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
unsigned int max_sge;
int rc;
- max_sge = min_t(unsigned int, ia->ri_device->attrs.max_sge,
+ max_sge = min_t(unsigned int, ia->ri_device->attrs.max_send_sge,
RPCRDMA_MAX_SEND_SGES);
if (max_sge < RPCRDMA_MIN_SEND_SGES) {
pr_warn("rpcrdma: HCA provides only %d send SGEs\n", max_sge);