From 26e990badde40b2fb824bfa3cb9d4288a79584bc Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Sat, 3 Oct 2020 20:20:06 -0300 Subject: RDMA: Check attr_mask during modify_qp Each driver should check that it can support the provided attr_mask during modify_qp. IB_USER_VERBS_EX_CMD_MODIFY_QP was being used to block modify_qp_ex because the driver didn't check RATE_LIMIT. Link: https://lore.kernel.org/r/6-v1-caa70ba3d1ab+1436e-ucmd_mask_jgg@nvidia.com Signed-off-by: Jason Gunthorpe --- include/uapi/rdma/ib_user_verbs.h | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h index 456438c18c2c..7ee73a0652f1 100644 --- a/include/uapi/rdma/ib_user_verbs.h +++ b/include/uapi/rdma/ib_user_verbs.h @@ -596,20 +596,6 @@ enum { IB_UVERBS_CREATE_QP_SUP_COMP_MASK = IB_UVERBS_CREATE_QP_MASK_IND_TABLE, }; -enum { - /* - * This value is equal to IB_QP_DEST_QPN. - */ - IB_USER_LEGACY_LAST_QP_ATTR_MASK = 1ULL << 20, -}; - -enum { - /* - * This value is equal to IB_QP_RATE_LIMIT. - */ - IB_USER_LAST_QP_ATTR_MASK = 1ULL << 25, -}; - struct ib_uverbs_ex_create_qp { __aligned_u64 user_handle; __u32 pd_handle; -- cgit From 53ef4999f07d9c75cdc8effb0cc8c581dc39b1a1 Mon Sep 17 00:00:00 2001 From: Weihang Li Date: Wed, 2 Dec 2020 09:29:20 +0800 Subject: RDMA/hns: Move capability flags of QP and CQ to hns-abi.h These flags will be returned to the userspace through ABI, so they should be defined in hns-abi.h. Furthermore, there is no need to include hns-abi.h in every source files, it just needs to be included in the common header file. Link: https://lore.kernel.org/r/1606872560-17823-1-git-send-email-liweihang@huawei.com Reported-by: kernel test robot Signed-off-by: Weihang Li Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_cq.c | 1 - drivers/infiniband/hw/hns/hns_roce_device.h | 11 +---------- drivers/infiniband/hw/hns/hns_roce_main.c | 1 - drivers/infiniband/hw/hns/hns_roce_pd.c | 1 - drivers/infiniband/hw/hns/hns_roce_qp.c | 1 - drivers/infiniband/hw/hns/hns_roce_srq.c | 1 - include/uapi/rdma/hns-abi.h | 10 ++++++++++ 7 files changed, 11 insertions(+), 15 deletions(-) (limited to 'include/uapi') diff --git a/drivers/infiniband/hw/hns/hns_roce_cq.c b/drivers/infiniband/hw/hns/hns_roce_cq.c index 68f355fba425..5e6d68830fa5 100644 --- a/drivers/infiniband/hw/hns/hns_roce_cq.c +++ b/drivers/infiniband/hw/hns/hns_roce_cq.c @@ -36,7 +36,6 @@ #include "hns_roce_device.h" #include "hns_roce_cmd.h" #include "hns_roce_hem.h" -#include #include "hns_roce_common.h" static int alloc_cqc(struct hns_roce_dev *hr_dev, struct hns_roce_cq *hr_cq) diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 70ae37bad77e..60b8349cd2f8 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -34,6 +34,7 @@ #define _HNS_ROCE_DEVICE_H #include +#include #define DRV_NAME "hns_roce" @@ -131,16 +132,6 @@ enum { SERV_TYPE_UD, }; -enum hns_roce_qp_caps { - HNS_ROCE_QP_CAP_RQ_RECORD_DB = BIT(0), - HNS_ROCE_QP_CAP_SQ_RECORD_DB = BIT(1), - HNS_ROCE_QP_CAP_OWNER_DB = BIT(2), -}; - -enum hns_roce_cq_flags { - HNS_ROCE_CQ_FLAG_RECORD_DB = BIT(0), -}; - enum hns_roce_qp_state { HNS_ROCE_QP_STATE_RST, HNS_ROCE_QP_STATE_INIT, diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index f01590d8c3cf..e8aa8075ffcd 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -40,7 +40,6 @@ #include #include "hns_roce_common.h" #include "hns_roce_device.h" -#include #include "hns_roce_hem.h" /** diff --git a/drivers/infiniband/hw/hns/hns_roce_pd.c b/drivers/infiniband/hw/hns/hns_roce_pd.c index 98f69496adb4..45ec91db1553 100644 --- a/drivers/infiniband/hw/hns/hns_roce_pd.c +++ b/drivers/infiniband/hw/hns/hns_roce_pd.c @@ -32,7 +32,6 @@ #include #include -#include #include "hns_roce_device.h" static int hns_roce_pd_alloc(struct hns_roce_dev *hr_dev, unsigned long *pdn) diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 34aa086060d3..121d3b4c2edb 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -39,7 +39,6 @@ #include "hns_roce_common.h" #include "hns_roce_device.h" #include "hns_roce_hem.h" -#include static void flush_work_handle(struct work_struct *work) { diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index 27646b9e35df..36c6bcb85269 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -4,7 +4,6 @@ */ #include -#include #include "hns_roce_device.h" #include "hns_roce_cmd.h" #include "hns_roce_hem.h" diff --git a/include/uapi/rdma/hns-abi.h b/include/uapi/rdma/hns-abi.h index 9ec85f76e9ac..90b739d05adf 100644 --- a/include/uapi/rdma/hns-abi.h +++ b/include/uapi/rdma/hns-abi.h @@ -43,6 +43,10 @@ struct hns_roce_ib_create_cq { __u32 reserved; }; +enum hns_roce_cq_cap_flags { + HNS_ROCE_CQ_FLAG_RECORD_DB = 1 << 0, +}; + struct hns_roce_ib_create_cq_resp { __aligned_u64 cqn; /* Only 32 bits used, 64 for compat */ __aligned_u64 cap_flags; @@ -69,6 +73,12 @@ struct hns_roce_ib_create_qp { __aligned_u64 sdb_addr; }; +enum hns_roce_qp_cap_flags { + HNS_ROCE_QP_CAP_RQ_RECORD_DB = 1 << 0, + HNS_ROCE_QP_CAP_SQ_RECORD_DB = 1 << 1, + HNS_ROCE_QP_CAP_OWNER_DB = 1 << 2, +}; + struct hns_roce_ib_create_qp_resp { __aligned_u64 cap_flags; }; -- cgit From d21a1240f5169a07a230d72e0e6d3773b2a088b4 Mon Sep 17 00:00:00 2001 From: Bob Pearson Date: Thu, 10 Dec 2020 11:42:59 -0600 Subject: RDMA/rxe: Use acquire/release for memory ordering Change work and completion queues to use smp_load_acquire() and smp_store_release() to synchronize between driver and users. This commit goes with a matching series of commits in the rxe user space provider. Link: https://lore.kernel.org/r/20201210174258.5234-1-rpearson@hpe.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_cq.c | 5 -- drivers/infiniband/sw/rxe/rxe_queue.h | 94 ++++++++++++++++++++++------------- drivers/infiniband/sw/rxe/rxe_verbs.c | 11 ---- include/uapi/rdma/rdma_user_rxe.h | 21 ++++++++ 4 files changed, 81 insertions(+), 50 deletions(-) (limited to 'include/uapi') diff --git a/drivers/infiniband/sw/rxe/rxe_cq.c b/drivers/infiniband/sw/rxe/rxe_cq.c index 43394c3f29d4..b315ebf041ac 100644 --- a/drivers/infiniband/sw/rxe/rxe_cq.c +++ b/drivers/infiniband/sw/rxe/rxe_cq.c @@ -123,11 +123,6 @@ int rxe_cq_post(struct rxe_cq *cq, struct rxe_cqe *cqe, int solicited) memcpy(producer_addr(cq->queue), cqe, sizeof(*cqe)); - /* make sure all changes to the CQ are written before we update the - * producer pointer - */ - smp_wmb(); - advance_producer(cq->queue); spin_unlock_irqrestore(&cq->cq_lock, flags); diff --git a/drivers/infiniband/sw/rxe/rxe_queue.h b/drivers/infiniband/sw/rxe/rxe_queue.h index 7d434a6837a7..2902ca7b288c 100644 --- a/drivers/infiniband/sw/rxe/rxe_queue.h +++ b/drivers/infiniband/sw/rxe/rxe_queue.h @@ -7,9 +7,11 @@ #ifndef RXE_QUEUE_H #define RXE_QUEUE_H +/* for definition of shared struct rxe_queue_buf */ +#include + /* implements a simple circular buffer that can optionally be * shared between user space and the kernel and can be resized - * the requested element size is rounded up to a power of 2 * and the number of elements in the buffer is also rounded * up to a power of 2. Since the queue is empty when the @@ -17,28 +19,6 @@ * of the queue is one less than the number of element slots */ -/* this data structure is shared between user space and kernel - * space for those cases where the queue is shared. It contains - * the producer and consumer indices. Is also contains a copy - * of the queue size parameters for user space to use but the - * kernel must use the parameters in the rxe_queue struct - * this MUST MATCH the corresponding librxe struct - * for performance reasons arrange to have producer and consumer - * pointers in separate cache lines - * the kernel should always mask the indices to avoid accessing - * memory outside of the data area - */ -struct rxe_queue_buf { - __u32 log2_elem_size; - __u32 index_mask; - __u32 pad_1[30]; - __u32 producer_index; - __u32 pad_2[31]; - __u32 consumer_index; - __u32 pad_3[31]; - __u8 data[]; -}; - struct rxe_queue { struct rxe_dev *rxe; struct rxe_queue_buf *buf; @@ -46,7 +26,7 @@ struct rxe_queue { size_t buf_size; size_t elem_size; unsigned int log2_elem_size; - unsigned int index_mask; + u32 index_mask; }; int do_mmap_info(struct rxe_dev *rxe, struct mminfo __user *outbuf, @@ -76,26 +56,56 @@ static inline int next_index(struct rxe_queue *q, int index) static inline int queue_empty(struct rxe_queue *q) { - return ((q->buf->producer_index - q->buf->consumer_index) - & q->index_mask) == 0; + u32 prod; + u32 cons; + + /* make sure all changes to queue complete before + * testing queue empty + */ + prod = smp_load_acquire(&q->buf->producer_index); + /* same */ + cons = smp_load_acquire(&q->buf->consumer_index); + + return ((prod - cons) & q->index_mask) == 0; } static inline int queue_full(struct rxe_queue *q) { - return ((q->buf->producer_index + 1 - q->buf->consumer_index) - & q->index_mask) == 0; + u32 prod; + u32 cons; + + /* make sure all changes to queue complete before + * testing queue full + */ + prod = smp_load_acquire(&q->buf->producer_index); + /* same */ + cons = smp_load_acquire(&q->buf->consumer_index); + + return ((prod + 1 - cons) & q->index_mask) == 0; } static inline void advance_producer(struct rxe_queue *q) { - q->buf->producer_index = (q->buf->producer_index + 1) - & q->index_mask; + u32 prod; + + prod = (q->buf->producer_index + 1) & q->index_mask; + + /* make sure all changes to queue complete before + * changing producer index + */ + smp_store_release(&q->buf->producer_index, prod); } static inline void advance_consumer(struct rxe_queue *q) { - q->buf->consumer_index = (q->buf->consumer_index + 1) - & q->index_mask; + u32 cons; + + cons = (q->buf->consumer_index + 1) & q->index_mask; + + /* make sure all changes to queue complete before + * changing consumer index + */ + smp_store_release(&q->buf->consumer_index, cons); } static inline void *producer_addr(struct rxe_queue *q) @@ -112,12 +122,28 @@ static inline void *consumer_addr(struct rxe_queue *q) static inline unsigned int producer_index(struct rxe_queue *q) { - return q->buf->producer_index; + u32 index; + + /* make sure all changes to queue + * complete before getting producer index + */ + index = smp_load_acquire(&q->buf->producer_index); + index &= q->index_mask; + + return index; } static inline unsigned int consumer_index(struct rxe_queue *q) { - return q->buf->consumer_index; + u32 index; + + /* make sure all changes to queue + * complete before getting consumer index + */ + index = smp_load_acquire(&q->buf->consumer_index); + index &= q->index_mask; + + return index; } static inline void *addr_from_index(struct rxe_queue *q, unsigned int index) diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 2fbea2b2d72a..a031514e2f41 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -244,11 +244,6 @@ static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr) recv_wqe->dma.cur_sge = 0; recv_wqe->dma.sge_offset = 0; - /* make sure all changes to the work queue are written before we - * update the producer pointer - */ - smp_wmb(); - advance_producer(rq->queue); return 0; @@ -633,12 +628,6 @@ static int post_one_send(struct rxe_qp *qp, const struct ib_send_wr *ibwr, if (unlikely(err)) goto err1; - /* - * make sure all changes to the work queue are - * written before we update the producer pointer - */ - smp_wmb(); - advance_producer(sq->queue); spin_unlock_irqrestore(&qp->sq.sq_lock, flags); diff --git a/include/uapi/rdma/rdma_user_rxe.h b/include/uapi/rdma/rdma_user_rxe.h index e591d8c1f3cf..068433e2229d 100644 --- a/include/uapi/rdma/rdma_user_rxe.h +++ b/include/uapi/rdma/rdma_user_rxe.h @@ -181,4 +181,25 @@ struct rxe_modify_srq_cmd { __aligned_u64 mmap_info_addr; }; +/* This data structure is stored at the base of work and + * completion queues shared between user space and kernel space. + * It contains the producer and consumer indices. Is also + * contains a copy of the queue size parameters for user space + * to use but the kernel must use the parameters in the + * rxe_queue struct. For performance reasons arrange to have + * producer and consumer indices in separate cache lines + * the kernel should always mask the indices to avoid accessing + * memory outside of the data area + */ +struct rxe_queue_buf { + __u32 log2_elem_size; + __u32 index_mask; + __u32 pad_1[30]; + __u32 producer_index; + __u32 pad_2[31]; + __u32 consumer_index; + __u32 pad_3[31]; + __u8 data[]; +}; + #endif /* RDMA_USER_RXE_H */ -- cgit