153 files changed, 4504 insertions, 2384 deletions
diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml index 3a31a979709b..cf29ab10501c 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml @@ -270,6 +270,47 @@ allOf: contains: enum: - qcom,msm8998-smmu-v2 + then: + anyOf: + - properties: + clock-names: + items: + - const: bus + clocks: + items: + - description: bus clock required for downstream bus access and for + the smmu ptw + - properties: + clock-names: + items: + - const: iface + - const: mem + - const: mem_iface + clocks: + items: + - description: interface clock required to access smmu's registers + through the TCU's programming interface. + - description: bus clock required for memory access + - description: bus clock required for GPU memory access + - properties: + clock-names: + items: + - const: iface-mm + - const: iface-smmu + - const: bus-smmu + clocks: + items: + - description: interface clock required to access mnoc's registers + through the TCU's programming interface. + - description: interface clock required to access smmu's registers + through the TCU's programming interface. + - description: bus clock required for the smmu ptw + + - if: + properties: + compatible: + contains: + enum: - qcom,sdm630-smmu-v2 - qcom,sm6375-smmu-v2 then: diff --git a/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml b/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml index 5b6395bc10e0..ea6b0f5f24de 100644 --- a/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml +++ b/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml @@ -78,6 +78,9 @@ properties: - mediatek,mt8173-m4u # generation two - mediatek,mt8183-m4u # generation two - mediatek,mt8186-iommu-mm # generation two + - mediatek,mt8188-iommu-vdo # generation two + - mediatek,mt8188-iommu-vpp # generation two + - mediatek,mt8188-iommu-infra # generation two - mediatek,mt8192-m4u # generation two - mediatek,mt8195-iommu-vdo # generation two - mediatek,mt8195-iommu-vpp # generation two @@ -123,6 +126,7 @@ properties: description: | This is the mtk_m4u_id according to the HW. 
Specifies the mtk_m4u_id as defined in + dt-binding/memory/mediatek,mt8188-memory-port.h for mt8188, dt-binding/memory/mt2701-larb-port.h for mt2701 and mt7623, dt-binding/memory/mt2712-larb-port.h for mt2712, dt-binding/memory/mt6779-larb-port.h for mt6779, @@ -155,6 +159,8 @@ allOf: - mediatek,mt6795-m4u - mediatek,mt8173-m4u - mediatek,mt8186-iommu-mm + - mediatek,mt8188-iommu-vdo + - mediatek,mt8188-iommu-vpp - mediatek,mt8192-m4u - mediatek,mt8195-iommu-vdo - mediatek,mt8195-iommu-vpp @@ -168,6 +174,8 @@ allOf: compatible: enum: - mediatek,mt8186-iommu-mm + - mediatek,mt8188-iommu-vdo + - mediatek,mt8188-iommu-vpp - mediatek,mt8192-m4u - mediatek,mt8195-iommu-vdo - mediatek,mt8195-iommu-vpp @@ -194,7 +202,9 @@ allOf: properties: compatible: contains: - const: mediatek,mt8195-iommu-infra + enum: + - mediatek,mt8188-iommu-infra + - mediatek,mt8195-iommu-infra then: required: diff --git a/Documentation/devicetree/bindings/iommu/qcom,iommu.yaml b/Documentation/devicetree/bindings/iommu/qcom,iommu.yaml index d9fabdf930d9..a74eb899c381 100644 --- a/Documentation/devicetree/bindings/iommu/qcom,iommu.yaml +++ b/Documentation/devicetree/bindings/iommu/qcom,iommu.yaml @@ -17,11 +17,16 @@ description: | properties: compatible: - items: - - enum: - - qcom,msm8916-iommu - - qcom,msm8953-iommu - - const: qcom,msm-iommu-v1 + oneOf: + - items: + - enum: + - qcom,msm8916-iommu + - qcom,msm8953-iommu + - const: qcom,msm-iommu-v1 + - items: + - enum: + - qcom,msm8976-iommu + - const: qcom,msm-iommu-v2 clocks: items: @@ -64,6 +69,8 @@ patternProperties: enum: - qcom,msm-iommu-v1-ns - qcom,msm-iommu-v1-sec + - qcom,msm-iommu-v2-ns + - qcom,msm-iommu-v2-sec interrupts: maxItems: 1 @@ -71,6 +78,11 @@ patternProperties: reg: maxItems: 1 + qcom,ctx-asid: + $ref: /schemas/types.yaml#/definitions/uint32 + description: + The ASID number associated to the context bank. 
+ required: - compatible - interrupts diff --git a/MAINTAINERS b/MAINTAINERS index 29a3fd26017c..4cc6ec3113e4 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13356,6 +13356,7 @@ L: [email protected] (moderated for non-subscribers) S: Supported F: Documentation/devicetree/bindings/iommu/mediatek* F: drivers/iommu/mtk_iommu* +F: include/dt-bindings/memory/mediatek,mt*-port.h F: include/dt-bindings/memory/mt*-port.h MEDIATEK JPEG DRIVER diff --git a/arch/x86/include/asm/audit.h b/arch/x86/include/asm/audit.h index 36aec57ea7a3..fa918f01333e 100644 --- a/arch/x86/include/asm/audit.h +++ b/arch/x86/include/asm/audit.h @@ -4,4 +4,11 @@ int ia32_classify_syscall(unsigned int syscall); +extern unsigned ia32_dir_class[]; +extern unsigned ia32_write_class[]; +extern unsigned ia32_read_class[]; +extern unsigned ia32_chattr_class[]; +extern unsigned ia32_signal_class[]; + + #endif /* _ASM_X86_AUDIT_H */ diff --git a/arch/x86/kernel/audit_64.c b/arch/x86/kernel/audit_64.c index 44c3601cfdc4..190c120f4285 100644 --- a/arch/x86/kernel/audit_64.c +++ b/arch/x86/kernel/audit_64.c @@ -63,11 +63,6 @@ int audit_classify_syscall(int abi, unsigned syscall) static int __init audit_classes_init(void) { #ifdef CONFIG_IA32_EMULATION - extern __u32 ia32_dir_class[]; - extern __u32 ia32_write_class[]; - extern __u32 ia32_read_class[]; - extern __u32 ia32_chattr_class[]; - extern __u32 ia32_signal_class[]; audit_register_class(AUDIT_CLASS_WRITE_32, ia32_write_class); audit_register_class(AUDIT_CLASS_READ_32, ia32_read_class); audit_register_class(AUDIT_CLASS_DIR_WRITE_32, ia32_dir_class); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 6d75fab10161..382d4e6b848d 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1280,11 +1280,11 @@ static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_INTEL_STEPPINGS(BROADWELL_G, X86_STEPPING_ANY, SRBDS), VULNBL_INTEL_STEPPINGS(BROADWELL_X, X86_STEPPING_ANY, MMIO), VULNBL_INTEL_STEPPINGS(BROADWELL, X86_STEPPING_ANY, SRBDS), - VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED), VULNBL_INTEL_STEPPINGS(SKYLAKE_X, X86_STEPPING_ANY, MMIO | RETBLEED | GDS), - VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED), - VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED | GDS), - VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED | GDS), + VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPING_ANY, MMIO | RETBLEED | GDS | SRBDS), + VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPING_ANY, MMIO | RETBLEED | GDS | SRBDS), + VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPING_ANY, MMIO | RETBLEED | GDS | SRBDS), + VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPING_ANY, MMIO | RETBLEED | GDS | SRBDS), VULNBL_INTEL_STEPPINGS(CANNONLAKE_L, X86_STEPPING_ANY, RETBLEED), VULNBL_INTEL_STEPPINGS(ICELAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED | GDS), VULNBL_INTEL_STEPPINGS(ICELAKE_D, X86_STEPPING_ANY, MMIO | GDS), diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 41dac93b8ea4..cadf68737e6b 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -71,7 +71,7 @@ static unsigned short xsave_cpuid_features[] __initdata = { [XFEATURE_ZMM_Hi256] = X86_FEATURE_AVX512F, [XFEATURE_Hi16_ZMM] = X86_FEATURE_AVX512F, [XFEATURE_PT_UNIMPLEMENTED_SO_FAR] = X86_FEATURE_INTEL_PT, - [XFEATURE_PKRU] = X86_FEATURE_PKU, + [XFEATURE_PKRU] = X86_FEATURE_OSPKE, [XFEATURE_PASID] = X86_FEATURE_ENQCMD, [XFEATURE_CET_USER] = 
X86_FEATURE_SHSTK, [XFEATURE_XTILE_CFG] = X86_FEATURE_AMX_TILE, diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index 4d8aff05a509..30a55207c000 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c @@ -231,9 +231,7 @@ struct irq_chip i8259A_chip = { }; static char irq_trigger[2]; -/** - * ELCR registers (0x4d0, 0x4d1) control edge/level of IRQ - */ +/* ELCR registers (0x4d0, 0x4d1) control edge/level of IRQ */ static void restore_ELCR(char *trigger) { outb(trigger[0], PIC_ELCR1); diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 531a9e3df717..691d4b7686ee 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -1584,7 +1584,7 @@ static const struct iommu_ops *acpi_iommu_configure_id(struct device *dev, * If we have reason to believe the IOMMU driver missed the initial * iommu_probe_device() call for dev, replay it to get things in order. */ - if (!err && dev->bus && !device_iommu_mapped(dev)) + if (!err && dev->bus) err = iommu_probe_device(dev); /* Ignore all other errors apart from EPROBE_DEFER */ diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c index 9a15f0d12c79..6453b5b35bfe 100644 --- a/drivers/dma/idxd/device.c +++ b/drivers/dma/idxd/device.c @@ -299,21 +299,6 @@ void idxd_wqs_unmap_portal(struct idxd_device *idxd) } } -static void __idxd_wq_set_priv_locked(struct idxd_wq *wq, int priv) -{ - struct idxd_device *idxd = wq->idxd; - union wqcfg wqcfg; - unsigned int offset; - - offset = WQCFG_OFFSET(idxd, wq->id, WQCFG_PRIVL_IDX); - spin_lock(&idxd->dev_lock); - wqcfg.bits[WQCFG_PRIVL_IDX] = ioread32(idxd->reg_base + offset); - wqcfg.priv = priv; - wq->wqcfg->bits[WQCFG_PRIVL_IDX] = wqcfg.bits[WQCFG_PRIVL_IDX]; - iowrite32(wqcfg.bits[WQCFG_PRIVL_IDX], idxd->reg_base + offset); - spin_unlock(&idxd->dev_lock); -} - static void __idxd_wq_set_pasid_locked(struct idxd_wq *wq, int pasid) { struct idxd_device *idxd = wq->idxd; @@ -1421,15 +1406,14 @@ int drv_enable_wq(struct idxd_wq *wq) } /* - * In the event that the WQ is configurable for pasid and priv bits. - * For kernel wq, the driver should setup the pasid, pasid_en, and priv bit. - * However, for non-kernel wq, the driver should only set the pasid_en bit for - * shared wq. A dedicated wq that is not 'kernel' type will configure pasid and + * In the event that the WQ is configurable for pasid, the driver + * should setup the pasid, pasid_en bit. This is true for both kernel + * and user shared workqueues. There is no need to setup priv bit in + * that in-kernel DMA will also do user privileged requests. + * A dedicated wq that is not 'kernel' type will configure pasid and * pasid_en later on so there is no need to setup. */ if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) { - int priv = 0; - if (wq_pasid_enabled(wq)) { if (is_idxd_wq_kernel(wq) || wq_shared(wq)) { u32 pasid = wq_dedicated(wq) ? idxd->pasid : 0; @@ -1437,10 +1421,6 @@ int drv_enable_wq(struct idxd_wq *wq) __idxd_wq_set_pasid_locked(wq, pasid); } } - - if (is_idxd_wq_kernel(wq)) - priv = 1; - __idxd_wq_set_priv_locked(wq, priv); } rc = 0; @@ -1548,6 +1528,15 @@ int idxd_device_drv_probe(struct idxd_dev *idxd_dev) if (rc < 0) return -ENXIO; + /* + * System PASID is preserved across device disable/enable cycle, but + * genconfig register content gets cleared during device reset. We + * need to re-enable user interrupts for kernel work queue completion + * IRQ to function. 
+ */ + if (idxd->pasid != IOMMU_PASID_INVALID) + idxd_set_user_intr(idxd, 1); + rc = idxd_device_evl_setup(idxd); if (rc < 0) { idxd->cmd_status = IDXD_SCMD_DEV_EVL_ERR; diff --git a/drivers/dma/idxd/dma.c b/drivers/dma/idxd/dma.c index eb35ca313684..07623fb0f52f 100644 --- a/drivers/dma/idxd/dma.c +++ b/drivers/dma/idxd/dma.c @@ -75,9 +75,10 @@ static inline void idxd_prep_desc_common(struct idxd_wq *wq, hw->xfer_size = len; /* * For dedicated WQ, this field is ignored and HW will use the WQCFG.priv - * field instead. This field should be set to 1 for kernel descriptors. + * field instead. This field should be set to 0 for kernel descriptors + * since kernel DMA on VT-d supports "user" privilege only. */ - hw->priv = 1; + hw->priv = 0; hw->completion_addr = compl; } diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 5428a2e1b1ec..502be9db63f4 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -473,6 +473,15 @@ static inline struct idxd_device *ie_to_idxd(struct idxd_irq_entry *ie) return container_of(ie, struct idxd_device, ie); } +static inline void idxd_set_user_intr(struct idxd_device *idxd, bool enable) +{ + union gencfg_reg reg; + + reg.bits = ioread32(idxd->reg_base + IDXD_GENCFG_OFFSET); + reg.user_int_en = enable; + iowrite32(reg.bits, idxd->reg_base + IDXD_GENCFG_OFFSET); +} + extern struct bus_type dsa_bus_type; extern bool support_enqcmd; diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 1aa823974cda..0eb1c827a215 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -550,14 +550,59 @@ static struct idxd_device *idxd_alloc(struct pci_dev *pdev, struct idxd_driver_d static int idxd_enable_system_pasid(struct idxd_device *idxd) { - return -EOPNOTSUPP; + struct pci_dev *pdev = idxd->pdev; + struct device *dev = &pdev->dev; + struct iommu_domain *domain; + ioasid_t pasid; + int ret; + + /* + * Attach a global PASID to the DMA domain so that we can use ENQCMDS + * to submit work on buffers mapped by DMA API. + */ + domain = iommu_get_domain_for_dev(dev); + if (!domain) + return -EPERM; + + pasid = iommu_alloc_global_pasid(dev); + if (pasid == IOMMU_PASID_INVALID) + return -ENOSPC; + + /* + * DMA domain is owned by the driver, it should support all valid + * types such as DMA-FQ, identity, etc. + */ + ret = iommu_attach_device_pasid(domain, dev, pasid); + if (ret) { + dev_err(dev, "failed to attach device pasid %d, domain type %d", + pasid, domain->type); + iommu_free_global_pasid(pasid); + return ret; + } + + /* Since we set user privilege for kernel DMA, enable completion IRQ */ + idxd_set_user_intr(idxd, 1); + idxd->pasid = pasid; + + return ret; } static void idxd_disable_system_pasid(struct idxd_device *idxd) { + struct pci_dev *pdev = idxd->pdev; + struct device *dev = &pdev->dev; + struct iommu_domain *domain; + + domain = iommu_get_domain_for_dev(dev); + if (!domain) + return; + + iommu_detach_device_pasid(domain, dev, idxd->pasid); + iommu_free_global_pasid(idxd->pasid); - iommu_sva_unbind_device(idxd->sva); + idxd_set_user_intr(idxd, 0); idxd->sva = NULL; + idxd->pasid = IOMMU_PASID_INVALID; } static int idxd_enable_sva(struct pci_dev *pdev) @@ -600,8 +645,9 @@ static int idxd_probe(struct idxd_device *idxd) } else { set_bit(IDXD_FLAG_USER_PASID_ENABLED, &idxd->flags); - if (idxd_enable_system_pasid(idxd)) - dev_warn(dev, "No in-kernel DMA with PASID.\n"); + rc = idxd_enable_system_pasid(idxd); + if (rc) + dev_warn(dev, "No in-kernel DMA with PASID. 
%d\n", rc); else set_bit(IDXD_FLAG_PASID_ENABLED, &idxd->flags); } diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c index 293739ac5596..63f6966c51aa 100644 --- a/drivers/dma/idxd/sysfs.c +++ b/drivers/dma/idxd/sysfs.c @@ -948,13 +948,6 @@ static ssize_t wq_name_store(struct device *dev, if (strlen(buf) > WQ_NAME_SIZE || strlen(buf) == 0) return -EINVAL; - /* - * This is temporarily placed here until we have SVM support for - * dmaengine. - */ - if (wq->type == IDXD_WQT_KERNEL && device_pasid_enabled(wq->idxd)) - return -EOPNOTSUPP; - input = kstrndup(buf, count, GFP_KERNEL); if (!input) return -ENOMEM; diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index 73f913cbd146..7acc0f936dad 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -1457,6 +1457,17 @@ static int config_non_roce_gid_cache(struct ib_device *device, i); goto err; } + + if (rdma_protocol_iwarp(device, port)) { + struct net_device *ndev; + + ndev = ib_device_get_netdev(device, port); + if (!ndev) + continue; + RCU_INIT_POINTER(gid_attr.ndev, ndev); + dev_put(ndev); + } + gid_attr.index = i; tprops->subnet_prefix = be64_to_cpu(gid_attr.gid.global.subnet_prefix); diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 9891c7dc2af5..c343edf2f664 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -686,30 +686,52 @@ cma_validate_port(struct ib_device *device, u32 port, struct rdma_id_private *id_priv) { struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; + const struct ib_gid_attr *sgid_attr = ERR_PTR(-ENODEV); int bound_if_index = dev_addr->bound_dev_if; - const struct ib_gid_attr *sgid_attr; int dev_type = dev_addr->dev_type; struct net_device *ndev = NULL; if (!rdma_dev_access_netns(device, id_priv->id.route.addr.dev_addr.net)) - return ERR_PTR(-ENODEV); + goto out; if ((dev_type == ARPHRD_INFINIBAND) && !rdma_protocol_ib(device, port)) - return ERR_PTR(-ENODEV); + goto out; if ((dev_type != ARPHRD_INFINIBAND) && rdma_protocol_ib(device, port)) - return ERR_PTR(-ENODEV); + goto out; + + /* + * For drivers that do not associate more than one net device with + * their gid tables, such as iWARP drivers, it is sufficient to + * return the first table entry. + * + * Other driver classes might be included in the future. 
+ */ + if (rdma_protocol_iwarp(device, port)) { + sgid_attr = rdma_get_gid_attr(device, port, 0); + if (IS_ERR(sgid_attr)) + goto out; + + rcu_read_lock(); + ndev = rcu_dereference(sgid_attr->ndev); + if (!net_eq(dev_net(ndev), dev_addr->net) || + ndev->ifindex != bound_if_index) + sgid_attr = ERR_PTR(-ENODEV); + rcu_read_unlock(); + goto out; + } if (dev_type == ARPHRD_ETHER && rdma_protocol_roce(device, port)) { ndev = dev_get_by_index(dev_addr->net, bound_if_index); if (!ndev) - return ERR_PTR(-ENODEV); + goto out; } else { gid_type = IB_GID_TYPE_IB; } sgid_attr = rdma_find_gid_by_port(device, gid, gid_type, port, ndev); dev_put(ndev); +out: return sgid_attr; } diff --git a/drivers/infiniband/core/iwpm_util.c b/drivers/infiniband/core/iwpm_util.c index 358a2db38d23..eecb369898f5 100644 --- a/drivers/infiniband/core/iwpm_util.c +++ b/drivers/infiniband/core/iwpm_util.c @@ -307,7 +307,7 @@ get_remote_info_exit: struct iwpm_nlmsg_request *iwpm_get_nlmsg_request(__u32 nlmsg_seq, u8 nl_client, gfp_t gfp) { - struct iwpm_nlmsg_request *nlmsg_request = NULL; + struct iwpm_nlmsg_request *nlmsg_request; unsigned long flags; nlmsg_request = kzalloc(sizeof(struct iwpm_nlmsg_request), gfp); diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c index 1b2cc9e45ade..ae2db0c70788 100644 --- a/drivers/infiniband/core/netlink.c +++ b/drivers/infiniband/core/netlink.c @@ -75,7 +75,7 @@ static bool is_nl_msg_valid(unsigned int type, unsigned int op) if (type >= RDMA_NL_NUM_CLIENTS) return false; - return (op < max_num_ops[type]) ? true : false; + return op < max_num_ops[type]; } static const struct rdma_nl_cbs * diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 7c9c79c13941..bf800f8cb3e4 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -72,12 +72,23 @@ enum { #define IB_UVERBS_BASE_DEV MKDEV(IB_UVERBS_MAJOR, IB_UVERBS_BASE_MINOR) static dev_t dynamic_uverbs_dev; -static struct class *uverbs_class; static DEFINE_IDA(uverbs_ida); static int ib_uverbs_add_one(struct ib_device *device); static void ib_uverbs_remove_one(struct ib_device *device, void *client_data); +static char *uverbs_devnode(const struct device *dev, umode_t *mode) +{ + if (mode) + *mode = 0666; + return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev)); +} + +static const struct class uverbs_class = { + .name = "infiniband_verbs", + .devnode = uverbs_devnode, +}; + /* * Must be called with the ufile->device->disassociate_srcu held, and the lock * must be held until use of the ucontext is finished. 
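/*
 * The hunk above replaces the class_create()d "infiniband_verbs" class with a
 * statically defined struct class registered via class_register(). A minimal
 * sketch of that pattern, using hypothetical example_* names rather than the
 * uverbs ones, might look like this:
 */
#include <linux/device.h>
#include <linux/module.h>
#include <linux/string.h>

static char *example_devnode(const struct device *dev, umode_t *mode)
{
	if (mode)
		*mode = 0666;		/* world-accessible device node */
	return kasprintf(GFP_KERNEL, "example/%s", dev_name(dev));
}

static const struct class example_class = {
	.name    = "example",
	.devnode = example_devnode,	/* set at definition, not patched in after class_create() */
};

static int __init example_init(void)
{
	return class_register(&example_class);	/* replaces class_create() */
}

static void __exit example_exit(void)
{
	class_unregister(&example_class);	/* replaces class_destroy() */
}

module_init(example_init);
module_exit(example_exit);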
@@ -1117,7 +1128,7 @@ static int ib_uverbs_add_one(struct ib_device *device) } device_initialize(&uverbs_dev->dev); - uverbs_dev->dev.class = uverbs_class; + uverbs_dev->dev.class = &uverbs_class; uverbs_dev->dev.parent = device->dev.parent; uverbs_dev->dev.release = ib_uverbs_release_dev; uverbs_dev->groups[0] = &dev_attr_group; @@ -1235,13 +1246,6 @@ static void ib_uverbs_remove_one(struct ib_device *device, void *client_data) put_device(&uverbs_dev->dev); } -static char *uverbs_devnode(const struct device *dev, umode_t *mode) -{ - if (mode) - *mode = 0666; - return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev)); -} - static int __init ib_uverbs_init(void) { int ret; @@ -1262,16 +1266,13 @@ static int __init ib_uverbs_init(void) goto out_alloc; } - uverbs_class = class_create("infiniband_verbs"); - if (IS_ERR(uverbs_class)) { - ret = PTR_ERR(uverbs_class); + ret = class_register(&uverbs_class); + if (ret) { pr_err("user_verbs: couldn't create class infiniband_verbs\n"); goto out_chrdev; } - uverbs_class->devnode = uverbs_devnode; - - ret = class_create_file(uverbs_class, &class_attr_abi_version.attr); + ret = class_create_file(&uverbs_class, &class_attr_abi_version.attr); if (ret) { pr_err("user_verbs: couldn't create abi_version attribute\n"); goto out_class; @@ -1286,7 +1287,7 @@ static int __init ib_uverbs_init(void) return 0; out_class: - class_destroy(uverbs_class); + class_unregister(&uverbs_class); out_chrdev: unregister_chrdev_region(dynamic_uverbs_dev, @@ -1303,7 +1304,7 @@ out: static void __exit ib_uverbs_cleanup(void) { ib_unregister_client(&uverbs_client); - class_destroy(uverbs_class); + class_unregister(&uverbs_class); unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_NUM_FIXED_MINOR); unregister_chrdev_region(dynamic_uverbs_dev, diff --git a/drivers/infiniband/core/uverbs_std_types_counters.c b/drivers/infiniband/core/uverbs_std_types_counters.c index 999da9c79866..381aa5797641 100644 --- a/drivers/infiniband/core/uverbs_std_types_counters.c +++ b/drivers/infiniband/core/uverbs_std_types_counters.c @@ -107,6 +107,8 @@ static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_READ)( return ret; uattr = uverbs_attr_get(attrs, UVERBS_ATTR_READ_COUNTERS_BUFF); + if (IS_ERR(uattr)) + return PTR_ERR(uattr); read_attr.ncounters = uattr->ptr_attr.len / sizeof(u64); read_attr.counters_buff = uverbs_zalloc( attrs, array_size(read_attr.ncounters, sizeof(u64))); diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index b99b3cc283b6..41ff5595c860 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -1880,6 +1880,89 @@ int ib_modify_qp_with_udata(struct ib_qp *ib_qp, struct ib_qp_attr *attr, } EXPORT_SYMBOL(ib_modify_qp_with_udata); +static void ib_get_width_and_speed(u32 netdev_speed, u32 lanes, + u16 *speed, u8 *width) +{ + if (!lanes) { + if (netdev_speed <= SPEED_1000) { + *width = IB_WIDTH_1X; + *speed = IB_SPEED_SDR; + } else if (netdev_speed <= SPEED_10000) { + *width = IB_WIDTH_1X; + *speed = IB_SPEED_FDR10; + } else if (netdev_speed <= SPEED_20000) { + *width = IB_WIDTH_4X; + *speed = IB_SPEED_DDR; + } else if (netdev_speed <= SPEED_25000) { + *width = IB_WIDTH_1X; + *speed = IB_SPEED_EDR; + } else if (netdev_speed <= SPEED_40000) { + *width = IB_WIDTH_4X; + *speed = IB_SPEED_FDR10; + } else if (netdev_speed <= SPEED_50000) { + *width = IB_WIDTH_2X; + *speed = IB_SPEED_EDR; + } else if (netdev_speed <= SPEED_100000) { + *width = IB_WIDTH_4X; + *speed = IB_SPEED_EDR; + } else if (netdev_speed <= SPEED_200000) { + 
*width = IB_WIDTH_4X; + *speed = IB_SPEED_HDR; + } else { + *width = IB_WIDTH_4X; + *speed = IB_SPEED_NDR; + } + + return; + } + + switch (lanes) { + case 1: + *width = IB_WIDTH_1X; + break; + case 2: + *width = IB_WIDTH_2X; + break; + case 4: + *width = IB_WIDTH_4X; + break; + case 8: + *width = IB_WIDTH_8X; + break; + case 12: + *width = IB_WIDTH_12X; + break; + default: + *width = IB_WIDTH_1X; + } + + switch (netdev_speed / lanes) { + case SPEED_2500: + *speed = IB_SPEED_SDR; + break; + case SPEED_5000: + *speed = IB_SPEED_DDR; + break; + case SPEED_10000: + *speed = IB_SPEED_FDR10; + break; + case SPEED_14000: + *speed = IB_SPEED_FDR; + break; + case SPEED_25000: + *speed = IB_SPEED_EDR; + break; + case SPEED_50000: + *speed = IB_SPEED_HDR; + break; + case SPEED_100000: + *speed = IB_SPEED_NDR; + break; + default: + *speed = IB_SPEED_SDR; + } +} + int ib_get_eth_speed(struct ib_device *dev, u32 port_num, u16 *speed, u8 *width) { int rc; @@ -1904,29 +1987,13 @@ int ib_get_eth_speed(struct ib_device *dev, u32 port_num, u16 *speed, u8 *width) netdev_speed = lksettings.base.speed; } else { netdev_speed = SPEED_1000; - pr_warn("%s speed is unknown, defaulting to %u\n", netdev->name, - netdev_speed); + if (rc) + pr_warn("%s speed is unknown, defaulting to %u\n", + netdev->name, netdev_speed); } - if (netdev_speed <= SPEED_1000) { - *width = IB_WIDTH_1X; - *speed = IB_SPEED_SDR; - } else if (netdev_speed <= SPEED_10000) { - *width = IB_WIDTH_1X; - *speed = IB_SPEED_FDR10; - } else if (netdev_speed <= SPEED_20000) { - *width = IB_WIDTH_4X; - *speed = IB_SPEED_DDR; - } else if (netdev_speed <= SPEED_25000) { - *width = IB_WIDTH_1X; - *speed = IB_SPEED_EDR; - } else if (netdev_speed <= SPEED_40000) { - *width = IB_WIDTH_4X; - *speed = IB_SPEED_FDR10; - } else { - *width = IB_WIDTH_4X; - *speed = IB_SPEED_EDR; - } + ib_get_width_and_speed(netdev_speed, lksettings.lanes, + speed, width); return 0; } diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h index ea81b2497511..9fd9849ebdd1 100644 --- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h +++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h @@ -112,12 +112,34 @@ struct bnxt_re_gsi_context { #define BNXT_RE_NQ_IDX 1 #define BNXT_RE_GEN_P5_MAX_VF 64 +struct bnxt_re_pacing { + u64 dbr_db_fifo_reg_off; + void *dbr_page; + u64 dbr_bar_addr; + u32 pacing_algo_th; + u32 do_pacing_save; + u32 dbq_pacing_time; /* ms */ + u32 dbr_def_do_pacing; + bool dbr_pacing; + struct mutex dbq_lock; /* synchronize db pacing algo */ +}; + +#define BNXT_RE_MAX_DBR_DO_PACING 0xFFFF +#define BNXT_RE_DBR_PACING_TIME 5 /* ms */ +#define BNXT_RE_PACING_ALGO_THRESHOLD 250 /* Entries in DB FIFO */ +#define BNXT_RE_PACING_ALARM_TH_MULTIPLE 2 /* Multiple of pacing algo threshold */ +/* Default do_pacing value when there is no congestion */ +#define BNXT_RE_DBR_DO_PACING_NO_CONGESTION 0x7F /* 1 in 512 probability */ +#define BNXT_RE_DB_FIFO_ROOM_MASK 0x1FFF8000 +#define BNXT_RE_MAX_FIFO_DEPTH 0x2c00 +#define BNXT_RE_DB_FIFO_ROOM_SHIFT 15 +#define BNXT_RE_GRC_FIFO_REG_BASE 0x2000 + struct bnxt_re_dev { struct ib_device ibdev; struct list_head list; unsigned long flags; #define BNXT_RE_FLAG_NETDEV_REGISTERED 0 -#define BNXT_RE_FLAG_GOT_MSIX 2 #define BNXT_RE_FLAG_HAVE_L2_REF 3 #define BNXT_RE_FLAG_RCFW_CHANNEL_EN 4 #define BNXT_RE_FLAG_QOS_WORK_REG 5 @@ -152,16 +174,9 @@ struct bnxt_re_dev { struct bnxt_qplib_res qplib_res; struct bnxt_qplib_dpi dpi_privileged; - atomic_t qp_count; struct mutex qp_lock; /* protect qp list */ struct list_head qp_list; 
- atomic_t cq_count; - atomic_t srq_count; - atomic_t mr_count; - atomic_t mw_count; - atomic_t ah_count; - atomic_t pd_count; /* Max of 2 lossless traffic class supported per port */ u16 cosq[2]; @@ -171,6 +186,9 @@ struct bnxt_re_dev { atomic_t nq_alloc_cnt; u32 is_virtfn; u32 num_vfs; + struct bnxt_re_pacing pacing; + struct work_struct dbq_fifo_check_work; + struct delayed_work dbq_pacing_work; }; #define to_bnxt_re_dev(ptr, member) \ @@ -181,6 +199,7 @@ struct bnxt_re_dev { #define BNXT_RE_ROCEV2_IPV6_PACKET 3 #define BNXT_RE_CHECK_RC(x) ((x) && ((x) != -ETIMEDOUT)) +void bnxt_re_pacing_alert(struct bnxt_re_dev *rdev); static inline struct device *rdev_to_dev(struct bnxt_re_dev *rdev) { diff --git a/drivers/infiniband/hw/bnxt_re/hw_counters.c b/drivers/infiniband/hw/bnxt_re/hw_counters.c index 825d512799d9..93572405d6fa 100644 --- a/drivers/infiniband/hw/bnxt_re/hw_counters.c +++ b/drivers/infiniband/hw/bnxt_re/hw_counters.c @@ -61,15 +61,29 @@ static const struct rdma_stat_desc bnxt_re_stat_descs[] = { [BNXT_RE_ACTIVE_PD].name = "active_pds", [BNXT_RE_ACTIVE_AH].name = "active_ahs", [BNXT_RE_ACTIVE_QP].name = "active_qps", + [BNXT_RE_ACTIVE_RC_QP].name = "active_rc_qps", + [BNXT_RE_ACTIVE_UD_QP].name = "active_ud_qps", [BNXT_RE_ACTIVE_SRQ].name = "active_srqs", [BNXT_RE_ACTIVE_CQ].name = "active_cqs", [BNXT_RE_ACTIVE_MR].name = "active_mrs", [BNXT_RE_ACTIVE_MW].name = "active_mws", + [BNXT_RE_WATERMARK_PD].name = "watermark_pds", + [BNXT_RE_WATERMARK_AH].name = "watermark_ahs", + [BNXT_RE_WATERMARK_QP].name = "watermark_qps", + [BNXT_RE_WATERMARK_RC_QP].name = "watermark_rc_qps", + [BNXT_RE_WATERMARK_UD_QP].name = "watermark_ud_qps", + [BNXT_RE_WATERMARK_SRQ].name = "watermark_srqs", + [BNXT_RE_WATERMARK_CQ].name = "watermark_cqs", + [BNXT_RE_WATERMARK_MR].name = "watermark_mrs", + [BNXT_RE_WATERMARK_MW].name = "watermark_mws", + [BNXT_RE_RESIZE_CQ_CNT].name = "resize_cq_cnt", [BNXT_RE_RX_PKTS].name = "rx_pkts", [BNXT_RE_RX_BYTES].name = "rx_bytes", [BNXT_RE_TX_PKTS].name = "tx_pkts", [BNXT_RE_TX_BYTES].name = "tx_bytes", [BNXT_RE_RECOVERABLE_ERRORS].name = "recoverable_errors", + [BNXT_RE_TX_ERRORS].name = "tx_roce_errors", + [BNXT_RE_TX_DISCARDS].name = "tx_roce_discards", [BNXT_RE_RX_ERRORS].name = "rx_roce_errors", [BNXT_RE_RX_DISCARDS].name = "rx_roce_discards", [BNXT_RE_TO_RETRANSMITS].name = "to_retransmits", @@ -117,14 +131,25 @@ static const struct rdma_stat_desc bnxt_re_stat_descs[] = { [BNXT_RE_TX_READ_RES].name = "tx_read_resp", [BNXT_RE_TX_WRITE_REQ].name = "tx_write_req", [BNXT_RE_TX_SEND_REQ].name = "tx_send_req", + [BNXT_RE_TX_ROCE_PKTS].name = "tx_roce_only_pkts", + [BNXT_RE_TX_ROCE_BYTES].name = "tx_roce_only_bytes", [BNXT_RE_RX_ATOMIC_REQ].name = "rx_atomic_req", [BNXT_RE_RX_READ_REQ].name = "rx_read_req", [BNXT_RE_RX_READ_RESP].name = "rx_read_resp", [BNXT_RE_RX_WRITE_REQ].name = "rx_write_req", [BNXT_RE_RX_SEND_REQ].name = "rx_send_req", + [BNXT_RE_RX_ROCE_PKTS].name = "rx_roce_only_pkts", + [BNXT_RE_RX_ROCE_BYTES].name = "rx_roce_only_bytes", [BNXT_RE_RX_ROCE_GOOD_PKTS].name = "rx_roce_good_pkts", [BNXT_RE_RX_ROCE_GOOD_BYTES].name = "rx_roce_good_bytes", - [BNXT_RE_OOB].name = "rx_out_of_buffer" + [BNXT_RE_OOB].name = "rx_out_of_buffer", + [BNXT_RE_TX_CNP].name = "tx_cnp_pkts", + [BNXT_RE_RX_CNP].name = "rx_cnp_pkts", + [BNXT_RE_RX_ECN].name = "rx_ecn_marked_pkts", + [BNXT_RE_PACING_RESCHED].name = "pacing_reschedule", + [BNXT_RE_PACING_CMPL].name = "pacing_complete", + [BNXT_RE_PACING_ALERT].name = "pacing_alerts", + [BNXT_RE_DB_FIFO_REG].name = 
"db_fifo_register", }; static void bnxt_re_copy_ext_stats(struct bnxt_re_dev *rdev, @@ -136,14 +161,22 @@ static void bnxt_re_copy_ext_stats(struct bnxt_re_dev *rdev, stats->value[BNXT_RE_TX_READ_RES] = s->tx_read_res; stats->value[BNXT_RE_TX_WRITE_REQ] = s->tx_write_req; stats->value[BNXT_RE_TX_SEND_REQ] = s->tx_send_req; + stats->value[BNXT_RE_TX_ROCE_PKTS] = s->tx_roce_pkts; + stats->value[BNXT_RE_TX_ROCE_BYTES] = s->tx_roce_bytes; stats->value[BNXT_RE_RX_ATOMIC_REQ] = s->rx_atomic_req; stats->value[BNXT_RE_RX_READ_REQ] = s->rx_read_req; stats->value[BNXT_RE_RX_READ_RESP] = s->rx_read_res; stats->value[BNXT_RE_RX_WRITE_REQ] = s->rx_write_req; stats->value[BNXT_RE_RX_SEND_REQ] = s->rx_send_req; + stats->value[BNXT_RE_RX_ROCE_PKTS] = s->rx_roce_pkts; + stats->value[BNXT_RE_RX_ROCE_BYTES] = s->rx_roce_bytes; stats->value[BNXT_RE_RX_ROCE_GOOD_PKTS] = s->rx_roce_good_pkts; stats->value[BNXT_RE_RX_ROCE_GOOD_BYTES] = s->rx_roce_good_bytes; stats->value[BNXT_RE_OOB] = s->rx_out_of_buffer; + stats->value[BNXT_RE_TX_CNP] = s->tx_cnp; + stats->value[BNXT_RE_RX_CNP] = s->rx_cnp; + stats->value[BNXT_RE_RX_ECN] = s->rx_ecn_marked; + stats->value[BNXT_RE_OUT_OF_SEQ_ERR] = s->rx_out_of_sequence; } static int bnxt_re_get_ext_stat(struct bnxt_re_dev *rdev, @@ -249,30 +282,59 @@ static void bnxt_re_copy_err_stats(struct bnxt_re_dev *rdev, err_s->res_oos_drop_count; } +static void bnxt_re_copy_db_pacing_stats(struct bnxt_re_dev *rdev, + struct rdma_hw_stats *stats) +{ + struct bnxt_re_db_pacing_stats *pacing_s = &rdev->stats.pacing; + + stats->value[BNXT_RE_PACING_RESCHED] = pacing_s->resched; + stats->value[BNXT_RE_PACING_CMPL] = pacing_s->complete; + stats->value[BNXT_RE_PACING_ALERT] = pacing_s->alerts; + stats->value[BNXT_RE_DB_FIFO_REG] = + readl(rdev->en_dev->bar0 + rdev->pacing.dbr_db_fifo_reg_off); +} + int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats, u32 port, int index) { struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev); - struct ctx_hw_stats *hw_stats = NULL; + struct bnxt_re_res_cntrs *res_s = &rdev->stats.res; struct bnxt_qplib_roce_stats *err_s = NULL; + struct ctx_hw_stats *hw_stats = NULL; int rc = 0; hw_stats = rdev->qplib_ctx.stats.dma; if (!port || !stats) return -EINVAL; - stats->value[BNXT_RE_ACTIVE_QP] = atomic_read(&rdev->qp_count); - stats->value[BNXT_RE_ACTIVE_SRQ] = atomic_read(&rdev->srq_count); - stats->value[BNXT_RE_ACTIVE_CQ] = atomic_read(&rdev->cq_count); - stats->value[BNXT_RE_ACTIVE_MR] = atomic_read(&rdev->mr_count); - stats->value[BNXT_RE_ACTIVE_MW] = atomic_read(&rdev->mw_count); - stats->value[BNXT_RE_ACTIVE_PD] = atomic_read(&rdev->pd_count); - stats->value[BNXT_RE_ACTIVE_AH] = atomic_read(&rdev->ah_count); + stats->value[BNXT_RE_ACTIVE_QP] = atomic_read(&res_s->qp_count); + stats->value[BNXT_RE_ACTIVE_RC_QP] = atomic_read(&res_s->rc_qp_count); + stats->value[BNXT_RE_ACTIVE_UD_QP] = atomic_read(&res_s->ud_qp_count); + stats->value[BNXT_RE_ACTIVE_SRQ] = atomic_read(&res_s->srq_count); + stats->value[BNXT_RE_ACTIVE_CQ] = atomic_read(&res_s->cq_count); + stats->value[BNXT_RE_ACTIVE_MR] = atomic_read(&res_s->mr_count); + stats->value[BNXT_RE_ACTIVE_MW] = atomic_read(&res_s->mw_count); + stats->value[BNXT_RE_ACTIVE_PD] = atomic_read(&res_s->pd_count); + stats->value[BNXT_RE_ACTIVE_AH] = atomic_read(&res_s->ah_count); + stats->value[BNXT_RE_WATERMARK_QP] = res_s->qp_watermark; + stats->value[BNXT_RE_WATERMARK_RC_QP] = res_s->rc_qp_watermark; + stats->value[BNXT_RE_WATERMARK_UD_QP] = res_s->ud_qp_watermark; + 
stats->value[BNXT_RE_WATERMARK_SRQ] = res_s->srq_watermark; + stats->value[BNXT_RE_WATERMARK_CQ] = res_s->cq_watermark; + stats->value[BNXT_RE_WATERMARK_MR] = res_s->mr_watermark; + stats->value[BNXT_RE_WATERMARK_MW] = res_s->mw_watermark; + stats->value[BNXT_RE_WATERMARK_PD] = res_s->pd_watermark; + stats->value[BNXT_RE_WATERMARK_AH] = res_s->ah_watermark; + stats->value[BNXT_RE_RESIZE_CQ_CNT] = atomic_read(&res_s->resize_count); if (hw_stats) { stats->value[BNXT_RE_RECOVERABLE_ERRORS] = le64_to_cpu(hw_stats->tx_bcast_pkts); + stats->value[BNXT_RE_TX_DISCARDS] = + le64_to_cpu(hw_stats->tx_discard_pkts); + stats->value[BNXT_RE_TX_ERRORS] = + le64_to_cpu(hw_stats->tx_error_pkts); stats->value[BNXT_RE_RX_ERRORS] = le64_to_cpu(hw_stats->rx_error_pkts); stats->value[BNXT_RE_RX_DISCARDS] = @@ -294,6 +356,7 @@ int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev, &rdev->flags); goto done; } + bnxt_re_copy_err_stats(rdev, stats, err_s); if (_is_ext_stats_supported(rdev->dev_attr.dev_cap_flags) && !rdev->is_virtfn) { rc = bnxt_re_get_ext_stat(rdev, stats); @@ -303,7 +366,8 @@ int bnxt_re_ib_get_hw_stats(struct ib_device *ibdev, goto done; } } - bnxt_re_copy_err_stats(rdev, stats, err_s); + if (rdev->pacing.dbr_pacing) + bnxt_re_copy_db_pacing_stats(rdev, stats); } done: diff --git a/drivers/infiniband/hw/bnxt_re/hw_counters.h b/drivers/infiniband/hw/bnxt_re/hw_counters.h index 7943b2c393e4..e541b6f8ca9f 100644 --- a/drivers/infiniband/hw/bnxt_re/hw_counters.h +++ b/drivers/infiniband/hw/bnxt_re/hw_counters.h @@ -44,15 +44,29 @@ enum bnxt_re_hw_stats { BNXT_RE_ACTIVE_PD, BNXT_RE_ACTIVE_AH, BNXT_RE_ACTIVE_QP, + BNXT_RE_ACTIVE_RC_QP, + BNXT_RE_ACTIVE_UD_QP, BNXT_RE_ACTIVE_SRQ, BNXT_RE_ACTIVE_CQ, BNXT_RE_ACTIVE_MR, BNXT_RE_ACTIVE_MW, + BNXT_RE_WATERMARK_PD, + BNXT_RE_WATERMARK_AH, + BNXT_RE_WATERMARK_QP, + BNXT_RE_WATERMARK_RC_QP, + BNXT_RE_WATERMARK_UD_QP, + BNXT_RE_WATERMARK_SRQ, + BNXT_RE_WATERMARK_CQ, + BNXT_RE_WATERMARK_MR, + BNXT_RE_WATERMARK_MW, + BNXT_RE_RESIZE_CQ_CNT, BNXT_RE_RX_PKTS, BNXT_RE_RX_BYTES, BNXT_RE_TX_PKTS, BNXT_RE_TX_BYTES, BNXT_RE_RECOVERABLE_ERRORS, + BNXT_RE_TX_ERRORS, + BNXT_RE_TX_DISCARDS, BNXT_RE_RX_ERRORS, BNXT_RE_RX_DISCARDS, BNXT_RE_TO_RETRANSMITS, @@ -100,19 +114,58 @@ enum bnxt_re_hw_stats { BNXT_RE_TX_READ_RES, BNXT_RE_TX_WRITE_REQ, BNXT_RE_TX_SEND_REQ, + BNXT_RE_TX_ROCE_PKTS, + BNXT_RE_TX_ROCE_BYTES, BNXT_RE_RX_ATOMIC_REQ, BNXT_RE_RX_READ_REQ, BNXT_RE_RX_READ_RESP, BNXT_RE_RX_WRITE_REQ, BNXT_RE_RX_SEND_REQ, + BNXT_RE_RX_ROCE_PKTS, + BNXT_RE_RX_ROCE_BYTES, BNXT_RE_RX_ROCE_GOOD_PKTS, BNXT_RE_RX_ROCE_GOOD_BYTES, BNXT_RE_OOB, + BNXT_RE_TX_CNP, + BNXT_RE_RX_CNP, + BNXT_RE_RX_ECN, + BNXT_RE_PACING_RESCHED, + BNXT_RE_PACING_CMPL, + BNXT_RE_PACING_ALERT, + BNXT_RE_DB_FIFO_REG, BNXT_RE_NUM_EXT_COUNTERS }; #define BNXT_RE_NUM_STD_COUNTERS (BNXT_RE_OUT_OF_SEQ_ERR + 1) +struct bnxt_re_db_pacing_stats { + u64 resched; + u64 complete; + u64 alerts; +}; + +struct bnxt_re_res_cntrs { + atomic_t qp_count; + atomic_t rc_qp_count; + atomic_t ud_qp_count; + atomic_t cq_count; + atomic_t srq_count; + atomic_t mr_count; + atomic_t mw_count; + atomic_t ah_count; + atomic_t pd_count; + atomic_t resize_count; + u64 qp_watermark; + u64 rc_qp_watermark; + u64 ud_qp_watermark; + u64 cq_watermark; + u64 srq_watermark; + u64 mr_watermark; + u64 mw_watermark; + u64 ah_watermark; + u64 pd_watermark; +}; + struct bnxt_re_rstat { struct bnxt_qplib_roce_stats errs; struct bnxt_qplib_ext_stat ext_stat; @@ -120,6 +173,8 @@ struct bnxt_re_rstat { struct bnxt_re_stats { struct bnxt_re_rstat rstat; + struct 
bnxt_re_res_cntrs res; + struct bnxt_re_db_pacing_stats pacing; }; struct rdma_hw_stats *bnxt_re_ib_alloc_hw_port_stats(struct ib_device *ibdev, diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 03cc45a5458d..0848c2c2ffcf 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -284,7 +284,7 @@ int bnxt_re_query_gid(struct ib_device *ibdev, u32 port_num, int index, union ib_gid *gid) { struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev); - int rc = 0; + int rc; /* Ignore port_num */ memset(gid, 0, sizeof(*gid)); @@ -565,6 +565,8 @@ bnxt_re_mmap_entry_insert(struct bnxt_re_ucontext *uctx, u64 mem_offset, break; case BNXT_RE_MMAP_UC_DB: case BNXT_RE_MMAP_WC_DB: + case BNXT_RE_MMAP_DBR_BAR: + case BNXT_RE_MMAP_DBR_PAGE: ret = rdma_user_mmap_entry_insert(&uctx->ib_uctx, &entry->rdma_entry, PAGE_SIZE); break; @@ -600,7 +602,7 @@ int bnxt_re_dealloc_pd(struct ib_pd *ib_pd, struct ib_udata *udata) if (!bnxt_qplib_dealloc_pd(&rdev->qplib_res, &rdev->qplib_res.pd_tbl, &pd->qplib_pd)) - atomic_dec(&rdev->pd_count); + atomic_dec(&rdev->stats.res.pd_count); } return 0; } @@ -613,10 +615,11 @@ int bnxt_re_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) udata, struct bnxt_re_ucontext, ib_uctx); struct bnxt_re_pd *pd = container_of(ibpd, struct bnxt_re_pd, ib_pd); struct bnxt_re_user_mmap_entry *entry = NULL; + u32 active_pds; int rc = 0; pd->rdev = rdev; - if (bnxt_qplib_alloc_pd(&rdev->qplib_res.pd_tbl, &pd->qplib_pd)) { + if (bnxt_qplib_alloc_pd(&rdev->qplib_res, &pd->qplib_pd)) { ibdev_err(&rdev->ibdev, "Failed to allocate HW PD"); rc = -ENOMEM; goto fail; @@ -663,7 +666,9 @@ int bnxt_re_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) if (bnxt_re_create_fence_mr(pd)) ibdev_warn(&rdev->ibdev, "Failed to create Fence-MR\n"); - atomic_inc(&rdev->pd_count); + active_pds = atomic_inc_return(&rdev->stats.res.pd_count); + if (active_pds > rdev->stats.res.pd_watermark) + rdev->stats.res.pd_watermark = active_pds; return 0; dbfail: @@ -679,7 +684,7 @@ int bnxt_re_destroy_ah(struct ib_ah *ib_ah, u32 flags) struct bnxt_re_ah *ah = container_of(ib_ah, struct bnxt_re_ah, ib_ah); struct bnxt_re_dev *rdev = ah->rdev; bool block = true; - int rc = 0; + int rc; block = !(flags & RDMA_DESTROY_AH_SLEEPABLE); rc = bnxt_qplib_destroy_ah(&rdev->qplib_res, &ah->qplib_ah, block); @@ -689,7 +694,7 @@ int bnxt_re_destroy_ah(struct ib_ah *ib_ah, u32 flags) else goto fail; } - atomic_dec(&rdev->ah_count); + atomic_dec(&rdev->stats.res.ah_count); fail: return rc; } @@ -723,6 +728,7 @@ int bnxt_re_create_ah(struct ib_ah *ib_ah, struct rdma_ah_init_attr *init_attr, const struct ib_gid_attr *sgid_attr; struct bnxt_re_gid_ctx *ctx; struct bnxt_re_ah *ah = container_of(ib_ah, struct bnxt_re_ah, ib_ah); + u32 active_ahs; u8 nw_type; int rc; @@ -775,7 +781,9 @@ int bnxt_re_create_ah(struct ib_ah *ib_ah, struct rdma_ah_init_attr *init_attr, wmb(); /* make sure cache is updated. 
*/ spin_unlock_irqrestore(&uctx->sh_lock, flag); } - atomic_inc(&rdev->ah_count); + active_ahs = atomic_inc_return(&rdev->stats.res.ah_count); + if (active_ahs > rdev->stats.res.ah_watermark) + rdev->stats.res.ah_watermark = active_ahs; return 0; } @@ -826,7 +834,7 @@ static int bnxt_re_destroy_gsi_sqp(struct bnxt_re_qp *qp) struct bnxt_re_qp *gsi_sqp; struct bnxt_re_ah *gsi_sah; struct bnxt_re_dev *rdev; - int rc = 0; + int rc; rdev = qp->rdev; gsi_sqp = rdev->gsi_ctx.gsi_sqp; @@ -836,7 +844,7 @@ static int bnxt_re_destroy_gsi_sqp(struct bnxt_re_qp *qp) bnxt_qplib_destroy_ah(&rdev->qplib_res, &gsi_sah->qplib_ah, true); - atomic_dec(&rdev->ah_count); + atomic_dec(&rdev->stats.res.ah_count); bnxt_qplib_clean_qp(&qp->qplib_qp); ibdev_dbg(&rdev->ibdev, "Destroy the shadow QP\n"); @@ -851,7 +859,7 @@ static int bnxt_re_destroy_gsi_sqp(struct bnxt_re_qp *qp) mutex_lock(&rdev->qp_lock); list_del(&gsi_sqp->list); mutex_unlock(&rdev->qp_lock); - atomic_dec(&rdev->qp_count); + atomic_dec(&rdev->stats.res.qp_count); kfree(rdev->gsi_ctx.sqp_tbl); kfree(gsi_sah); @@ -901,7 +909,7 @@ int bnxt_re_destroy_qp(struct ib_qp *ib_qp, struct ib_udata *udata) mutex_lock(&rdev->qp_lock); list_del(&qp->list); mutex_unlock(&rdev->qp_lock); - atomic_dec(&rdev->qp_count); + atomic_dec(&rdev->stats.res.qp_count); ib_umem_release(qp->rumem); ib_umem_release(qp->sumem); @@ -1095,7 +1103,7 @@ static struct bnxt_re_ah *bnxt_re_create_shadow_qp_ah "Failed to allocate HW AH for Shadow QP"); goto fail; } - atomic_inc(&rdev->ah_count); + atomic_inc(&rdev->stats.res.ah_count); return ah; @@ -1163,7 +1171,7 @@ static struct bnxt_re_qp *bnxt_re_create_shadow_qp INIT_LIST_HEAD(&qp->list); mutex_lock(&rdev->qp_lock); list_add_tail(&qp->list, &rdev->qp_list); - atomic_inc(&rdev->qp_count); + atomic_inc(&rdev->stats.res.qp_count); mutex_unlock(&rdev->qp_lock); return qp; fail: @@ -1340,8 +1348,7 @@ static int bnxt_re_init_qp_attr(struct bnxt_re_qp *qp, struct bnxt_re_pd *pd, qplqp->pd = &pd->qplib_pd; qplqp->qp_handle = (u64)qplqp; qplqp->max_inline_data = init_attr->cap.max_inline_data; - qplqp->sig_type = ((init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) ? 
- true : false); + qplqp->sig_type = init_attr->sq_sig_type == IB_SIGNAL_ALL_WR; qptype = bnxt_re_init_qp_type(rdev, init_attr); if (qptype < 0) { rc = qptype; @@ -1446,7 +1453,7 @@ static int bnxt_re_create_gsi_qp(struct bnxt_re_qp *qp, struct bnxt_re_pd *pd, { struct bnxt_re_dev *rdev; struct bnxt_qplib_qp *qplqp; - int rc = 0; + int rc; rdev = qp->rdev; qplqp = &qp->qplib_qp; @@ -1497,6 +1504,7 @@ int bnxt_re_create_qp(struct ib_qp *ib_qp, struct ib_qp_init_attr *qp_init_attr, struct bnxt_re_dev *rdev = pd->rdev; struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr; struct bnxt_re_qp *qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp); + u32 active_qps; int rc; rc = bnxt_re_test_qp_limits(rdev, qp_init_attr, dev_attr); @@ -1545,7 +1553,18 @@ int bnxt_re_create_qp(struct ib_qp *ib_qp, struct ib_qp_init_attr *qp_init_attr, mutex_lock(&rdev->qp_lock); list_add_tail(&qp->list, &rdev->qp_list); mutex_unlock(&rdev->qp_lock); - atomic_inc(&rdev->qp_count); + active_qps = atomic_inc_return(&rdev->stats.res.qp_count); + if (active_qps > rdev->stats.res.qp_watermark) + rdev->stats.res.qp_watermark = active_qps; + if (qp_init_attr->qp_type == IB_QPT_RC) { + active_qps = atomic_inc_return(&rdev->stats.res.rc_qp_count); + if (active_qps > rdev->stats.res.rc_qp_watermark) + rdev->stats.res.rc_qp_watermark = active_qps; + } else if (qp_init_attr->qp_type == IB_QPT_UD) { + active_qps = atomic_inc_return(&rdev->stats.res.ud_qp_count); + if (active_qps > rdev->stats.res.ud_qp_watermark) + rdev->stats.res.ud_qp_watermark = active_qps; + } return 0; qp_destroy: @@ -1648,7 +1667,7 @@ int bnxt_re_destroy_srq(struct ib_srq *ib_srq, struct ib_udata *udata) nq = qplib_srq->cq->nq; bnxt_qplib_destroy_srq(&rdev->qplib_res, qplib_srq); ib_umem_release(srq->umem); - atomic_dec(&rdev->srq_count); + atomic_dec(&rdev->stats.res.srq_count); if (nq) nq->budget--; return 0; @@ -1696,6 +1715,7 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq, struct bnxt_re_srq *srq; struct bnxt_re_pd *pd; struct ib_pd *ib_pd; + u32 active_srqs; int rc, entries; ib_pd = ib_srq->pd; @@ -1760,7 +1780,9 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq, } if (nq) nq->budget++; - atomic_inc(&rdev->srq_count); + active_srqs = atomic_inc_return(&rdev->stats.res.srq_count); + if (active_srqs > rdev->stats.res.srq_watermark) + rdev->stats.res.srq_watermark = active_srqs; spin_lock_init(&srq->lock); return 0; @@ -1862,7 +1884,7 @@ static int bnxt_re_modify_shadow_qp(struct bnxt_re_dev *rdev, int qp_attr_mask) { struct bnxt_re_qp *qp = rdev->gsi_ctx.gsi_sqp; - int rc = 0; + int rc; if (qp_attr_mask & IB_QP_STATE) { qp->qplib_qp.modify_flags |= CMDQ_MODIFY_QP_MODIFY_MASK_STATE; @@ -2212,7 +2234,7 @@ static int bnxt_re_build_qp1_send_v2(struct bnxt_re_qp *qp, u8 ip_version = 0; u16 vlan_id = 0xFFFF; void *buf; - int i, rc = 0; + int i, rc; memset(&qp->qp1_hdr, 0, sizeof(qp->qp1_hdr)); @@ -2250,7 +2272,7 @@ static int bnxt_re_build_qp1_send_v2(struct bnxt_re_qp *qp, } is_eth = true; - is_vlan = (vlan_id && (vlan_id < 0x1000)) ? 
true : false; + is_vlan = vlan_id && (vlan_id < 0x1000); ib_ud_header_init(payload_size, !is_eth, is_eth, is_vlan, is_grh, ip_version, is_udp, 0, &qp->qp1_hdr); @@ -2787,7 +2809,6 @@ static int bnxt_re_post_recv_shadow_qp(struct bnxt_re_dev *rdev, struct bnxt_qplib_swqe wqe; int rc = 0; - memset(&wqe, 0, sizeof(wqe)); while (wr) { /* House keeping */ memset(&wqe, 0, sizeof(wqe)); @@ -2886,7 +2907,7 @@ int bnxt_re_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) bnxt_qplib_destroy_cq(&rdev->qplib_res, &cq->qplib_cq); ib_umem_release(cq->umem); - atomic_dec(&rdev->cq_count); + atomic_dec(&rdev->stats.res.cq_count); nq->budget--; kfree(cq->cql); return 0; @@ -2902,6 +2923,7 @@ int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, int cqe = attr->cqe; struct bnxt_qplib_nq *nq = NULL; unsigned int nq_alloc_cnt; + u32 active_cqs; if (attr->flags) return -EOPNOTSUPP; @@ -2970,7 +2992,9 @@ int bnxt_re_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, cq->cq_period = cq->qplib_cq.period; nq->budget++; - atomic_inc(&rdev->cq_count); + active_cqs = atomic_inc_return(&rdev->stats.res.cq_count); + if (active_cqs > rdev->stats.res.cq_watermark) + rdev->stats.res.cq_watermark = active_cqs; spin_lock_init(&cq->cq_lock); if (udata) { @@ -3083,6 +3107,7 @@ int bnxt_re_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) } cq->ib_cq.cqe = cq->resize_cqe; + atomic_inc(&rdev->stats.res.resize_count); return 0; @@ -3319,26 +3344,21 @@ static int bnxt_re_process_raw_qp_pkt_rx(struct bnxt_re_qp *gsi_qp, struct bnxt_re_dev *rdev = gsi_qp->rdev; struct bnxt_re_sqp_entries *sqp_entry = NULL; struct bnxt_re_qp *gsi_sqp = rdev->gsi_ctx.gsi_sqp; + dma_addr_t shrq_hdr_buf_map; + struct ib_sge s_sge[2] = {}; + struct ib_sge r_sge[2] = {}; struct bnxt_re_ah *gsi_sah; + struct ib_recv_wr rwr = {}; + dma_addr_t rq_hdr_buf_map; + struct ib_ud_wr udwr = {}; struct ib_send_wr *swr; - struct ib_ud_wr udwr; - struct ib_recv_wr rwr; + u32 skip_bytes = 0; int pkt_type = 0; - u32 tbl_idx; void *rq_hdr_buf; - dma_addr_t rq_hdr_buf_map; - dma_addr_t shrq_hdr_buf_map; u32 offset = 0; - u32 skip_bytes = 0; - struct ib_sge s_sge[2]; - struct ib_sge r_sge[2]; + u32 tbl_idx; int rc; - memset(&udwr, 0, sizeof(udwr)); - memset(&rwr, 0, sizeof(rwr)); - memset(&s_sge, 0, sizeof(s_sge)); - memset(&r_sge, 0, sizeof(r_sge)); - swr = &udwr.wr; tbl_idx = cqe->wr_id; @@ -3578,7 +3598,7 @@ static int send_phantom_wqe(struct bnxt_re_qp *qp) { struct bnxt_qplib_qp *lib_qp = &qp->qplib_qp; unsigned long flags; - int rc = 0; + int rc; spin_lock_irqsave(&qp->sq_lock, flags); @@ -3768,6 +3788,7 @@ struct ib_mr *bnxt_re_get_dma_mr(struct ib_pd *ib_pd, int mr_access_flags) struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd); struct bnxt_re_dev *rdev = pd->rdev; struct bnxt_re_mr *mr; + u32 active_mrs; int rc; mr = kzalloc(sizeof(*mr), GFP_KERNEL); @@ -3795,7 +3816,9 @@ struct ib_mr *bnxt_re_get_dma_mr(struct ib_pd *ib_pd, int mr_access_flags) if (mr_access_flags & (IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ | IB_ACCESS_REMOTE_ATOMIC)) mr->ib_mr.rkey = mr->ib_mr.lkey; - atomic_inc(&rdev->mr_count); + active_mrs = atomic_inc_return(&rdev->stats.res.mr_count); + if (active_mrs > rdev->stats.res.mr_watermark) + rdev->stats.res.mr_watermark = active_mrs; return &mr->ib_mr; @@ -3828,7 +3851,7 @@ int bnxt_re_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata) ib_umem_release(mr->ib_umem); kfree(mr); - atomic_dec(&rdev->mr_count); + atomic_dec(&rdev->stats.res.mr_count); return rc; } 
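/*
 * The bnxt_re hunks above repeat one idiom: each resource type now bumps an
 * atomic counter with atomic_inc_return() and records its high-water mark in
 * rdev->stats.res.*_watermark. A condensed sketch of that idiom, with
 * hypothetical example_* names (not part of the patch):
 */
#include <linux/atomic.h>
#include <linux/types.h>

struct example_res_cntr {
	atomic_t count;		/* resources currently allocated */
	u64 watermark;		/* highest count ever observed */
};

static inline void example_res_created(struct example_res_cntr *c)
{
	u32 active = atomic_inc_return(&c->count);

	/*
	 * As in the patch, the watermark update is not serialized against
	 * concurrent increments; a marginally stale peak value is tolerated.
	 */
	if (active > c->watermark)
		c->watermark = active;
}

static inline void example_res_destroyed(struct example_res_cntr *c)
{
	atomic_dec(&c->count);	/* the watermark is deliberately left alone */
}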
@@ -3858,6 +3881,7 @@ struct ib_mr *bnxt_re_alloc_mr(struct ib_pd *ib_pd, enum ib_mr_type type, struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd); struct bnxt_re_dev *rdev = pd->rdev; struct bnxt_re_mr *mr = NULL; + u32 active_mrs; int rc; if (type != IB_MR_TYPE_MEM_REG) { @@ -3896,7 +3920,9 @@ struct ib_mr *bnxt_re_alloc_mr(struct ib_pd *ib_pd, enum ib_mr_type type, goto fail_mr; } - atomic_inc(&rdev->mr_count); + active_mrs = atomic_inc_return(&rdev->stats.res.mr_count); + if (active_mrs > rdev->stats.res.mr_watermark) + rdev->stats.res.mr_watermark = active_mrs; return &mr->ib_mr; fail_mr: @@ -3914,6 +3940,7 @@ struct ib_mw *bnxt_re_alloc_mw(struct ib_pd *ib_pd, enum ib_mw_type type, struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd); struct bnxt_re_dev *rdev = pd->rdev; struct bnxt_re_mw *mw; + u32 active_mws; int rc; mw = kzalloc(sizeof(*mw), GFP_KERNEL); @@ -3932,7 +3959,9 @@ struct ib_mw *bnxt_re_alloc_mw(struct ib_pd *ib_pd, enum ib_mw_type type, } mw->ib_mw.rkey = mw->qplib_mw.rkey; - atomic_inc(&rdev->mw_count); + active_mws = atomic_inc_return(&rdev->stats.res.mw_count); + if (active_mws > rdev->stats.res.mw_watermark) + rdev->stats.res.mw_watermark = active_mws; return &mw->ib_mw; fail: @@ -3953,21 +3982,19 @@ int bnxt_re_dealloc_mw(struct ib_mw *ib_mw) } kfree(mw); - atomic_dec(&rdev->mw_count); + atomic_dec(&rdev->stats.res.mw_count); return rc; } -/* uverbs */ -struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length, - u64 virt_addr, int mr_access_flags, - struct ib_udata *udata) +static struct ib_mr *__bnxt_re_user_reg_mr(struct ib_pd *ib_pd, u64 length, u64 virt_addr, + int mr_access_flags, struct ib_umem *umem) { struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd); struct bnxt_re_dev *rdev = pd->rdev; - struct bnxt_re_mr *mr; - struct ib_umem *umem; unsigned long page_size; + struct bnxt_re_mr *mr; int umem_pgs, rc; + u32 active_mrs; if (length > BNXT_RE_MAX_MR_SIZE) { ibdev_err(&rdev->ibdev, "MR Size: %lld > Max supported:%lld\n", @@ -3975,6 +4002,12 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length, return ERR_PTR(-ENOMEM); } + page_size = ib_umem_find_best_pgsz(umem, BNXT_RE_PAGE_SIZE_SUPPORTED, virt_addr); + if (!page_size) { + ibdev_err(&rdev->ibdev, "umem page size unsupported!"); + return ERR_PTR(-EINVAL); + } + mr = kzalloc(sizeof(*mr), GFP_KERNEL); if (!mr) return ERR_PTR(-ENOMEM); @@ -3986,45 +4019,33 @@ struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length, rc = bnxt_qplib_alloc_mrw(&rdev->qplib_res, &mr->qplib_mr); if (rc) { - ibdev_err(&rdev->ibdev, "Failed to allocate MR"); + ibdev_err(&rdev->ibdev, "Failed to allocate MR rc = %d", rc); + rc = -EIO; goto free_mr; } /* The fixed portion of the rkey is the same as the lkey */ mr->ib_mr.rkey = mr->qplib_mr.rkey; - - umem = ib_umem_get(&rdev->ibdev, start, length, mr_access_flags); - if (IS_ERR(umem)) { - ibdev_err(&rdev->ibdev, "Failed to get umem"); - rc = -EFAULT; - goto free_mrw; - } mr->ib_umem = umem; - mr->qplib_mr.va = virt_addr; - page_size = ib_umem_find_best_pgsz( - umem, BNXT_RE_PAGE_SIZE_SUPPORTED, virt_addr); - if (!page_size) { - ibdev_err(&rdev->ibdev, "umem page size unsupported!"); - rc = -EFAULT; - goto free_umem; - } mr->qplib_mr.total_size = length; umem_pgs = ib_umem_num_dma_blocks(umem, page_size); rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, umem, umem_pgs, page_size); if (rc) { - ibdev_err(&rdev->ibdev, "Failed to register user MR"); - goto 
free_umem; + ibdev_err(&rdev->ibdev, "Failed to register user MR - rc = %d\n", rc); + rc = -EIO; + goto free_mrw; } mr->ib_mr.lkey = mr->qplib_mr.lkey; mr->ib_mr.rkey = mr->qplib_mr.lkey; - atomic_inc(&rdev->mr_count); + active_mrs = atomic_inc_return(&rdev->stats.res.mr_count); + if (active_mrs > rdev->stats.res.mr_watermark) + rdev->stats.res.mr_watermark = active_mrs; return &mr->ib_mr; -free_umem: - ib_umem_release(umem); + free_mrw: bnxt_qplib_free_mrw(&rdev->qplib_res, &mr->qplib_mr); free_mr: @@ -4032,6 +4053,48 @@ free_mr: return ERR_PTR(rc); } +struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length, + u64 virt_addr, int mr_access_flags, + struct ib_udata *udata) +{ + struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd); + struct bnxt_re_dev *rdev = pd->rdev; + struct ib_umem *umem; + struct ib_mr *ib_mr; + + umem = ib_umem_get(&rdev->ibdev, start, length, mr_access_flags); + if (IS_ERR(umem)) + return ERR_CAST(umem); + + ib_mr = __bnxt_re_user_reg_mr(ib_pd, length, virt_addr, mr_access_flags, umem); + if (IS_ERR(ib_mr)) + ib_umem_release(umem); + return ib_mr; +} + +struct ib_mr *bnxt_re_reg_user_mr_dmabuf(struct ib_pd *ib_pd, u64 start, + u64 length, u64 virt_addr, int fd, + int mr_access_flags, struct ib_udata *udata) +{ + struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd); + struct bnxt_re_dev *rdev = pd->rdev; + struct ib_umem_dmabuf *umem_dmabuf; + struct ib_umem *umem; + struct ib_mr *ib_mr; + + umem_dmabuf = ib_umem_dmabuf_get_pinned(&rdev->ibdev, start, length, + fd, mr_access_flags); + if (IS_ERR(umem_dmabuf)) + return ERR_CAST(umem_dmabuf); + + umem = &umem_dmabuf->umem; + + ib_mr = __bnxt_re_user_reg_mr(ib_pd, length, virt_addr, mr_access_flags, umem); + if (IS_ERR(ib_mr)) + ib_umem_release(umem); + return ib_mr; +} + int bnxt_re_alloc_ucontext(struct ib_ucontext *ctx, struct ib_udata *udata) { struct ib_device *ibdev = ctx->device; @@ -4087,6 +4150,8 @@ int bnxt_re_alloc_ucontext(struct ib_ucontext *ctx, struct ib_udata *udata) goto cfail; } uctx->shpage_mmap = &entry->rdma_entry; + if (rdev->pacing.dbr_pacing) + resp.comp_mask |= BNXT_RE_UCNTX_CMASK_DBR_PACING_ENABLED; rc = ib_copy_to_udata(udata, &resp, min(udata->outlen, sizeof(resp))); if (rc) { @@ -4159,6 +4224,19 @@ int bnxt_re_mmap(struct ib_ucontext *ib_uctx, struct vm_area_struct *vma) case BNXT_RE_MMAP_SH_PAGE: ret = vm_insert_page(vma, vma->vm_start, virt_to_page(uctx->shpg)); break; + case BNXT_RE_MMAP_DBR_BAR: + pfn = bnxt_entry->mem_offset >> PAGE_SHIFT; + ret = rdma_user_mmap_io(ib_uctx, vma, pfn, PAGE_SIZE, + pgprot_noncached(vma->vm_page_prot), + rdma_entry); + break; + case BNXT_RE_MMAP_DBR_PAGE: + /* Driver doesn't expect write access for user space */ + if (vma->vm_flags & VM_WRITE) + return -EFAULT; + ret = vm_insert_page(vma, vma->vm_start, + virt_to_page((void *)bnxt_entry->mem_offset)); + break; default: ret = -EINVAL; break; @@ -4178,6 +4256,15 @@ void bnxt_re_mmap_free(struct rdma_user_mmap_entry *rdma_entry) kfree(bnxt_entry); } +static int UVERBS_HANDLER(BNXT_RE_METHOD_NOTIFY_DRV)(struct uverbs_attr_bundle *attrs) +{ + struct bnxt_re_ucontext *uctx; + + uctx = container_of(ib_uverbs_get_ucontext(attrs), struct bnxt_re_ucontext, ib_uctx); + bnxt_re_pacing_alert(uctx->rdev); + return 0; +} + static int UVERBS_HANDLER(BNXT_RE_METHOD_ALLOC_PAGE)(struct uverbs_attr_bundle *attrs) { struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs, BNXT_RE_ALLOC_PAGE_HANDLE); @@ -4190,7 +4277,7 @@ static int 
UVERBS_HANDLER(BNXT_RE_METHOD_ALLOC_PAGE)(struct uverbs_attr_bundle * u64 mmap_offset; u32 length; u32 dpi; - u64 dbr; + u64 addr; int err; uctx = container_of(ib_uverbs_get_ucontext(attrs), struct bnxt_re_ucontext, ib_uctx); @@ -4212,19 +4299,30 @@ static int UVERBS_HANDLER(BNXT_RE_METHOD_ALLOC_PAGE)(struct uverbs_attr_bundle * return -ENOMEM; length = PAGE_SIZE; dpi = uctx->wcdpi.dpi; - dbr = (u64)uctx->wcdpi.umdbr; + addr = (u64)uctx->wcdpi.umdbr; mmap_flag = BNXT_RE_MMAP_WC_DB; } else { return -EINVAL; } break; + case BNXT_RE_ALLOC_DBR_BAR_PAGE: + length = PAGE_SIZE; + addr = (u64)rdev->pacing.dbr_bar_addr; + mmap_flag = BNXT_RE_MMAP_DBR_BAR; + break; + + case BNXT_RE_ALLOC_DBR_PAGE: + length = PAGE_SIZE; + addr = (u64)rdev->pacing.dbr_page; + mmap_flag = BNXT_RE_MMAP_DBR_PAGE; + break; default: return -EOPNOTSUPP; } - entry = bnxt_re_mmap_entry_insert(uctx, dbr, mmap_flag, &mmap_offset); + entry = bnxt_re_mmap_entry_insert(uctx, addr, mmap_flag, &mmap_offset); if (!entry) return -ENOMEM; @@ -4264,6 +4362,9 @@ static int alloc_page_obj_cleanup(struct ib_uobject *uobject, uctx->wcdpi.dbr = NULL; } break; + case BNXT_RE_MMAP_DBR_BAR: + case BNXT_RE_MMAP_DBR_PAGE: + break; default: goto exit; } @@ -4301,7 +4402,13 @@ DECLARE_UVERBS_NAMED_OBJECT(BNXT_RE_OBJECT_ALLOC_PAGE, &UVERBS_METHOD(BNXT_RE_METHOD_ALLOC_PAGE), &UVERBS_METHOD(BNXT_RE_METHOD_DESTROY_PAGE)); +DECLARE_UVERBS_NAMED_METHOD(BNXT_RE_METHOD_NOTIFY_DRV); + +DECLARE_UVERBS_GLOBAL_METHODS(BNXT_RE_OBJECT_NOTIFY_DRV, + &UVERBS_METHOD(BNXT_RE_METHOD_NOTIFY_DRV)); + const struct uapi_definition bnxt_re_uapi_defs[] = { UAPI_DEF_CHAIN_OBJ_TREE_NAMED(BNXT_RE_OBJECT_ALLOC_PAGE), + UAPI_DEF_CHAIN_OBJ_TREE_NAMED(BNXT_RE_OBJECT_NOTIFY_DRV), {} }; diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h index 32d9e9d09791..84715b7e7a4e 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h @@ -146,6 +146,8 @@ enum bnxt_re_mmap_flag { BNXT_RE_MMAP_SH_PAGE, BNXT_RE_MMAP_UC_DB, BNXT_RE_MMAP_WC_DB, + BNXT_RE_MMAP_DBR_PAGE, + BNXT_RE_MMAP_DBR_BAR, }; struct bnxt_re_user_mmap_entry { @@ -227,6 +229,10 @@ int bnxt_re_dealloc_mw(struct ib_mw *mw); struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, u64 virt_addr, int mr_access_flags, struct ib_udata *udata); +struct ib_mr *bnxt_re_reg_user_mr_dmabuf(struct ib_pd *ib_pd, u64 start, + u64 length, u64 virt_addr, + int fd, int mr_access_flags, + struct ib_udata *udata); int bnxt_re_alloc_ucontext(struct ib_ucontext *ctx, struct ib_udata *udata); void bnxt_re_dealloc_ucontext(struct ib_ucontext *context); int bnxt_re_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 63e98e2d3596..c9066aade412 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -360,7 +360,7 @@ static struct bnxt_ulp_ops bnxt_re_ulp_ops = { static int bnxt_re_register_netdev(struct bnxt_re_dev *rdev) { struct bnxt_en_dev *en_dev; - int rc = 0; + int rc; en_dev = rdev->en_dev; @@ -395,10 +395,9 @@ static int bnxt_re_hwrm_qcfg(struct bnxt_re_dev *rdev, u32 *db_len, struct bnxt_en_dev *en_dev = rdev->en_dev; struct hwrm_func_qcfg_output resp = {0}; struct hwrm_func_qcfg_input req = {0}; - struct bnxt_fw_msg fw_msg; + struct bnxt_fw_msg fw_msg = {}; int rc; - memset(&fw_msg, 0, sizeof(fw_msg)); bnxt_re_init_hwrm_hdr((void *)&req, HWRM_FUNC_QCFG); req.fid = cpu_to_le16(0xffff); 
bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, @@ -432,9 +431,219 @@ int bnxt_re_hwrm_qcaps(struct bnxt_re_dev *rdev) return rc; cctx->modes.db_push = le32_to_cpu(resp.flags) & FUNC_QCAPS_RESP_FLAGS_WCB_PUSH_MODE; + cctx->modes.dbr_pacing = + le32_to_cpu(resp.flags_ext2) & + FUNC_QCAPS_RESP_FLAGS_EXT2_DBR_PACING_EXT_SUPPORTED; + return 0; +} + +static int bnxt_re_hwrm_dbr_pacing_qcfg(struct bnxt_re_dev *rdev) +{ + struct hwrm_func_dbr_pacing_qcfg_output resp = {}; + struct hwrm_func_dbr_pacing_qcfg_input req = {}; + struct bnxt_en_dev *en_dev = rdev->en_dev; + struct bnxt_qplib_chip_ctx *cctx; + struct bnxt_fw_msg fw_msg = {}; + int rc; + + cctx = rdev->chip_ctx; + bnxt_re_init_hwrm_hdr((void *)&req, HWRM_FUNC_DBR_PACING_QCFG); + bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, + sizeof(resp), DFLT_HWRM_CMD_TIMEOUT); + rc = bnxt_send_msg(en_dev, &fw_msg); + if (rc) + return rc; + + if ((le32_to_cpu(resp.dbr_stat_db_fifo_reg) & + FUNC_DBR_PACING_QCFG_RESP_DBR_STAT_DB_FIFO_REG_ADDR_SPACE_MASK) == + FUNC_DBR_PACING_QCFG_RESP_DBR_STAT_DB_FIFO_REG_ADDR_SPACE_GRC) + cctx->dbr_stat_db_fifo = + le32_to_cpu(resp.dbr_stat_db_fifo_reg) & + ~FUNC_DBR_PACING_QCFG_RESP_DBR_STAT_DB_FIFO_REG_ADDR_SPACE_MASK; + return 0; +} + +/* Update the pacing tunable parameters to the default values */ +static void bnxt_re_set_default_pacing_data(struct bnxt_re_dev *rdev) +{ + struct bnxt_qplib_db_pacing_data *pacing_data = rdev->qplib_res.pacing_data; + + pacing_data->do_pacing = rdev->pacing.dbr_def_do_pacing; + pacing_data->pacing_th = rdev->pacing.pacing_algo_th; + pacing_data->alarm_th = + pacing_data->pacing_th * BNXT_RE_PACING_ALARM_TH_MULTIPLE; +} + +static void __wait_for_fifo_occupancy_below_th(struct bnxt_re_dev *rdev) +{ + u32 read_val, fifo_occup; + + /* loop shouldn't run infinitely as the occupancy usually goes + * below the pacing algo threshold as soon as pacing kicks in. + */ + while (1) { + read_val = readl(rdev->en_dev->bar0 + rdev->pacing.dbr_db_fifo_reg_off); + fifo_occup = BNXT_RE_MAX_FIFO_DEPTH - + ((read_val & BNXT_RE_DB_FIFO_ROOM_MASK) >> + BNXT_RE_DB_FIFO_ROOM_SHIFT); + /* FIFO occupancy cannot be greater than the MAX FIFO depth */ + if (fifo_occup > BNXT_RE_MAX_FIFO_DEPTH) + break; + + if (fifo_occup < rdev->qplib_res.pacing_data->pacing_th) + break; + } +} + +static void bnxt_re_db_fifo_check(struct work_struct *work) +{ + struct bnxt_re_dev *rdev = container_of(work, struct bnxt_re_dev, + dbq_fifo_check_work); + struct bnxt_qplib_db_pacing_data *pacing_data; + u32 pacing_save; + + if (!mutex_trylock(&rdev->pacing.dbq_lock)) + return; + pacing_data = rdev->qplib_res.pacing_data; + pacing_save = rdev->pacing.do_pacing_save; + __wait_for_fifo_occupancy_below_th(rdev); + cancel_delayed_work_sync(&rdev->dbq_pacing_work); + if (pacing_save > rdev->pacing.dbr_def_do_pacing) { + /* Double the do_pacing value during the congestion */ + pacing_save = pacing_save << 1; + } else { + /* + * When new congestion is detected, increase the do_pacing + * by 8 times, and also increase the pacing_th by 4 times. The + * reason to increase pacing_th is to give more space for the + * queue to oscillate down without getting empty, but also more + * room for the queue to increase without causing another alarm. + */ + pacing_save = pacing_save << 3; + pacing_data->pacing_th = rdev->pacing.pacing_algo_th * 4; + } + + if (pacing_save > BNXT_RE_MAX_DBR_DO_PACING) + pacing_save = BNXT_RE_MAX_DBR_DO_PACING; + + pacing_data->do_pacing = pacing_save; + rdev->pacing.do_pacing_save = pacing_data->do_pacing; + pacing_data->alarm_th = + pacing_data->pacing_th * BNXT_RE_PACING_ALARM_TH_MULTIPLE; + schedule_delayed_work(&rdev->dbq_pacing_work, + msecs_to_jiffies(rdev->pacing.dbq_pacing_time)); + rdev->stats.pacing.alerts++; + mutex_unlock(&rdev->pacing.dbq_lock); +}
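An editor's sketch may help here: the escalation logic in bnxt_re_db_fifo_check() above and the decay logic in bnxt_re_pacing_timer_exp() just below form one small state machine, and it can be exercised outside the driver. The following is a minimal user-space model, not driver code; the BNXT_RE_* register constants and the default and maximum do_pacing values are assumptions chosen for illustration.

#include <stdint.h>
#include <stdio.h>

/* Register-layout constants: the driver reads these from BNXT_RE_*
 * macros; the concrete values below are assumptions for illustration. */
#define FIFO_MAX_DEPTH   0x2c00u
#define FIFO_ROOM_MASK   0x1fff8000u
#define FIFO_ROOM_SHIFT  15
#define DEF_DO_PACING    0x10u   /* hypothetical no-congestion value */
#define MAX_DO_PACING    0xffffu

/* Occupancy = depth minus advertised room, exactly as
 * __wait_for_fifo_occupancy_below_th() computes it. */
static uint32_t fifo_occupancy(uint32_t read_val)
{
	return FIFO_MAX_DEPTH - ((read_val & FIFO_ROOM_MASK) >> FIFO_ROOM_SHIFT);
}

/* One alarm, as in bnxt_re_db_fifo_check(): double an already raised
 * do_pacing, or jump 8x (and widen pacing_th 4x) on fresh congestion. */
static uint32_t escalate(uint32_t do_pacing, uint32_t *pacing_th,
			 uint32_t algo_th)
{
	if (do_pacing > DEF_DO_PACING) {
		do_pacing <<= 1;
	} else {
		do_pacing <<= 3;
		*pacing_th = algo_th * 4;
	}
	return do_pacing > MAX_DO_PACING ? MAX_DO_PACING : do_pacing;
}

/* One quiet timer expiry, as in bnxt_re_pacing_timer_exp(): shed 1/8
 * of the current value, never dropping below the default. */
static uint32_t decay(uint32_t do_pacing)
{
	do_pacing -= do_pacing >> 3;
	return do_pacing < DEF_DO_PACING ? DEF_DO_PACING : do_pacing;
}

int main(void)
{
	uint32_t algo_th = 0x400, pacing_th = algo_th;
	uint32_t p = DEF_DO_PACING;

	p = escalate(p, &pacing_th, algo_th);	/* fresh congestion: 8x */
	p = escalate(p, &pacing_th, algo_th);	/* still congested: 2x */
	while (p > DEF_DO_PACING)
		p = decay(p);
	printf("occupancy(0x08000000)=%u, settled do_pacing=%u, pacing_th=%u\n",
	       fifo_occupancy(0x08000000), p, pacing_th);
	return 0;
}

The real work items additionally take dbq_lock, update the pacing stats, and reschedule themselves with schedule_delayed_work(); the arithmetic, though, is exactly this doubling/8x escalation and 1/8 decay.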
+static void bnxt_re_pacing_timer_exp(struct work_struct *work) +{ + struct bnxt_re_dev *rdev = container_of(work, struct bnxt_re_dev, + dbq_pacing_work.work); + struct bnxt_qplib_db_pacing_data *pacing_data; + u32 read_val, fifo_occup; + + if (!mutex_trylock(&rdev->pacing.dbq_lock)) + return; + + pacing_data = rdev->qplib_res.pacing_data; + read_val = readl(rdev->en_dev->bar0 + rdev->pacing.dbr_db_fifo_reg_off); + fifo_occup = BNXT_RE_MAX_FIFO_DEPTH - + ((read_val & BNXT_RE_DB_FIFO_ROOM_MASK) >> + BNXT_RE_DB_FIFO_ROOM_SHIFT); + + if (fifo_occup > pacing_data->pacing_th) + goto restart_timer; + + /* + * Instead of immediately going back to the default do_pacing, + * reduce it by 1/8 and restart the timer. + */ + pacing_data->do_pacing = pacing_data->do_pacing - (pacing_data->do_pacing >> 3); + pacing_data->do_pacing = max_t(u32, rdev->pacing.dbr_def_do_pacing, pacing_data->do_pacing); + if (pacing_data->do_pacing <= rdev->pacing.dbr_def_do_pacing) { + bnxt_re_set_default_pacing_data(rdev); + rdev->stats.pacing.complete++; + goto dbq_unlock; + } + +restart_timer: + schedule_delayed_work(&rdev->dbq_pacing_work, + msecs_to_jiffies(rdev->pacing.dbq_pacing_time)); + rdev->stats.pacing.resched++; +dbq_unlock: + rdev->pacing.do_pacing_save = pacing_data->do_pacing; + mutex_unlock(&rdev->pacing.dbq_lock); +} + +void bnxt_re_pacing_alert(struct bnxt_re_dev *rdev) +{ + struct bnxt_qplib_db_pacing_data *pacing_data; + + if (!rdev->pacing.dbr_pacing) + return; + mutex_lock(&rdev->pacing.dbq_lock); + pacing_data = rdev->qplib_res.pacing_data; + + /* + * Increase the alarm_th to max so that other user lib instances do not + * keep alerting the driver.
+ */ + pacing_data->alarm_th = BNXT_RE_MAX_FIFO_DEPTH; + pacing_data->do_pacing = BNXT_RE_MAX_DBR_DO_PACING; + cancel_work_sync(&rdev->dbq_fifo_check_work); + schedule_work(&rdev->dbq_fifo_check_work); + mutex_unlock(&rdev->pacing.dbq_lock); +} + +static int bnxt_re_initialize_dbr_pacing(struct bnxt_re_dev *rdev) +{ + if (bnxt_re_hwrm_dbr_pacing_qcfg(rdev)) + return -EIO; + + /* Allocate a page for app use */ + rdev->pacing.dbr_page = (void *)__get_free_page(GFP_KERNEL); + if (!rdev->pacing.dbr_page) + return -ENOMEM; + + memset((u8 *)rdev->pacing.dbr_page, 0, PAGE_SIZE); + rdev->qplib_res.pacing_data = (struct bnxt_qplib_db_pacing_data *)rdev->pacing.dbr_page; + + /* MAP HW window 2 for reading db fifo depth */ + writel(rdev->chip_ctx->dbr_stat_db_fifo & BNXT_GRC_BASE_MASK, + rdev->en_dev->bar0 + BNXT_GRCPF_REG_WINDOW_BASE_OUT + 4); + rdev->pacing.dbr_db_fifo_reg_off = + (rdev->chip_ctx->dbr_stat_db_fifo & BNXT_GRC_OFFSET_MASK) + + BNXT_RE_GRC_FIFO_REG_BASE; + rdev->pacing.dbr_bar_addr = + pci_resource_start(rdev->qplib_res.pdev, 0) + rdev->pacing.dbr_db_fifo_reg_off; + + rdev->pacing.pacing_algo_th = BNXT_RE_PACING_ALGO_THRESHOLD; + rdev->pacing.dbq_pacing_time = BNXT_RE_DBR_PACING_TIME; + rdev->pacing.dbr_def_do_pacing = BNXT_RE_DBR_DO_PACING_NO_CONGESTION; + rdev->pacing.do_pacing_save = rdev->pacing.dbr_def_do_pacing; + rdev->qplib_res.pacing_data->fifo_max_depth = BNXT_RE_MAX_FIFO_DEPTH; + rdev->qplib_res.pacing_data->fifo_room_mask = BNXT_RE_DB_FIFO_ROOM_MASK; + rdev->qplib_res.pacing_data->fifo_room_shift = BNXT_RE_DB_FIFO_ROOM_SHIFT; + rdev->qplib_res.pacing_data->grc_reg_offset = rdev->pacing.dbr_db_fifo_reg_off; + bnxt_re_set_default_pacing_data(rdev); + /* Initialize worker for DBR Pacing */ + INIT_WORK(&rdev->dbq_fifo_check_work, bnxt_re_db_fifo_check); + INIT_DELAYED_WORK(&rdev->dbq_pacing_work, bnxt_re_pacing_timer_exp); return 0; } +static void bnxt_re_deinitialize_dbr_pacing(struct bnxt_re_dev *rdev) +{ + cancel_work_sync(&rdev->dbq_fifo_check_work); + cancel_delayed_work_sync(&rdev->dbq_pacing_work); + if (rdev->pacing.dbr_page) + free_page((u64)rdev->pacing.dbr_page); + + rdev->pacing.dbr_page = NULL; + rdev->pacing.dbr_pacing = false; +} + static int bnxt_re_net_ring_free(struct bnxt_re_dev *rdev, u16 fw_ring_id, int type) { @@ -652,6 +861,7 @@ static const struct ib_device_ops bnxt_re_dev_ops = { .query_qp = bnxt_re_query_qp, .query_srq = bnxt_re_query_srq, .reg_user_mr = bnxt_re_reg_user_mr, + .reg_user_mr_dmabuf = bnxt_re_reg_user_mr_dmabuf, .req_notify_cq = bnxt_re_req_notify_cq, .resize_cq = bnxt_re_resize_cq, INIT_RDMA_OBJ_SIZE(ib_ah, bnxt_re_ah, ib_ah), @@ -711,13 +921,14 @@ static struct bnxt_re_dev *bnxt_re_dev_add(struct bnxt_aux_priv *aux_priv, rdev->id = rdev->en_dev->pdev->devfn; INIT_LIST_HEAD(&rdev->qp_list); mutex_init(&rdev->qp_lock); - atomic_set(&rdev->qp_count, 0); - atomic_set(&rdev->cq_count, 0); - atomic_set(&rdev->srq_count, 0); - atomic_set(&rdev->mr_count, 0); - atomic_set(&rdev->mw_count, 0); - atomic_set(&rdev->ah_count, 0); - atomic_set(&rdev->pd_count, 0); + mutex_init(&rdev->pacing.dbq_lock); + atomic_set(&rdev->stats.res.qp_count, 0); + atomic_set(&rdev->stats.res.cq_count, 0); + atomic_set(&rdev->stats.res.srq_count, 0); + atomic_set(&rdev->stats.res.mr_count, 0); + atomic_set(&rdev->stats.res.mw_count, 0); + atomic_set(&rdev->stats.res.ah_count, 0); + atomic_set(&rdev->stats.res.pd_count, 0); rdev->cosq[0] = 0xFFFF; rdev->cosq[1] = 0xFFFF; @@ -759,7 +970,7 @@ static int bnxt_re_handle_unaffi_async_event(struct creq_func_event static int 
bnxt_re_handle_qp_async_event(struct creq_qp_event *qp_event, struct bnxt_re_qp *qp) { - struct ib_event event; + struct ib_event event = {}; unsigned int flags; if (qp->qplib_qp.state == CMDQ_MODIFY_QP_NEW_STATE_ERR && @@ -769,7 +980,6 @@ static int bnxt_re_handle_qp_async_event(struct creq_qp_event *qp_event, bnxt_re_unlock_cqs(qp, flags); } - memset(&event, 0, sizeof(event)); if (qp->qplib_qp.srq) { event.device = &qp->rdev->ibdev; event.element.qp = &qp->ib_qp; @@ -937,13 +1147,12 @@ static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev) { struct bnxt_re_ring_attr rattr = {}; int num_vec_created = 0; - int rc = 0, i; + int rc, i; u8 type; /* Configure and allocate resources for qplib */ rdev->qplib_res.rcfw = &rdev->rcfw; - rc = bnxt_qplib_get_dev_attr(&rdev->rcfw, &rdev->dev_attr, - rdev->is_virtfn); + rc = bnxt_qplib_get_dev_attr(&rdev->rcfw, &rdev->dev_attr); if (rc) goto fail; @@ -1090,11 +1299,10 @@ static u32 bnxt_re_get_priority_mask(struct bnxt_re_dev *rdev) { u32 prio_map = 0, tmp_map = 0; struct net_device *netdev; - struct dcb_app app; + struct dcb_app app = {}; netdev = rdev->netdev; - memset(&app, 0, sizeof(app)); app.selector = IEEE_8021QAZ_APP_SEL_ETHERTYPE; app.protocol = ETH_P_IBOE; tmp_map = dcb_ieee_getapp_mask(netdev, &app); @@ -1123,8 +1331,7 @@ static int bnxt_re_setup_qos(struct bnxt_re_dev *rdev) */ if ((prio_map == 0 && rdev->qplib_res.prio) || (prio_map != 0 && !rdev->qplib_res.prio)) { - rdev->qplib_res.prio = prio_map ? true : false; - + rdev->qplib_res.prio = prio_map; bnxt_re_update_gid(rdev); } @@ -1138,7 +1345,7 @@ static void bnxt_re_query_hwrm_intf_version(struct bnxt_re_dev *rdev) struct hwrm_ver_get_input req = {}; struct bnxt_qplib_chip_ctx *cctx; struct bnxt_fw_msg fw_msg = {}; - int rc = 0; + int rc; bnxt_re_init_hwrm_hdr((void *)&req, HWRM_VER_GET); req.hwrm_intf_maj = HWRM_VERSION_MAJOR; @@ -1168,7 +1375,7 @@ static void bnxt_re_query_hwrm_intf_version(struct bnxt_re_dev *rdev) static int bnxt_re_ib_init(struct bnxt_re_dev *rdev) { - int rc = 0; + int rc; u32 event; /* Register ib dev */ @@ -1214,8 +1421,11 @@ static void bnxt_re_dev_uninit(struct bnxt_re_dev *rdev) bnxt_re_net_ring_free(rdev, rdev->rcfw.creq.ring_id, type); bnxt_qplib_free_rcfw_channel(&rdev->rcfw); } - if (test_and_clear_bit(BNXT_RE_FLAG_GOT_MSIX, &rdev->flags)) - rdev->num_msix = 0; + + rdev->num_msix = 0; + + if (rdev->pacing.dbr_pacing) + bnxt_re_deinitialize_dbr_pacing(rdev); bnxt_re_destroy_chip_ctx(rdev); if (test_and_clear_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags)) @@ -1234,15 +1444,14 @@ static void bnxt_re_worker(struct work_struct *work) static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 wqe_mode) { + struct bnxt_re_ring_attr rattr = {}; struct bnxt_qplib_creq_ctx *creq; - struct bnxt_re_ring_attr rattr; u32 db_offt; int vid; u8 type; int rc; /* Registered a new RoCE device instance to netdev */ - memset(&rattr, 0, sizeof(rattr)); rc = bnxt_re_register_netdev(rdev); if (rc) { ibdev_err(&rdev->ibdev, @@ -1271,7 +1480,6 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 wqe_mode) ibdev_dbg(&rdev->ibdev, "Got %d MSI-X vectors\n", rdev->en_dev->ulp_tbl->msix_requested); rdev->num_msix = rdev->en_dev->ulp_tbl->msix_requested; - set_bit(BNXT_RE_FLAG_GOT_MSIX, &rdev->flags); bnxt_re_query_hwrm_intf_version(rdev); @@ -1311,8 +1519,17 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 wqe_mode) goto free_ring; } - rc = bnxt_qplib_get_dev_attr(&rdev->rcfw, &rdev->dev_attr, - rdev->is_virtfn); + if (bnxt_qplib_dbr_pacing_en(rdev->chip_ctx)) { + 
rc = bnxt_re_initialize_dbr_pacing(rdev); + if (!rc) { + rdev->pacing.dbr_pacing = true; + } else { + ibdev_err(&rdev->ibdev, + "DBR pacing disabled with error : %d\n", rc); + rdev->pacing.dbr_pacing = false; + } + } + rc = bnxt_qplib_get_dev_attr(&rdev->rcfw, &rdev->dev_attr); if (rc) goto disable_rcfw; @@ -1400,7 +1617,7 @@ static int bnxt_re_add_device(struct auxiliary_device *adev, u8 wqe_mode) container_of(adev, struct bnxt_aux_priv, aux_dev); struct bnxt_en_dev *en_dev; struct bnxt_re_dev *rdev; - int rc = 0; + int rc; /* en_dev should never be NULL as long as adev and aux_dev are valid. */ en_dev = aux_priv->edev; @@ -1646,7 +1863,7 @@ static struct auxiliary_driver bnxt_re_driver = { static int __init bnxt_re_mod_init(void) { - int rc = 0; + int rc; pr_info("%s: %s", ROCE_DRV_MODULE_NAME, version); rc = auxiliary_driver_register(&bnxt_re_driver); diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c index a42555623aed..abbabea7f5fa 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c @@ -535,7 +535,7 @@ int bnxt_qplib_enable_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq, cqn_handler_t cqn_handler, srqn_handler_t srqn_handler) { - int rc = -1; + int rc; nq->pdev = pdev; nq->cqn_handler = cqn_handler; @@ -727,27 +727,30 @@ int bnxt_qplib_query_srq(struct bnxt_qplib_res *res, struct bnxt_qplib_rcfw *rcfw = res->rcfw; struct creq_query_srq_resp resp = {}; struct bnxt_qplib_cmdqmsg msg = {}; - struct bnxt_qplib_rcfw_sbuf *sbuf; + struct bnxt_qplib_rcfw_sbuf sbuf; struct creq_query_srq_resp_sb *sb; struct cmdq_query_srq req = {}; - int rc = 0; + int rc; bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req, CMDQ_BASE_OPCODE_QUERY_SRQ, sizeof(req)); /* Configure the request */ - sbuf = bnxt_qplib_rcfw_alloc_sbuf(rcfw, sizeof(*sb)); - if (!sbuf) + sbuf.size = ALIGN(sizeof(*sb), BNXT_QPLIB_CMDQE_UNITS); + sbuf.sb = dma_alloc_coherent(&rcfw->pdev->dev, sbuf.size, + &sbuf.dma_addr, GFP_KERNEL); + if (!sbuf.sb) return -ENOMEM; - req.resp_size = sizeof(*sb) / BNXT_QPLIB_CMDQE_UNITS; + req.resp_size = sbuf.size / BNXT_QPLIB_CMDQE_UNITS; req.srq_cid = cpu_to_le32(srq->id); - sb = sbuf->sb; - bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, sbuf, sizeof(req), + sb = sbuf.sb; + bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, &sbuf, sizeof(req), sizeof(resp), 0); rc = bnxt_qplib_rcfw_send_message(rcfw, &msg); srq->threshold = le16_to_cpu(sb->srq_limit); - bnxt_qplib_rcfw_free_sbuf(rcfw, sbuf); + dma_free_coherent(&rcfw->pdev->dev, sbuf.size, + sbuf.sb, sbuf.dma_addr); return rc; } @@ -1365,24 +1368,26 @@ int bnxt_qplib_query_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) struct bnxt_qplib_rcfw *rcfw = res->rcfw; struct creq_query_qp_resp resp = {}; struct bnxt_qplib_cmdqmsg msg = {}; - struct bnxt_qplib_rcfw_sbuf *sbuf; + struct bnxt_qplib_rcfw_sbuf sbuf; struct creq_query_qp_resp_sb *sb; struct cmdq_query_qp req = {}; u32 temp32[4]; - int i, rc = 0; + int i, rc; + + sbuf.size = ALIGN(sizeof(*sb), BNXT_QPLIB_CMDQE_UNITS); + sbuf.sb = dma_alloc_coherent(&rcfw->pdev->dev, sbuf.size, + &sbuf.dma_addr, GFP_KERNEL); + if (!sbuf.sb) + return -ENOMEM; + sb = sbuf.sb; bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req, CMDQ_BASE_OPCODE_QUERY_QP, sizeof(req)); - sbuf = bnxt_qplib_rcfw_alloc_sbuf(rcfw, sizeof(*sb)); - if (!sbuf) - return -ENOMEM; - sb = sbuf->sb; - req.qp_cid = cpu_to_le32(qp->id); - req.resp_size = sizeof(*sb) / BNXT_QPLIB_CMDQE_UNITS; - bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, sbuf, sizeof(req), + 
req.resp_size = sbuf.size / BNXT_QPLIB_CMDQE_UNITS; + bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, &sbuf, sizeof(req), sizeof(resp), 0); rc = bnxt_qplib_rcfw_send_message(rcfw, &msg); if (rc) @@ -1391,8 +1396,7 @@ int bnxt_qplib_query_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) qp->state = sb->en_sqd_async_notify_state & CREQ_QUERY_QP_RESP_SB_STATE_MASK; qp->en_sqd_async_notify = sb->en_sqd_async_notify_state & - CREQ_QUERY_QP_RESP_SB_EN_SQD_ASYNC_NOTIFY ? - true : false; + CREQ_QUERY_QP_RESP_SB_EN_SQD_ASYNC_NOTIFY; qp->access = sb->access; qp->pkey_index = le16_to_cpu(sb->pkey); qp->qkey = le32_to_cpu(sb->qkey); @@ -1442,7 +1446,8 @@ int bnxt_qplib_query_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp) memcpy(qp->smac, sb->src_mac, 6); qp->vlan_id = le16_to_cpu(sb->vlan_pcp_vlan_dei_vlan_id); bail: - bnxt_qplib_rcfw_free_sbuf(rcfw, sbuf); + dma_free_coherent(&rcfw->pdev->dev, sbuf.size, + sbuf.sb, sbuf.dma_addr); return rc; } diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c index bc3aea4592b9..c8c4017fe405 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c @@ -55,7 +55,7 @@ static void bnxt_qplib_service_creq(struct tasklet_struct *t); /** * bnxt_qplib_map_rc - map return type based on opcode - * @opcode - roce slow path opcode + * @opcode: roce slow path opcode * * case #1 * Firmware initiated error recovery is a safe state machine and @@ -98,8 +98,8 @@ static int bnxt_qplib_map_rc(u8 opcode) /** * bnxt_re_is_fw_stalled - Check firmware health - * @rcfw - rcfw channel instance of rdev - * @cookie - cookie to track the command + * @rcfw: rcfw channel instance of rdev + * @cookie: cookie to track the command * * If firmware has not responded to any rcfw command within * rcfw->max_timeout, consider firmware as stalled. @@ -133,8 +133,8 @@ static int bnxt_re_is_fw_stalled(struct bnxt_qplib_rcfw *rcfw, /** * __wait_for_resp - Don't hold the cpu context and wait for response - * @rcfw - rcfw channel instance of rdev - * @cookie - cookie to track the command + * @rcfw: rcfw channel instance of rdev + * @cookie: cookie to track the command * * Wait for command completion in sleepable context. * @@ -179,8 +179,8 @@ static int __wait_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie) /** * __block_for_resp - hold the cpu context and wait for response - * @rcfw - rcfw channel instance of rdev - * @cookie - cookie to track the command + * @rcfw: rcfw channel instance of rdev + * @cookie: cookie to track the command * * This function will hold the cpu (non-sleepable context) and * wait for command completion. Maximum holding interval is 8 seconds. @@ -216,8 +216,8 @@ static int __block_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie) }; /* __send_message_no_waiter - get cookie and post the message. - * @rcfw - rcfw channel instance of rdev - * @msg - qplib message internal + * @rcfw: rcfw channel instance of rdev + * @msg: qplib message internal * * This function will just post and not bother about completion. * Current design of this function is -
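The __send_message() hunk just past this comment block fixes a units bug worth spelling out: the resp_size field counts command-queue entries, not bytes, so the ALIGN()ed byte count has to be divided back down. A toy calculation, assuming BNXT_QPLIB_CMDQE_UNITS is 16 (that value is an assumption here):

#include <stdio.h>
#include <stddef.h>

#define CMDQE_UNITS 16u	/* assumed value of BNXT_QPLIB_CMDQE_UNITS */
#define ALIGN_UP(x, a) (((x) + (a) - 1) & ~((size_t)(a) - 1))

int main(void)
{
	size_t sb_size = 104;				/* hypothetical sizeof(*sb) */
	size_t bytes = ALIGN_UP(sb_size, CMDQE_UNITS);	/* 112 */
	size_t units = bytes / CMDQE_UNITS;		/* 7: what firmware expects */

	/* The old code stored 'bytes' where firmware expected 'units';
	 * the hunk below adds the missing division. */
	printf("%zu bytes -> %zu units\n", bytes, units);
	return 0;
}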
@@ -335,7 +335,8 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw, cpu_to_le64(sbuf->dma_addr)); __set_cmdq_base_resp_size(msg->req, msg->req_sz, ALIGN(sbuf->size, - BNXT_QPLIB_CMDQE_UNITS)); + BNXT_QPLIB_CMDQE_UNITS) / + BNXT_QPLIB_CMDQE_UNITS); } preq = (u8 *)msg->req; @@ -373,8 +374,8 @@ static int __send_message(struct bnxt_qplib_rcfw *rcfw, /** * __poll_for_resp - self poll completion for rcfw command - * @rcfw - rcfw channel instance of rdev - * @cookie - cookie to track the command + * @rcfw: rcfw channel instance of rdev + * @cookie: cookie to track the command * * It works the same as __wait_for_resp except this function will * do self polling at short intervals since the interrupt is disabled. @@ -470,8 +471,8 @@ static void __destroy_timedout_ah(struct bnxt_qplib_rcfw *rcfw, /** * __bnxt_qplib_rcfw_send_message - qplib interface to send * and complete rcfw command. - * @rcfw - rcfw channel instance of rdev - * @msg - qplib message internal + * @rcfw: rcfw channel instance of rdev + * @msg: qplib message internal * * This function does not account for shadow queue depth. It will send * all commands unconditionally as long as the send queue is not full. @@ -487,7 +488,7 @@ static int __bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw, struct bnxt_qplib_crsqe *crsqe; unsigned long flags; u16 cookie; - int rc = 0; + int rc; u8 opcode; opcode = __get_cmdq_base_opcode(msg->req, msg->req_sz); @@ -533,8 +534,8 @@ static int __bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw, /** * bnxt_qplib_rcfw_send_message - qplib interface to send * and complete rcfw command. - * @rcfw - rcfw channel instance of rdev - * @msg - qplib message internal + * @rcfw: rcfw channel instance of rdev + * @msg: qplib message internal * * The driver interacts with firmware through the rcfw channel/slow path in two ways. * a. Blocking rcfw command send.
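A pattern note before the rest of this comment: every converted call site in this series (query_srq, query_qp, query_func, the roce stats helpers) now follows the same on-stack side-buffer shape instead of going through the deleted bnxt_qplib_rcfw_alloc_sbuf()/free_sbuf() helpers. Condensed, as a sketch rather than compilable code (req, resp, and sb stand in for the command-specific types):

/* Condensed sketch of the converted call sites, not a standalone
 * function: req/resp/sb are the command-specific cmdq/creq types and
 * msg is a struct bnxt_qplib_cmdqmsg, as in the surrounding hunks. */
struct bnxt_qplib_rcfw_sbuf sbuf;
int rc;

sbuf.size = ALIGN(sizeof(*sb), BNXT_QPLIB_CMDQE_UNITS);
sbuf.sb = dma_alloc_coherent(&rcfw->pdev->dev, sbuf.size,
			     &sbuf.dma_addr, GFP_KERNEL);
if (!sbuf.sb)
	return -ENOMEM;

req.resp_size = sbuf.size / BNXT_QPLIB_CMDQE_UNITS;	/* CMDQE units */
bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, &sbuf, sizeof(req),
			sizeof(resp), 0);
rc = bnxt_qplib_rcfw_send_message(rcfw, &msg);
/* ...decode sbuf.sb into caller-visible attributes on success... */
dma_free_coherent(&rcfw->pdev->dev, sbuf.size, sbuf.sb, sbuf.dma_addr);
return rc;

One heap allocation per slow-path command disappears; only the DMA-coherent response buffer itself remains.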
In this path, driver cannot hold @@ -1195,34 +1196,3 @@ int bnxt_qplib_enable_rcfw_channel(struct bnxt_qplib_rcfw *rcfw, return 0; } - -struct bnxt_qplib_rcfw_sbuf *bnxt_qplib_rcfw_alloc_sbuf( - struct bnxt_qplib_rcfw *rcfw, - u32 size) -{ - struct bnxt_qplib_rcfw_sbuf *sbuf; - - sbuf = kzalloc(sizeof(*sbuf), GFP_KERNEL); - if (!sbuf) - return NULL; - - sbuf->size = size; - sbuf->sb = dma_alloc_coherent(&rcfw->pdev->dev, sbuf->size, - &sbuf->dma_addr, GFP_KERNEL); - if (!sbuf->sb) - goto bail; - - return sbuf; -bail: - kfree(sbuf); - return NULL; -} - -void bnxt_qplib_rcfw_free_sbuf(struct bnxt_qplib_rcfw *rcfw, - struct bnxt_qplib_rcfw_sbuf *sbuf) -{ - if (sbuf->sb) - dma_free_coherent(&rcfw->pdev->dev, sbuf->size, - sbuf->sb, sbuf->dma_addr); - kfree(sbuf); -} diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.c b/drivers/infiniband/hw/bnxt_re/qplib_res.c index 739d942761d1..157db6b7e119 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.c @@ -118,11 +118,11 @@ static int __alloc_pbl(struct bnxt_qplib_res *res, else pages = sginfo->npages; /* page ptr arrays */ - pbl->pg_arr = vmalloc(pages * sizeof(void *)); + pbl->pg_arr = vmalloc_array(pages, sizeof(void *)); if (!pbl->pg_arr) return -ENOMEM; - pbl->pg_map_arr = vmalloc(pages * sizeof(dma_addr_t)); + pbl->pg_map_arr = vmalloc_array(pages, sizeof(dma_addr_t)); if (!pbl->pg_map_arr) { vfree(pbl->pg_arr); pbl->pg_arr = NULL; @@ -385,7 +385,7 @@ static int bnxt_qplib_alloc_tqm_rings(struct bnxt_qplib_res *res, struct bnxt_qplib_hwq_attr hwq_attr = {}; struct bnxt_qplib_sg_info sginfo = {}; struct bnxt_qplib_tqm_ctx *tqmctx; - int rc = 0; + int rc; int i; tqmctx = &ctx->tqm_ctx; @@ -463,7 +463,7 @@ static void bnxt_qplib_map_tqm_pgtbl(struct bnxt_qplib_tqm_ctx *ctx) static int bnxt_qplib_setup_tqm_rings(struct bnxt_qplib_res *res, struct bnxt_qplib_ctx *ctx) { - int rc = 0; + int rc; rc = bnxt_qplib_alloc_tqm_rings(res, ctx); if (rc) @@ -501,7 +501,7 @@ int bnxt_qplib_alloc_ctx(struct bnxt_qplib_res *res, { struct bnxt_qplib_hwq_attr hwq_attr = {}; struct bnxt_qplib_sg_info sginfo = {}; - int rc = 0; + int rc; if (virt_fn || is_p5) goto stats_alloc; @@ -642,31 +642,44 @@ static void bnxt_qplib_init_sgid_tbl(struct bnxt_qplib_sgid_tbl *sgid_tbl, } /* PDs */ -int bnxt_qplib_alloc_pd(struct bnxt_qplib_pd_tbl *pdt, struct bnxt_qplib_pd *pd) +int bnxt_qplib_alloc_pd(struct bnxt_qplib_res *res, struct bnxt_qplib_pd *pd) { + struct bnxt_qplib_pd_tbl *pdt = &res->pd_tbl; u32 bit_num; + int rc = 0; + mutex_lock(&res->pd_tbl_lock); bit_num = find_first_bit(pdt->tbl, pdt->max); - if (bit_num == pdt->max) - return -ENOMEM; + if (bit_num == pdt->max) { + rc = -ENOMEM; + goto exit; + } /* Found unused PD */ clear_bit(bit_num, pdt->tbl); pd->id = bit_num; - return 0; +exit: + mutex_unlock(&res->pd_tbl_lock); + return rc; } int bnxt_qplib_dealloc_pd(struct bnxt_qplib_res *res, struct bnxt_qplib_pd_tbl *pdt, struct bnxt_qplib_pd *pd) { + int rc = 0; + + mutex_lock(&res->pd_tbl_lock); if (test_and_set_bit(pd->id, pdt->tbl)) { dev_warn(&res->pdev->dev, "Freeing an unused PD? 
pdn = %d\n", pd->id); - return -EINVAL; + rc = -EINVAL; + goto exit; } pd->id = 0; - return 0; +exit: + mutex_unlock(&res->pd_tbl_lock); + return rc; } static void bnxt_qplib_free_pd_tbl(struct bnxt_qplib_pd_tbl *pdt) @@ -691,6 +704,7 @@ static int bnxt_qplib_alloc_pd_tbl(struct bnxt_qplib_res *res, pdt->max = max; memset((u8 *)pdt->tbl, 0xFF, bytes); + mutex_init(&res->pd_tbl_lock); return 0; } @@ -877,7 +891,7 @@ int bnxt_qplib_alloc_res(struct bnxt_qplib_res *res, struct pci_dev *pdev, struct net_device *netdev, struct bnxt_qplib_dev_attr *dev_attr) { - int rc = 0; + int rc; res->pdev = pdev; res->netdev = netdev; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h index d850a553821e..5949f004f785 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h @@ -48,6 +48,7 @@ extern const struct bnxt_qplib_gid bnxt_qplib_gid_zero; struct bnxt_qplib_drv_modes { u8 wqe_mode; bool db_push; + bool dbr_pacing; }; struct bnxt_qplib_chip_ctx { @@ -58,6 +59,17 @@ struct bnxt_qplib_chip_ctx { u16 hwrm_cmd_max_timeout; struct bnxt_qplib_drv_modes modes; u64 hwrm_intf_ver; + u32 dbr_stat_db_fifo; +}; + +struct bnxt_qplib_db_pacing_data { + u32 do_pacing; + u32 pacing_th; + u32 alarm_th; + u32 fifo_max_depth; + u32 fifo_room_mask; + u32 fifo_room_shift; + u32 grc_reg_offset; }; #define BNXT_QPLIB_DBR_PF_DB_OFFSET 0x10000 @@ -265,12 +277,15 @@ struct bnxt_qplib_res { struct net_device *netdev; struct bnxt_qplib_rcfw *rcfw; struct bnxt_qplib_pd_tbl pd_tbl; + /* To protect the pd table bit map */ + struct mutex pd_tbl_lock; struct bnxt_qplib_sgid_tbl sgid_tbl; struct bnxt_qplib_dpi_tbl dpi_tbl; /* To protect the dpi table bit map */ struct mutex dpi_tbl_lock; bool prio; bool is_vf; + struct bnxt_qplib_db_pacing_data *pacing_data; }; static inline bool bnxt_qplib_is_chip_gen_p5(struct bnxt_qplib_chip_ctx *cctx) @@ -355,7 +370,7 @@ void bnxt_qplib_free_hwq(struct bnxt_qplib_res *res, struct bnxt_qplib_hwq *hwq); int bnxt_qplib_alloc_init_hwq(struct bnxt_qplib_hwq *hwq, struct bnxt_qplib_hwq_attr *hwq_attr); -int bnxt_qplib_alloc_pd(struct bnxt_qplib_pd_tbl *pd_tbl, +int bnxt_qplib_alloc_pd(struct bnxt_qplib_res *res, struct bnxt_qplib_pd *pd); int bnxt_qplib_dealloc_pd(struct bnxt_qplib_res *res, struct bnxt_qplib_pd_tbl *pd_tbl, @@ -467,4 +482,10 @@ static inline bool _is_ext_stats_supported(u16 dev_cap_flags) return dev_cap_flags & CREQ_QUERY_FUNC_RESP_SB_EXT_STATS; } + +static inline u8 bnxt_qplib_dbr_pacing_en(struct bnxt_qplib_chip_ctx *cctx) +{ + return cctx->modes.dbr_pacing; +} + #endif /* __BNXT_QPLIB_RES_H__ */ diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.c b/drivers/infiniband/hw/bnxt_re/qplib_sp.c index ab45f9d4bb02..a27b68515164 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.c @@ -72,7 +72,7 @@ static void bnxt_qplib_query_version(struct bnxt_qplib_rcfw *rcfw, struct creq_query_version_resp resp = {}; struct bnxt_qplib_cmdqmsg msg = {}; struct cmdq_query_version req = {}; - int rc = 0; + int rc; bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req, CMDQ_BASE_OPCODE_QUERY_VERSION, @@ -89,31 +89,29 @@ static void bnxt_qplib_query_version(struct bnxt_qplib_rcfw *rcfw, } int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw, - struct bnxt_qplib_dev_attr *attr, bool vf) + struct bnxt_qplib_dev_attr *attr) { struct creq_query_func_resp resp = {}; struct bnxt_qplib_cmdqmsg msg = {}; struct creq_query_func_resp_sb *sb; - struct bnxt_qplib_rcfw_sbuf 
*sbuf; + struct bnxt_qplib_rcfw_sbuf sbuf; struct cmdq_query_func req = {}; u8 *tqm_alloc; - int i, rc = 0; + int i, rc; u32 temp; bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req, CMDQ_BASE_OPCODE_QUERY_FUNC, sizeof(req)); - sbuf = bnxt_qplib_rcfw_alloc_sbuf(rcfw, sizeof(*sb)); - if (!sbuf) { - dev_err(&rcfw->pdev->dev, - "SP: QUERY_FUNC alloc side buffer failed\n"); + sbuf.size = ALIGN(sizeof(*sb), BNXT_QPLIB_CMDQE_UNITS); + sbuf.sb = dma_alloc_coherent(&rcfw->pdev->dev, sbuf.size, + &sbuf.dma_addr, GFP_KERNEL); + if (!sbuf.sb) return -ENOMEM; - } - - sb = sbuf->sb; - req.resp_size = sizeof(*sb) / BNXT_QPLIB_CMDQE_UNITS; - bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, sbuf, sizeof(req), + sb = sbuf.sb; + req.resp_size = sbuf.size / BNXT_QPLIB_CMDQE_UNITS; + bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, &sbuf, sizeof(req), sizeof(resp), 0); rc = bnxt_qplib_rcfw_send_message(rcfw, &msg); if (rc) @@ -121,9 +119,8 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw, /* Extract the context from the side buffer */ attr->max_qp = le32_to_cpu(sb->max_qp); - /* max_qp value reported by FW for PF doesn't include the QP1 for PF */ - if (!vf) - attr->max_qp += 1; + /* max_qp value reported by FW doesn't include the QP1 */ + attr->max_qp += 1; attr->max_qp_rd_atom = sb->max_qp_rd_atom > BNXT_QPLIB_MAX_OUT_RD_ATOM ? BNXT_QPLIB_MAX_OUT_RD_ATOM : sb->max_qp_rd_atom; @@ -175,7 +172,8 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw, attr->is_atomic = bnxt_qplib_is_atomic_cap(rcfw); bail: - bnxt_qplib_rcfw_free_sbuf(rcfw, sbuf); + dma_free_coherent(&rcfw->pdev->dev, sbuf.size, + sbuf.sb, sbuf.dma_addr); return rc; } @@ -186,7 +184,7 @@ int bnxt_qplib_set_func_resources(struct bnxt_qplib_res *res, struct creq_set_func_resources_resp resp = {}; struct cmdq_set_func_resources req = {}; struct bnxt_qplib_cmdqmsg msg = {}; - int rc = 0; + int rc; bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req, CMDQ_BASE_OPCODE_SET_FUNC_RESOURCES, @@ -718,23 +716,22 @@ int bnxt_qplib_get_roce_stats(struct bnxt_qplib_rcfw *rcfw, struct creq_query_roce_stats_resp_sb *sb; struct cmdq_query_roce_stats req = {}; struct bnxt_qplib_cmdqmsg msg = {}; - struct bnxt_qplib_rcfw_sbuf *sbuf; - int rc = 0; + struct bnxt_qplib_rcfw_sbuf sbuf; + int rc; bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req, CMDQ_BASE_OPCODE_QUERY_ROCE_STATS, sizeof(req)); - sbuf = bnxt_qplib_rcfw_alloc_sbuf(rcfw, sizeof(*sb)); - if (!sbuf) { - dev_err(&rcfw->pdev->dev, - "SP: QUERY_ROCE_STATS alloc side buffer failed\n"); + sbuf.size = ALIGN(sizeof(*sb), BNXT_QPLIB_CMDQE_UNITS); + sbuf.sb = dma_alloc_coherent(&rcfw->pdev->dev, sbuf.size, + &sbuf.dma_addr, GFP_KERNEL); + if (!sbuf.sb) return -ENOMEM; - } + sb = sbuf.sb; - sb = sbuf->sb; - req.resp_size = sizeof(*sb) / BNXT_QPLIB_CMDQE_UNITS; - bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, sbuf, sizeof(req), + req.resp_size = sbuf.size / BNXT_QPLIB_CMDQE_UNITS; + bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, &sbuf, sizeof(req), sizeof(resp), 0); rc = bnxt_qplib_rcfw_send_message(rcfw, &msg); if (rc) @@ -790,7 +787,8 @@ int bnxt_qplib_get_roce_stats(struct bnxt_qplib_rcfw *rcfw, } bail: - bnxt_qplib_rcfw_free_sbuf(rcfw, sbuf); + dma_free_coherent(&rcfw->pdev->dev, sbuf.size, + sbuf.sb, sbuf.dma_addr); return rc; } @@ -801,49 +799,56 @@ int bnxt_qplib_qext_stat(struct bnxt_qplib_rcfw *rcfw, u32 fid, struct creq_query_roce_stats_ext_resp_sb *sb; struct cmdq_query_roce_stats_ext req = {}; struct bnxt_qplib_cmdqmsg msg = {}; - struct bnxt_qplib_rcfw_sbuf *sbuf; + struct bnxt_qplib_rcfw_sbuf sbuf; int rc; - 
sbuf = bnxt_qplib_rcfw_alloc_sbuf(rcfw, sizeof(*sb)); - if (!sbuf) { - dev_err(&rcfw->pdev->dev, - "SP: QUERY_ROCE_STATS_EXT alloc sb failed"); + sbuf.size = ALIGN(sizeof(*sb), BNXT_QPLIB_CMDQE_UNITS); + sbuf.sb = dma_alloc_coherent(&rcfw->pdev->dev, sbuf.size, + &sbuf.dma_addr, GFP_KERNEL); + if (!sbuf.sb) return -ENOMEM; - } + sb = sbuf.sb; bnxt_qplib_rcfw_cmd_prep((struct cmdq_base *)&req, CMDQ_QUERY_ROCE_STATS_EXT_OPCODE_QUERY_ROCE_STATS, sizeof(req)); - req.resp_size = ALIGN(sizeof(*sb), BNXT_QPLIB_CMDQE_UNITS); - req.resp_addr = cpu_to_le64(sbuf->dma_addr); + req.resp_size = sbuf.size / BNXT_QPLIB_CMDQE_UNITS; + req.resp_addr = cpu_to_le64(sbuf.dma_addr); req.function_id = cpu_to_le32(fid); req.flags = cpu_to_le16(CMDQ_QUERY_ROCE_STATS_EXT_FLAGS_FUNCTION_ID); - bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, sbuf, sizeof(req), + bnxt_qplib_fill_cmdqmsg(&msg, &req, &resp, &sbuf, sizeof(req), sizeof(resp), 0); rc = bnxt_qplib_rcfw_send_message(rcfw, &msg); if (rc) goto bail; - sb = sbuf->sb; estat->tx_atomic_req = le64_to_cpu(sb->tx_atomic_req_pkts); estat->tx_read_req = le64_to_cpu(sb->tx_read_req_pkts); estat->tx_read_res = le64_to_cpu(sb->tx_read_res_pkts); estat->tx_write_req = le64_to_cpu(sb->tx_write_req_pkts); estat->tx_send_req = le64_to_cpu(sb->tx_send_req_pkts); + estat->tx_roce_pkts = le64_to_cpu(sb->tx_roce_pkts); + estat->tx_roce_bytes = le64_to_cpu(sb->tx_roce_bytes); estat->rx_atomic_req = le64_to_cpu(sb->rx_atomic_req_pkts); estat->rx_read_req = le64_to_cpu(sb->rx_read_req_pkts); estat->rx_read_res = le64_to_cpu(sb->rx_read_res_pkts); estat->rx_write_req = le64_to_cpu(sb->rx_write_req_pkts); estat->rx_send_req = le64_to_cpu(sb->rx_send_req_pkts); + estat->rx_roce_pkts = le64_to_cpu(sb->rx_roce_pkts); + estat->rx_roce_bytes = le64_to_cpu(sb->rx_roce_bytes); estat->rx_roce_good_pkts = le64_to_cpu(sb->rx_roce_good_pkts); estat->rx_roce_good_bytes = le64_to_cpu(sb->rx_roce_good_bytes); estat->rx_out_of_buffer = le64_to_cpu(sb->rx_out_of_buffer_pkts); estat->rx_out_of_sequence = le64_to_cpu(sb->rx_out_of_sequence_pkts); + estat->tx_cnp = le64_to_cpu(sb->tx_cnp_pkts); + estat->rx_cnp = le64_to_cpu(sb->rx_cnp_pkts); + estat->rx_ecn_marked = le64_to_cpu(sb->rx_ecn_marked_pkts); bail: - bnxt_qplib_rcfw_free_sbuf(rcfw, sbuf); + dma_free_coherent(&rcfw->pdev->dev, sbuf.size, + sbuf.sb, sbuf.dma_addr); return rc; } diff --git a/drivers/infiniband/hw/bnxt_re/qplib_sp.h b/drivers/infiniband/hw/bnxt_re/qplib_sp.h index 264ef3cedc45..d33c78b96217 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_sp.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_sp.h @@ -322,7 +322,7 @@ int bnxt_qplib_update_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl, struct bnxt_qplib_gid *gid, u16 gid_idx, const u8 *smac); int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw, - struct bnxt_qplib_dev_attr *attr, bool vf); + struct bnxt_qplib_dev_attr *attr); int bnxt_qplib_set_func_resources(struct bnxt_qplib_res *res, struct bnxt_qplib_rcfw *rcfw, struct bnxt_qplib_ctx *ctx); diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c index ffbd9a89981e..d16d8eaa1415 100644 --- a/drivers/infiniband/hw/cxgb4/qp.c +++ b/drivers/infiniband/hw/cxgb4/qp.c @@ -2466,7 +2466,7 @@ int c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, init_attr->cap.max_send_sge = qhp->attr.sq_max_sges; init_attr->cap.max_recv_sge = qhp->attr.rq_max_sges; init_attr->cap.max_inline_data = T4_MAX_SEND_INLINE; - init_attr->sq_sig_type = qhp->sq_sig_all ? IB_SIGNAL_ALL_WR : 0; + init_attr->sq_sig_type = qhp->sq_sig_all ? 
IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR; return 0; } diff --git a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h index 4e93ef7f84ee..9c65bd27bae0 100644 --- a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h +++ b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h @@ -66,6 +66,7 @@ enum efa_admin_get_stats_type { EFA_ADMIN_GET_STATS_TYPE_BASIC = 0, EFA_ADMIN_GET_STATS_TYPE_MESSAGES = 1, EFA_ADMIN_GET_STATS_TYPE_RDMA_READ = 2, + EFA_ADMIN_GET_STATS_TYPE_RDMA_WRITE = 3, }; enum efa_admin_get_stats_scope { @@ -570,6 +571,16 @@ struct efa_admin_rdma_read_stats { u64 read_resp_bytes; }; +struct efa_admin_rdma_write_stats { + u64 write_wrs; + + u64 write_bytes; + + u64 write_wr_err; + + u64 write_recv_bytes; +}; + struct efa_admin_acq_get_stats_resp { struct efa_admin_acq_common_desc acq_common_desc; @@ -579,6 +590,8 @@ struct efa_admin_acq_get_stats_resp { struct efa_admin_messages_stats messages_stats; struct efa_admin_rdma_read_stats rdma_read_stats; + + struct efa_admin_rdma_write_stats rdma_write_stats; } u; }; diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.c b/drivers/infiniband/hw/efa/efa_com_cmd.c index 8f8885e002ba..576811885d59 100644 --- a/drivers/infiniband/hw/efa/efa_com_cmd.c +++ b/drivers/infiniband/hw/efa/efa_com_cmd.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause /* - * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2023 Amazon.com, Inc. or its affiliates. All rights reserved. */ #include "efa_com.h" @@ -794,6 +794,12 @@ int efa_com_get_stats(struct efa_com_dev *edev, result->rdma_read_stats.read_wr_err = resp.u.rdma_read_stats.read_wr_err; result->rdma_read_stats.read_resp_bytes = resp.u.rdma_read_stats.read_resp_bytes; break; + case EFA_ADMIN_GET_STATS_TYPE_RDMA_WRITE: + result->rdma_write_stats.write_wrs = resp.u.rdma_write_stats.write_wrs; + result->rdma_write_stats.write_bytes = resp.u.rdma_write_stats.write_bytes; + result->rdma_write_stats.write_wr_err = resp.u.rdma_write_stats.write_wr_err; + result->rdma_write_stats.write_recv_bytes = resp.u.rdma_write_stats.write_recv_bytes; + break; } return 0; diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.h b/drivers/infiniband/hw/efa/efa_com_cmd.h index 0898ad5bc340..fc97f37bb39b 100644 --- a/drivers/infiniband/hw/efa/efa_com_cmd.h +++ b/drivers/infiniband/hw/efa/efa_com_cmd.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ /* - * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2023 Amazon.com, Inc. or its affiliates. All rights reserved. 
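Before the header side of the EFA change, note the shape being repeated: a new admin stats type, a new union arm in the completion, and new port-stat slots, with the query gated on EFA_DEV_CAP(dev, RDMA_WRITE) so older devices are never asked for it. A reduced, compilable toy of that dispatch (names mirror the driver's, layout trimmed to the fields involved):

#include <stdint.h>
#include <stdio.h>

/* Reduced mirror of efa_admin_rdma_write_stats. */
struct rdma_write_stats {
	uint64_t write_wrs;
	uint64_t write_bytes;
	uint64_t write_wr_err;
	uint64_t write_recv_bytes;
};

enum stats_type {
	STATS_TYPE_RDMA_READ = 2,	/* EFA_ADMIN_GET_STATS_TYPE_RDMA_READ */
	STATS_TYPE_RDMA_WRITE = 3,	/* EFA_ADMIN_GET_STATS_TYPE_RDMA_WRITE */
};

union stats_result {
	struct rdma_write_stats rdma_write;
	/* other arms (basic, messages, rdma_read) elided */
};

/* Pick the union arm by type, as efa_com_get_stats() does. */
static void fill_port_stats(enum stats_type type, const union stats_result *res)
{
	switch (type) {
	case STATS_TYPE_RDMA_WRITE:
		printf("rdma_write_wrs=%llu rdma_write_bytes=%llu\n",
		       (unsigned long long)res->rdma_write.write_wrs,
		       (unsigned long long)res->rdma_write.write_bytes);
		break;
	default:
		break;
	}
}

int main(void)
{
	union stats_result res = { .rdma_write = { 1, 4096, 0, 4096 } };

	fill_port_stats(STATS_TYPE_RDMA_WRITE, &res);
	return 0;
}

Separately, the efa_destroy_qp()/efa_destroy_cq() hunks further below reorder teardown so the mmap entries are removed only after the device object is actually destroyed; on a failed destroy the entries now remain valid.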
*/ #ifndef _EFA_COM_CMD_H_ @@ -262,10 +262,18 @@ struct efa_com_rdma_read_stats { u64 read_resp_bytes; }; +struct efa_com_rdma_write_stats { + u64 write_wrs; + u64 write_bytes; + u64 write_wr_err; + u64 write_recv_bytes; +}; + union efa_com_get_stats_result { struct efa_com_basic_stats basic_stats; struct efa_com_messages_stats messages_stats; struct efa_com_rdma_read_stats rdma_read_stats; + struct efa_com_rdma_write_stats rdma_write_stats; }; int efa_com_create_qp(struct efa_com_dev *edev, diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c index 2a195c4b0f17..0f8ca99d0827 100644 --- a/drivers/infiniband/hw/efa/efa_verbs.c +++ b/drivers/infiniband/hw/efa/efa_verbs.c @@ -61,6 +61,10 @@ struct efa_user_mmap_entry { op(EFA_RDMA_READ_BYTES, "rdma_read_bytes") \ op(EFA_RDMA_READ_WR_ERR, "rdma_read_wr_err") \ op(EFA_RDMA_READ_RESP_BYTES, "rdma_read_resp_bytes") \ + op(EFA_RDMA_WRITE_WRS, "rdma_write_wrs") \ + op(EFA_RDMA_WRITE_BYTES, "rdma_write_bytes") \ + op(EFA_RDMA_WRITE_WR_ERR, "rdma_write_wr_err") \ + op(EFA_RDMA_WRITE_RECV_BYTES, "rdma_write_recv_bytes") \ #define EFA_STATS_ENUM(ename, name) ename, #define EFA_STATS_STR(ename, nam) \ @@ -449,12 +453,12 @@ int efa_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) ibdev_dbg(&dev->ibdev, "Destroy qp[%u]\n", ibqp->qp_num); - efa_qp_user_mmap_entries_remove(qp); - err = efa_destroy_qp_handle(dev, qp->qp_handle); if (err) return err; + efa_qp_user_mmap_entries_remove(qp); + if (qp->rq_cpu_addr) { ibdev_dbg(&dev->ibdev, "qp->cpu_addr[0x%p] freed: size[%lu], dma[%pad]\n", @@ -1013,8 +1017,8 @@ int efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) "Destroy cq[%d] virt[0x%p] freed: size[%lu], dma[%pad]\n", cq->cq_idx, cq->cpu_addr, cq->size, &cq->dma_addr); - efa_cq_user_mmap_entries_remove(cq); efa_destroy_cq_idx(dev, cq->cq_idx); + efa_cq_user_mmap_entries_remove(cq); if (cq->eq) { xa_erase(&dev->cqs_xa, cq->cq_idx); synchronize_irq(cq->eq->irq.irqn); @@ -2080,6 +2084,7 @@ static int efa_fill_port_stats(struct efa_dev *dev, struct rdma_hw_stats *stats, { struct efa_com_get_stats_params params = {}; union efa_com_get_stats_result result; + struct efa_com_rdma_write_stats *rws; struct efa_com_rdma_read_stats *rrs; struct efa_com_messages_stats *ms; struct efa_com_basic_stats *bs; @@ -2121,6 +2126,19 @@ static int efa_fill_port_stats(struct efa_dev *dev, struct rdma_hw_stats *stats, stats->value[EFA_RDMA_READ_WR_ERR] = rrs->read_wr_err; stats->value[EFA_RDMA_READ_RESP_BYTES] = rrs->read_resp_bytes; + if (EFA_DEV_CAP(dev, RDMA_WRITE)) { + params.type = EFA_ADMIN_GET_STATS_TYPE_RDMA_WRITE; + err = efa_com_get_stats(&dev->edev, ¶ms, &result); + if (err) + return err; + + rws = &result.rdma_write_stats; + stats->value[EFA_RDMA_WRITE_WRS] = rws->write_wrs; + stats->value[EFA_RDMA_WRITE_BYTES] = rws->write_bytes; + stats->value[EFA_RDMA_WRITE_WR_ERR] = rws->write_wr_err; + stats->value[EFA_RDMA_WRITE_RECV_BYTES] = rws->write_recv_bytes; + } + return ARRAY_SIZE(efa_port_stats_descs); } diff --git a/drivers/infiniband/hw/erdma/erdma_hw.h b/drivers/infiniband/hw/erdma/erdma_hw.h index a882b57aa118..9d316fdc6f9a 100644 --- a/drivers/infiniband/hw/erdma/erdma_hw.h +++ b/drivers/infiniband/hw/erdma/erdma_hw.h @@ -228,7 +228,7 @@ struct erdma_cmdq_ext_db_req { /* create_cq cfg1 */ #define ERDMA_CMD_CREATE_CQ_MTT_CNT_MASK GENMASK(31, 16) -#define ERDMA_CMD_CREATE_CQ_MTT_TYPE_MASK BIT(15) +#define ERDMA_CMD_CREATE_CQ_MTT_LEVEL_MASK BIT(15) #define ERDMA_CMD_CREATE_CQ_MTT_DB_CFG_MASK BIT(11) #define 
ERDMA_CMD_CREATE_CQ_EQN_MASK GENMASK(9, 0) @@ -248,6 +248,7 @@ struct erdma_cmdq_create_cq_req { /* regmr/deregmr cfg0 */ #define ERDMA_CMD_MR_VALID_MASK BIT(31) +#define ERDMA_CMD_MR_VERSION_MASK GENMASK(30, 28) #define ERDMA_CMD_MR_KEY_MASK GENMASK(27, 20) #define ERDMA_CMD_MR_MPT_IDX_MASK GENMASK(19, 0) @@ -258,7 +259,8 @@ struct erdma_cmdq_create_cq_req { /* regmr cfg2 */ #define ERDMA_CMD_REGMR_PAGESIZE_MASK GENMASK(31, 27) -#define ERDMA_CMD_REGMR_MTT_TYPE_MASK GENMASK(21, 20) +#define ERDMA_CMD_REGMR_MTT_PAGESIZE_MASK GENMASK(26, 24) +#define ERDMA_CMD_REGMR_MTT_LEVEL_MASK GENMASK(21, 20) #define ERDMA_CMD_REGMR_MTT_CNT_MASK GENMASK(19, 0) struct erdma_cmdq_reg_mr_req { @@ -268,7 +270,14 @@ struct erdma_cmdq_reg_mr_req { u64 start_va; u32 size; u32 cfg2; - u64 phy_addr[4]; + union { + u64 phy_addr[4]; + struct { + u64 rsvd; + u32 size_h; + u32 mtt_cnt_h; + }; + }; }; struct erdma_cmdq_dereg_mr_req { @@ -309,7 +318,7 @@ struct erdma_cmdq_modify_qp_req { /* create qp mtt_cfg */ #define ERDMA_CMD_CREATE_QP_PAGE_OFFSET_MASK GENMASK(31, 12) #define ERDMA_CMD_CREATE_QP_MTT_CNT_MASK GENMASK(11, 1) -#define ERDMA_CMD_CREATE_QP_MTT_TYPE_MASK BIT(0) +#define ERDMA_CMD_CREATE_QP_MTT_LEVEL_MASK BIT(0) /* create qp db cfg */ #define ERDMA_CMD_CREATE_QP_SQDB_CFG_MASK GENMASK(31, 16) @@ -364,6 +373,7 @@ struct erdma_cmdq_reflush_req { enum { ERDMA_DEV_CAP_FLAGS_ATOMIC = 1 << 7, + ERDMA_DEV_CAP_FLAGS_MTT_VA = 1 << 5, ERDMA_DEV_CAP_FLAGS_EXTEND_DB = 1 << 3, }; diff --git a/drivers/infiniband/hw/erdma/erdma_qp.c b/drivers/infiniband/hw/erdma/erdma_qp.c index 44923c51a01b..6d0330badd68 100644 --- a/drivers/infiniband/hw/erdma/erdma_qp.c +++ b/drivers/infiniband/hw/erdma/erdma_qp.c @@ -410,7 +410,7 @@ static int erdma_push_one_sqe(struct erdma_qp *qp, u16 *pi, /* Copy SGLs to SQE content to accelerate */ memcpy(get_queue_entry(qp->kern_qp.sq_buf, idx + 1, qp->attrs.sq_size, SQEBB_SHIFT), - mr->mem.mtt_buf, MTT_SIZE(mr->mem.mtt_nents)); + mr->mem.mtt->buf, MTT_SIZE(mr->mem.mtt_nents)); wqe_size = sizeof(struct erdma_reg_mr_sqe) + MTT_SIZE(mr->mem.mtt_nents); } else { diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.c b/drivers/infiniband/hw/erdma/erdma_verbs.c index 517676fbb8b1..dcccb6015232 100644 --- a/drivers/infiniband/hw/erdma/erdma_verbs.c +++ b/drivers/infiniband/hw/erdma/erdma_verbs.c @@ -19,6 +19,23 @@ #include "erdma_cm.h" #include "erdma_verbs.h" +static void assemble_qbuf_mtt_for_cmd(struct erdma_mem *mem, u32 *cfg, + u64 *addr0, u64 *addr1) +{ + struct erdma_mtt *mtt = mem->mtt; + + if (mem->mtt_nents > ERDMA_MAX_INLINE_MTT_ENTRIES) { + *addr0 = mtt->buf_dma; + *cfg |= FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_LEVEL_MASK, + ERDMA_MR_MTT_1LEVEL); + } else { + *addr0 = mtt->buf[0]; + memcpy(addr1, mtt->buf + 1, MTT_SIZE(mem->mtt_nents - 1)); + *cfg |= FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_LEVEL_MASK, + ERDMA_MR_MTT_0LEVEL); + } +} + static int create_qp_cmd(struct erdma_ucontext *uctx, struct erdma_qp *qp) { struct erdma_dev *dev = to_edev(qp->ibqp.device); @@ -53,8 +70,8 @@ static int create_qp_cmd(struct erdma_ucontext *uctx, struct erdma_qp *qp) req.sq_mtt_cfg = FIELD_PREP(ERDMA_CMD_CREATE_QP_PAGE_OFFSET_MASK, 0) | FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_CNT_MASK, 1) | - FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_TYPE_MASK, - ERDMA_MR_INLINE_MTT); + FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_LEVEL_MASK, + ERDMA_MR_MTT_0LEVEL); req.rq_mtt_cfg = req.sq_mtt_cfg; req.rq_buf_addr = qp->kern_qp.rq_buf_dma_addr; @@ -67,30 +84,28 @@ static int create_qp_cmd(struct erdma_ucontext *uctx, struct erdma_qp *qp) user_qp = 
&qp->user_qp; req.sq_cqn_mtt_cfg = FIELD_PREP( ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK, - ilog2(user_qp->sq_mtt.page_size) - ERDMA_HW_PAGE_SHIFT); + ilog2(user_qp->sq_mem.page_size) - ERDMA_HW_PAGE_SHIFT); req.sq_cqn_mtt_cfg |= FIELD_PREP(ERDMA_CMD_CREATE_QP_CQN_MASK, qp->scq->cqn); req.rq_cqn_mtt_cfg = FIELD_PREP( ERDMA_CMD_CREATE_QP_PAGE_SIZE_MASK, - ilog2(user_qp->rq_mtt.page_size) - ERDMA_HW_PAGE_SHIFT); + ilog2(user_qp->rq_mem.page_size) - ERDMA_HW_PAGE_SHIFT); req.rq_cqn_mtt_cfg |= FIELD_PREP(ERDMA_CMD_CREATE_QP_CQN_MASK, qp->rcq->cqn); - req.sq_mtt_cfg = user_qp->sq_mtt.page_offset; + req.sq_mtt_cfg = user_qp->sq_mem.page_offset; req.sq_mtt_cfg |= FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_CNT_MASK, - user_qp->sq_mtt.mtt_nents) | - FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_TYPE_MASK, - user_qp->sq_mtt.mtt_type); + user_qp->sq_mem.mtt_nents); - req.rq_mtt_cfg = user_qp->rq_mtt.page_offset; + req.rq_mtt_cfg = user_qp->rq_mem.page_offset; req.rq_mtt_cfg |= FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_CNT_MASK, - user_qp->rq_mtt.mtt_nents) | - FIELD_PREP(ERDMA_CMD_CREATE_QP_MTT_TYPE_MASK, - user_qp->rq_mtt.mtt_type); + user_qp->rq_mem.mtt_nents); - req.sq_buf_addr = user_qp->sq_mtt.mtt_entry[0]; - req.rq_buf_addr = user_qp->rq_mtt.mtt_entry[0]; + assemble_qbuf_mtt_for_cmd(&user_qp->sq_mem, &req.sq_mtt_cfg, + &req.sq_buf_addr, req.sq_mtt_entry); + assemble_qbuf_mtt_for_cmd(&user_qp->rq_mem, &req.rq_mtt_cfg, + &req.rq_buf_addr, req.rq_mtt_entry); req.sq_db_info_dma_addr = user_qp->sq_db_info_dma_addr; req.rq_db_info_dma_addr = user_qp->rq_db_info_dma_addr; @@ -117,13 +132,27 @@ static int create_qp_cmd(struct erdma_ucontext *uctx, struct erdma_qp *qp) static int regmr_cmd(struct erdma_dev *dev, struct erdma_mr *mr) { - struct erdma_cmdq_reg_mr_req req; struct erdma_pd *pd = to_epd(mr->ibmr.pd); - u64 *phy_addr; - int i; + struct erdma_cmdq_reg_mr_req req; + u32 mtt_level; erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA, CMDQ_OPCODE_REG_MR); + if (mr->type == ERDMA_MR_TYPE_FRMR || + mr->mem.page_cnt > ERDMA_MAX_INLINE_MTT_ENTRIES) { + if (mr->mem.mtt->continuous) { + req.phy_addr[0] = mr->mem.mtt->buf_dma; + mtt_level = ERDMA_MR_MTT_1LEVEL; + } else { + req.phy_addr[0] = sg_dma_address(mr->mem.mtt->sglist); + mtt_level = mr->mem.mtt->level; + } + } else { + memcpy(req.phy_addr, mr->mem.mtt->buf, + MTT_SIZE(mr->mem.page_cnt)); + mtt_level = ERDMA_MR_MTT_0LEVEL; + } + req.cfg0 = FIELD_PREP(ERDMA_CMD_MR_VALID_MASK, mr->valid) | FIELD_PREP(ERDMA_CMD_MR_KEY_MASK, mr->ibmr.lkey & 0xFF) | FIELD_PREP(ERDMA_CMD_MR_MPT_IDX_MASK, mr->ibmr.lkey >> 8); @@ -132,7 +161,7 @@ static int regmr_cmd(struct erdma_dev *dev, struct erdma_mr *mr) FIELD_PREP(ERDMA_CMD_REGMR_RIGHT_MASK, mr->access); req.cfg2 = FIELD_PREP(ERDMA_CMD_REGMR_PAGESIZE_MASK, ilog2(mr->mem.page_size)) | - FIELD_PREP(ERDMA_CMD_REGMR_MTT_TYPE_MASK, mr->mem.mtt_type) | + FIELD_PREP(ERDMA_CMD_REGMR_MTT_LEVEL_MASK, mtt_level) | FIELD_PREP(ERDMA_CMD_REGMR_MTT_CNT_MASK, mr->mem.page_cnt); if (mr->type == ERDMA_MR_TYPE_DMA) @@ -143,14 +172,12 @@ static int regmr_cmd(struct erdma_dev *dev, struct erdma_mr *mr) req.size = mr->mem.len; } - if (mr->type == ERDMA_MR_TYPE_FRMR || - mr->mem.mtt_type == ERDMA_MR_INDIRECT_MTT) { - phy_addr = req.phy_addr; - *phy_addr = mr->mem.mtt_entry[0]; - } else { - phy_addr = req.phy_addr; - for (i = 0; i < mr->mem.mtt_nents; i++) - *phy_addr++ = mr->mem.mtt_entry[i]; + if (!mr->mem.mtt->continuous && mr->mem.mtt->level > 1) { + req.cfg0 |= FIELD_PREP(ERDMA_CMD_MR_VERSION_MASK, 1); + req.cfg2 |= FIELD_PREP(ERDMA_CMD_REGMR_MTT_PAGESIZE_MASK, + 
PAGE_SHIFT - ERDMA_HW_PAGE_SHIFT); + req.size_h = upper_32_bits(mr->mem.len); + req.mtt_cnt_h = mr->mem.page_cnt >> 20; } post_cmd: @@ -161,7 +188,7 @@ static int create_cq_cmd(struct erdma_ucontext *uctx, struct erdma_cq *cq) { struct erdma_dev *dev = to_edev(cq->ibcq.device); struct erdma_cmdq_create_cq_req req; - struct erdma_mem *mtt; + struct erdma_mem *mem; u32 page_size; erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA, @@ -179,30 +206,34 @@ static int create_cq_cmd(struct erdma_ucontext *uctx, struct erdma_cq *cq) req.qbuf_addr_h = upper_32_bits(cq->kern_cq.qbuf_dma_addr); req.cfg1 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_CNT_MASK, 1) | - FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_TYPE_MASK, - ERDMA_MR_INLINE_MTT); + FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_LEVEL_MASK, + ERDMA_MR_MTT_0LEVEL); req.first_page_offset = 0; req.cq_db_info_addr = cq->kern_cq.qbuf_dma_addr + (cq->depth << CQE_SHIFT); } else { - mtt = &cq->user_cq.qbuf_mtt; + mem = &cq->user_cq.qbuf_mem; req.cfg0 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_PAGESIZE_MASK, - ilog2(mtt->page_size) - ERDMA_HW_PAGE_SHIFT); - if (mtt->mtt_nents == 1) { - req.qbuf_addr_l = lower_32_bits(*(u64 *)mtt->mtt_buf); - req.qbuf_addr_h = upper_32_bits(*(u64 *)mtt->mtt_buf); + ilog2(mem->page_size) - ERDMA_HW_PAGE_SHIFT); + if (mem->mtt_nents == 1) { + req.qbuf_addr_l = lower_32_bits(mem->mtt->buf[0]); + req.qbuf_addr_h = upper_32_bits(mem->mtt->buf[0]); + req.cfg1 |= + FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_LEVEL_MASK, + ERDMA_MR_MTT_0LEVEL); } else { - req.qbuf_addr_l = lower_32_bits(mtt->mtt_entry[0]); - req.qbuf_addr_h = upper_32_bits(mtt->mtt_entry[0]); + req.qbuf_addr_l = lower_32_bits(mem->mtt->buf_dma); + req.qbuf_addr_h = upper_32_bits(mem->mtt->buf_dma); + req.cfg1 |= + FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_LEVEL_MASK, + ERDMA_MR_MTT_1LEVEL); } req.cfg1 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_CNT_MASK, - mtt->mtt_nents); - req.cfg1 |= FIELD_PREP(ERDMA_CMD_CREATE_CQ_MTT_TYPE_MASK, - mtt->mtt_type); + mem->mtt_nents); - req.first_page_offset = mtt->page_offset; + req.first_page_offset = mem->page_offset; req.cq_db_info_addr = cq->user_cq.db_info_dma_addr; if (uctx->ext_db.enable) { @@ -481,8 +512,8 @@ static int init_kernel_qp(struct erdma_dev *dev, struct erdma_qp *qp, dev->func_bar + (ERDMA_SDB_SHARED_PAGE_INDEX << PAGE_SHIFT); kqp->hw_rq_db = dev->func_bar + ERDMA_BAR_RQDB_SPACE_OFFSET; - kqp->swr_tbl = vmalloc(qp->attrs.sq_size * sizeof(u64)); - kqp->rwr_tbl = vmalloc(qp->attrs.rq_size * sizeof(u64)); + kqp->swr_tbl = vmalloc_array(qp->attrs.sq_size, sizeof(u64)); + kqp->rwr_tbl = vmalloc_array(qp->attrs.rq_size, sizeof(u64)); if (!kqp->swr_tbl || !kqp->rwr_tbl) goto err_out; @@ -508,12 +539,223 @@ err_out: return -ENOMEM; } +static void erdma_fill_bottom_mtt(struct erdma_dev *dev, struct erdma_mem *mem) +{ + struct erdma_mtt *mtt = mem->mtt; + struct ib_block_iter biter; + u32 idx = 0; + + while (mtt->low_level) + mtt = mtt->low_level; + + rdma_umem_for_each_dma_block(mem->umem, &biter, mem->page_size) + mtt->buf[idx++] = rdma_block_iter_dma_address(&biter); +} + +static struct erdma_mtt *erdma_create_cont_mtt(struct erdma_dev *dev, + size_t size) +{ + struct erdma_mtt *mtt; + + mtt = kzalloc(sizeof(*mtt), GFP_KERNEL); + if (!mtt) + return ERR_PTR(-ENOMEM); + + mtt->size = size; + mtt->buf = kzalloc(mtt->size, GFP_KERNEL); + if (!mtt->buf) + goto err_free_mtt; + + mtt->continuous = true; + mtt->buf_dma = dma_map_single(&dev->pdev->dev, mtt->buf, mtt->size, + DMA_TO_DEVICE); + if (dma_mapping_error(&dev->pdev->dev, mtt->buf_dma)) + goto err_free_mtt_buf; + + 
return mtt; + +err_free_mtt_buf: + kfree(mtt->buf); + +err_free_mtt: + kfree(mtt); + + return ERR_PTR(-ENOMEM); +} + +static void erdma_destroy_mtt_buf_sg(struct erdma_dev *dev, + struct erdma_mtt *mtt) +{ + dma_unmap_sg(&dev->pdev->dev, mtt->sglist, mtt->nsg, DMA_TO_DEVICE); + vfree(mtt->sglist); +} + +static void erdma_destroy_scatter_mtt(struct erdma_dev *dev, + struct erdma_mtt *mtt) +{ + erdma_destroy_mtt_buf_sg(dev, mtt); + vfree(mtt->buf); + kfree(mtt); +} + +static void erdma_init_middle_mtt(struct erdma_mtt *mtt, + struct erdma_mtt *low_mtt) +{ + struct scatterlist *sg; + u32 idx = 0, i; + + for_each_sg(low_mtt->sglist, sg, low_mtt->nsg, i) + mtt->buf[idx++] = sg_dma_address(sg); +} + +static int erdma_create_mtt_buf_sg(struct erdma_dev *dev, struct erdma_mtt *mtt) +{ + struct scatterlist *sglist; + void *buf = mtt->buf; + u32 npages, i, nsg; + struct page *pg; + + /* Fail if buf is not page aligned */ + if ((uintptr_t)buf & ~PAGE_MASK) + return -EINVAL; + + npages = DIV_ROUND_UP(mtt->size, PAGE_SIZE); + sglist = vzalloc(npages * sizeof(*sglist)); + if (!sglist) + return -ENOMEM; + + sg_init_table(sglist, npages); + for (i = 0; i < npages; i++) { + pg = vmalloc_to_page(buf); + if (!pg) + goto err; + sg_set_page(&sglist[i], pg, PAGE_SIZE, 0); + buf += PAGE_SIZE; + } + + nsg = dma_map_sg(&dev->pdev->dev, sglist, npages, DMA_TO_DEVICE); + if (!nsg) + goto err; + + mtt->sglist = sglist; + mtt->nsg = nsg; + + return 0; +err: + vfree(sglist); + + return -ENOMEM; +} + +static struct erdma_mtt *erdma_create_scatter_mtt(struct erdma_dev *dev, + size_t size) +{ + struct erdma_mtt *mtt; + int ret = -ENOMEM; + + mtt = kzalloc(sizeof(*mtt), GFP_KERNEL); + if (!mtt) + return ERR_PTR(-ENOMEM); + + mtt->size = ALIGN(size, PAGE_SIZE); + mtt->buf = vzalloc(mtt->size); + mtt->continuous = false; + if (!mtt->buf) + goto err_free_mtt; + + ret = erdma_create_mtt_buf_sg(dev, mtt); + if (ret) + goto err_free_mtt_buf; + + ibdev_dbg(&dev->ibdev, "create scatter mtt, size:%lu, nsg:%u\n", + mtt->size, mtt->nsg); + + return mtt; + +err_free_mtt_buf: + vfree(mtt->buf); + +err_free_mtt: + kfree(mtt); + + return ERR_PTR(ret); +}
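erdma_create_mtt(), which follows, stacks these scatter MTTs into a hierarchy: the bottom level holds the data-page addresses, and each middle level holds the DMA addresses of the level below, until the top table fits in a single mapping. The level arithmetic can be sanity-checked in isolation; this sketch assumes 8-byte entries and 4 KiB pages (matching MTT_SIZE() and PAGE_SIZE here) and applies only to regions large enough to need a scatter MTT at all.

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE_B     4096u	/* assumed 4 KiB pages */
#define MTT_ENTRY_SIZE  8u	/* one u64 dma address per entry */

/* Pages needed to hold n 8-byte entries. */
static uint64_t mtt_pages(uint64_t entries)
{
	return (entries * MTT_ENTRY_SIZE + PAGE_SIZE_B - 1) / PAGE_SIZE_B;
}

/* Levels erdma_create_mtt() would build for a region of 'npages' data
 * pages: each level shrinks the table by up to 512x, and the driver
 * gives up past level 3. 'nsg' is approximated as whole pages here;
 * the real code counts DMA segments after dma_map_sg(), which can be
 * fewer. */
static int mtt_levels(uint64_t npages)
{
	uint64_t nsg = mtt_pages(npages);
	int level = 1;			/* bottom scatter MTT */

	while (nsg != 1 && level <= 3) {
		nsg = mtt_pages(nsg);
		level++;
	}
	return nsg == 1 ? level : -1;	/* -1: too large, like the -ENOMEM path */
}

int main(void)
{
	printf("levels(512 pages / 2 MiB)   = %d\n", mtt_levels(512));
	printf("levels(256Ki pages / 1 GiB) = %d\n", mtt_levels(256 * 1024));
	return 0;
}

Under these assumptions each added level multiplies the addressable entry count by 512, which is why three levels are enough for anything the command format can describe.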
+ +static struct erdma_mtt *erdma_create_mtt(struct erdma_dev *dev, size_t size, + bool force_continuous) +{ + struct erdma_mtt *mtt, *tmp_mtt; + int ret, level = 0; + + ibdev_dbg(&dev->ibdev, "create_mtt, size:%lu, force cont:%d\n", size, + force_continuous); + + if (!(dev->attrs.cap_flags & ERDMA_DEV_CAP_FLAGS_MTT_VA)) + force_continuous = true; + + if (force_continuous) + return erdma_create_cont_mtt(dev, size); + + mtt = erdma_create_scatter_mtt(dev, size); + if (IS_ERR(mtt)) + return mtt; + level = 1; + + /* converge the mtt table. */ + while (mtt->nsg != 1 && level <= 3) { + tmp_mtt = erdma_create_scatter_mtt(dev, MTT_SIZE(mtt->nsg)); + if (IS_ERR(tmp_mtt)) { + ret = PTR_ERR(tmp_mtt); + goto err_free_mtt; + } + erdma_init_middle_mtt(tmp_mtt, mtt); + tmp_mtt->low_level = mtt; + mtt = tmp_mtt; + level++; + } + + if (level > 3) { + ret = -ENOMEM; + goto err_free_mtt; + } + + mtt->level = level; + ibdev_dbg(&dev->ibdev, "top mtt: level:%d, dma_addr 0x%llx\n", + mtt->level, mtt->sglist[0].dma_address); + + return mtt; +err_free_mtt: + while (mtt) { + tmp_mtt = mtt->low_level; + erdma_destroy_scatter_mtt(dev, mtt); + mtt = tmp_mtt; + } + + return ERR_PTR(ret); +} + +static void erdma_destroy_mtt(struct erdma_dev *dev, struct erdma_mtt *mtt) +{ + struct erdma_mtt *tmp_mtt; + + if (mtt->continuous) { + dma_unmap_single(&dev->pdev->dev, mtt->buf_dma, mtt->size, + DMA_TO_DEVICE); + kfree(mtt->buf); + kfree(mtt); + } else { + while (mtt) { + tmp_mtt = mtt->low_level; + erdma_destroy_scatter_mtt(dev, mtt); + mtt = tmp_mtt; + } + } +} + static int get_mtt_entries(struct erdma_dev *dev, struct erdma_mem *mem, u64 start, u64 len, int access, u64 virt, - unsigned long req_page_size, u8 force_indirect_mtt) + unsigned long req_page_size, bool force_continuous) { - struct ib_block_iter biter; - uint64_t *phy_addr = NULL; int ret = 0; mem->umem = ib_umem_get(&dev->ibdev, start, len, access); @@ -529,38 +771,14 @@ static int get_mtt_entries(struct erdma_dev *dev, struct erdma_mem *mem, mem->page_offset = start & (mem->page_size - 1); mem->mtt_nents = ib_umem_num_dma_blocks(mem->umem, mem->page_size); mem->page_cnt = mem->mtt_nents; - - if (mem->page_cnt > ERDMA_MAX_INLINE_MTT_ENTRIES || - force_indirect_mtt) { - mem->mtt_type = ERDMA_MR_INDIRECT_MTT; - mem->mtt_buf = - alloc_pages_exact(MTT_SIZE(mem->page_cnt), GFP_KERNEL); - if (!mem->mtt_buf) { - ret = -ENOMEM; - goto error_ret; - } - phy_addr = mem->mtt_buf; - } else { - mem->mtt_type = ERDMA_MR_INLINE_MTT; - phy_addr = mem->mtt_entry; + mem->mtt = erdma_create_mtt(dev, MTT_SIZE(mem->page_cnt), + force_continuous); + if (IS_ERR(mem->mtt)) { + ret = PTR_ERR(mem->mtt); + goto error_ret; } - rdma_umem_for_each_dma_block(mem->umem, &biter, mem->page_size) { - *phy_addr = rdma_block_iter_dma_address(&biter); - phy_addr++; - } - - if (mem->mtt_type == ERDMA_MR_INDIRECT_MTT) { - mem->mtt_entry[0] = - dma_map_single(&dev->pdev->dev, mem->mtt_buf, - MTT_SIZE(mem->page_cnt), DMA_TO_DEVICE); - if (dma_mapping_error(&dev->pdev->dev, mem->mtt_entry[0])) { - free_pages_exact(mem->mtt_buf, MTT_SIZE(mem->page_cnt)); - mem->mtt_buf = NULL; - ret = -ENOMEM; - goto error_ret; - } - } + erdma_fill_bottom_mtt(dev, mem); return 0; @@ -575,11 +793,8 @@ error_ret: static void put_mtt_entries(struct erdma_dev *dev, struct erdma_mem *mem) { - if (mem->mtt_buf) { - dma_unmap_single(&dev->pdev->dev, mem->mtt_entry[0], - MTT_SIZE(mem->page_cnt), DMA_TO_DEVICE); - free_pages_exact(mem->mtt_buf, MTT_SIZE(mem->page_cnt)); - } + if (mem->mtt) + erdma_destroy_mtt(dev, mem->mtt); if (mem->umem) { ib_umem_release(mem->umem); @@ -660,18 +875,18 @@ static int init_user_qp(struct erdma_qp *qp, struct erdma_ucontext *uctx, qp->attrs.rq_size * RQE_SIZE)) return -EINVAL; - ret = get_mtt_entries(qp->dev, &qp->user_qp.sq_mtt, va, + ret = get_mtt_entries(qp->dev, &qp->user_qp.sq_mem, va, qp->attrs.sq_size << SQEBB_SHIFT, 0, va, - (SZ_1M - SZ_4K), 1); + (SZ_1M - SZ_4K), true); if (ret) return ret; rq_offset = ALIGN(qp->attrs.sq_size << SQEBB_SHIFT, ERDMA_HW_PAGE_SIZE); qp->user_qp.rq_offset = rq_offset; - ret =
get_mtt_entries(qp->dev, &qp->user_qp.rq_mtt, va + rq_offset, + ret = get_mtt_entries(qp->dev, &qp->user_qp.rq_mem, va + rq_offset, qp->attrs.rq_size << RQE_SHIFT, 0, va + rq_offset, - (SZ_1M - SZ_4K), 1); + (SZ_1M - SZ_4K), true); if (ret) goto put_sq_mtt; @@ -687,18 +902,18 @@ static int init_user_qp(struct erdma_qp *qp, struct erdma_ucontext *uctx, return 0; put_rq_mtt: - put_mtt_entries(qp->dev, &qp->user_qp.rq_mtt); + put_mtt_entries(qp->dev, &qp->user_qp.rq_mem); put_sq_mtt: - put_mtt_entries(qp->dev, &qp->user_qp.sq_mtt); + put_mtt_entries(qp->dev, &qp->user_qp.sq_mem); return ret; } static void free_user_qp(struct erdma_qp *qp, struct erdma_ucontext *uctx) { - put_mtt_entries(qp->dev, &qp->user_qp.sq_mtt); - put_mtt_entries(qp->dev, &qp->user_qp.rq_mtt); + put_mtt_entries(qp->dev, &qp->user_qp.sq_mem); + put_mtt_entries(qp->dev, &qp->user_qp.rq_mem); erdma_unmap_user_dbrecords(uctx, &qp->user_qp.user_dbr_page); } @@ -875,33 +1090,20 @@ struct ib_mr *erdma_ib_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type, mr->mem.page_size = PAGE_SIZE; /* update it later. */ mr->mem.page_cnt = max_num_sg; - mr->mem.mtt_type = ERDMA_MR_INDIRECT_MTT; - mr->mem.mtt_buf = - alloc_pages_exact(MTT_SIZE(mr->mem.page_cnt), GFP_KERNEL); - if (!mr->mem.mtt_buf) { - ret = -ENOMEM; + mr->mem.mtt = erdma_create_mtt(dev, MTT_SIZE(max_num_sg), true); + if (IS_ERR(mr->mem.mtt)) { + ret = PTR_ERR(mr->mem.mtt); goto out_remove_stag; } - mr->mem.mtt_entry[0] = - dma_map_single(&dev->pdev->dev, mr->mem.mtt_buf, - MTT_SIZE(mr->mem.page_cnt), DMA_TO_DEVICE); - if (dma_mapping_error(&dev->pdev->dev, mr->mem.mtt_entry[0])) { - ret = -ENOMEM; - goto out_free_mtt; - } - ret = regmr_cmd(dev, mr); if (ret) - goto out_dma_unmap; + goto out_destroy_mtt; return &mr->ibmr; -out_dma_unmap: - dma_unmap_single(&dev->pdev->dev, mr->mem.mtt_entry[0], - MTT_SIZE(mr->mem.page_cnt), DMA_TO_DEVICE); -out_free_mtt: - free_pages_exact(mr->mem.mtt_buf, MTT_SIZE(mr->mem.page_cnt)); +out_destroy_mtt: + erdma_destroy_mtt(dev, mr->mem.mtt); out_remove_stag: erdma_free_idx(&dev->res_cb[ERDMA_RES_TYPE_STAG_IDX], @@ -920,7 +1122,7 @@ static int erdma_set_page(struct ib_mr *ibmr, u64 addr) if (mr->mem.mtt_nents >= mr->mem.page_cnt) return -1; - *((u64 *)mr->mem.mtt_buf + mr->mem.mtt_nents) = addr; + mr->mem.mtt->buf[mr->mem.mtt_nents] = addr; mr->mem.mtt_nents++; return 0; @@ -956,7 +1158,7 @@ struct ib_mr *erdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len, return ERR_PTR(-ENOMEM); ret = get_mtt_entries(dev, &mr->mem, start, len, access, virt, - SZ_2G - SZ_4K, 0); + SZ_2G - SZ_4K, false); if (ret) goto err_out_free; @@ -1041,7 +1243,7 @@ int erdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) cq->kern_cq.qbuf, cq->kern_cq.qbuf_dma_addr); } else { erdma_unmap_user_dbrecords(ctx, &cq->user_cq.user_dbr_page); - put_mtt_entries(dev, &cq->user_cq.qbuf_mtt); + put_mtt_entries(dev, &cq->user_cq.qbuf_mem); } xa_erase(&dev->cq_xa, cq->cqn); @@ -1089,8 +1291,8 @@ int erdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) WARPPED_BUFSIZE(qp->attrs.sq_size << SQEBB_SHIFT), qp->kern_qp.sq_buf, qp->kern_qp.sq_buf_dma_addr); } else { - put_mtt_entries(dev, &qp->user_qp.sq_mtt); - put_mtt_entries(dev, &qp->user_qp.rq_mtt); + put_mtt_entries(dev, &qp->user_qp.sq_mem); + put_mtt_entries(dev, &qp->user_qp.rq_mem); erdma_unmap_user_dbrecords(ctx, &qp->user_qp.user_dbr_page); } @@ -1379,9 +1581,9 @@ static int erdma_init_user_cq(struct erdma_ucontext *ctx, struct erdma_cq *cq, int ret; struct erdma_dev *dev = to_edev(cq->ibcq.device); - ret = 
get_mtt_entries(dev, &cq->user_cq.qbuf_mtt, ureq->qbuf_va, + ret = get_mtt_entries(dev, &cq->user_cq.qbuf_mem, ureq->qbuf_va, ureq->qbuf_len, 0, ureq->qbuf_va, SZ_64M - SZ_4K, - 1); + true); if (ret) return ret; @@ -1389,7 +1591,7 @@ static int erdma_init_user_cq(struct erdma_ucontext *ctx, struct erdma_cq *cq, &cq->user_cq.user_dbr_page, &cq->user_cq.db_info_dma_addr); if (ret) - put_mtt_entries(dev, &cq->user_cq.qbuf_mtt); + put_mtt_entries(dev, &cq->user_cq.qbuf_mem); return ret; } @@ -1473,7 +1675,7 @@ int erdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, err_free_res: if (!rdma_is_kernel_res(&ibcq->res)) { erdma_unmap_user_dbrecords(ctx, &cq->user_cq.user_dbr_page); - put_mtt_entries(dev, &cq->user_cq.qbuf_mtt); + put_mtt_entries(dev, &cq->user_cq.qbuf_mem); } else { dma_free_coherent(&dev->pdev->dev, WARPPED_BUFSIZE(depth << CQE_SHIFT), diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.h b/drivers/infiniband/hw/erdma/erdma_verbs.h index 429fc3063f98..eb9c0f92fb6f 100644 --- a/drivers/infiniband/hw/erdma/erdma_verbs.h +++ b/drivers/infiniband/hw/erdma/erdma_verbs.h @@ -65,7 +65,7 @@ struct erdma_pd { * MemoryRegion definition. */ #define ERDMA_MAX_INLINE_MTT_ENTRIES 4 -#define MTT_SIZE(mtt_cnt) (mtt_cnt << 3) /* per mtt takes 8 Bytes. */ +#define MTT_SIZE(mtt_cnt) ((mtt_cnt) << 3) /* per mtt entry takes 8 Bytes. */ #define ERDMA_MR_MAX_MTT_CNT 524288 #define ERDMA_MTT_ENTRY_SIZE 8 @@ -73,8 +73,8 @@ struct erdma_pd { #define ERDMA_MR_TYPE_FRMR 1 #define ERDMA_MR_TYPE_DMA 2 -#define ERDMA_MR_INLINE_MTT 0 -#define ERDMA_MR_INDIRECT_MTT 1 +#define ERDMA_MR_MTT_0LEVEL 0 +#define ERDMA_MR_MTT_1LEVEL 1 #define ERDMA_MR_ACC_RA BIT(0) #define ERDMA_MR_ACC_LR BIT(1) @@ -90,10 +90,28 @@ static inline u8 to_erdma_access_flags(int access) (access & IB_ACCESS_REMOTE_ATOMIC ? 
ERDMA_MR_ACC_RA : 0); } +/* Hierarchical storage structure for MTT entries */ +struct erdma_mtt { + u64 *buf; + size_t size; + + bool continuous; + union { + dma_addr_t buf_dma; + struct { + struct scatterlist *sglist; + u32 nsg; + u32 level; + }; + }; + + struct erdma_mtt *low_level; +}; + struct erdma_mem { struct ib_umem *umem; - void *mtt_buf; - u32 mtt_type; + struct erdma_mtt *mtt; + u32 page_size; u32 page_offset; u32 page_cnt; @@ -101,8 +119,6 @@ struct erdma_mem { u64 va; u64 len; - - u64 mtt_entry[ERDMA_MAX_INLINE_MTT_ENTRIES]; }; struct erdma_mr { @@ -121,8 +137,8 @@ struct erdma_user_dbrecords_page { }; struct erdma_uqp { - struct erdma_mem sq_mtt; - struct erdma_mem rq_mtt; + struct erdma_mem sq_mem; + struct erdma_mem rq_mem; dma_addr_t sq_db_info_dma_addr; dma_addr_t rq_db_info_dma_addr; @@ -234,7 +250,7 @@ struct erdma_kcq_info { }; struct erdma_ucq_info { - struct erdma_mem qbuf_mtt; + struct erdma_mem qbuf_mem; struct erdma_user_dbrecords_page *user_dbr_page; dma_addr_t db_info_dma_addr; }; diff --git a/drivers/infiniband/hw/hfi1/Makefile b/drivers/infiniband/hw/hfi1/Makefile index 2e89ec10efed..5d977f363684 100644 --- a/drivers/infiniband/hw/hfi1/Makefile +++ b/drivers/infiniband/hw/hfi1/Makefile @@ -31,6 +31,7 @@ hfi1-y := \ netdev_rx.o \ opfn.o \ pcie.o \ + pin_system.o \ pio.o \ pio_copy.o \ platform.o \ diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c index 77ee77d4000f..bbc957c578e1 100644 --- a/drivers/infiniband/hw/hfi1/affinity.c +++ b/drivers/infiniband/hw/hfi1/affinity.c @@ -230,11 +230,9 @@ static void node_affinity_add_tail(struct hfi1_affinity_node *entry) /* It must be called with node_affinity.lock held */ static struct hfi1_affinity_node *node_affinity_lookup(int node) { - struct list_head *pos; struct hfi1_affinity_node *entry; - list_for_each(pos, &node_affinity.list) { - entry = list_entry(pos, struct hfi1_affinity_node, list); + list_for_each_entry(entry, &node_affinity.list, list) { if (entry->node == node) return entry; } diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c index baaa4406d5e6..0814291a0412 100644 --- a/drivers/infiniband/hw/hfi1/chip.c +++ b/drivers/infiniband/hw/hfi1/chip.c @@ -1461,7 +1461,8 @@ static u64 dc_access_lcb_cntr(const struct cntr_entry *entry, void *context, ret = write_lcb_csr(dd, csr, data); if (ret) { - dd_dev_err(dd, "Could not acquire LCB for counter 0x%x", csr); + if (!(dd->flags & HFI1_SHUTDOWN)) + dd_dev_err(dd, "Could not acquire LCB for counter 0x%x", csr); return 0; } @@ -6160,7 +6161,7 @@ static int request_host_lcb_access(struct hfi1_devdata *dd) ret = do_8051_command(dd, HCMD_MISC, (u64)HCMD_MISC_REQUEST_LCB_ACCESS << LOAD_DATA_FIELD_ID_SHIFT, NULL); - if (ret != HCMD_SUCCESS) { + if (ret != HCMD_SUCCESS && !(dd->flags & HFI1_SHUTDOWN)) { dd_dev_err(dd, "%s: command failed with error %d\n", __func__, ret); } @@ -6241,7 +6242,8 @@ int acquire_lcb_access(struct hfi1_devdata *dd, int sleep_ok) if (dd->lcb_access_count == 0) { ret = request_host_lcb_access(dd); if (ret) { - dd_dev_err(dd, + if (!(dd->flags & HFI1_SHUTDOWN)) + dd_dev_err(dd, "%s: unable to acquire LCB access, err %d\n", __func__, ret); goto done; diff --git a/drivers/infiniband/hw/hfi1/device.c b/drivers/infiniband/hw/hfi1/device.c index 05be0d119f79..b0a00b7aaec5 100644 --- a/drivers/infiniband/hw/hfi1/device.c +++ b/drivers/infiniband/hw/hfi1/device.c @@ -10,8 +10,29 @@ #include "hfi.h" #include "device.h" -static struct class *class; -static struct class *user_class; 
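The hfi1 device.c hunk that follows replaces the two heap-allocated class pointers removed just above with statically defined struct class objects, trading class_create()/class_destroy() for class_register()/class_unregister(). The sketch below only illustrates that pattern; it is not part of the patch, it assumes a recent kernel where class_register() accepts a const struct class pointer, and the example_* identifiers are hypothetical (the devnode body and the 0600 mode mirror the hfi1 code).

/* Minimal sketch: a const, statically defined struct class with a devnode
 * callback, registered at module init. Illustrative only, not driver code.
 */
#include <linux/device.h>
#include <linux/module.h>
#include <linux/slab.h>

static char *example_devnode(const struct device *dev, umode_t *mode)
{
	if (mode)
		*mode = 0600;	/* permissions of nodes created under this class */
	return kasprintf(GFP_KERNEL, "%s", dev_name(dev));
}

static const struct class example_class = {
	.name	 = "example",
	.devnode = example_devnode,
};

static int __init example_init(void)
{
	/* class_register() replaces class_create(); because the object is
	 * static, teardown is class_unregister(), not class_destroy().
	 */
	return class_register(&example_class);
}

static void __exit example_exit(void)
{
	class_unregister(&example_class);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("Dual BSD/GPL");

Devices later created with device_create(&example_class, ...) get their node names and modes from the devnode callback, which is how hfi1_cdev_init() uses the two classes in the hunk below.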
+static char *hfi1_devnode(const struct device *dev, umode_t *mode) +{ + if (mode) + *mode = 0600; + return kasprintf(GFP_KERNEL, "%s", dev_name(dev)); +} + +static const struct class class = { + .name = "hfi1", + .devnode = hfi1_devnode, +}; + +static char *hfi1_user_devnode(const struct device *dev, umode_t *mode) +{ + if (mode) + *mode = 0666; + return kasprintf(GFP_KERNEL, "%s", dev_name(dev)); +} + +static const struct class user_class = { + .name = "hfi1_user", + .devnode = hfi1_user_devnode, +}; static dev_t hfi1_dev; int hfi1_cdev_init(int minor, const char *name, @@ -37,9 +58,9 @@ int hfi1_cdev_init(int minor, const char *name, } if (user_accessible) - device = device_create(user_class, NULL, dev, NULL, "%s", name); + device = device_create(&user_class, NULL, dev, NULL, "%s", name); else - device = device_create(class, NULL, dev, NULL, "%s", name); + device = device_create(&class, NULL, dev, NULL, "%s", name); if (IS_ERR(device)) { ret = PTR_ERR(device); @@ -72,26 +93,6 @@ const char *class_name(void) return hfi1_class_name; } -static char *hfi1_devnode(const struct device *dev, umode_t *mode) -{ - if (mode) - *mode = 0600; - return kasprintf(GFP_KERNEL, "%s", dev_name(dev)); -} - -static const char *hfi1_class_name_user = "hfi1_user"; -static const char *class_name_user(void) -{ - return hfi1_class_name_user; -} - -static char *hfi1_user_devnode(const struct device *dev, umode_t *mode) -{ - if (mode) - *mode = 0666; - return kasprintf(GFP_KERNEL, "%s", dev_name(dev)); -} - int __init dev_init(void) { int ret; @@ -102,27 +103,21 @@ int __init dev_init(void) goto done; } - class = class_create(class_name()); - if (IS_ERR(class)) { - ret = PTR_ERR(class); + ret = class_register(&class); + if (ret) { pr_err("Could not create device class (err %d)\n", -ret); unregister_chrdev_region(hfi1_dev, HFI1_NMINORS); goto done; } - class->devnode = hfi1_devnode; - user_class = class_create(class_name_user()); - if (IS_ERR(user_class)) { - ret = PTR_ERR(user_class); + ret = class_register(&user_class); + if (ret) { pr_err("Could not create device class for user accessible files (err %d)\n", -ret); - class_destroy(class); - class = NULL; - user_class = NULL; + class_unregister(&class); unregister_chrdev_region(hfi1_dev, HFI1_NMINORS); goto done; } - user_class->devnode = hfi1_user_devnode; done: return ret; @@ -130,11 +125,8 @@ done: void dev_cleanup(void) { - class_destroy(class); - class = NULL; - - class_destroy(user_class); - user_class = NULL; + class_unregister(&class); + class_unregister(&user_class); unregister_chrdev_region(hfi1_dev, HFI1_NMINORS); } diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 7fa9cd39254f..38772e52d7ed 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */ /* - * Copyright(c) 2020 Cornelis Networks, Inc. + * Copyright(c) 2020-2023 Cornelis Networks, Inc. * Copyright(c) 2015-2020 Intel Corporation. */ @@ -1378,8 +1378,6 @@ struct hfi1_devdata { #define PT_INVALID 3 struct tid_rb_node; -struct mmu_rb_node; -struct mmu_rb_handler; /* Private data for file operations */ struct hfi1_filedata { diff --git a/drivers/infiniband/hw/hfi1/pin_system.c b/drivers/infiniband/hw/hfi1/pin_system.c new file mode 100644 index 000000000000..384f722093e0 --- /dev/null +++ b/drivers/infiniband/hw/hfi1/pin_system.c @@ -0,0 +1,474 @@ +// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause +/* + * Copyright(c) 2023 - Cornelis Networks, Inc. 
+ */ + +#include <linux/types.h> + +#include "hfi.h" +#include "common.h" +#include "device.h" +#include "pinning.h" +#include "mmu_rb.h" +#include "user_sdma.h" +#include "trace.h" + +struct sdma_mmu_node { + struct mmu_rb_node rb; + struct hfi1_user_sdma_pkt_q *pq; + struct page **pages; + unsigned int npages; +}; + +static bool sdma_rb_filter(struct mmu_rb_node *node, unsigned long addr, + unsigned long len); +static int sdma_rb_evict(void *arg, struct mmu_rb_node *mnode, void *arg2, + bool *stop); +static void sdma_rb_remove(void *arg, struct mmu_rb_node *mnode); + +static struct mmu_rb_ops sdma_rb_ops = { + .filter = sdma_rb_filter, + .evict = sdma_rb_evict, + .remove = sdma_rb_remove, +}; + +int hfi1_init_system_pinning(struct hfi1_user_sdma_pkt_q *pq) +{ + struct hfi1_devdata *dd = pq->dd; + int ret; + + ret = hfi1_mmu_rb_register(pq, &sdma_rb_ops, dd->pport->hfi1_wq, + &pq->handler); + if (ret) + dd_dev_err(dd, + "[%u:%u] Failed to register system memory DMA support with MMU: %d\n", + pq->ctxt, pq->subctxt, ret); + return ret; +} + +void hfi1_free_system_pinning(struct hfi1_user_sdma_pkt_q *pq) +{ + if (pq->handler) + hfi1_mmu_rb_unregister(pq->handler); +} + +static u32 sdma_cache_evict(struct hfi1_user_sdma_pkt_q *pq, u32 npages) +{ + struct evict_data evict_data; + + evict_data.cleared = 0; + evict_data.target = npages; + hfi1_mmu_rb_evict(pq->handler, &evict_data); + return evict_data.cleared; +} + +static void unpin_vector_pages(struct mm_struct *mm, struct page **pages, + unsigned int start, unsigned int npages) +{ + hfi1_release_user_pages(mm, pages + start, npages, false); + kfree(pages); +} + +static inline struct mm_struct *mm_from_sdma_node(struct sdma_mmu_node *node) +{ + return node->rb.handler->mn.mm; +} + +static void free_system_node(struct sdma_mmu_node *node) +{ + if (node->npages) { + unpin_vector_pages(mm_from_sdma_node(node), node->pages, 0, + node->npages); + atomic_sub(node->npages, &node->pq->n_locked); + } + kfree(node); +} + +/* + * kref_get()'s an additional kref on the returned rb_node to prevent rb_node + * from being released until after rb_node is assigned to an SDMA descriptor + * (struct sdma_desc) under add_system_iovec_to_sdma_packet(), even if the + * virtual address range for rb_node is invalidated between now and then. 
+ */ +static struct sdma_mmu_node *find_system_node(struct mmu_rb_handler *handler, + unsigned long start, + unsigned long end) +{ + struct mmu_rb_node *rb_node; + unsigned long flags; + + spin_lock_irqsave(&handler->lock, flags); + rb_node = hfi1_mmu_rb_get_first(handler, start, (end - start)); + if (!rb_node) { + spin_unlock_irqrestore(&handler->lock, flags); + return NULL; + } + + /* "safety" kref to prevent release before add_system_iovec_to_sdma_packet() */ + kref_get(&rb_node->refcount); + spin_unlock_irqrestore(&handler->lock, flags); + + return container_of(rb_node, struct sdma_mmu_node, rb); +} + +static int pin_system_pages(struct user_sdma_request *req, + uintptr_t start_address, size_t length, + struct sdma_mmu_node *node, int npages) +{ + struct hfi1_user_sdma_pkt_q *pq = req->pq; + int pinned, cleared; + struct page **pages; + + pages = kcalloc(npages, sizeof(*pages), GFP_KERNEL); + if (!pages) + return -ENOMEM; + +retry: + if (!hfi1_can_pin_pages(pq->dd, current->mm, atomic_read(&pq->n_locked), + npages)) { + SDMA_DBG(req, "Evicting: nlocked %u npages %u", + atomic_read(&pq->n_locked), npages); + cleared = sdma_cache_evict(pq, npages); + if (cleared >= npages) + goto retry; + } + + SDMA_DBG(req, "Acquire user pages start_address %lx node->npages %u npages %u", + start_address, node->npages, npages); + pinned = hfi1_acquire_user_pages(current->mm, start_address, npages, 0, + pages); + + if (pinned < 0) { + kfree(pages); + SDMA_DBG(req, "pinned %d", pinned); + return pinned; + } + if (pinned != npages) { + unpin_vector_pages(current->mm, pages, node->npages, pinned); + SDMA_DBG(req, "npages %u pinned %d", npages, pinned); + return -EFAULT; + } + node->rb.addr = start_address; + node->rb.len = length; + node->pages = pages; + node->npages = npages; + atomic_add(pinned, &pq->n_locked); + SDMA_DBG(req, "done. pinned %d", pinned); + return 0; +} + +/* + * kref refcount on *node_p will be 2 on successful addition: one kref from + * kref_init() for mmu_rb_handler and one kref to prevent *node_p from being + * released until after *node_p is assigned to an SDMA descriptor (struct + * sdma_desc) under add_system_iovec_to_sdma_packet(), even if the virtual + * address range for *node_p is invalidated between now and then. 
+ */ +static int add_system_pinning(struct user_sdma_request *req, + struct sdma_mmu_node **node_p, + unsigned long start, unsigned long len) + +{ + struct hfi1_user_sdma_pkt_q *pq = req->pq; + struct sdma_mmu_node *node; + int ret; + + node = kzalloc(sizeof(*node), GFP_KERNEL); + if (!node) + return -ENOMEM; + + /* First kref "moves" to mmu_rb_handler */ + kref_init(&node->rb.refcount); + + /* "safety" kref to prevent release before add_system_iovec_to_sdma_packet() */ + kref_get(&node->rb.refcount); + + node->pq = pq; + ret = pin_system_pages(req, start, len, node, PFN_DOWN(len)); + if (ret == 0) { + ret = hfi1_mmu_rb_insert(pq->handler, &node->rb); + if (ret) + free_system_node(node); + else + *node_p = node; + + return ret; + } + + kfree(node); + return ret; +} + +static int get_system_cache_entry(struct user_sdma_request *req, + struct sdma_mmu_node **node_p, + size_t req_start, size_t req_len) +{ + struct hfi1_user_sdma_pkt_q *pq = req->pq; + u64 start = ALIGN_DOWN(req_start, PAGE_SIZE); + u64 end = PFN_ALIGN(req_start + req_len); + int ret; + + if ((end - start) == 0) { + SDMA_DBG(req, + "Request for empty cache entry req_start %lx req_len %lx start %llx end %llx", + req_start, req_len, start, end); + return -EINVAL; + } + + SDMA_DBG(req, "req_start %lx req_len %lu", req_start, req_len); + + while (1) { + struct sdma_mmu_node *node = + find_system_node(pq->handler, start, end); + u64 prepend_len = 0; + + SDMA_DBG(req, "node %p start %llx end %llu", node, start, end); + if (!node) { + ret = add_system_pinning(req, node_p, start, + end - start); + if (ret == -EEXIST) { + /* + * Another execution context has inserted a + * conficting entry first. + */ + continue; + } + return ret; + } + + if (node->rb.addr <= start) { + /* + * This entry covers at least part of the region. If it doesn't extend + * to the end, then this will be called again for the next segment. + */ + *node_p = node; + return 0; + } + + SDMA_DBG(req, "prepend: node->rb.addr %lx, node->rb.refcount %d", + node->rb.addr, kref_read(&node->rb.refcount)); + prepend_len = node->rb.addr - start; + + /* + * This node will not be returned, instead a new node + * will be. So release the reference. + */ + kref_put(&node->rb.refcount, hfi1_mmu_rb_release); + + /* Prepend a node to cover the beginning of the allocation */ + ret = add_system_pinning(req, node_p, start, prepend_len); + if (ret == -EEXIST) { + /* Another execution context has inserted a conficting entry first. */ + continue; + } + return ret; + } +} + +static void sdma_mmu_rb_node_get(void *ctx) +{ + struct mmu_rb_node *node = ctx; + + kref_get(&node->refcount); +} + +static void sdma_mmu_rb_node_put(void *ctx) +{ + struct sdma_mmu_node *node = ctx; + + kref_put(&node->rb.refcount, hfi1_mmu_rb_release); +} + +static int add_mapping_to_sdma_packet(struct user_sdma_request *req, + struct user_sdma_txreq *tx, + struct sdma_mmu_node *cache_entry, + size_t start, + size_t from_this_cache_entry) +{ + struct hfi1_user_sdma_pkt_q *pq = req->pq; + unsigned int page_offset; + unsigned int from_this_page; + size_t page_index; + void *ctx; + int ret; + + /* + * Because the cache may be more fragmented than the memory that is being accessed, + * it's not strictly necessary to have a descriptor per cache entry. 
+ */ + + while (from_this_cache_entry) { + page_index = PFN_DOWN(start - cache_entry->rb.addr); + + if (page_index >= cache_entry->npages) { + SDMA_DBG(req, + "Request for page_index %zu >= cache_entry->npages %u", + page_index, cache_entry->npages); + return -EINVAL; + } + + page_offset = start - ALIGN_DOWN(start, PAGE_SIZE); + from_this_page = PAGE_SIZE - page_offset; + + if (from_this_page < from_this_cache_entry) { + ctx = NULL; + } else { + /* + * In the case they are equal the next line has no practical effect, + * but it's better to do a register to register copy than a conditional + * branch. + */ + from_this_page = from_this_cache_entry; + ctx = cache_entry; + } + + ret = sdma_txadd_page(pq->dd, &tx->txreq, + cache_entry->pages[page_index], + page_offset, from_this_page, + ctx, + sdma_mmu_rb_node_get, + sdma_mmu_rb_node_put); + if (ret) { + /* + * When there's a failure, the entire request is freed by + * user_sdma_send_pkts(). + */ + SDMA_DBG(req, + "sdma_txadd_page failed %d page_index %lu page_offset %u from_this_page %u", + ret, page_index, page_offset, from_this_page); + return ret; + } + start += from_this_page; + from_this_cache_entry -= from_this_page; + } + return 0; +} + +static int add_system_iovec_to_sdma_packet(struct user_sdma_request *req, + struct user_sdma_txreq *tx, + struct user_sdma_iovec *iovec, + size_t from_this_iovec) +{ + while (from_this_iovec > 0) { + struct sdma_mmu_node *cache_entry; + size_t from_this_cache_entry; + size_t start; + int ret; + + start = (uintptr_t)iovec->iov.iov_base + iovec->offset; + ret = get_system_cache_entry(req, &cache_entry, start, + from_this_iovec); + if (ret) { + SDMA_DBG(req, "pin system segment failed %d", ret); + return ret; + } + + from_this_cache_entry = cache_entry->rb.len - (start - cache_entry->rb.addr); + if (from_this_cache_entry > from_this_iovec) + from_this_cache_entry = from_this_iovec; + + ret = add_mapping_to_sdma_packet(req, tx, cache_entry, start, + from_this_cache_entry); + + /* + * Done adding cache_entry to zero or more sdma_desc. Can + * kref_put() the "safety" kref taken under + * get_system_cache_entry(). + */ + kref_put(&cache_entry->rb.refcount, hfi1_mmu_rb_release); + + if (ret) { + SDMA_DBG(req, "add system segment failed %d", ret); + return ret; + } + + iovec->offset += from_this_cache_entry; + from_this_iovec -= from_this_cache_entry; + } + + return 0; +} + +/* + * Add up to pkt_data_remaining bytes to the txreq, starting at the current + * offset in the given iovec entry and continuing until all data has been added + * to the iovec or the iovec entry type changes. + * + * On success, prior to returning, adjust pkt_data_remaining, req->iov_idx, and + * the offset value in req->iov[req->iov_idx] to reflect the data that has been + * consumed. + */ +int hfi1_add_pages_to_sdma_packet(struct user_sdma_request *req, + struct user_sdma_txreq *tx, + struct user_sdma_iovec *iovec, + u32 *pkt_data_remaining) +{ + size_t remaining_to_add = *pkt_data_remaining; + /* + * Walk through iovec entries, ensure the associated pages + * are pinned and mapped, add data to the packet until no more + * data remains to be added or the iovec entry type changes. + */ + while (remaining_to_add > 0) { + struct user_sdma_iovec *cur_iovec; + size_t from_this_iovec; + int ret; + + cur_iovec = iovec; + from_this_iovec = iovec->iov.iov_len - iovec->offset; + + if (from_this_iovec > remaining_to_add) { + from_this_iovec = remaining_to_add; + } else { + /* The current iovec entry will be consumed by this pass. 
*/ + req->iov_idx++; + iovec++; + } + + ret = add_system_iovec_to_sdma_packet(req, tx, cur_iovec, + from_this_iovec); + if (ret) + return ret; + + remaining_to_add -= from_this_iovec; + } + *pkt_data_remaining = remaining_to_add; + + return 0; +} + +static bool sdma_rb_filter(struct mmu_rb_node *node, unsigned long addr, + unsigned long len) +{ + return (bool)(node->addr == addr); +} + +/* + * Return 1 to remove the node from the rb tree and call the remove op. + * + * Called with the rb tree lock held. + */ +static int sdma_rb_evict(void *arg, struct mmu_rb_node *mnode, + void *evict_arg, bool *stop) +{ + struct sdma_mmu_node *node = + container_of(mnode, struct sdma_mmu_node, rb); + struct evict_data *evict_data = evict_arg; + + /* this node will be evicted, add its pages to our count */ + evict_data->cleared += node->npages; + + /* have enough pages been cleared? */ + if (evict_data->cleared >= evict_data->target) + *stop = true; + + return 1; /* remove this node */ +} + +static void sdma_rb_remove(void *arg, struct mmu_rb_node *mnode) +{ + struct sdma_mmu_node *node = + container_of(mnode, struct sdma_mmu_node, rb); + + free_system_node(node); +} diff --git a/drivers/infiniband/hw/hfi1/pinning.h b/drivers/infiniband/hw/hfi1/pinning.h new file mode 100644 index 000000000000..a814a3aa9654 --- /dev/null +++ b/drivers/infiniband/hw/hfi1/pinning.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */ +/* + * Copyright(c) 2023 Cornelis Networks, Inc. + */ +#ifndef _HFI1_PINNING_H +#define _HFI1_PINNING_H + +struct hfi1_user_sdma_pkt_q; +struct user_sdma_request; +struct user_sdma_txreq; +struct user_sdma_iovec; + +int hfi1_init_system_pinning(struct hfi1_user_sdma_pkt_q *pq); +void hfi1_free_system_pinning(struct hfi1_user_sdma_pkt_q *pq); +int hfi1_add_pages_to_sdma_packet(struct user_sdma_request *req, + struct user_sdma_txreq *tx, + struct user_sdma_iovec *iovec, + u32 *pkt_data_remaining); + +#endif /* _HFI1_PINNING_H */ diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c index 62e7dc9bea7b..dfea53e0fdeb 100644 --- a/drivers/infiniband/hw/hfi1/pio.c +++ b/drivers/infiniband/hw/hfi1/pio.c @@ -1893,9 +1893,7 @@ int pio_map_init(struct hfi1_devdata *dd, u8 port, u8 num_vls, u8 *vl_scontexts) vl_scontexts[i] = sc_per_vl + (extra > 0 ? 1 : 0); } /* build new map */ - newmap = kzalloc(sizeof(*newmap) + - roundup_pow_of_two(num_vls) * - sizeof(struct pio_map_elem *), + newmap = kzalloc(struct_size(newmap, map, roundup_pow_of_two(num_vls)), GFP_KERNEL); if (!newmap) goto bail; @@ -1910,9 +1908,8 @@ int pio_map_init(struct hfi1_devdata *dd, u8 port, u8 num_vls, u8 *vl_scontexts) int sz = roundup_pow_of_two(vl_scontexts[i]); /* only allocate once */ - newmap->map[i] = kzalloc(sizeof(*newmap->map[i]) + - sz * sizeof(struct - send_context *), + newmap->map[i] = kzalloc(struct_size(newmap->map[i], + ksc, sz), GFP_KERNEL); if (!newmap->map[i]) goto bail; diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c index 02bd62b857b7..29ae7beb9b03 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.c +++ b/drivers/infiniband/hw/hfi1/user_sdma.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause /* - * Copyright(c) 2020 - Cornelis Networks, Inc. + * Copyright(c) 2020 - 2023 Cornelis Networks, Inc. * Copyright(c) 2015 - 2018 Intel Corporation. 
*/ @@ -60,22 +60,6 @@ static int defer_packet_queue( uint seq, bool pkts_sent); static void activate_packet_queue(struct iowait *wait, int reason); -static bool sdma_rb_filter(struct mmu_rb_node *node, unsigned long addr, - unsigned long len); -static int sdma_rb_evict(void *arg, struct mmu_rb_node *mnode, - void *arg2, bool *stop); -static void sdma_rb_remove(void *arg, struct mmu_rb_node *mnode); - -static struct mmu_rb_ops sdma_rb_ops = { - .filter = sdma_rb_filter, - .evict = sdma_rb_evict, - .remove = sdma_rb_remove, -}; - -static int add_system_pages_to_sdma_packet(struct user_sdma_request *req, - struct user_sdma_txreq *tx, - struct user_sdma_iovec *iovec, - u32 *pkt_remaining); static int defer_packet_queue( struct sdma_engine *sde, @@ -185,12 +169,9 @@ int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt, cq->nentries = hfi1_sdma_comp_ring_size; - ret = hfi1_mmu_rb_register(pq, &sdma_rb_ops, dd->pport->hfi1_wq, - &pq->handler); - if (ret) { - dd_dev_err(dd, "Failed to register with MMU %d", ret); + ret = hfi1_init_system_pinning(pq); + if (ret) goto pq_mmu_fail; - } rcu_assign_pointer(fd->pq, pq); fd->cq = cq; @@ -249,8 +230,7 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd, pq->wait, !atomic_read(&pq->n_reqs)); kfree(pq->reqs); - if (pq->handler) - hfi1_mmu_rb_unregister(pq->handler); + hfi1_free_system_pinning(pq); bitmap_free(pq->req_in_use); kmem_cache_destroy(pq->txreq_cache); flush_pq_iowait(pq); @@ -821,8 +801,8 @@ static int user_sdma_send_pkts(struct user_sdma_request *req, u16 maxpkts) req->tidoffset += datalen; req->sent += datalen; while (datalen) { - ret = add_system_pages_to_sdma_packet(req, tx, iovec, - &datalen); + ret = hfi1_add_pages_to_sdma_packet(req, tx, iovec, + &datalen); if (ret) goto free_txreq; iovec = &req->iovs[req->iov_idx]; @@ -860,17 +840,6 @@ free_tx: return ret; } -static u32 sdma_cache_evict(struct hfi1_user_sdma_pkt_q *pq, u32 npages) -{ - struct evict_data evict_data; - struct mmu_rb_handler *handler = pq->handler; - - evict_data.cleared = 0; - evict_data.target = npages; - hfi1_mmu_rb_evict(handler, &evict_data); - return evict_data.cleared; -} - static int check_header_template(struct user_sdma_request *req, struct hfi1_pkt_header *hdr, u32 lrhlen, u32 datalen) @@ -1253,401 +1222,3 @@ static inline void set_comp_state(struct hfi1_user_sdma_pkt_q *pq, trace_hfi1_sdma_user_completion(pq->dd, pq->ctxt, pq->subctxt, idx, state, ret); } - -static void unpin_vector_pages(struct mm_struct *mm, struct page **pages, - unsigned int start, unsigned int npages) -{ - hfi1_release_user_pages(mm, pages + start, npages, false); - kfree(pages); -} - -static void free_system_node(struct sdma_mmu_node *node) -{ - if (node->npages) { - unpin_vector_pages(mm_from_sdma_node(node), node->pages, 0, - node->npages); - atomic_sub(node->npages, &node->pq->n_locked); - } - kfree(node); -} - -/* - * kref_get()'s an additional kref on the returned rb_node to prevent rb_node - * from being released until after rb_node is assigned to an SDMA descriptor - * (struct sdma_desc) under add_system_iovec_to_sdma_packet(), even if the - * virtual address range for rb_node is invalidated between now and then. 
- */ -static struct sdma_mmu_node *find_system_node(struct mmu_rb_handler *handler, - unsigned long start, - unsigned long end) -{ - struct mmu_rb_node *rb_node; - unsigned long flags; - - spin_lock_irqsave(&handler->lock, flags); - rb_node = hfi1_mmu_rb_get_first(handler, start, (end - start)); - if (!rb_node) { - spin_unlock_irqrestore(&handler->lock, flags); - return NULL; - } - - /* "safety" kref to prevent release before add_system_iovec_to_sdma_packet() */ - kref_get(&rb_node->refcount); - spin_unlock_irqrestore(&handler->lock, flags); - - return container_of(rb_node, struct sdma_mmu_node, rb); -} - -static int pin_system_pages(struct user_sdma_request *req, - uintptr_t start_address, size_t length, - struct sdma_mmu_node *node, int npages) -{ - struct hfi1_user_sdma_pkt_q *pq = req->pq; - int pinned, cleared; - struct page **pages; - - pages = kcalloc(npages, sizeof(*pages), GFP_KERNEL); - if (!pages) - return -ENOMEM; - -retry: - if (!hfi1_can_pin_pages(pq->dd, current->mm, atomic_read(&pq->n_locked), - npages)) { - SDMA_DBG(req, "Evicting: nlocked %u npages %u", - atomic_read(&pq->n_locked), npages); - cleared = sdma_cache_evict(pq, npages); - if (cleared >= npages) - goto retry; - } - - SDMA_DBG(req, "Acquire user pages start_address %lx node->npages %u npages %u", - start_address, node->npages, npages); - pinned = hfi1_acquire_user_pages(current->mm, start_address, npages, 0, - pages); - - if (pinned < 0) { - kfree(pages); - SDMA_DBG(req, "pinned %d", pinned); - return pinned; - } - if (pinned != npages) { - unpin_vector_pages(current->mm, pages, node->npages, pinned); - SDMA_DBG(req, "npages %u pinned %d", npages, pinned); - return -EFAULT; - } - node->rb.addr = start_address; - node->rb.len = length; - node->pages = pages; - node->npages = npages; - atomic_add(pinned, &pq->n_locked); - SDMA_DBG(req, "done. pinned %d", pinned); - return 0; -} - -/* - * kref refcount on *node_p will be 2 on successful addition: one kref from - * kref_init() for mmu_rb_handler and one kref to prevent *node_p from being - * released until after *node_p is assigned to an SDMA descriptor (struct - * sdma_desc) under add_system_iovec_to_sdma_packet(), even if the virtual - * address range for *node_p is invalidated between now and then. 
- */ -static int add_system_pinning(struct user_sdma_request *req, - struct sdma_mmu_node **node_p, - unsigned long start, unsigned long len) - -{ - struct hfi1_user_sdma_pkt_q *pq = req->pq; - struct sdma_mmu_node *node; - int ret; - - node = kzalloc(sizeof(*node), GFP_KERNEL); - if (!node) - return -ENOMEM; - - /* First kref "moves" to mmu_rb_handler */ - kref_init(&node->rb.refcount); - - /* "safety" kref to prevent release before add_system_iovec_to_sdma_packet() */ - kref_get(&node->rb.refcount); - - node->pq = pq; - ret = pin_system_pages(req, start, len, node, PFN_DOWN(len)); - if (ret == 0) { - ret = hfi1_mmu_rb_insert(pq->handler, &node->rb); - if (ret) - free_system_node(node); - else - *node_p = node; - - return ret; - } - - kfree(node); - return ret; -} - -static int get_system_cache_entry(struct user_sdma_request *req, - struct sdma_mmu_node **node_p, - size_t req_start, size_t req_len) -{ - struct hfi1_user_sdma_pkt_q *pq = req->pq; - u64 start = ALIGN_DOWN(req_start, PAGE_SIZE); - u64 end = PFN_ALIGN(req_start + req_len); - struct mmu_rb_handler *handler = pq->handler; - int ret; - - if ((end - start) == 0) { - SDMA_DBG(req, - "Request for empty cache entry req_start %lx req_len %lx start %llx end %llx", - req_start, req_len, start, end); - return -EINVAL; - } - - SDMA_DBG(req, "req_start %lx req_len %lu", req_start, req_len); - - while (1) { - struct sdma_mmu_node *node = - find_system_node(handler, start, end); - u64 prepend_len = 0; - - SDMA_DBG(req, "node %p start %llx end %llu", node, start, end); - if (!node) { - ret = add_system_pinning(req, node_p, start, - end - start); - if (ret == -EEXIST) { - /* - * Another execution context has inserted a - * conficting entry first. - */ - continue; - } - return ret; - } - - if (node->rb.addr <= start) { - /* - * This entry covers at least part of the region. If it doesn't extend - * to the end, then this will be called again for the next segment. - */ - *node_p = node; - return 0; - } - - SDMA_DBG(req, "prepend: node->rb.addr %lx, node->rb.refcount %d", - node->rb.addr, kref_read(&node->rb.refcount)); - prepend_len = node->rb.addr - start; - - /* - * This node will not be returned, instead a new node - * will be. So release the reference. - */ - kref_put(&node->rb.refcount, hfi1_mmu_rb_release); - - /* Prepend a node to cover the beginning of the allocation */ - ret = add_system_pinning(req, node_p, start, prepend_len); - if (ret == -EEXIST) { - /* Another execution context has inserted a conficting entry first. */ - continue; - } - return ret; - } -} - -static void sdma_mmu_rb_node_get(void *ctx) -{ - struct mmu_rb_node *node = ctx; - - kref_get(&node->refcount); -} - -static void sdma_mmu_rb_node_put(void *ctx) -{ - struct sdma_mmu_node *node = ctx; - - kref_put(&node->rb.refcount, hfi1_mmu_rb_release); -} - -static int add_mapping_to_sdma_packet(struct user_sdma_request *req, - struct user_sdma_txreq *tx, - struct sdma_mmu_node *cache_entry, - size_t start, - size_t from_this_cache_entry) -{ - struct hfi1_user_sdma_pkt_q *pq = req->pq; - unsigned int page_offset; - unsigned int from_this_page; - size_t page_index; - void *ctx; - int ret; - - /* - * Because the cache may be more fragmented than the memory that is being accessed, - * it's not strictly necessary to have a descriptor per cache entry. 
- */ - - while (from_this_cache_entry) { - page_index = PFN_DOWN(start - cache_entry->rb.addr); - - if (page_index >= cache_entry->npages) { - SDMA_DBG(req, - "Request for page_index %zu >= cache_entry->npages %u", - page_index, cache_entry->npages); - return -EINVAL; - } - - page_offset = start - ALIGN_DOWN(start, PAGE_SIZE); - from_this_page = PAGE_SIZE - page_offset; - - if (from_this_page < from_this_cache_entry) { - ctx = NULL; - } else { - /* - * In the case they are equal the next line has no practical effect, - * but it's better to do a register to register copy than a conditional - * branch. - */ - from_this_page = from_this_cache_entry; - ctx = cache_entry; - } - - ret = sdma_txadd_page(pq->dd, &tx->txreq, - cache_entry->pages[page_index], - page_offset, from_this_page, - ctx, - sdma_mmu_rb_node_get, - sdma_mmu_rb_node_put); - if (ret) { - /* - * When there's a failure, the entire request is freed by - * user_sdma_send_pkts(). - */ - SDMA_DBG(req, - "sdma_txadd_page failed %d page_index %lu page_offset %u from_this_page %u", - ret, page_index, page_offset, from_this_page); - return ret; - } - start += from_this_page; - from_this_cache_entry -= from_this_page; - } - return 0; -} - -static int add_system_iovec_to_sdma_packet(struct user_sdma_request *req, - struct user_sdma_txreq *tx, - struct user_sdma_iovec *iovec, - size_t from_this_iovec) -{ - while (from_this_iovec > 0) { - struct sdma_mmu_node *cache_entry; - size_t from_this_cache_entry; - size_t start; - int ret; - - start = (uintptr_t)iovec->iov.iov_base + iovec->offset; - ret = get_system_cache_entry(req, &cache_entry, start, - from_this_iovec); - if (ret) { - SDMA_DBG(req, "pin system segment failed %d", ret); - return ret; - } - - from_this_cache_entry = cache_entry->rb.len - (start - cache_entry->rb.addr); - if (from_this_cache_entry > from_this_iovec) - from_this_cache_entry = from_this_iovec; - - ret = add_mapping_to_sdma_packet(req, tx, cache_entry, start, - from_this_cache_entry); - - /* - * Done adding cache_entry to zero or more sdma_desc. Can - * kref_put() the "safety" kref taken under - * get_system_cache_entry(). - */ - kref_put(&cache_entry->rb.refcount, hfi1_mmu_rb_release); - - if (ret) { - SDMA_DBG(req, "add system segment failed %d", ret); - return ret; - } - - iovec->offset += from_this_cache_entry; - from_this_iovec -= from_this_cache_entry; - } - - return 0; -} - -static int add_system_pages_to_sdma_packet(struct user_sdma_request *req, - struct user_sdma_txreq *tx, - struct user_sdma_iovec *iovec, - u32 *pkt_data_remaining) -{ - size_t remaining_to_add = *pkt_data_remaining; - /* - * Walk through iovec entries, ensure the associated pages - * are pinned and mapped, add data to the packet until no more - * data remains to be added. - */ - while (remaining_to_add > 0) { - struct user_sdma_iovec *cur_iovec; - size_t from_this_iovec; - int ret; - - cur_iovec = iovec; - from_this_iovec = iovec->iov.iov_len - iovec->offset; - - if (from_this_iovec > remaining_to_add) { - from_this_iovec = remaining_to_add; - } else { - /* The current iovec entry will be consumed by this pass. 
*/ - req->iov_idx++; - iovec++; - } - - ret = add_system_iovec_to_sdma_packet(req, tx, cur_iovec, - from_this_iovec); - if (ret) - return ret; - - remaining_to_add -= from_this_iovec; - } - *pkt_data_remaining = remaining_to_add; - - return 0; -} - -static bool sdma_rb_filter(struct mmu_rb_node *node, unsigned long addr, - unsigned long len) -{ - return (bool)(node->addr == addr); -} - -/* - * Return 1 to remove the node from the rb tree and call the remove op. - * - * Called with the rb tree lock held. - */ -static int sdma_rb_evict(void *arg, struct mmu_rb_node *mnode, - void *evict_arg, bool *stop) -{ - struct sdma_mmu_node *node = - container_of(mnode, struct sdma_mmu_node, rb); - struct evict_data *evict_data = evict_arg; - - /* this node will be evicted, add its pages to our count */ - evict_data->cleared += node->npages; - - /* have enough pages been cleared? */ - if (evict_data->cleared >= evict_data->target) - *stop = true; - - return 1; /* remove this node */ -} - -static void sdma_rb_remove(void *arg, struct mmu_rb_node *mnode) -{ - struct sdma_mmu_node *node = - container_of(mnode, struct sdma_mmu_node, rb); - - free_system_node(node); -} diff --git a/drivers/infiniband/hw/hfi1/user_sdma.h b/drivers/infiniband/hw/hfi1/user_sdma.h index 548347d4c5bc..742ec1470cc5 100644 --- a/drivers/infiniband/hw/hfi1/user_sdma.h +++ b/drivers/infiniband/hw/hfi1/user_sdma.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */ /* - * Copyright(c) 2020 - Cornelis Networks, Inc. + * Copyright(c) 2023 - Cornelis Networks, Inc. * Copyright(c) 2015 - 2018 Intel Corporation. */ #ifndef _HFI1_USER_SDMA_H @@ -13,6 +13,8 @@ #include "iowait.h" #include "user_exp_rcv.h" #include "mmu_rb.h" +#include "pinning.h" +#include "sdma.h" /* The maximum number of Data io vectors per message/request */ #define MAX_VECTORS_PER_REQ 8 @@ -101,13 +103,6 @@ struct hfi1_user_sdma_comp_q { struct hfi1_sdma_comp_entry *comps; }; -struct sdma_mmu_node { - struct mmu_rb_node rb; - struct hfi1_user_sdma_pkt_q *pq; - struct page **pages; - unsigned int npages; -}; - struct user_sdma_iovec { struct list_head list; struct iovec iov; @@ -203,10 +198,4 @@ int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd, int hfi1_user_sdma_process_request(struct hfi1_filedata *fd, struct iovec *iovec, unsigned long dim, unsigned long *count); - -static inline struct mm_struct *mm_from_sdma_node(struct sdma_mmu_node *node) -{ - return node->rb.handler->mn.mm; -} - #endif /* _HFI1_USER_SDMA_H */ diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 84239b907de2..7f0d0288beb1 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -97,6 +97,7 @@ #define HNS_ROCE_CQ_BANK_NUM 4 #define CQ_BANKID_SHIFT 2 +#define CQ_BANKID_MASK GENMASK(1, 0) enum { SERV_TYPE_RC, @@ -714,7 +715,6 @@ struct hns_roce_caps { u32 max_rq_sg; u32 rsv0; u32 num_qps; - u32 num_pi_qps; u32 reserved_qps; u32 num_srqs; u32 max_wqes; @@ -840,6 +840,32 @@ enum hns_roce_device_state { HNS_ROCE_DEVICE_STATE_UNINIT, }; +enum hns_roce_hw_pkt_stat_index { + HNS_ROCE_HW_RX_RC_PKT_CNT, + HNS_ROCE_HW_RX_UC_PKT_CNT, + HNS_ROCE_HW_RX_UD_PKT_CNT, + HNS_ROCE_HW_RX_XRC_PKT_CNT, + HNS_ROCE_HW_RX_PKT_CNT, + HNS_ROCE_HW_RX_ERR_PKT_CNT, + HNS_ROCE_HW_RX_CNP_PKT_CNT, + HNS_ROCE_HW_TX_RC_PKT_CNT, + HNS_ROCE_HW_TX_UC_PKT_CNT, + HNS_ROCE_HW_TX_UD_PKT_CNT, + HNS_ROCE_HW_TX_XRC_PKT_CNT, + HNS_ROCE_HW_TX_PKT_CNT, + HNS_ROCE_HW_TX_ERR_PKT_CNT, + HNS_ROCE_HW_TX_CNP_PKT_CNT, + 
HNS_ROCE_HW_TRP_GET_MPT_ERR_PKT_CNT, + HNS_ROCE_HW_TRP_GET_IRRL_ERR_PKT_CNT, + HNS_ROCE_HW_ECN_DB_CNT, + HNS_ROCE_HW_RX_BUF_CNT, + HNS_ROCE_HW_TRP_RX_SOF_CNT, + HNS_ROCE_HW_CQ_CQE_CNT, + HNS_ROCE_HW_CQ_POE_CNT, + HNS_ROCE_HW_CQ_NOTIFY_CNT, + HNS_ROCE_HW_CNT_TOTAL +}; + struct hns_roce_hw { int (*cmq_init)(struct hns_roce_dev *hr_dev); void (*cmq_exit)(struct hns_roce_dev *hr_dev); @@ -882,6 +908,8 @@ struct hns_roce_hw { int (*query_cqc)(struct hns_roce_dev *hr_dev, u32 cqn, void *buffer); int (*query_qpc)(struct hns_roce_dev *hr_dev, u32 qpn, void *buffer); int (*query_mpt)(struct hns_roce_dev *hr_dev, u32 key, void *buffer); + int (*query_hw_counter)(struct hns_roce_dev *hr_dev, + u64 *stats, u32 port, int *hw_counters); const struct ib_device_ops *hns_roce_dev_ops; const struct ib_device_ops *hns_roce_dev_srq_ops; }; @@ -1112,7 +1140,6 @@ int hns_roce_init_qp_table(struct hns_roce_dev *hr_dev); void hns_roce_init_srq_table(struct hns_roce_dev *hr_dev); void hns_roce_init_xrcd_table(struct hns_roce_dev *hr_dev); -void hns_roce_cleanup_eq_table(struct hns_roce_dev *hr_dev); void hns_roce_cleanup_cq_table(struct hns_roce_dev *hr_dev); void hns_roce_cleanup_qp_table(struct hns_roce_dev *hr_dev); @@ -1161,9 +1188,6 @@ int hns_roce_get_umem_bufs(struct hns_roce_dev *hr_dev, dma_addr_t *bufs, int hns_roce_create_srq(struct ib_srq *srq, struct ib_srq_init_attr *srq_init_attr, struct ib_udata *udata); -int hns_roce_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr, - enum ib_srq_attr_mask srq_attr_mask, - struct ib_udata *udata); int hns_roce_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata); int hns_roce_alloc_xrcd(struct ib_xrcd *ib_xrcd, struct ib_udata *udata); @@ -1206,7 +1230,6 @@ void hns_roce_cq_event(struct hns_roce_dev *hr_dev, u32 cqn, int event_type); void flush_cqe(struct hns_roce_dev *dev, struct hns_roce_qp *qp); void hns_roce_qp_event(struct hns_roce_dev *hr_dev, u32 qpn, int event_type); void hns_roce_srq_event(struct hns_roce_dev *hr_dev, u32 srqn, int event_type); -u8 hns_get_gid_index(struct hns_roce_dev *hr_dev, u32 port, int gid_index); void hns_roce_handle_device_err(struct hns_roce_dev *hr_dev); int hns_roce_init(struct hns_roce_dev *hr_dev); void hns_roce_exit(struct hns_roce_dev *hr_dev); diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index 47c0efed1821..c4ac06a33869 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -78,7 +78,7 @@ bool hns_roce_check_whether_mhop(struct hns_roce_dev *hr_dev, u32 type) return false; } - return hop_num ? 
true : false; + return hop_num; } static bool hns_roce_check_hem_null(struct hns_roce_hem **hem, u64 hem_idx, diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 8f7eb11066b4..d82daff2d9bd 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -750,7 +750,8 @@ out: qp->sq.head += nreq; qp->next_sge = sge_idx; - if (nreq == 1 && (qp->en_flags & HNS_ROCE_QP_CAP_DIRECT_WQE)) + if (nreq == 1 && !ret && + (qp->en_flags & HNS_ROCE_QP_CAP_DIRECT_WQE)) write_dwqe(hr_dev, qp, wqe); else update_sq_db(hr_dev, qp); @@ -1612,6 +1613,56 @@ static int hns_roce_query_func_info(struct hns_roce_dev *hr_dev) return 0; } +static int hns_roce_hw_v2_query_counter(struct hns_roce_dev *hr_dev, + u64 *stats, u32 port, int *num_counters) +{ +#define CNT_PER_DESC 3 + struct hns_roce_cmq_desc *desc; + int bd_idx, cnt_idx; + __le64 *cnt_data; + int desc_num; + int ret; + int i; + + if (port > hr_dev->caps.num_ports) + return -EINVAL; + + desc_num = DIV_ROUND_UP(HNS_ROCE_HW_CNT_TOTAL, CNT_PER_DESC); + desc = kcalloc(desc_num, sizeof(*desc), GFP_KERNEL); + if (!desc) + return -ENOMEM; + + for (i = 0; i < desc_num; i++) { + hns_roce_cmq_setup_basic_desc(&desc[i], + HNS_ROCE_OPC_QUERY_COUNTER, true); + if (i != desc_num - 1) + desc[i].flag |= cpu_to_le16(HNS_ROCE_CMD_FLAG_NEXT); + } + + ret = hns_roce_cmq_send(hr_dev, desc, desc_num); + if (ret) { + ibdev_err(&hr_dev->ib_dev, + "failed to get counter, ret = %d.\n", ret); + goto err_out; + } + + for (i = 0; i < HNS_ROCE_HW_CNT_TOTAL && i < *num_counters; i++) { + bd_idx = i / CNT_PER_DESC; + if (!(desc[bd_idx].flag & HNS_ROCE_CMD_FLAG_NEXT) && + bd_idx != HNS_ROCE_HW_CNT_TOTAL / CNT_PER_DESC) + break; + + cnt_data = (__le64 *)&desc[bd_idx].data[0]; + cnt_idx = i % CNT_PER_DESC; + stats[i] = le64_to_cpu(cnt_data[cnt_idx]); + } + *num_counters = i; + +err_out: + kfree(desc); + return ret; +} + static int hns_roce_config_global_param(struct hns_roce_dev *hr_dev) { struct hns_roce_cmq_desc desc; @@ -1680,29 +1731,6 @@ static int load_func_res_caps(struct hns_roce_dev *hr_dev, bool is_vf) return 0; } -static int load_ext_cfg_caps(struct hns_roce_dev *hr_dev, bool is_vf) -{ - struct hns_roce_cmq_desc desc; - struct hns_roce_cmq_req *req = (struct hns_roce_cmq_req *)desc.data; - struct hns_roce_caps *caps = &hr_dev->caps; - u32 func_num, qp_num; - int ret; - - hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_EXT_CFG, true); - ret = hns_roce_cmq_send(hr_dev, &desc, 1); - if (ret) - return ret; - - func_num = is_vf ? 1 : max_t(u32, 1, hr_dev->func_num); - qp_num = hr_reg_read(req, EXT_CFG_QP_PI_NUM) / func_num; - caps->num_pi_qps = round_down(qp_num, HNS_ROCE_QP_BANK_NUM); - - qp_num = hr_reg_read(req, EXT_CFG_QP_NUM) / func_num; - caps->num_qps = round_down(qp_num, HNS_ROCE_QP_BANK_NUM); - - return 0; -} - static int load_pf_timer_res_caps(struct hns_roce_dev *hr_dev) { struct hns_roce_cmq_desc desc; @@ -1723,50 +1751,37 @@ static int load_pf_timer_res_caps(struct hns_roce_dev *hr_dev) return 0; } -static int query_func_resource_caps(struct hns_roce_dev *hr_dev, bool is_vf) +static int hns_roce_query_pf_resource(struct hns_roce_dev *hr_dev) { struct device *dev = hr_dev->dev; int ret; - ret = load_func_res_caps(hr_dev, is_vf); + ret = load_func_res_caps(hr_dev, false); if (ret) { - dev_err(dev, "failed to load res caps, ret = %d (%s).\n", ret, - is_vf ? 
"vf" : "pf"); + dev_err(dev, "failed to load pf res caps, ret = %d.\n", ret); return ret; } - if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) { - ret = load_ext_cfg_caps(hr_dev, is_vf); - if (ret) - dev_err(dev, "failed to load ext cfg, ret = %d (%s).\n", - ret, is_vf ? "vf" : "pf"); - } + ret = load_pf_timer_res_caps(hr_dev); + if (ret) + dev_err(dev, "failed to load pf timer resource, ret = %d.\n", + ret); return ret; } -static int hns_roce_query_pf_resource(struct hns_roce_dev *hr_dev) +static int hns_roce_query_vf_resource(struct hns_roce_dev *hr_dev) { struct device *dev = hr_dev->dev; int ret; - ret = query_func_resource_caps(hr_dev, false); + ret = load_func_res_caps(hr_dev, true); if (ret) - return ret; - - ret = load_pf_timer_res_caps(hr_dev); - if (ret) - dev_err(dev, "failed to load pf timer resource, ret = %d.\n", - ret); + dev_err(dev, "failed to load vf res caps, ret = %d.\n", ret); return ret; } -static int hns_roce_query_vf_resource(struct hns_roce_dev *hr_dev) -{ - return query_func_resource_caps(hr_dev, true); -} - static int __hns_roce_set_vf_switch_param(struct hns_roce_dev *hr_dev, u32 vf_id) { @@ -1849,24 +1864,6 @@ static int config_vf_hem_resource(struct hns_roce_dev *hr_dev, int vf_id) return hns_roce_cmq_send(hr_dev, desc, 2); } -static int config_vf_ext_resource(struct hns_roce_dev *hr_dev, u32 vf_id) -{ - struct hns_roce_cmq_desc desc; - struct hns_roce_cmq_req *req = (struct hns_roce_cmq_req *)desc.data; - struct hns_roce_caps *caps = &hr_dev->caps; - - hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_EXT_CFG, false); - - hr_reg_write(req, EXT_CFG_VF_ID, vf_id); - - hr_reg_write(req, EXT_CFG_QP_PI_NUM, caps->num_pi_qps); - hr_reg_write(req, EXT_CFG_QP_PI_IDX, vf_id * caps->num_pi_qps); - hr_reg_write(req, EXT_CFG_QP_NUM, caps->num_qps); - hr_reg_write(req, EXT_CFG_QP_IDX, vf_id * caps->num_qps); - - return hns_roce_cmq_send(hr_dev, &desc, 1); -} - static int hns_roce_alloc_vf_resource(struct hns_roce_dev *hr_dev) { u32 func_num = max_t(u32, 1, hr_dev->func_num); @@ -1881,16 +1878,6 @@ static int hns_roce_alloc_vf_resource(struct hns_roce_dev *hr_dev) vf_id, ret); return ret; } - - if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) { - ret = config_vf_ext_resource(hr_dev, vf_id); - if (ret) { - dev_err(hr_dev->dev, - "failed to config vf-%u ext res, ret = %d.\n", - vf_id, ret); - return ret; - } - } } return 0; @@ -2075,9 +2062,6 @@ static void apply_func_caps(struct hns_roce_dev *hr_dev) caps->qpc_timer_hop_num = HNS_ROCE_HOP_NUM_0; caps->cqc_timer_hop_num = HNS_ROCE_HOP_NUM_0; - caps->num_xrcds = HNS_ROCE_V2_MAX_XRCD_NUM; - caps->reserved_xrcds = HNS_ROCE_V2_RSV_XRCD_NUM; - caps->num_srqwqe_segs = HNS_ROCE_V2_MAX_SRQWQE_SEGS; caps->num_idx_segs = HNS_ROCE_V2_MAX_IDX_SEGS; @@ -2200,6 +2184,7 @@ static int hns_roce_query_caps(struct hns_roce_dev *hr_dev) caps->num_cqs = 1 << hr_reg_read(resp_c, PF_CAPS_C_NUM_CQS); caps->gid_table_len[0] = hr_reg_read(resp_c, PF_CAPS_C_MAX_GID); caps->max_cqes = 1 << hr_reg_read(resp_c, PF_CAPS_C_CQ_DEPTH); + caps->num_xrcds = 1 << hr_reg_read(resp_c, PF_CAPS_C_NUM_XRCDS); caps->num_mtpts = 1 << hr_reg_read(resp_c, PF_CAPS_C_NUM_MRWS); caps->num_qps = 1 << hr_reg_read(resp_c, PF_CAPS_C_NUM_QPS); caps->max_qp_init_rdma = hr_reg_read(resp_c, PF_CAPS_C_MAX_ORD); @@ -2220,6 +2205,7 @@ static int hns_roce_query_caps(struct hns_roce_dev *hr_dev) caps->reserved_mrws = hr_reg_read(resp_e, PF_CAPS_E_RSV_MRWS); caps->chunk_sz = 1 << hr_reg_read(resp_e, PF_CAPS_E_CHUNK_SIZE_SHIFT); caps->reserved_cqs = hr_reg_read(resp_e, 
PF_CAPS_E_RSV_CQS); + caps->reserved_xrcds = hr_reg_read(resp_e, PF_CAPS_E_RSV_XRCDS); caps->reserved_srqs = hr_reg_read(resp_e, PF_CAPS_E_RSV_SRQS); caps->reserved_lkey = hr_reg_read(resp_e, PF_CAPS_E_RSV_LKEYS); @@ -6646,6 +6632,7 @@ static const struct hns_roce_hw hns_roce_hw_v2 = { .query_cqc = hns_roce_v2_query_cqc, .query_qpc = hns_roce_v2_query_qpc, .query_mpt = hns_roce_v2_query_mpt, + .query_hw_counter = hns_roce_hw_v2_query_counter, .hns_roce_dev_ops = &hns_roce_v2_dev_ops, .hns_roce_dev_srq_ops = &hns_roce_v2_dev_srq_ops, }; @@ -6722,14 +6709,14 @@ static int __hns_roce_hw_v2_init_instance(struct hnae3_handle *handle) ret = hns_roce_init(hr_dev); if (ret) { dev_err(hr_dev->dev, "RoCE Engine init failed!\n"); - goto error_failed_cfg; + goto error_failed_roce_init; } if (hr_dev->pci_dev->revision == PCI_REVISION_ID_HIP08) { ret = free_mr_init(hr_dev); if (ret) { dev_err(hr_dev->dev, "failed to init free mr!\n"); - goto error_failed_roce_init; + goto error_failed_free_mr_init; } } @@ -6737,10 +6724,10 @@ static int __hns_roce_hw_v2_init_instance(struct hnae3_handle *handle) return 0; -error_failed_roce_init: +error_failed_free_mr_init: hns_roce_exit(hr_dev); -error_failed_cfg: +error_failed_roce_init: kfree(hr_dev->priv); error_failed_kzalloc: diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index 7033eae2407c..cd97cbee682a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -42,7 +42,6 @@ #define HNS_ROCE_V2_MAX_SRQWQE_SEGS 0x1000000 #define HNS_ROCE_V2_MAX_IDX_SEGS 0x1000000 #define HNS_ROCE_V2_MAX_XRCD_NUM 0x1000000 -#define HNS_ROCE_V2_RSV_XRCD_NUM 0 #define HNS_ROCE_V2_QP_ACK_TIMEOUT_OFS_HIP08 10 @@ -199,6 +198,7 @@ enum hns_roce_opcode_type { HNS_ROCE_OPC_QUERY_HW_VER = 0x8000, HNS_ROCE_OPC_CFG_GLOBAL_PARAM = 0x8001, HNS_ROCE_OPC_ALLOC_PF_RES = 0x8004, + HNS_ROCE_OPC_QUERY_COUNTER = 0x8206, HNS_ROCE_OPC_QUERY_PF_RES = 0x8400, HNS_ROCE_OPC_ALLOC_VF_RES = 0x8401, HNS_ROCE_OPC_CFG_EXT_LLM = 0x8403, @@ -220,7 +220,6 @@ enum hns_roce_opcode_type { HNS_ROCE_OPC_QUERY_VF_RES = 0x850e, HNS_ROCE_OPC_CFG_GMV_TBL = 0x850f, HNS_ROCE_OPC_CFG_GMV_BT = 0x8510, - HNS_ROCE_OPC_EXT_CFG = 0x8512, HNS_ROCE_QUERY_RAM_ECC = 0x8513, HNS_SWITCH_PARAMETER_CFG = 0x1033, }; @@ -957,15 +956,6 @@ struct hns_roce_func_clear { #define HNS_ROCE_V2_READ_FUNC_CLEAR_FLAG_INTERVAL 40 #define HNS_ROCE_V2_READ_FUNC_CLEAR_FLAG_FAIL_WAIT 20 -/* Fields of HNS_ROCE_OPC_EXT_CFG */ -#define EXT_CFG_VF_ID CMQ_REQ_FIELD_LOC(31, 0) -#define EXT_CFG_QP_PI_IDX CMQ_REQ_FIELD_LOC(45, 32) -#define EXT_CFG_QP_PI_NUM CMQ_REQ_FIELD_LOC(63, 48) -#define EXT_CFG_QP_NUM CMQ_REQ_FIELD_LOC(87, 64) -#define EXT_CFG_QP_IDX CMQ_REQ_FIELD_LOC(119, 96) -#define EXT_CFG_LLM_IDX CMQ_REQ_FIELD_LOC(139, 128) -#define EXT_CFG_LLM_NUM CMQ_REQ_FIELD_LOC(156, 144) - #define CFG_LLM_A_BA_L CMQ_REQ_FIELD_LOC(31, 0) #define CFG_LLM_A_BA_H CMQ_REQ_FIELD_LOC(63, 32) #define CFG_LLM_A_DEPTH CMQ_REQ_FIELD_LOC(76, 64) @@ -1202,6 +1192,7 @@ struct hns_roce_query_pf_caps_c { #define PF_CAPS_C_NUM_CQS PF_CAPS_C_FIELD_LOC(51, 32) #define PF_CAPS_C_MAX_GID PF_CAPS_C_FIELD_LOC(60, 52) #define PF_CAPS_C_CQ_DEPTH PF_CAPS_C_FIELD_LOC(86, 64) +#define PF_CAPS_C_NUM_XRCDS PF_CAPS_C_FIELD_LOC(91, 87) #define PF_CAPS_C_NUM_MRWS PF_CAPS_C_FIELD_LOC(115, 96) #define PF_CAPS_C_NUM_QPS PF_CAPS_C_FIELD_LOC(147, 128) #define PF_CAPS_C_MAX_ORD PF_CAPS_C_FIELD_LOC(155, 148) @@ -1260,6 +1251,7 @@ struct hns_roce_query_pf_caps_e { #define PF_CAPS_E_RSV_MRWS 
PF_CAPS_E_FIELD_LOC(19, 0) #define PF_CAPS_E_CHUNK_SIZE_SHIFT PF_CAPS_E_FIELD_LOC(31, 20) #define PF_CAPS_E_RSV_CQS PF_CAPS_E_FIELD_LOC(51, 32) +#define PF_CAPS_E_RSV_XRCDS PF_CAPS_E_FIELD_LOC(63, 52) #define PF_CAPS_E_RSV_SRQS PF_CAPS_E_FIELD_LOC(83, 64) #define PF_CAPS_E_RSV_LKEYS PF_CAPS_E_FIELD_LOC(115, 96) diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index 485e110ca433..d9d546cdef52 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -219,6 +219,7 @@ static int hns_roce_query_port(struct ib_device *ib_dev, u32 port_num, unsigned long flags; enum ib_mtu mtu; u32 port; + int ret; port = port_num - 1; @@ -231,8 +232,10 @@ static int hns_roce_query_port(struct ib_device *ib_dev, u32 port_num, IB_PORT_BOOT_MGMT_SUP; props->max_msg_sz = HNS_ROCE_MAX_MSG_LEN; props->pkey_tbl_len = 1; - props->active_width = IB_WIDTH_4X; - props->active_speed = 1; + ret = ib_get_eth_speed(ib_dev, port_num, &props->active_speed, + &props->active_width); + if (ret) + ibdev_warn(ib_dev, "failed to get speed, ret = %d.\n", ret); spin_lock_irqsave(&hr_dev->iboe.lock, flags); @@ -512,6 +515,83 @@ static void hns_roce_get_fw_ver(struct ib_device *device, char *str) sub_minor); } +#define HNS_ROCE_HW_CNT(ename, cname) \ + [HNS_ROCE_HW_##ename##_CNT].name = cname + +static const struct rdma_stat_desc hns_roce_port_stats_descs[] = { + HNS_ROCE_HW_CNT(RX_RC_PKT, "rx_rc_pkt"), + HNS_ROCE_HW_CNT(RX_UC_PKT, "rx_uc_pkt"), + HNS_ROCE_HW_CNT(RX_UD_PKT, "rx_ud_pkt"), + HNS_ROCE_HW_CNT(RX_XRC_PKT, "rx_xrc_pkt"), + HNS_ROCE_HW_CNT(RX_PKT, "rx_pkt"), + HNS_ROCE_HW_CNT(RX_ERR_PKT, "rx_err_pkt"), + HNS_ROCE_HW_CNT(RX_CNP_PKT, "rx_cnp_pkt"), + HNS_ROCE_HW_CNT(TX_RC_PKT, "tx_rc_pkt"), + HNS_ROCE_HW_CNT(TX_UC_PKT, "tx_uc_pkt"), + HNS_ROCE_HW_CNT(TX_UD_PKT, "tx_ud_pkt"), + HNS_ROCE_HW_CNT(TX_XRC_PKT, "tx_xrc_pkt"), + HNS_ROCE_HW_CNT(TX_PKT, "tx_pkt"), + HNS_ROCE_HW_CNT(TX_ERR_PKT, "tx_err_pkt"), + HNS_ROCE_HW_CNT(TX_CNP_PKT, "tx_cnp_pkt"), + HNS_ROCE_HW_CNT(TRP_GET_MPT_ERR_PKT, "trp_get_mpt_err_pkt"), + HNS_ROCE_HW_CNT(TRP_GET_IRRL_ERR_PKT, "trp_get_irrl_err_pkt"), + HNS_ROCE_HW_CNT(ECN_DB, "ecn_doorbell"), + HNS_ROCE_HW_CNT(RX_BUF, "rx_buffer"), + HNS_ROCE_HW_CNT(TRP_RX_SOF, "trp_rx_sof"), + HNS_ROCE_HW_CNT(CQ_CQE, "cq_cqe"), + HNS_ROCE_HW_CNT(CQ_POE, "cq_poe"), + HNS_ROCE_HW_CNT(CQ_NOTIFY, "cq_notify"), +}; + +static struct rdma_hw_stats *hns_roce_alloc_hw_port_stats( + struct ib_device *device, u32 port_num) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(device); + u32 port = port_num - 1; + + if (port > hr_dev->caps.num_ports) { + ibdev_err(device, "invalid port num.\n"); + return NULL; + } + + if (hr_dev->pci_dev->revision <= PCI_REVISION_ID_HIP08 || + hr_dev->is_vf) + return NULL; + + return rdma_alloc_hw_stats_struct(hns_roce_port_stats_descs, + ARRAY_SIZE(hns_roce_port_stats_descs), + RDMA_HW_STATS_DEFAULT_LIFESPAN); +} + +static int hns_roce_get_hw_stats(struct ib_device *device, + struct rdma_hw_stats *stats, + u32 port, int index) +{ + struct hns_roce_dev *hr_dev = to_hr_dev(device); + int num_counters = HNS_ROCE_HW_CNT_TOTAL; + int ret; + + if (port == 0) + return 0; + + if (port > hr_dev->caps.num_ports) + return -EINVAL; + + if (hr_dev->pci_dev->revision <= PCI_REVISION_ID_HIP08 || + hr_dev->is_vf) + return -EOPNOTSUPP; + + ret = hr_dev->hw->query_hw_counter(hr_dev, stats->value, port, + &num_counters); + if (ret) { + ibdev_err(device, "failed to query hw counter, ret = %d\n", + ret); + return ret; + } + + return 
num_counters; +} + static void hns_roce_unregister_device(struct hns_roce_dev *hr_dev) { struct hns_roce_ib_iboe *iboe = &hr_dev->iboe; @@ -554,6 +634,8 @@ static const struct ib_device_ops hns_roce_dev_ops = { .query_pkey = hns_roce_query_pkey, .query_port = hns_roce_query_port, .reg_user_mr = hns_roce_reg_user_mr, + .alloc_hw_port_stats = hns_roce_alloc_hw_port_stats, + .get_hw_stats = hns_roce_get_hw_stats, INIT_RDMA_OBJ_SIZE(ib_ah, hns_roce_ah, ibah), INIT_RDMA_OBJ_SIZE(ib_cq, hns_roce_cq, ib_cq), diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index d855a917f4cf..cdc1c6de43a1 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -170,14 +170,29 @@ static void hns_roce_ib_qp_event(struct hns_roce_qp *hr_qp, } } -static u8 get_least_load_bankid_for_qp(struct hns_roce_bank *bank) +static u8 get_affinity_cq_bank(u8 qp_bank) { - u32 least_load = bank[0].inuse; + return (qp_bank >> 1) & CQ_BANKID_MASK; +} + +static u8 get_least_load_bankid_for_qp(struct ib_qp_init_attr *init_attr, + struct hns_roce_bank *bank) +{ +#define INVALID_LOAD_QPNUM 0xFFFFFFFF + struct ib_cq *scq = init_attr->send_cq; + u32 least_load = INVALID_LOAD_QPNUM; + unsigned long cqn = 0; u8 bankid = 0; u32 bankcnt; u8 i; - for (i = 1; i < HNS_ROCE_QP_BANK_NUM; i++) { + if (scq) + cqn = to_hr_cq(scq)->cqn; + + for (i = 0; i < HNS_ROCE_QP_BANK_NUM; i++) { + if (scq && (get_affinity_cq_bank(i) != (cqn & CQ_BANKID_MASK))) + continue; + bankcnt = bank[i].inuse; if (bankcnt < least_load) { least_load = bankcnt; @@ -209,7 +224,8 @@ static int alloc_qpn_with_bankid(struct hns_roce_bank *bank, u8 bankid, return 0; } -static int alloc_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) +static int alloc_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, + struct ib_qp_init_attr *init_attr) { struct hns_roce_qp_table *qp_table = &hr_dev->qp_table; unsigned long num = 0; @@ -220,7 +236,7 @@ static int alloc_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) num = 1; } else { mutex_lock(&qp_table->bank_mutex); - bankid = get_least_load_bankid_for_qp(qp_table->bank); + bankid = get_least_load_bankid_for_qp(init_attr, qp_table->bank); ret = alloc_qpn_with_bankid(&qp_table->bank[bankid], bankid, &num); @@ -1082,7 +1098,7 @@ static int hns_roce_create_qp_common(struct hns_roce_dev *hr_dev, goto err_buf; } - ret = alloc_qpn(hr_dev, hr_qp); + ret = alloc_qpn(hr_dev, hr_qp, init_attr); if (ret) { ibdev_err(ibdev, "failed to alloc QPN, ret = %d.\n", ret); goto err_qpn; diff --git a/drivers/infiniband/hw/hns/hns_roce_restrack.c b/drivers/infiniband/hw/hns/hns_roce_restrack.c index 989a2af2e938..081a01de3055 100644 --- a/drivers/infiniband/hw/hns/hns_roce_restrack.c +++ b/drivers/infiniband/hw/hns/hns_roce_restrack.c @@ -9,8 +9,6 @@ #include "hns_roce_device.h" #include "hns_roce_hw_v2.h" -#define MAX_ENTRY_NUM 256 - int hns_roce_fill_res_cq_entry(struct sk_buff *msg, struct ib_cq *ib_cq) { struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq); @@ -47,8 +45,6 @@ int hns_roce_fill_res_cq_entry_raw(struct sk_buff *msg, struct ib_cq *ib_cq) struct hns_roce_dev *hr_dev = to_hr_dev(ib_cq->device); struct hns_roce_cq *hr_cq = to_hr_cq(ib_cq); struct hns_roce_v2_cq_context context; - u32 data[MAX_ENTRY_NUM] = {}; - int offset = 0; int ret; if (!hr_dev->hw->query_cqc) @@ -58,23 +54,7 @@ int hns_roce_fill_res_cq_entry_raw(struct sk_buff *msg, struct ib_cq *ib_cq) if (ret) return -EINVAL; - data[offset++] = hr_reg_read(&context, CQC_CQ_ST); - 
data[offset++] = hr_reg_read(&context, CQC_SHIFT); - data[offset++] = hr_reg_read(&context, CQC_CQE_SIZE); - data[offset++] = hr_reg_read(&context, CQC_CQE_CNT); - data[offset++] = hr_reg_read(&context, CQC_CQ_PRODUCER_IDX); - data[offset++] = hr_reg_read(&context, CQC_CQ_CONSUMER_IDX); - data[offset++] = hr_reg_read(&context, CQC_DB_RECORD_EN); - data[offset++] = hr_reg_read(&context, CQC_ARM_ST); - data[offset++] = hr_reg_read(&context, CQC_CMD_SN); - data[offset++] = hr_reg_read(&context, CQC_CEQN); - data[offset++] = hr_reg_read(&context, CQC_CQ_MAX_CNT); - data[offset++] = hr_reg_read(&context, CQC_CQ_PERIOD); - data[offset++] = hr_reg_read(&context, CQC_CQE_HOP_NUM); - data[offset++] = hr_reg_read(&context, CQC_CQE_BAR_PG_SZ); - data[offset++] = hr_reg_read(&context, CQC_CQE_BUF_PG_SZ); - - ret = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, offset * sizeof(u32), data); + ret = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, sizeof(context), &context); return ret; } @@ -118,8 +98,6 @@ int hns_roce_fill_res_qp_entry_raw(struct sk_buff *msg, struct ib_qp *ib_qp) struct hns_roce_dev *hr_dev = to_hr_dev(ib_qp->device); struct hns_roce_qp *hr_qp = to_hr_qp(ib_qp); struct hns_roce_v2_qp_context context; - u32 data[MAX_ENTRY_NUM] = {}; - int offset = 0; int ret; if (!hr_dev->hw->query_qpc) @@ -129,42 +107,7 @@ int hns_roce_fill_res_qp_entry_raw(struct sk_buff *msg, struct ib_qp *ib_qp) if (ret) return -EINVAL; - data[offset++] = hr_reg_read(&context, QPC_QP_ST); - data[offset++] = hr_reg_read(&context, QPC_ERR_TYPE); - data[offset++] = hr_reg_read(&context, QPC_CHECK_FLG); - data[offset++] = hr_reg_read(&context, QPC_SRQ_EN); - data[offset++] = hr_reg_read(&context, QPC_SRQN); - data[offset++] = hr_reg_read(&context, QPC_QKEY_XRCD); - data[offset++] = hr_reg_read(&context, QPC_TX_CQN); - data[offset++] = hr_reg_read(&context, QPC_RX_CQN); - data[offset++] = hr_reg_read(&context, QPC_SQ_PRODUCER_IDX); - data[offset++] = hr_reg_read(&context, QPC_SQ_CONSUMER_IDX); - data[offset++] = hr_reg_read(&context, QPC_RQ_RECORD_EN); - data[offset++] = hr_reg_read(&context, QPC_RQ_PRODUCER_IDX); - data[offset++] = hr_reg_read(&context, QPC_RQ_CONSUMER_IDX); - data[offset++] = hr_reg_read(&context, QPC_SQ_SHIFT); - data[offset++] = hr_reg_read(&context, QPC_RQWS); - data[offset++] = hr_reg_read(&context, QPC_RQ_SHIFT); - data[offset++] = hr_reg_read(&context, QPC_SGE_SHIFT); - data[offset++] = hr_reg_read(&context, QPC_SQ_HOP_NUM); - data[offset++] = hr_reg_read(&context, QPC_RQ_HOP_NUM); - data[offset++] = hr_reg_read(&context, QPC_SGE_HOP_NUM); - data[offset++] = hr_reg_read(&context, QPC_WQE_SGE_BA_PG_SZ); - data[offset++] = hr_reg_read(&context, QPC_WQE_SGE_BUF_PG_SZ); - data[offset++] = hr_reg_read(&context, QPC_RETRY_NUM_INIT); - data[offset++] = hr_reg_read(&context, QPC_RETRY_CNT); - data[offset++] = hr_reg_read(&context, QPC_SQ_CUR_PSN); - data[offset++] = hr_reg_read(&context, QPC_SQ_MAX_PSN); - data[offset++] = hr_reg_read(&context, QPC_SQ_FLUSH_IDX); - data[offset++] = hr_reg_read(&context, QPC_SQ_MAX_IDX); - data[offset++] = hr_reg_read(&context, QPC_SQ_TX_ERR); - data[offset++] = hr_reg_read(&context, QPC_SQ_RX_ERR); - data[offset++] = hr_reg_read(&context, QPC_RQ_RX_ERR); - data[offset++] = hr_reg_read(&context, QPC_RQ_TX_ERR); - data[offset++] = hr_reg_read(&context, QPC_RQ_CQE_IDX); - data[offset++] = hr_reg_read(&context, QPC_RQ_RTY_TX_ERR); - - ret = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, offset * sizeof(u32), data); + ret = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, sizeof(context), &context); return ret; } 
@@ -204,8 +147,6 @@ int hns_roce_fill_res_mr_entry_raw(struct sk_buff *msg, struct ib_mr *ib_mr) struct hns_roce_dev *hr_dev = to_hr_dev(ib_mr->device); struct hns_roce_mr *hr_mr = to_hr_mr(ib_mr); struct hns_roce_v2_mpt_entry context; - u32 data[MAX_ENTRY_NUM] = {}; - int offset = 0; int ret; if (!hr_dev->hw->query_mpt) @@ -215,17 +156,7 @@ int hns_roce_fill_res_mr_entry_raw(struct sk_buff *msg, struct ib_mr *ib_mr) if (ret) return -EINVAL; - data[offset++] = hr_reg_read(&context, MPT_ST); - data[offset++] = hr_reg_read(&context, MPT_PD); - data[offset++] = hr_reg_read(&context, MPT_LKEY); - data[offset++] = hr_reg_read(&context, MPT_LEN_L); - data[offset++] = hr_reg_read(&context, MPT_LEN_H); - data[offset++] = hr_reg_read(&context, MPT_PBL_SIZE); - data[offset++] = hr_reg_read(&context, MPT_PBL_HOP_NUM); - data[offset++] = hr_reg_read(&context, MPT_PBL_BA_PG_SZ); - data[offset++] = hr_reg_read(&context, MPT_PBL_BUF_PG_SZ); - - ret = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, offset * sizeof(u32), data); + ret = nla_put(msg, RDMA_NLDEV_ATTR_RES_RAW, sizeof(context), &context); return ret; } diff --git a/drivers/infiniband/hw/irdma/cm.c b/drivers/infiniband/hw/irdma/cm.c index 70009b970e08..42d1e9771066 100644 --- a/drivers/infiniband/hw/irdma/cm.c +++ b/drivers/infiniband/hw/irdma/cm.c @@ -1555,22 +1555,56 @@ static int irdma_del_multiple_qhash(struct irdma_device *iwdev, return ret; } +static u8 irdma_iw_get_vlan_prio(u32 *loc_addr, u8 prio, bool ipv4) +{ + struct net_device *ndev = NULL; + + rcu_read_lock(); + if (ipv4) { + ndev = ip_dev_find(&init_net, htonl(loc_addr[0])); + } else if (IS_ENABLED(CONFIG_IPV6)) { + struct net_device *ip_dev; + struct in6_addr laddr6; + + irdma_copy_ip_htonl(laddr6.in6_u.u6_addr32, loc_addr); + + for_each_netdev_rcu (&init_net, ip_dev) { + if (ipv6_chk_addr(&init_net, &laddr6, ip_dev, 1)) { + ndev = ip_dev; + break; + } + } + } + + if (!ndev) + goto done; + if (is_vlan_dev(ndev)) + prio = (vlan_dev_get_egress_qos_mask(ndev, prio) & VLAN_PRIO_MASK) + >> VLAN_PRIO_SHIFT; + if (ipv4) + dev_put(ndev); + +done: + rcu_read_unlock(); + + return prio; +} + /** - * irdma_netdev_vlan_ipv6 - Gets the netdev and mac + * irdma_get_vlan_mac_ipv6 - Gets the vlan and mac * @addr: local IPv6 address * @vlan_id: vlan id for the given IPv6 address * @mac: mac address for the given IPv6 address * - * Returns the net_device of the IPv6 address and also sets the - * vlan id and mac for that address. + * Returns the vlan id and mac for an IPv6 address. 
*/ -struct net_device *irdma_netdev_vlan_ipv6(u32 *addr, u16 *vlan_id, u8 *mac) +void irdma_get_vlan_mac_ipv6(u32 *addr, u16 *vlan_id, u8 *mac) { struct net_device *ip_dev = NULL; struct in6_addr laddr6; if (!IS_ENABLED(CONFIG_IPV6)) - return NULL; + return; irdma_copy_ip_htonl(laddr6.in6_u.u6_addr32, addr); if (vlan_id) @@ -1589,8 +1623,6 @@ struct net_device *irdma_netdev_vlan_ipv6(u32 *addr, u16 *vlan_id, u8 *mac) } } rcu_read_unlock(); - - return ip_dev; } /** @@ -1667,6 +1699,12 @@ static int irdma_add_mqh_6(struct irdma_device *iwdev, ifp->addr.in6_u.u6_addr32); memcpy(cm_info->loc_addr, child_listen_node->loc_addr, sizeof(cm_info->loc_addr)); + if (!iwdev->vsi.dscp_mode) + cm_info->user_pri = + irdma_iw_get_vlan_prio(child_listen_node->loc_addr, + cm_info->user_pri, + false); + ret = irdma_manage_qhash(iwdev, cm_info, IRDMA_QHASH_TYPE_TCP_SYN, IRDMA_QHASH_MANAGE_TYPE_ADD, @@ -1751,6 +1789,11 @@ static int irdma_add_mqh_4(struct irdma_device *iwdev, ntohl(ifa->ifa_address); memcpy(cm_info->loc_addr, child_listen_node->loc_addr, sizeof(cm_info->loc_addr)); + if (!iwdev->vsi.dscp_mode) + cm_info->user_pri = + irdma_iw_get_vlan_prio(child_listen_node->loc_addr, + cm_info->user_pri, + true); ret = irdma_manage_qhash(iwdev, cm_info, IRDMA_QHASH_TYPE_TCP_SYN, IRDMA_QHASH_MANAGE_TYPE_ADD, @@ -2219,6 +2262,10 @@ irdma_make_cm_node(struct irdma_cm_core *cm_core, struct irdma_device *iwdev, } else { cm_node->tos = max(listener->tos, cm_info->tos); cm_node->user_pri = rt_tos2priority(cm_node->tos); + cm_node->user_pri = + irdma_iw_get_vlan_prio(cm_info->loc_addr, + cm_node->user_pri, + cm_info->ipv4); } ibdev_dbg(&iwdev->ibdev, "DCB: listener: TOS:[%d] UP:[%d]\n", cm_node->tos, @@ -3577,7 +3624,6 @@ void irdma_free_lsmm_rsrc(struct irdma_qp *iwqp) iwqp->ietf_mem.size, iwqp->ietf_mem.va, iwqp->ietf_mem.pa); iwqp->ietf_mem.va = NULL; - iwqp->ietf_mem.va = NULL; } } @@ -3617,8 +3663,8 @@ int irdma_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) cm_node->vlan_id = irdma_get_vlan_ipv4(cm_node->loc_addr); } else { cm_node->ipv4 = false; - irdma_netdev_vlan_ipv6(cm_node->loc_addr, &cm_node->vlan_id, - NULL); + irdma_get_vlan_mac_ipv6(cm_node->loc_addr, &cm_node->vlan_id, + NULL); } ibdev_dbg(&iwdev->ibdev, "CM: Accept vlan_id=%d\n", cm_node->vlan_id); @@ -3826,17 +3872,21 @@ int irdma_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) raddr6->sin6_addr.in6_u.u6_addr32); cm_info.loc_port = ntohs(laddr6->sin6_port); cm_info.rem_port = ntohs(raddr6->sin6_port); - irdma_netdev_vlan_ipv6(cm_info.loc_addr, &cm_info.vlan_id, - NULL); + irdma_get_vlan_mac_ipv6(cm_info.loc_addr, &cm_info.vlan_id, + NULL); } cm_info.cm_id = cm_id; cm_info.qh_qpid = iwdev->vsi.ilq->qp_id; cm_info.tos = cm_id->tos; - if (iwdev->vsi.dscp_mode) + if (iwdev->vsi.dscp_mode) { cm_info.user_pri = iwqp->sc_qp.vsi->dscp_map[irdma_tos2dscp(cm_info.tos)]; - else + } else { cm_info.user_pri = rt_tos2priority(cm_id->tos); + cm_info.user_pri = irdma_iw_get_vlan_prio(cm_info.loc_addr, + cm_info.user_pri, + cm_info.ipv4); + } if (iwqp->sc_qp.dev->ws_add(iwqp->sc_qp.vsi, cm_info.user_pri)) return -ENOMEM; @@ -3952,8 +4002,8 @@ int irdma_create_listen(struct iw_cm_id *cm_id, int backlog) laddr6->sin6_addr.in6_u.u6_addr32); cm_info.loc_port = ntohs(laddr6->sin6_port); if (ipv6_addr_type(&laddr6->sin6_addr) != IPV6_ADDR_ANY) { - irdma_netdev_vlan_ipv6(cm_info.loc_addr, - &cm_info.vlan_id, NULL); + irdma_get_vlan_mac_ipv6(cm_info.loc_addr, + &cm_info.vlan_id, NULL); } else { cm_info.vlan_id = 0xFFFF; wildcard = 
true; @@ -3980,7 +4030,7 @@ int irdma_create_listen(struct iw_cm_id *cm_id, int backlog) cm_listen_node->tos = cm_id->tos; if (iwdev->vsi.dscp_mode) cm_listen_node->user_pri = - iwdev->vsi.dscp_map[irdma_tos2dscp(cm_id->tos)]; + iwdev->vsi.dscp_map[irdma_tos2dscp(cm_id->tos)]; else cm_listen_node->user_pri = rt_tos2priority(cm_id->tos); cm_info.user_pri = cm_listen_node->user_pri; @@ -3990,6 +4040,12 @@ int irdma_create_listen(struct iw_cm_id *cm_id, int backlog) if (err) goto error; } else { + if (!iwdev->vsi.dscp_mode) + cm_listen_node->user_pri = + irdma_iw_get_vlan_prio(cm_info.loc_addr, + cm_info.user_pri, + cm_info.ipv4); + cm_info.user_pri = cm_listen_node->user_pri; err = irdma_manage_qhash(iwdev, &cm_info, IRDMA_QHASH_TYPE_TCP_SYN, IRDMA_QHASH_MANAGE_TYPE_ADD, diff --git a/drivers/infiniband/hw/irdma/ctrl.c b/drivers/infiniband/hw/irdma/ctrl.c index 45e3344daa04..8a6200e55c54 100644 --- a/drivers/infiniband/hw/irdma/ctrl.c +++ b/drivers/infiniband/hw/irdma/ctrl.c @@ -1061,6 +1061,9 @@ static int irdma_sc_alloc_stag(struct irdma_sc_dev *dev, u64 hdr; enum irdma_page_size page_size; + if (!info->total_len && !info->all_memory) + return -EINVAL; + if (info->page_size == 0x40000000) page_size = IRDMA_PAGE_SIZE_1G; else if (info->page_size == 0x200000) @@ -1126,6 +1129,9 @@ static int irdma_sc_mr_reg_non_shared(struct irdma_sc_dev *dev, u8 addr_type; enum irdma_page_size page_size; + if (!info->total_len && !info->all_memory) + return -EINVAL; + if (info->page_size == 0x40000000) page_size = IRDMA_PAGE_SIZE_1G; else if (info->page_size == 0x200000) @@ -1301,7 +1307,6 @@ int irdma_sc_mr_fast_register(struct irdma_sc_qp *qp, sq_info.wr_id = info->wr_id; sq_info.signaled = info->signaled; - sq_info.push_wqe = info->push_wqe; wqe = irdma_qp_get_next_send_wqe(&qp->qp_uk, &wqe_idx, IRDMA_QP_WQE_MIN_QUANTA, 0, &sq_info); @@ -1335,7 +1340,6 @@ int irdma_sc_mr_fast_register(struct irdma_sc_qp *qp, FIELD_PREP(IRDMAQPSQ_HPAGESIZE, page_size) | FIELD_PREP(IRDMAQPSQ_STAGRIGHTS, info->access_rights) | FIELD_PREP(IRDMAQPSQ_VABASEDTO, info->addr_type) | - FIELD_PREP(IRDMAQPSQ_PUSHWQE, (sq_info.push_wqe ? 
1 : 0)) | FIELD_PREP(IRDMAQPSQ_READFENCE, info->read_fence) | FIELD_PREP(IRDMAQPSQ_LOCALFENCE, info->local_fence) | FIELD_PREP(IRDMAQPSQ_SIGCOMPL, info->signaled) | @@ -1346,13 +1350,9 @@ int irdma_sc_mr_fast_register(struct irdma_sc_qp *qp, print_hex_dump_debug("WQE: FAST_REG WQE", DUMP_PREFIX_OFFSET, 16, 8, wqe, IRDMA_QP_WQE_MIN_SIZE, false); - if (sq_info.push_wqe) { - irdma_qp_push_wqe(&qp->qp_uk, wqe, IRDMA_QP_WQE_MIN_QUANTA, - wqe_idx, post_sq); - } else { - if (post_sq) - irdma_uk_qp_post_wr(&qp->qp_uk); - } + + if (post_sq) + irdma_uk_qp_post_wr(&qp->qp_uk); return 0; } @@ -4007,7 +4007,6 @@ int irdma_sc_get_next_aeqe(struct irdma_sc_aeq *aeq, { u64 temp, compl_ctx; __le64 *aeqe; - u16 wqe_idx; u8 ae_src; u8 polarity; @@ -4027,7 +4026,7 @@ int irdma_sc_get_next_aeqe(struct irdma_sc_aeq *aeq, aeqe, 16, false); ae_src = (u8)FIELD_GET(IRDMA_AEQE_AESRC, temp); - wqe_idx = (u16)FIELD_GET(IRDMA_AEQE_WQDESCIDX, temp); + info->wqe_idx = (u16)FIELD_GET(IRDMA_AEQE_WQDESCIDX, temp); info->qp_cq_id = (u32)FIELD_GET(IRDMA_AEQE_QPCQID_LOW, temp) | ((u32)FIELD_GET(IRDMA_AEQE_QPCQID_HI, temp) << 18); info->ae_id = (u16)FIELD_GET(IRDMA_AEQE_AECODE, temp); @@ -4110,7 +4109,6 @@ int irdma_sc_get_next_aeqe(struct irdma_sc_aeq *aeq, case IRDMA_AE_SOURCE_RQ_0011: info->qp = true; info->rq = true; - info->wqe_idx = wqe_idx; info->compl_ctx = compl_ctx; break; case IRDMA_AE_SOURCE_CQ: @@ -4124,7 +4122,6 @@ int irdma_sc_get_next_aeqe(struct irdma_sc_aeq *aeq, case IRDMA_AE_SOURCE_SQ_0111: info->qp = true; info->sq = true; - info->wqe_idx = wqe_idx; info->compl_ctx = compl_ctx; break; case IRDMA_AE_SOURCE_IN_RR_WR: diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c index 457368e324e1..7cbdd5433dba 100644 --- a/drivers/infiniband/hw/irdma/hw.c +++ b/drivers/infiniband/hw/irdma/hw.c @@ -219,7 +219,6 @@ static void irdma_process_aeq(struct irdma_pci_f *rf) struct irdma_aeqe_info *info = &aeinfo; int ret; struct irdma_qp *iwqp = NULL; - struct irdma_sc_cq *cq = NULL; struct irdma_cq *iwcq = NULL; struct irdma_sc_qp *qp = NULL; struct irdma_qp_host_ctx_info *ctx_info = NULL; @@ -336,10 +335,18 @@ static void irdma_process_aeq(struct irdma_pci_f *rf) ibdev_err(&iwdev->ibdev, "Processing an iWARP related AE for CQ misc = 0x%04X\n", info->ae_id); - cq = (struct irdma_sc_cq *)(unsigned long) - info->compl_ctx; - iwcq = cq->back_cq; + spin_lock_irqsave(&rf->cqtable_lock, flags); + iwcq = rf->cq_table[info->qp_cq_id]; + if (!iwcq) { + spin_unlock_irqrestore(&rf->cqtable_lock, + flags); + ibdev_dbg(to_ibdev(dev), + "cq_id %d is already freed\n", info->qp_cq_id); + continue; + } + irdma_cq_add_ref(&iwcq->ibcq); + spin_unlock_irqrestore(&rf->cqtable_lock, flags); if (iwcq->ibcq.event_handler) { struct ib_event ibevent; @@ -350,6 +357,7 @@ static void irdma_process_aeq(struct irdma_pci_f *rf) iwcq->ibcq.event_handler(&ibevent, iwcq->ibcq.cq_context); } + irdma_cq_rem_ref(&iwcq->ibcq); break; case IRDMA_AE_RESET_NOT_SENT: case IRDMA_AE_LLP_DOUBT_REACHABILITY: @@ -563,12 +571,11 @@ static void irdma_destroy_irq(struct irdma_pci_f *rf, /** * irdma_destroy_cqp - destroy control qp * @rf: RDMA PCI function - * @free_hwcqp: 1 if hw cqp should be freed * * Issue destroy cqp request and * free the resources associated with the cqp */ -static void irdma_destroy_cqp(struct irdma_pci_f *rf, bool free_hwcqp) +static void irdma_destroy_cqp(struct irdma_pci_f *rf) { struct irdma_sc_dev *dev = &rf->sc_dev; struct irdma_cqp *cqp = &rf->cqp; @@ -576,8 +583,8 @@ static void irdma_destroy_cqp(struct irdma_pci_f 
*rf, bool free_hwcqp) if (rf->cqp_cmpl_wq) destroy_workqueue(rf->cqp_cmpl_wq); - if (free_hwcqp) - status = irdma_sc_cqp_destroy(dev->cqp); + + status = irdma_sc_cqp_destroy(dev->cqp); if (status) ibdev_dbg(to_ibdev(dev), "ERR: Destroy CQP failed %d\n", status); @@ -921,8 +928,8 @@ static int irdma_create_cqp(struct irdma_pci_f *rf) cqp->scratch_array = kcalloc(sqsize, sizeof(*cqp->scratch_array), GFP_KERNEL); if (!cqp->scratch_array) { - kfree(cqp->cqp_requests); - return -ENOMEM; + status = -ENOMEM; + goto err_scratch; } dev->cqp = &cqp->sc_cqp; @@ -932,15 +939,14 @@ static int irdma_create_cqp(struct irdma_pci_f *rf) cqp->sq.va = dma_alloc_coherent(dev->hw->device, cqp->sq.size, &cqp->sq.pa, GFP_KERNEL); if (!cqp->sq.va) { - kfree(cqp->scratch_array); - kfree(cqp->cqp_requests); - return -ENOMEM; + status = -ENOMEM; + goto err_sq; } status = irdma_obj_aligned_mem(rf, &mem, sizeof(struct irdma_cqp_ctx), IRDMA_HOST_CTX_ALIGNMENT_M); if (status) - goto exit; + goto err_ctx; dev->cqp->host_ctx_pa = mem.pa; dev->cqp->host_ctx = mem.va; @@ -966,7 +972,7 @@ static int irdma_create_cqp(struct irdma_pci_f *rf) status = irdma_sc_cqp_init(dev->cqp, &cqp_init_info); if (status) { ibdev_dbg(to_ibdev(dev), "ERR: cqp init status %d\n", status); - goto exit; + goto err_ctx; } spin_lock_init(&cqp->req_lock); @@ -977,7 +983,7 @@ static int irdma_create_cqp(struct irdma_pci_f *rf) ibdev_dbg(to_ibdev(dev), "ERR: cqp create failed - status %d maj_err %d min_err %d\n", status, maj_err, min_err); - goto exit; + goto err_ctx; } INIT_LIST_HEAD(&cqp->cqp_avail_reqs); @@ -991,8 +997,16 @@ static int irdma_create_cqp(struct irdma_pci_f *rf) init_waitqueue_head(&cqp->remove_wq); return 0; -exit: - irdma_destroy_cqp(rf, false); +err_ctx: + dma_free_coherent(dev->hw->device, cqp->sq.size, + cqp->sq.va, cqp->sq.pa); + cqp->sq.va = NULL; +err_sq: + kfree(cqp->scratch_array); + cqp->scratch_array = NULL; +err_scratch: + kfree(cqp->cqp_requests); + cqp->cqp_requests = NULL; return status; } @@ -1549,7 +1563,7 @@ static void irdma_del_init_mem(struct irdma_pci_f *rf) kfree(dev->hmc_info->sd_table.sd_entry); dev->hmc_info->sd_table.sd_entry = NULL; - kfree(rf->mem_rsrc); + vfree(rf->mem_rsrc); rf->mem_rsrc = NULL; dma_free_coherent(rf->hw.device, rf->obj_mem.size, rf->obj_mem.va, rf->obj_mem.pa); @@ -1747,7 +1761,7 @@ void irdma_ctrl_deinit_hw(struct irdma_pci_f *rf) rf->reset, rf->rdma_ver); fallthrough; case CQP_CREATED: - irdma_destroy_cqp(rf, true); + irdma_destroy_cqp(rf); fallthrough; case INITIAL_STATE: irdma_del_init_mem(rf); @@ -1945,10 +1959,12 @@ static void irdma_set_hw_rsrc(struct irdma_pci_f *rf) rf->allocated_arps = &rf->allocated_mcgs[BITS_TO_LONGS(rf->max_mcg)]; rf->qp_table = (struct irdma_qp **) (&rf->allocated_arps[BITS_TO_LONGS(rf->arp_table_size)]); + rf->cq_table = (struct irdma_cq **)(&rf->qp_table[rf->max_qp]); spin_lock_init(&rf->rsrc_lock); spin_lock_init(&rf->arp_lock); spin_lock_init(&rf->qptable_lock); + spin_lock_init(&rf->cqtable_lock); spin_lock_init(&rf->qh_list_lock); } @@ -1969,6 +1985,7 @@ static u32 irdma_calc_mem_rsrc_size(struct irdma_pci_f *rf) rsrc_size += sizeof(unsigned long) * BITS_TO_LONGS(rf->max_ah); rsrc_size += sizeof(unsigned long) * BITS_TO_LONGS(rf->max_mcg); rsrc_size += sizeof(struct irdma_qp **) * rf->max_qp; + rsrc_size += sizeof(struct irdma_cq **) * rf->max_cq; return rsrc_size; } @@ -2002,10 +2019,10 @@ u32 irdma_initialize_hw_rsrc(struct irdma_pci_f *rf) rf->max_mcg = rf->max_qp; rsrc_size = irdma_calc_mem_rsrc_size(rf); - rf->mem_rsrc = kzalloc(rsrc_size, 
GFP_KERNEL); + rf->mem_rsrc = vzalloc(rsrc_size); if (!rf->mem_rsrc) { ret = -ENOMEM; - goto mem_rsrc_kzalloc_fail; + goto mem_rsrc_vzalloc_fail; } rf->arp_table = (struct irdma_arp_entry *)rf->mem_rsrc; @@ -2033,7 +2050,7 @@ u32 irdma_initialize_hw_rsrc(struct irdma_pci_f *rf) return 0; -mem_rsrc_kzalloc_fail: +mem_rsrc_vzalloc_fail: bitmap_free(rf->allocated_ws_nodes); rf->allocated_ws_nodes = NULL; diff --git a/drivers/infiniband/hw/irdma/i40iw_hw.c b/drivers/infiniband/hw/irdma/i40iw_hw.c index 37a40fb4d0d7..638d127fb3e0 100644 --- a/drivers/infiniband/hw/irdma/i40iw_hw.c +++ b/drivers/infiniband/hw/irdma/i40iw_hw.c @@ -254,5 +254,6 @@ void i40iw_init_hw(struct irdma_sc_dev *dev) dev->hw_attrs.max_stat_idx = IRDMA_HW_STAT_INDEX_MAX_GEN_1; dev->hw_attrs.max_hw_outbound_msg_size = I40IW_MAX_OUTBOUND_MSG_SIZE; dev->hw_attrs.max_hw_inbound_msg_size = I40IW_MAX_INBOUND_MSG_SIZE; + dev->hw_attrs.uk_attrs.min_hw_wq_size = I40IW_MIN_WQ_SIZE; dev->hw_attrs.max_qp_wr = I40IW_MAX_QP_WRS; } diff --git a/drivers/infiniband/hw/irdma/i40iw_hw.h b/drivers/infiniband/hw/irdma/i40iw_hw.h index 1c438b3593ea..10afc165f5ea 100644 --- a/drivers/infiniband/hw/irdma/i40iw_hw.h +++ b/drivers/infiniband/hw/irdma/i40iw_hw.h @@ -140,11 +140,11 @@ enum i40iw_device_caps_const { I40IW_MAX_CQ_SIZE = 1048575, I40IW_MAX_OUTBOUND_MSG_SIZE = 2147483647, I40IW_MAX_INBOUND_MSG_SIZE = 2147483647, + I40IW_MIN_WQ_SIZE = 4 /* WQEs */, }; #define I40IW_QP_WQE_MIN_SIZE 32 #define I40IW_QP_WQE_MAX_SIZE 128 -#define I40IW_QP_SW_MIN_WQSIZE 4 #define I40IW_MAX_RQ_WQE_SHIFT 2 #define I40IW_MAX_QUANTA_PER_WR 2 diff --git a/drivers/infiniband/hw/irdma/icrdma_hw.c b/drivers/infiniband/hw/irdma/icrdma_hw.c index 298d14905993..10ccf4bc3f2d 100644 --- a/drivers/infiniband/hw/irdma/icrdma_hw.c +++ b/drivers/infiniband/hw/irdma/icrdma_hw.c @@ -195,6 +195,7 @@ void icrdma_init_hw(struct irdma_sc_dev *dev) dev->hw_attrs.max_stat_inst = ICRDMA_MAX_STATS_COUNT; dev->hw_attrs.max_stat_idx = IRDMA_HW_STAT_INDEX_MAX_GEN_2; + dev->hw_attrs.uk_attrs.min_hw_wq_size = ICRDMA_MIN_WQ_SIZE; dev->hw_attrs.uk_attrs.max_hw_sq_chunk = IRDMA_MAX_QUANTA_PER_WR; dev->hw_attrs.uk_attrs.feature_flags |= IRDMA_FEATURE_RTS_AE | IRDMA_FEATURE_CQ_RESIZE; diff --git a/drivers/infiniband/hw/irdma/icrdma_hw.h b/drivers/infiniband/hw/irdma/icrdma_hw.h index b65c463abf0b..54035a08cc93 100644 --- a/drivers/infiniband/hw/irdma/icrdma_hw.h +++ b/drivers/infiniband/hw/irdma/icrdma_hw.h @@ -64,6 +64,7 @@ enum icrdma_device_caps_const { ICRDMA_MAX_IRD_SIZE = 127, ICRDMA_MAX_ORD_SIZE = 255, + ICRDMA_MIN_WQ_SIZE = 8 /* WQEs */, }; diff --git a/drivers/infiniband/hw/irdma/irdma.h b/drivers/infiniband/hw/irdma/irdma.h index 173e2dc2fc35..3237fa64bc8f 100644 --- a/drivers/infiniband/hw/irdma/irdma.h +++ b/drivers/infiniband/hw/irdma/irdma.h @@ -119,6 +119,7 @@ struct irdma_uk_attrs { u32 min_hw_cq_size; u32 max_hw_cq_size; u16 max_hw_sq_chunk; + u16 min_hw_wq_size; u8 hw_rev; }; diff --git a/drivers/infiniband/hw/irdma/main.h b/drivers/infiniband/hw/irdma/main.h index 2323962cdeac..82fc5f5b002c 100644 --- a/drivers/infiniband/hw/irdma/main.h +++ b/drivers/infiniband/hw/irdma/main.h @@ -239,7 +239,7 @@ struct irdma_qv_info { struct irdma_qvlist_info { u32 num_vectors; - struct irdma_qv_info qv_info[1]; + struct irdma_qv_info qv_info[]; }; struct irdma_gen_ops { @@ -309,7 +309,9 @@ struct irdma_pci_f { spinlock_t arp_lock; /*protect ARP table access*/ spinlock_t rsrc_lock; /* protect HW resource array access */ spinlock_t qptable_lock; /*protect QP table access*/ + spinlock_t 
cqtable_lock; /*protect CQ table access*/ struct irdma_qp **qp_table; + struct irdma_cq **cq_table; spinlock_t qh_list_lock; /* protect mc_qht_list */ struct mc_table_list mc_qht_list; struct irdma_msix_vector *iw_msixtbl; @@ -500,6 +502,8 @@ int irdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata); int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata); +void irdma_cq_add_ref(struct ib_cq *ibcq); +void irdma_cq_rem_ref(struct ib_cq *ibcq); void irdma_cq_wq_destroy(struct irdma_pci_f *rf, struct irdma_sc_cq *cq); void irdma_cleanup_pending_cqp_op(struct irdma_pci_f *rf); @@ -529,7 +533,7 @@ void irdma_gen_ae(struct irdma_pci_f *rf, struct irdma_sc_qp *qp, void irdma_copy_ip_ntohl(u32 *dst, __be32 *src); void irdma_copy_ip_htonl(__be32 *dst, u32 *src); u16 irdma_get_vlan_ipv4(u32 *addr); -struct net_device *irdma_netdev_vlan_ipv6(u32 *addr, u16 *vlan_id, u8 *mac); +void irdma_get_vlan_mac_ipv6(u32 *addr, u16 *vlan_id, u8 *mac); struct ib_mr *irdma_reg_phys_mr(struct ib_pd *ib_pd, u64 addr, u64 size, int acc, u64 *iova_start); int irdma_upload_qp_context(struct irdma_qp *iwqp, bool freeze, bool raw); diff --git a/drivers/infiniband/hw/irdma/type.h b/drivers/infiniband/hw/irdma/type.h index a20709577ab0..c84ec4dd8536 100644 --- a/drivers/infiniband/hw/irdma/type.h +++ b/drivers/infiniband/hw/irdma/type.h @@ -971,6 +971,7 @@ struct irdma_allocate_stag_info { bool remote_access:1; bool use_hmc_fcn_index:1; bool use_pf_rid:1; + bool all_memory:1; u8 hmc_fcn_index; }; @@ -998,6 +999,7 @@ struct irdma_reg_ns_stag_info { bool use_hmc_fcn_index:1; u8 hmc_fcn_index; bool use_pf_rid:1; + bool all_memory:1; }; struct irdma_fast_reg_stag_info { @@ -1017,7 +1019,6 @@ struct irdma_fast_reg_stag_info { bool local_fence:1; bool read_fence:1; bool signaled:1; - bool push_wqe:1; bool use_hmc_fcn_index:1; u8 hmc_fcn_index; bool use_pf_rid:1; diff --git a/drivers/infiniband/hw/irdma/uk.c b/drivers/infiniband/hw/irdma/uk.c index 280d633d4ec4..d8285ca16293 100644 --- a/drivers/infiniband/hw/irdma/uk.c +++ b/drivers/infiniband/hw/irdma/uk.c @@ -127,10 +127,7 @@ void irdma_uk_qp_post_wr(struct irdma_qp_uk *qp) hw_sq_tail = (u32)FIELD_GET(IRDMA_QP_DBSA_HW_SQ_TAIL, temp); sw_sq_head = IRDMA_RING_CURRENT_HEAD(qp->sq_ring); if (sw_sq_head != qp->initial_ring.head) { - if (qp->push_dropped) { - writel(qp->qp_id, qp->wqe_alloc_db); - qp->push_dropped = false; - } else if (sw_sq_head != hw_sq_tail) { + if (sw_sq_head != hw_sq_tail) { if (sw_sq_head > qp->initial_ring.head) { if (hw_sq_tail >= qp->initial_ring.head && hw_sq_tail < sw_sq_head) @@ -147,38 +144,6 @@ void irdma_uk_qp_post_wr(struct irdma_qp_uk *qp) } /** - * irdma_qp_ring_push_db - ring qp doorbell - * @qp: hw qp ptr - * @wqe_idx: wqe index - */ -static void irdma_qp_ring_push_db(struct irdma_qp_uk *qp, u32 wqe_idx) -{ - set_32bit_val(qp->push_db, 0, - FIELD_PREP(IRDMA_WQEALLOC_WQE_DESC_INDEX, wqe_idx >> 3) | qp->qp_id); - qp->initial_ring.head = qp->sq_ring.head; - qp->push_mode = true; - qp->push_dropped = false; -} - -void irdma_qp_push_wqe(struct irdma_qp_uk *qp, __le64 *wqe, u16 quanta, - u32 wqe_idx, bool post_sq) -{ - __le64 *push; - - if (IRDMA_RING_CURRENT_HEAD(qp->initial_ring) != - IRDMA_RING_CURRENT_TAIL(qp->sq_ring) && - !qp->push_mode) { - if (post_sq) - irdma_uk_qp_post_wr(qp); - } else { - push = (__le64 *)((uintptr_t)qp->push_wqe + - (wqe_idx & 0x7) * 0x20); - memcpy(push, wqe, quanta * IRDMA_QP_WQE_MIN_SIZE); - irdma_qp_ring_push_db(qp, 
wqe_idx); - } -} - -/** * irdma_qp_get_next_send_wqe - pad with NOP if needed, return where next WR should go * @qp: hw qp ptr * @wqe_idx: return wqe index @@ -192,7 +157,6 @@ __le64 *irdma_qp_get_next_send_wqe(struct irdma_qp_uk *qp, u32 *wqe_idx, { __le64 *wqe; __le64 *wqe_0 = NULL; - u32 nop_wqe_idx; u16 avail_quanta; u16 i; @@ -209,14 +173,10 @@ __le64 *irdma_qp_get_next_send_wqe(struct irdma_qp_uk *qp, u32 *wqe_idx, IRDMA_SQ_RING_FREE_QUANTA(qp->sq_ring)) return NULL; - nop_wqe_idx = IRDMA_RING_CURRENT_HEAD(qp->sq_ring); for (i = 0; i < avail_quanta; i++) { irdma_nop_1(qp); IRDMA_RING_MOVE_HEAD_NOCHECK(qp->sq_ring); } - if (qp->push_db && info->push_wqe) - irdma_qp_push_wqe(qp, qp->sq_base[nop_wqe_idx].elem, - avail_quanta, nop_wqe_idx, true); } *wqe_idx = IRDMA_RING_CURRENT_HEAD(qp->sq_ring); @@ -282,8 +242,6 @@ int irdma_uk_rdma_write(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, bool read_fence = false; u16 quanta; - info->push_wqe = qp->push_db ? true : false; - op_info = &info->op.rdma_write; if (op_info->num_lo_sges > qp->max_sq_frag_cnt) return -EINVAL; @@ -344,7 +302,6 @@ int irdma_uk_rdma_write(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, FIELD_PREP(IRDMAQPSQ_IMMDATAFLAG, info->imm_data_valid) | FIELD_PREP(IRDMAQPSQ_REPORTRTT, info->report_rtt) | FIELD_PREP(IRDMAQPSQ_ADDFRAGCNT, addl_frag_cnt) | - FIELD_PREP(IRDMAQPSQ_PUSHWQE, info->push_wqe) | FIELD_PREP(IRDMAQPSQ_READFENCE, read_fence) | FIELD_PREP(IRDMAQPSQ_LOCALFENCE, info->local_fence) | FIELD_PREP(IRDMAQPSQ_SIGCOMPL, info->signaled) | @@ -353,12 +310,9 @@ int irdma_uk_rdma_write(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, dma_wmb(); /* make sure WQE is populated before valid bit is set */ set_64bit_val(wqe, 24, hdr); - if (info->push_wqe) { - irdma_qp_push_wqe(qp, wqe, quanta, wqe_idx, post_sq); - } else { - if (post_sq) - irdma_uk_qp_post_wr(qp); - } + + if (post_sq) + irdma_uk_qp_post_wr(qp); return 0; } @@ -383,8 +337,6 @@ int irdma_uk_rdma_read(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, u16 quanta; u64 hdr; - info->push_wqe = qp->push_db ? true : false; - op_info = &info->op.rdma_read; if (qp->max_sq_frag_cnt < op_info->num_lo_sges) return -EINVAL; @@ -431,7 +383,6 @@ int irdma_uk_rdma_read(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, FIELD_PREP(IRDMAQPSQ_ADDFRAGCNT, addl_frag_cnt) | FIELD_PREP(IRDMAQPSQ_OPCODE, (inv_stag ? IRDMAQP_OP_RDMA_READ_LOC_INV : IRDMAQP_OP_RDMA_READ)) | - FIELD_PREP(IRDMAQPSQ_PUSHWQE, info->push_wqe) | FIELD_PREP(IRDMAQPSQ_READFENCE, info->read_fence) | FIELD_PREP(IRDMAQPSQ_LOCALFENCE, local_fence) | FIELD_PREP(IRDMAQPSQ_SIGCOMPL, info->signaled) | @@ -440,12 +391,9 @@ int irdma_uk_rdma_read(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, dma_wmb(); /* make sure WQE is populated before valid bit is set */ set_64bit_val(wqe, 24, hdr); - if (info->push_wqe) { - irdma_qp_push_wqe(qp, wqe, quanta, wqe_idx, post_sq); - } else { - if (post_sq) - irdma_uk_qp_post_wr(qp); - } + + if (post_sq) + irdma_uk_qp_post_wr(qp); return 0; } @@ -468,8 +416,6 @@ int irdma_uk_send(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, bool read_fence = false; u16 quanta; - info->push_wqe = qp->push_db ? true : false; - op_info = &info->op.send; if (qp->max_sq_frag_cnt < op_info->num_sges) return -EINVAL; @@ -530,7 +476,6 @@ int irdma_uk_send(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, FIELD_PREP(IRDMAQPSQ_REPORTRTT, (info->report_rtt ? 
1 : 0)) | FIELD_PREP(IRDMAQPSQ_OPCODE, info->op_type) | FIELD_PREP(IRDMAQPSQ_ADDFRAGCNT, addl_frag_cnt) | - FIELD_PREP(IRDMAQPSQ_PUSHWQE, info->push_wqe) | FIELD_PREP(IRDMAQPSQ_READFENCE, read_fence) | FIELD_PREP(IRDMAQPSQ_LOCALFENCE, info->local_fence) | FIELD_PREP(IRDMAQPSQ_SIGCOMPL, info->signaled) | @@ -541,12 +486,9 @@ int irdma_uk_send(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, dma_wmb(); /* make sure WQE is populated before valid bit is set */ set_64bit_val(wqe, 24, hdr); - if (info->push_wqe) { - irdma_qp_push_wqe(qp, wqe, quanta, wqe_idx, post_sq); - } else { - if (post_sq) - irdma_uk_qp_post_wr(qp); - } + + if (post_sq) + irdma_uk_qp_post_wr(qp); return 0; } @@ -720,7 +662,6 @@ int irdma_uk_inline_rdma_write(struct irdma_qp_uk *qp, u32 i, total_size = 0; u16 quanta; - info->push_wqe = qp->push_db ? true : false; op_info = &info->op.rdma_write; if (unlikely(qp->max_sq_frag_cnt < op_info->num_lo_sges)) @@ -750,7 +691,6 @@ int irdma_uk_inline_rdma_write(struct irdma_qp_uk *qp, FIELD_PREP(IRDMAQPSQ_REPORTRTT, info->report_rtt ? 1 : 0) | FIELD_PREP(IRDMAQPSQ_INLINEDATAFLAG, 1) | FIELD_PREP(IRDMAQPSQ_IMMDATAFLAG, info->imm_data_valid ? 1 : 0) | - FIELD_PREP(IRDMAQPSQ_PUSHWQE, info->push_wqe ? 1 : 0) | FIELD_PREP(IRDMAQPSQ_READFENCE, read_fence) | FIELD_PREP(IRDMAQPSQ_LOCALFENCE, info->local_fence) | FIELD_PREP(IRDMAQPSQ_SIGCOMPL, info->signaled) | @@ -767,12 +707,8 @@ int irdma_uk_inline_rdma_write(struct irdma_qp_uk *qp, set_64bit_val(wqe, 24, hdr); - if (info->push_wqe) { - irdma_qp_push_wqe(qp, wqe, quanta, wqe_idx, post_sq); - } else { - if (post_sq) - irdma_uk_qp_post_wr(qp); - } + if (post_sq) + irdma_uk_qp_post_wr(qp); return 0; } @@ -794,7 +730,6 @@ int irdma_uk_inline_send(struct irdma_qp_uk *qp, u32 i, total_size = 0; u16 quanta; - info->push_wqe = qp->push_db ? true : false; op_info = &info->op.send; if (unlikely(qp->max_sq_frag_cnt < op_info->num_sges)) @@ -827,7 +762,6 @@ int irdma_uk_inline_send(struct irdma_qp_uk *qp, (info->imm_data_valid ? 1 : 0)) | FIELD_PREP(IRDMAQPSQ_REPORTRTT, (info->report_rtt ? 1 : 0)) | FIELD_PREP(IRDMAQPSQ_INLINEDATAFLAG, 1) | - FIELD_PREP(IRDMAQPSQ_PUSHWQE, info->push_wqe) | FIELD_PREP(IRDMAQPSQ_READFENCE, read_fence) | FIELD_PREP(IRDMAQPSQ_LOCALFENCE, info->local_fence) | FIELD_PREP(IRDMAQPSQ_SIGCOMPL, info->signaled) | @@ -845,12 +779,8 @@ int irdma_uk_inline_send(struct irdma_qp_uk *qp, set_64bit_val(wqe, 24, hdr); - if (info->push_wqe) { - irdma_qp_push_wqe(qp, wqe, quanta, wqe_idx, post_sq); - } else { - if (post_sq) - irdma_uk_qp_post_wr(qp); - } + if (post_sq) + irdma_uk_qp_post_wr(qp); return 0; } @@ -872,7 +802,6 @@ int irdma_uk_stag_local_invalidate(struct irdma_qp_uk *qp, bool local_fence = false; struct ib_sge sge = {}; - info->push_wqe = qp->push_db ? 
true : false; op_info = &info->op.inv_local_stag; local_fence = info->local_fence; @@ -889,7 +818,6 @@ int irdma_uk_stag_local_invalidate(struct irdma_qp_uk *qp, set_64bit_val(wqe, 16, 0); hdr = FIELD_PREP(IRDMAQPSQ_OPCODE, IRDMA_OP_TYPE_INV_STAG) | - FIELD_PREP(IRDMAQPSQ_PUSHWQE, info->push_wqe) | FIELD_PREP(IRDMAQPSQ_READFENCE, info->read_fence) | FIELD_PREP(IRDMAQPSQ_LOCALFENCE, local_fence) | FIELD_PREP(IRDMAQPSQ_SIGCOMPL, info->signaled) | @@ -899,13 +827,8 @@ int irdma_uk_stag_local_invalidate(struct irdma_qp_uk *qp, set_64bit_val(wqe, 24, hdr); - if (info->push_wqe) { - irdma_qp_push_wqe(qp, wqe, IRDMA_QP_WQE_MIN_QUANTA, wqe_idx, - post_sq); - } else { - if (post_sq) - irdma_uk_qp_post_wr(qp); - } + if (post_sq) + irdma_uk_qp_post_wr(qp); return 0; } @@ -1124,7 +1047,6 @@ int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, info->q_type = (u8)FIELD_GET(IRDMA_CQ_SQ, qword3); info->error = (bool)FIELD_GET(IRDMA_CQ_ERROR, qword3); - info->push_dropped = (bool)FIELD_GET(IRDMACQ_PSHDROP, qword3); info->ipv4 = (bool)FIELD_GET(IRDMACQ_IPV4, qword3); if (info->error) { info->major_err = FIELD_GET(IRDMA_CQ_MAJERR, qword3); @@ -1213,11 +1135,6 @@ int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, return irdma_uk_cq_poll_cmpl(cq, info); } } - /*cease posting push mode on push drop*/ - if (info->push_dropped) { - qp->push_mode = false; - qp->push_dropped = true; - } if (info->comp_status != IRDMA_COMPL_STATUS_FLUSHED) { info->wr_id = qp->sq_wrtrk_array[wqe_idx].wrid; if (!info->comp_status) @@ -1349,10 +1266,12 @@ void irdma_get_wqe_shift(struct irdma_uk_attrs *uk_attrs, u32 sge, int irdma_get_sqdepth(struct irdma_uk_attrs *uk_attrs, u32 sq_size, u8 shift, u32 *sqdepth) { + u32 min_size = (u32)uk_attrs->min_hw_wq_size << shift; + *sqdepth = irdma_qp_round_up((sq_size << shift) + IRDMA_SQ_RSVD); - if (*sqdepth < (IRDMA_QP_SW_MIN_WQSIZE << shift)) - *sqdepth = IRDMA_QP_SW_MIN_WQSIZE << shift; + if (*sqdepth < min_size) + *sqdepth = min_size; else if (*sqdepth > uk_attrs->max_hw_wq_quanta) return -EINVAL; @@ -1369,10 +1288,12 @@ int irdma_get_sqdepth(struct irdma_uk_attrs *uk_attrs, u32 sq_size, u8 shift, int irdma_get_rqdepth(struct irdma_uk_attrs *uk_attrs, u32 rq_size, u8 shift, u32 *rqdepth) { + u32 min_size = (u32)uk_attrs->min_hw_wq_size << shift; + *rqdepth = irdma_qp_round_up((rq_size << shift) + IRDMA_RQ_RSVD); - if (*rqdepth < (IRDMA_QP_SW_MIN_WQSIZE << shift)) - *rqdepth = IRDMA_QP_SW_MIN_WQSIZE << shift; + if (*rqdepth < min_size) + *rqdepth = min_size; else if (*rqdepth > uk_attrs->max_hw_rq_quanta) return -EINVAL; @@ -1415,6 +1336,78 @@ static void irdma_setup_connection_wqes(struct irdma_qp_uk *qp, } /** + * irdma_uk_calc_shift_wq - calculate WQE shift for both SQ and RQ + * @ukinfo: qp initialization info + * @sq_shift: Returns shift of SQ + * @rq_shift: Returns shift of RQ + */ +void irdma_uk_calc_shift_wq(struct irdma_qp_uk_init_info *ukinfo, u8 *sq_shift, + u8 *rq_shift) +{ + bool imm_support = ukinfo->uk_attrs->hw_rev >= IRDMA_GEN_2; + + irdma_get_wqe_shift(ukinfo->uk_attrs, + imm_support ? ukinfo->max_sq_frag_cnt + 1 : + ukinfo->max_sq_frag_cnt, + ukinfo->max_inline_data, sq_shift); + + irdma_get_wqe_shift(ukinfo->uk_attrs, ukinfo->max_rq_frag_cnt, 0, + rq_shift); + + if (ukinfo->uk_attrs->hw_rev == IRDMA_GEN_1) { + if (ukinfo->abi_ver > 4) + *rq_shift = IRDMA_MAX_RQ_WQE_SHIFT_GEN1; + } +} + +/** + * irdma_uk_calc_depth_shift_sq - calculate depth and shift for SQ size. 
+ * @ukinfo: qp initialization info + * @sq_depth: Returns depth of SQ + * @sq_shift: Returns shift of SQ + */ +int irdma_uk_calc_depth_shift_sq(struct irdma_qp_uk_init_info *ukinfo, + u32 *sq_depth, u8 *sq_shift) +{ + bool imm_support = ukinfo->uk_attrs->hw_rev >= IRDMA_GEN_2; + int status; + + irdma_get_wqe_shift(ukinfo->uk_attrs, + imm_support ? ukinfo->max_sq_frag_cnt + 1 : + ukinfo->max_sq_frag_cnt, + ukinfo->max_inline_data, sq_shift); + status = irdma_get_sqdepth(ukinfo->uk_attrs, ukinfo->sq_size, + *sq_shift, sq_depth); + + return status; +} + +/** + * irdma_uk_calc_depth_shift_rq - calculate depth and shift for RQ size. + * @ukinfo: qp initialization info + * @rq_depth: Returns depth of RQ + * @rq_shift: Returns shift of RQ + */ +int irdma_uk_calc_depth_shift_rq(struct irdma_qp_uk_init_info *ukinfo, + u32 *rq_depth, u8 *rq_shift) +{ + int status; + + irdma_get_wqe_shift(ukinfo->uk_attrs, ukinfo->max_rq_frag_cnt, 0, + rq_shift); + + if (ukinfo->uk_attrs->hw_rev == IRDMA_GEN_1) { + if (ukinfo->abi_ver > 4) + *rq_shift = IRDMA_MAX_RQ_WQE_SHIFT_GEN1; + } + + status = irdma_get_rqdepth(ukinfo->uk_attrs, ukinfo->rq_size, + *rq_shift, rq_depth); + + return status; +} + +/** * irdma_uk_qp_init - initialize shared qp * @qp: hw qp (user and kernel) * @info: qp initialization info @@ -1428,23 +1421,12 @@ int irdma_uk_qp_init(struct irdma_qp_uk *qp, struct irdma_qp_uk_init_info *info) { int ret_code = 0; u32 sq_ring_size; - u8 sqshift, rqshift; qp->uk_attrs = info->uk_attrs; if (info->max_sq_frag_cnt > qp->uk_attrs->max_hw_wq_frags || info->max_rq_frag_cnt > qp->uk_attrs->max_hw_wq_frags) return -EINVAL; - irdma_get_wqe_shift(qp->uk_attrs, info->max_rq_frag_cnt, 0, &rqshift); - if (qp->uk_attrs->hw_rev == IRDMA_GEN_1) { - irdma_get_wqe_shift(qp->uk_attrs, info->max_sq_frag_cnt, - info->max_inline_data, &sqshift); - if (info->abi_ver > 4) - rqshift = IRDMA_MAX_RQ_WQE_SHIFT_GEN1; - } else { - irdma_get_wqe_shift(qp->uk_attrs, info->max_sq_frag_cnt + 1, - info->max_inline_data, &sqshift); - } qp->qp_caps = info->qp_caps; qp->sq_base = info->sq; qp->rq_base = info->rq; @@ -1456,9 +1438,8 @@ int irdma_uk_qp_init(struct irdma_qp_uk *qp, struct irdma_qp_uk_init_info *info) qp->wqe_alloc_db = info->wqe_alloc_db; qp->qp_id = info->qp_id; qp->sq_size = info->sq_size; - qp->push_mode = false; qp->max_sq_frag_cnt = info->max_sq_frag_cnt; - sq_ring_size = qp->sq_size << sqshift; + sq_ring_size = qp->sq_size << info->sq_shift; IRDMA_RING_INIT(qp->sq_ring, sq_ring_size); IRDMA_RING_INIT(qp->initial_ring, sq_ring_size); if (info->first_sq_wq) { @@ -1473,9 +1454,9 @@ int irdma_uk_qp_init(struct irdma_qp_uk *qp, struct irdma_qp_uk_init_info *info) qp->rq_size = info->rq_size; qp->max_rq_frag_cnt = info->max_rq_frag_cnt; qp->max_inline_data = info->max_inline_data; - qp->rq_wqe_size = rqshift; + qp->rq_wqe_size = info->rq_shift; IRDMA_RING_INIT(qp->rq_ring, qp->rq_size); - qp->rq_wqe_size_multiplier = 1 << rqshift; + qp->rq_wqe_size_multiplier = 1 << info->rq_shift; if (qp->uk_attrs->hw_rev == IRDMA_GEN_1) qp->wqe_ops = iw_wqe_uk_ops_gen_1; else @@ -1554,7 +1535,6 @@ int irdma_nop(struct irdma_qp_uk *qp, u64 wr_id, bool signaled, bool post_sq) u32 wqe_idx; struct irdma_post_sq_info info = {}; - info.push_wqe = false; info.wr_id = wr_id; wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, IRDMA_QP_WQE_MIN_QUANTA, 0, &info); diff --git a/drivers/infiniband/hw/irdma/user.h b/drivers/infiniband/hw/irdma/user.h index d0cdf609f5e0..36feca57b274 100644 --- a/drivers/infiniband/hw/irdma/user.h +++ 
b/drivers/infiniband/hw/irdma/user.h @@ -85,6 +85,7 @@ enum irdma_device_caps_const { IRDMA_Q2_BUF_SIZE = 256, IRDMA_QP_CTX_SIZE = 256, IRDMA_MAX_PDS = 262144, + IRDMA_MIN_WQ_SIZE_GEN2 = 8, }; enum irdma_addressing_type { @@ -215,7 +216,6 @@ struct irdma_post_sq_info { bool local_fence:1; bool inline_data:1; bool imm_data_valid:1; - bool push_wqe:1; bool report_rtt:1; bool udp_hdr:1; bool defer_flag:1; @@ -247,7 +247,6 @@ struct irdma_cq_poll_info { u8 op_type; u8 q_type; bool stag_invalid_set:1; /* or L_R_Key set */ - bool push_dropped:1; bool error:1; bool solicited_event:1; bool ipv4:1; @@ -295,6 +294,12 @@ void irdma_uk_cq_init(struct irdma_cq_uk *cq, struct irdma_cq_uk_init_info *info); int irdma_uk_qp_init(struct irdma_qp_uk *qp, struct irdma_qp_uk_init_info *info); +void irdma_uk_calc_shift_wq(struct irdma_qp_uk_init_info *ukinfo, u8 *sq_shift, + u8 *rq_shift); +int irdma_uk_calc_depth_shift_sq(struct irdma_qp_uk_init_info *ukinfo, + u32 *sq_depth, u8 *sq_shift); +int irdma_uk_calc_depth_shift_rq(struct irdma_qp_uk_init_info *ukinfo, + u32 *rq_depth, u8 *rq_shift); struct irdma_sq_uk_wr_trk_info { u64 wrid; u32 wr_len; @@ -314,8 +319,6 @@ struct irdma_qp_uk { struct irdma_sq_uk_wr_trk_info *sq_wrtrk_array; u64 *rq_wrid_array; __le64 *shadow_area; - __le32 *push_db; - __le64 *push_wqe; struct irdma_ring sq_ring; struct irdma_ring rq_ring; struct irdma_ring initial_ring; @@ -335,8 +338,6 @@ struct irdma_qp_uk { u8 rq_wqe_size; u8 rq_wqe_size_multiplier; bool deferred_flag:1; - bool push_mode:1; /* whether the last post wqe was pushed */ - bool push_dropped:1; bool first_sq_wq:1; bool sq_flush_complete:1; /* Indicates flush was seen and SQ was empty after the flush */ bool rq_flush_complete:1; /* Indicates flush was seen and RQ was empty after the flush */ @@ -374,8 +375,12 @@ struct irdma_qp_uk_init_info { u32 max_sq_frag_cnt; u32 max_rq_frag_cnt; u32 max_inline_data; + u32 sq_depth; + u32 rq_depth; u8 first_sq_wq; u8 type; + u8 sq_shift; + u8 rq_shift; int abi_ver; bool legacy_mode; }; @@ -404,7 +409,5 @@ int irdma_get_sqdepth(struct irdma_uk_attrs *uk_attrs, u32 sq_size, u8 shift, u32 *wqdepth); int irdma_get_rqdepth(struct irdma_uk_attrs *uk_attrs, u32 rq_size, u8 shift, u32 *wqdepth); -void irdma_qp_push_wqe(struct irdma_qp_uk *qp, __le64 *wqe, u16 quanta, - u32 wqe_idx, bool post_sq); void irdma_clr_wqes(struct irdma_qp_uk *qp, u32 qp_wqe_idx); #endif /* IRDMA_USER_H */ diff --git a/drivers/infiniband/hw/irdma/utils.c b/drivers/infiniband/hw/irdma/utils.c index eb083f70b09f..6cd5cb85dafe 100644 --- a/drivers/infiniband/hw/irdma/utils.c +++ b/drivers/infiniband/hw/irdma/utils.c @@ -760,6 +760,31 @@ void irdma_qp_rem_ref(struct ib_qp *ibqp) complete(&iwqp->free_qp); } +void irdma_cq_add_ref(struct ib_cq *ibcq) +{ + struct irdma_cq *iwcq = to_iwcq(ibcq); + + refcount_inc(&iwcq->refcnt); +} + +void irdma_cq_rem_ref(struct ib_cq *ibcq) +{ + struct ib_device *ibdev = ibcq->device; + struct irdma_device *iwdev = to_iwdev(ibdev); + struct irdma_cq *iwcq = to_iwcq(ibcq); + unsigned long flags; + + spin_lock_irqsave(&iwdev->rf->cqtable_lock, flags); + if (!refcount_dec_and_test(&iwcq->refcnt)) { + spin_unlock_irqrestore(&iwdev->rf->cqtable_lock, flags); + return; + } + + iwdev->rf->cq_table[iwcq->cq_num] = NULL; + spin_unlock_irqrestore(&iwdev->rf->cqtable_lock, flags); + complete(&iwcq->free_cq); +} + struct ib_device *to_ibdev(struct irdma_sc_dev *dev) { return &(container_of(dev, struct irdma_pci_f, sc_dev))->iwdev->ibdev; diff --git a/drivers/infiniband/hw/irdma/verbs.c 
b/drivers/infiniband/hw/irdma/verbs.c index 9c4fe4fa9001..3eb7a7a3a975 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -277,7 +277,7 @@ static int irdma_alloc_ucontext(struct ib_ucontext *uctx, struct irdma_alloc_ucontext_req req = {}; struct irdma_alloc_ucontext_resp uresp = {}; struct irdma_ucontext *ucontext = to_ucontext(uctx); - struct irdma_uk_attrs *uk_attrs; + struct irdma_uk_attrs *uk_attrs = &iwdev->rf->sc_dev.hw_attrs.uk_attrs; if (udata->inlen < IRDMA_ALLOC_UCTX_MIN_REQ_LEN || udata->outlen < IRDMA_ALLOC_UCTX_MIN_RESP_LEN) @@ -292,7 +292,9 @@ static int irdma_alloc_ucontext(struct ib_ucontext *uctx, ucontext->iwdev = iwdev; ucontext->abi_ver = req.userspace_ver; - uk_attrs = &iwdev->rf->sc_dev.hw_attrs.uk_attrs; + if (req.comp_mask & IRDMA_ALLOC_UCTX_USE_RAW_ATTR) + ucontext->use_raw_attrs = true; + /* GEN_1 legacy support with libi40iw */ if (udata->outlen == IRDMA_ALLOC_UCTX_MIN_RESP_LEN) { if (uk_attrs->hw_rev != IRDMA_GEN_1) @@ -327,6 +329,9 @@ static int irdma_alloc_ucontext(struct ib_ucontext *uctx, uresp.max_hw_cq_size = uk_attrs->max_hw_cq_size; uresp.min_hw_cq_size = uk_attrs->min_hw_cq_size; uresp.hw_rev = uk_attrs->hw_rev; + uresp.comp_mask |= IRDMA_ALLOC_UCTX_USE_RAW_ATTR; + uresp.min_hw_wq_size = uk_attrs->min_hw_wq_size; + uresp.comp_mask |= IRDMA_ALLOC_UCTX_MIN_HW_WQ_SIZE; if (ib_copy_to_udata(udata, &uresp, min(sizeof(uresp), udata->outlen))) { rdma_user_mmap_entry_remove(ucontext->db_mmap_entry); @@ -567,6 +572,87 @@ static void irdma_setup_virt_qp(struct irdma_device *iwdev, } /** + * irdma_setup_umode_qp - setup sq and rq size in user mode qp + * @udata: udata + * @iwdev: iwarp device + * @iwqp: qp ptr (user or kernel) + * @info: initialize info to return + * @init_attr: Initial QP create attributes + */ +static int irdma_setup_umode_qp(struct ib_udata *udata, + struct irdma_device *iwdev, + struct irdma_qp *iwqp, + struct irdma_qp_init_info *info, + struct ib_qp_init_attr *init_attr) +{ + struct irdma_ucontext *ucontext = rdma_udata_to_drv_context(udata, + struct irdma_ucontext, ibucontext); + struct irdma_qp_uk_init_info *ukinfo = &info->qp_uk_init_info; + struct irdma_create_qp_req req; + unsigned long flags; + int ret; + + ret = ib_copy_from_udata(&req, udata, + min(sizeof(req), udata->inlen)); + if (ret) { + ibdev_dbg(&iwdev->ibdev, "VERBS: ib_copy_from_data fail\n"); + return ret; + } + + iwqp->ctx_info.qp_compl_ctx = req.user_compl_ctx; + iwqp->user_mode = 1; + if (req.user_wqe_bufs) { + info->qp_uk_init_info.legacy_mode = ucontext->legacy_mode; + spin_lock_irqsave(&ucontext->qp_reg_mem_list_lock, flags); + iwqp->iwpbl = irdma_get_pbl((unsigned long)req.user_wqe_bufs, + &ucontext->qp_reg_mem_list); + spin_unlock_irqrestore(&ucontext->qp_reg_mem_list_lock, flags); + + if (!iwqp->iwpbl) { + ret = -ENODATA; + ibdev_dbg(&iwdev->ibdev, "VERBS: no pbl info\n"); + return ret; + } + } + + if (!ucontext->use_raw_attrs) { + /** + * Maintain backward compat with older ABI which passes sq and + * rq depth in quanta in cap.max_send_wr and cap.max_recv_wr. + * There is no way to compute the correct value of + * iwqp->max_send_wr/max_recv_wr in the kernel. 
+ */ + iwqp->max_send_wr = init_attr->cap.max_send_wr; + iwqp->max_recv_wr = init_attr->cap.max_recv_wr; + ukinfo->sq_size = init_attr->cap.max_send_wr; + ukinfo->rq_size = init_attr->cap.max_recv_wr; + irdma_uk_calc_shift_wq(ukinfo, &ukinfo->sq_shift, + &ukinfo->rq_shift); + } else { + ret = irdma_uk_calc_depth_shift_sq(ukinfo, &ukinfo->sq_depth, + &ukinfo->sq_shift); + if (ret) + return ret; + + ret = irdma_uk_calc_depth_shift_rq(ukinfo, &ukinfo->rq_depth, + &ukinfo->rq_shift); + if (ret) + return ret; + + iwqp->max_send_wr = + (ukinfo->sq_depth - IRDMA_SQ_RSVD) >> ukinfo->sq_shift; + iwqp->max_recv_wr = + (ukinfo->rq_depth - IRDMA_RQ_RSVD) >> ukinfo->rq_shift; + ukinfo->sq_size = ukinfo->sq_depth >> ukinfo->sq_shift; + ukinfo->rq_size = ukinfo->rq_depth >> ukinfo->rq_shift; + } + + irdma_setup_virt_qp(iwdev, iwqp, info); + + return 0; +} + +/** * irdma_setup_kmode_qp - setup initialization for kernel mode qp * @iwdev: iwarp device * @iwqp: qp ptr (user or kernel) @@ -579,40 +665,28 @@ static int irdma_setup_kmode_qp(struct irdma_device *iwdev, struct ib_qp_init_attr *init_attr) { struct irdma_dma_mem *mem = &iwqp->kqp.dma_mem; - u32 sqdepth, rqdepth; - u8 sqshift, rqshift; u32 size; int status; struct irdma_qp_uk_init_info *ukinfo = &info->qp_uk_init_info; - struct irdma_uk_attrs *uk_attrs = &iwdev->rf->sc_dev.hw_attrs.uk_attrs; - irdma_get_wqe_shift(uk_attrs, - uk_attrs->hw_rev >= IRDMA_GEN_2 ? ukinfo->max_sq_frag_cnt + 1 : - ukinfo->max_sq_frag_cnt, - ukinfo->max_inline_data, &sqshift); - status = irdma_get_sqdepth(uk_attrs, ukinfo->sq_size, sqshift, - &sqdepth); + status = irdma_uk_calc_depth_shift_sq(ukinfo, &ukinfo->sq_depth, + &ukinfo->sq_shift); if (status) return status; - if (uk_attrs->hw_rev == IRDMA_GEN_1) - rqshift = IRDMA_MAX_RQ_WQE_SHIFT_GEN1; - else - irdma_get_wqe_shift(uk_attrs, ukinfo->max_rq_frag_cnt, 0, - &rqshift); - - status = irdma_get_rqdepth(uk_attrs, ukinfo->rq_size, rqshift, - &rqdepth); + status = irdma_uk_calc_depth_shift_rq(ukinfo, &ukinfo->rq_depth, + &ukinfo->rq_shift); if (status) return status; iwqp->kqp.sq_wrid_mem = - kcalloc(sqdepth, sizeof(*iwqp->kqp.sq_wrid_mem), GFP_KERNEL); + kcalloc(ukinfo->sq_depth, sizeof(*iwqp->kqp.sq_wrid_mem), GFP_KERNEL); if (!iwqp->kqp.sq_wrid_mem) return -ENOMEM; iwqp->kqp.rq_wrid_mem = - kcalloc(rqdepth, sizeof(*iwqp->kqp.rq_wrid_mem), GFP_KERNEL); + kcalloc(ukinfo->rq_depth, sizeof(*iwqp->kqp.rq_wrid_mem), GFP_KERNEL); + if (!iwqp->kqp.rq_wrid_mem) { kfree(iwqp->kqp.sq_wrid_mem); iwqp->kqp.sq_wrid_mem = NULL; @@ -622,7 +696,7 @@ static int irdma_setup_kmode_qp(struct irdma_device *iwdev, ukinfo->sq_wrtrk_array = iwqp->kqp.sq_wrid_mem; ukinfo->rq_wrid_array = iwqp->kqp.rq_wrid_mem; - size = (sqdepth + rqdepth) * IRDMA_QP_WQE_MIN_SIZE; + size = (ukinfo->sq_depth + ukinfo->rq_depth) * IRDMA_QP_WQE_MIN_SIZE; size += (IRDMA_SHADOW_AREA_SIZE << 3); mem->size = ALIGN(size, 256); @@ -638,16 +712,19 @@ static int irdma_setup_kmode_qp(struct irdma_device *iwdev, ukinfo->sq = mem->va; info->sq_pa = mem->pa; - ukinfo->rq = &ukinfo->sq[sqdepth]; - info->rq_pa = info->sq_pa + (sqdepth * IRDMA_QP_WQE_MIN_SIZE); - ukinfo->shadow_area = ukinfo->rq[rqdepth].elem; - info->shadow_area_pa = info->rq_pa + (rqdepth * IRDMA_QP_WQE_MIN_SIZE); - ukinfo->sq_size = sqdepth >> sqshift; - ukinfo->rq_size = rqdepth >> rqshift; + ukinfo->rq = &ukinfo->sq[ukinfo->sq_depth]; + info->rq_pa = info->sq_pa + (ukinfo->sq_depth * IRDMA_QP_WQE_MIN_SIZE); + ukinfo->shadow_area = ukinfo->rq[ukinfo->rq_depth].elem; + info->shadow_area_pa = + info->rq_pa + 
(ukinfo->rq_depth * IRDMA_QP_WQE_MIN_SIZE); + ukinfo->sq_size = ukinfo->sq_depth >> ukinfo->sq_shift; + ukinfo->rq_size = ukinfo->rq_depth >> ukinfo->rq_shift; ukinfo->qp_id = iwqp->ibqp.qp_num; - init_attr->cap.max_send_wr = (sqdepth - IRDMA_SQ_RSVD) >> sqshift; - init_attr->cap.max_recv_wr = (rqdepth - IRDMA_RQ_RSVD) >> rqshift; + iwqp->max_send_wr = (ukinfo->sq_depth - IRDMA_SQ_RSVD) >> ukinfo->sq_shift; + iwqp->max_recv_wr = (ukinfo->rq_depth - IRDMA_RQ_RSVD) >> ukinfo->rq_shift; + init_attr->cap.max_send_wr = iwqp->max_send_wr; + init_attr->cap.max_recv_wr = iwqp->max_recv_wr; return 0; } @@ -803,18 +880,14 @@ static int irdma_create_qp(struct ib_qp *ibqp, struct irdma_device *iwdev = to_iwdev(ibpd->device); struct irdma_pci_f *rf = iwdev->rf; struct irdma_qp *iwqp = to_iwqp(ibqp); - struct irdma_create_qp_req req = {}; struct irdma_create_qp_resp uresp = {}; u32 qp_num = 0; int err_code; - int sq_size; - int rq_size; struct irdma_sc_qp *qp; struct irdma_sc_dev *dev = &rf->sc_dev; struct irdma_uk_attrs *uk_attrs = &dev->hw_attrs.uk_attrs; struct irdma_qp_init_info init_info = {}; struct irdma_qp_host_ctx_info *ctx_info; - unsigned long flags; err_code = irdma_validate_qp_attrs(init_attr, iwdev); if (err_code) @@ -824,13 +897,10 @@ static int irdma_create_qp(struct ib_qp *ibqp, udata->outlen < IRDMA_CREATE_QP_MIN_RESP_LEN)) return -EINVAL; - sq_size = init_attr->cap.max_send_wr; - rq_size = init_attr->cap.max_recv_wr; - init_info.vsi = &iwdev->vsi; init_info.qp_uk_init_info.uk_attrs = uk_attrs; - init_info.qp_uk_init_info.sq_size = sq_size; - init_info.qp_uk_init_info.rq_size = rq_size; + init_info.qp_uk_init_info.sq_size = init_attr->cap.max_send_wr; + init_info.qp_uk_init_info.rq_size = init_attr->cap.max_recv_wr; init_info.qp_uk_init_info.max_sq_frag_cnt = init_attr->cap.max_send_sge; init_info.qp_uk_init_info.max_rq_frag_cnt = init_attr->cap.max_recv_sge; init_info.qp_uk_init_info.max_inline_data = init_attr->cap.max_inline_data; @@ -880,36 +950,9 @@ static int irdma_create_qp(struct ib_qp *ibqp, init_waitqueue_head(&iwqp->mod_qp_waitq); if (udata) { - err_code = ib_copy_from_udata(&req, udata, - min(sizeof(req), udata->inlen)); - if (err_code) { - ibdev_dbg(&iwdev->ibdev, - "VERBS: ib_copy_from_data fail\n"); - goto error; - } - - iwqp->ctx_info.qp_compl_ctx = req.user_compl_ctx; - iwqp->user_mode = 1; - if (req.user_wqe_bufs) { - struct irdma_ucontext *ucontext = - rdma_udata_to_drv_context(udata, - struct irdma_ucontext, - ibucontext); - - init_info.qp_uk_init_info.legacy_mode = ucontext->legacy_mode; - spin_lock_irqsave(&ucontext->qp_reg_mem_list_lock, flags); - iwqp->iwpbl = irdma_get_pbl((unsigned long)req.user_wqe_bufs, - &ucontext->qp_reg_mem_list); - spin_unlock_irqrestore(&ucontext->qp_reg_mem_list_lock, flags); - - if (!iwqp->iwpbl) { - err_code = -ENODATA; - ibdev_dbg(&iwdev->ibdev, "VERBS: no pbl info\n"); - goto error; - } - } init_info.qp_uk_init_info.abi_ver = iwpd->sc_pd.abi_ver; - irdma_setup_virt_qp(iwdev, iwqp, &init_info); + err_code = irdma_setup_umode_qp(udata, iwdev, iwqp, &init_info, + init_attr); } else { INIT_DELAYED_WORK(&iwqp->dwork_flush, irdma_flush_worker); init_info.qp_uk_init_info.abi_ver = IRDMA_ABI_VER; @@ -962,10 +1005,8 @@ static int irdma_create_qp(struct ib_qp *ibqp, refcount_set(&iwqp->refcnt, 1); spin_lock_init(&iwqp->lock); spin_lock_init(&iwqp->sc_qp.pfpdu.lock); - iwqp->sig_all = (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) ? 
1 : 0; + iwqp->sig_all = init_attr->sq_sig_type == IB_SIGNAL_ALL_WR; rf->qp_table[qp_num] = iwqp; - iwqp->max_send_wr = sq_size; - iwqp->max_recv_wr = rq_size; if (rdma_protocol_roce(&iwdev->ibdev, 1)) { if (dev->ws_add(&iwdev->vsi, 0)) { @@ -986,8 +1027,8 @@ static int irdma_create_qp(struct ib_qp *ibqp, if (rdma_protocol_iwarp(&iwdev->ibdev, 1)) uresp.lsmm = 1; } - uresp.actual_sq_size = sq_size; - uresp.actual_rq_size = rq_size; + uresp.actual_sq_size = init_info.qp_uk_init_info.sq_size; + uresp.actual_rq_size = init_info.qp_uk_init_info.rq_size; uresp.qp_id = qp_num; uresp.qp_caps = qp->qp_uk.qp_caps; @@ -1098,6 +1139,24 @@ static int irdma_query_pkey(struct ib_device *ibdev, u32 port, u16 index, return 0; } +static u8 irdma_roce_get_vlan_prio(const struct ib_gid_attr *attr, u8 prio) +{ + struct net_device *ndev; + + rcu_read_lock(); + ndev = rcu_dereference(attr->ndev); + if (!ndev) + goto exit; + if (is_vlan_dev(ndev)) { + u16 vlan_qos = vlan_dev_get_egress_qos_mask(ndev, prio); + + prio = (vlan_qos & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; + } +exit: + rcu_read_unlock(); + return prio; +} + /** * irdma_modify_qp_roce - modify qp request * @ibqp: qp's pointer for modify @@ -1174,7 +1233,8 @@ int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr, if (attr_mask & IB_QP_AV) { struct irdma_av *av = &iwqp->roce_ah.av; - const struct ib_gid_attr *sgid_attr; + const struct ib_gid_attr *sgid_attr = + attr->ah_attr.grh.sgid_attr; u16 vlan_id = VLAN_N_VID; u32 local_ip[4]; @@ -1189,17 +1249,22 @@ int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr, roce_info->dest_qp); irdma_qp_rem_qos(&iwqp->sc_qp); dev->ws_remove(iwqp->sc_qp.vsi, ctx_info->user_pri); - ctx_info->user_pri = rt_tos2priority(udp_info->tos); - iwqp->sc_qp.user_pri = ctx_info->user_pri; - if (dev->ws_add(iwqp->sc_qp.vsi, ctx_info->user_pri)) - return -ENOMEM; - irdma_qp_add_qos(&iwqp->sc_qp); + if (iwqp->sc_qp.vsi->dscp_mode) + ctx_info->user_pri = + iwqp->sc_qp.vsi->dscp_map[irdma_tos2dscp(udp_info->tos)]; + else + ctx_info->user_pri = rt_tos2priority(udp_info->tos); } - sgid_attr = attr->ah_attr.grh.sgid_attr; ret = rdma_read_gid_l2_fields(sgid_attr, &vlan_id, ctx_info->roce_info->mac_addr); if (ret) return ret; + ctx_info->user_pri = irdma_roce_get_vlan_prio(sgid_attr, + ctx_info->user_pri); + if (dev->ws_add(iwqp->sc_qp.vsi, ctx_info->user_pri)) + return -ENOMEM; + iwqp->sc_qp.user_pri = ctx_info->user_pri; + irdma_qp_add_qos(&iwqp->sc_qp); if (vlan_id >= VLAN_N_VID && iwdev->dcb_vlan_mode) vlan_id = 0; @@ -1781,6 +1846,9 @@ static int irdma_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) irdma_process_resize_list(iwcq, iwdev, NULL); spin_unlock_irqrestore(&iwcq->lock, flags); + irdma_cq_rem_ref(ib_cq); + wait_for_completion(&iwcq->free_cq); + irdma_cq_wq_destroy(iwdev->rf, cq); spin_lock_irqsave(&iwceq->ce_lock, flags); @@ -1990,6 +2058,7 @@ static int irdma_create_cq(struct ib_cq *ibcq, cq = &iwcq->sc_cq; cq->back_cq = iwcq; + refcount_set(&iwcq->refcnt, 1); spin_lock_init(&iwcq->lock); INIT_LIST_HEAD(&iwcq->resize_list); INIT_LIST_HEAD(&iwcq->cmpl_generated); @@ -2141,6 +2210,9 @@ static int irdma_create_cq(struct ib_cq *ibcq, goto cq_destroy; } } + rf->cq_table[cq_num] = iwcq; + init_completion(&iwcq->free_cq); + return 0; cq_destroy: irdma_cq_wq_destroy(rf, cq); @@ -2552,7 +2624,8 @@ static int irdma_hw_alloc_stag(struct irdma_device *iwdev, struct irdma_mr *iwmr) { struct irdma_allocate_stag_info *info; - struct irdma_pd *iwpd = to_iwpd(iwmr->ibmr.pd); + struct ib_pd *pd = 
iwmr->ibmr.pd; + struct irdma_pd *iwpd = to_iwpd(pd); int status; struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; @@ -2568,6 +2641,7 @@ static int irdma_hw_alloc_stag(struct irdma_device *iwdev, info->stag_idx = iwmr->stag >> IRDMA_CQPSQ_STAG_IDX_S; info->pd_id = iwpd->sc_pd.pd_id; info->total_len = iwmr->len; + info->all_memory = pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY; info->remote_access = true; cqp_info->cqp_cmd = IRDMA_OP_ALLOC_STAG; cqp_info->post_sq = 1; @@ -2615,6 +2689,8 @@ static struct ib_mr *irdma_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, iwmr->type = IRDMA_MEMREG_TYPE_MEM; palloc = &iwpbl->pble_alloc; iwmr->page_cnt = max_num_sg; + /* Use system PAGE_SIZE as the sg page sizes are unknown at this point */ + iwmr->len = max_num_sg * PAGE_SIZE; err_code = irdma_get_pble(iwdev->rf->pble_rsrc, palloc, iwmr->page_cnt, false); if (err_code) @@ -2694,7 +2770,8 @@ static int irdma_hwreg_mr(struct irdma_device *iwdev, struct irdma_mr *iwmr, { struct irdma_pbl *iwpbl = &iwmr->iwpbl; struct irdma_reg_ns_stag_info *stag_info; - struct irdma_pd *iwpd = to_iwpd(iwmr->ibmr.pd); + struct ib_pd *pd = iwmr->ibmr.pd; + struct irdma_pd *iwpd = to_iwpd(pd); struct irdma_pble_alloc *palloc = &iwpbl->pble_alloc; struct irdma_cqp_request *cqp_request; struct cqp_cmds_info *cqp_info; @@ -2713,6 +2790,7 @@ static int irdma_hwreg_mr(struct irdma_device *iwdev, struct irdma_mr *iwmr, stag_info->total_len = iwmr->len; stag_info->access_rights = irdma_get_mr_access(access); stag_info->pd_id = iwpd->sc_pd.pd_id; + stag_info->all_memory = pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY; if (stag_info->access_rights & IRDMA_ACCESS_FLAGS_ZERO_BASED) stag_info->addr_type = IRDMA_ADDR_TYPE_ZERO_BASED; else @@ -2794,8 +2872,8 @@ static struct irdma_mr *irdma_alloc_iwmr(struct ib_umem *region, enum irdma_memreg_type reg_type) { struct irdma_device *iwdev = to_iwdev(pd->device); - struct irdma_pbl *iwpbl = NULL; - struct irdma_mr *iwmr = NULL; + struct irdma_pbl *iwpbl; + struct irdma_mr *iwmr; unsigned long pgsz_bitmap; iwmr = kzalloc(sizeof(*iwmr), GFP_KERNEL); @@ -3476,8 +3554,7 @@ static void irdma_process_cqe(struct ib_wc *entry, set_ib_wc_op_sq(cq_poll_info, entry); } else { set_ib_wc_op_rq(cq_poll_info, entry, - qp->qp_uk.qp_caps & IRDMA_SEND_WITH_IMM ? 
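/*
 * Both STAG setup paths above derive the new all_memory flag the same
 * way; a one-line sketch, with the caveat that the exact HW meaning is
 * an assumption here (presumably the STAG is validated as covering all
 * of memory, matching the unsafe global rkey semantics):
 *
 *	info->all_memory = pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY;
 */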
- true : false); + qp->qp_uk.qp_caps & IRDMA_SEND_WITH_IMM); if (qp->qp_uk.qp_type != IRDMA_QP_TYPE_ROCE_UD && cq_poll_info->stag_invalid_set) { entry->ex.invalidate_rkey = cq_poll_info->inv_stag; @@ -3963,7 +4040,7 @@ static int irdma_attach_mcast(struct ib_qp *ibqp, union ib_gid *ibgid, u16 lid) if (!ipv6_addr_v4mapped((struct in6_addr *)ibgid)) { irdma_copy_ip_ntohl(ip_addr, sgid_addr.saddr_in6.sin6_addr.in6_u.u6_addr32); - irdma_netdev_vlan_ipv6(ip_addr, &vlan_id, NULL); + irdma_get_vlan_mac_ipv6(ip_addr, &vlan_id, NULL); ipv4 = false; ibdev_dbg(&iwdev->ibdev, "VERBS: qp_id=%d, IP6address=%pI6\n", ibqp->qp_num, @@ -4261,9 +4338,12 @@ static int irdma_setup_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *attr) ah_info->vlan_tag = 0; if (ah_info->vlan_tag < VLAN_N_VID) { + u8 prio = rt_tos2priority(ah_info->tc_tos); + + prio = irdma_roce_get_vlan_prio(sgid_attr, prio); + + ah_info->vlan_tag |= (u16)prio << VLAN_PRIO_SHIFT; ah_info->insert_vlan_tag = true; - ah_info->vlan_tag |= - rt_tos2priority(ah_info->tc_tos) << VLAN_PRIO_SHIFT; } return 0; @@ -4424,7 +4504,6 @@ static int irdma_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *ah_attr) ah_attr->grh.traffic_class = ah->sc_ah.ah_info.tc_tos; ah_attr->grh.hop_limit = ah->sc_ah.ah_info.hop_ttl; ah_attr->grh.sgid_index = ah->sgid_index; - ah_attr->grh.sgid_index = ah->sgid_index; memcpy(&ah_attr->grh.dgid, &ah->dgid, sizeof(ah_attr->grh.dgid)); } diff --git a/drivers/infiniband/hw/irdma/verbs.h b/drivers/infiniband/hw/irdma/verbs.h index a536e9fa85eb..5d7b983f47a2 100644 --- a/drivers/infiniband/hw/irdma/verbs.h +++ b/drivers/infiniband/hw/irdma/verbs.h @@ -18,7 +18,8 @@ struct irdma_ucontext { struct list_head qp_reg_mem_list; spinlock_t qp_reg_mem_list_lock; /* protect QP memory list */ int abi_ver; - bool legacy_mode; + u8 legacy_mode : 1; + u8 use_raw_attrs : 1; }; struct irdma_pd { @@ -122,6 +123,8 @@ struct irdma_cq { u32 cq_mem_size; struct irdma_dma_mem kmem; struct irdma_dma_mem kmem_shadow; + struct completion free_cq; + refcount_t refcnt; spinlock_t lock; /* for poll cq */ struct irdma_pbl *iwpbl; struct irdma_pbl *iwpbl_shadow; diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 1f8d0d2c5f17..529db874d67c 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -136,7 +136,7 @@ static struct net_device *mlx4_ib_get_netdev(struct ib_device *device, continue; if (mlx4_is_bonded(ibdev->dev)) { - struct net_device *upper = NULL; + struct net_device *upper; upper = netdev_master_upper_dev_get_rcu(dev); if (upper) { @@ -261,7 +261,7 @@ static int mlx4_ib_add_gid(const struct ib_gid_attr *attr, void **context) int ret = 0; int hw_update = 0; int i; - struct gid_entry *gids = NULL; + struct gid_entry *gids; u16 vlan_id = 0xffff; u8 mac[ETH_ALEN]; @@ -300,8 +300,7 @@ static int mlx4_ib_add_gid(const struct ib_gid_attr *attr, void **context) ret = -ENOMEM; } else { *context = port_gid_table->gids[free].ctx; - memcpy(&port_gid_table->gids[free].gid, - &attr->gid, sizeof(attr->gid)); + port_gid_table->gids[free].gid = attr->gid; port_gid_table->gids[free].gid_type = attr->gid_type; port_gid_table->gids[free].vlan_id = vlan_id; port_gid_table->gids[free].ctx->real_index = free; @@ -352,7 +351,7 @@ static int mlx4_ib_del_gid(const struct ib_gid_attr *attr, void **context) struct mlx4_port_gid_table *port_gid_table; int ret = 0; int hw_update = 0; - struct gid_entry *gids = NULL; + struct gid_entry *gids; if (!rdma_cap_roce_gid_table(attr->device, attr->port_num)) return 
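/*
 * The mlx4_ib_add_gid() hunk above replaces a memcpy() of a gid with
 * plain assignment; both copy sizeof(union ib_gid) bytes, but the
 * assignment lets the compiler check the types. Minimal illustration:
 *
 *	union ib_gid a, b;
 *	memcpy(&a, &b, sizeof(b));	// old style, unchecked
 *	a = b;				// new style, type-checked
 */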
-EINVAL; @@ -438,8 +437,8 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, struct ib_udata *uhw) { struct mlx4_ib_dev *dev = to_mdev(ibdev); - struct ib_smp *in_mad = NULL; - struct ib_smp *out_mad = NULL; + struct ib_smp *in_mad; + struct ib_smp *out_mad; int err; int have_ib_ports; struct mlx4_uverbs_ex_query_device cmd; @@ -656,8 +655,8 @@ mlx4_ib_port_link_layer(struct ib_device *device, u32 port_num) static int ib_link_query_port(struct ib_device *ibdev, u32 port, struct ib_port_attr *props, int netw_view) { - struct ib_smp *in_mad = NULL; - struct ib_smp *out_mad = NULL; + struct ib_smp *in_mad; + struct ib_smp *out_mad; int ext_active_speed; int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS; int err = -ENOMEM; @@ -834,8 +833,8 @@ static int mlx4_ib_query_port(struct ib_device *ibdev, u32 port, int __mlx4_ib_query_gid(struct ib_device *ibdev, u32 port, int index, union ib_gid *gid, int netw_view) { - struct ib_smp *in_mad = NULL; - struct ib_smp *out_mad = NULL; + struct ib_smp *in_mad; + struct ib_smp *out_mad; int err = -ENOMEM; struct mlx4_ib_dev *dev = to_mdev(ibdev); int clear = 0; @@ -899,8 +898,8 @@ static int mlx4_ib_query_sl2vl(struct ib_device *ibdev, u32 port, u64 *sl2vl_tbl) { union sl2vl_tbl_to_u64 sl2vl64; - struct ib_smp *in_mad = NULL; - struct ib_smp *out_mad = NULL; + struct ib_smp *in_mad; + struct ib_smp *out_mad; int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS; int err = -ENOMEM; int jj; @@ -959,8 +958,8 @@ static void mlx4_init_sl2vl_tbl(struct mlx4_ib_dev *mdev) int __mlx4_ib_query_pkey(struct ib_device *ibdev, u32 port, u16 index, u16 *pkey, int netw_view) { - struct ib_smp *in_mad = NULL; - struct ib_smp *out_mad = NULL; + struct ib_smp *in_mad; + struct ib_smp *out_mad; int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS; int err = -ENOMEM; @@ -1975,8 +1974,8 @@ static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) static int init_node_data(struct mlx4_ib_dev *dev) { - struct ib_smp *in_mad = NULL; - struct ib_smp *out_mad = NULL; + struct ib_smp *in_mad; + struct ib_smp *out_mad; int mad_ifc_flags = MLX4_MAD_IFC_IGNORE_KEYS; int err = -ENOMEM; @@ -2623,7 +2622,7 @@ static int mlx4_ib_probe(struct auxiliary_device *adev, int num_req_counters; int allocated; u32 counter_index; - struct counter_index *new_counter_index = NULL; + struct counter_index *new_counter_index; pr_info_once("%s", mlx4_ib_version); @@ -2946,7 +2945,7 @@ int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp, { int err; size_t flow_size; - struct ib_flow_attr *flow = NULL; + struct ib_flow_attr *flow; struct ib_flow_spec_ib *ib_spec; if (is_attach) { @@ -2966,11 +2965,11 @@ int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp, err = __mlx4_ib_create_flow(&mqp->ibqp, flow, MLX4_DOMAIN_NIC, MLX4_FS_REGULAR, &mqp->reg_id); - } else { - err = __mlx4_ib_destroy_flow(mdev->dev, mqp->reg_id); + kfree(flow); + return err; } - kfree(flow); - return err; + + return __mlx4_ib_destroy_flow(mdev->dev, mqp->reg_id); } static void mlx4_ib_remove(struct auxiliary_device *adev) @@ -3019,7 +3018,7 @@ static void mlx4_ib_remove(struct auxiliary_device *adev) static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init) { - struct mlx4_ib_demux_work **dm = NULL; + struct mlx4_ib_demux_work **dm; struct mlx4_dev *dev = ibdev->dev; int i; unsigned long flags; diff --git a/drivers/infiniband/hw/mlx5/counters.c b/drivers/infiniband/hw/mlx5/counters.c index 93257fa5aae8..8300ce622835 100644 --- a/drivers/infiniband/hw/mlx5/counters.c +++ 
b/drivers/infiniband/hw/mlx5/counters.c @@ -27,6 +27,7 @@ static const struct mlx5_ib_counter basic_q_cnts[] = { INIT_Q_COUNTER(rx_write_requests), INIT_Q_COUNTER(rx_read_requests), INIT_Q_COUNTER(rx_atomic_requests), + INIT_Q_COUNTER(rx_dct_connect), INIT_Q_COUNTER(out_of_buffer), }; @@ -46,6 +47,7 @@ static const struct mlx5_ib_counter vport_basic_q_cnts[] = { INIT_VPORT_Q_COUNTER(rx_write_requests), INIT_VPORT_Q_COUNTER(rx_read_requests), INIT_VPORT_Q_COUNTER(rx_atomic_requests), + INIT_VPORT_Q_COUNTER(rx_dct_connect), INIT_VPORT_Q_COUNTER(out_of_buffer), }; diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c index 9c8a7b206dcf..8102ef113b7e 100644 --- a/drivers/infiniband/hw/mlx5/mad.c +++ b/drivers/infiniband/hw/mlx5/mad.c @@ -308,8 +308,8 @@ int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u32 port_num, int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, unsigned int port) { - struct ib_smp *in_mad = NULL; - struct ib_smp *out_mad = NULL; + struct ib_smp *in_mad; + struct ib_smp *out_mad; int err = -ENOMEM; u16 packet_error; @@ -338,8 +338,8 @@ out: static int mlx5_query_mad_ifc_smp_attr_node_info(struct ib_device *ibdev, struct ib_smp *out_mad) { - struct ib_smp *in_mad = NULL; - int err = -ENOMEM; + struct ib_smp *in_mad; + int err; in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL); if (!in_mad) @@ -358,8 +358,8 @@ static int mlx5_query_mad_ifc_smp_attr_node_info(struct ib_device *ibdev, int mlx5_query_mad_ifc_system_image_guid(struct ib_device *ibdev, __be64 *sys_image_guid) { - struct ib_smp *out_mad = NULL; - int err = -ENOMEM; + struct ib_smp *out_mad; + int err; out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL); if (!out_mad) @@ -380,8 +380,8 @@ out: int mlx5_query_mad_ifc_max_pkeys(struct ib_device *ibdev, u16 *max_pkeys) { - struct ib_smp *out_mad = NULL; - int err = -ENOMEM; + struct ib_smp *out_mad; + int err; out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL); if (!out_mad) @@ -402,8 +402,8 @@ out: int mlx5_query_mad_ifc_vendor_id(struct ib_device *ibdev, u32 *vendor_id) { - struct ib_smp *out_mad = NULL; - int err = -ENOMEM; + struct ib_smp *out_mad; + int err; out_mad = kmalloc(sizeof(*out_mad), GFP_KERNEL); if (!out_mad) @@ -423,8 +423,8 @@ out: int mlx5_query_mad_ifc_node_desc(struct mlx5_ib_dev *dev, char *node_desc) { - struct ib_smp *in_mad = NULL; - struct ib_smp *out_mad = NULL; + struct ib_smp *in_mad; + struct ib_smp *out_mad; int err = -ENOMEM; in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL); @@ -448,8 +448,8 @@ out: int mlx5_query_mad_ifc_node_guid(struct mlx5_ib_dev *dev, __be64 *node_guid) { - struct ib_smp *in_mad = NULL; - struct ib_smp *out_mad = NULL; + struct ib_smp *in_mad; + struct ib_smp *out_mad; int err = -ENOMEM; in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL); @@ -474,8 +474,8 @@ out: int mlx5_query_mad_ifc_pkey(struct ib_device *ibdev, u32 port, u16 index, u16 *pkey) { - struct ib_smp *in_mad = NULL; - struct ib_smp *out_mad = NULL; + struct ib_smp *in_mad; + struct ib_smp *out_mad; int err = -ENOMEM; in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL); @@ -503,8 +503,8 @@ out: int mlx5_query_mad_ifc_gids(struct ib_device *ibdev, u32 port, int index, union ib_gid *gid) { - struct ib_smp *in_mad = NULL; - struct ib_smp *out_mad = NULL; + struct ib_smp *in_mad; + struct ib_smp *out_mad; int err = -ENOMEM; in_mad = kzalloc(sizeof(*in_mad), GFP_KERNEL); @@ -545,8 +545,8 @@ int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u32 port, { struct mlx5_ib_dev *dev = to_mdev(ibdev); struct mlx5_core_dev *mdev = dev->mdev; - 
struct ib_smp *in_mad = NULL; - struct ib_smp *out_mad = NULL; + struct ib_smp *in_mad; + struct ib_smp *out_mad; int ext_active_speed; int err = -ENOMEM; diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 2017ede100a6..3e345ef380f1 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1235,7 +1235,8 @@ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, struct ib_umem *umem, } /* The pg_access bit allows setting the access flags - * in the page list submitted with the command. */ + * in the page list submitted with the command. + */ MLX5_SET(create_mkey_in, in, pg_access, !!(pg_cap)); mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); @@ -1766,6 +1767,11 @@ mlx5_alloc_priv_descs(struct ib_device *device, int ret; add_size = max_t(int, MLX5_UMR_ALIGN - ARCH_KMALLOC_MINALIGN, 0); + if (is_power_of_2(MLX5_UMR_ALIGN) && add_size) { + int end = max_t(int, MLX5_UMR_ALIGN, roundup_pow_of_two(size)); + + add_size = min_t(int, end - size, add_size); + } mr->descs_alloc = kzalloc(size + add_size, GFP_KERNEL); if (!mr->descs_alloc) diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index c46df53f26cf..e1325f2927d6 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -53,8 +53,8 @@ static int mthca_query_device(struct ib_device *ibdev, struct ib_device_attr *props, struct ib_udata *uhw) { - struct ib_smp *in_mad = NULL; - struct ib_smp *out_mad = NULL; + struct ib_smp *in_mad; + struct ib_smp *out_mad; int err = -ENOMEM; struct mthca_dev *mdev = to_mdev(ibdev); @@ -121,8 +121,8 @@ static int mthca_query_device(struct ib_device *ibdev, struct ib_device_attr *pr static int mthca_query_port(struct ib_device *ibdev, u32 port, struct ib_port_attr *props) { - struct ib_smp *in_mad = NULL; - struct ib_smp *out_mad = NULL; + struct ib_smp *in_mad; + struct ib_smp *out_mad; int err = -ENOMEM; in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); @@ -217,8 +217,8 @@ out: static int mthca_query_pkey(struct ib_device *ibdev, u32 port, u16 index, u16 *pkey) { - struct ib_smp *in_mad = NULL; - struct ib_smp *out_mad = NULL; + struct ib_smp *in_mad; + struct ib_smp *out_mad; int err = -ENOMEM; in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); @@ -246,8 +246,8 @@ static int mthca_query_pkey(struct ib_device *ibdev, static int mthca_query_gid(struct ib_device *ibdev, u32 port, int index, union ib_gid *gid) { - struct ib_smp *in_mad = NULL; - struct ib_smp *out_mad = NULL; + struct ib_smp *in_mad; + struct ib_smp *out_mad; int err = -ENOMEM; in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); @@ -989,8 +989,8 @@ static const struct attribute_group mthca_attr_group = { static int mthca_init_node_data(struct mthca_dev *dev) { - struct ib_smp *in_mad = NULL; - struct ib_smp *out_mad = NULL; + struct ib_smp *in_mad; + struct ib_smp *out_mad; int err = -ENOMEM; in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL); diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index 58f994341e9a..c849fdbd4c99 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -1277,7 +1277,7 @@ static void ocrdma_set_qp_init_params(struct ocrdma_qp *qp, qp->sq.max_sges = attrs->cap.max_send_sge; qp->rq.max_sges = attrs->cap.max_recv_sge; qp->state = OCRDMA_QPS_RST; - qp->signaled = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR) ? 
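/*
 * mlx5_alloc_priv_descs() above clamps the MLX5_UMR_ALIGN padding:
 * power-of-two kmalloc buckets are naturally aligned, so the
 * allocation never needs to grow past max(MLX5_UMR_ALIGN,
 * roundup_pow_of_two(size)). Worked example with assumed values
 * MLX5_UMR_ALIGN = 2048 and ARCH_KMALLOC_MINALIGN = 8:
 *
 *	size	 = 1500;
 *	add_size = 2048 - 8;				// 2040 worst case
 *	end	 = max(2048, roundup_pow_of_two(1500));	// 2048
 *	add_size = min(end - size, add_size);		// 548, not 2040
 */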
true : false; + qp->signaled = attrs->sq_sig_type == IB_SIGNAL_ALL_WR; } static void ocrdma_store_gsi_qp_cq(struct ocrdma_dev *dev, diff --git a/drivers/infiniband/hw/qedr/qedr_roce_cm.c b/drivers/infiniband/hw/qedr/qedr_roce_cm.c index 05307c1488b8..859f66a51bd2 100644 --- a/drivers/infiniband/hw/qedr/qedr_roce_cm.c +++ b/drivers/infiniband/hw/qedr/qedr_roce_cm.c @@ -354,7 +354,6 @@ int qedr_create_gsi_qp(struct qedr_dev *dev, struct ib_qp_init_attr *attrs, /* the GSI CQ is handled by the driver so remove it from the FW */ qedr_destroy_gsi_cq(dev, attrs); dev->gsi_rqcq->cq_type = QEDR_CQ_TYPE_GSI; - dev->gsi_rqcq->cq_type = QEDR_CQ_TYPE_GSI; DP_DEBUG(dev, QEDR_MSG_GSI, "created GSI QP %p\n", qp); diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c index d745ce9dc88a..7887a6786ed4 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -1358,7 +1358,7 @@ static void qedr_set_common_qp_params(struct qedr_dev *dev, qp->prev_wqe_size = 0; - qp->signaled = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR) ? true : false; + qp->signaled = attrs->sq_sig_type == IB_SIGNAL_ALL_WR; qp->dev = dev; if (qedr_qp_has_sq(qp)) { qedr_reset_qp_hwq_info(&qp->sq); diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c index ef85bc8d9384..152952127f13 100644 --- a/drivers/infiniband/hw/qib/qib_file_ops.c +++ b/drivers/infiniband/hw/qib/qib_file_ops.c @@ -2250,7 +2250,9 @@ static ssize_t qib_write_iter(struct kiocb *iocb, struct iov_iter *from) return qib_user_sdma_writev(rcd, pq, iter_iov(from), from->nr_segs); } -static struct class *qib_class; +static const struct class qib_class = { + .name = "ipath", +}; static dev_t qib_dev; int qib_cdev_init(int minor, const char *name, @@ -2281,7 +2283,7 @@ int qib_cdev_init(int minor, const char *name, goto err_cdev; } - device = device_create(qib_class, NULL, dev, NULL, "%s", name); + device = device_create(&qib_class, NULL, dev, NULL, "%s", name); if (!IS_ERR(device)) goto done; ret = PTR_ERR(device); @@ -2325,9 +2327,8 @@ int __init qib_dev_init(void) goto done; } - qib_class = class_create("ipath"); - if (IS_ERR(qib_class)) { - ret = PTR_ERR(qib_class); + ret = class_register(&qib_class); + if (ret) { pr_err("Could not create device class (err %d)\n", -ret); unregister_chrdev_region(qib_dev, QIB_NMINORS); } @@ -2338,10 +2339,8 @@ done: void qib_dev_cleanup(void) { - if (qib_class) { - class_destroy(qib_class); - qib_class = NULL; - } + if (class_is_registered(&qib_class)) + class_unregister(&qib_class); unregister_chrdev_region(qib_dev, QIB_NMINORS); } diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c index 5111735aafae..d0bdc2d8adc8 100644 --- a/drivers/infiniband/sw/rxe/rxe_comp.c +++ b/drivers/infiniband/sw/rxe/rxe_comp.c @@ -597,6 +597,10 @@ static void flush_send_queue(struct rxe_qp *qp, bool notify) struct rxe_queue *q = qp->sq.queue; int err; + /* send queue never got created. nothing to do. 
*/ + if (!qp->sq.queue) + return; + while ((wqe = queue_head(q, q->type))) { if (notify) { err = flush_send_wqe(qp, wqe); diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h index 666e06a82bc9..4d2a8ef52c85 100644 --- a/drivers/infiniband/sw/rxe/rxe_loc.h +++ b/drivers/infiniband/sw/rxe/rxe_loc.h @@ -136,12 +136,6 @@ static inline int qp_mtu(struct rxe_qp *qp) return IB_MTU_4096; } -static inline int rcv_wqe_size(int max_sge) -{ - return sizeof(struct rxe_recv_wqe) + - max_sge * sizeof(struct ib_sge); -} - void free_rd_atomic_resource(struct resp_res *res); static inline void rxe_advance_resp_resource(struct rxe_qp *qp) diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index a569b111a9d2..28e379c108bc 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -183,13 +183,63 @@ static void rxe_qp_init_misc(struct rxe_dev *rxe, struct rxe_qp *qp, atomic_set(&qp->skb_out, 0); } +static int rxe_init_sq(struct rxe_qp *qp, struct ib_qp_init_attr *init, + struct ib_udata *udata, + struct rxe_create_qp_resp __user *uresp) +{ + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); + int wqe_size; + int err; + + qp->sq.max_wr = init->cap.max_send_wr; + wqe_size = max_t(int, init->cap.max_send_sge * sizeof(struct ib_sge), + init->cap.max_inline_data); + qp->sq.max_sge = wqe_size / sizeof(struct ib_sge); + qp->sq.max_inline = wqe_size; + wqe_size += sizeof(struct rxe_send_wqe); + + qp->sq.queue = rxe_queue_init(rxe, &qp->sq.max_wr, wqe_size, + QUEUE_TYPE_FROM_CLIENT); + if (!qp->sq.queue) { + rxe_err_qp(qp, "Unable to allocate send queue"); + err = -ENOMEM; + goto err_out; + } + + /* prepare info for caller to mmap send queue if user space qp */ + err = do_mmap_info(rxe, uresp ? &uresp->sq_mi : NULL, udata, + qp->sq.queue->buf, qp->sq.queue->buf_size, + &qp->sq.queue->ip); + if (err) { + rxe_err_qp(qp, "do_mmap_info failed, err = %d", err); + goto err_free; + } + + /* return actual capabilities to caller which may be larger + * than requested + */ + init->cap.max_send_wr = qp->sq.max_wr; + init->cap.max_send_sge = qp->sq.max_sge; + init->cap.max_inline_data = qp->sq.max_inline; + + return 0; + +err_free: + vfree(qp->sq.queue->buf); + kfree(qp->sq.queue); + qp->sq.queue = NULL; +err_out: + return err; +} + static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp, struct ib_qp_init_attr *init, struct ib_udata *udata, struct rxe_create_qp_resp __user *uresp) { int err; - int wqe_size; - enum queue_type type; + + /* if we don't finish qp create make sure queue is valid */ + skb_queue_head_init(&qp->req_pkts); err = sock_create_kern(&init_net, AF_INET, SOCK_DGRAM, 0, &qp->sk); if (err < 0) @@ -204,32 +254,10 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp, * (0xc000 - 0xffff). */ qp->src_port = RXE_ROCE_V2_SPORT + (hash_32(qp_num(qp), 14) & 0x3fff); - qp->sq.max_wr = init->cap.max_send_wr; - - /* These caps are limited by rxe_qp_chk_cap() done by the caller */ - wqe_size = max_t(int, init->cap.max_send_sge * sizeof(struct ib_sge), - init->cap.max_inline_data); - qp->sq.max_sge = init->cap.max_send_sge = - wqe_size / sizeof(struct ib_sge); - qp->sq.max_inline = init->cap.max_inline_data = wqe_size; - wqe_size += sizeof(struct rxe_send_wqe); - type = QUEUE_TYPE_FROM_CLIENT; - qp->sq.queue = rxe_queue_init(rxe, &qp->sq.max_wr, - wqe_size, type); - if (!qp->sq.queue) - return -ENOMEM; - - err = do_mmap_info(rxe, uresp ? 
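/*
 * In rxe_init_sq() above, the variable-size tail of a send WQE is
 * shared between the SGE array and inline data, so it is sized by
 * whichever request is larger and both reported caps are re-derived
 * from the winner. Worked example, assuming sizeof(struct ib_sge)
 * is 16 bytes:
 *
 *	max_send_sge = 4, max_inline_data = 100;
 *	tail	   = max(4 * 16, 100);		// 100
 *	max_sge	   = 100 / 16;			// 6, more than asked for
 *	max_inline = 100;
 *	wqe_size   = sizeof(struct rxe_send_wqe) + 100;
 */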
&uresp->sq_mi : NULL, udata, - qp->sq.queue->buf, qp->sq.queue->buf_size, - &qp->sq.queue->ip); - - if (err) { - vfree(qp->sq.queue->buf); - kfree(qp->sq.queue); - qp->sq.queue = NULL; + err = rxe_init_sq(qp, init, udata, uresp); + if (err) return err; - } qp->req.wqe_index = queue_get_producer(qp->sq.queue, QUEUE_TYPE_FROM_CLIENT); @@ -248,36 +276,65 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp, return 0; } +static int rxe_init_rq(struct rxe_qp *qp, struct ib_qp_init_attr *init, + struct ib_udata *udata, + struct rxe_create_qp_resp __user *uresp) +{ + struct rxe_dev *rxe = to_rdev(qp->ibqp.device); + int wqe_size; + int err; + + qp->rq.max_wr = init->cap.max_recv_wr; + qp->rq.max_sge = init->cap.max_recv_sge; + wqe_size = sizeof(struct rxe_recv_wqe) + + qp->rq.max_sge*sizeof(struct ib_sge); + + qp->rq.queue = rxe_queue_init(rxe, &qp->rq.max_wr, wqe_size, + QUEUE_TYPE_FROM_CLIENT); + if (!qp->rq.queue) { + rxe_err_qp(qp, "Unable to allocate recv queue"); + err = -ENOMEM; + goto err_out; + } + + /* prepare info for caller to mmap recv queue if user space qp */ + err = do_mmap_info(rxe, uresp ? &uresp->rq_mi : NULL, udata, + qp->rq.queue->buf, qp->rq.queue->buf_size, + &qp->rq.queue->ip); + if (err) { + rxe_err_qp(qp, "do_mmap_info failed, err = %d", err); + goto err_free; + } + + /* return actual capabilities to caller which may be larger + * than requested + */ + init->cap.max_recv_wr = qp->rq.max_wr; + + return 0; + +err_free: + vfree(qp->rq.queue->buf); + kfree(qp->rq.queue); + qp->rq.queue = NULL; +err_out: + return err; +} + static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp, struct ib_qp_init_attr *init, struct ib_udata *udata, struct rxe_create_qp_resp __user *uresp) { int err; - int wqe_size; - enum queue_type type; + + /* if we don't finish qp create make sure queue is valid */ + skb_queue_head_init(&qp->resp_pkts); if (!qp->srq) { - qp->rq.max_wr = init->cap.max_recv_wr; - qp->rq.max_sge = init->cap.max_recv_sge; - - wqe_size = rcv_wqe_size(qp->rq.max_sge); - - type = QUEUE_TYPE_FROM_CLIENT; - qp->rq.queue = rxe_queue_init(rxe, &qp->rq.max_wr, - wqe_size, type); - if (!qp->rq.queue) - return -ENOMEM; - - err = do_mmap_info(rxe, uresp ? 
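/*
 * rxe_init_rq() above open-codes what the rcv_wqe_size() helper
 * removed from rxe_loc.h earlier in this diff used to return: a
 * receive WQE is a fixed header plus one scatter element per SGE.
 *
 *	static inline int rcv_wqe_size(int max_sge)
 *	{
 *		return sizeof(struct rxe_recv_wqe) +
 *		       max_sge * sizeof(struct ib_sge);
 *	}
 *
 * With the helper gone, both rxe_init_rq() and the SRQ code spell the
 * expression out at the point of use.
 */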
&uresp->rq_mi : NULL, udata, - qp->rq.queue->buf, qp->rq.queue->buf_size, - &qp->rq.queue->ip); - if (err) { - vfree(qp->rq.queue->buf); - kfree(qp->rq.queue); - qp->rq.queue = NULL; + err = rxe_init_rq(qp, init, udata, uresp); + if (err) return err; - } } rxe_init_task(&qp->resp.task, qp, rxe_responder); @@ -307,10 +364,10 @@ int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd, if (srq) rxe_get(srq); - qp->pd = pd; - qp->rcq = rcq; - qp->scq = scq; - qp->srq = srq; + qp->pd = pd; + qp->rcq = rcq; + qp->scq = scq; + qp->srq = srq; atomic_inc(&rcq->num_wq); atomic_inc(&scq->num_wq); diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index 2171f19494bc..d8c41fd626a9 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -578,10 +578,11 @@ static void save_state(struct rxe_send_wqe *wqe, struct rxe_send_wqe *rollback_wqe, u32 *rollback_psn) { - rollback_wqe->state = wqe->state; + rollback_wqe->state = wqe->state; rollback_wqe->first_psn = wqe->first_psn; - rollback_wqe->last_psn = wqe->last_psn; - *rollback_psn = qp->req.psn; + rollback_wqe->last_psn = wqe->last_psn; + rollback_wqe->dma = wqe->dma; + *rollback_psn = qp->req.psn; } static void rollback_state(struct rxe_send_wqe *wqe, @@ -589,10 +590,11 @@ static void rollback_state(struct rxe_send_wqe *wqe, struct rxe_send_wqe *rollback_wqe, u32 rollback_psn) { - wqe->state = rollback_wqe->state; + wqe->state = rollback_wqe->state; wqe->first_psn = rollback_wqe->first_psn; - wqe->last_psn = rollback_wqe->last_psn; - qp->req.psn = rollback_psn; + wqe->last_psn = rollback_wqe->last_psn; + wqe->dma = rollback_wqe->dma; + qp->req.psn = rollback_psn; } static void update_state(struct rxe_qp *qp, struct rxe_pkt_info *pkt) @@ -797,6 +799,9 @@ int rxe_requester(struct rxe_qp *qp) pkt.mask = rxe_opcode[opcode].mask; pkt.wqe = wqe; + /* save wqe state before we build and send packet */ + save_state(wqe, qp, &rollback_wqe, &rollback_psn); + av = rxe_get_av(&pkt, &ah); if (unlikely(!av)) { rxe_dbg_qp(qp, "Failed no address vector\n"); @@ -829,29 +834,29 @@ int rxe_requester(struct rxe_qp *qp) if (ah) rxe_put(ah); - /* - * To prevent a race on wqe access between requester and completer, - * wqe members state and psn need to be set before calling - * rxe_xmit_packet(). - * Otherwise, completer might initiate an unjustified retry flow. 
- */ - save_state(wqe, qp, &rollback_wqe, &rollback_psn); + /* update wqe state as though we had sent it */ update_wqe_state(qp, wqe, &pkt); update_wqe_psn(qp, wqe, &pkt, payload); err = rxe_xmit_packet(qp, &pkt, skb); if (err) { - qp->need_req_skb = 1; + if (err != -EAGAIN) { + wqe->status = IB_WC_LOC_QP_OP_ERR; + goto err; + } + /* the packet was dropped so reset wqe to the state + * before we sent it so we can try to resend + */ rollback_state(wqe, qp, &rollback_wqe, rollback_psn); - if (err == -EAGAIN) { - rxe_sched_task(&qp->req.task); - goto exit; - } + /* force a delay until the dropped packet is freed and + * the send queue is drained below the low water mark + */ + qp->need_req_skb = 1; - wqe->status = IB_WC_LOC_QP_OP_ERR; - goto err; + rxe_sched_task(&qp->req.task); + goto exit; } update_state(qp, &pkt); diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index 64c64f5f36a8..da470a925efc 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -1469,6 +1469,10 @@ static void flush_recv_queue(struct rxe_qp *qp, bool notify) return; } + /* recv queue not created. nothing to do. */ + if (!qp->rq.queue) + return; + while ((wqe = queue_head(q, q->type))) { if (notify) { err = flush_recv_wqe(qp, wqe); diff --git a/drivers/infiniband/sw/rxe/rxe_srq.c b/drivers/infiniband/sw/rxe/rxe_srq.c index 27ca82ec0826..3661cb627d28 100644 --- a/drivers/infiniband/sw/rxe/rxe_srq.c +++ b/drivers/infiniband/sw/rxe/rxe_srq.c @@ -45,40 +45,41 @@ int rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq, struct ib_srq_init_attr *init, struct ib_udata *udata, struct rxe_create_srq_resp __user *uresp) { - int err; - int srq_wqe_size; struct rxe_queue *q; - enum queue_type type; + int wqe_size; + int err; - srq->ibsrq.event_handler = init->event_handler; - srq->ibsrq.srq_context = init->srq_context; - srq->limit = init->attr.srq_limit; - srq->srq_num = srq->elem.index; - srq->rq.max_wr = init->attr.max_wr; - srq->rq.max_sge = init->attr.max_sge; + srq->ibsrq.event_handler = init->event_handler; + srq->ibsrq.srq_context = init->srq_context; + srq->limit = init->attr.srq_limit; + srq->srq_num = srq->elem.index; + srq->rq.max_wr = init->attr.max_wr; + srq->rq.max_sge = init->attr.max_sge; - srq_wqe_size = rcv_wqe_size(srq->rq.max_sge); + wqe_size = sizeof(struct rxe_recv_wqe) + + srq->rq.max_sge*sizeof(struct ib_sge); spin_lock_init(&srq->rq.producer_lock); spin_lock_init(&srq->rq.consumer_lock); - type = QUEUE_TYPE_FROM_CLIENT; - q = rxe_queue_init(rxe, &srq->rq.max_wr, srq_wqe_size, type); + q = rxe_queue_init(rxe, &srq->rq.max_wr, wqe_size, + QUEUE_TYPE_FROM_CLIENT); if (!q) { rxe_dbg_srq(srq, "Unable to allocate queue\n"); - return -ENOMEM; + err = -ENOMEM; + goto err_out; } - srq->rq.queue = q; - err = do_mmap_info(rxe, uresp ? 
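/*
 * The rxe_requester() rework above snapshots the WQE before the packet
 * is built and rolls it back only when rxe_xmit_packet() reports a
 * dropped packet; wqe->dma joins the snapshot because building the
 * payload advances it. Condensed control flow of the new error path:
 *
 *	save_state(wqe, qp, &rollback_wqe, &rollback_psn);
 *	update_wqe_state(qp, wqe, &pkt);
 *	update_wqe_psn(qp, wqe, &pkt, payload);
 *	err = rxe_xmit_packet(qp, &pkt, skb);
 *	if (err == -EAGAIN) {		// dropped: rewind and retry
 *		rollback_state(wqe, qp, &rollback_wqe, rollback_psn);
 *		qp->need_req_skb = 1;
 *		rxe_sched_task(&qp->req.task);
 *	} else if (err) {		// anything else is fatal
 *		wqe->status = IB_WC_LOC_QP_OP_ERR;
 *	}
 */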
&uresp->mi : NULL, udata, q->buf, q->buf_size, &q->ip); if (err) { - vfree(q->buf); - kfree(q); - return err; + rxe_dbg_srq(srq, "Unable to init mmap info for caller\n"); + goto err_free; } + srq->rq.queue = q; + init->attr.max_wr = srq->rq.max_wr; + if (uresp) { if (copy_to_user(&uresp->srq_num, &srq->srq_num, sizeof(uresp->srq_num))) { @@ -88,6 +89,12 @@ int rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq, } return 0; + +err_free: + vfree(q->buf); + kfree(q); +err_out: + return err; } int rxe_srq_chk_attr(struct rxe_dev *rxe, struct rxe_srq *srq, @@ -145,9 +152,10 @@ int rxe_srq_from_attr(struct rxe_dev *rxe, struct rxe_srq *srq, struct ib_srq_attr *attr, enum ib_srq_attr_mask mask, struct rxe_modify_srq_cmd *ucmd, struct ib_udata *udata) { - int err; struct rxe_queue *q = srq->rq.queue; struct mminfo __user *mi = NULL; + int wqe_size; + int err; if (mask & IB_SRQ_MAX_WR) { /* @@ -156,12 +164,16 @@ int rxe_srq_from_attr(struct rxe_dev *rxe, struct rxe_srq *srq, */ mi = u64_to_user_ptr(ucmd->mmap_info_addr); - err = rxe_queue_resize(q, &attr->max_wr, - rcv_wqe_size(srq->rq.max_sge), udata, mi, - &srq->rq.producer_lock, + wqe_size = sizeof(struct rxe_recv_wqe) + + srq->rq.max_sge*sizeof(struct ib_sge); + + err = rxe_queue_resize(q, &attr->max_wr, wqe_size, + udata, mi, &srq->rq.producer_lock, &srq->rq.consumer_lock); if (err) - goto err2; + goto err_free; + + srq->rq.max_wr = attr->max_wr; } if (mask & IB_SRQ_LIMIT) @@ -169,7 +181,7 @@ int rxe_srq_from_attr(struct rxe_dev *rxe, struct rxe_srq *srq, return 0; -err2: +err_free: rxe_queue_cleanup(q); srq->rq.queue = NULL; return err; diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 903f0b71447e..48f86839d36a 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -798,7 +798,6 @@ static int init_send_wr(struct rxe_qp *qp, struct rxe_send_wr *wr, rxe_err_qp(qp, "unsupported wr opcode %d", wr->opcode); return -EINVAL; - break; } } diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h index 2f3a9cda3850..58dddb143b9f 100644 --- a/drivers/infiniband/sw/siw/siw.h +++ b/drivers/infiniband/sw/siw/siw.h @@ -74,6 +74,7 @@ struct siw_device { u32 vendor_part_id; int numa_node; + char raw_gid[ETH_ALEN]; /* physical port state (only one port per device) */ enum ib_port_state state; @@ -530,11 +531,12 @@ void siw_qp_llp_data_ready(struct sock *sk); void siw_qp_llp_write_space(struct sock *sk); /* QP TX path functions */ +int siw_create_tx_threads(void); +void siw_stop_tx_threads(void); int siw_run_sq(void *arg); int siw_qp_sq_process(struct siw_qp *qp); int siw_sq_start(struct siw_qp *qp); int siw_activate_tx(struct siw_qp *qp); -void siw_stop_tx_thread(int nr_cpu); int siw_get_tx_cpu(struct siw_device *sdev); void siw_put_tx_cpu(int cpu); diff --git a/drivers/infiniband/sw/siw/siw_cm.c b/drivers/infiniband/sw/siw/siw_cm.c index da530c0404da..a2605178f4ed 100644 --- a/drivers/infiniband/sw/siw/siw_cm.c +++ b/drivers/infiniband/sw/siw/siw_cm.c @@ -1501,7 +1501,6 @@ error: cep->cm_id = NULL; id->rem_ref(id); - siw_cep_put(cep); qp->cep = NULL; siw_cep_put(cep); diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c index 65b5cda5457b..d4b6e0106851 100644 --- a/drivers/infiniband/sw/siw/siw_main.c +++ b/drivers/infiniband/sw/siw/siw_main.c @@ -75,8 +75,7 @@ static int siw_device_register(struct siw_device *sdev, const char *name) return rv; } - siw_dbg(base_dev, "HWaddr=%pM\n", 
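/*
 * The siw_cm.c hunk above removes a duplicated siw_cep_put() on the
 * error path: the reference held through qp->cep must be dropped
 * exactly once, and the second put would have underflowed the CEP
 * refcount. The surviving sequence:
 *
 *	cep->cm_id = NULL;
 *	id->rem_ref(id);
 *	qp->cep = NULL;
 *	siw_cep_put(cep);	// one get, one put
 */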
sdev->netdev->dev_addr); - + siw_dbg(base_dev, "HWaddr=%pM\n", sdev->raw_gid); return 0; } @@ -88,29 +87,6 @@ static void siw_device_cleanup(struct ib_device *base_dev) { xa_destroy(&sdev->mem_xa); } -static int siw_create_tx_threads(void) -{ - int cpu, assigned = 0; - - for_each_online_cpu(cpu) { - /* Skip HT cores */ - if (cpu % cpumask_weight(topology_sibling_cpumask(cpu))) - continue; - - siw_tx_thread[cpu] = - kthread_run_on_cpu(siw_run_sq, - (unsigned long *)(long)cpu, - cpu, "siw_tx/%u"); - if (IS_ERR(siw_tx_thread[cpu])) { - siw_tx_thread[cpu] = NULL; - continue; - } - - assigned++; - } - return assigned; -} - static int siw_dev_qualified(struct net_device *netdev) { /* @@ -313,24 +289,19 @@ static struct siw_device *siw_device_create(struct net_device *netdev) return NULL; base_dev = &sdev->base_dev; - sdev->netdev = netdev; - if (netdev->type != ARPHRD_LOOPBACK && netdev->type != ARPHRD_NONE) { - addrconf_addr_eui48((unsigned char *)&base_dev->node_guid, - netdev->dev_addr); + if (netdev->addr_len) { + memcpy(sdev->raw_gid, netdev->dev_addr, + min_t(unsigned int, netdev->addr_len, ETH_ALEN)); } else { /* - * This device does not have a HW address, - * but connection mangagement lib expects gid != 0 + * This device does not have a HW address, but + * connection management requires a unique gid. */ - size_t len = min_t(size_t, strlen(base_dev->name), 6); - char addr[6] = { }; - - memcpy(addr, base_dev->name, len); - addrconf_addr_eui48((unsigned char *)&base_dev->node_guid, - addr); + eth_random_addr(sdev->raw_gid); } + addrconf_addr_eui48((u8 *)&base_dev->node_guid, sdev->raw_gid); base_dev->uverbs_cmd_mask |= BIT_ULL(IB_USER_VERBS_CMD_POST_SEND); @@ -535,7 +506,6 @@ static struct rdma_link_ops siw_link_ops = { static __init int siw_init_module(void) { int rv; - int nr_cpu; if (SENDPAGE_THRESH < SIW_MAX_INLINE) { pr_info("siw: sendpage threshold too small: %u\n", @@ -580,12 +550,8 @@ static __init int siw_init_module(void) return 0; out_error: - for (nr_cpu = 0; nr_cpu < nr_cpu_ids; nr_cpu++) { - if (siw_tx_thread[nr_cpu]) { - siw_stop_tx_thread(nr_cpu); - siw_tx_thread[nr_cpu] = NULL; - } - } + siw_stop_tx_threads(); + if (siw_crypto_shash) crypto_free_shash(siw_crypto_shash); @@ -599,14 +565,8 @@ out_error: static void __exit siw_exit_module(void) { - int cpu; + siw_stop_tx_threads(); - for_each_possible_cpu(cpu) { - if (siw_tx_thread[cpu]) { - siw_stop_tx_thread(cpu); - siw_tx_thread[cpu] = NULL; - } - } unregister_netdevice_notifier(&siw_netdev_nb); rdma_link_unregister(&siw_link_ops); ib_unregister_driver(RDMA_DRIVER_SIW); diff --git a/drivers/infiniband/sw/siw/siw_qp.c b/drivers/infiniband/sw/siw/siw_qp.c index 81e9bbd9ebda..47d0197db9a1 100644 --- a/drivers/infiniband/sw/siw/siw_qp.c +++ b/drivers/infiniband/sw/siw/siw_qp.c @@ -204,7 +204,7 @@ static int siw_qp_readq_init(struct siw_qp *qp, int irq_size, int orq_size) { if (irq_size) { irq_size = roundup_pow_of_two(irq_size); - qp->irq = vzalloc(irq_size * sizeof(struct siw_sqe)); + qp->irq = vcalloc(irq_size, sizeof(struct siw_sqe)); if (!qp->irq) { qp->attrs.irq_size = 0; return -ENOMEM; @@ -212,7 +212,7 @@ static int siw_qp_readq_init(struct siw_qp *qp, int irq_size, int orq_size) } if (orq_size) { orq_size = roundup_pow_of_two(orq_size); - qp->orq = vzalloc(orq_size * sizeof(struct siw_sqe)); + qp->orq = vcalloc(orq_size, sizeof(struct siw_sqe)); if (!qp->orq) { qp->attrs.orq_size = 0; qp->attrs.irq_size = 0; diff --git a/drivers/infiniband/sw/siw/siw_qp_tx.c b/drivers/infiniband/sw/siw/siw_qp_tx.c index
7c7a51d36d0c..60b6a4135961 100644 --- a/drivers/infiniband/sw/siw/siw_qp_tx.c +++ b/drivers/infiniband/sw/siw/siw_qp_tx.c @@ -1208,10 +1208,45 @@ struct tx_task_t { static DEFINE_PER_CPU(struct tx_task_t, siw_tx_task_g); -void siw_stop_tx_thread(int nr_cpu) +int siw_create_tx_threads(void) { - kthread_stop(siw_tx_thread[nr_cpu]); - wake_up(&per_cpu(siw_tx_task_g, nr_cpu).waiting); + int cpu, assigned = 0; + + for_each_online_cpu(cpu) { + struct tx_task_t *tx_task; + + /* Skip HT cores */ + if (cpu % cpumask_weight(topology_sibling_cpumask(cpu))) + continue; + + tx_task = &per_cpu(siw_tx_task_g, cpu); + init_llist_head(&tx_task->active); + init_waitqueue_head(&tx_task->waiting); + + siw_tx_thread[cpu] = + kthread_run_on_cpu(siw_run_sq, + (unsigned long *)(long)cpu, + cpu, "siw_tx/%u"); + if (IS_ERR(siw_tx_thread[cpu])) { + siw_tx_thread[cpu] = NULL; + continue; + } + assigned++; + } + return assigned; +} + +void siw_stop_tx_threads(void) +{ + int cpu; + + for_each_possible_cpu(cpu) { + if (siw_tx_thread[cpu]) { + kthread_stop(siw_tx_thread[cpu]); + wake_up(&per_cpu(siw_tx_task_g, cpu).waiting); + siw_tx_thread[cpu] = NULL; + } + } } int siw_run_sq(void *data) @@ -1221,9 +1256,6 @@ int siw_run_sq(void *data) struct siw_qp *qp; struct tx_task_t *tx_task = &per_cpu(siw_tx_task_g, nr_cpu); - init_llist_head(&tx_task->active); - init_waitqueue_head(&tx_task->waiting); - while (1) { struct llist_node *fifo_list = NULL; @@ -1239,13 +1271,7 @@ int siw_run_sq(void *data) * llist_del_all returns a list with newest entry first. * Re-order list for fairness among QP's. */ - while (active) { - struct llist_node *tmp = active; - - active = llist_next(active); - tmp->next = fifo_list; - fifo_list = tmp; - } + fifo_list = llist_reverse_order(active); while (fifo_list) { qp = container_of(fifo_list, struct siw_qp, tx_list); fifo_list = llist_next(fifo_list); diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c index 398ec13db624..fdbef3254e30 100644 --- a/drivers/infiniband/sw/siw/siw_verbs.c +++ b/drivers/infiniband/sw/siw/siw_verbs.c @@ -157,7 +157,7 @@ int siw_query_device(struct ib_device *base_dev, struct ib_device_attr *attr, attr->vendor_part_id = sdev->vendor_part_id; addrconf_addr_eui48((u8 *)&attr->sys_image_guid, - sdev->netdev->dev_addr); + sdev->raw_gid); return 0; } @@ -218,7 +218,7 @@ int siw_query_gid(struct ib_device *base_dev, u32 port, int idx, /* subnet_prefix == interface_id == 0; */ memset(gid, 0, sizeof(*gid)); - memcpy(&gid->raw[0], sdev->netdev->dev_addr, 6); + memcpy(gid->raw, sdev->raw_gid, ETH_ALEN); return 0; } @@ -381,7 +381,7 @@ int siw_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs, if (udata) qp->sendq = vmalloc_user(num_sqe * sizeof(struct siw_sqe)); else - qp->sendq = vzalloc(num_sqe * sizeof(struct siw_sqe)); + qp->sendq = vcalloc(num_sqe, sizeof(struct siw_sqe)); if (qp->sendq == NULL) { rv = -ENOMEM; @@ -414,7 +414,7 @@ int siw_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs, qp->recvq = vmalloc_user(num_rqe * sizeof(struct siw_rqe)); else - qp->recvq = vzalloc(num_rqe * sizeof(struct siw_rqe)); + qp->recvq = vcalloc(num_rqe, sizeof(struct siw_rqe)); if (qp->recvq == NULL) { rv = -ENOMEM; @@ -1494,7 +1494,7 @@ int siw_map_mr_sg(struct ib_mr *base_mr, struct scatterlist *sl, int num_sle, if (pbl->max_buf < num_sle) { siw_dbg_mem(mem, "too many SGE's: %d > %d\n", - mem->pbl->max_buf, num_sle); + num_sle, pbl->max_buf); return -ENOMEM; } for_each_sg(sl, slp, num_sle, i) { @@ -1624,7 +1624,7 @@ int 
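/*
 * siw_run_sq() below swaps a hand-rolled list reversal for
 * llist_reverse_order(): llist_del_all() returns entries newest-first,
 * and reversing restores FIFO order so queue pairs are served fairly.
 * The replaced loop and its one-line equivalent:
 *
 *	while (active) {			// old open-coded reversal
 *		struct llist_node *tmp = active;
 *
 *		active = llist_next(active);
 *		tmp->next = fifo_list;
 *		fifo_list = tmp;
 *	}
 *
 *	fifo_list = llist_reverse_order(active);	// new
 */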
siw_create_srq(struct ib_srq *base_srq, srq->recvq = vmalloc_user(srq->num_rqe * sizeof(struct siw_rqe)); else - srq->recvq = vzalloc(srq->num_rqe * sizeof(struct siw_rqe)); + srq->recvq = vcalloc(srq->num_rqe, sizeof(struct siw_rqe)); if (srq->recvq == NULL) { rv = -ENOMEM; diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 92e1e7587af8..00a7303c8cc6 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -2570,6 +2570,8 @@ static void isert_wait_conn(struct iscsit_conn *conn) isert_put_unsol_pending_cmds(conn); isert_wait4cmds(conn); isert_wait4logout(isert_conn); + + queue_work(isert_release_wq, &isert_conn->release_work); } static void isert_free_conn(struct iscsit_conn *conn) diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c index b32941dd67cb..b6ee801fd0ff 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c @@ -45,7 +45,9 @@ static struct rtrs_rdma_dev_pd dev_pd = { }; static struct workqueue_struct *rtrs_wq; -static struct class *rtrs_clt_dev_class; +static const struct class rtrs_clt_dev_class = { + .name = "rtrs-client", +}; static inline bool rtrs_clt_is_connected(const struct rtrs_clt_sess *clt) { @@ -2698,7 +2700,7 @@ static struct rtrs_clt_sess *alloc_clt(const char *sessname, size_t paths_num, return ERR_PTR(-ENOMEM); } - clt->dev.class = rtrs_clt_dev_class; + clt->dev.class = &rtrs_clt_dev_class; clt->dev.release = rtrs_clt_dev_release; uuid_gen(&clt->paths_uuid); INIT_LIST_HEAD_RCU(&clt->paths_list); @@ -3151,16 +3153,17 @@ static const struct rtrs_rdma_dev_pd_ops dev_pd_ops = { static int __init rtrs_client_init(void) { - rtrs_rdma_dev_pd_init(0, &dev_pd); + int ret = 0; - rtrs_clt_dev_class = class_create("rtrs-client"); - if (IS_ERR(rtrs_clt_dev_class)) { + rtrs_rdma_dev_pd_init(0, &dev_pd); + ret = class_register(&rtrs_clt_dev_class); + if (ret) { pr_err("Failed to create rtrs-client dev class\n"); - return PTR_ERR(rtrs_clt_dev_class); + return ret; } rtrs_wq = alloc_workqueue("rtrs_client_wq", 0, 0); if (!rtrs_wq) { - class_destroy(rtrs_clt_dev_class); + class_unregister(&rtrs_clt_dev_class); return -ENOMEM; } @@ -3170,7 +3173,7 @@ static int __init rtrs_client_init(void) static void __exit rtrs_client_exit(void) { destroy_workqueue(rtrs_wq); - class_destroy(rtrs_clt_dev_class); + class_unregister(&rtrs_clt_dev_class); rtrs_rdma_dev_pd_deinit(&dev_pd); } diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c index 5adba0f754b6..3f305e694fe8 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c @@ -164,7 +164,7 @@ static int rtrs_srv_create_once_sysfs_root_folders(struct rtrs_srv_path *srv_pat */ goto unlock; } - srv->dev.class = rtrs_dev_class; + srv->dev.class = &rtrs_dev_class; err = dev_set_name(&srv->dev, "%s", srv_path->s.sessname); if (err) goto unlock; diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c index c38901e2c8f4..75e56604e462 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c @@ -27,7 +27,9 @@ MODULE_LICENSE("GPL"); #define MAX_HDR_SIZE PAGE_SIZE static struct rtrs_rdma_dev_pd dev_pd; -struct class *rtrs_dev_class; +const struct class rtrs_dev_class = { + .name = "rtrs-server", +}; static struct rtrs_srv_ib_ctx ib_ctx; static int __read_mostly max_chunk_size = DEFAULT_MAX_CHUNK_SIZE; 
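/*
 * The rtrs conversions here follow the same tree-wide pattern as the
 * qib hunk above: the class becomes a const, statically defined object
 * registered with class_register() instead of a pointer returned by
 * class_create(). A minimal sketch of the shape (names hypothetical):
 *
 *	static const struct class example_class = {
 *		.name = "example",
 *	};
 *
 *	static int __init example_init(void)
 *	{
 *		return class_register(&example_class);
 *	}
 *
 *	static void __exit example_exit(void)
 *	{
 *		class_unregister(&example_class);
 *	}
 */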
@@ -2253,11 +2255,10 @@ static int __init rtrs_server_init(void) err); return err; } - rtrs_dev_class = class_create("rtrs-server"); - if (IS_ERR(rtrs_dev_class)) { - err = PTR_ERR(rtrs_dev_class); + err = class_register(&rtrs_dev_class); + if (err) goto out_err; - } + rtrs_wq = alloc_workqueue("rtrs_server_wq", 0, 0); if (!rtrs_wq) { err = -ENOMEM; @@ -2267,7 +2268,7 @@ static int __init rtrs_server_init(void) return 0; out_dev_class: - class_destroy(rtrs_dev_class); + class_unregister(&rtrs_dev_class); out_err: return err; } @@ -2275,7 +2276,7 @@ out_err: static void __exit rtrs_server_exit(void) { destroy_workqueue(rtrs_wq); - class_destroy(rtrs_dev_class); + class_unregister(&rtrs_dev_class); rtrs_rdma_dev_pd_deinit(&dev_pd); } diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.h b/drivers/infiniband/ulp/rtrs/rtrs-srv.h index 2f8a638e36fa..5e325b82ff33 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv.h +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.h @@ -129,7 +129,7 @@ struct rtrs_srv_ib_ctx { int ib_dev_count; }; -extern struct class *rtrs_dev_class; +extern const struct class rtrs_dev_class; void close_path(struct rtrs_srv_path *srv_path); diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 0c35018239ce..e2857109e966 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -12,13 +12,14 @@ #include "amd_iommu_types.h" irqreturn_t amd_iommu_int_thread(int irq, void *data); +irqreturn_t amd_iommu_int_thread_evtlog(int irq, void *data); +irqreturn_t amd_iommu_int_thread_pprlog(int irq, void *data); +irqreturn_t amd_iommu_int_thread_galog(int irq, void *data); irqreturn_t amd_iommu_int_handler(int irq, void *data); void amd_iommu_apply_erratum_63(struct amd_iommu *iommu, u16 devid); void amd_iommu_restart_event_logging(struct amd_iommu *iommu); void amd_iommu_restart_ga_log(struct amd_iommu *iommu); -int amd_iommu_init_devices(void); -void amd_iommu_uninit_devices(void); -void amd_iommu_init_notifier(void); +void amd_iommu_restart_ppr_log(struct amd_iommu *iommu); void amd_iommu_set_rlookup_table(struct amd_iommu *iommu, u16 devid); #ifdef CONFIG_AMD_IOMMU_DEBUGFS diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index dc1db6167927..7dc30c2b56b3 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -120,10 +120,13 @@ #define PASID_MASK 0x0000ffff /* MMIO status bits */ -#define MMIO_STATUS_EVT_OVERFLOW_INT_MASK BIT(0) +#define MMIO_STATUS_EVT_OVERFLOW_MASK BIT(0) #define MMIO_STATUS_EVT_INT_MASK BIT(1) #define MMIO_STATUS_COM_WAIT_INT_MASK BIT(2) +#define MMIO_STATUS_EVT_RUN_MASK BIT(3) +#define MMIO_STATUS_PPR_OVERFLOW_MASK BIT(5) #define MMIO_STATUS_PPR_INT_MASK BIT(6) +#define MMIO_STATUS_PPR_RUN_MASK BIT(7) #define MMIO_STATUS_GALOG_RUN_MASK BIT(8) #define MMIO_STATUS_GALOG_OVERFLOW_MASK BIT(9) #define MMIO_STATUS_GALOG_INT_MASK BIT(10) @@ -381,15 +384,15 @@ */ #define DTE_FLAG_V BIT_ULL(0) #define DTE_FLAG_TV BIT_ULL(1) +#define DTE_FLAG_GIOV BIT_ULL(54) +#define DTE_FLAG_GV BIT_ULL(55) +#define DTE_GLX_SHIFT (56) +#define DTE_GLX_MASK (3) #define DTE_FLAG_IR BIT_ULL(61) #define DTE_FLAG_IW BIT_ULL(62) #define DTE_FLAG_IOTLB BIT_ULL(32) -#define DTE_FLAG_GIOV BIT_ULL(54) -#define DTE_FLAG_GV BIT_ULL(55) #define DTE_FLAG_MASK (0x3ffULL << 32) -#define DTE_GLX_SHIFT (56) -#define DTE_GLX_MASK (3) #define DEV_DOMID_MASK 0xffffULL #define DTE_GCR3_VAL_A(x) (((x) >> 12) & 0x00007ULL) @@ -702,12 +705,21 @@ struct amd_iommu { /* event buffer virtual address */ u8 
*evt_buf; + /* Name for event log interrupt */ + unsigned char evt_irq_name[16]; + /* Base of the PPR log, if present */ u8 *ppr_log; + /* Name for PPR log interrupt */ + unsigned char ppr_irq_name[16]; + /* Base of the GA log, if present */ u8 *ga_log; + /* Name for GA log interrupt */ + unsigned char ga_irq_name[16]; + /* Tail of the GA log, if present */ u8 *ga_log_tail; diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index ea0f1ab94178..45efb7e5d725 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -483,6 +483,10 @@ static void iommu_disable(struct amd_iommu *iommu) iommu_feature_disable(iommu, CONTROL_GALOG_EN); iommu_feature_disable(iommu, CONTROL_GAINT_EN); + /* Disable IOMMU PPR logging */ + iommu_feature_disable(iommu, CONTROL_PPRLOG_EN); + iommu_feature_disable(iommu, CONTROL_PPRINT_EN); + /* Disable IOMMU hardware itself */ iommu_feature_disable(iommu, CONTROL_IOMMU_EN); @@ -753,37 +757,61 @@ static int __init alloc_command_buffer(struct amd_iommu *iommu) } /* + * Interrupt handler has processed all pending events and adjusted head + * and tail pointer. Reset overflow mask and restart logging again. + */ +static void amd_iommu_restart_log(struct amd_iommu *iommu, const char *evt_type, + u8 cntrl_intr, u8 cntrl_log, + u32 status_run_mask, u32 status_overflow_mask) +{ + u32 status; + + status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); + if (status & status_run_mask) + return; + + pr_info_ratelimited("IOMMU %s log restarting\n", evt_type); + + iommu_feature_disable(iommu, cntrl_log); + iommu_feature_disable(iommu, cntrl_intr); + + writel(status_overflow_mask, iommu->mmio_base + MMIO_STATUS_OFFSET); + + iommu_feature_enable(iommu, cntrl_intr); + iommu_feature_enable(iommu, cntrl_log); +} + +/* * This function restarts event logging in case the IOMMU experienced * an event log buffer overflow. */ void amd_iommu_restart_event_logging(struct amd_iommu *iommu) { - iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN); - iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN); + amd_iommu_restart_log(iommu, "Event", CONTROL_EVT_INT_EN, + CONTROL_EVT_LOG_EN, MMIO_STATUS_EVT_RUN_MASK, + MMIO_STATUS_EVT_OVERFLOW_MASK); } /* * This function restarts event logging in case the IOMMU experienced - * an GA log overflow. + * GA log overflow. */ void amd_iommu_restart_ga_log(struct amd_iommu *iommu) { - u32 status; - - status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); - if (status & MMIO_STATUS_GALOG_RUN_MASK) - return; - - pr_info_ratelimited("IOMMU GA Log restarting\n"); - - iommu_feature_disable(iommu, CONTROL_GALOG_EN); - iommu_feature_disable(iommu, CONTROL_GAINT_EN); - - writel(MMIO_STATUS_GALOG_OVERFLOW_MASK, - iommu->mmio_base + MMIO_STATUS_OFFSET); + amd_iommu_restart_log(iommu, "GA", CONTROL_GAINT_EN, + CONTROL_GALOG_EN, MMIO_STATUS_GALOG_RUN_MASK, + MMIO_STATUS_GALOG_OVERFLOW_MASK); +} - iommu_feature_enable(iommu, CONTROL_GAINT_EN); - iommu_feature_enable(iommu, CONTROL_GALOG_EN); +/* + * This function restarts ppr logging in case the IOMMU experienced + * PPR log overflow. 
+ */ +void amd_iommu_restart_ppr_log(struct amd_iommu *iommu) +{ + amd_iommu_restart_log(iommu, "PPR", CONTROL_PPRINT_EN, + CONTROL_PPRLOG_EN, MMIO_STATUS_PPR_RUN_MASK, + MMIO_STATUS_PPR_OVERFLOW_MASK); } /* @@ -906,6 +934,8 @@ static void iommu_enable_ppr_log(struct amd_iommu *iommu) if (iommu->ppr_log == NULL) return; + iommu_feature_enable(iommu, CONTROL_PPR_EN); + entry = iommu_virt_to_phys(iommu->ppr_log) | PPR_LOG_SIZE_512; memcpy_toio(iommu->mmio_base + MMIO_PPR_LOG_OFFSET, @@ -916,7 +946,7 @@ static void iommu_enable_ppr_log(struct amd_iommu *iommu) writel(0x00, iommu->mmio_base + MMIO_PPR_TAIL_OFFSET); iommu_feature_enable(iommu, CONTROL_PPRLOG_EN); - iommu_feature_enable(iommu, CONTROL_PPR_EN); + iommu_feature_enable(iommu, CONTROL_PPRINT_EN); } static void __init free_ppr_log(struct amd_iommu *iommu) @@ -2311,6 +2341,7 @@ static int intcapxt_irqdomain_alloc(struct irq_domain *domain, unsigned int virq struct irq_data *irqd = irq_domain_get_irq_data(domain, i); irqd->chip = &intcapxt_controller; + irqd->hwirq = info->hwirq; irqd->chip_data = info->data; __irq_set_handler(i, handle_edge_irq, 0, "edge"); } @@ -2337,22 +2368,14 @@ static void intcapxt_unmask_irq(struct irq_data *irqd) xt.destid_0_23 = cfg->dest_apicid & GENMASK(23, 0); xt.destid_24_31 = cfg->dest_apicid >> 24; - /** - * Current IOMMU implementation uses the same IRQ for all - * 3 IOMMU interrupts. - */ - writeq(xt.capxt, iommu->mmio_base + MMIO_INTCAPXT_EVT_OFFSET); - writeq(xt.capxt, iommu->mmio_base + MMIO_INTCAPXT_PPR_OFFSET); - writeq(xt.capxt, iommu->mmio_base + MMIO_INTCAPXT_GALOG_OFFSET); + writeq(xt.capxt, iommu->mmio_base + irqd->hwirq); } static void intcapxt_mask_irq(struct irq_data *irqd) { struct amd_iommu *iommu = irqd->chip_data; - writeq(0, iommu->mmio_base + MMIO_INTCAPXT_EVT_OFFSET); - writeq(0, iommu->mmio_base + MMIO_INTCAPXT_PPR_OFFSET); - writeq(0, iommu->mmio_base + MMIO_INTCAPXT_GALOG_OFFSET); + writeq(0, iommu->mmio_base + irqd->hwirq); } @@ -2415,7 +2438,8 @@ static struct irq_domain *iommu_get_irqdomain(void) return iommu_irqdomain; } -static int iommu_setup_intcapxt(struct amd_iommu *iommu) +static int __iommu_setup_intcapxt(struct amd_iommu *iommu, const char *devname, + int hwirq, irq_handler_t thread_fn) { struct irq_domain *domain; struct irq_alloc_info info; @@ -2429,6 +2453,7 @@ static int iommu_setup_intcapxt(struct amd_iommu *iommu) init_irq_alloc_info(&info, NULL); info.type = X86_IRQ_ALLOC_TYPE_AMDVI; info.data = iommu; + info.hwirq = hwirq; irq = irq_domain_alloc_irqs(domain, 1, node, &info); if (irq < 0) { @@ -2437,7 +2462,7 @@ static int iommu_setup_intcapxt(struct amd_iommu *iommu) } ret = request_threaded_irq(irq, amd_iommu_int_handler, - amd_iommu_int_thread, 0, "AMD-Vi", iommu); + thread_fn, 0, devname, iommu); if (ret) { irq_domain_free_irqs(irq, 1); irq_domain_remove(domain); @@ -2447,6 +2472,37 @@ static int iommu_setup_intcapxt(struct amd_iommu *iommu) return 0; } +static int iommu_setup_intcapxt(struct amd_iommu *iommu) +{ + int ret; + + snprintf(iommu->evt_irq_name, sizeof(iommu->evt_irq_name), + "AMD-Vi%d-Evt", iommu->index); + ret = __iommu_setup_intcapxt(iommu, iommu->evt_irq_name, + MMIO_INTCAPXT_EVT_OFFSET, + amd_iommu_int_thread_evtlog); + if (ret) + return ret; + + snprintf(iommu->ppr_irq_name, sizeof(iommu->ppr_irq_name), + "AMD-Vi%d-PPR", iommu->index); + ret = __iommu_setup_intcapxt(iommu, iommu->ppr_irq_name, + MMIO_INTCAPXT_PPR_OFFSET, + amd_iommu_int_thread_pprlog); + if (ret) + return ret; + +#ifdef CONFIG_IRQ_REMAP + snprintf(iommu->ga_irq_name, 
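/*
 * amd_iommu_restart_log() above factors out the restart sequence all
 * three hardware logs need once the interrupt thread has drained an
 * overflow. Condensed, assuming the status register bits are
 * write-1-to-clear as the writel() of the overflow mask implies:
 *
 *	if (readl(mmio + MMIO_STATUS_OFFSET) & run_mask)
 *		return;				// HW is already running
 *	iommu_feature_disable(iommu, cntrl_log);
 *	iommu_feature_disable(iommu, cntrl_intr);
 *	writel(overflow_mask, mmio + MMIO_STATUS_OFFSET);  // ack overflow
 *	iommu_feature_enable(iommu, cntrl_intr);
 *	iommu_feature_enable(iommu, cntrl_log);
 */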
sizeof(iommu->ga_irq_name), + "AMD-Vi%d-GA", iommu->index); + ret = __iommu_setup_intcapxt(iommu, iommu->ga_irq_name, + MMIO_INTCAPXT_GALOG_OFFSET, + amd_iommu_int_thread_galog); +#endif + + return ret; +} + static int iommu_init_irq(struct amd_iommu *iommu) { int ret; @@ -2472,8 +2528,6 @@ enable_faults: iommu_feature_enable(iommu, CONTROL_EVT_INT_EN); - if (iommu->ppr_log != NULL) - iommu_feature_enable(iommu, CONTROL_PPRINT_EN); return 0; } @@ -2889,8 +2943,6 @@ static void enable_iommus_vapic(void) static void enable_iommus(void) { early_enable_iommus(); - enable_iommus_vapic(); - enable_iommus_v2(); } static void disable_iommus(void) @@ -3154,6 +3206,13 @@ static int amd_iommu_enable_interrupts(void) goto out; } + /* + * Interrupt handler is ready to process interrupts. Enable + * PPR and GA log interrupt for all IOMMUs. + */ + enable_iommus_vapic(); + enable_iommus_v2(); + out: return ret; } @@ -3233,8 +3292,6 @@ static int __init state_next(void) register_syscore_ops(&amd_iommu_syscore_ops); ret = amd_iommu_init_pci(); init_state = ret ? IOMMU_INIT_ERROR : IOMMU_PCI_INIT; - enable_iommus_vapic(); - enable_iommus_v2(); break; case IOMMU_PCI_INIT: ret = amd_iommu_enable_interrupts(); diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 7d7d3799199a..95bd7c25ba6f 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -841,50 +841,27 @@ static inline void amd_iommu_set_pci_msi_domain(struct device *dev, struct amd_iommu *iommu) { } #endif /* !CONFIG_IRQ_REMAP */ -#define AMD_IOMMU_INT_MASK \ - (MMIO_STATUS_EVT_OVERFLOW_INT_MASK | \ - MMIO_STATUS_EVT_INT_MASK | \ - MMIO_STATUS_PPR_INT_MASK | \ - MMIO_STATUS_GALOG_OVERFLOW_MASK | \ - MMIO_STATUS_GALOG_INT_MASK) - -irqreturn_t amd_iommu_int_thread(int irq, void *data) +static void amd_iommu_handle_irq(void *data, const char *evt_type, + u32 int_mask, u32 overflow_mask, + void (*int_handler)(struct amd_iommu *), + void (*overflow_handler)(struct amd_iommu *)) { struct amd_iommu *iommu = (struct amd_iommu *) data; u32 status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); + u32 mask = int_mask | overflow_mask; - while (status & AMD_IOMMU_INT_MASK) { + while (status & mask) { /* Enable interrupt sources again */ - writel(AMD_IOMMU_INT_MASK, - iommu->mmio_base + MMIO_STATUS_OFFSET); + writel(mask, iommu->mmio_base + MMIO_STATUS_OFFSET); - if (status & MMIO_STATUS_EVT_INT_MASK) { - pr_devel("Processing IOMMU Event Log\n"); - iommu_poll_events(iommu); + if (int_handler) { + pr_devel("Processing IOMMU (ivhd%d) %s Log\n", + iommu->index, evt_type); + int_handler(iommu); } - if (status & MMIO_STATUS_PPR_INT_MASK) { - pr_devel("Processing IOMMU PPR Log\n"); - iommu_poll_ppr_log(iommu); - } - -#ifdef CONFIG_IRQ_REMAP - if (status & (MMIO_STATUS_GALOG_INT_MASK | - MMIO_STATUS_GALOG_OVERFLOW_MASK)) { - pr_devel("Processing IOMMU GA Log\n"); - iommu_poll_ga_log(iommu); - } - - if (status & MMIO_STATUS_GALOG_OVERFLOW_MASK) { - pr_info_ratelimited("IOMMU GA Log overflow\n"); - amd_iommu_restart_ga_log(iommu); - } -#endif - - if (status & MMIO_STATUS_EVT_OVERFLOW_INT_MASK) { - pr_info_ratelimited("IOMMU event log overflow\n"); - amd_iommu_restart_event_logging(iommu); - } + if ((status & overflow_mask) && overflow_handler) + overflow_handler(iommu); /* * Hardware bug: ERBT1312 @@ -901,6 +878,43 @@ irqreturn_t amd_iommu_int_thread(int irq, void *data) */ status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); } +} + +irqreturn_t amd_iommu_int_thread_evtlog(int irq, void *data) +{ + amd_iommu_handle_irq(data, "Evt", 
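/*
 * amd_iommu_handle_irq() above preserves the original loop shape for
 * each per-log interrupt thread: clear the status bits to re-enable
 * the sources, service the log, handle overflow, then re-read status
 * because of erratum ERBT1312 (a source can reassert while being
 * serviced). Condensed:
 *
 *	u32 status = readl(mmio + MMIO_STATUS_OFFSET);
 *	while (status & (int_mask | overflow_mask)) {
 *		writel(int_mask | overflow_mask,
 *		       mmio + MMIO_STATUS_OFFSET);
 *		int_handler(iommu);			// poll the log
 *		if (status & overflow_mask)
 *			overflow_handler(iommu);	// restart the log
 *		status = readl(mmio + MMIO_STATUS_OFFSET);
 *	}
 */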
MMIO_STATUS_EVT_INT_MASK, + MMIO_STATUS_EVT_OVERFLOW_MASK, + iommu_poll_events, amd_iommu_restart_event_logging); + + return IRQ_HANDLED; +} + +irqreturn_t amd_iommu_int_thread_pprlog(int irq, void *data) +{ + amd_iommu_handle_irq(data, "PPR", MMIO_STATUS_PPR_INT_MASK, + MMIO_STATUS_PPR_OVERFLOW_MASK, + iommu_poll_ppr_log, amd_iommu_restart_ppr_log); + + return IRQ_HANDLED; +} + +irqreturn_t amd_iommu_int_thread_galog(int irq, void *data) +{ +#ifdef CONFIG_IRQ_REMAP + amd_iommu_handle_irq(data, "GA", MMIO_STATUS_GALOG_INT_MASK, + MMIO_STATUS_GALOG_OVERFLOW_MASK, + iommu_poll_ga_log, amd_iommu_restart_ga_log); +#endif + + return IRQ_HANDLED; +} + +irqreturn_t amd_iommu_int_thread(int irq, void *data) +{ + amd_iommu_int_thread_evtlog(irq, data); + amd_iommu_int_thread_pprlog(irq, data); + amd_iommu_int_thread_galog(irq, data); + return IRQ_HANDLED; } diff --git a/drivers/iommu/amd/iommu_v2.c b/drivers/iommu/amd/iommu_v2.c index 2596466cd5a6..57c2fb1146e2 100644 --- a/drivers/iommu/amd/iommu_v2.c +++ b/drivers/iommu/amd/iommu_v2.c @@ -262,8 +262,8 @@ static void put_pasid_state(struct pasid_state *pasid_state) static void put_pasid_state_wait(struct pasid_state *pasid_state) { - refcount_dec(&pasid_state->count); - wait_event(pasid_state->wq, !refcount_read(&pasid_state->count)); + if (!refcount_dec_and_test(&pasid_state->count)) + wait_event(pasid_state->wq, !refcount_read(&pasid_state->count)); free_pasid_state(pasid_state); } @@ -327,6 +327,9 @@ static void free_pasid_states(struct device_state *dev_state) put_pasid_state(pasid_state); + /* Clear the pasid state so that the pasid can be re-used */ + clear_pasid_state(dev_state, pasid_state->pasid); + /* * This will call the mn_release function and * unbind the PASID diff --git a/drivers/iommu/apple-dart.c b/drivers/iommu/apple-dart.c index 8af64b57f048..2082081402d3 100644 --- a/drivers/iommu/apple-dart.c +++ b/drivers/iommu/apple-dart.c @@ -1276,7 +1276,7 @@ static __maybe_unused int apple_dart_resume(struct device *dev) return 0; } -DEFINE_SIMPLE_DEV_PM_OPS(apple_dart_pm_ops, apple_dart_suspend, apple_dart_resume); +static DEFINE_SIMPLE_DEV_PM_OPS(apple_dart_pm_ops, apple_dart_suspend, apple_dart_resume); static const struct of_device_id apple_dart_of_match[] = { { .compatible = "apple,t8103-dart", .data = &apple_dart_hw_t8103 }, diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c index dbc812a0e57e..4d83edc2be99 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c @@ -80,7 +80,7 @@ arm_smmu_share_asid(struct mm_struct *mm, u16 asid) * be some overlap between use of both ASIDs, until we invalidate the * TLB. */ - arm_smmu_write_ctx_desc(smmu_domain, 0, cd); + arm_smmu_write_ctx_desc(smmu_domain, IOMMU_NO_PASID, cd); /* Invalidate TLB entries previously associated with that context */ arm_smmu_tlb_inv_asid(smmu, asid); diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 9b0dc3505601..e82bf1c449a3 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -1059,7 +1059,7 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid, /* * This function handles the following cases: * - * (1) Install primary CD, for normal DMA traffic (SSID = 0). + * (1) Install primary CD, for normal DMA traffic (SSID = IOMMU_NO_PASID = 0). * (2) Install a secondary CD, for SID+SSID traffic. * (3) Update ASID of a CD. 
Atomically write the first 64 bits of the * CD, then invalidate the old entry and mappings. @@ -1607,7 +1607,7 @@ static void arm_smmu_handle_ppr(struct arm_smmu_device *smmu, u64 *evt) sid = FIELD_GET(PRIQ_0_SID, evt[0]); ssv = FIELD_GET(PRIQ_0_SSID_V, evt[0]); - ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : 0; + ssid = ssv ? FIELD_GET(PRIQ_0_SSID, evt[0]) : IOMMU_NO_PASID; last = FIELD_GET(PRIQ_0_PRG_LAST, evt[0]); grpid = FIELD_GET(PRIQ_1_PRG_IDX, evt[1]); @@ -1748,7 +1748,7 @@ arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size, */ *cmd = (struct arm_smmu_cmdq_ent) { .opcode = CMDQ_OP_ATC_INV, - .substream_valid = !!ssid, + .substream_valid = (ssid != IOMMU_NO_PASID), .atc.ssid = ssid, }; @@ -1795,7 +1795,7 @@ static int arm_smmu_atc_inv_master(struct arm_smmu_master *master) struct arm_smmu_cmdq_ent cmd; struct arm_smmu_cmdq_batch cmds; - arm_smmu_atc_inv_to_cmd(0, 0, 0, &cmd); + arm_smmu_atc_inv_to_cmd(IOMMU_NO_PASID, 0, 0, &cmd); cmds.num = 0; for (i = 0; i < master->num_streams; i++) { @@ -1875,7 +1875,7 @@ static void arm_smmu_tlb_inv_context(void *cookie) cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid; arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd); } - arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0); + arm_smmu_atc_inv_domain(smmu_domain, IOMMU_NO_PASID, 0, 0); } static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd, @@ -1968,7 +1968,7 @@ static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size, * Unfortunately, this can't be leaf-only since we may have * zapped an entire table. */ - arm_smmu_atc_inv_domain(smmu_domain, 0, iova, size); + arm_smmu_atc_inv_domain(smmu_domain, IOMMU_NO_PASID, iova, size); } void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid, @@ -2055,24 +2055,6 @@ static struct iommu_domain *arm_smmu_domain_alloc(unsigned type) return &smmu_domain->domain; } -static int arm_smmu_bitmap_alloc(unsigned long *map, int span) -{ - int idx, size = 1 << span; - - do { - idx = find_first_zero_bit(map, size); - if (idx == size) - return -ENOSPC; - } while (test_and_set_bit(idx, map)); - - return idx; -} - -static void arm_smmu_bitmap_free(unsigned long *map, int idx) -{ - clear_bit(idx, map); -} - static void arm_smmu_domain_free(struct iommu_domain *domain) { struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); @@ -2093,7 +2075,7 @@ static void arm_smmu_domain_free(struct iommu_domain *domain) } else { struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg; if (cfg->vmid) - arm_smmu_bitmap_free(smmu->vmid_map, cfg->vmid); + ida_free(&smmu->vmid_map, cfg->vmid); } kfree(smmu_domain); @@ -2142,7 +2124,7 @@ static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain, * the master has been added to the devices list for this domain. * This isn't an issue because the STE hasn't been installed yet. 
*/ - ret = arm_smmu_write_ctx_desc(smmu_domain, 0, &cfg->cd); + ret = arm_smmu_write_ctx_desc(smmu_domain, IOMMU_NO_PASID, &cfg->cd); if (ret) goto out_free_cd_tables; @@ -2167,7 +2149,9 @@ static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain, struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg; typeof(&pgtbl_cfg->arm_lpae_s2_cfg.vtcr) vtcr; - vmid = arm_smmu_bitmap_alloc(smmu->vmid_map, smmu->vmid_bits); + /* Reserve VMID 0 for stage-2 bypass STEs */ + vmid = ida_alloc_range(&smmu->vmid_map, 1, (1 << smmu->vmid_bits) - 1, + GFP_KERNEL); if (vmid < 0) return vmid; @@ -2328,7 +2312,7 @@ static void arm_smmu_enable_ats(struct arm_smmu_master *master) pdev = to_pci_dev(master->dev); atomic_inc(&smmu_domain->nr_ats_masters); - arm_smmu_atc_inv_domain(smmu_domain, 0, 0, 0); + arm_smmu_atc_inv_domain(smmu_domain, IOMMU_NO_PASID, 0, 0); if (pci_enable_ats(pdev, stu)) dev_err(master->dev, "Failed to enable ATS (STU %zu)\n", stu); } @@ -3098,8 +3082,8 @@ static int arm_smmu_init_strtab(struct arm_smmu_device *smmu) reg |= STRTAB_BASE_RA; smmu->strtab_cfg.strtab_base = reg; - /* Allocate the first VMID for stage-2 bypass STEs */ - set_bit(0, smmu->vmid_map); + ida_init(&smmu->vmid_map); + return 0; } @@ -3923,6 +3907,7 @@ static void arm_smmu_device_remove(struct platform_device *pdev) iommu_device_sysfs_remove(&smmu->iommu); arm_smmu_device_disable(smmu); iopf_queue_free(smmu->evtq.iopf); + ida_destroy(&smmu->vmid_map); } static void arm_smmu_device_shutdown(struct platform_device *pdev) diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index dcab85698a4e..9915850dd4db 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -670,7 +670,7 @@ struct arm_smmu_device { #define ARM_SMMU_MAX_VMIDS (1 << 16) unsigned int vmid_bits; - DECLARE_BITMAP(vmid_map, ARM_SMMU_MAX_VMIDS); + struct ida vmid_map; unsigned int ssid_bits; unsigned int sid_bits; diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c index b5b14108e086..bb89d49adf8d 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c @@ -3,7 +3,7 @@ * Copyright (c) 2022 Qualcomm Innovation Center, Inc. All rights reserved. 
*/ -#include <linux/of_device.h> +#include <linux/device.h> #include <linux/firmware/qcom/qcom_scm.h> #include <linux/ratelimit.h> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c index c71afda79d64..7f52ac67495f 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c @@ -251,10 +251,12 @@ static const struct of_device_id qcom_smmu_client_of_match[] __maybe_unused = { { .compatible = "qcom,sc7280-mss-pil" }, { .compatible = "qcom,sc8180x-mdss" }, { .compatible = "qcom,sc8280xp-mdss" }, - { .compatible = "qcom,sm8150-mdss" }, - { .compatible = "qcom,sm8250-mdss" }, { .compatible = "qcom,sdm845-mdss" }, { .compatible = "qcom,sdm845-mss-pil" }, + { .compatible = "qcom,sm6350-mdss" }, + { .compatible = "qcom,sm6375-mdss" }, + { .compatible = "qcom,sm8150-mdss" }, + { .compatible = "qcom,sm8250-mdss" }, { } }; @@ -528,6 +530,7 @@ static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = { { .compatible = "qcom,sm6125-smmu-500", .data = &qcom_smmu_500_impl0_data }, { .compatible = "qcom,sm6350-smmu-v2", .data = &qcom_smmu_v2_data }, { .compatible = "qcom,sm6350-smmu-500", .data = &qcom_smmu_500_impl0_data }, + { .compatible = "qcom,sm6375-smmu-v2", .data = &qcom_smmu_v2_data }, { .compatible = "qcom,sm6375-smmu-500", .data = &qcom_smmu_500_impl0_data }, { .compatible = "qcom,sm8150-smmu-500", .data = &qcom_smmu_500_impl0_data }, { .compatible = "qcom,sm8250-smmu-500", .data = &qcom_smmu_500_impl0_data }, diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c index a86acd76c1df..d6d1a2a55cc0 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c @@ -29,7 +29,6 @@ #include <linux/module.h> #include <linux/of.h> #include <linux/of_address.h> -#include <linux/of_device.h> #include <linux/pci.h> #include <linux/platform_device.h> #include <linux/pm_runtime.h> diff --git a/drivers/iommu/arm/arm-smmu/qcom_iommu.c b/drivers/iommu/arm/arm-smmu/qcom_iommu.c index a503ed758ec3..775a3cbaff4e 100644 --- a/drivers/iommu/arm/arm-smmu/qcom_iommu.c +++ b/drivers/iommu/arm/arm-smmu/qcom_iommu.c @@ -22,8 +22,7 @@ #include <linux/init.h> #include <linux/mutex.h> #include <linux/of.h> -#include <linux/of_address.h> -#include <linux/of_device.h> +#include <linux/of_platform.h> #include <linux/platform_device.h> #include <linux/pm.h> #include <linux/pm_runtime.h> @@ -51,14 +50,15 @@ struct qcom_iommu_dev { struct clk_bulk_data clks[CLK_NUM]; void __iomem *local_base; u32 sec_id; - u8 num_ctxs; - struct qcom_iommu_ctx *ctxs[]; /* indexed by asid-1 */ + u8 max_asid; + struct qcom_iommu_ctx *ctxs[]; /* indexed by asid */ }; struct qcom_iommu_ctx { struct device *dev; void __iomem *base; bool secure_init; + bool secured_ctx; u8 asid; /* asid and ctx bank # are 1:1 */ struct iommu_domain *domain; }; @@ -94,7 +94,7 @@ static struct qcom_iommu_ctx * to_ctx(struct qcom_iommu_domain *d, unsigned asid struct qcom_iommu_dev *qcom_iommu = d->iommu; if (!qcom_iommu) return NULL; - return qcom_iommu->ctxs[asid - 1]; + return qcom_iommu->ctxs[asid]; } static inline void @@ -273,6 +273,19 @@ static int qcom_iommu_init_domain(struct iommu_domain *domain, ctx->secure_init = true; } + /* Secured QSMMU-500/QSMMU-v2 contexts cannot be programmed */ + if (ctx->secured_ctx) { + ctx->domain = domain; + continue; + } + + /* Disable context bank before programming */ + iommu_writel(ctx, ARM_SMMU_CB_SCTLR, 0); + + /* Clear context bank fault address 
and fault status registers */
+	iommu_writel(ctx, ARM_SMMU_CB_FAR, 0);
+	iommu_writel(ctx, ARM_SMMU_CB_FSR, ARM_SMMU_FSR_FAULT);
+
 	/* TTBRs */
 	iommu_writeq(ctx, ARM_SMMU_CB_TTBR0,
 			pgtbl_cfg.arm_lpae_s1_cfg.ttbr |
@@ -527,11 +540,10 @@ static int qcom_iommu_of_xlate(struct device *dev, struct of_phandle_args *args)
 	qcom_iommu = platform_get_drvdata(iommu_pdev);
 
 	/* make sure the asid specified in dt is valid, so we don't have
-	 * to sanity check this elsewhere, since 'asid - 1' is used to
-	 * index into qcom_iommu->ctxs:
+	 * to sanity check this elsewhere:
 	 */
-	if (WARN_ON(asid < 1) ||
-	    WARN_ON(asid > qcom_iommu->num_ctxs)) {
+	if (WARN_ON(asid > qcom_iommu->max_asid) ||
+	    WARN_ON(qcom_iommu->ctxs[asid] == NULL)) {
 		put_device(&iommu_pdev->dev);
 		return -EINVAL;
 	}
@@ -617,7 +629,8 @@ free_mem:
 
 static int get_asid(const struct device_node *np)
 {
-	u32 reg;
+	u32 reg, val;
+	int asid;
 
 	/* read the "reg" property directly to get the relative address
 	 * of the context bank, and calculate the asid from that:
@@ -625,7 +638,17 @@ static int get_asid(const struct device_node *np)
 	if (of_property_read_u32_index(np, "reg", 0, &reg))
 		return -ENODEV;
 
-	return reg / 0x1000; /* context banks are 0x1000 apart */
+	/*
+	 * Context banks are 0x1000 apart but, in some cases, the ASID
+	 * number doesn't match this logic and needs to be passed
+	 * from the DT configuration explicitly.
+	 */
+	if (!of_property_read_u32(np, "qcom,ctx-asid", &val))
+		asid = val;
+	else
+		asid = reg / 0x1000;
+
+	return asid;
 }
 
 static int qcom_iommu_ctx_probe(struct platform_device *pdev)
@@ -633,7 +656,6 @@ static int qcom_iommu_ctx_probe(struct platform_device *pdev)
 	struct qcom_iommu_ctx *ctx;
 	struct device *dev = &pdev->dev;
 	struct qcom_iommu_dev *qcom_iommu = dev_get_drvdata(dev->parent);
-	struct resource *res;
 	int ret, irq;
 
 	ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL);
@@ -643,19 +665,22 @@ static int qcom_iommu_ctx_probe(struct platform_device *pdev)
 	ctx->dev = dev;
 	platform_set_drvdata(pdev, ctx);
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	ctx->base = devm_ioremap_resource(dev, res);
+	ctx->base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(ctx->base))
 		return PTR_ERR(ctx->base);
 
 	irq = platform_get_irq(pdev, 0);
 	if (irq < 0)
-		return -ENODEV;
+		return irq;
+
+	if (of_device_is_compatible(dev->of_node, "qcom,msm-iommu-v2-sec"))
+		ctx->secured_ctx = true;
 
 	/* clear IRQs before registering fault handler, just in case the
 	 * boot-loader left us a surprise:
 	 */
-	iommu_writel(ctx, ARM_SMMU_CB_FSR, iommu_readl(ctx, ARM_SMMU_CB_FSR));
+	if (!ctx->secured_ctx)
+		iommu_writel(ctx, ARM_SMMU_CB_FSR, iommu_readl(ctx, ARM_SMMU_CB_FSR));
 
 	ret = devm_request_irq(dev, irq,
 			       qcom_iommu_fault,
@@ -677,7 +702,7 @@ static int qcom_iommu_ctx_probe(struct platform_device *pdev)
 
 	dev_dbg(dev, "found asid %u\n", ctx->asid);
 
-	qcom_iommu->ctxs[ctx->asid - 1] = ctx;
+	qcom_iommu->ctxs[ctx->asid] = ctx;
 
 	return 0;
 }
@@ -689,12 +714,14 @@ static void qcom_iommu_ctx_remove(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, NULL);
 
-	qcom_iommu->ctxs[ctx->asid - 1] = NULL;
+	qcom_iommu->ctxs[ctx->asid] = NULL;
 }
 
 static const struct of_device_id ctx_of_match[] = {
 	{ .compatible = "qcom,msm-iommu-v1-ns" },
 	{ .compatible = "qcom,msm-iommu-v1-sec" },
+	{ .compatible = "qcom,msm-iommu-v2-ns" },
+	{ .compatible = "qcom,msm-iommu-v2-sec" },
 	{ /* sentinel */ }
 };
 
@@ -712,7 +739,8 @@ static bool qcom_iommu_has_secure_context(struct qcom_iommu_dev *qcom_iommu)
 	struct device_node *child;
 
 	for_each_child_of_node(qcom_iommu->dev->of_node, child) {
-
if (of_device_is_compatible(child, "qcom,msm-iommu-v1-sec")) { + if (of_device_is_compatible(child, "qcom,msm-iommu-v1-sec") || + of_device_is_compatible(child, "qcom,msm-iommu-v2-sec")) { of_node_put(child); return true; } @@ -736,11 +764,11 @@ static int qcom_iommu_device_probe(struct platform_device *pdev) for_each_child_of_node(dev->of_node, child) max_asid = max(max_asid, get_asid(child)); - qcom_iommu = devm_kzalloc(dev, struct_size(qcom_iommu, ctxs, max_asid), + qcom_iommu = devm_kzalloc(dev, struct_size(qcom_iommu, ctxs, max_asid + 1), GFP_KERNEL); if (!qcom_iommu) return -ENOMEM; - qcom_iommu->num_ctxs = max_asid; + qcom_iommu->max_asid = max_asid; qcom_iommu->dev = dev; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); @@ -856,6 +884,7 @@ static const struct dev_pm_ops qcom_iommu_pm_ops = { static const struct of_device_id qcom_iommu_of_match[] = { { .compatible = "qcom,msm-iommu-v1" }, + { .compatible = "qcom,msm-iommu-v2" }, { /* sentinel */ } }; diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index e57724163835..4b1a88f514c9 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -660,7 +660,7 @@ static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain, { struct iommu_dma_cookie *cookie = domain->iova_cookie; struct iova_domain *iovad = &cookie->iovad; - unsigned long shift, iova_len, iova = 0; + unsigned long shift, iova_len, iova; if (cookie->type == IOMMU_DMA_MSI_COOKIE) { cookie->msi_iova += size; @@ -675,15 +675,29 @@ static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain, if (domain->geometry.force_aperture) dma_limit = min(dma_limit, (u64)domain->geometry.aperture_end); - /* Try to get PCI devices a SAC address */ - if (dma_limit > DMA_BIT_MASK(32) && !iommu_dma_forcedac && dev_is_pci(dev)) + /* + * Try to use all the 32-bit PCI addresses first. The original SAC vs. + * DAC reasoning loses relevance with PCIe, but enough hardware and + * firmware bugs are still lurking out there that it's safest not to + * venture into the 64-bit space until necessary. + * + * If your device goes wrong after seeing the notice then likely either + * its driver is not setting DMA masks accurately, the hardware has + * some inherent bug in handling >32-bit addresses, or not all the + * expected address bits are wired up between the device and the IOMMU. 
+ */ + if (dma_limit > DMA_BIT_MASK(32) && dev->iommu->pci_32bit_workaround) { iova = alloc_iova_fast(iovad, iova_len, DMA_BIT_MASK(32) >> shift, false); + if (iova) + goto done; - if (!iova) - iova = alloc_iova_fast(iovad, iova_len, dma_limit >> shift, - true); + dev->iommu->pci_32bit_workaround = false; + dev_notice(dev, "Using %d-bit DMA addresses\n", bits_per(dma_limit)); + } + iova = alloc_iova_fast(iovad, iova_len, dma_limit >> shift, true); +done: return (dma_addr_t)iova << shift; } diff --git a/drivers/iommu/dma-iommu.h b/drivers/iommu/dma-iommu.h index 942790009292..c829f1f82a99 100644 --- a/drivers/iommu/dma-iommu.h +++ b/drivers/iommu/dma-iommu.h @@ -17,6 +17,10 @@ int iommu_dma_init_fq(struct iommu_domain *domain); void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list); extern bool iommu_dma_forcedac; +static inline void iommu_dma_set_pci_32bit_workaround(struct device *dev) +{ + dev->iommu->pci_32bit_workaround = !iommu_dma_forcedac; +} #else /* CONFIG_IOMMU_DMA */ @@ -38,5 +42,9 @@ static inline void iommu_dma_get_resv_regions(struct device *dev, struct list_he { } +static inline void iommu_dma_set_pci_32bit_workaround(struct device *dev) +{ +} + #endif /* CONFIG_IOMMU_DMA */ #endif /* __DMA_IOMMU_H */ diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 9e6f78830ece..5db283c17e0d 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -114,13 +114,17 @@ static inline unsigned long lvl_to_nr_pages(unsigned int lvl) /* VT-d pages must always be _smaller_ than MM pages. Otherwise things are never going to work. */ -static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn) +static inline unsigned long mm_to_dma_pfn_start(unsigned long mm_pfn) { return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT); } +static inline unsigned long mm_to_dma_pfn_end(unsigned long mm_pfn) +{ + return ((mm_pfn + 1) << (PAGE_SHIFT - VTD_PAGE_SHIFT)) - 1; +} static inline unsigned long page_to_dma_pfn(struct page *pg) { - return mm_to_dma_pfn(page_to_pfn(pg)); + return mm_to_dma_pfn_start(page_to_pfn(pg)); } static inline unsigned long virt_to_dma_pfn(void *p) { @@ -878,7 +882,7 @@ void dmar_fault_dump_ptes(struct intel_iommu *iommu, u16 source_id, } /* For request-without-pasid, get the pasid from context entry */ if (intel_iommu_sm && pasid == IOMMU_PASID_INVALID) - pasid = PASID_RID2PASID; + pasid = IOMMU_NO_PASID; dir_index = pasid >> PASID_PDE_SHIFT; pde = &dir[dir_index]; @@ -1360,6 +1364,7 @@ domain_lookup_dev_info(struct dmar_domain *domain, static void domain_update_iotlb(struct dmar_domain *domain) { + struct dev_pasid_info *dev_pasid; struct device_domain_info *info; bool has_iotlb_device = false; unsigned long flags; @@ -1371,6 +1376,14 @@ static void domain_update_iotlb(struct dmar_domain *domain) break; } } + + list_for_each_entry(dev_pasid, &domain->dev_pasids, link_domain) { + info = dev_iommu_priv_get(dev_pasid->dev); + if (info->ats_enabled) { + has_iotlb_device = true; + break; + } + } domain->has_iotlb_device = has_iotlb_device; spin_unlock_irqrestore(&domain->lock, flags); } @@ -1450,12 +1463,13 @@ static void __iommu_flush_dev_iotlb(struct device_domain_info *info, qdep = info->ats_qdep; qi_flush_dev_iotlb(info->iommu, sid, info->pfsid, qdep, addr, mask); - quirk_extra_dev_tlb_flush(info, addr, mask, PASID_RID2PASID, qdep); + quirk_extra_dev_tlb_flush(info, addr, mask, IOMMU_NO_PASID, qdep); } static void iommu_flush_dev_iotlb(struct dmar_domain *domain, u64 addr, unsigned mask) { + struct dev_pasid_info *dev_pasid; 
struct device_domain_info *info; unsigned long flags; @@ -1465,6 +1479,36 @@ static void iommu_flush_dev_iotlb(struct dmar_domain *domain, spin_lock_irqsave(&domain->lock, flags); list_for_each_entry(info, &domain->devices, link) __iommu_flush_dev_iotlb(info, addr, mask); + + list_for_each_entry(dev_pasid, &domain->dev_pasids, link_domain) { + info = dev_iommu_priv_get(dev_pasid->dev); + + if (!info->ats_enabled) + continue; + + qi_flush_dev_iotlb_pasid(info->iommu, + PCI_DEVID(info->bus, info->devfn), + info->pfsid, dev_pasid->pasid, + info->ats_qdep, addr, + mask); + } + spin_unlock_irqrestore(&domain->lock, flags); +} + +static void domain_flush_pasid_iotlb(struct intel_iommu *iommu, + struct dmar_domain *domain, u64 addr, + unsigned long npages, bool ih) +{ + u16 did = domain_id_iommu(domain, iommu); + struct dev_pasid_info *dev_pasid; + unsigned long flags; + + spin_lock_irqsave(&domain->lock, flags); + list_for_each_entry(dev_pasid, &domain->dev_pasids, link_domain) + qi_flush_piotlb(iommu, did, dev_pasid->pasid, addr, npages, ih); + + if (!list_empty(&domain->devices)) + qi_flush_piotlb(iommu, did, IOMMU_NO_PASID, addr, npages, ih); spin_unlock_irqrestore(&domain->lock, flags); } @@ -1485,7 +1529,7 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, ih = 1 << 6; if (domain->use_first_level) { - qi_flush_piotlb(iommu, did, PASID_RID2PASID, addr, pages, ih); + domain_flush_pasid_iotlb(iommu, domain, addr, pages, ih); } else { unsigned long bitmask = aligned_pages - 1; @@ -1555,7 +1599,7 @@ static void intel_flush_iotlb_all(struct iommu_domain *domain) u16 did = domain_id_iommu(dmar_domain, iommu); if (dmar_domain->use_first_level) - qi_flush_piotlb(iommu, did, PASID_RID2PASID, 0, -1, 0); + domain_flush_pasid_iotlb(iommu, dmar_domain, 0, -1, 0); else iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH); @@ -1727,6 +1771,7 @@ static struct dmar_domain *alloc_domain(unsigned int type) domain->use_first_level = true; domain->has_iotlb_device = false; INIT_LIST_HEAD(&domain->devices); + INIT_LIST_HEAD(&domain->dev_pasids); spin_lock_init(&domain->lock); xa_init(&domain->iommu_array); @@ -1941,7 +1986,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain, context_pdts(pds); /* Setup the RID_PASID field: */ - context_set_sm_rid2pasid(context, PASID_RID2PASID); + context_set_sm_rid2pasid(context, IOMMU_NO_PASID); /* * Setup the Device-TLB enable bit and Page request @@ -2363,8 +2408,8 @@ static int __init si_domain_init(int hw) for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) { ret = iommu_domain_identity_map(si_domain, - mm_to_dma_pfn(start_pfn), - mm_to_dma_pfn(end_pfn)); + mm_to_dma_pfn_start(start_pfn), + mm_to_dma_pfn_end(end_pfn)); if (ret) return ret; } @@ -2385,8 +2430,8 @@ static int __init si_domain_init(int hw) continue; ret = iommu_domain_identity_map(si_domain, - mm_to_dma_pfn(start >> PAGE_SHIFT), - mm_to_dma_pfn(end >> PAGE_SHIFT)); + mm_to_dma_pfn_start(start >> PAGE_SHIFT), + mm_to_dma_pfn_end(end >> PAGE_SHIFT)); if (ret) return ret; } @@ -2421,13 +2466,13 @@ static int dmar_domain_attach_device(struct dmar_domain *domain, /* Setup the PASID entry for requests without PASID: */ if (hw_pass_through && domain_type_is_si(domain)) ret = intel_pasid_setup_pass_through(iommu, domain, - dev, PASID_RID2PASID); + dev, IOMMU_NO_PASID); else if (domain->use_first_level) ret = domain_setup_first_level(iommu, domain, dev, - PASID_RID2PASID); + IOMMU_NO_PASID); else ret = intel_pasid_setup_second_level(iommu, domain, - dev, PASID_RID2PASID); 
+ dev, IOMMU_NO_PASID); if (ret) { dev_err(dev, "Setup RID2PASID failed\n"); device_block_translation(dev); @@ -2447,30 +2492,6 @@ static int dmar_domain_attach_device(struct dmar_domain *domain, return 0; } -static bool device_has_rmrr(struct device *dev) -{ - struct dmar_rmrr_unit *rmrr; - struct device *tmp; - int i; - - rcu_read_lock(); - for_each_rmrr_units(rmrr) { - /* - * Return TRUE if this RMRR contains the device that - * is passed in. - */ - for_each_active_dev_scope(rmrr->devices, - rmrr->devices_cnt, i, tmp) - if (tmp == dev || - is_downstream_to_pci_bridge(dev, tmp)) { - rcu_read_unlock(); - return true; - } - } - rcu_read_unlock(); - return false; -} - /** * device_rmrr_is_relaxable - Test whether the RMRR of this device * is relaxable (ie. is allowed to be not enforced under some conditions) @@ -2501,34 +2522,6 @@ static bool device_rmrr_is_relaxable(struct device *dev) } /* - * There are a couple cases where we need to restrict the functionality of - * devices associated with RMRRs. The first is when evaluating a device for - * identity mapping because problems exist when devices are moved in and out - * of domains and their respective RMRR information is lost. This means that - * a device with associated RMRRs will never be in a "passthrough" domain. - * The second is use of the device through the IOMMU API. This interface - * expects to have full control of the IOVA space for the device. We cannot - * satisfy both the requirement that RMRR access is maintained and have an - * unencumbered IOVA space. We also have no ability to quiesce the device's - * use of the RMRR space or even inform the IOMMU API user of the restriction. - * We therefore prevent devices associated with an RMRR from participating in - * the IOMMU API, which eliminates them from device assignment. - * - * In both cases, devices which have relaxable RMRRs are not concerned by this - * restriction. See device_rmrr_is_relaxable comment. - */ -static bool device_is_rmrr_locked(struct device *dev) -{ - if (!device_has_rmrr(dev)) - return false; - - if (device_rmrr_is_relaxable(dev)) - return false; - - return true; -} - -/* * Return the required default domain type for a specific device. 
 *
 * @dev: the device in query
@@ -3561,8 +3554,8 @@ static int intel_iommu_memory_notifier(struct notifier_block *nb,
 				       unsigned long val, void *v)
 {
 	struct memory_notify *mhp = v;
-	unsigned long start_vpfn = mm_to_dma_pfn(mhp->start_pfn);
-	unsigned long last_vpfn = mm_to_dma_pfn(mhp->start_pfn +
+	unsigned long start_vpfn = mm_to_dma_pfn_start(mhp->start_pfn);
+	unsigned long last_vpfn = mm_to_dma_pfn_end(mhp->start_pfn +
 			mhp->nr_pages - 1);
 
 	switch (val) {
@@ -3757,7 +3750,6 @@ static int __init probe_acpi_namespace_devices(void)
 		for_each_active_dev_scope(drhd->devices,
 					  drhd->devices_cnt, i, dev) {
 			struct acpi_device_physical_node *pn;
-			struct iommu_group *group;
 			struct acpi_device *adev;
 
 			if (dev->bus != &acpi_bus_type)
@@ -3767,12 +3759,6 @@ static int __init probe_acpi_namespace_devices(void)
 			mutex_lock(&adev->physical_node_lock);
 			list_for_each_entry(pn,
 					    &adev->physical_node_list, node) {
-				group = iommu_group_get(pn->dev);
-				if (group) {
-					iommu_group_put(group);
-					continue;
-				}
-
 				ret = iommu_probe_device(pn->dev);
 				if (ret)
 					break;
@@ -3969,7 +3955,7 @@ static void dmar_remove_one_dev_info(struct device *dev)
 	if (!dev_is_real_dma_subdevice(info->dev)) {
 		if (dev_is_pci(info->dev) && sm_supported(iommu))
 			intel_pasid_tear_down_entry(iommu, info->dev,
-					PASID_RID2PASID, false);
+					IOMMU_NO_PASID, false);
 
 		iommu_disable_pci_caps(info);
 		domain_context_clear(info);
@@ -3998,7 +3984,7 @@ static void device_block_translation(struct device *dev)
 	if (!dev_is_real_dma_subdevice(dev)) {
 		if (sm_supported(iommu))
 			intel_pasid_tear_down_entry(iommu, dev,
-						    PASID_RID2PASID, false);
+						    IOMMU_NO_PASID, false);
 		else
 			domain_context_clear(info);
 	}
@@ -4140,12 +4126,6 @@ static int intel_iommu_attach_device(struct iommu_domain *domain,
 	struct device_domain_info *info = dev_iommu_priv_get(dev);
 	int ret;
 
-	if (domain->type == IOMMU_DOMAIN_UNMANAGED &&
-	    device_is_rmrr_locked(dev)) {
-		dev_warn(dev, "Device is ineligible for IOMMU domain attach due to platform RMRR requirement. Contact your platform vendor.\n");
-		return -EPERM;
-	}
-
 	if (info->domain)
 		device_block_translation(dev);
 
@@ -4272,7 +4252,7 @@ static void intel_iommu_tlb_sync(struct iommu_domain *domain,
 	unsigned long i;
 
 	nrpages = aligned_nrpages(gather->start, size);
-	start_pfn = mm_to_dma_pfn(iova_pfn);
+	start_pfn = mm_to_dma_pfn_start(iova_pfn);
 
 	xa_for_each(&dmar_domain->iommu_array, i, info)
 		iommu_flush_iotlb_psi(info->iommu, dmar_domain,
@@ -4332,7 +4312,7 @@ static void domain_set_force_snooping(struct dmar_domain *domain)
 	list_for_each_entry(info, &domain->devices, link)
 		intel_pasid_setup_page_snoop_control(info->iommu, info->dev,
-						     PASID_RID2PASID);
+						     IOMMU_NO_PASID);
 }
 
 static bool intel_iommu_enforce_cache_coherency(struct iommu_domain *domain)
@@ -4714,23 +4694,96 @@ static void intel_iommu_iotlb_sync_map(struct iommu_domain *domain,
 static void intel_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid)
 {
 	struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL);
+	struct dev_pasid_info *curr, *dev_pasid = NULL;
+	struct dmar_domain *dmar_domain;
 	struct iommu_domain *domain;
+	unsigned long flags;
 
-	/* Domain type specific cleanup: */
 	domain = iommu_get_domain_for_dev_pasid(dev, pasid, 0);
-	if (domain) {
-		switch (domain->type) {
-		case IOMMU_DOMAIN_SVA:
-			intel_svm_remove_dev_pasid(dev, pasid);
-			break;
-		default:
-			/* should never reach here */
-			WARN_ON(1);
+	if (WARN_ON_ONCE(!domain))
+		goto out_tear_down;
+
+	/*
+	 * The SVA implementation needs to handle its own stuff like the mm
+	 * notification.
Before consolidating that code into iommu core, let + * the intel sva code handle it. + */ + if (domain->type == IOMMU_DOMAIN_SVA) { + intel_svm_remove_dev_pasid(dev, pasid); + goto out_tear_down; + } + + dmar_domain = to_dmar_domain(domain); + spin_lock_irqsave(&dmar_domain->lock, flags); + list_for_each_entry(curr, &dmar_domain->dev_pasids, link_domain) { + if (curr->dev == dev && curr->pasid == pasid) { + list_del(&curr->link_domain); + dev_pasid = curr; break; } } + WARN_ON_ONCE(!dev_pasid); + spin_unlock_irqrestore(&dmar_domain->lock, flags); + domain_detach_iommu(dmar_domain, iommu); + kfree(dev_pasid); +out_tear_down: intel_pasid_tear_down_entry(iommu, dev, pasid, false); + intel_drain_pasid_prq(dev, pasid); +} + +static int intel_iommu_set_dev_pasid(struct iommu_domain *domain, + struct device *dev, ioasid_t pasid) +{ + struct device_domain_info *info = dev_iommu_priv_get(dev); + struct dmar_domain *dmar_domain = to_dmar_domain(domain); + struct intel_iommu *iommu = info->iommu; + struct dev_pasid_info *dev_pasid; + unsigned long flags; + int ret; + + if (!pasid_supported(iommu) || dev_is_real_dma_subdevice(dev)) + return -EOPNOTSUPP; + + if (context_copied(iommu, info->bus, info->devfn)) + return -EBUSY; + + ret = prepare_domain_attach_device(domain, dev); + if (ret) + return ret; + + dev_pasid = kzalloc(sizeof(*dev_pasid), GFP_KERNEL); + if (!dev_pasid) + return -ENOMEM; + + ret = domain_attach_iommu(dmar_domain, iommu); + if (ret) + goto out_free; + + if (domain_type_is_si(dmar_domain)) + ret = intel_pasid_setup_pass_through(iommu, dmar_domain, + dev, pasid); + else if (dmar_domain->use_first_level) + ret = domain_setup_first_level(iommu, dmar_domain, + dev, pasid); + else + ret = intel_pasid_setup_second_level(iommu, dmar_domain, + dev, pasid); + if (ret) + goto out_detach_iommu; + + dev_pasid->dev = dev; + dev_pasid->pasid = pasid; + spin_lock_irqsave(&dmar_domain->lock, flags); + list_add(&dev_pasid->link_domain, &dmar_domain->dev_pasids); + spin_unlock_irqrestore(&dmar_domain->lock, flags); + + return 0; +out_detach_iommu: + domain_detach_iommu(dmar_domain, iommu); +out_free: + kfree(dev_pasid); + return ret; } static void *intel_iommu_hw_info(struct device *dev, u32 *length, u32 *type) @@ -4770,6 +4823,7 @@ const struct iommu_ops intel_iommu_ops = { #endif .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = intel_iommu_attach_device, + .set_dev_pasid = intel_iommu_set_dev_pasid, .map_pages = intel_iommu_map_pages, .unmap_pages = intel_iommu_unmap_pages, .iotlb_sync_map = intel_iommu_iotlb_sync_map, @@ -5006,7 +5060,7 @@ void quirk_extra_dev_tlb_flush(struct device_domain_info *info, return; sid = PCI_DEVID(info->bus, info->devfn); - if (pasid == PASID_RID2PASID) { + if (pasid == IOMMU_NO_PASID) { qi_flush_dev_iotlb(info->iommu, sid, info->pfsid, qdep, address, mask); } else { diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index 1c5e1d88862b..c18fb699c87a 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -595,6 +595,7 @@ struct dmar_domain { spinlock_t lock; /* Protect device tracking lists */ struct list_head devices; /* all devices' list */ + struct list_head dev_pasids; /* all attached pasids */ struct dma_pte *pgd; /* virtual address */ int gaw; /* max guest address width */ @@ -717,6 +718,12 @@ struct device_domain_info { struct pasid_table *pasid_table; /* pasid table */ }; +struct dev_pasid_info { + struct list_head link_domain; /* link to domain siblings */ + struct device *dev; + ioasid_t pasid; 
+}; + static inline void __iommu_flush_cache( struct intel_iommu *iommu, void *addr, int size) { @@ -844,6 +851,7 @@ int intel_svm_page_response(struct device *dev, struct iommu_fault_event *evt, struct iommu_page_response *msg); struct iommu_domain *intel_svm_domain_alloc(void); void intel_svm_remove_dev_pasid(struct device *dev, ioasid_t pasid); +void intel_drain_pasid_prq(struct device *dev, u32 pasid); struct intel_svm_dev { struct list_head list; @@ -862,6 +870,7 @@ struct intel_svm { }; #else static inline void intel_svm_check(struct intel_iommu *iommu) {} +static inline void intel_drain_pasid_prq(struct device *dev, u32 pasid) {} static inline struct iommu_domain *intel_svm_domain_alloc(void) { return NULL; diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index c5d479770e12..8f92b92f3d2a 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -129,7 +129,7 @@ int intel_pasid_alloc_table(struct device *dev) info->pasid_table = pasid_table; if (!ecap_coherent(info->iommu->ecap)) - clflush_cache_range(pasid_table->table, size); + clflush_cache_range(pasid_table->table, (1 << order) * PAGE_SIZE); return 0; } @@ -438,7 +438,7 @@ devtlb_invalidation_with_pasid(struct intel_iommu *iommu, * SVA usage, device could do DMA with multiple PASIDs. It is more * efficient to flush devTLB specific to the PASID. */ - if (pasid == PASID_RID2PASID) + if (pasid == IOMMU_NO_PASID) qi_flush_dev_iotlb(iommu, sid, pfsid, qdep, 0, 64 - VTD_PAGE_SHIFT); else qi_flush_dev_iotlb_pasid(iommu, sid, pfsid, pasid, qdep, 0, 64 - VTD_PAGE_SHIFT); diff --git a/drivers/iommu/intel/pasid.h b/drivers/iommu/intel/pasid.h index d6b7d21244b1..4e9e68c3c388 100644 --- a/drivers/iommu/intel/pasid.h +++ b/drivers/iommu/intel/pasid.h @@ -10,8 +10,6 @@ #ifndef __INTEL_PASID_H #define __INTEL_PASID_H -#define PASID_RID2PASID 0x0 -#define PASID_MIN 0x1 #define PASID_MAX 0x100000 #define PASID_PTE_MASK 0x3F #define PASID_PTE_PRESENT 1 diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 8f6d68006ab6..50a481c895b8 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -26,8 +26,6 @@ #include "trace.h" static irqreturn_t prq_event_thread(int irq, void *d); -static void intel_svm_drain_prq(struct device *dev, u32 pasid); -#define to_intel_svm_dev(handle) container_of(handle, struct intel_svm_dev, sva) static DEFINE_XARRAY_ALLOC(pasid_private_array); static int pasid_private_add(ioasid_t pasid, void *priv) @@ -259,8 +257,6 @@ static const struct mmu_notifier_ops intel_mmuops = { .arch_invalidate_secondary_tlbs = intel_arch_invalidate_secondary_tlbs, }; -static DEFINE_MUTEX(pasid_mutex); - static int pasid_to_svm_sdev(struct device *dev, unsigned int pasid, struct intel_svm **rsvm, struct intel_svm_dev **rsdev) @@ -268,10 +264,6 @@ static int pasid_to_svm_sdev(struct device *dev, unsigned int pasid, struct intel_svm_dev *sdev = NULL; struct intel_svm *svm; - /* The caller should hold the pasid_mutex lock */ - if (WARN_ON(!mutex_is_locked(&pasid_mutex))) - return -EINVAL; - if (pasid == IOMMU_PASID_INVALID || pasid >= PASID_MAX) return -EINVAL; @@ -371,37 +363,23 @@ free_svm: return ret; } -/* Caller must hold pasid_mutex */ -static int intel_svm_unbind_mm(struct device *dev, u32 pasid) +void intel_svm_remove_dev_pasid(struct device *dev, u32 pasid) { struct intel_svm_dev *sdev; struct intel_iommu *iommu; struct intel_svm *svm; struct mm_struct *mm; - int ret = -EINVAL; iommu = device_to_iommu(dev, NULL, NULL); if (!iommu) - goto out; + return; - ret = 
pasid_to_svm_sdev(dev, pasid, &svm, &sdev); - if (ret) - goto out; + if (pasid_to_svm_sdev(dev, pasid, &svm, &sdev)) + return; mm = svm->mm; if (sdev) { list_del_rcu(&sdev->list); - /* - * Flush the PASID cache and IOTLB for this device. - * Note that we do depend on the hardware *not* using - * the PASID any more. Just as we depend on other - * devices never using PASIDs that they have no right - * to use. We have a *shared* PASID table, because it's - * large and has to be physically contiguous. So it's - * hard to be as defensive as we might like. - */ - intel_pasid_tear_down_entry(iommu, dev, svm->pasid, false); - intel_svm_drain_prq(dev, svm->pasid); kfree_rcu(sdev, rcu); if (list_empty(&svm->devs)) { @@ -418,8 +396,6 @@ static int intel_svm_unbind_mm(struct device *dev, u32 pasid) kfree(svm); } } -out: - return ret; } /* Page request queue descriptor */ @@ -460,7 +436,7 @@ static bool is_canonical_address(u64 addr) } /** - * intel_svm_drain_prq - Drain page requests and responses for a pasid + * intel_drain_pasid_prq - Drain page requests and responses for a pasid * @dev: target device * @pasid: pasid for draining * @@ -474,7 +450,7 @@ static bool is_canonical_address(u64 addr) * described in VT-d spec CH7.10 to drain all page requests and page * responses pending in the hardware. */ -static void intel_svm_drain_prq(struct device *dev, u32 pasid) +void intel_drain_pasid_prq(struct device *dev, u32 pasid) { struct device_domain_info *info; struct dmar_domain *domain; @@ -520,19 +496,7 @@ prq_retry: goto prq_retry; } - /* - * A work in IO page fault workqueue may try to lock pasid_mutex now. - * Holding pasid_mutex while waiting in iopf_queue_flush_dev() for - * all works in the workqueue to finish may cause deadlock. - * - * It's unnecessary to hold pasid_mutex in iopf_queue_flush_dev(). - * Unlock it to allow the works to be handled while waiting for - * them to finish. 
- */ - lockdep_assert_held(&pasid_mutex); - mutex_unlock(&pasid_mutex); iopf_queue_flush_dev(dev); - mutex_lock(&pasid_mutex); /* * Perform steps described in VT-d spec CH7.10 to drain page @@ -827,26 +791,14 @@ out: return ret; } -void intel_svm_remove_dev_pasid(struct device *dev, ioasid_t pasid) -{ - mutex_lock(&pasid_mutex); - intel_svm_unbind_mm(dev, pasid); - mutex_unlock(&pasid_mutex); -} - static int intel_svm_set_dev_pasid(struct iommu_domain *domain, struct device *dev, ioasid_t pasid) { struct device_domain_info *info = dev_iommu_priv_get(dev); struct intel_iommu *iommu = info->iommu; struct mm_struct *mm = domain->mm; - int ret; - mutex_lock(&pasid_mutex); - ret = intel_svm_bind_mm(iommu, dev, mm); - mutex_unlock(&pasid_mutex); - - return ret; + return intel_svm_bind_mm(iommu, dev, mm); } static void intel_svm_domain_free(struct iommu_domain *domain) diff --git a/drivers/iommu/iommu-sva.c b/drivers/iommu/iommu-sva.c index 05c0fb2acbc4..b78671a8a914 100644 --- a/drivers/iommu/iommu-sva.c +++ b/drivers/iommu/iommu-sva.c @@ -10,34 +10,30 @@ #include "iommu-sva.h" static DEFINE_MUTEX(iommu_sva_lock); -static DEFINE_IDA(iommu_global_pasid_ida); /* Allocate a PASID for the mm within range (inclusive) */ -static int iommu_sva_alloc_pasid(struct mm_struct *mm, ioasid_t min, ioasid_t max) +static int iommu_sva_alloc_pasid(struct mm_struct *mm, struct device *dev) { + ioasid_t pasid; int ret = 0; - if (min == IOMMU_PASID_INVALID || - max == IOMMU_PASID_INVALID || - min == 0 || max < min) - return -EINVAL; - if (!arch_pgtable_dma_compat(mm)) return -EBUSY; mutex_lock(&iommu_sva_lock); /* Is a PASID already associated with this mm? */ if (mm_valid_pasid(mm)) { - if (mm->pasid < min || mm->pasid > max) + if (mm->pasid >= dev->iommu->max_pasids) ret = -EOVERFLOW; goto out; } - ret = ida_alloc_range(&iommu_global_pasid_ida, min, max, GFP_KERNEL); - if (ret < 0) + pasid = iommu_alloc_global_pasid(dev); + if (pasid == IOMMU_PASID_INVALID) { + ret = -ENOSPC; goto out; - - mm->pasid = ret; + } + mm->pasid = pasid; ret = 0; out: mutex_unlock(&iommu_sva_lock); @@ -64,15 +60,10 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm { struct iommu_domain *domain; struct iommu_sva *handle; - ioasid_t max_pasids; int ret; - max_pasids = dev->iommu->max_pasids; - if (!max_pasids) - return ERR_PTR(-EOPNOTSUPP); - /* Allocate mm->pasid if necessary. 
*/ - ret = iommu_sva_alloc_pasid(mm, 1, max_pasids - 1); + ret = iommu_sva_alloc_pasid(mm, dev); if (ret) return ERR_PTR(ret); @@ -217,5 +208,5 @@ void mm_pasid_drop(struct mm_struct *mm) if (likely(!mm_valid_pasid(mm))) return; - ida_free(&iommu_global_pasid_ida, mm->pasid); + iommu_free_global_pasid(mm->pasid); } diff --git a/drivers/iommu/iommu-sysfs.c b/drivers/iommu/iommu-sysfs.c index 99869217fbec..cbe378c34ba3 100644 --- a/drivers/iommu/iommu-sysfs.c +++ b/drivers/iommu/iommu-sysfs.c @@ -107,9 +107,6 @@ int iommu_device_link(struct iommu_device *iommu, struct device *link) { int ret; - if (!iommu || IS_ERR(iommu)) - return -ENODEV; - ret = sysfs_add_link_to_group(&iommu->dev->kobj, "devices", &link->kobj, dev_name(link)); if (ret) @@ -122,14 +119,9 @@ int iommu_device_link(struct iommu_device *iommu, struct device *link) return ret; } -EXPORT_SYMBOL_GPL(iommu_device_link); void iommu_device_unlink(struct iommu_device *iommu, struct device *link) { - if (!iommu || IS_ERR(iommu)) - return; - sysfs_remove_link(&link->kobj, "iommu"); sysfs_remove_link_from_group(&iommu->dev->kobj, "devices", dev_name(link)); } -EXPORT_SYMBOL_GPL(iommu_device_unlink); diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index c9c370de9d3d..3bfc56df4f78 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -41,6 +41,7 @@ static struct kset *iommu_group_kset; static DEFINE_IDA(iommu_group_ida); +static DEFINE_IDA(iommu_global_pasid_ida); static unsigned int iommu_def_domain_type __read_mostly; static bool iommu_dma_strict __read_mostly = IS_ENABLED(CONFIG_IOMMU_DEFAULT_DMA_STRICT); @@ -129,9 +130,12 @@ static int iommu_setup_default_domain(struct iommu_group *group, int target_type); static int iommu_create_device_direct_mappings(struct iommu_domain *domain, struct device *dev); -static struct iommu_group *iommu_group_get_for_dev(struct device *dev); static ssize_t iommu_group_store_type(struct iommu_group *group, const char *buf, size_t count); +static struct group_device *iommu_group_alloc_device(struct iommu_group *group, + struct device *dev); +static void __iommu_group_free_device(struct iommu_group *group, + struct group_device *grp_dev); #define IOMMU_GROUP_ATTR(_name, _mode, _show, _store) \ struct iommu_group_attribute iommu_group_attr_##_name = \ @@ -377,28 +381,18 @@ static u32 dev_iommu_get_max_pasids(struct device *dev) return min_t(u32, max_pasids, dev->iommu->iommu_dev->max_pasids); } -static int __iommu_probe_device(struct device *dev, struct list_head *group_list) +/* + * Init the dev->iommu and dev->iommu_group in the struct device and get the + * driver probed + */ +static int iommu_init_device(struct device *dev, const struct iommu_ops *ops) { - const struct iommu_ops *ops = dev->bus->iommu_ops; struct iommu_device *iommu_dev; struct iommu_group *group; - static DEFINE_MUTEX(iommu_probe_device_lock); int ret; - if (!ops) - return -ENODEV; - /* - * Serialise to avoid races between IOMMU drivers registering in - * parallel and/or the "replay" calls from ACPI/OF code via client - * driver probe. Once the latter have been cleaned up we should - * probably be able to use device_lock() here to minimise the scope, - * but for now enforcing a simple global ordering is fine. 
- */
-	mutex_lock(&iommu_probe_device_lock);
-	if (!dev_iommu_get(dev)) {
-		ret = -ENOMEM;
-		goto err_unlock;
-	}
+	if (!dev_iommu_get(dev))
+		return -ENOMEM;
 
 	if (!try_module_get(ops->owner)) {
 		ret = -EINVAL;
@@ -408,124 +402,184 @@ static int __iommu_probe_device(struct device *dev, struct list_head *group_list)
 	iommu_dev = ops->probe_device(dev);
 	if (IS_ERR(iommu_dev)) {
 		ret = PTR_ERR(iommu_dev);
-		goto out_module_put;
+		goto err_module_put;
 	}
 
-	dev->iommu->iommu_dev = iommu_dev;
-	dev->iommu->max_pasids = dev_iommu_get_max_pasids(dev);
-	if (ops->is_attach_deferred)
-		dev->iommu->attach_deferred = ops->is_attach_deferred(dev);
+	ret = iommu_device_link(iommu_dev, dev);
+	if (ret)
+		goto err_release;
 
-	group = iommu_group_get_for_dev(dev);
+	group = ops->device_group(dev);
+	if (WARN_ON_ONCE(group == NULL))
+		group = ERR_PTR(-EINVAL);
 	if (IS_ERR(group)) {
 		ret = PTR_ERR(group);
-		goto out_release;
+		goto err_unlink;
 	}
+	dev->iommu_group = group;
 
-	mutex_lock(&group->mutex);
-	if (group_list && !group->default_domain && list_empty(&group->entry))
-		list_add_tail(&group->entry, group_list);
-	mutex_unlock(&group->mutex);
-	iommu_group_put(group);
-
-	mutex_unlock(&iommu_probe_device_lock);
-	iommu_device_link(iommu_dev, dev);
-
+	dev->iommu->iommu_dev = iommu_dev;
+	dev->iommu->max_pasids = dev_iommu_get_max_pasids(dev);
+	if (ops->is_attach_deferred)
+		dev->iommu->attach_deferred = ops->is_attach_deferred(dev);
 	return 0;
 
-out_release:
+err_unlink:
+	iommu_device_unlink(iommu_dev, dev);
+err_release:
 	if (ops->release_device)
 		ops->release_device(dev);
-
-out_module_put:
+err_module_put:
 	module_put(ops->owner);
-
 err_free:
 	dev_iommu_free(dev);
+	return ret;
+}
 
-err_unlock:
-	mutex_unlock(&iommu_probe_device_lock);
+static void iommu_deinit_device(struct device *dev)
+{
+	struct iommu_group *group = dev->iommu_group;
+	const struct iommu_ops *ops = dev_iommu_ops(dev);
 
-	return ret;
+	lockdep_assert_held(&group->mutex);
+
+	iommu_device_unlink(dev->iommu->iommu_dev, dev);
+
+	/*
+	 * release_device() must stop using any attached domain on the device.
+	 * If there are still other devices in the group they are not affected
+	 * by this callback.
+	 *
+	 * The IOMMU driver must set the device to either an identity or
+	 * blocking translation and stop using any domain pointer, as it is
+	 * going to be freed.
+	 */
+	if (ops->release_device)
+		ops->release_device(dev);
+
+	/*
+	 * If this is the last driver to use the group then we must free the
+	 * domains before we do the module_put().
+	 */
+	if (list_empty(&group->devices)) {
+		if (group->default_domain) {
+			iommu_domain_free(group->default_domain);
+			group->default_domain = NULL;
+		}
+		if (group->blocking_domain) {
+			iommu_domain_free(group->blocking_domain);
+			group->blocking_domain = NULL;
+		}
+		group->domain = NULL;
+	}
+
+	/* Caller must put iommu_group */
+	dev->iommu_group = NULL;
+	module_put(ops->owner);
+	dev_iommu_free(dev);
 }
 
-int iommu_probe_device(struct device *dev)
+static int __iommu_probe_device(struct device *dev, struct list_head *group_list)
 {
-	const struct iommu_ops *ops;
+	const struct iommu_ops *ops = dev->bus->iommu_ops;
 	struct iommu_group *group;
+	static DEFINE_MUTEX(iommu_probe_device_lock);
+	struct group_device *gdev;
 	int ret;
 
-	ret = __iommu_probe_device(dev, NULL);
-	if (ret)
-		goto err_out;
+	if (!ops)
+		return -ENODEV;
+	/*
+	 * Serialise to avoid races between IOMMU drivers registering in
+	 * parallel and/or the "replay" calls from ACPI/OF code via client
+	 * driver probe.
Once the latter have been cleaned up we should + * probably be able to use device_lock() here to minimise the scope, + * but for now enforcing a simple global ordering is fine. + */ + mutex_lock(&iommu_probe_device_lock); - group = iommu_group_get(dev); - if (!group) { - ret = -ENODEV; - goto err_release; + /* Device is probed already if in a group */ + if (dev->iommu_group) { + ret = 0; + goto out_unlock; } + ret = iommu_init_device(dev, ops); + if (ret) + goto out_unlock; + + group = dev->iommu_group; + gdev = iommu_group_alloc_device(group, dev); mutex_lock(&group->mutex); + if (IS_ERR(gdev)) { + ret = PTR_ERR(gdev); + goto err_put_group; + } + /* + * The gdev must be in the list before calling + * iommu_setup_default_domain() + */ + list_add_tail(&gdev->list, &group->devices); + WARN_ON(group->default_domain && !group->domain); if (group->default_domain) iommu_create_device_direct_mappings(group->default_domain, dev); - if (group->domain) { ret = __iommu_device_set_domain(group, dev, group->domain, 0); if (ret) - goto err_unlock; - } else if (!group->default_domain) { + goto err_remove_gdev; + } else if (!group->default_domain && !group_list) { ret = iommu_setup_default_domain(group, 0); if (ret) - goto err_unlock; + goto err_remove_gdev; + } else if (!group->default_domain) { + /* + * With a group_list argument we defer the default_domain setup + * to the caller by providing a de-duplicated list of groups + * that need further setup. + */ + if (list_empty(&group->entry)) + list_add_tail(&group->entry, group_list); } - mutex_unlock(&group->mutex); - iommu_group_put(group); + mutex_unlock(&iommu_probe_device_lock); - ops = dev_iommu_ops(dev); - if (ops->probe_finalize) - ops->probe_finalize(dev); + if (dev_is_pci(dev)) + iommu_dma_set_pci_32bit_workaround(dev); return 0; -err_unlock: +err_remove_gdev: + list_del(&gdev->list); + __iommu_group_free_device(group, gdev); +err_put_group: + iommu_deinit_device(dev); mutex_unlock(&group->mutex); iommu_group_put(group); -err_release: - iommu_release_device(dev); +out_unlock: + mutex_unlock(&iommu_probe_device_lock); -err_out: return ret; - } -/* - * Remove a device from a group's device list and return the group device - * if successful. - */ -static struct group_device * -__iommu_group_remove_device(struct iommu_group *group, struct device *dev) +int iommu_probe_device(struct device *dev) { - struct group_device *device; + const struct iommu_ops *ops; + int ret; - lockdep_assert_held(&group->mutex); - for_each_group_device(group, device) { - if (device->dev == dev) { - list_del(&device->list); - return device; - } - } + ret = __iommu_probe_device(dev, NULL); + if (ret) + return ret; - return NULL; + ops = dev_iommu_ops(dev); + if (ops->probe_finalize) + ops->probe_finalize(dev); + + return 0; } -/* - * Release a device from its group and decrements the iommu group reference - * count. - */ -static void __iommu_group_release_device(struct iommu_group *group, - struct group_device *grp_dev) +static void __iommu_group_free_device(struct iommu_group *group, + struct group_device *grp_dev) { struct device *dev = grp_dev->dev; @@ -534,54 +588,57 @@ static void __iommu_group_release_device(struct iommu_group *group, trace_remove_device_from_group(group->id, dev); + /* + * If the group has become empty then ownership must have been + * released, and the current domain must be set back to NULL or + * the default domain. 
+ */ + if (list_empty(&group->devices)) + WARN_ON(group->owner_cnt || + group->domain != group->default_domain); + kfree(grp_dev->name); kfree(grp_dev); - dev->iommu_group = NULL; - kobject_put(group->devices_kobj); } -static void iommu_release_device(struct device *dev) +/* Remove the iommu_group from the struct device. */ +static void __iommu_group_remove_device(struct device *dev) { struct iommu_group *group = dev->iommu_group; struct group_device *device; - const struct iommu_ops *ops; - - if (!dev->iommu || !group) - return; - - iommu_device_unlink(dev->iommu->iommu_dev, dev); mutex_lock(&group->mutex); - device = __iommu_group_remove_device(group, dev); + for_each_group_device(group, device) { + if (device->dev != dev) + continue; - /* - * If the group has become empty then ownership must have been released, - * and the current domain must be set back to NULL or the default - * domain. - */ - if (list_empty(&group->devices)) - WARN_ON(group->owner_cnt || - group->domain != group->default_domain); + list_del(&device->list); + __iommu_group_free_device(group, device); + if (dev->iommu && dev->iommu->iommu_dev) + iommu_deinit_device(dev); + else + dev->iommu_group = NULL; + break; + } + mutex_unlock(&group->mutex); /* - * release_device() must stop using any attached domain on the device. - * If there are still other devices in the group they are not effected - * by this callback. - * - * The IOMMU driver must set the device to either an identity or - * blocking translation and stop using any domain pointer, as it is - * going to be freed. + * Pairs with the get in iommu_init_device() or + * iommu_group_add_device() */ - ops = dev_iommu_ops(dev); - if (ops->release_device) - ops->release_device(dev); - mutex_unlock(&group->mutex); + iommu_group_put(group); +} - if (device) - __iommu_group_release_device(group, device); +static void iommu_release_device(struct device *dev) +{ + struct iommu_group *group = dev->iommu_group; - module_put(ops->owner); - dev_iommu_free(dev); + if (group) + __iommu_group_remove_device(dev); + + /* Free any fwspec if no iommu_driver was ever attached */ + if (dev->iommu) + dev_iommu_free(dev); } static int __init iommu_set_def_domain_type(char *str) @@ -842,10 +899,9 @@ static void iommu_group_release(struct kobject *kobj) ida_free(&iommu_group_ida, group->id); - if (group->default_domain) - iommu_domain_free(group->default_domain); - if (group->blocking_domain) - iommu_domain_free(group->blocking_domain); + /* Domains are free'd by iommu_deinit_device() */ + WARN_ON(group->default_domain); + WARN_ON(group->blocking_domain); kfree(group->name); kfree(group); @@ -1003,14 +1059,12 @@ static int iommu_create_device_direct_mappings(struct iommu_domain *domain, unsigned long pg_size; int ret = 0; - if (!iommu_is_dma_domain(domain)) - return 0; - - BUG_ON(!domain->pgsize_bitmap); - - pg_size = 1UL << __ffs(domain->pgsize_bitmap); + pg_size = domain->pgsize_bitmap ? 
1UL << __ffs(domain->pgsize_bitmap) : 0; INIT_LIST_HEAD(&mappings); + if (WARN_ON_ONCE(iommu_is_dma_domain(domain) && !pg_size)) + return -EINVAL; + iommu_get_resv_regions(dev, &mappings); /* We need to consider overlapping regions for different devices */ @@ -1018,13 +1072,17 @@ static int iommu_create_device_direct_mappings(struct iommu_domain *domain, dma_addr_t start, end, addr; size_t map_size = 0; - start = ALIGN(entry->start, pg_size); - end = ALIGN(entry->start + entry->length, pg_size); + if (entry->type == IOMMU_RESV_DIRECT) + dev->iommu->require_direct = 1; - if (entry->type != IOMMU_RESV_DIRECT && - entry->type != IOMMU_RESV_DIRECT_RELAXABLE) + if ((entry->type != IOMMU_RESV_DIRECT && + entry->type != IOMMU_RESV_DIRECT_RELAXABLE) || + !iommu_is_dma_domain(domain)) continue; + start = ALIGN(entry->start, pg_size); + end = ALIGN(entry->start + entry->length, pg_size); + for (addr = start; addr <= end; addr += pg_size) { phys_addr_t phys_addr; @@ -1058,22 +1116,16 @@ out: return ret; } -/** - * iommu_group_add_device - add a device to an iommu group - * @group: the group into which to add the device (reference should be held) - * @dev: the device - * - * This function is called by an iommu driver to add a device into a - * group. Adding a device increments the group reference count. - */ -int iommu_group_add_device(struct iommu_group *group, struct device *dev) +/* This is undone by __iommu_group_free_device() */ +static struct group_device *iommu_group_alloc_device(struct iommu_group *group, + struct device *dev) { int ret, i = 0; struct group_device *device; device = kzalloc(sizeof(*device), GFP_KERNEL); if (!device) - return -ENOMEM; + return ERR_PTR(-ENOMEM); device->dev = dev; @@ -1104,18 +1156,11 @@ rename: goto err_free_name; } - kobject_get(group->devices_kobj); - - dev->iommu_group = group; - - mutex_lock(&group->mutex); - list_add_tail(&device->list, &group->devices); - mutex_unlock(&group->mutex); trace_add_device_to_group(group->id, dev); dev_info(dev, "Adding to iommu group %d\n", group->id); - return 0; + return device; err_free_name: kfree(device->name); @@ -1124,7 +1169,32 @@ err_remove_link: err_free_device: kfree(device); dev_err(dev, "Failed to add to iommu group %d: %d\n", group->id, ret); - return ret; + return ERR_PTR(ret); +} + +/** + * iommu_group_add_device - add a device to an iommu group + * @group: the group into which to add the device (reference should be held) + * @dev: the device + * + * This function is called by an iommu driver to add a device into a + * group. Adding a device increments the group reference count. 
+ */ +int iommu_group_add_device(struct iommu_group *group, struct device *dev) +{ + struct group_device *gdev; + + gdev = iommu_group_alloc_device(group, dev); + if (IS_ERR(gdev)) + return PTR_ERR(gdev); + + iommu_group_ref_get(group); + dev->iommu_group = group; + + mutex_lock(&group->mutex); + list_add_tail(&gdev->list, &group->devices); + mutex_unlock(&group->mutex); + return 0; } EXPORT_SYMBOL_GPL(iommu_group_add_device); @@ -1138,19 +1208,13 @@ EXPORT_SYMBOL_GPL(iommu_group_add_device); void iommu_group_remove_device(struct device *dev) { struct iommu_group *group = dev->iommu_group; - struct group_device *device; if (!group) return; dev_info(dev, "Removing from iommu group %d\n", group->id); - mutex_lock(&group->mutex); - device = __iommu_group_remove_device(group, dev); - mutex_unlock(&group->mutex); - - if (device) - __iommu_group_release_device(group, device); + __iommu_group_remove_device(dev); } EXPORT_SYMBOL_GPL(iommu_group_remove_device); @@ -1708,45 +1772,6 @@ iommu_group_alloc_default_domain(struct iommu_group *group, int req_type) return dom; } -/** - * iommu_group_get_for_dev - Find or create the IOMMU group for a device - * @dev: target device - * - * This function is intended to be called by IOMMU drivers and extended to - * support common, bus-defined algorithms when determining or creating the - * IOMMU group for a device. On success, the caller will hold a reference - * to the returned IOMMU group, which will already include the provided - * device. The reference should be released with iommu_group_put(). - */ -static struct iommu_group *iommu_group_get_for_dev(struct device *dev) -{ - const struct iommu_ops *ops = dev_iommu_ops(dev); - struct iommu_group *group; - int ret; - - group = iommu_group_get(dev); - if (group) - return group; - - group = ops->device_group(dev); - if (WARN_ON_ONCE(group == NULL)) - return ERR_PTR(-EINVAL); - - if (IS_ERR(group)) - return group; - - ret = iommu_group_add_device(group, dev); - if (ret) - goto out_put_group; - - return group; - -out_put_group: - iommu_group_put(group); - - return ERR_PTR(ret); -} - struct iommu_domain *iommu_group_default_domain(struct iommu_group *group) { return group->default_domain; @@ -1755,16 +1780,8 @@ struct iommu_domain *iommu_group_default_domain(struct iommu_group *group) static int probe_iommu_group(struct device *dev, void *data) { struct list_head *group_list = data; - struct iommu_group *group; int ret; - /* Device is probed already if in a group */ - group = iommu_group_get(dev); - if (group) { - iommu_group_put(group); - return 0; - } - ret = __iommu_probe_device(dev, group_list); if (ret == -ENODEV) ret = 0; @@ -1840,11 +1857,6 @@ int bus_iommu_probe(const struct bus_type *bus) LIST_HEAD(group_list); int ret; - /* - * This code-path does not allocate the default domain when - * creating the iommu group, so do it after the groups are - * created. - */ ret = bus_for_each_dev(bus, NULL, &group_list, probe_iommu_group); if (ret) return ret; @@ -1857,6 +1869,11 @@ int bus_iommu_probe(const struct bus_type *bus) /* Remove item from the list */ list_del_init(&group->entry); + /* + * We go to the trouble of deferred default domain creation so + * that the cross-group default domain type and the setup of the + * IOMMU_RESV_DIRECT will work correctly in non-hotplug scenarios.
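+ * Every device on the bus has already been probed above, so each + * group's membership is complete before its default domain is chosen here.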
+ */ ret = iommu_setup_default_domain(group, 0); if (ret) { mutex_unlock(&group->mutex); @@ -2191,6 +2208,21 @@ static int __iommu_device_set_domain(struct iommu_group *group, { int ret; + /* + * If the device requires IOMMU_RESV_DIRECT then we cannot allow + * the blocking domain to be attached as it does not contain the + * required 1:1 mapping. This test effectively excludes the device + * being used with iommu_group_claim_dma_owner() which will block + * vfio and iommufd as well. + */ + if (dev->iommu->require_direct && + (new_domain->type == IOMMU_DOMAIN_BLOCKED || + new_domain == group->blocking_domain)) { + dev_warn(dev, + "Firmware has requested this device have a 1:1 IOMMU mapping, rejecting configuring the device without a 1:1 mapping. Contact your platform vendor.\n"); + return -EINVAL; + } + if (dev->iommu->attach_deferred) { if (new_domain == group->default_domain) return 0; @@ -3282,7 +3314,7 @@ static void __iommu_release_dma_ownership(struct iommu_group *group) /** * iommu_group_release_dma_owner() - Release DMA ownership of a group - * @dev: The device + * @group: The group * * Release the DMA ownership claimed by iommu_group_claim_dma_owner(). */ @@ -3296,7 +3328,7 @@ EXPORT_SYMBOL_GPL(iommu_group_release_dma_owner); /** * iommu_device_release_dma_owner() - Release DMA ownership of a device - * @group: The device. + * @dev: The device. * * Release the DMA ownership claimed by iommu_device_claim_dma_owner(). */ @@ -3479,3 +3511,30 @@ struct iommu_domain *iommu_sva_domain_alloc(struct device *dev, return domain; } + +ioasid_t iommu_alloc_global_pasid(struct device *dev) +{ + int ret; + + /* max_pasids == 0 means that the device does not support PASID */ + if (!dev->iommu->max_pasids) + return IOMMU_PASID_INVALID; + + /* + * max_pasids is set up by vendor driver based on number of PASID bits + * supported but the IDA allocation is inclusive. + */ + ret = ida_alloc_range(&iommu_global_pasid_ida, IOMMU_FIRST_GLOBAL_PASID, + dev->iommu->max_pasids - 1, GFP_KERNEL); + return ret < 0 ? 
IOMMU_PASID_INVALID : ret; +} +EXPORT_SYMBOL_GPL(iommu_alloc_global_pasid); + +void iommu_free_global_pasid(ioasid_t pasid) +{ + if (WARN_ON(pasid == IOMMU_PASID_INVALID)) + return; + + ida_free(&iommu_global_pasid_ida, pasid); +} +EXPORT_SYMBOL_GPL(iommu_free_global_pasid); diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index 9f64c5c9f5b9..65ff69477c43 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -14,11 +14,12 @@ #include <linux/init.h> #include <linux/interrupt.h> #include <linux/io.h> +#include <linux/iopoll.h> #include <linux/io-pgtable.h> #include <linux/iommu.h> #include <linux/of.h> -#include <linux/of_device.h> #include <linux/of_platform.h> +#include <linux/pci.h> #include <linux/platform_device.h> #include <linux/sizes.h> #include <linux/slab.h> @@ -253,17 +254,13 @@ static void ipmmu_imuctr_write(struct ipmmu_vmsa_device *mmu, /* Wait for any pending TLB invalidations to complete */ static void ipmmu_tlb_sync(struct ipmmu_vmsa_domain *domain) { - unsigned int count = 0; + u32 val; - while (ipmmu_ctx_read_root(domain, IMCTR) & IMCTR_FLUSH) { - cpu_relax(); - if (++count == TLB_LOOP_TIMEOUT) { - dev_err_ratelimited(domain->mmu->dev, + if (read_poll_timeout_atomic(ipmmu_ctx_read_root, val, + !(val & IMCTR_FLUSH), 1, TLB_LOOP_TIMEOUT, + false, domain, IMCTR)) + dev_err_ratelimited(domain->mmu->dev, "TLB sync timed out -- MMU may be deadlocked\n"); - return; - } - udelay(1); - } } static void ipmmu_tlb_invalidate(struct ipmmu_vmsa_domain *domain) @@ -723,6 +720,10 @@ static bool ipmmu_device_is_allowed(struct device *dev) if (soc_device_match(soc_denylist)) return false; + /* Check whether this device is a PCI device */ + if (dev_is_pci(dev)) + return true; + /* Check whether this device can work with the IPMMU */ for (i = 0; i < ARRAY_SIZE(devices_allowlist); i++) { if (!strcmp(dev_name(dev), devices_allowlist[i])) diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index e93906d6e112..640275873a27 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -3,6 +3,7 @@ * Copyright (c) 2015-2016 MediaTek Inc. 
* Author: Yong Wu <[email protected]> */ +#include <linux/arm-smccc.h> #include <linux/bitfield.h> #include <linux/bug.h> #include <linux/clk.h> @@ -27,6 +28,7 @@ #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/soc/mediatek/infracfg.h> +#include <linux/soc/mediatek/mtk_sip_svc.h> #include <asm/barrier.h> #include <soc/mediatek/smi.h> @@ -143,6 +145,7 @@ #define PGTABLE_PA_35_EN BIT(17) #define TF_PORT_TO_ADDR_MT8173 BIT(18) #define INT_ID_PORT_WIDTH_6 BIT(19) +#define CFG_IFA_MASTER_IN_ATF BIT(20) #define MTK_IOMMU_HAS_FLAG_MASK(pdata, _x, mask) \ ((((pdata)->flags) & (mask)) == (_x)) @@ -167,6 +170,7 @@ enum mtk_iommu_plat { M4U_MT8173, M4U_MT8183, M4U_MT8186, + M4U_MT8188, M4U_MT8192, M4U_MT8195, M4U_MT8365, @@ -258,6 +262,8 @@ struct mtk_iommu_data { struct device *smicomm_dev; struct mtk_iommu_bank_data *bank; + struct mtk_iommu_domain *share_dom; /* For 2 HWs share pgtable */ + struct regmap *pericfg; struct mutex mutex; /* Protect m4u_group/m4u_dom above */ @@ -577,41 +583,55 @@ static int mtk_iommu_config(struct mtk_iommu_data *data, struct device *dev, unsigned int larbid, portid; struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); const struct mtk_iommu_iova_region *region; - u32 peri_mmuen, peri_mmuen_msk; + unsigned long portid_msk = 0; + struct arm_smccc_res res; int i, ret = 0; for (i = 0; i < fwspec->num_ids; ++i) { - larbid = MTK_M4U_TO_LARB(fwspec->ids[i]); portid = MTK_M4U_TO_PORT(fwspec->ids[i]); + portid_msk |= BIT(portid); + } - if (MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_MM)) { - larb_mmu = &data->larb_imu[larbid]; + if (MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_MM)) { + /* All ports should be in the same larb. just use 0 here */ + larbid = MTK_M4U_TO_LARB(fwspec->ids[0]); + larb_mmu = &data->larb_imu[larbid]; + region = data->plat_data->iova_region + regionid; - region = data->plat_data->iova_region + regionid; + for_each_set_bit(portid, &portid_msk, 32) larb_mmu->bank[portid] = upper_32_bits(region->iova_base); - dev_dbg(dev, "%s iommu for larb(%s) port %d region %d rgn-bank %d.\n", - enable ? "enable" : "disable", dev_name(larb_mmu->dev), - portid, regionid, larb_mmu->bank[portid]); + dev_dbg(dev, "%s iommu for larb(%s) port 0x%lx region %d rgn-bank %d.\n", + enable ? "enable" : "disable", dev_name(larb_mmu->dev), + portid_msk, regionid, upper_32_bits(region->iova_base)); - if (enable) - larb_mmu->mmu |= MTK_SMI_MMU_EN(portid); - else - larb_mmu->mmu &= ~MTK_SMI_MMU_EN(portid); - } else if (MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_INFRA)) { - peri_mmuen_msk = BIT(portid); + if (enable) + larb_mmu->mmu |= portid_msk; + else + larb_mmu->mmu &= ~portid_msk; + } else if (MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_INFRA)) { + if (MTK_IOMMU_HAS_FLAG(data->plat_data, CFG_IFA_MASTER_IN_ATF)) { + arm_smccc_smc(MTK_SIP_KERNEL_IOMMU_CONTROL, + IOMMU_ATF_CMD_CONFIG_INFRA_IOMMU, + portid_msk, enable, 0, 0, 0, 0, &res); + ret = res.a0; + } else { /* PCI dev has only one output id, enable the next writing bit for PCIe */ - if (dev_is_pci(dev)) - peri_mmuen_msk |= BIT(portid + 1); + if (dev_is_pci(dev)) { + if (fwspec->num_ids != 1) { + dev_err(dev, "PCI dev can only have one port.\n"); + return -ENODEV; + } + portid_msk |= BIT(portid + 1); + } - peri_mmuen = enable ? peri_mmuen_msk : 0; ret = regmap_update_bits(data->pericfg, PERICFG_IOMMU_1, - peri_mmuen_msk, peri_mmuen); - if (ret) - dev_err(dev, "%s iommu(%s) inframaster 0x%x fail(%d).\n", - enable ? 
"enable" : "disable", - dev_name(data->dev), peri_mmuen_msk, ret); + (u32)portid_msk, enable ? (u32)portid_msk : 0); } + if (ret) + dev_err(dev, "%s iommu(%s) inframaster 0x%lx fail(%d).\n", + enable ? "enable" : "disable", + dev_name(data->dev), portid_msk, ret); } return ret; } @@ -620,15 +640,14 @@ static int mtk_iommu_domain_finalise(struct mtk_iommu_domain *dom, struct mtk_iommu_data *data, unsigned int region_id) { + struct mtk_iommu_domain *share_dom = data->share_dom; const struct mtk_iommu_iova_region *region; - struct mtk_iommu_domain *m4u_dom; - - /* Always use bank0 in sharing pgtable case */ - m4u_dom = data->bank[0].m4u_dom; - if (m4u_dom) { - dom->iop = m4u_dom->iop; - dom->cfg = m4u_dom->cfg; - dom->domain.pgsize_bitmap = m4u_dom->cfg.pgsize_bitmap; + + /* Always use share domain in sharing pgtable case */ + if (MTK_IOMMU_HAS_FLAG(data->plat_data, SHARE_PGTABLE) && share_dom) { + dom->iop = share_dom->iop; + dom->cfg = share_dom->cfg; + dom->domain.pgsize_bitmap = share_dom->cfg.pgsize_bitmap; goto update_iova_region; } @@ -658,6 +677,9 @@ static int mtk_iommu_domain_finalise(struct mtk_iommu_domain *dom, /* Update our support page sizes bitmap */ dom->domain.pgsize_bitmap = dom->cfg.pgsize_bitmap; + if (MTK_IOMMU_HAS_FLAG(data->plat_data, SHARE_PGTABLE)) + data->share_dom = dom; + update_iova_region: /* Update the iova region for this domain */ region = data->plat_data->iova_region + region_id; @@ -708,7 +730,9 @@ static int mtk_iommu_attach_device(struct iommu_domain *domain, /* Data is in the frstdata in sharing pgtable case. */ frstdata = mtk_iommu_get_frst_data(hw_list); + mutex_lock(&frstdata->mutex); ret = mtk_iommu_domain_finalise(dom, frstdata, region_id); + mutex_unlock(&frstdata->mutex); if (ret) { mutex_unlock(&dom->mutex); return ret; @@ -1318,7 +1342,8 @@ static int mtk_iommu_probe(struct platform_device *pdev) dev_err_probe(dev, ret, "mm dts parse fail\n"); goto out_runtime_disable; } - } else if (MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_INFRA)) { + } else if (MTK_IOMMU_IS_TYPE(data->plat_data, MTK_IOMMU_TYPE_INFRA) && + !MTK_IOMMU_HAS_FLAG(data->plat_data, CFG_IFA_MASTER_IN_ATF)) { p = data->plat_data->pericfg_comp_str; data->pericfg = syscon_regmap_lookup_by_compatible(p); if (IS_ERR(data->pericfg)) { @@ -1570,6 +1595,67 @@ static const struct mtk_iommu_plat_data mt8186_data_mm = { .iova_region_larb_msk = mt8186_larb_region_msk, }; +static const struct mtk_iommu_plat_data mt8188_data_infra = { + .m4u_plat = M4U_MT8188, + .flags = WR_THROT_EN | DCM_DISABLE | STD_AXI_MODE | PM_CLK_AO | + MTK_IOMMU_TYPE_INFRA | IFA_IOMMU_PCIE_SUPPORT | + PGTABLE_PA_35_EN | CFG_IFA_MASTER_IN_ATF, + .inv_sel_reg = REG_MMU_INV_SEL_GEN2, + .banks_num = 1, + .banks_enable = {true}, + .iova_region = single_domain, + .iova_region_nr = ARRAY_SIZE(single_domain), +}; + +static const u32 mt8188_larb_region_msk[MT8192_MULTI_REGION_NR_MAX][MTK_LARB_NR_MAX] = { + [0] = {~0, ~0, ~0, ~0}, /* Region0: all ports for larb0/1/2/3 */ + [1] = {0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, ~0, ~0, ~0}, /* Region1: larb19(21)/21(22)/23 */ + [2] = {0, 0, 0, 0, ~0, ~0, ~0, ~0, /* Region2: the other larbs. 
*/ + ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, + ~0, ~0, ~0, ~0, ~0, 0, 0, 0, + 0, ~0}, + [3] = {0}, + [4] = {[24] = BIT(0) | BIT(1)}, /* Only larb27(24) port0/1 */ + [5] = {[24] = BIT(2) | BIT(3)}, /* Only larb27(24) port2/3 */ +}; + +static const struct mtk_iommu_plat_data mt8188_data_vdo = { + .m4u_plat = M4U_MT8188, + .flags = HAS_BCLK | HAS_SUB_COMM_3BITS | OUT_ORDER_WR_EN | + WR_THROT_EN | IOVA_34_EN | SHARE_PGTABLE | + PGTABLE_PA_35_EN | MTK_IOMMU_TYPE_MM, + .hw_list = &m4ulist, + .inv_sel_reg = REG_MMU_INV_SEL_GEN2, + .banks_num = 1, + .banks_enable = {true}, + .iova_region = mt8192_multi_dom, + .iova_region_nr = ARRAY_SIZE(mt8192_multi_dom), + .iova_region_larb_msk = mt8188_larb_region_msk, + .larbid_remap = {{2}, {0}, {21}, {0}, {19}, {9, 10, + 11 /* 11a */, 25 /* 11c */}, + {13, 0, 29 /* 16b */, 30 /* 17b */, 0}, {5}}, +}; + +static const struct mtk_iommu_plat_data mt8188_data_vpp = { + .m4u_plat = M4U_MT8188, + .flags = HAS_BCLK | HAS_SUB_COMM_3BITS | OUT_ORDER_WR_EN | + WR_THROT_EN | IOVA_34_EN | SHARE_PGTABLE | + PGTABLE_PA_35_EN | MTK_IOMMU_TYPE_MM, + .hw_list = &m4ulist, + .inv_sel_reg = REG_MMU_INV_SEL_GEN2, + .banks_num = 1, + .banks_enable = {true}, + .iova_region = mt8192_multi_dom, + .iova_region_nr = ARRAY_SIZE(mt8192_multi_dom), + .iova_region_larb_msk = mt8188_larb_region_msk, + .larbid_remap = {{1}, {3}, {23}, {7}, {MTK_INVALID_LARBID}, + {12, 15, 24 /* 11b */}, {14, MTK_INVALID_LARBID, + 16 /* 16a */, 17 /* 17a */, MTK_INVALID_LARBID, + 27, 28 /* ccu0 */, MTK_INVALID_LARBID}, {4, 6}}, +}; + static const unsigned int mt8192_larb_region_msk[MT8192_MULTI_REGION_NR_MAX][MTK_LARB_NR_MAX] = { [0] = {~0, ~0}, /* Region0: larb0/1 */ [1] = {0, 0, 0, 0, ~0, ~0, 0, ~0}, /* Region1: larb4/5/7 */ @@ -1678,6 +1764,9 @@ static const struct of_device_id mtk_iommu_of_ids[] = { { .compatible = "mediatek,mt8173-m4u", .data = &mt8173_data}, { .compatible = "mediatek,mt8183-m4u", .data = &mt8183_data}, { .compatible = "mediatek,mt8186-iommu-mm", .data = &mt8186_data_mm}, /* mm: m4u */ + { .compatible = "mediatek,mt8188-iommu-infra", .data = &mt8188_data_infra}, + { .compatible = "mediatek,mt8188-iommu-vdo", .data = &mt8188_data_vdo}, + { .compatible = "mediatek,mt8188-iommu-vpp", .data = &mt8188_data_vpp}, { .compatible = "mediatek,mt8192-m4u", .data = &mt8192_data}, { .compatible = "mediatek,mt8195-iommu-infra", .data = &mt8195_data_infra}, { .compatible = "mediatek,mt8195-iommu-vdo", .data = &mt8195_data_vdo}, diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c index 40f57d293a79..157b286e36bf 100644 --- a/drivers/iommu/of_iommu.c +++ b/drivers/iommu/of_iommu.c @@ -159,7 +159,7 @@ const struct iommu_ops *of_iommu_configure(struct device *dev, * If we have reason to believe the IOMMU driver missed the initial * probe for dev, replay it to get things in order. 
*/ - if (!err && dev->bus && !device_iommu_mapped(dev)) + if (!err && dev->bus) err = iommu_probe_device(dev); /* Ignore all other errors apart from EPROBE_DEFER */ diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c index 4054030c3237..8ff69fbf9f65 100644 --- a/drivers/iommu/rockchip-iommu.c +++ b/drivers/iommu/rockchip-iommu.c @@ -98,9 +98,8 @@ struct rk_iommu_ops { phys_addr_t (*pt_address)(u32 dte); u32 (*mk_dtentries)(dma_addr_t pt_dma); u32 (*mk_ptentries)(phys_addr_t page, int prot); - phys_addr_t (*dte_addr_phys)(u32 addr); - u32 (*dma_addr_dte)(dma_addr_t dt_dma); u64 dma_bit_mask; + gfp_t gfp_flags; }; struct rk_iommu { @@ -278,8 +277,8 @@ static u32 rk_mk_pte(phys_addr_t page, int prot) /* * In v2: * 31:12 - Page address bit 31:0 - * 11:9 - Page address bit 34:32 - * 8:4 - Page address bit 39:35 + * 11: 8 - Page address bit 35:32 + * 7: 4 - Page address bit 39:36 * 3 - Security * 2 - Writable * 1 - Readable @@ -506,7 +505,7 @@ static int rk_iommu_force_reset(struct rk_iommu *iommu) /* * Check if register DTE_ADDR is working by writing DTE_ADDR_DUMMY - * and verifying that upper 5 nybbles are read back. + * and verifying that upper 5 (v1) or 7 (v2) nybbles are read back. */ for (i = 0; i < iommu->num_mmu; i++) { dte_addr = rk_ops->pt_address(DTE_ADDR_DUMMY); @@ -531,33 +530,6 @@ static int rk_iommu_force_reset(struct rk_iommu *iommu) return 0; } -static inline phys_addr_t rk_dte_addr_phys(u32 addr) -{ - return (phys_addr_t)addr; -} - -static inline u32 rk_dma_addr_dte(dma_addr_t dt_dma) -{ - return dt_dma; -} - -#define DT_HI_MASK GENMASK_ULL(39, 32) -#define DTE_BASE_HI_MASK GENMASK(11, 4) -#define DT_SHIFT 28 - -static inline phys_addr_t rk_dte_addr_phys_v2(u32 addr) -{ - u64 addr64 = addr; - return (phys_addr_t)(addr64 & RK_DTE_PT_ADDRESS_MASK) | - ((addr64 & DTE_BASE_HI_MASK) << DT_SHIFT); -} - -static inline u32 rk_dma_addr_dte_v2(dma_addr_t dt_dma) -{ - return (dt_dma & RK_DTE_PT_ADDRESS_MASK) | - ((dt_dma & DT_HI_MASK) >> DT_SHIFT); -} - static void log_iova(struct rk_iommu *iommu, int index, dma_addr_t iova) { void __iomem *base = iommu->bases[index]; @@ -577,7 +549,7 @@ static void log_iova(struct rk_iommu *iommu, int index, dma_addr_t iova) page_offset = rk_iova_page_offset(iova); mmu_dte_addr = rk_iommu_read(base, RK_MMU_DTE_ADDR); - mmu_dte_addr_phys = rk_ops->dte_addr_phys(mmu_dte_addr); + mmu_dte_addr_phys = rk_ops->pt_address(mmu_dte_addr); dte_addr_phys = mmu_dte_addr_phys + (4 * dte_index); dte_addr = phys_to_virt(dte_addr_phys); @@ -756,7 +728,7 @@ static u32 *rk_dte_get_page_table(struct rk_iommu_domain *rk_domain, if (rk_dte_is_pt_valid(dte)) goto done; - page_table = (u32 *)get_zeroed_page(GFP_ATOMIC | GFP_DMA32); + page_table = (u32 *)get_zeroed_page(GFP_ATOMIC | rk_ops->gfp_flags); if (!page_table) return ERR_PTR(-ENOMEM); @@ -967,7 +939,7 @@ static int rk_iommu_enable(struct rk_iommu *iommu) for (i = 0; i < iommu->num_mmu; i++) { rk_iommu_write(iommu->bases[i], RK_MMU_DTE_ADDR, - rk_ops->dma_addr_dte(rk_domain->dt_dma)); + rk_ops->mk_dtentries(rk_domain->dt_dma)); rk_iommu_base_command(iommu->bases[i], RK_MMU_CMD_ZAP_CACHE); rk_iommu_write(iommu->bases[i], RK_MMU_INT_MASK, RK_MMU_IRQ_MASK); } @@ -1105,7 +1077,7 @@ static struct iommu_domain *rk_iommu_domain_alloc(unsigned type) * Each level1 (dt) and level2 (pt) table has 1024 4-byte entries. * Allocate one 4 KiB page for each table. 
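+ * Note that v1 page tables must sit below 4 GiB (GFP_DMA32), while v2 + * hardware accepts 40-bit physical addresses, so the zone restriction + * now comes from rk_ops->gfp_flags.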
*/ - rk_domain->dt = (u32 *)get_zeroed_page(GFP_KERNEL | GFP_DMA32); + rk_domain->dt = (u32 *)get_zeroed_page(GFP_KERNEL | rk_ops->gfp_flags); if (!rk_domain->dt) goto err_free_domain; @@ -1405,18 +1377,16 @@ static struct rk_iommu_ops iommu_data_ops_v1 = { .pt_address = &rk_dte_pt_address, .mk_dtentries = &rk_mk_dte, .mk_ptentries = &rk_mk_pte, - .dte_addr_phys = &rk_dte_addr_phys, - .dma_addr_dte = &rk_dma_addr_dte, .dma_bit_mask = DMA_BIT_MASK(32), + .gfp_flags = GFP_DMA32, }; static struct rk_iommu_ops iommu_data_ops_v2 = { .pt_address = &rk_dte_pt_address_v2, .mk_dtentries = &rk_mk_dte_v2, .mk_ptentries = &rk_mk_pte_v2, - .dte_addr_phys = &rk_dte_addr_phys_v2, - .dma_addr_dte = &rk_dma_addr_dte_v2, .dma_bit_mask = DMA_BIT_MASK(40), + .gfp_flags = 0, }; static const struct of_device_id rk_iommu_dt_ids[] = { diff --git a/drivers/iommu/sprd-iommu.c b/drivers/iommu/sprd-iommu.c index 39e34fdeccda..2fa9afebd4f5 100644 --- a/drivers/iommu/sprd-iommu.c +++ b/drivers/iommu/sprd-iommu.c @@ -14,6 +14,7 @@ #include <linux/mfd/syscon.h> #include <linux/module.h> #include <linux/of_platform.h> +#include <linux/platform_device.h> #include <linux/regmap.h> #include <linux/slab.h> @@ -148,6 +149,7 @@ static struct iommu_domain *sprd_iommu_domain_alloc(unsigned int domain_type) dom->domain.geometry.aperture_start = 0; dom->domain.geometry.aperture_end = SZ_256M - 1; + dom->domain.geometry.force_aperture = true; return &dom->domain; } diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index 1cbf063ccf14..e445f80d0226 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -9,7 +9,7 @@ #include <linux/iommu.h> #include <linux/kernel.h> #include <linux/of.h> -#include <linux/of_device.h> +#include <linux/of_platform.h> #include <linux/pci.h> #include <linux/platform_device.h> #include <linux/slab.h> diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c index 3551ed057774..17dcd826f5c2 100644 --- a/drivers/iommu/virtio-iommu.c +++ b/drivers/iommu/virtio-iommu.c @@ -13,7 +13,7 @@ #include <linux/interval_tree.h> #include <linux/iommu.h> #include <linux/module.h> -#include <linux/of_platform.h> +#include <linux/of.h> #include <linux/pci.h> #include <linux/virtio.h> #include <linux/virtio_config.h> diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h index f178ed9899a9..3ae8e8af8ab3 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h @@ -3721,6 +3721,60 @@ struct hwrm_func_backing_store_qcaps_v2_output { u8 valid; }; +/* hwrm_func_dbr_pacing_qcfg_input (size:128b/16B) */ +struct hwrm_func_dbr_pacing_qcfg_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; +}; + +/* hwrm_func_dbr_pacing_qcfg_output (size:512b/64B) */ +struct hwrm_func_dbr_pacing_qcfg_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + u8 flags; +#define FUNC_DBR_PACING_QCFG_RESP_FLAGS_DBR_NQ_EVENT_ENABLED 0x1UL + u8 unused_0[7]; + __le32 dbr_stat_db_fifo_reg; +#define FUNC_DBR_PACING_QCFG_RESP_DBR_STAT_DB_FIFO_REG_ADDR_SPACE_MASK 0x3UL +#define FUNC_DBR_PACING_QCFG_RESP_DBR_STAT_DB_FIFO_REG_ADDR_SPACE_SFT 0 +#define FUNC_DBR_PACING_QCFG_RESP_DBR_STAT_DB_FIFO_REG_ADDR_SPACE_PCIE_CFG 0x0UL +#define FUNC_DBR_PACING_QCFG_RESP_DBR_STAT_DB_FIFO_REG_ADDR_SPACE_GRC 0x1UL +#define FUNC_DBR_PACING_QCFG_RESP_DBR_STAT_DB_FIFO_REG_ADDR_SPACE_BAR0 0x2UL +#define 
FUNC_DBR_PACING_QCFG_RESP_DBR_STAT_DB_FIFO_REG_ADDR_SPACE_BAR1 0x3UL +#define FUNC_DBR_PACING_QCFG_RESP_DBR_STAT_DB_FIFO_REG_ADDR_SPACE_LAST \ + FUNC_DBR_PACING_QCFG_RESP_DBR_STAT_DB_FIFO_REG_ADDR_SPACE_BAR1 +#define FUNC_DBR_PACING_QCFG_RESP_DBR_STAT_DB_FIFO_REG_ADDR_MASK 0xfffffffcUL +#define FUNC_DBR_PACING_QCFG_RESP_DBR_STAT_DB_FIFO_REG_ADDR_SFT 2 + __le32 dbr_stat_db_fifo_reg_watermark_mask; + u8 dbr_stat_db_fifo_reg_watermark_shift; + u8 unused_1[3]; + __le32 dbr_stat_db_fifo_reg_fifo_room_mask; + u8 dbr_stat_db_fifo_reg_fifo_room_shift; + u8 unused_2[3]; + __le32 dbr_throttling_aeq_arm_reg; +#define FUNC_DBR_PACING_QCFG_RESP_DBR_THROTTLING_AEQ_ARM_REG_ADDR_SPACE_MASK 0x3UL +#define FUNC_DBR_PACING_QCFG_RESP_DBR_THROTTLING_AEQ_ARM_REG_ADDR_SPACE_SFT 0 +#define FUNC_DBR_PACING_QCFG_RESP_DBR_THROTTLING_AEQ_ARM_REG_ADDR_SPACE_PCIE_CFG 0x0UL +#define FUNC_DBR_PACING_QCFG_RESP_DBR_THROTTLING_AEQ_ARM_REG_ADDR_SPACE_GRC 0x1UL +#define FUNC_DBR_PACING_QCFG_RESP_DBR_THROTTLING_AEQ_ARM_REG_ADDR_SPACE_BAR0 0x2UL +#define FUNC_DBR_PACING_QCFG_RESP_DBR_THROTTLING_AEQ_ARM_REG_ADDR_SPACE_BAR1 0x3UL +#define FUNC_DBR_PACING_QCFG_RESP_DBR_THROTTLING_AEQ_ARM_REG_ADDR_SPACE_LAST \ + FUNC_DBR_PACING_QCFG_RESP_DBR_THROTTLING_AEQ_ARM_REG_ADDR_SPACE_BAR1 +#define FUNC_DBR_PACING_QCFG_RESP_DBR_THROTTLING_AEQ_ARM_REG_ADDR_MASK 0xfffffffcUL +#define FUNC_DBR_PACING_QCFG_RESP_DBR_THROTTLING_AEQ_ARM_REG_ADDR_SFT 2 + u8 dbr_throttling_aeq_arm_reg_val; + u8 unused_3[7]; + __le32 primary_nq_id; + __le32 pacing_threshold; + u8 unused_4[7]; + u8 valid; +}; + /* hwrm_func_drv_if_change_input (size:192b/24B) */ struct hwrm_func_drv_if_change_input { __le16 req_type; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c index 852eb449ccae..6ba2b9398633 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c @@ -345,7 +345,7 @@ static void bnxt_set_edev_info(struct bnxt_en_dev *edev, struct bnxt *bp) edev->hw_ring_stats_size = bp->hw_ring_stats_size; edev->pf_port_id = bp->pf.port_id; edev->en_state = bp->state; - + edev->bar0 = bp->bar0; edev->ulp_tbl->msix_requested = bnxt_get_ulp_msix_num(bp); } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h index 80cbc4b6130a..6ff77f082e6c 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h @@ -81,6 +81,7 @@ struct bnxt_en_dev { * mode only. Will be * updated in resume. */ + void __iomem *bar0; }; static inline bool bnxt_ulp_registered(struct bnxt_en_dev *edev) diff --git a/drivers/video/fbdev/g364fb.c b/drivers/video/fbdev/g364fb.c index 7a1013b22fa7..ee6fe51e0a6b 100644 --- a/drivers/video/fbdev/g364fb.c +++ b/drivers/video/fbdev/g364fb.c @@ -112,7 +112,7 @@ static int g364fb_blank(int blank, struct fb_info *info); static const struct fb_ops g364fb_ops = { .owner = THIS_MODULE, - FB_DEFAULT_IOMEM_HELPERS, + FB_DEFAULT_IOMEM_OPS, .fb_setcolreg = g364fb_setcolreg, .fb_pan_display = g364fb_pan_display, .fb_blank = g364fb_blank, diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index 62356d542ef6..e5bca9a004cc 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -98,7 +98,14 @@ MODULE_PARM_DESC(kmsg_bytes, "amount of kernel log to snapshot (in bytes)"); static void *compress_workspace; +/* + * Compression is only used for dmesg output, which consists of low-entropy + * ASCII text, and so we can assume worst-case 60%. 
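+ * That is, compressed output is assumed to be at most 60% of the input + * size; a 10000 byte record buffer, for example, gets a staging buffer of + * 10000 * 100 / 60 = 16666 bytes for the uncompressed text below.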
+ */ +#define DMESG_COMP_PERCENT 60 + static char *big_oops_buf; +static size_t max_compressed_size; void pstore_set_kmsg_bytes(int bytes) { @@ -196,6 +203,7 @@ static int pstore_compress(const void *in, void *out, static void allocate_buf_for_compression(void) { + size_t compressed_size; char *buf; /* Skip if not built-in or compression disabled. */ @@ -216,7 +224,8 @@ static void allocate_buf_for_compression(void) * uncompressed record size, since any record that would be expanded by * compression is just stored uncompressed. */ - buf = kvzalloc(psinfo->bufsize, GFP_KERNEL); + compressed_size = (psinfo->bufsize * 100) / DMESG_COMP_PERCENT; + buf = kvzalloc(compressed_size, GFP_KERNEL); if (!buf) { pr_err("Failed %zu byte compression buffer allocation for: %s\n", psinfo->bufsize, compress); @@ -233,6 +242,7 @@ static void allocate_buf_for_compression(void) /* A non-NULL big_oops_buf indicates compression is available. */ big_oops_buf = buf; + max_compressed_size = compressed_size; pr_info("Using crash dump compression: %s\n", compress); } @@ -246,6 +256,7 @@ static void free_buf_for_compression(void) kvfree(big_oops_buf); big_oops_buf = NULL; + max_compressed_size = 0; } void pstore_record_init(struct pstore_record *record, @@ -305,7 +316,7 @@ static void pstore_dump(struct kmsg_dumper *dumper, record.buf = psinfo->buf; dst = big_oops_buf ?: psinfo->buf; - dst_size = psinfo->bufsize; + dst_size = max_compressed_size ?: psinfo->bufsize; /* Write dump header. */ header_size = snprintf(dst, dst_size, "%s#%d Part%u\n", why, @@ -326,8 +337,15 @@ static void pstore_dump(struct kmsg_dumper *dumper, record.compressed = true; record.size = zipped_len; } else { - record.size = header_size + dump_size; - memcpy(psinfo->buf, dst, record.size); + /* + * Compression failed, so the buffer is most + * likely filled with binary data that does not + * compress as well as ASCII text. Copy as much + * of the uncompressed data as possible into + * the pstore record, and discard the rest. + */ + record.size = psinfo->bufsize; + memcpy(psinfo->buf, dst, psinfo->bufsize); } } else { record.size = header_size + dump_size; @@ -560,6 +578,7 @@ static void decompress_record(struct pstore_record *record, int ret; int unzipped_len; char *unzipped, *workspace; + size_t max_uncompressed_size; if (!IS_ENABLED(CONFIG_PSTORE_COMPRESS) || !record->compressed) return; @@ -583,7 +602,8 @@ static void decompress_record(struct pstore_record *record, } /* Allocate enough space to hold max decompression and ECC. 
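+ * The worst case is taken to be a 3x expansion of the record buffer, + * comfortably above the 100/60 ratio assumed on the compression side.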
*/ - workspace = kvzalloc(psinfo->bufsize + record->ecc_notice_size, + max_uncompressed_size = 3 * psinfo->bufsize; + workspace = kvzalloc(max_uncompressed_size + record->ecc_notice_size, GFP_KERNEL); if (!workspace) return; @@ -591,11 +611,11 @@ static void decompress_record(struct pstore_record *record, zstream->next_in = record->buf; zstream->avail_in = record->size; zstream->next_out = workspace; - zstream->avail_out = psinfo->bufsize; + zstream->avail_out = max_uncompressed_size; ret = zlib_inflate(zstream, Z_FINISH); if (ret != Z_STREAM_END) { - pr_err("zlib_inflate() failed, ret = %d!\n", ret); + pr_err_ratelimited("zlib_inflate() failed, ret = %d!\n", ret); kvfree(workspace); return; } diff --git a/include/dt-bindings/memory/mediatek,mt8188-memory-port.h b/include/dt-bindings/memory/mediatek,mt8188-memory-port.h new file mode 100644 index 000000000000..337ab11262af --- /dev/null +++ b/include/dt-bindings/memory/mediatek,mt8188-memory-port.h @@ -0,0 +1,489 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* + * Copyright (c) 2022 MediaTek Inc. + * Author: Chengci Xu <[email protected]> + */ +#ifndef _DT_BINDINGS_MEMORY_MEDIATEK_MT8188_LARB_PORT_H_ +#define _DT_BINDINGS_MEMORY_MEDIATEK_MT8188_LARB_PORT_H_ + +#include <dt-bindings/memory/mtk-memory-port.h> + +/* + * MM IOMMU larbs: + * From below, for example larb11 has larb11a/larb11b/larb11c, so the + * larb indexes are not in order; we therefore reindex the larbs from a + * software view. + */ +#define SMI_L0_ID 0 +#define SMI_L1_ID 1 +#define SMI_L2_ID 2 +#define SMI_L3_ID 3 +#define SMI_L4_ID 4 +#define SMI_L5_ID 5 +#define SMI_L6_ID 6 +#define SMI_L7_ID 7 +#define SMI_L9_ID 8 +#define SMI_L10_ID 9 +#define SMI_L11A_ID 10 +#define SMI_L11B_ID 11 +#define SMI_L11C_ID 12 +#define SMI_L12_ID 13 +#define SMI_L13_ID 14 +#define SMI_L14_ID 15 +#define SMI_L15_ID 16 +#define SMI_L16A_ID 17 +#define SMI_L16B_ID 18 +#define SMI_L17A_ID 19 +#define SMI_L17B_ID 20 +#define SMI_L19_ID 21 +#define SMI_L21_ID 22 +#define SMI_L23_ID 23 +#define SMI_L27_ID 24 +#define SMI_L28_ID 25 + +/* + * MM IOMMU supports a 16GB dma address space. We separate it into four + * ranges: 0 ~ 4G; 4G ~ 8G; 8G ~ 12G; 12G ~ 16G, and each master can be + * placed in any of these regions. BUT: + * a) Make sure all the ports inside a larb are in one range. + * b) The iova of any master can NOT cross the 4G/8G/12G boundary. + * + * This is the suggested mapping in this SoC: + * + * modules dma-address-region larbs-ports + * disp 0 ~ 4G larb0/1/2/3 + * vcodec 4G ~ 8G larb19(21)[1]/21(22)/23 + * cam/mdp 8G ~ 12G the other larbs. + * N/A 12G ~ 16G + * CCU0 0x24000_0000 ~ 0x243ff_ffff larb27(24): port 0/1 + * CCU1 0x24400_0000 ~ 0x247ff_ffff larb27(24): port 2/3 + * + * This SoC has two MM IOMMU HWs; the larbs are connected as follows: + * iommu-vdo: larb0/2/5/9/10/11A/11C/13/16B/17B/19/21 + * iommu-vpp: larb1/3/4/6/7/11B/12/14/15/16A/17A/23/27 + * + * [1]: This is larb19, but the index is 21 from the SW view.
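+ * + * Each port ID below is built with MTK_M4U_ID(larb, port) from + * mtk-memory-port.h, which packs the software larb index and the port + * index into one master ID (the port is kept in the low bits).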
+ */ + +/* MM IOMMU ports */ +/* LARB 0 -- VDO-0 */ +#define M4U_PORT_L0_DISP_RDMA1 MTK_M4U_ID(SMI_L0_ID, 0) +#define M4U_PORT_L0_DISP_WDMA0 MTK_M4U_ID(SMI_L0_ID, 1) +#define M4U_PORT_L0_DISP_OVL0_RDMA0 MTK_M4U_ID(SMI_L0_ID, 2) +#define M4U_PORT_L0_DISP_OVL0_RDMA1 MTK_M4U_ID(SMI_L0_ID, 3) +#define M4U_PORT_L0_DISP_OVL0_HDR MTK_M4U_ID(SMI_L0_ID, 4) +#define M4U_PORT_L0_DISP_POSTMASK0 MTK_M4U_ID(SMI_L0_ID, 5) +#define M4U_PORT_L0_DISP_FAKE_ENG0 MTK_M4U_ID(SMI_L0_ID, 6) + +/* LARB 1 -- VD0-0 */ +#define M4U_PORT_L1_DISP_RDMA0 MTK_M4U_ID(SMI_L1_ID, 0) +#define M4U_PORT_L1_DISP_WDMA1 MTK_M4U_ID(SMI_L1_ID, 1) +#define M4U_PORT_L1_DISP_OVL1_RDMA0 MTK_M4U_ID(SMI_L1_ID, 2) +#define M4U_PORT_L1_DISP_OVL1_RDMA1 MTK_M4U_ID(SMI_L1_ID, 3) +#define M4U_PORT_L1_DISP_OVL1_HDR MTK_M4U_ID(SMI_L1_ID, 4) +#define M4U_PORT_L1_DISP_WROT0 MTK_M4U_ID(SMI_L1_ID, 5) +#define M4U_PORT_L1_DISP_FAKE_ENG1 MTK_M4U_ID(SMI_L1_ID, 6) + +/* LARB 2 -- VDO-1 */ +#define M4U_PORT_L2_MDP_RDMA0 MTK_M4U_ID(SMI_L2_ID, 0) +#define M4U_PORT_L2_MDP_RDMA2 MTK_M4U_ID(SMI_L2_ID, 1) +#define M4U_PORT_L2_MDP_RDMA4 MTK_M4U_ID(SMI_L2_ID, 2) +#define M4U_PORT_L2_MDP_RDMA6 MTK_M4U_ID(SMI_L2_ID, 3) +#define M4U_PORT_L2_DISP_FAKE1 MTK_M4U_ID(SMI_L2_ID, 4) + +/* LARB 3 -- VDO-1 */ +#define M4U_PORT_L3_MDP_RDMA1 MTK_M4U_ID(SMI_L3_ID, 0) +#define M4U_PORT_L3_MDP_RDMA3 MTK_M4U_ID(SMI_L3_ID, 1) +#define M4U_PORT_L3_MDP_RDMA5 MTK_M4U_ID(SMI_L3_ID, 2) +#define M4U_PORT_L3_MDP_RDMA7 MTK_M4U_ID(SMI_L3_ID, 3) +#define M4U_PORT_L3_HDR_DS_SMI MTK_M4U_ID(SMI_L3_ID, 4) +#define M4U_PORT_L3_HDR_ADL_SMI MTK_M4U_ID(SMI_L3_ID, 5) +#define M4U_PORT_L3_DISP_FAKE1 MTK_M4U_ID(SMI_L3_ID, 6) + +/* LARB 4 -- VPP-0 */ +#define M4U_PORT_L4_MDP_RDMA MTK_M4U_ID(SMI_L4_ID, 0) +#define M4U_PORT_L4_MDP_FG MTK_M4U_ID(SMI_L4_ID, 1) +#define M4U_PORT_L4_MDP_OVL MTK_M4U_ID(SMI_L4_ID, 2) +#define M4U_PORT_L4_MDP_WROT MTK_M4U_ID(SMI_L4_ID, 3) +#define M4U_PORT_L4_FAKE_ENG MTK_M4U_ID(SMI_L4_ID, 4) +#define M4U_PORT_L4_DISP_RDMA MTK_M4U_ID(SMI_L4_ID, 5) +#define M4U_PORT_L4_DISP_WDMA MTK_M4U_ID(SMI_L4_ID, 6) + +/* LARB 5 -- VPP-1 */ +#define M4U_PORT_L5_SVPP1_MDP_RDMA MTK_M4U_ID(SMI_L5_ID, 0) +#define M4U_PORT_L5_SVPP1_MDP_FG MTK_M4U_ID(SMI_L5_ID, 1) +#define M4U_PORT_L5_SVPP1_MDP_OVL MTK_M4U_ID(SMI_L5_ID, 2) +#define M4U_PORT_L5_SVPP1_MDP_WROT MTK_M4U_ID(SMI_L5_ID, 3) +#define M4U_PORT_L5_SVPP2_MDP_RDMA MTK_M4U_ID(SMI_L5_ID, 4) +#define M4U_PORT_L5_SVPP2_MDP_FG MTK_M4U_ID(SMI_L5_ID, 5) +#define M4U_PORT_L5_SVPP2_MDP_WROT MTK_M4U_ID(SMI_L5_ID, 6) +#define M4U_PORT_L5_LARB5_FAKE_ENG MTK_M4U_ID(SMI_L5_ID, 7) + +/* LARB 6 -- VPP-1 */ +#define M4U_PORT_L6_SVPP3_MDP_RDMA MTK_M4U_ID(SMI_L6_ID, 0) +#define M4U_PORT_L6_SVPP3_MDP_FG MTK_M4U_ID(SMI_L6_ID, 1) +#define M4U_PORT_L6_SVPP3_MDP_WROT MTK_M4U_ID(SMI_L6_ID, 2) +#define M4U_PORT_L6_LARB6_FAKE_ENG MTK_M4U_ID(SMI_L6_ID, 3) + +/* LARB 7 -- WPE */ +#define M4U_PORT_L7_WPE_RDMA_0 MTK_M4U_ID(SMI_L7_ID, 0) +#define M4U_PORT_L7_WPE_RDMA_1 MTK_M4U_ID(SMI_L7_ID, 1) +#define M4U_PORT_L7_WPE_WDMA_0 MTK_M4U_ID(SMI_L7_ID, 2) + +/* LARB 9 -- IMG-M */ +#define M4U_PORT_L9_IMGI_T1_A MTK_M4U_ID(SMI_L9_ID, 0) +#define M4U_PORT_L9_UFDI_T1_A MTK_M4U_ID(SMI_L9_ID, 1) +#define M4U_PORT_L9_IMGBI_T1_A MTK_M4U_ID(SMI_L9_ID, 2) +#define M4U_PORT_L9_IMGCI_T1_A MTK_M4U_ID(SMI_L9_ID, 3) +#define M4U_PORT_L9_SMTI_T1_A MTK_M4U_ID(SMI_L9_ID, 4) +#define M4U_PORT_L9_SMTI_T4_A MTK_M4U_ID(SMI_L9_ID, 5) +#define M4U_PORT_L9_TNCSTI_T1_A MTK_M4U_ID(SMI_L9_ID, 6) +#define M4U_PORT_L9_TNCSTI_T4_A MTK_M4U_ID(SMI_L9_ID, 7) +#define M4U_PORT_L9_YUVO_T1_A MTK_M4U_ID(SMI_L9_ID, 8) 
+#define M4U_PORT_L9_YUVBO_T1_A MTK_M4U_ID(SMI_L9_ID, 9) +#define M4U_PORT_L9_YUVCO_T1_A MTK_M4U_ID(SMI_L9_ID, 10) +#define M4U_PORT_L9_TIMGO_T1_A MTK_M4U_ID(SMI_L9_ID, 11) +#define M4U_PORT_L9_YUVO_T2_A MTK_M4U_ID(SMI_L9_ID, 12) +#define M4U_PORT_L9_YUVO_T5_A MTK_M4U_ID(SMI_L9_ID, 13) +#define M4U_PORT_L9_IMGI_T1_B MTK_M4U_ID(SMI_L9_ID, 14) +#define M4U_PORT_L9_IMGBI_T1_B MTK_M4U_ID(SMI_L9_ID, 15) +#define M4U_PORT_L9_IMGCI_T1_B MTK_M4U_ID(SMI_L9_ID, 16) +#define M4U_PORT_L9_SMTI_T4_B MTK_M4U_ID(SMI_L9_ID, 17) +#define M4U_PORT_L9_TNCSO_T1_A MTK_M4U_ID(SMI_L9_ID, 18) +#define M4U_PORT_L9_SMTO_T1_A MTK_M4U_ID(SMI_L9_ID, 19) +#define M4U_PORT_L9_SMTO_T4_A MTK_M4U_ID(SMI_L9_ID, 20) +#define M4U_PORT_L9_TNCSTO_T1_A MTK_M4U_ID(SMI_L9_ID, 21) +#define M4U_PORT_L9_YUVO_T2_B MTK_M4U_ID(SMI_L9_ID, 22) +#define M4U_PORT_L9_YUVO_T5_B MTK_M4U_ID(SMI_L9_ID, 23) +#define M4U_PORT_L9_SMTO_T4_B MTK_M4U_ID(SMI_L9_ID, 24) + +/* LARB 10 -- IMG-D */ +#define M4U_PORT_L10_IMGI_D1 MTK_M4U_ID(SMI_L10_ID, 0) +#define M4U_PORT_L10_IMGBI_D1 MTK_M4U_ID(SMI_L10_ID, 1) +#define M4U_PORT_L10_IMGCI_D1 MTK_M4U_ID(SMI_L10_ID, 2) +#define M4U_PORT_L10_IMGDI_D1 MTK_M4U_ID(SMI_L10_ID, 3) +#define M4U_PORT_L10_DEPI_D1 MTK_M4U_ID(SMI_L10_ID, 4) +#define M4U_PORT_L10_DMGI_D1 MTK_M4U_ID(SMI_L10_ID, 5) +#define M4U_PORT_L10_SMTI_D1 MTK_M4U_ID(SMI_L10_ID, 6) +#define M4U_PORT_L10_RECI_D1 MTK_M4U_ID(SMI_L10_ID, 7) +#define M4U_PORT_L10_RECI_D1_N MTK_M4U_ID(SMI_L10_ID, 8) +#define M4U_PORT_L10_TNRWI_D1 MTK_M4U_ID(SMI_L10_ID, 9) +#define M4U_PORT_L10_TNRCI_D1 MTK_M4U_ID(SMI_L10_ID, 10) +#define M4U_PORT_L10_TNRCI_D1_N MTK_M4U_ID(SMI_L10_ID, 11) +#define M4U_PORT_L10_IMG4O_D1 MTK_M4U_ID(SMI_L10_ID, 12) +#define M4U_PORT_L10_IMG4BO_D1 MTK_M4U_ID(SMI_L10_ID, 13) +#define M4U_PORT_L10_SMTI_D8 MTK_M4U_ID(SMI_L10_ID, 14) +#define M4U_PORT_L10_SMTO_D1 MTK_M4U_ID(SMI_L10_ID, 15) +#define M4U_PORT_L10_TNRMO_D1 MTK_M4U_ID(SMI_L10_ID, 16) +#define M4U_PORT_L10_TNRMO_D1_N MTK_M4U_ID(SMI_L10_ID, 17) +#define M4U_PORT_L10_SMTO_D8 MTK_M4U_ID(SMI_L10_ID, 18) +#define M4U_PORT_L10_DBGO_D1 MTK_M4U_ID(SMI_L10_ID, 19) + +/* LARB 11A -- IMG-D */ +#define M4U_PORT_L11A_WPE_RDMA_0 MTK_M4U_ID(SMI_L11A_ID, 0) +#define M4U_PORT_L11A_WPE_RDMA_1 MTK_M4U_ID(SMI_L11A_ID, 1) +#define M4U_PORT_L11A_WPE_RDMA_4P_0 MTK_M4U_ID(SMI_L11A_ID, 2) +#define M4U_PORT_L11A_WPE_RDMA_4P_1 MTK_M4U_ID(SMI_L11A_ID, 3) +#define M4U_PORT_L11A_WPE_CQ0 MTK_M4U_ID(SMI_L11A_ID, 4) +#define M4U_PORT_L11A_WPE_CQ1 MTK_M4U_ID(SMI_L11A_ID, 5) +#define M4U_PORT_L11A_PIMGI_P1 MTK_M4U_ID(SMI_L11A_ID, 6) +#define M4U_PORT_L11A_PIMGBI_P1 MTK_M4U_ID(SMI_L11A_ID, 7) +#define M4U_PORT_L11A_PIMGCI_P1 MTK_M4U_ID(SMI_L11A_ID, 8) +#define M4U_PORT_L11A_IMGI_T1_C MTK_M4U_ID(SMI_L11A_ID, 9) +#define M4U_PORT_L11A_IMGBI_T1_C MTK_M4U_ID(SMI_L11A_ID, 10) +#define M4U_PORT_L11A_IMGCI_T1_C MTK_M4U_ID(SMI_L11A_ID, 11) +#define M4U_PORT_L11A_SMTI_T1_C MTK_M4U_ID(SMI_L11A_ID, 12) +#define M4U_PORT_L11A_SMTI_T4_C MTK_M4U_ID(SMI_L11A_ID, 13) +#define M4U_PORT_L11A_SMTI_T6_C MTK_M4U_ID(SMI_L11A_ID, 14) +#define M4U_PORT_L11A_YUVO_T1_C MTK_M4U_ID(SMI_L11A_ID, 15) +#define M4U_PORT_L11A_YUVBO_T1_C MTK_M4U_ID(SMI_L11A_ID, 16) +#define M4U_PORT_L11A_YUVCO_T1_C MTK_M4U_ID(SMI_L11A_ID, 17) +#define M4U_PORT_L11A_WPE_WDMA_0 MTK_M4U_ID(SMI_L11A_ID, 18) +#define M4U_PORT_L11A_WPE_WDMA_4P_0 MTK_M4U_ID(SMI_L11A_ID, 19) +#define M4U_PORT_L11A_WROT_P1 MTK_M4U_ID(SMI_L11A_ID, 20) +#define M4U_PORT_L11A_TCCSO_P1 MTK_M4U_ID(SMI_L11A_ID, 21) +#define M4U_PORT_L11A_TCCSI_P1 MTK_M4U_ID(SMI_L11A_ID, 22) +#define 
M4U_PORT_L11A_TIMGO_T1_C MTK_M4U_ID(SMI_L11A_ID, 23) +#define M4U_PORT_L11A_YUVO_T2_C MTK_M4U_ID(SMI_L11A_ID, 24) +#define M4U_PORT_L11A_YUVO_T5_C MTK_M4U_ID(SMI_L11A_ID, 25) +#define M4U_PORT_L11A_SMTO_T1_C MTK_M4U_ID(SMI_L11A_ID, 26) +#define M4U_PORT_L11A_SMTO_T4_C MTK_M4U_ID(SMI_L11A_ID, 27) +#define M4U_PORT_L11A_SMTO_T6_C MTK_M4U_ID(SMI_L11A_ID, 28) +#define M4U_PORT_L11A_DBGO_T1_C MTK_M4U_ID(SMI_L11A_ID, 29) + +/* LARB 11B -- IMG-D */ +#define M4U_PORT_L11B_WPE_RDMA_0 MTK_M4U_ID(SMI_L11B_ID, 0) +#define M4U_PORT_L11B_WPE_RDMA_1 MTK_M4U_ID(SMI_L11B_ID, 1) +#define M4U_PORT_L11B_WPE_RDMA_4P_0 MTK_M4U_ID(SMI_L11B_ID, 2) +#define M4U_PORT_L11B_WPE_RDMA_4P_1 MTK_M4U_ID(SMI_L11B_ID, 3) +#define M4U_PORT_L11B_WPE_CQ0 MTK_M4U_ID(SMI_L11B_ID, 4) +#define M4U_PORT_L11B_WPE_CQ1 MTK_M4U_ID(SMI_L11B_ID, 5) +#define M4U_PORT_L11B_PIMGI_P1 MTK_M4U_ID(SMI_L11B_ID, 6) +#define M4U_PORT_L11B_PIMGBI_P1 MTK_M4U_ID(SMI_L11B_ID, 7) +#define M4U_PORT_L11B_PIMGCI_P1 MTK_M4U_ID(SMI_L11B_ID, 8) +#define M4U_PORT_L11B_IMGI_T1_C MTK_M4U_ID(SMI_L11B_ID, 9) +#define M4U_PORT_L11B_IMGBI_T1_C MTK_M4U_ID(SMI_L11B_ID, 10) +#define M4U_PORT_L11B_IMGCI_T1_C MTK_M4U_ID(SMI_L11B_ID, 11) +#define M4U_PORT_L11B_SMTI_T1_C MTK_M4U_ID(SMI_L11B_ID, 12) +#define M4U_PORT_L11B_SMTI_T4_C MTK_M4U_ID(SMI_L11B_ID, 13) +#define M4U_PORT_L11B_SMTI_T6_C MTK_M4U_ID(SMI_L11B_ID, 14) +#define M4U_PORT_L11B_YUVO_T1_C MTK_M4U_ID(SMI_L11B_ID, 15) +#define M4U_PORT_L11B_YUVBO_T1_C MTK_M4U_ID(SMI_L11B_ID, 16) +#define M4U_PORT_L11B_YUVCO_T1_C MTK_M4U_ID(SMI_L11B_ID, 17) +#define M4U_PORT_L11B_WPE_WDMA_0 MTK_M4U_ID(SMI_L11B_ID, 18) +#define M4U_PORT_L11B_WPE_WDMA_4P_0 MTK_M4U_ID(SMI_L11B_ID, 19) +#define M4U_PORT_L11B_WROT_P1 MTK_M4U_ID(SMI_L11B_ID, 20) +#define M4U_PORT_L11B_TCCSO_P1 MTK_M4U_ID(SMI_L11B_ID, 21) +#define M4U_PORT_L11B_TCCSI_P1 MTK_M4U_ID(SMI_L11B_ID, 22) +#define M4U_PORT_L11B_TIMGO_T1_C MTK_M4U_ID(SMI_L11B_ID, 23) +#define M4U_PORT_L11B_YUVO_T2_C MTK_M4U_ID(SMI_L11B_ID, 24) +#define M4U_PORT_L11B_YUVO_T5_C MTK_M4U_ID(SMI_L11B_ID, 25) +#define M4U_PORT_L11B_SMTO_T1_C MTK_M4U_ID(SMI_L11B_ID, 26) +#define M4U_PORT_L11B_SMTO_T4_C MTK_M4U_ID(SMI_L11B_ID, 27) +#define M4U_PORT_L11B_SMTO_T6_C MTK_M4U_ID(SMI_L11B_ID, 28) +#define M4U_PORT_L11B_DBGO_T1_C MTK_M4U_ID(SMI_L11B_ID, 29) + +/* LARB 11C -- IMG-D */ +#define M4U_PORT_L11C_WPE_RDMA_0 MTK_M4U_ID(SMI_L11C_ID, 0) +#define M4U_PORT_L11C_WPE_RDMA_1 MTK_M4U_ID(SMI_L11C_ID, 1) +#define M4U_PORT_L11C_WPE_RDMA_4P_0 MTK_M4U_ID(SMI_L11C_ID, 2) +#define M4U_PORT_L11C_WPE_RDMA_4P_1 MTK_M4U_ID(SMI_L11C_ID, 3) +#define M4U_PORT_L11C_WPE_CQ0 MTK_M4U_ID(SMI_L11C_ID, 4) +#define M4U_PORT_L11C_WPE_CQ1 MTK_M4U_ID(SMI_L11C_ID, 5) +#define M4U_PORT_L11C_PIMGI_P1 MTK_M4U_ID(SMI_L11C_ID, 6) +#define M4U_PORT_L11C_PIMGBI_P1 MTK_M4U_ID(SMI_L11C_ID, 7) +#define M4U_PORT_L11C_PIMGCI_P1 MTK_M4U_ID(SMI_L11C_ID, 8) +#define M4U_PORT_L11C_IMGI_T1_C MTK_M4U_ID(SMI_L11C_ID, 9) +#define M4U_PORT_L11C_IMGBI_T1_C MTK_M4U_ID(SMI_L11C_ID, 10) +#define M4U_PORT_L11C_IMGCI_T1_C MTK_M4U_ID(SMI_L11C_ID, 11) +#define M4U_PORT_L11C_SMTI_T1_C MTK_M4U_ID(SMI_L11C_ID, 12) +#define M4U_PORT_L11C_SMTI_T4_C MTK_M4U_ID(SMI_L11C_ID, 13) +#define M4U_PORT_L11C_SMTI_T6_C MTK_M4U_ID(SMI_L11C_ID, 14) +#define M4U_PORT_L11C_YUVO_T1_C MTK_M4U_ID(SMI_L11C_ID, 15) +#define M4U_PORT_L11C_YUVBO_T1_C MTK_M4U_ID(SMI_L11C_ID, 16) +#define M4U_PORT_L11C_YUVCO_T1_C MTK_M4U_ID(SMI_L11C_ID, 17) +#define M4U_PORT_L11C_WPE_WDMA_0 MTK_M4U_ID(SMI_L11C_ID, 18) +#define M4U_PORT_L11C_WPE_WDMA_4P_0 MTK_M4U_ID(SMI_L11C_ID, 19) +#define 
M4U_PORT_L11C_WROT_P1 MTK_M4U_ID(SMI_L11C_ID, 20) +#define M4U_PORT_L11C_TCCSO_P1 MTK_M4U_ID(SMI_L11C_ID, 21) +#define M4U_PORT_L11C_TCCSI_P1 MTK_M4U_ID(SMI_L11C_ID, 22) +#define M4U_PORT_L11C_TIMGO_T1_C MTK_M4U_ID(SMI_L11C_ID, 23) +#define M4U_PORT_L11C_YUVO_T2_C MTK_M4U_ID(SMI_L11C_ID, 24) +#define M4U_PORT_L11C_YUVO_T5_C MTK_M4U_ID(SMI_L11C_ID, 25) +#define M4U_PORT_L11C_SMTO_T1_C MTK_M4U_ID(SMI_L11C_ID, 26) +#define M4U_PORT_L11C_SMTO_T4_C MTK_M4U_ID(SMI_L11C_ID, 27) +#define M4U_PORT_L11C_SMTO_T6_C MTK_M4U_ID(SMI_L11C_ID, 28) +#define M4U_PORT_L11C_DBGO_T1_C MTK_M4U_ID(SMI_L11C_ID, 29) + +/* LARB 12 -- IPE */ +#define M4U_PORT_L12_FDVT_RDA_0 MTK_M4U_ID(SMI_L12_ID, 0) +#define M4U_PORT_L12_FDVT_RDB_0 MTK_M4U_ID(SMI_L12_ID, 1) +#define M4U_PORT_L12_FDVT_WRA_0 MTK_M4U_ID(SMI_L12_ID, 2) +#define M4U_PORT_L12_FDVT_WRB_0 MTK_M4U_ID(SMI_L12_ID, 3) +#define M4U_PORT_L12_ME_RDMA MTK_M4U_ID(SMI_L12_ID, 4) +#define M4U_PORT_L12_ME_WDMA MTK_M4U_ID(SMI_L12_ID, 5) +#define M4U_PORT_L12_DVS_RDMA MTK_M4U_ID(SMI_L12_ID, 6) +#define M4U_PORT_L12_DVS_WDMA MTK_M4U_ID(SMI_L12_ID, 7) +#define M4U_PORT_L12_DVP_RDMA MTK_M4U_ID(SMI_L12_ID, 8) +#define M4U_PORT_L12_DVP_WDMA MTK_M4U_ID(SMI_L12_ID, 9) +#define M4U_PORT_L12_FDVT_2ND_RDA_0 MTK_M4U_ID(SMI_L12_ID, 10) +#define M4U_PORT_L12_FDVT_2ND_RDB_0 MTK_M4U_ID(SMI_L12_ID, 11) +#define M4U_PORT_L12_FDVT_2ND_WRA_0 MTK_M4U_ID(SMI_L12_ID, 12) +#define M4U_PORT_L12_FDVT_2ND_WRB_0 MTK_M4U_ID(SMI_L12_ID, 13) +#define M4U_PORT_L12_DHZEI_E1 MTK_M4U_ID(SMI_L12_ID, 14) +#define M4U_PORT_L12_DHZEO_E1 MTK_M4U_ID(SMI_L12_ID, 15) + +/* LARB 13 -- CAM-1 */ +#define M4U_PORT_L13_CAMSV_CQI_E1 MTK_M4U_ID(SMI_L13_ID, 0) +#define M4U_PORT_L13_CAMSV_CQI_E2 MTK_M4U_ID(SMI_L13_ID, 1) +#define M4U_PORT_L13_GCAMSV_A_IMGO_1 MTK_M4U_ID(SMI_L13_ID, 2) +#define M4U_PORT_L13_GCAMSV_C_IMGO_1 MTK_M4U_ID(SMI_L13_ID, 3) +#define M4U_PORT_L13_GCAMSV_A_IMGO_2 MTK_M4U_ID(SMI_L13_ID, 4) +#define M4U_PORT_L13_GCAMSV_C_IMGO_2 MTK_M4U_ID(SMI_L13_ID, 5) +#define M4U_PORT_L13_PDAI_A_0 MTK_M4U_ID(SMI_L13_ID, 6) +#define M4U_PORT_L13_PDAI_A_1 MTK_M4U_ID(SMI_L13_ID, 7) +#define M4U_PORT_L13_CAMSV_CQI_B_E1 MTK_M4U_ID(SMI_L13_ID, 8) +#define M4U_PORT_L13_CAMSV_CQI_B_E2 MTK_M4U_ID(SMI_L13_ID, 9) +#define M4U_PORT_L13_CAMSV_CQI_C_E1 MTK_M4U_ID(SMI_L13_ID, 10) +#define M4U_PORT_L13_CAMSV_CQI_C_E2 MTK_M4U_ID(SMI_L13_ID, 11) +#define M4U_PORT_L13_GCAMSV_E_IMGO_1 MTK_M4U_ID(SMI_L13_ID, 12) +#define M4U_PORT_L13_GCAMSV_E_IMGO_2 MTK_M4U_ID(SMI_L13_ID, 13) +#define M4U_PORT_L13_GCAMSV_A_UFEO_1 MTK_M4U_ID(SMI_L13_ID, 14) +#define M4U_PORT_L13_GCAMSV_C_UFEO_1 MTK_M4U_ID(SMI_L13_ID, 15) +#define M4U_PORT_L13_GCAMSV_A_UFEO_2 MTK_M4U_ID(SMI_L13_ID, 16) +#define M4U_PORT_L13_GCAMSV_C_UFEO_2 MTK_M4U_ID(SMI_L13_ID, 17) +#define M4U_PORT_L13_GCAMSV_E_UFEO_1 MTK_M4U_ID(SMI_L13_ID, 18) +#define M4U_PORT_L13_GCAMSV_E_UFEO_2 MTK_M4U_ID(SMI_L13_ID, 19) +#define M4U_PORT_L13_GCAMSV_G_IMGO_1 MTK_M4U_ID(SMI_L13_ID, 20) +#define M4U_PORT_L13_GCAMSV_G_IMGO_2 MTK_M4U_ID(SMI_L13_ID, 21) +#define M4U_PORT_L13_PDAO_A MTK_M4U_ID(SMI_L13_ID, 22) +#define M4U_PORT_L13_PDAO_C MTK_M4U_ID(SMI_L13_ID, 23) + +/* LARB 14 -- CAM-1 */ +#define M4U_PORT_L14_GCAMSV_B_IMGO_1 MTK_M4U_ID(SMI_L14_ID, 0) +#define M4U_PORT_L14_GCAMSV_B_IMGO_2 MTK_M4U_ID(SMI_L14_ID, 1) +#define M4U_PORT_L14_SCAMSV_A_IMGO_1 MTK_M4U_ID(SMI_L14_ID, 2) +#define M4U_PORT_L14_SCAMSV_A_IMGO_2 MTK_M4U_ID(SMI_L14_ID, 3) +#define M4U_PORT_L14_SCAMSV_B_IMGO_1 MTK_M4U_ID(SMI_L14_ID, 4) +#define M4U_PORT_L14_SCAMSV_B_IMGO_2 MTK_M4U_ID(SMI_L14_ID, 5) +#define M4U_PORT_L14_PDAI_B_0 
MTK_M4U_ID(SMI_L14_ID, 6) +#define M4U_PORT_L14_PDAI_B_1 MTK_M4U_ID(SMI_L14_ID, 7) +#define M4U_PORT_L14_GCAMSV_D_IMGO_1 MTK_M4U_ID(SMI_L14_ID, 8) +#define M4U_PORT_L14_GCAMSV_D_IMGO_2 MTK_M4U_ID(SMI_L14_ID, 9) +#define M4U_PORT_L14_GCAMSV_F_IMGO_1 MTK_M4U_ID(SMI_L14_ID, 10) +#define M4U_PORT_L14_GCAMSV_F_IMGO_2 MTK_M4U_ID(SMI_L14_ID, 11) +#define M4U_PORT_L14_GCAMSV_H_IMGO_1 MTK_M4U_ID(SMI_L14_ID, 12) +#define M4U_PORT_L14_GCAMSV_H_IMGO_2 MTK_M4U_ID(SMI_L14_ID, 13) +#define M4U_PORT_L14_GCAMSV_B_UFEO_1 MTK_M4U_ID(SMI_L14_ID, 14) +#define M4U_PORT_L14_GCAMSV_B_UFEO_2 MTK_M4U_ID(SMI_L14_ID, 15) +#define M4U_PORT_L14_GCAMSV_D_UFEO_1 MTK_M4U_ID(SMI_L14_ID, 16) +#define M4U_PORT_L14_GCAMSV_D_UFEO_2 MTK_M4U_ID(SMI_L14_ID, 17) +#define M4U_PORT_L14_PDAO_B MTK_M4U_ID(SMI_L14_ID, 18) +#define M4U_PORT_L14_IPUI MTK_M4U_ID(SMI_L14_ID, 19) +#define M4U_PORT_L14_IPUO MTK_M4U_ID(SMI_L14_ID, 20) +#define M4U_PORT_L14_IPU3O MTK_M4U_ID(SMI_L14_ID, 21) +#define M4U_PORT_L14_FAKE MTK_M4U_ID(SMI_L14_ID, 22) + +/* LARB 15 -- IMG-D */ +#define M4U_PORT_L15_VIPI_D1 MTK_M4U_ID(SMI_L15_ID, 0) +#define M4U_PORT_L15_VIPBI_D1 MTK_M4U_ID(SMI_L15_ID, 1) +#define M4U_PORT_L15_SMTI_D6 MTK_M4U_ID(SMI_L15_ID, 2) +#define M4U_PORT_L15_TNCSTI_D1 MTK_M4U_ID(SMI_L15_ID, 3) +#define M4U_PORT_L15_TNCSTI_D4 MTK_M4U_ID(SMI_L15_ID, 4) +#define M4U_PORT_L15_SMTI_D4 MTK_M4U_ID(SMI_L15_ID, 5) +#define M4U_PORT_L15_IMG3O_D1 MTK_M4U_ID(SMI_L15_ID, 6) +#define M4U_PORT_L15_IMG3BO_D1 MTK_M4U_ID(SMI_L15_ID, 7) +#define M4U_PORT_L15_IMG3CO_D1 MTK_M4U_ID(SMI_L15_ID, 8) +#define M4U_PORT_L15_IMG2O_D1 MTK_M4U_ID(SMI_L15_ID, 9) +#define M4U_PORT_L15_SMTI_D9 MTK_M4U_ID(SMI_L15_ID, 10) +#define M4U_PORT_L15_SMTO_D4 MTK_M4U_ID(SMI_L15_ID, 11) +#define M4U_PORT_L15_FEO_D1 MTK_M4U_ID(SMI_L15_ID, 12) +#define M4U_PORT_L15_TNCSO_D1 MTK_M4U_ID(SMI_L15_ID, 13) +#define M4U_PORT_L15_TNCSTO_D1 MTK_M4U_ID(SMI_L15_ID, 14) +#define M4U_PORT_L15_SMTO_D6 MTK_M4U_ID(SMI_L15_ID, 15) +#define M4U_PORT_L15_SMTO_D9 MTK_M4U_ID(SMI_L15_ID, 16) +#define M4U_PORT_L15_TNCO_D1 MTK_M4U_ID(SMI_L15_ID, 17) +#define M4U_PORT_L15_TNCO_D1_N MTK_M4U_ID(SMI_L15_ID, 18) + +/* LARB 16A -- CAM */ +#define M4U_PORT_L16A_IMGO_R1 MTK_M4U_ID(SMI_L16A_ID, 0) +#define M4U_PORT_L16A_CQI_R1 MTK_M4U_ID(SMI_L16A_ID, 1) +#define M4U_PORT_L16A_CQI_R2 MTK_M4U_ID(SMI_L16A_ID, 2) +#define M4U_PORT_L16A_BPCI_R1 MTK_M4U_ID(SMI_L16A_ID, 3) +#define M4U_PORT_L16A_LSCI_R1 MTK_M4U_ID(SMI_L16A_ID, 4) +#define M4U_PORT_L16A_RAWI_R2 MTK_M4U_ID(SMI_L16A_ID, 5) +#define M4U_PORT_L16A_RAWI_R3 MTK_M4U_ID(SMI_L16A_ID, 6) +#define M4U_PORT_L16A_UFDI_R2 MTK_M4U_ID(SMI_L16A_ID, 7) +#define M4U_PORT_L16A_UFDI_R3 MTK_M4U_ID(SMI_L16A_ID, 8) +#define M4U_PORT_L16A_RAWI_R4 MTK_M4U_ID(SMI_L16A_ID, 9) +#define M4U_PORT_L16A_RAWI_R5 MTK_M4U_ID(SMI_L16A_ID, 10) +#define M4U_PORT_L16A_AAI_R1 MTK_M4U_ID(SMI_L16A_ID, 11) +#define M4U_PORT_L16A_UFDI_R5 MTK_M4U_ID(SMI_L16A_ID, 12) +#define M4U_PORT_L16A_FHO_R1 MTK_M4U_ID(SMI_L16A_ID, 13) +#define M4U_PORT_L16A_AAO_R1 MTK_M4U_ID(SMI_L16A_ID, 14) +#define M4U_PORT_L16A_TSFSO_R1 MTK_M4U_ID(SMI_L16A_ID, 15) +#define M4U_PORT_L16A_FLKO_R1 MTK_M4U_ID(SMI_L16A_ID, 16) + +/* LARB 16B -- CAM */ +#define M4U_PORT_L16B_IMGO_R1 MTK_M4U_ID(SMI_L16B_ID, 0) +#define M4U_PORT_L16B_CQI_R1 MTK_M4U_ID(SMI_L16B_ID, 1) +#define M4U_PORT_L16B_CQI_R2 MTK_M4U_ID(SMI_L16B_ID, 2) +#define M4U_PORT_L16B_BPCI_R1 MTK_M4U_ID(SMI_L16B_ID, 3) +#define M4U_PORT_L16B_LSCI_R1 MTK_M4U_ID(SMI_L16B_ID, 4) +#define M4U_PORT_L16B_RAWI_R2 MTK_M4U_ID(SMI_L16B_ID, 5) +#define M4U_PORT_L16B_RAWI_R3 
MTK_M4U_ID(SMI_L16B_ID, 6) +#define M4U_PORT_L16B_UFDI_R2 MTK_M4U_ID(SMI_L16B_ID, 7) +#define M4U_PORT_L16B_UFDI_R3 MTK_M4U_ID(SMI_L16B_ID, 8) +#define M4U_PORT_L16B_RAWI_R4 MTK_M4U_ID(SMI_L16B_ID, 9) +#define M4U_PORT_L16B_RAWI_R5 MTK_M4U_ID(SMI_L16B_ID, 10) +#define M4U_PORT_L16B_AAI_R1 MTK_M4U_ID(SMI_L16B_ID, 11) +#define M4U_PORT_L16B_UFDI_R5 MTK_M4U_ID(SMI_L16B_ID, 12) +#define M4U_PORT_L16B_FHO_R1 MTK_M4U_ID(SMI_L16B_ID, 13) +#define M4U_PORT_L16B_AAO_R1 MTK_M4U_ID(SMI_L16B_ID, 14) +#define M4U_PORT_L16B_TSFSO_R1 MTK_M4U_ID(SMI_L16B_ID, 15) +#define M4U_PORT_L16B_FLKO_R1 MTK_M4U_ID(SMI_L16B_ID, 16) + +/* LARB 17A -- CAM */ +#define M4U_PORT_L17A_YUVO_R1 MTK_M4U_ID(SMI_L17A_ID, 0) +#define M4U_PORT_L17A_YUVO_R3 MTK_M4U_ID(SMI_L17A_ID, 1) +#define M4U_PORT_L17A_YUVCO_R1 MTK_M4U_ID(SMI_L17A_ID, 2) +#define M4U_PORT_L17A_YUVO_R2 MTK_M4U_ID(SMI_L17A_ID, 3) +#define M4U_PORT_L17A_RZH1N2TO_R1 MTK_M4U_ID(SMI_L17A_ID, 4) +#define M4U_PORT_L17A_DRZS4NO_R1 MTK_M4U_ID(SMI_L17A_ID, 5) +#define M4U_PORT_L17A_TNCSO_R1 MTK_M4U_ID(SMI_L17A_ID, 6) + +/* LARB 17B -- CAM */ +#define M4U_PORT_L17B_YUVO_R1 MTK_M4U_ID(SMI_L17B_ID, 0) +#define M4U_PORT_L17B_YUVO_R3 MTK_M4U_ID(SMI_L17B_ID, 1) +#define M4U_PORT_L17B_YUVCO_R1 MTK_M4U_ID(SMI_L17B_ID, 2) +#define M4U_PORT_L17B_YUVO_R2 MTK_M4U_ID(SMI_L17B_ID, 3) +#define M4U_PORT_L17B_RZH1N2TO_R1 MTK_M4U_ID(SMI_L17B_ID, 4) +#define M4U_PORT_L17B_DRZS4NO_R1 MTK_M4U_ID(SMI_L17B_ID, 5) +#define M4U_PORT_L17B_TNCSO_R1 MTK_M4U_ID(SMI_L17B_ID, 6) + +/* LARB 19 -- VENC */ +#define M4U_PORT_L19_VENC_RCPU MTK_M4U_ID(SMI_L19_ID, 0) +#define M4U_PORT_L19_VENC_REC MTK_M4U_ID(SMI_L19_ID, 1) +#define M4U_PORT_L19_VENC_BSDMA MTK_M4U_ID(SMI_L19_ID, 2) +#define M4U_PORT_L19_VENC_SV_COMV MTK_M4U_ID(SMI_L19_ID, 3) +#define M4U_PORT_L19_VENC_RD_COMV MTK_M4U_ID(SMI_L19_ID, 4) +#define M4U_PORT_L19_VENC_NBM_RDMA MTK_M4U_ID(SMI_L19_ID, 5) +#define M4U_PORT_L19_VENC_NBM_RDMA_LITE MTK_M4U_ID(SMI_L19_ID, 6) +#define M4U_PORT_L19_JPGENC_Y_RDMA MTK_M4U_ID(SMI_L19_ID, 7) +#define M4U_PORT_L19_JPGENC_C_RDMA MTK_M4U_ID(SMI_L19_ID, 8) +#define M4U_PORT_L19_JPGENC_Q_TABLE MTK_M4U_ID(SMI_L19_ID, 9) +#define M4U_PORT_L19_VENC_SUB_W_LUMA MTK_M4U_ID(SMI_L19_ID, 10) +#define M4U_PORT_L19_VENC_FCS_NBM_RDMA MTK_M4U_ID(SMI_L19_ID, 11) +#define M4U_PORT_L19_JPGENC_BSDMA MTK_M4U_ID(SMI_L19_ID, 12) +#define M4U_PORT_L19_JPGDEC_WDMA_0 MTK_M4U_ID(SMI_L19_ID, 13) +#define M4U_PORT_L19_JPGDEC_BSDMA_0 MTK_M4U_ID(SMI_L19_ID, 14) +#define M4U_PORT_L19_VENC_NBM_WDMA MTK_M4U_ID(SMI_L19_ID, 15) +#define M4U_PORT_L19_VENC_NBM_WDMA_LITE MTK_M4U_ID(SMI_L19_ID, 16) +#define M4U_PORT_L19_VENC_FCS_NBM_WDMA MTK_M4U_ID(SMI_L19_ID, 17) +#define M4U_PORT_L19_JPGDEC_WDMA_1 MTK_M4U_ID(SMI_L19_ID, 18) +#define M4U_PORT_L19_JPGDEC_BSDMA_1 MTK_M4U_ID(SMI_L19_ID, 19) +#define M4U_PORT_L19_JPGDEC_HUFF_OFFSET_1 MTK_M4U_ID(SMI_L19_ID, 20) +#define M4U_PORT_L19_JPGDEC_HUFF_OFFSET_0 MTK_M4U_ID(SMI_L19_ID, 21) +#define M4U_PORT_L19_VENC_CUR_LUMA MTK_M4U_ID(SMI_L19_ID, 22) +#define M4U_PORT_L19_VENC_CUR_CHROMA MTK_M4U_ID(SMI_L19_ID, 23) +#define M4U_PORT_L19_VENC_REF_LUMA MTK_M4U_ID(SMI_L19_ID, 24) +#define M4U_PORT_L19_VENC_REF_CHROMA MTK_M4U_ID(SMI_L19_ID, 25) +#define M4U_PORT_L19_VENC_SUB_R_LUMA MTK_M4U_ID(SMI_L19_ID, 26) + +/* LARB 21 -- VDEC-CORE0 */ +#define M4U_PORT_L21_HW_VDEC_MC_EXT MTK_M4U_ID(SMI_L21_ID, 0) +#define M4U_PORT_L21_HW_VDEC_UFO_EXT MTK_M4U_ID(SMI_L21_ID, 1) +#define M4U_PORT_L21_HW_VDEC_PP_EXT MTK_M4U_ID(SMI_L21_ID, 2) +#define M4U_PORT_L21_HW_VDEC_PRED_RD_EXT MTK_M4U_ID(SMI_L21_ID, 3) +#define 
M4U_PORT_L21_HW_VDEC_PRED_WR_EXT MTK_M4U_ID(SMI_L21_ID, 4) +#define M4U_PORT_L21_HW_VDEC_PPWRAP_EXT MTK_M4U_ID(SMI_L21_ID, 5) +#define M4U_PORT_L21_HW_VDEC_TILE_EXT MTK_M4U_ID(SMI_L21_ID, 6) +#define M4U_PORT_L21_HW_VDEC_VLD_EXT MTK_M4U_ID(SMI_L21_ID, 7) +#define M4U_PORT_L21_HW_VDEC_VLD2_EXT MTK_M4U_ID(SMI_L21_ID, 8) +#define M4U_PORT_L21_HW_VDEC_AVC_MV_EXT MTK_M4U_ID(SMI_L21_ID, 9) +#define M4U_PORT_L21_HW_VDEC_UFO_EXT_C MTK_M4U_ID(SMI_L21_ID, 10) + +/* LARB 23 -- VDEC-SOC */ +#define M4U_PORT_L23_HW_VDEC_LAT0_VLD_EXT MTK_M4U_ID(SMI_L23_ID, 0) +#define M4U_PORT_L23_HW_VDEC_LAT0_VLD2_EXT MTK_M4U_ID(SMI_L23_ID, 1) +#define M4U_PORT_L23_HW_VDEC_LAT0_AVC_MV_EXT MTK_M4U_ID(SMI_L23_ID, 2) +#define M4U_PORT_L23_HW_VDEC_LAT0_PRED_RD_EXT MTK_M4U_ID(SMI_L23_ID, 3) +#define M4U_PORT_L23_HW_VDEC_LAT0_TILE_EXT MTK_M4U_ID(SMI_L23_ID, 4) +#define M4U_PORT_L23_HW_VDEC_LAT0_WDMA_EXT MTK_M4U_ID(SMI_L23_ID, 5) +#define M4U_PORT_L23_HW_VDEC_UFO_ENC_EXT MTK_M4U_ID(SMI_L23_ID, 6) +#define M4U_PORT_L23_HW_VDEC_UFO_ENC_EXT_C MTK_M4U_ID(SMI_L23_ID, 7) +#define M4U_PORT_L23_HW_VDEC_MC_EXT_C MTK_M4U_ID(SMI_L23_ID, 8) + +/* LARB 27 -- CCU */ +#define M4U_PORT_L27_CCUI MTK_M4U_ID(SMI_L27_ID, 0) +#define M4U_PORT_L27_CCUO MTK_M4U_ID(SMI_L27_ID, 1) +#define M4U_PORT_L27_CCUI2 MTK_M4U_ID(SMI_L27_ID, 2) +#define M4U_PORT_L27_CCUO2 MTK_M4U_ID(SMI_L27_ID, 3) + +/* LARB 28 -- AXI-CCU */ +#define M4U_PORT_L28_CCU_AXI_0 MTK_M4U_ID(SMI_L28_ID, 0) + +/* infra/peri */ +#define IFR_IOMMU_PORT_PCIE_0 MTK_IFAIOMMU_PERI_ID(0) + +#endif diff --git a/include/linux/amd-iommu.h b/include/linux/amd-iommu.h index 953e6f12fa1c..99a5201d9e62 100644 --- a/include/linux/amd-iommu.h +++ b/include/linux/amd-iommu.h @@ -32,7 +32,6 @@ struct task_struct; struct pci_dev; extern int amd_iommu_detect(void); -extern int amd_iommu_init_hardware(void); /** * amd_iommu_init_device() - Init device for use with IOMMUv2 driver diff --git a/include/linux/dmar.h b/include/linux/dmar.h index 27dbd4c64860..e34b601b71fd 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -106,8 +106,6 @@ static inline bool dmar_rcu_check(void) extern int dmar_table_init(void); extern int dmar_dev_scope_init(void); extern void dmar_register_bus_notifier(void); -extern int dmar_parse_dev_scope(void *start, void *end, int *cnt, - struct dmar_dev_scope **devices, u16 segment); extern void *dmar_alloc_dev_scope(void *start, void *end, int *cnt); extern void dmar_free_dev_scope(struct dmar_dev_scope **devices, int *cnt); extern int dmar_insert_dev_scope(struct dmar_pci_notify_info *info, diff --git a/include/linux/iommu.h b/include/linux/iommu.h index bd6a1110b294..c50a769d569a 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -196,6 +196,8 @@ enum iommu_dev_features { IOMMU_DEV_FEAT_IOPF, }; +#define IOMMU_NO_PASID (0U) /* Reserved for DMA w/o PASID */ +#define IOMMU_FIRST_GLOBAL_PASID (1U) /*starting range for allocation */ #define IOMMU_PASID_INVALID (-1U) typedef unsigned int ioasid_t; @@ -414,6 +416,8 @@ struct iommu_fault_param { * @priv: IOMMU Driver private data * @max_pasids: number of PASIDs this device can consume * @attach_deferred: the dma domain attachment is deferred + * @pci_32bit_workaround: Limit DMA allocations to 32-bit IOVAs + * @require_direct: device requires IOMMU_RESV_DIRECT regions * * TODO: migrate other per device data pointers under iommu_dev_data, e.g. 
diff --git a/include/linux/amd-iommu.h b/include/linux/amd-iommu.h
index 953e6f12fa1c..99a5201d9e62 100644
--- a/include/linux/amd-iommu.h
+++ b/include/linux/amd-iommu.h
@@ -32,7 +32,6 @@ struct task_struct;
 struct pci_dev;
 extern int amd_iommu_detect(void);
-extern int amd_iommu_init_hardware(void);
 /**
  * amd_iommu_init_device() - Init device for use with IOMMUv2 driver
diff --git a/include/linux/dmar.h b/include/linux/dmar.h
index 27dbd4c64860..e34b601b71fd 100644
--- a/include/linux/dmar.h
+++ b/include/linux/dmar.h
@@ -106,8 +106,6 @@ static inline bool dmar_rcu_check(void)
 extern int dmar_table_init(void);
 extern int dmar_dev_scope_init(void);
 extern void dmar_register_bus_notifier(void);
-extern int dmar_parse_dev_scope(void *start, void *end, int *cnt,
-				struct dmar_dev_scope **devices, u16 segment);
 extern void *dmar_alloc_dev_scope(void *start, void *end, int *cnt);
 extern void dmar_free_dev_scope(struct dmar_dev_scope **devices, int *cnt);
 extern int dmar_insert_dev_scope(struct dmar_pci_notify_info *info,
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index bd6a1110b294..c50a769d569a 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -196,6 +196,8 @@ enum iommu_dev_features {
 	IOMMU_DEV_FEAT_IOPF,
 };
+#define IOMMU_NO_PASID	(0U) /* Reserved for DMA w/o PASID */
+#define IOMMU_FIRST_GLOBAL_PASID	(1U) /*starting range for allocation */
 #define IOMMU_PASID_INVALID	(-1U)
 typedef unsigned int ioasid_t;
@@ -414,6 +416,8 @@ struct iommu_fault_param {
  * @priv: IOMMU Driver private data
  * @max_pasids: number of PASIDs this device can consume
  * @attach_deferred: the dma domain attachment is deferred
+ * @pci_32bit_workaround: Limit DMA allocations to 32-bit IOVAs
+ * @require_direct: device requires IOMMU_RESV_DIRECT regions
  *
  * TODO: migrate other per device data pointers under iommu_dev_data, e.g.
  *	struct iommu_group	*iommu_group;
@@ -427,6 +431,8 @@ struct dev_iommu {
 	void *priv;
 	u32 max_pasids;
 	u32 attach_deferred:1;
+	u32 pci_32bit_workaround:1;
+	u32 require_direct:1;
 };
 int iommu_device_register(struct iommu_device *iommu,
@@ -721,6 +727,8 @@ void iommu_detach_device_pasid(struct iommu_domain *domain,
 struct iommu_domain *
 iommu_get_domain_for_dev_pasid(struct device *dev, ioasid_t pasid,
			       unsigned int type);
+ioasid_t iommu_alloc_global_pasid(struct device *dev);
+void iommu_free_global_pasid(ioasid_t pasid);
 #else /* CONFIG_IOMMU_API */
 struct iommu_ops {};
@@ -1082,6 +1090,13 @@ iommu_get_domain_for_dev_pasid(struct device *dev, ioasid_t pasid,
 {
	return NULL;
 }
+
+static inline ioasid_t iommu_alloc_global_pasid(struct device *dev)
+{
+	return IOMMU_PASID_INVALID;
+}
+
+static inline void iommu_free_global_pasid(ioasid_t pasid) {}
 #endif /* CONFIG_IOMMU_API */
 /**
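The iommu.h hunks above add iommu_alloc_global_pasid()/iommu_free_global_pasid(), plus !CONFIG_IOMMU_API stubs that return IOMMU_PASID_INVALID and do nothing, so callers can use the same check in both configurations. A hedged sketch of how a driver might consume the pair follows; the surrounding driver state and error handling are illustrative assumptions, not taken from this series.

/*
 * Illustrative driver-side use of the new global PASID helpers.
 * example_pasid and the error path are assumptions for the sketch.
 */
#include <linux/errno.h>
#include <linux/iommu.h>

static ioasid_t example_pasid = IOMMU_PASID_INVALID;	/* hypothetical driver state */

static int example_enable_pasid(struct device *dev)
{
	ioasid_t pasid = iommu_alloc_global_pasid(dev);

	/* The stub path returns IOMMU_PASID_INVALID, so this check covers both configs. */
	if (pasid == IOMMU_PASID_INVALID)
		return -ENOSPC;

	example_pasid = pasid;
	return 0;
}

static void example_disable_pasid(void)
{
	if (example_pasid != IOMMU_PASID_INVALID)
		iommu_free_global_pasid(example_pasid);
	example_pasid = IOMMU_PASID_INVALID;
}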
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 177b3f3676ef..77f01ac385f7 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1671,7 +1671,7 @@ static inline unsigned int __task_state_index(unsigned int tsk_state,
 	BUILD_BUG_ON_NOT_POWER_OF_2(TASK_REPORT_MAX);
-	if (tsk_state == TASK_IDLE)
+	if ((tsk_state & TASK_IDLE) == TASK_IDLE)
 		state = TASK_REPORT_IDLE;
 	/*
@@ -1679,7 +1679,7 @@ static inline unsigned int __task_state_index(unsigned int tsk_state,
	 * to userspace, we can make this appear as if the task has gone through
	 * a regular rt_mutex_lock() call.
	 */
-	if (tsk_state == TASK_RTLOCK_WAIT)
+	if (tsk_state & TASK_RTLOCK_WAIT)
 		state = TASK_UNINTERRUPTIBLE;
 	return fls(state);
@@ -1858,7 +1858,17 @@ extern int task_can_attach(struct task_struct *p);
 extern int dl_bw_alloc(int cpu, u64 dl_bw);
 extern void dl_bw_free(int cpu, u64 dl_bw);
 #ifdef CONFIG_SMP
+
+/* do_set_cpus_allowed() - consider using set_cpus_allowed_ptr() instead */
 extern void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask);
+
+/**
+ * set_cpus_allowed_ptr - set CPU affinity mask of a task
+ * @p: the task
+ * @new_mask: CPU affinity mask
+ *
+ * Return: zero if successful, or a negative error code
+ */
 extern int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask);
 extern int dup_user_cpus_ptr(struct task_struct *dst, struct task_struct *src, int node);
 extern void release_user_cpus_ptr(struct task_struct *p);
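The __task_state_index() hunks above switch from equality tests to mask tests: TASK_IDLE is itself a composite state (conventionally TASK_UNINTERRUPTIBLE | TASK_NOLOAD), so once an extra modifier bit such as TASK_FREEZABLE is folded in, a plain == comparison stops matching while the masked test still does. A small stand-alone illustration, with bit values chosen for the example rather than copied from the kernel headers:

/*
 * Illustrative only: why the mask test reports idle where == does not.
 * Bit values are made up; only the TASK_IDLE composition mirrors the kernel.
 */
#include <stdio.h>

#define TASK_UNINTERRUPTIBLE	0x0002
#define TASK_NOLOAD		0x0400
#define TASK_FREEZABLE		0x2000
#define TASK_IDLE		(TASK_UNINTERRUPTIBLE | TASK_NOLOAD)

int main(void)
{
	unsigned int tsk_state = TASK_IDLE | TASK_FREEZABLE;	/* freezable idle kthread */

	printf("equality test sees idle: %d\n", tsk_state == TASK_IDLE);		/* 0 */
	printf("mask test sees idle:     %d\n", (tsk_state & TASK_IDLE) == TASK_IDLE);	/* 1 */
	return 0;
}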
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 1e7774ac808f..533ab92684d8 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -4440,8 +4440,6 @@ struct net_device *ib_get_net_dev_by_params(struct ib_device *dev, u32 port,
					    const struct sockaddr *addr);
 int ib_device_set_netdev(struct ib_device *ib_dev, struct net_device *ndev,
			 unsigned int port);
-struct net_device *ib_device_netdev(struct ib_device *dev, u32 port);
-
 struct ib_wq *ib_create_wq(struct ib_pd *pd, struct ib_wq_init_attr *init_attr);
 int ib_destroy_wq_user(struct ib_wq *wq, struct ib_udata *udata);
diff --git a/include/rdma/iw_cm.h b/include/rdma/iw_cm.h
index 03abd30e6c8c..2b22f153ef63 100644
--- a/include/rdma/iw_cm.h
+++ b/include/rdma/iw_cm.h
@@ -115,27 +115,6 @@ struct iw_cm_id *iw_create_cm_id(struct ib_device *device,
 void iw_destroy_cm_id(struct iw_cm_id *cm_id);
 /**
- * iw_cm_bind_qp - Unbind the specified IW CM identifier and QP
- *
- * @cm_id: The IW CM idenfier to unbind from the QP.
- * @qp: The QP
- *
- * This is called by the provider when destroying the QP to ensure
- * that any references held by the IWCM are released. It may also
- * be called by the IWCM when destroying a CM_ID to that any
- * references held by the provider are released.
- */
-void iw_cm_unbind_qp(struct iw_cm_id *cm_id, struct ib_qp *qp);
-
-/**
- * iw_cm_get_qp - Return the ib_qp associated with a QPN
- *
- * @ib_device: The IB device
- * @qpn: The queue pair number
- */
-struct ib_qp *iw_cm_get_qp(struct ib_device *device, int qpn);
-
-/**
  * iw_cm_listen - Listen for incoming connection requests on the
  * specified IW CM id.
  *
diff --git a/include/soc/mediatek/smi.h b/include/soc/mediatek/smi.h
index dfd8efca5e60..000eb1cf68b7 100644
--- a/include/soc/mediatek/smi.h
+++ b/include/soc/mediatek/smi.h
@@ -13,6 +13,7 @@ enum iommu_atf_cmd {
 	IOMMU_ATF_CMD_CONFIG_SMI_LARB,	/* For mm master to en/disable iommu */
+	IOMMU_ATF_CMD_CONFIG_INFRA_IOMMU, /* For infra master to enable iommu */
 	IOMMU_ATF_CMD_MAX,
 };
diff --git a/include/uapi/rdma/bnxt_re-abi.h b/include/uapi/rdma/bnxt_re-abi.h
index 8a2a1d4f6b29..6e7c67a0cca3 100644
--- a/include/uapi/rdma/bnxt_re-abi.h
+++ b/include/uapi/rdma/bnxt_re-abi.h
@@ -53,6 +53,7 @@ enum {
 	BNXT_RE_UCNTX_CMASK_HAVE_CCTX = 0x1ULL,
 	BNXT_RE_UCNTX_CMASK_HAVE_MODE = 0x02ULL,
 	BNXT_RE_UCNTX_CMASK_WC_DPI_ENABLED = 0x04ULL,
+	BNXT_RE_UCNTX_CMASK_DBR_PACING_ENABLED = 0x08ULL,
 };
 enum bnxt_re_wqe_mode {
@@ -131,10 +132,13 @@ enum bnxt_re_shpg_offt {
 enum bnxt_re_objects {
 	BNXT_RE_OBJECT_ALLOC_PAGE = (1U << UVERBS_ID_NS_SHIFT),
+	BNXT_RE_OBJECT_NOTIFY_DRV,
 };
 enum bnxt_re_alloc_page_type {
 	BNXT_RE_ALLOC_WC_PAGE = 0,
+	BNXT_RE_ALLOC_DBR_BAR_PAGE,
+	BNXT_RE_ALLOC_DBR_PAGE,
 };
 enum bnxt_re_var_alloc_page_attrs {
@@ -154,4 +158,7 @@ enum bnxt_re_alloc_page_methods {
 	BNXT_RE_METHOD_DESTROY_PAGE,
 };
+enum bnxt_re_notify_drv_methods {
+	BNXT_RE_METHOD_NOTIFY_DRV = (1U << UVERBS_ID_NS_SHIFT),
+};
 #endif /* __BNXT_RE_UVERBS_ABI_H__*/
diff --git a/include/uapi/rdma/irdma-abi.h b/include/uapi/rdma/irdma-abi.h
index a7085e092d34..bb18f15489e3 100644
--- a/include/uapi/rdma/irdma-abi.h
+++ b/include/uapi/rdma/irdma-abi.h
@@ -22,10 +22,16 @@ enum irdma_memreg_type {
 	IRDMA_MEMREG_TYPE_CQ = 2,
 };
+enum {
+	IRDMA_ALLOC_UCTX_USE_RAW_ATTR = 1 << 0,
+	IRDMA_ALLOC_UCTX_MIN_HW_WQ_SIZE = 1 << 1,
+};
+
 struct irdma_alloc_ucontext_req {
 	__u32 rsvd32;
 	__u8 userspace_ver;
 	__u8 rsvd8[3];
+	__aligned_u64 comp_mask;
 };
 struct irdma_alloc_ucontext_resp {
@@ -46,6 +52,9 @@ struct irdma_alloc_ucontext_resp {
 	__u16 max_hw_sq_chunk;
 	__u8 hw_rev;
 	__u8 rsvd2;
+	__aligned_u64 comp_mask;
+	__u16 min_hw_wq_size;
+	__u8 rsvd3[6];
 };
 struct irdma_alloc_pd_resp {
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index babb34a643ea..1fb7f562289d 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -3673,21 +3673,9 @@ static int cgroup_extra_stat_show(struct seq_file *seq, int ssid)
 	css_put(css);
 	return ret;
 }
-#endif
-
-static int cpu_stat_show(struct seq_file *seq, void *v)
-{
-	int ret = 0;
-
-	cgroup_base_stat_cputime_show(seq);
-#ifdef CONFIG_CGROUP_SCHED
-	ret = cgroup_extra_stat_show(seq, cpu_cgrp_id);
-#endif
-	return ret;
-}
-static int __maybe_unused cgroup_local_stat_show(struct seq_file *seq,
-						 struct cgroup *cgrp, int ssid)
+static int cgroup_local_stat_show(struct seq_file *seq,
+				  struct cgroup *cgrp, int ssid)
 {
 	struct cgroup_subsys *ss = cgroup_subsys[ssid];
 	struct cgroup_subsys_state *css;
@@ -3704,6 +3692,18 @@ static int __maybe_unused cgroup_local_stat_show(struct seq_file *seq,
 	css_put(css);
 	return ret;
 }
+#endif
+
+static int cpu_stat_show(struct seq_file *seq, void *v)
+{
+	int ret = 0;
+
+	cgroup_base_stat_cputime_show(seq);
+#ifdef CONFIG_CGROUP_SCHED
+	ret = cgroup_extra_stat_show(seq, cpu_cgrp_id);
+#endif
+	return ret;
+}
 static int cpu_local_stat_show(struct seq_file *seq, void *v)
 {
diff --git a/kernel/cpu.c b/kernel/cpu.c
index f6811c857102..6de7c6bb74ee 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -1487,8 +1487,22 @@ out:
 	return ret;
 }
+struct cpu_down_work {
+	unsigned int cpu;
+	enum cpuhp_state target;
+};
+
+static long __cpu_down_maps_locked(void *arg)
+{
+	struct cpu_down_work *work = arg;
+
+	return _cpu_down(work->cpu, 0, work->target);
+}
+
 static int cpu_down_maps_locked(unsigned int cpu, enum cpuhp_state target)
 {
+	struct cpu_down_work work = { .cpu = cpu, .target = target, };
+
 	/*
	 * If the platform does not support hotplug, report it explicitly to
	 * differentiate it from a transient offlining failure.
@@ -1497,7 +1511,15 @@ static int cpu_down_maps_locked(unsigned int cpu, enum cpuhp_state target)
		return -EOPNOTSUPP;
 	if (cpu_hotplug_disabled)
		return -EBUSY;
-	return _cpu_down(cpu, 0, target);
+
+	/*
+	 * Ensure that the control task does not run on the to be offlined
+	 * CPU to prevent a deadlock against cfs_b->period_timer.
+	 */
+	cpu = cpumask_any_but(cpu_online_mask, cpu);
+	if (cpu >= nr_cpu_ids)
+		return -EBUSY;
+	return work_on_cpu(cpu, __cpu_down_maps_locked, &work);
 }
 static int cpu_down(unsigned int cpu, enum cpuhp_state target)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 911d0063763c..8dbff6e7ad4f 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -699,7 +699,7 @@ u64 avg_vruntime(struct cfs_rq *cfs_rq)
 *
 * XXX could add max_slice to the augmented data to track this.
 */
-void update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se)
+static void update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
	s64 lag, limit;
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 4df14db4da49..87015e9deacc 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -1045,7 +1045,7 @@ static bool report_idle_softirq(void)
		return false;
 	/* On RT, softirqs handling may be waiting on some lock */
-	if (!local_bh_blocked())
+	if (local_bh_blocked())
		return false;
 	pr_warn("NOHZ tick-stop error: local softirq work is pending, handler #%02x!!!\n",
diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index 9b2d23fbf4d3..b7d7e4fcfad7 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -58,7 +58,7 @@ static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
		pte = pte_offset_map(pmd, addr);
		if (pte) {
			err = walk_pte_range_inner(pte, addr, end, walk);
-			if (walk->mm != &init_mm)
+			if (walk->mm != &init_mm && addr < TASK_SIZE)
				pte_unmap(pte);
		}
	} else {
diff --git a/tools/testing/selftests/x86/test_shadow_stack.c b/tools/testing/selftests/x86/test_shadow_stack.c
index 2188968674cb..757e6527f67e 100644
--- a/tools/testing/selftests/x86/test_shadow_stack.c
+++ b/tools/testing/selftests/x86/test_shadow_stack.c
@@ -40,7 +40,7 @@
 * without building the headers.
 */
 #ifndef __NR_map_shadow_stack
-#define __NR_map_shadow_stack 452
+#define __NR_map_shadow_stack 453
 #define SHADOW_STACK_SET_TOKEN (1ULL << 0)
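The selftest hunk above bumps the fallback __NR_map_shadow_stack definition from 452 to 453 to track the renumbered syscall table. For context, here is a hedged sketch of how such a fallback is typically exercised from userspace; the (addr, size, flags) argument order is an assumption based on the selftest's own usage, and the call simply fails on kernels or CPUs without shadow stack support.

/*
 * Illustrative invocation of map_shadow_stack() via syscall(), using the
 * same fallback define as the selftest. Argument order is an assumption.
 */
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

#ifndef __NR_map_shadow_stack
#define __NR_map_shadow_stack 453
#endif
#define SHADOW_STACK_SET_TOKEN (1ULL << 0)

int main(void)
{
	long ret = syscall(__NR_map_shadow_stack, 0UL, 0x1000UL, SHADOW_STACK_SET_TOKEN);

	if (ret == -1)
		perror("map_shadow_stack");	/* expected where shadow stacks are unsupported */
	else
		printf("shadow stack mapped at %#lx\n", (unsigned long)ret);
	return 0;
}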