aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/admin-guide/kernel-parameters.txt16
-rw-r--r--Documentation/arch/riscv/hwprobe.rst6
-rw-r--r--Documentation/devicetree/bindings/iommu/arm,smmu.yaml2
-rw-r--r--Documentation/devicetree/bindings/pwm/mxs-pwm.yaml13
-rw-r--r--Documentation/devicetree/bindings/watchdog/amlogic,meson-gxbb-wdt.yaml12
-rw-r--r--Documentation/devicetree/bindings/watchdog/aspeed-wdt.txt18
-rw-r--r--Documentation/devicetree/bindings/watchdog/fsl-imx7ulp-wdt.yaml5
-rw-r--r--Documentation/devicetree/bindings/watchdog/qcom-wdt.yaml2
-rw-r--r--MAINTAINERS50
-rw-r--r--arch/arm/configs/multi_v7_defconfig1
-rw-r--r--arch/arm/configs/tegra_defconfig1
-rw-r--r--arch/powerpc/kernel/iommu.c53
-rw-r--r--arch/riscv/Kconfig7
-rw-r--r--arch/riscv/Makefile4
-rw-r--r--arch/riscv/configs/defconfig52
-rw-r--r--arch/riscv/include/asm/acpi.h6
-rw-r--r--arch/riscv/include/asm/asm-prototypes.h1
-rw-r--r--arch/riscv/include/asm/asm.h41
-rw-r--r--arch/riscv/include/asm/cpufeature.h1
-rw-r--r--arch/riscv/include/asm/errata_list.h14
-rw-r--r--arch/riscv/include/asm/hwcap.h16
-rw-r--r--arch/riscv/include/asm/hwprobe.h7
-rw-r--r--arch/riscv/include/asm/irq_stack.h3
-rw-r--r--arch/riscv/include/asm/page.h4
-rw-r--r--arch/riscv/include/asm/pgtable-32.h3
-rw-r--r--arch/riscv/include/asm/pgtable.h3
-rw-r--r--arch/riscv/include/asm/scs.h54
-rw-r--r--arch/riscv/include/asm/thread_info.h16
-rw-r--r--arch/riscv/include/asm/vdso/processor.h2
-rw-r--r--arch/riscv/include/uapi/asm/hwprobe.h2
-rw-r--r--arch/riscv/kernel/acpi.c87
-rw-r--r--arch/riscv/kernel/asm-offsets.c9
-rw-r--r--arch/riscv/kernel/cpufeature.c17
-rw-r--r--arch/riscv/kernel/entry.S126
-rw-r--r--arch/riscv/kernel/head.S19
-rw-r--r--arch/riscv/kernel/irq.c56
-rw-r--r--arch/riscv/kernel/kexec_relocate.S52
-rw-r--r--arch/riscv/kernel/setup.c4
-rw-r--r--arch/riscv/kernel/smpboot.c4
-rw-r--r--arch/riscv/kernel/suspend_entry.S5
-rw-r--r--arch/riscv/kernel/sys_riscv.c46
-rw-r--r--arch/riscv/kernel/traps.c68
-rw-r--r--arch/riscv/kernel/vdso/Makefile2
-rw-r--r--arch/riscv/kernel/vdso/hwprobe.c2
-rw-r--r--arch/riscv/mm/cacheflush.c25
-rw-r--r--arch/riscv/mm/init.c2
-rw-r--r--arch/riscv/mm/ptdump.c3
-rw-r--r--arch/riscv/purgatory/Makefile8
-rw-r--r--arch/s390/Kconfig1
-rw-r--r--arch/s390/boot/ipl_parm.c8
-rw-r--r--arch/s390/boot/startup.c44
-rw-r--r--arch/s390/boot/vmem.c17
-rw-r--r--arch/s390/include/asm/mmu.h2
-rw-r--r--arch/s390/include/asm/mmu_context.h1
-rw-r--r--arch/s390/include/asm/page-states.h59
-rw-r--r--arch/s390/include/asm/page.h1
-rw-r--r--arch/s390/include/asm/pci.h11
-rw-r--r--arch/s390/include/asm/pci_clp.h3
-rw-r--r--arch/s390/include/asm/pci_dma.h121
-rw-r--r--arch/s390/include/asm/pgalloc.h1
-rw-r--r--arch/s390/include/asm/setup.h3
-rw-r--r--arch/s390/include/asm/stacktrace.h7
-rw-r--r--arch/s390/include/asm/tlb.h13
-rw-r--r--arch/s390/kernel/early.c1
-rw-r--r--arch/s390/kernel/perf_event.c41
-rw-r--r--arch/s390/kernel/stacktrace.c43
-rw-r--r--arch/s390/mm/gmap.c4
-rw-r--r--arch/s390/mm/init.c4
-rw-r--r--arch/s390/mm/page-states.c213
-rw-r--r--arch/s390/mm/pgalloc.c296
-rw-r--r--arch/s390/mm/vmem.c4
-rw-r--r--arch/s390/pci/Makefile2
-rw-r--r--arch/s390/pci/pci.c35
-rw-r--r--arch/s390/pci/pci_bus.c5
-rw-r--r--arch/s390/pci/pci_debug.c12
-rw-r--r--arch/s390/pci/pci_dma.c746
-rw-r--r--arch/s390/pci/pci_event.c17
-rw-r--r--arch/s390/pci/pci_sysfs.c19
-rw-r--r--drivers/acpi/riscv/rhct.c93
-rw-r--r--drivers/clocksource/timer-riscv.c17
-rw-r--r--drivers/gpio/gpio-mvebu.c1
-rw-r--r--drivers/gpu/drm/bridge/ti-sn65dsi86.c1
-rw-r--r--drivers/iommu/Kconfig15
-rw-r--r--drivers/iommu/Makefile1
-rw-r--r--drivers/iommu/amd/Kconfig9
-rw-r--r--drivers/iommu/amd/Makefile1
-rw-r--r--drivers/iommu/amd/amd_iommu.h35
-rw-r--r--drivers/iommu/amd/amd_iommu_types.h52
-rw-r--r--drivers/iommu/amd/init.c117
-rw-r--r--drivers/iommu/amd/io_pgtable_v2.c8
-rw-r--r--drivers/iommu/amd/iommu.c577
-rw-r--r--drivers/iommu/amd/iommu_v2.c996
-rw-r--r--drivers/iommu/apple-dart.c138
-rw-r--r--drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c71
-rw-r--r--drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c251
-rw-r--r--drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h17
-rw-r--r--drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c2
-rw-r--r--drivers/iommu/arm/arm-smmu/qcom_iommu.c45
-rw-r--r--drivers/iommu/dma-iommu.c200
-rw-r--r--drivers/iommu/exynos-iommu.c83
-rw-r--r--drivers/iommu/fsl_pamu_domain.c41
-rw-r--r--drivers/iommu/intel/debugfs.c215
-rw-r--r--drivers/iommu/intel/iommu.c19
-rw-r--r--drivers/iommu/intel/iommu.h14
-rw-r--r--drivers/iommu/iommu.c455
-rw-r--r--drivers/iommu/iommufd/selftest.c31
-rw-r--r--drivers/iommu/iova.c95
-rw-r--r--drivers/iommu/ipmmu-vmsa.c72
-rw-r--r--drivers/iommu/msm_iommu.c35
-rw-r--r--drivers/iommu/mtk_iommu.c35
-rw-r--r--drivers/iommu/mtk_iommu_v1.c28
-rw-r--r--drivers/iommu/omap-iommu.c69
-rw-r--r--drivers/iommu/omap-iommu.h2
-rw-r--r--drivers/iommu/rockchip-iommu.c59
-rw-r--r--drivers/iommu/s390-iommu.c424
-rw-r--r--drivers/iommu/sprd-iommu.c36
-rw-r--r--drivers/iommu/sun50i-iommu.c80
-rw-r--r--drivers/iommu/tegra-gart.c371
-rw-r--r--drivers/iommu/tegra-smmu.c58
-rw-r--r--drivers/iommu/virtio-iommu.c4
-rw-r--r--drivers/leds/rgb/leds-qcom-lpg.c1
-rw-r--r--drivers/memory/tegra/mc.c34
-rw-r--r--drivers/memory/tegra/tegra20.c28
-rw-r--r--drivers/misc/lkdtm/cfi.c13
-rw-r--r--drivers/pwm/Kconfig20
-rw-r--r--drivers/pwm/Makefile1
-rw-r--r--drivers/pwm/core.c55
-rw-r--r--drivers/pwm/pwm-ab8500.c1
-rw-r--r--drivers/pwm/pwm-apple.c1
-rw-r--r--drivers/pwm/pwm-atmel-hlcdc.c1
-rw-r--r--drivers/pwm/pwm-atmel-tcb.c1
-rw-r--r--drivers/pwm/pwm-atmel.c3
-rw-r--r--drivers/pwm/pwm-bcm-iproc.c38
-rw-r--r--drivers/pwm/pwm-bcm-kona.c1
-rw-r--r--drivers/pwm/pwm-bcm2835.c36
-rw-r--r--drivers/pwm/pwm-berlin.c63
-rw-r--r--drivers/pwm/pwm-brcmstb.c49
-rw-r--r--drivers/pwm/pwm-clk.c1
-rw-r--r--drivers/pwm/pwm-clps711x.c1
-rw-r--r--drivers/pwm/pwm-crc.c5
-rw-r--r--drivers/pwm/pwm-cros-ec.c67
-rw-r--r--drivers/pwm/pwm-dwc-core.c184
-rw-r--r--drivers/pwm/pwm-dwc.c197
-rw-r--r--drivers/pwm/pwm-dwc.h60
-rw-r--r--drivers/pwm/pwm-ep93xx.c1
-rw-r--r--drivers/pwm/pwm-fsl-ftm.c1
-rw-r--r--drivers/pwm/pwm-hibvt.c1
-rw-r--r--drivers/pwm/pwm-img.c1
-rw-r--r--drivers/pwm/pwm-imx-tpm.c30
-rw-r--r--drivers/pwm/pwm-imx1.c1
-rw-r--r--drivers/pwm/pwm-imx27.c1
-rw-r--r--drivers/pwm/pwm-intel-lgm.c1
-rw-r--r--drivers/pwm/pwm-iqs620a.c1
-rw-r--r--drivers/pwm/pwm-jz4740.c46
-rw-r--r--drivers/pwm/pwm-keembay.c1
-rw-r--r--drivers/pwm/pwm-lp3943.c22
-rw-r--r--drivers/pwm/pwm-lpc18xx-sct.c1
-rw-r--r--drivers/pwm/pwm-lpc32xx.c1
-rw-r--r--drivers/pwm/pwm-lpss.c1
-rw-r--r--drivers/pwm/pwm-mediatek.c1
-rw-r--r--drivers/pwm/pwm-meson.c1
-rw-r--r--drivers/pwm/pwm-microchip-core.c1
-rw-r--r--drivers/pwm/pwm-mtk-disp.c25
-rw-r--r--drivers/pwm/pwm-mxs.c1
-rw-r--r--drivers/pwm/pwm-ntxec.c1
-rw-r--r--drivers/pwm/pwm-omap-dmtimer.c3
-rw-r--r--drivers/pwm/pwm-pca9685.c1
-rw-r--r--drivers/pwm/pwm-pxa.c3
-rw-r--r--drivers/pwm/pwm-raspberrypi-poe.c1
-rw-r--r--drivers/pwm/pwm-rcar.c1
-rw-r--r--drivers/pwm/pwm-renesas-tpu.c23
-rw-r--r--drivers/pwm/pwm-rockchip.c1
-rw-r--r--drivers/pwm/pwm-rz-mtu3.c1
-rw-r--r--drivers/pwm/pwm-samsung.c152
-rw-r--r--drivers/pwm/pwm-sifive.c1
-rw-r--r--drivers/pwm/pwm-sl28cpld.c1
-rw-r--r--drivers/pwm/pwm-spear.c41
-rw-r--r--drivers/pwm/pwm-sprd.c29
-rw-r--r--drivers/pwm/pwm-sti.c30
-rw-r--r--drivers/pwm/pwm-stm32-lp.c1
-rw-r--r--drivers/pwm/pwm-stm32.c1
-rw-r--r--drivers/pwm/pwm-stmpe.c1
-rw-r--r--drivers/pwm/pwm-sun4i.c1
-rw-r--r--drivers/pwm/pwm-sunplus.c1
-rw-r--r--drivers/pwm/pwm-tegra.c1
-rw-r--r--drivers/pwm/pwm-tiecap.c1
-rw-r--r--drivers/pwm/pwm-tiehrpwm.c1
-rw-r--r--drivers/pwm/pwm-twl-led.c2
-rw-r--r--drivers/pwm/pwm-twl.c2
-rw-r--r--drivers/pwm/pwm-visconti.c1
-rw-r--r--drivers/pwm/pwm-vt8500.c43
-rw-r--r--drivers/pwm/pwm-xilinx.c1
-rw-r--r--drivers/s390/crypto/ap_bus.c47
-rw-r--r--drivers/s390/crypto/ap_bus.h1
-rw-r--r--drivers/s390/crypto/ap_queue.c36
-rw-r--r--drivers/s390/crypto/zcrypt_card.c4
-rw-r--r--drivers/s390/crypto/zcrypt_queue.c5
-rw-r--r--drivers/staging/greybus/pwm.c1
-rw-r--r--drivers/watchdog/apple_wdt.c25
-rw-r--r--drivers/watchdog/aspeed_wdt.c11
-rw-r--r--drivers/watchdog/at91sam9_wdt.c20
-rw-r--r--drivers/watchdog/ath79_wdt.c19
-rw-r--r--drivers/watchdog/gpio_wdt.c16
-rw-r--r--drivers/watchdog/imx7ulp_wdt.c8
-rw-r--r--drivers/watchdog/imx_sc_wdt.c5
-rw-r--r--drivers/watchdog/it87_wdt.c8
-rw-r--r--drivers/watchdog/ixp4xx_wdt.c28
-rw-r--r--drivers/watchdog/marvell_gti_wdt.c14
-rw-r--r--drivers/watchdog/mlx_wdt.c1
-rw-r--r--drivers/watchdog/of_xilinx_wdt.c13
-rw-r--r--drivers/watchdog/sbsa_gwdt.c4
-rw-r--r--drivers/watchdog/st_lpc_wdt.c11
-rw-r--r--drivers/watchdog/sunplus_wdt.c17
-rw-r--r--drivers/watchdog/wdat_wdt.c2
-rw-r--r--fs/exfat/file.c1
-rw-r--r--fs/exfat/inode.c4
-rw-r--r--fs/nfs/Kconfig2
-rw-r--r--fs/nfs/delegation.c7
-rw-r--r--fs/nfs/delegation.h1
-rw-r--r--fs/nfs/dir.c29
-rw-r--r--fs/nfs/nfs3proc.c3
-rw-r--r--fs/nfs/nfs4_fs.h2
-rw-r--r--fs/nfs/nfs4proc.c62
-rw-r--r--fs/nfs/pnfs.c8
-rw-r--r--fs/nfs/pnfs.h5
-rw-r--r--fs/nfs/proc.c3
-rw-r--r--fs/nfs/super.c8
-rw-r--r--fs/nfs/write.c2
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c45
-rw-r--r--fs/xfs/libxfs/xfs_format.h34
-rw-r--r--fs/xfs/libxfs/xfs_rtbitmap.c807
-rw-r--r--fs/xfs/libxfs/xfs_rtbitmap.h383
-rw-r--r--fs/xfs/libxfs/xfs_sb.c2
-rw-r--r--fs/xfs/libxfs/xfs_sb.h2
-rw-r--r--fs/xfs/libxfs/xfs_trans_resv.c10
-rw-r--r--fs/xfs/libxfs/xfs_types.c4
-rw-r--r--fs/xfs/libxfs/xfs_types.h10
-rw-r--r--fs/xfs/scrub/bmap.c2
-rw-r--r--fs/xfs/scrub/fscounters.c2
-rw-r--r--fs/xfs/scrub/inode.c3
-rw-r--r--fs/xfs/scrub/rtbitmap.c28
-rw-r--r--fs/xfs/scrub/rtsummary.c72
-rw-r--r--fs/xfs/scrub/trace.c1
-rw-r--r--fs/xfs/scrub/trace.h15
-rw-r--r--fs/xfs/xfs_bmap_util.c74
-rw-r--r--fs/xfs/xfs_file.c63
-rw-r--r--fs/xfs/xfs_fsmap.c15
-rw-r--r--fs/xfs/xfs_inode.c24
-rw-r--r--fs/xfs/xfs_inode.h9
-rw-r--r--fs/xfs/xfs_inode_item.c3
-rw-r--r--fs/xfs/xfs_ioctl.c5
-rw-r--r--fs/xfs/xfs_linux.h12
-rw-r--r--fs/xfs/xfs_mount.h8
-rw-r--r--fs/xfs/xfs_ondisk.h4
-rw-r--r--fs/xfs/xfs_reflink.c4
-rw-r--r--fs/xfs/xfs_rtalloc.c638
-rw-r--r--fs/xfs/xfs_rtalloc.h94
-rw-r--r--fs/xfs/xfs_super.c3
-rw-r--r--fs/xfs/xfs_trans.c7
-rw-r--r--include/dt-bindings/watchdog/aspeed-wdt.h92
-rw-r--r--include/linux/amd-iommu.h120
-rw-r--r--include/linux/iommu.h38
-rw-r--r--include/linux/nfs_fs_sb.h1
-rw-r--r--include/linux/nfs_xdr.h2
-rw-r--r--include/linux/pwm.h24
-rw-r--r--include/linux/sunrpc/clnt.h1
-rw-r--r--include/soc/tegra/mc.h26
-rw-r--r--kernel/debug/debug_core.c3
-rw-r--r--kernel/debug/kdb/kdb_main.c7
-rw-r--r--kernel/rcu/rcu.h2
-rw-r--r--kernel/rcu/tasks.h32
-rw-r--r--kernel/rcu/tree.c43
-rw-r--r--mm/memblock.c2
-rw-r--r--net/sunrpc/clnt.c10
-rw-r--r--net/sunrpc/rpcb_clnt.c4
-rw-r--r--net/sunrpc/xprt.c4
-rw-r--r--net/sunrpc/xprtsock.c14
-rw-r--r--tools/testing/selftests/riscv/hwprobe/Makefile9
-rw-r--r--tools/testing/selftests/riscv/hwprobe/cbo.c228
-rw-r--r--tools/testing/selftests/riscv/hwprobe/hwprobe.c64
-rw-r--r--tools/testing/selftests/riscv/hwprobe/hwprobe.h15
281 files changed, 5810 insertions, 6650 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 2a4bc78c27ec..65731b060e3f 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2227,7 +2227,7 @@
forcing Dual Address Cycle for PCI cards supporting
greater than 32-bit addressing.
- iommu.strict= [ARM64, X86] Configure TLB invalidation behaviour
+ iommu.strict= [ARM64, X86, S390] Configure TLB invalidation behaviour
Format: { "0" | "1" }
0 - Lazy mode.
Request that DMA unmap operations use deferred
@@ -3596,6 +3596,13 @@
[NFS] set the TCP port on which the NFSv4 callback
channel should listen.
+ nfs.delay_retrans=
+ [NFS] specifies the number of times the NFSv4 client
+ retries the request before returning an EAGAIN error,
+ after a reply of NFS4ERR_DELAY from the server.
+ Only applies if the softerr mount option is enabled,
+ and the specified value is >= 0.
+
nfs.enable_ino64=
[NFS] enable 64-bit inode numbers.
If zero, the NFS client will fake up a 32-bit inode
@@ -5687,9 +5694,10 @@
s390_iommu= [HW,S390]
Set s390 IOTLB flushing mode
strict
- With strict flushing every unmap operation will result in
- an IOTLB flush. Default is lazy flushing before reuse,
- which is faster.
+ With strict flushing every unmap operation will result
+ in an IOTLB flush. Default is lazy flushing before
+ reuse, which is faster. Deprecated, equivalent to
+ iommu.strict=1.
s390_iommu_aperture= [KNL,S390]
Specifies the size of the per device DMA address space
diff --git a/Documentation/arch/riscv/hwprobe.rst b/Documentation/arch/riscv/hwprobe.rst
index a52996b22f75..7b2384de471f 100644
--- a/Documentation/arch/riscv/hwprobe.rst
+++ b/Documentation/arch/riscv/hwprobe.rst
@@ -77,6 +77,9 @@ The following keys are defined:
* :c:macro:`RISCV_HWPROBE_EXT_ZBS`: The Zbs extension is supported, as defined
in version 1.0 of the Bit-Manipulation ISA extensions.
+ * :c:macro:`RISCV_HWPROBE_EXT_ZICBOZ`: The Zicboz extension is supported, as
+ ratified in commit 3dd606f ("Create cmobase-v1.0.pdf") of riscv-CMOs.
+
* :c:macro:`RISCV_HWPROBE_KEY_CPUPERF_0`: A bitmask that contains performance
information about the selected set of processors.
@@ -96,3 +99,6 @@ The following keys are defined:
* :c:macro:`RISCV_HWPROBE_MISALIGNED_UNSUPPORTED`: Misaligned accesses are
not supported at all and will generate a misaligned address fault.
+
+* :c:macro:`RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE`: An unsigned int which
+ represents the size of the Zicboz block in bytes.
diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
index b1b2cf81b42f..aa9e1c0895a5 100644
--- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
+++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
@@ -110,6 +110,7 @@ properties:
- qcom,sdm630-smmu-v2
- qcom,sdm845-smmu-v2
- qcom,sm6350-smmu-v2
+ - qcom,sm7150-smmu-v2
- const: qcom,adreno-smmu
- const: qcom,smmu-v2
- description: Qcom Adreno GPUs on Google Cheza platform
@@ -409,6 +410,7 @@ allOf:
contains:
enum:
- qcom,sm6350-smmu-v2
+ - qcom,sm7150-smmu-v2
- qcom,sm8150-smmu-500
- qcom,sm8250-smmu-500
then:
diff --git a/Documentation/devicetree/bindings/pwm/mxs-pwm.yaml b/Documentation/devicetree/bindings/pwm/mxs-pwm.yaml
index 6ffbed204c25..8f50e23ca8c9 100644
--- a/Documentation/devicetree/bindings/pwm/mxs-pwm.yaml
+++ b/Documentation/devicetree/bindings/pwm/mxs-pwm.yaml
@@ -15,12 +15,19 @@ allOf:
properties:
compatible:
- enum:
- - fsl,imx23-pwm
+ oneOf:
+ - const: fsl,imx23-pwm
+ - items:
+ - enum:
+ - fsl,imx28-pwm
+ - const: fsl,imx23-pwm
reg:
maxItems: 1
+ clocks:
+ maxItems: 1
+
"#pwm-cells":
const: 3
@@ -31,6 +38,7 @@ properties:
required:
- compatible
- reg
+ - clocks
- fsl,pwm-number
additionalProperties: false
@@ -40,6 +48,7 @@ examples:
pwm@80064000 {
compatible = "fsl,imx23-pwm";
reg = <0x80064000 0x2000>;
+ clocks = <&clks 30>;
#pwm-cells = <3>;
fsl,pwm-number = <8>;
};
diff --git a/Documentation/devicetree/bindings/watchdog/amlogic,meson-gxbb-wdt.yaml b/Documentation/devicetree/bindings/watchdog/amlogic,meson-gxbb-wdt.yaml
index 443e2e7ab467..69845ec32e81 100644
--- a/Documentation/devicetree/bindings/watchdog/amlogic,meson-gxbb-wdt.yaml
+++ b/Documentation/devicetree/bindings/watchdog/amlogic,meson-gxbb-wdt.yaml
@@ -15,9 +15,15 @@ allOf:
properties:
compatible:
- enum:
- - amlogic,meson-gxbb-wdt
- - amlogic,t7-wdt
+ oneOf:
+ - enum:
+ - amlogic,meson-gxbb-wdt
+ - amlogic,t7-wdt
+ - items:
+ - enum:
+ - amlogic,c3-wdt
+ - amlogic,s4-wdt
+ - const: amlogic,t7-wdt
reg:
maxItems: 1
diff --git a/Documentation/devicetree/bindings/watchdog/aspeed-wdt.txt b/Documentation/devicetree/bindings/watchdog/aspeed-wdt.txt
index a8197632d6d2..3208adb3e52e 100644
--- a/Documentation/devicetree/bindings/watchdog/aspeed-wdt.txt
+++ b/Documentation/devicetree/bindings/watchdog/aspeed-wdt.txt
@@ -47,7 +47,15 @@ Optional properties for AST2500-compatible watchdogs:
is configured as push-pull, then set the pulse
polarity to active-high. The default is active-low.
-Example:
+Optional properties for AST2500- and AST2600-compatible watchdogs:
+ - aspeed,reset-mask: A bitmask indicating which peripherals will be reset if
+ the watchdog timer expires. On AST2500 this should be a
+ single word defined using the AST2500_WDT_RESET_* macros;
+ on AST2600 this should be a two-word array with the first
+ word defined using the AST2600_WDT_RESET1_* macros and the
+ second word defined using the AST2600_WDT_RESET2_* macros.
+
+Examples:
wdt1: watchdog@1e785000 {
compatible = "aspeed,ast2400-wdt";
@@ -55,3 +63,11 @@ Example:
aspeed,reset-type = "system";
aspeed,external-signal;
};
+
+ #include <dt-bindings/watchdog/aspeed-wdt.h>
+ wdt2: watchdog@1e785040 {
+ compatible = "aspeed,ast2600-wdt";
+ reg = <0x1e785040 0x40>;
+ aspeed,reset-mask = <AST2600_WDT_RESET1_DEFAULT
+ (AST2600_WDT_RESET2_DEFAULT & ~AST2600_WDT_RESET2_LPC)>;
+ };
diff --git a/Documentation/devicetree/bindings/watchdog/fsl-imx7ulp-wdt.yaml b/Documentation/devicetree/bindings/watchdog/fsl-imx7ulp-wdt.yaml
index 4b7ed1355701..9c50766bf690 100644
--- a/Documentation/devicetree/bindings/watchdog/fsl-imx7ulp-wdt.yaml
+++ b/Documentation/devicetree/bindings/watchdog/fsl-imx7ulp-wdt.yaml
@@ -30,6 +30,11 @@ properties:
clocks:
maxItems: 1
+ fsl,ext-reset-output:
+ description:
+ When set, wdog can generate external reset from the wdog_any pin.
+ type: boolean
+
required:
- compatible
- interrupts
diff --git a/Documentation/devicetree/bindings/watchdog/qcom-wdt.yaml b/Documentation/devicetree/bindings/watchdog/qcom-wdt.yaml
index 5046dfa55f13..c12bc852aedc 100644
--- a/Documentation/devicetree/bindings/watchdog/qcom-wdt.yaml
+++ b/Documentation/devicetree/bindings/watchdog/qcom-wdt.yaml
@@ -21,6 +21,8 @@ properties:
- qcom,apss-wdt-ipq5018
- qcom,apss-wdt-ipq5332
- qcom,apss-wdt-ipq9574
+ - qcom,apss-wdt-msm8226
+ - qcom,apss-wdt-msm8974
- qcom,apss-wdt-msm8994
- qcom,apss-wdt-qcm2290
- qcom,apss-wdt-qcs404
diff --git a/MAINTAINERS b/MAINTAINERS
index 978bb58d7d07..350d00657f6b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -311,7 +311,7 @@ ACPI COMPONENT ARCHITECTURE (ACPICA)
M: Robert Moore <[email protected]>
M: "Rafael J. Wysocki" <[email protected]>
S: Supported
W: https://acpica.org/
W: https://github.com/acpica/acpica/
@@ -6534,7 +6534,7 @@ F: drivers/gpu/drm/ast/
DRM DRIVER FOR BOCHS VIRTUAL GPU
M: Gerd Hoffmann <[email protected]>
S: Maintained
T: git git://anongit.freedesktop.org/drm/drm-misc
F: drivers/gpu/drm/tiny/bochs.c
@@ -6781,7 +6781,7 @@ F: drivers/gpu/drm/tiny/repaper.c
DRM DRIVER FOR QEMU'S CIRRUS DEVICE
M: Dave Airlie <[email protected]>
M: Gerd Hoffmann <[email protected]>
S: Obsolete
W: https://www.kraxel.org/blog/2014/10/qemu-using-cirrus-considered-harmful/
T: git git://anongit.freedesktop.org/drm/drm-misc
@@ -6790,7 +6790,7 @@ F: drivers/gpu/drm/tiny/cirrus.c
DRM DRIVER FOR QXL VIRTUAL GPU
M: Dave Airlie <[email protected]>
M: Gerd Hoffmann <[email protected]>
S: Maintained
T: git git://anongit.freedesktop.org/drm/drm-misc
@@ -7889,7 +7889,7 @@ F: drivers/net/can/usb/etas_es58x/
ETHERNET BRIDGE
M: Roopa Prabhu <[email protected]>
M: Nikolay Aleksandrov <[email protected]>
-L: [email protected] (moderated for non-subscribers)
S: Maintained
W: http://www.linuxfoundation.org/en/Net:Bridge
@@ -16394,7 +16394,7 @@ M: Juergen Gross <[email protected]>
R: Ajay Kaher <[email protected]>
R: Alexey Makhalov <[email protected]>
R: VMware PV-Drivers Reviewers <[email protected]>
S: Supported
T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86/core
@@ -22972,7 +22972,7 @@ VIRTIO AND VHOST VSOCK DRIVER
M: Stefan Hajnoczi <[email protected]>
M: Stefano Garzarella <[email protected]>
S: Maintained
F: drivers/vhost/vsock.c
@@ -22984,7 +22984,7 @@ F: net/vmw_vsock/virtio_transport_common.c
VIRTIO BALLOON
M: "Michael S. Tsirkin" <[email protected]>
M: David Hildenbrand <[email protected]>
S: Maintained
F: drivers/virtio/virtio_balloon.c
F: include/linux/balloon_compaction.h
@@ -22996,7 +22996,7 @@ M: "Michael S. Tsirkin" <[email protected]>
M: Jason Wang <[email protected]>
R: Paolo Bonzini <[email protected]>
R: Stefan Hajnoczi <[email protected]>
S: Maintained
F: drivers/block/virtio_blk.c
F: drivers/scsi/virtio_scsi.c
@@ -23005,7 +23005,7 @@ F: include/uapi/linux/virtio_scsi.h
VIRTIO CONSOLE DRIVER
M: Amit Shah <[email protected]>
S: Maintained
F: drivers/char/virtio_console.c
F: include/linux/virtio_console.h
@@ -23015,7 +23015,7 @@ VIRTIO CORE AND NET DRIVERS
M: "Michael S. Tsirkin" <[email protected]>
M: Jason Wang <[email protected]>
R: Xuan Zhuo <[email protected]>
S: Maintained
F: Documentation/ABI/testing/sysfs-bus-vdpa
F: Documentation/ABI/testing/sysfs-class-vduse
@@ -23034,7 +23034,7 @@ F: tools/virtio/
VIRTIO CRYPTO DRIVER
M: Gonglei <[email protected]>
S: Maintained
F: drivers/crypto/virtio/
@@ -23045,7 +23045,7 @@ M: Cornelia Huck <[email protected]>
M: Halil Pasic <[email protected]>
M: Eric Farman <[email protected]>
S: Supported
F: arch/s390/include/uapi/asm/virtio-ccw.h
@@ -23055,7 +23055,7 @@ VIRTIO FILE SYSTEM
M: Vivek Goyal <[email protected]>
M: Stefan Hajnoczi <[email protected]>
M: Miklos Szeredi <[email protected]>
S: Supported
W: https://virtio-fs.gitlab.io/
@@ -23067,7 +23067,7 @@ VIRTIO GPIO DRIVER
M: Enrico Weigelt, metux IT consult <[email protected]>
M: Viresh Kumar <[email protected]>
S: Maintained
F: drivers/gpio/gpio-virtio.c
F: include/uapi/linux/virtio_gpio.h
@@ -23078,7 +23078,7 @@ M: Gerd Hoffmann <[email protected]>
R: Gurchetan Singh <[email protected]>
R: Chia-I Wu <[email protected]>
S: Maintained
T: git git://anongit.freedesktop.org/drm/drm-misc
F: drivers/gpu/drm/ci/xfails/virtio*
@@ -23089,7 +23089,7 @@ VIRTIO HOST (VHOST)
M: "Michael S. Tsirkin" <[email protected]>
M: Jason Wang <[email protected]>
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost.git
@@ -23105,7 +23105,7 @@ M: Jason Wang <[email protected]>
M: Mike Christie <[email protected]>
R: Paolo Bonzini <[email protected]>
R: Stefan Hajnoczi <[email protected]>
S: Maintained
F: drivers/vhost/scsi.c
@@ -23113,7 +23113,7 @@ VIRTIO I2C DRIVER
M: Conghui Chen <[email protected]>
M: Viresh Kumar <[email protected]>
S: Maintained
F: drivers/i2c/busses/i2c-virtio.c
F: include/uapi/linux/virtio_i2c.h
@@ -23126,14 +23126,14 @@ F: include/uapi/linux/virtio_input.h
VIRTIO IOMMU DRIVER
M: Jean-Philippe Brucker <[email protected]>
S: Maintained
F: drivers/iommu/virtio-iommu.c
F: include/uapi/linux/virtio_iommu.h
VIRTIO MEM DRIVER
M: David Hildenbrand <[email protected]>
S: Maintained
W: https://virtio-mem.gitlab.io/
F: drivers/virtio/virtio_mem.c
@@ -23141,7 +23141,7 @@ F: include/uapi/linux/virtio_mem.h
VIRTIO PMEM DRIVER
M: Pankaj Gupta <[email protected]>
S: Maintained
F: drivers/nvdimm/nd_virtio.c
F: drivers/nvdimm/virtio_pmem.c
@@ -23149,7 +23149,7 @@ F: drivers/nvdimm/virtio_pmem.c
VIRTIO SOUND DRIVER
M: Anton Yakovlev <[email protected]>
M: "Michael S. Tsirkin" <[email protected]>
L: [email protected] (moderated for non-subscribers)
S: Maintained
F: include/uapi/linux/virtio_snd.h
@@ -23207,7 +23207,7 @@ F: include/linux/vlynq.h
VM SOCKETS (AF_VSOCK)
M: Stefano Garzarella <[email protected]>
S: Maintained
F: drivers/net/vsockmon.c
@@ -23251,7 +23251,7 @@ VMWARE HYPERVISOR INTERFACE
M: Ajay Kaher <[email protected]>
M: Alexey Makhalov <[email protected]>
R: VMware PV-Drivers Reviewers <[email protected]>
S: Supported
T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86/vmware
diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig
index bb9f0e5b0b63..10fd74bf85f9 100644
--- a/arch/arm/configs/multi_v7_defconfig
+++ b/arch/arm/configs/multi_v7_defconfig
@@ -1076,7 +1076,6 @@ CONFIG_QCOM_IPCC=y
CONFIG_OMAP_IOMMU=y
CONFIG_OMAP_IOMMU_DEBUG=y
CONFIG_ROCKCHIP_IOMMU=y
-CONFIG_TEGRA_IOMMU_GART=y
CONFIG_TEGRA_IOMMU_SMMU=y
CONFIG_EXYNOS_IOMMU=y
CONFIG_QCOM_IOMMU=y
diff --git a/arch/arm/configs/tegra_defconfig b/arch/arm/configs/tegra_defconfig
index 613f07b8ce15..8635b7216bfc 100644
--- a/arch/arm/configs/tegra_defconfig
+++ b/arch/arm/configs/tegra_defconfig
@@ -292,7 +292,6 @@ CONFIG_CHROME_PLATFORMS=y
CONFIG_CROS_EC=y
CONFIG_CROS_EC_I2C=m
CONFIG_CROS_EC_SPI=m
-CONFIG_TEGRA_IOMMU_GART=y
CONFIG_TEGRA_IOMMU_SMMU=y
CONFIG_ARCH_TEGRA_2x_SOC=y
CONFIG_ARCH_TEGRA_3x_SOC=y
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index 3e28579f7c62..ebe259bdd462 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -1280,13 +1280,19 @@ struct iommu_table_group_ops spapr_tce_table_group_ops = {
/*
* A simple iommu_ops to allow less cruft in generic VFIO code.
*/
-static int spapr_tce_blocking_iommu_attach_dev(struct iommu_domain *dom,
- struct device *dev)
+static int
+spapr_tce_platform_iommu_attach_dev(struct iommu_domain *platform_domain,
+ struct device *dev)
{
+ struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
struct iommu_group *grp = iommu_group_get(dev);
struct iommu_table_group *table_group;
int ret = -EINVAL;
+ /* At first attach the ownership is already set */
+ if (!domain)
+ return 0;
+
if (!grp)
return -ENODEV;
@@ -1297,17 +1303,22 @@ static int spapr_tce_blocking_iommu_attach_dev(struct iommu_domain *dom,
return ret;
}
-static void spapr_tce_blocking_iommu_set_platform_dma(struct device *dev)
-{
- struct iommu_group *grp = iommu_group_get(dev);
- struct iommu_table_group *table_group;
+static const struct iommu_domain_ops spapr_tce_platform_domain_ops = {
+ .attach_dev = spapr_tce_platform_iommu_attach_dev,
+};
- table_group = iommu_group_get_iommudata(grp);
- table_group->ops->release_ownership(table_group);
-}
+static struct iommu_domain spapr_tce_platform_domain = {
+ .type = IOMMU_DOMAIN_PLATFORM,
+ .ops = &spapr_tce_platform_domain_ops,
+};
-static const struct iommu_domain_ops spapr_tce_blocking_domain_ops = {
- .attach_dev = spapr_tce_blocking_iommu_attach_dev,
+static struct iommu_domain spapr_tce_blocked_domain = {
+ .type = IOMMU_DOMAIN_BLOCKED,
+ /*
+ * FIXME: SPAPR mixes blocked and platform behaviors, the blocked domain
+ * also sets the dma_api ops
+ */
+ .ops = &spapr_tce_platform_domain_ops,
};
static bool spapr_tce_iommu_capable(struct device *dev, enum iommu_cap cap)
@@ -1322,22 +1333,6 @@ static bool spapr_tce_iommu_capable(struct device *dev, enum iommu_cap cap)
return false;
}
-static struct iommu_domain *spapr_tce_iommu_domain_alloc(unsigned int type)
-{
- struct iommu_domain *dom;
-
- if (type != IOMMU_DOMAIN_BLOCKED)
- return NULL;
-
- dom = kzalloc(sizeof(*dom), GFP_KERNEL);
- if (!dom)
- return NULL;
-
- dom->ops = &spapr_tce_blocking_domain_ops;
-
- return dom;
-}
-
static struct iommu_device *spapr_tce_iommu_probe_device(struct device *dev)
{
struct pci_dev *pdev;
@@ -1371,12 +1366,12 @@ static struct iommu_group *spapr_tce_iommu_device_group(struct device *dev)
}
static const struct iommu_ops spapr_tce_iommu_ops = {
+ .default_domain = &spapr_tce_platform_domain,
+ .blocked_domain = &spapr_tce_blocked_domain,
.capable = spapr_tce_iommu_capable,
- .domain_alloc = spapr_tce_iommu_domain_alloc,
.probe_device = spapr_tce_iommu_probe_device,
.release_device = spapr_tce_iommu_release_device,
.device_group = spapr_tce_iommu_device_group,
- .set_platform_dma_ops = spapr_tce_blocking_iommu_set_platform_dma,
};
static struct attribute *spapr_tce_iommu_attrs[] = {
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index eaa15a20e6ae..18b03ad0e6b9 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -39,6 +39,7 @@ config RISCV
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
select ARCH_HAS_UBSAN_SANITIZE_ALL
select ARCH_HAS_VDSO_DATA
+ select ARCH_KEEP_MEMBLOCK if ACPI
select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX
select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT
select ARCH_STACKWALK
@@ -48,6 +49,7 @@ config RISCV
select ARCH_SUPPORTS_HUGETLBFS if MMU
select ARCH_SUPPORTS_PAGE_TABLE_CHECK if MMU
select ARCH_SUPPORTS_PER_VMA_LOCK if MMU
+ select ARCH_SUPPORTS_SHADOW_CALL_STACK if HAVE_SHADOW_CALL_STACK
select ARCH_USE_MEMTEST
select ARCH_USE_QUEUED_RWLOCKS
select ARCH_USES_CFI_TRAPS if CFI_CLANG
@@ -174,6 +176,11 @@ config GCC_SUPPORTS_DYNAMIC_FTRACE
def_bool CC_IS_GCC
depends on $(cc-option,-fpatchable-function-entry=8)
+config HAVE_SHADOW_CALL_STACK
+ def_bool $(cc-option,-fsanitize=shadow-call-stack)
+ # https://github.com/riscv-non-isa/riscv-elf-psabi-doc/commit/a484e843e6eeb51f0cb7b8819e50da6d2444d769
+ depends on $(ld-option,--no-relax-gp)
+
config ARCH_MMAP_RND_BITS_MIN
default 18 if 64BIT
default 8
diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
index 4d06f3402674..a74be78678eb 100644
--- a/arch/riscv/Makefile
+++ b/arch/riscv/Makefile
@@ -54,6 +54,10 @@ endif
endif
endif
+ifeq ($(CONFIG_SHADOW_CALL_STACK),y)
+ KBUILD_LDFLAGS += --no-relax-gp
+endif
+
# ISA string setting
riscv-march-$(CONFIG_ARCH_RV32I) := rv32ima
riscv-march-$(CONFIG_ARCH_RV64I) := rv64ima
diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig
index 1edf3cd886c5..938fa07ddf77 100644
--- a/arch/riscv/configs/defconfig
+++ b/arch/riscv/configs/defconfig
@@ -37,6 +37,13 @@ CONFIG_SMP=y
CONFIG_HOTPLUG_CPU=y
CONFIG_PM=y
CONFIG_CPU_IDLE=y
+CONFIG_CPU_FREQ=y
+CONFIG_CPU_FREQ_STAT=y
+CONFIG_CPU_FREQ_GOV_POWERSAVE=m
+CONFIG_CPU_FREQ_GOV_USERSPACE=y
+CONFIG_CPU_FREQ_GOV_ONDEMAND=y
+CONFIG_CPU_FREQ_GOV_CONSERVATIVE=m
+CONFIG_CPUFREQ_DT=y
CONFIG_VIRTUALIZATION=y
CONFIG_KVM=m
CONFIG_ACPI=y
@@ -95,6 +102,7 @@ CONFIG_NETLINK_DIAG=y
CONFIG_CGROUP_NET_PRIO=y
CONFIG_NET_9P=y
CONFIG_NET_9P_VIRTIO=y
+CONFIG_CAN=m
CONFIG_PCI=y
CONFIG_PCIEPORTBUS=y
CONFIG_PCI_HOST_GENERIC=y
@@ -102,6 +110,11 @@ CONFIG_PCIE_XILINX=y
CONFIG_PCIE_FU740=y
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
+CONFIG_MTD=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_CFI=y
+CONFIG_MTD_CFI_ADV_OPTIONS=y
+CONFIG_MTD_SPI_NOR=y
CONFIG_BLK_DEV_LOOP=y
CONFIG_VIRTIO_BLK=y
CONFIG_BLK_DEV_NVME=m
@@ -124,8 +137,11 @@ CONFIG_VIRTIO_NET=y
CONFIG_MACB=y
CONFIG_E1000E=y
CONFIG_R8169=y
+CONFIG_RAVB=y
CONFIG_STMMAC_ETH=m
+CONFIG_MICREL_PHY=y
CONFIG_MICROSEMI_PHY=y
+CONFIG_CAN_RCAR_CANFD=m
CONFIG_INPUT_MOUSEDEV=y
CONFIG_KEYBOARD_SUN4I_LRADC=m
CONFIG_SERIAL_8250=y
@@ -136,16 +152,24 @@ CONFIG_SERIAL_SH_SCI=y
CONFIG_VIRTIO_CONSOLE=y
CONFIG_HW_RANDOM=y
CONFIG_HW_RANDOM_VIRTIO=y
+CONFIG_I2C_CHARDEV=m
CONFIG_I2C_MV64XXX=m
+CONFIG_I2C_RIIC=y
CONFIG_SPI=y
+CONFIG_SPI_RSPI=m
CONFIG_SPI_SIFIVE=y
CONFIG_SPI_SUN6I=y
# CONFIG_PTP_1588_CLOCK is not set
CONFIG_GPIO_SIFIVE=y
+CONFIG_CPU_THERMAL=y
+CONFIG_DEVFREQ_THERMAL=y
+CONFIG_RZG2L_THERMAL=y
CONFIG_WATCHDOG=y
CONFIG_SUNXI_WATCHDOG=y
+CONFIG_RENESAS_RZG2LWDT=y
CONFIG_REGULATOR=y
CONFIG_REGULATOR_FIXED_VOLTAGE=y
+CONFIG_REGULATOR_GPIO=y
CONFIG_DRM=m
CONFIG_DRM_RADEON=m
CONFIG_DRM_NOUVEAU=m
@@ -153,39 +177,67 @@ CONFIG_DRM_SUN4I=m
CONFIG_DRM_VIRTIO_GPU=m
CONFIG_FB=y
CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_SOUND=y
+CONFIG_SND=y
+CONFIG_SND_SOC=y
+CONFIG_SND_SOC_RZ=m
+CONFIG_SND_SOC_WM8978=m
+CONFIG_SND_SIMPLE_CARD=m
CONFIG_USB=y
+CONFIG_USB_OTG=y
CONFIG_USB_XHCI_HCD=y
CONFIG_USB_XHCI_PLATFORM=y
CONFIG_USB_EHCI_HCD=y
CONFIG_USB_EHCI_HCD_PLATFORM=y
CONFIG_USB_OHCI_HCD=y
CONFIG_USB_OHCI_HCD_PLATFORM=y
+CONFIG_USB_RENESAS_USBHS=m
CONFIG_USB_STORAGE=y
CONFIG_USB_UAS=y
CONFIG_USB_MUSB_HDRC=m
CONFIG_USB_MUSB_SUNXI=m
CONFIG_NOP_USB_XCEIV=m
+CONFIG_USB_GADGET=y
+CONFIG_USB_RENESAS_USBHS_UDC=m
+CONFIG_USB_CONFIGFS=m
+CONFIG_USB_CONFIGFS_SERIAL=y
+CONFIG_USB_CONFIGFS_ACM=y
+CONFIG_USB_CONFIGFS_OBEX=y
+CONFIG_USB_CONFIGFS_NCM=y
+CONFIG_USB_CONFIGFS_ECM=y
+CONFIG_USB_CONFIGFS_ECM_SUBSET=y
+CONFIG_USB_CONFIGFS_RNDIS=y
+CONFIG_USB_CONFIGFS_EEM=y
+CONFIG_USB_CONFIGFS_MASS_STORAGE=y
+CONFIG_USB_CONFIGFS_F_FS=y
CONFIG_MMC=y
CONFIG_MMC_SDHCI=y
CONFIG_MMC_SDHCI_PLTFM=y
CONFIG_MMC_SDHCI_CADENCE=y
CONFIG_MMC_SPI=y
+CONFIG_MMC_SDHI=y
CONFIG_MMC_SUNXI=y
CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_SUN6I=y
CONFIG_DMADEVICES=y
CONFIG_DMA_SUN6I=m
+CONFIG_RZ_DMAC=y
CONFIG_VIRTIO_PCI=y
CONFIG_VIRTIO_BALLOON=y
CONFIG_VIRTIO_INPUT=y
CONFIG_VIRTIO_MMIO=y
+CONFIG_RENESAS_OSTM=y
CONFIG_SUN8I_DE2_CCU=m
CONFIG_SUN50I_IOMMU=y
CONFIG_RPMSG_CHAR=y
CONFIG_RPMSG_CTRL=y
CONFIG_RPMSG_VIRTIO=y
CONFIG_ARCH_R9A07G043=y
+CONFIG_IIO=y
+CONFIG_RZG2L_ADC=m
+CONFIG_RESET_RZG2L_USBPHY_CTRL=y
CONFIG_PHY_SUN4I_USB=m
+CONFIG_PHY_RCAR_GEN3_USB2=y
CONFIG_LIBNVDIMM=y
CONFIG_NVMEM_SUNXI_SID=y
CONFIG_EXT4_FS=y
diff --git a/arch/riscv/include/asm/acpi.h b/arch/riscv/include/asm/acpi.h
index d5604d2073bc..7dad0cf9d701 100644
--- a/arch/riscv/include/asm/acpi.h
+++ b/arch/riscv/include/asm/acpi.h
@@ -66,6 +66,8 @@ int acpi_get_riscv_isa(struct acpi_table_header *table,
unsigned int cpu, const char **isa);
static inline int acpi_numa_get_nid(unsigned int cpu) { return NUMA_NO_NODE; }
+void acpi_get_cbo_block_size(struct acpi_table_header *table, u32 *cbom_size,
+ u32 *cboz_size, u32 *cbop_size);
#else
static inline void acpi_init_rintc_map(void) { }
static inline struct acpi_madt_rintc *acpi_cpu_get_madt_rintc(int cpu)
@@ -79,6 +81,10 @@ static inline int acpi_get_riscv_isa(struct acpi_table_header *table,
return -EINVAL;
}
+static inline void acpi_get_cbo_block_size(struct acpi_table_header *table,
+ u32 *cbom_size, u32 *cboz_size,
+ u32 *cbop_size) { }
+
#endif /* CONFIG_ACPI */
#endif /*_ASM_ACPI_H*/
diff --git a/arch/riscv/include/asm/asm-prototypes.h b/arch/riscv/include/asm/asm-prototypes.h
index 61ba8ed43d8f..36b955c762ba 100644
--- a/arch/riscv/include/asm/asm-prototypes.h
+++ b/arch/riscv/include/asm/asm-prototypes.h
@@ -25,7 +25,6 @@ DECLARE_DO_ERROR_INFO(do_trap_ecall_s);
DECLARE_DO_ERROR_INFO(do_trap_ecall_m);
DECLARE_DO_ERROR_INFO(do_trap_break);
-asmlinkage unsigned long get_overflow_stack(void);
asmlinkage void handle_bad_stack(struct pt_regs *regs);
asmlinkage void do_page_fault(struct pt_regs *regs);
asmlinkage void do_irq(struct pt_regs *regs);
diff --git a/arch/riscv/include/asm/asm.h b/arch/riscv/include/asm/asm.h
index 114bbadaef41..b0487b39e674 100644
--- a/arch/riscv/include/asm/asm.h
+++ b/arch/riscv/include/asm/asm.h
@@ -82,6 +82,47 @@
.endr
.endm
+#ifdef CONFIG_SMP
+#ifdef CONFIG_32BIT
+#define PER_CPU_OFFSET_SHIFT 2
+#else
+#define PER_CPU_OFFSET_SHIFT 3
+#endif
+
+.macro asm_per_cpu dst sym tmp
+ REG_L \tmp, TASK_TI_CPU_NUM(tp)
+ slli \tmp, \tmp, PER_CPU_OFFSET_SHIFT
+ la \dst, __per_cpu_offset
+ add \dst, \dst, \tmp
+ REG_L \tmp, 0(\dst)
+ la \dst, \sym
+ add \dst, \dst, \tmp
+.endm
+#else /* CONFIG_SMP */
+.macro asm_per_cpu dst sym tmp
+ la \dst, \sym
+.endm
+#endif /* CONFIG_SMP */
+
+.macro load_per_cpu dst ptr tmp
+ asm_per_cpu \dst \ptr \tmp
+ REG_L \dst, 0(\dst)
+.endm
+
+#ifdef CONFIG_SHADOW_CALL_STACK
+/* gp is used as the shadow call stack pointer instead */
+.macro load_global_pointer
+.endm
+#else
+/* load __global_pointer to gp */
+.macro load_global_pointer
+.option push
+.option norelax
+ la gp, __global_pointer$
+.option pop
+.endm
+#endif /* CONFIG_SHADOW_CALL_STACK */
+
/* save all GPs except x1 ~ x5 */
.macro save_from_x6_to_x31
REG_S x6, PT_T1(sp)
diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h
index d0345bd659c9..13b7d35648a9 100644
--- a/arch/riscv/include/asm/cpufeature.h
+++ b/arch/riscv/include/asm/cpufeature.h
@@ -31,5 +31,6 @@ DECLARE_PER_CPU(long, misaligned_access_speed);
extern struct riscv_isainfo hart_isa[NR_CPUS];
void check_unaligned_access(int cpu);
+void riscv_user_isa_enable(void);
#endif
diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h
index b55b434f0059..0ac18a4135be 100644
--- a/arch/riscv/include/asm/errata_list.h
+++ b/arch/riscv/include/asm/errata_list.h
@@ -95,25 +95,25 @@ asm volatile(ALTERNATIVE( \
#endif
/*
- * dcache.ipa rs1 (invalidate, physical address)
+ * th.dcache.ipa rs1 (invalidate, physical address)
* | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 |
* 0000001 01010 rs1 000 00000 0001011
- * dache.iva rs1 (invalida, virtual address)
+ * th.dache.iva rs1 (invalida, virtual address)
* 0000001 00110 rs1 000 00000 0001011
*
- * dcache.cpa rs1 (clean, physical address)
+ * th.dcache.cpa rs1 (clean, physical address)
* | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 |
* 0000001 01001 rs1 000 00000 0001011
- * dcache.cva rs1 (clean, virtual address)
+ * th.dcache.cva rs1 (clean, virtual address)
* 0000001 00101 rs1 000 00000 0001011
*
- * dcache.cipa rs1 (clean then invalidate, physical address)
+ * th.dcache.cipa rs1 (clean then invalidate, physical address)
* | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 |
* 0000001 01011 rs1 000 00000 0001011
- * dcache.civa rs1 (... virtual address)
+ * th.dcache.civa rs1 (... virtual address)
* 0000001 00111 rs1 000 00000 0001011
*
- * sync.s (make sure all cache operations finished)
+ * th.sync.s (make sure all cache operations finished)
* | 31 - 25 | 24 - 20 | 19 - 15 | 14 - 12 | 11 - 7 | 6 - 0 |
* 0000000 11001 00000 000 00000 0001011
*/
diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h
index 6fc51c1b34cf..fe6656af967a 100644
--- a/arch/riscv/include/asm/hwcap.h
+++ b/arch/riscv/include/asm/hwcap.h
@@ -72,6 +72,7 @@
#ifndef __ASSEMBLY__
#include <linux/jump_label.h>
+#include <asm/cpufeature.h>
unsigned long riscv_get_elf_hwcap(void);
@@ -139,6 +140,21 @@ l_yes:
return true;
}
+static __always_inline bool riscv_cpu_has_extension_likely(int cpu, const unsigned long ext)
+{
+ if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && riscv_has_extension_likely(ext))
+ return true;
+
+ return __riscv_isa_extension_available(hart_isa[cpu].isa, ext);
+}
+
+static __always_inline bool riscv_cpu_has_extension_unlikely(int cpu, const unsigned long ext)
+{
+ if (IS_ENABLED(CONFIG_RISCV_ALTERNATIVE) && riscv_has_extension_unlikely(ext))
+ return true;
+
+ return __riscv_isa_extension_available(hart_isa[cpu].isa, ext);
+}
#endif
#endif /* _ASM_RISCV_HWCAP_H */
diff --git a/arch/riscv/include/asm/hwprobe.h b/arch/riscv/include/asm/hwprobe.h
index 78936f4ff513..5c48f48e79a6 100644
--- a/arch/riscv/include/asm/hwprobe.h
+++ b/arch/riscv/include/asm/hwprobe.h
@@ -8,6 +8,11 @@
#include <uapi/asm/hwprobe.h>
-#define RISCV_HWPROBE_MAX_KEY 5
+#define RISCV_HWPROBE_MAX_KEY 6
+
+static inline bool riscv_hwprobe_key_is_valid(__s64 key)
+{
+ return key >= 0 && key <= RISCV_HWPROBE_MAX_KEY;
+}
#endif
diff --git a/arch/riscv/include/asm/irq_stack.h b/arch/riscv/include/asm/irq_stack.h
index e4042d297580..6441ded3b0cf 100644
--- a/arch/riscv/include/asm/irq_stack.h
+++ b/arch/riscv/include/asm/irq_stack.h
@@ -12,6 +12,9 @@
DECLARE_PER_CPU(ulong *, irq_stack_ptr);
+asmlinkage void call_on_irq_stack(struct pt_regs *regs,
+ void (*func)(struct pt_regs *));
+
#ifdef CONFIG_VMAP_STACK
/*
* To ensure that VMAP'd stack overflow detection works correctly, all VMAP'd
diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h
index 5488ecc337b6..57e887bfa34c 100644
--- a/arch/riscv/include/asm/page.h
+++ b/arch/riscv/include/asm/page.h
@@ -33,8 +33,8 @@
#define PAGE_OFFSET _AC(CONFIG_PAGE_OFFSET, UL)
#endif
/*
- * By default, CONFIG_PAGE_OFFSET value corresponds to SV48 address space so
- * define the PAGE_OFFSET value for SV39.
+ * By default, CONFIG_PAGE_OFFSET value corresponds to SV57 address space so
+ * define the PAGE_OFFSET value for SV48 and SV39.
*/
#define PAGE_OFFSET_L4 _AC(0xffffaf8000000000, UL)
#define PAGE_OFFSET_L3 _AC(0xffffffd800000000, UL)
diff --git a/arch/riscv/include/asm/pgtable-32.h b/arch/riscv/include/asm/pgtable-32.h
index 59ba1fbaf784..00f3369570a8 100644
--- a/arch/riscv/include/asm/pgtable-32.h
+++ b/arch/riscv/include/asm/pgtable-32.h
@@ -33,4 +33,7 @@
_PAGE_WRITE | _PAGE_EXEC | \
_PAGE_USER | _PAGE_GLOBAL))
+static const __maybe_unused int pgtable_l4_enabled;
+static const __maybe_unused int pgtable_l5_enabled;
+
#endif /* _ASM_RISCV_PGTABLE_32_H */
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index b2ba3f79cfe9..c8e8867c42f6 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -811,7 +811,7 @@ extern pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
* bit 5: _PAGE_PROT_NONE (zero)
* bit 6: exclusive marker
* bits 7 to 11: swap type
- * bits 11 to XLEN-1: swap offset
+ * bits 12 to XLEN-1: swap offset
*/
#define __SWP_TYPE_SHIFT 7
#define __SWP_TYPE_BITS 5
@@ -914,7 +914,6 @@ extern uintptr_t _dtb_early_pa;
#define dtb_early_pa _dtb_early_pa
#endif /* CONFIG_XIP_KERNEL */
extern u64 satp_mode;
-extern bool pgtable_l4_enabled;
void paging_init(void);
void misc_mem_init(void);
diff --git a/arch/riscv/include/asm/scs.h b/arch/riscv/include/asm/scs.h
new file mode 100644
index 000000000000..0e45db78b24b
--- /dev/null
+++ b/arch/riscv/include/asm/scs.h
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_SCS_H
+#define _ASM_SCS_H
+
+#ifdef __ASSEMBLY__
+#include <asm/asm-offsets.h>
+
+#ifdef CONFIG_SHADOW_CALL_STACK
+
+/* Load init_shadow_call_stack to gp. */
+.macro scs_load_init_stack
+ la gp, init_shadow_call_stack
+ XIP_FIXUP_OFFSET gp
+.endm
+
+/* Load the per-CPU IRQ shadow call stack to gp. */
+.macro scs_load_irq_stack tmp
+ load_per_cpu gp, irq_shadow_call_stack_ptr, \tmp
+.endm
+
+/* Load task_scs_sp(current) to gp. */
+.macro scs_load_current
+ REG_L gp, TASK_TI_SCS_SP(tp)
+.endm
+
+/* Load task_scs_sp(current) to gp, but only if tp has changed. */
+.macro scs_load_current_if_task_changed prev
+ beq \prev, tp, _skip_scs
+ scs_load_current
+_skip_scs:
+.endm
+
+/* Save gp to task_scs_sp(current). */
+.macro scs_save_current
+ REG_S gp, TASK_TI_SCS_SP(tp)
+.endm
+
+#else /* CONFIG_SHADOW_CALL_STACK */
+
+.macro scs_load_init_stack
+.endm
+.macro scs_load_irq_stack tmp
+.endm
+.macro scs_load_current
+.endm
+.macro scs_load_current_if_task_changed prev
+.endm
+.macro scs_save_current
+.endm
+
+#endif /* CONFIG_SHADOW_CALL_STACK */
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_SCS_H */
diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h
index 1833beb00489..574779900bfb 100644
--- a/arch/riscv/include/asm/thread_info.h
+++ b/arch/riscv/include/asm/thread_info.h
@@ -34,9 +34,6 @@
#ifndef __ASSEMBLY__
-extern long shadow_stack[SHADOW_OVERFLOW_STACK_SIZE / sizeof(long)];
-extern unsigned long spin_shadow_stack;
-
#include <asm/processor.h>
#include <asm/csr.h>
@@ -60,8 +57,20 @@ struct thread_info {
long user_sp; /* User stack pointer */
int cpu;
unsigned long syscall_work; /* SYSCALL_WORK_ flags */
+#ifdef CONFIG_SHADOW_CALL_STACK
+ void *scs_base;
+ void *scs_sp;
+#endif
};
+#ifdef CONFIG_SHADOW_CALL_STACK
+#define INIT_SCS \
+ .scs_base = init_shadow_call_stack, \
+ .scs_sp = init_shadow_call_stack,
+#else
+#define INIT_SCS
+#endif
+
/*
* macros/functions for gaining access to the thread information structure
*
@@ -71,6 +80,7 @@ struct thread_info {
{ \
.flags = 0, \
.preempt_count = INIT_PREEMPT_COUNT, \
+ INIT_SCS \
}
void arch_release_task_struct(struct task_struct *tsk);
diff --git a/arch/riscv/include/asm/vdso/processor.h b/arch/riscv/include/asm/vdso/processor.h
index 14f5d27783b8..96b65a5396df 100644
--- a/arch/riscv/include/asm/vdso/processor.h
+++ b/arch/riscv/include/asm/vdso/processor.h
@@ -14,7 +14,7 @@ static inline void cpu_relax(void)
__asm__ __volatile__ ("div %0, %0, zero" : "=r" (dummy));
#endif
-#ifdef __riscv_zihintpause
+#ifdef CONFIG_TOOLCHAIN_HAS_ZIHINTPAUSE
/*
* Reduce instruction retirement.
* This assumes the PC changes.
diff --git a/arch/riscv/include/uapi/asm/hwprobe.h b/arch/riscv/include/uapi/asm/hwprobe.h
index d43e306ce2f9..b659ffcfcdb4 100644
--- a/arch/riscv/include/uapi/asm/hwprobe.h
+++ b/arch/riscv/include/uapi/asm/hwprobe.h
@@ -29,6 +29,7 @@ struct riscv_hwprobe {
#define RISCV_HWPROBE_EXT_ZBA (1 << 3)
#define RISCV_HWPROBE_EXT_ZBB (1 << 4)
#define RISCV_HWPROBE_EXT_ZBS (1 << 5)
+#define RISCV_HWPROBE_EXT_ZICBOZ (1 << 6)
#define RISCV_HWPROBE_KEY_CPUPERF_0 5
#define RISCV_HWPROBE_MISALIGNED_UNKNOWN (0 << 0)
#define RISCV_HWPROBE_MISALIGNED_EMULATED (1 << 0)
@@ -36,6 +37,7 @@ struct riscv_hwprobe {
#define RISCV_HWPROBE_MISALIGNED_FAST (3 << 0)
#define RISCV_HWPROBE_MISALIGNED_UNSUPPORTED (4 << 0)
#define RISCV_HWPROBE_MISALIGNED_MASK (7 << 0)
+#define RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE 6
/* Increase RISCV_HWPROBE_MAX_KEY when adding items. */
#endif
diff --git a/arch/riscv/kernel/acpi.c b/arch/riscv/kernel/acpi.c
index 56cb2c986c48..e619edc8b0cc 100644
--- a/arch/riscv/kernel/acpi.c
+++ b/arch/riscv/kernel/acpi.c
@@ -14,9 +14,10 @@
*/
#include <linux/acpi.h>
+#include <linux/efi.h>
#include <linux/io.h>
+#include <linux/memblock.h>
#include <linux/pci.h>
-#include <linux/efi.h>
int acpi_noirq = 1; /* skip ACPI IRQ initialization */
int acpi_disabled = 1;
@@ -217,7 +218,89 @@ void __init __acpi_unmap_table(void __iomem *map, unsigned long size)
void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size)
{
- return (void __iomem *)memremap(phys, size, MEMREMAP_WB);
+ efi_memory_desc_t *md, *region = NULL;
+ pgprot_t prot;
+
+ if (WARN_ON_ONCE(!efi_enabled(EFI_MEMMAP)))
+ return NULL;
+
+ for_each_efi_memory_desc(md) {
+ u64 end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT);
+
+ if (phys < md->phys_addr || phys >= end)
+ continue;
+
+ if (phys + size > end) {
+ pr_warn(FW_BUG "requested region covers multiple EFI memory regions\n");
+ return NULL;
+ }
+ region = md;
+ break;
+ }
+
+ /*
+ * It is fine for AML to remap regions that are not represented in the
+ * EFI memory map at all, as it only describes normal memory, and MMIO
+ * regions that require a virtual mapping to make them accessible to
+ * the EFI runtime services.
+ */
+ prot = PAGE_KERNEL_IO;
+ if (region) {
+ switch (region->type) {
+ case EFI_LOADER_CODE:
+ case EFI_LOADER_DATA:
+ case EFI_BOOT_SERVICES_CODE:
+ case EFI_BOOT_SERVICES_DATA:
+ case EFI_CONVENTIONAL_MEMORY:
+ case EFI_PERSISTENT_MEMORY:
+ if (memblock_is_map_memory(phys) ||
+ !memblock_is_region_memory(phys, size)) {
+ pr_warn(FW_BUG "requested region covers kernel memory\n");
+ return NULL;
+ }
+
+ /*
+ * Mapping kernel memory is permitted if the region in
+ * question is covered by a single memblock with the
+ * NOMAP attribute set: this enables the use of ACPI
+ * table overrides passed via initramfs.
+ * This particular use case only requires read access.
+ */
+ fallthrough;
+
+ case EFI_RUNTIME_SERVICES_CODE:
+ /*
+ * This would be unusual, but not problematic per se,
+ * as long as we take care not to create a writable
+ * mapping for executable code.
+ */
+ prot = PAGE_KERNEL_RO;
+ break;
+
+ case EFI_ACPI_RECLAIM_MEMORY:
+ /*
+ * ACPI reclaim memory is used to pass firmware tables
+ * and other data that is intended for consumption by
+ * the OS only, which may decide it wants to reclaim
+ * that memory and use it for something else. We never
+ * do that, but we usually add it to the linear map
+ * anyway, in which case we should use the existing
+ * mapping.
+ */
+ if (memblock_is_map_memory(phys))
+ return (void __iomem *)__va(phys);
+ fallthrough;
+
+ default:
+ if (region->attribute & EFI_MEMORY_WB)
+ prot = PAGE_KERNEL;
+ else if ((region->attribute & EFI_MEMORY_WC) ||
+ (region->attribute & EFI_MEMORY_WT))
+ prot = pgprot_writecombine(PAGE_KERNEL);
+ }
+ }
+
+ return ioremap_prot(phys, size, pgprot_val(prot));
}
#ifdef CONFIG_PCI
diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c
index d6a75aac1d27..a03129f40c46 100644
--- a/arch/riscv/kernel/asm-offsets.c
+++ b/arch/riscv/kernel/asm-offsets.c
@@ -14,6 +14,7 @@
#include <asm/thread_info.h>
#include <asm/ptrace.h>
#include <asm/cpu_ops_sbi.h>
+#include <asm/stacktrace.h>
#include <asm/suspend.h>
void asm_offsets(void);
@@ -38,7 +39,11 @@ void asm_offsets(void)
OFFSET(TASK_TI_PREEMPT_COUNT, task_struct, thread_info.preempt_count);
OFFSET(TASK_TI_KERNEL_SP, task_struct, thread_info.kernel_sp);
OFFSET(TASK_TI_USER_SP, task_struct, thread_info.user_sp);
+#ifdef CONFIG_SHADOW_CALL_STACK
+ OFFSET(TASK_TI_SCS_SP, task_struct, thread_info.scs_sp);
+#endif
+ OFFSET(TASK_TI_CPU_NUM, task_struct, thread_info.cpu);
OFFSET(TASK_THREAD_F0, task_struct, thread.fstate.f[0]);
OFFSET(TASK_THREAD_F1, task_struct, thread.fstate.f[1]);
OFFSET(TASK_THREAD_F2, task_struct, thread.fstate.f[2]);
@@ -479,4 +484,8 @@ void asm_offsets(void)
OFFSET(KERNEL_MAP_VIRT_ADDR, kernel_mapping, virt_addr);
OFFSET(SBI_HART_BOOT_TASK_PTR_OFFSET, sbi_hart_boot_data, task_ptr);
OFFSET(SBI_HART_BOOT_STACK_PTR_OFFSET, sbi_hart_boot_data, stack_ptr);
+
+ DEFINE(STACKFRAME_SIZE_ON_STACK, ALIGN(sizeof(struct stackframe), STACK_ALIGN));
+ OFFSET(STACKFRAME_FP, stackframe, fp);
+ OFFSET(STACKFRAME_RA, stackframe, ra);
}
diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
index e3803822ab5a..7aeba01dcfd4 100644
--- a/arch/riscv/kernel/cpufeature.c
+++ b/arch/riscv/kernel/cpufeature.c
@@ -93,10 +93,10 @@ static bool riscv_isa_extension_check(int id)
return true;
case RISCV_ISA_EXT_ZICBOZ:
if (!riscv_cboz_block_size) {
- pr_err("Zicboz detected in ISA string, but no cboz-block-size found\n");
+ pr_err("Zicboz detected in ISA string, disabling as no cboz-block-size found\n");
return false;
} else if (!is_power_of_2(riscv_cboz_block_size)) {
- pr_err("cboz-block-size present, but is not a power-of-2\n");
+ pr_err("Zicboz disabled as cboz-block-size present, but is not a power-of-2\n");
return false;
}
return true;
@@ -206,10 +206,11 @@ static void __init riscv_parse_isa_string(unsigned long *this_hwcap, struct risc
switch (*ext) {
case 's':
/*
- * Workaround for invalid single-letter 's' & 'u'(QEMU).
+ * Workaround for invalid single-letter 's' & 'u' (QEMU).
* No need to set the bit in riscv_isa as 's' & 'u' are
- * not valid ISA extensions. It works until multi-letter
- * extension starting with "Su" appears.
+ * not valid ISA extensions. It works unless the first
+ * multi-letter extension in the ISA string begins with
+ * "Su" and is not prefixed with an underscore.
*/
if (ext[-1] != '_' && ext[1] == 'u') {
++isa;
@@ -655,6 +656,12 @@ static int check_unaligned_access_boot_cpu(void)
arch_initcall(check_unaligned_access_boot_cpu);
+void riscv_user_isa_enable(void)
+{
+ if (riscv_cpu_has_extension_unlikely(smp_processor_id(), RISCV_ISA_EXT_ZICBOZ))
+ csr_set(CSR_SENVCFG, ENVCFG_CBZE);
+}
+
#ifdef CONFIG_RISCV_ALTERNATIVE
/*
* Alternative patch sites consider 48 bits when determining when to patch
diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
index 143a2bb3e697..9f92c067f7e1 100644
--- a/arch/riscv/kernel/entry.S
+++ b/arch/riscv/kernel/entry.S
@@ -9,10 +9,15 @@
#include <asm/asm.h>
#include <asm/csr.h>
+#include <asm/scs.h>
#include <asm/unistd.h>
+#include <asm/page.h>
#include <asm/thread_info.h>
#include <asm/asm-offsets.h>
#include <asm/errata_list.h>
+#include <linux/sizes.h>
+
+ .section .irqentry.text, "ax"
SYM_CODE_START(handle_exception)
/*
@@ -73,10 +78,11 @@ _save_context:
csrw CSR_SCRATCH, x0
/* Load the global pointer */
-.option push
-.option norelax
- la gp, __global_pointer$
-.option pop
+ load_global_pointer
+
+ /* Load the kernel shadow call stack pointer if coming from userspace */
+ scs_load_current_if_task_changed s5
+
move a0, sp /* pt_regs */
la ra, ret_from_exception
@@ -123,6 +129,9 @@ SYM_CODE_START_NOALIGN(ret_from_exception)
addi s0, sp, PT_SIZE_ON_STACK
REG_S s0, TASK_TI_KERNEL_SP(tp)
+ /* Save the kernel shadow call stack pointer */
+ scs_save_current
+
/*
* Save TP into the scratch register , so we can find the kernel data
* structures again.
@@ -170,67 +179,15 @@ SYM_CODE_END(ret_from_exception)
#ifdef CONFIG_VMAP_STACK
SYM_CODE_START_LOCAL(handle_kernel_stack_overflow)
- /*
- * Takes the psuedo-spinlock for the shadow stack, in case multiple
- * harts are concurrently overflowing their kernel stacks. We could
- * store any value here, but since we're overflowing the kernel stack
- * already we only have SP to use as a scratch register. So we just
- * swap in the address of the spinlock, as that's definately non-zero.
- *
- * Pairs with a store_release in handle_bad_stack().
- */
-1: la sp, spin_shadow_stack
- REG_AMOSWAP_AQ sp, sp, (sp)
- bnez sp, 1b
-
- la sp, shadow_stack
- addi sp, sp, SHADOW_OVERFLOW_STACK_SIZE
+ /* we reach here from kernel context, sscratch must be 0 */
+ csrrw x31, CSR_SCRATCH, x31
+ asm_per_cpu sp, overflow_stack, x31
+ li x31, OVERFLOW_STACK_SIZE
+ add sp, sp, x31
+ /* zero out x31 again and restore x31 */
+ xor x31, x31, x31
+ csrrw x31, CSR_SCRATCH, x31
- //save caller register to shadow stack
- addi sp, sp, -(PT_SIZE_ON_STACK)
- REG_S x1, PT_RA(sp)
- REG_S x5, PT_T0(sp)
- REG_S x6, PT_T1(sp)
- REG_S x7, PT_T2(sp)
- REG_S x10, PT_A0(sp)
- REG_S x11, PT_A1(sp)
- REG_S x12, PT_A2(sp)
- REG_S x13, PT_A3(sp)
- REG_S x14, PT_A4(sp)
- REG_S x15, PT_A5(sp)
- REG_S x16, PT_A6(sp)
- REG_S x17, PT_A7(sp)
- REG_S x28, PT_T3(sp)
- REG_S x29, PT_T4(sp)
- REG_S x30, PT_T5(sp)
- REG_S x31, PT_T6(sp)
-
- la ra, restore_caller_reg
- tail get_overflow_stack
-
-restore_caller_reg:
- //save per-cpu overflow stack
- REG_S a0, -8(sp)
- //restore caller register from shadow_stack
- REG_L x1, PT_RA(sp)
- REG_L x5, PT_T0(sp)
- REG_L x6, PT_T1(sp)
- REG_L x7, PT_T2(sp)
- REG_L x10, PT_A0(sp)
- REG_L x11, PT_A1(sp)
- REG_L x12, PT_A2(sp)
- REG_L x13, PT_A3(sp)
- REG_L x14, PT_A4(sp)
- REG_L x15, PT_A5(sp)
- REG_L x16, PT_A6(sp)
- REG_L x17, PT_A7(sp)
- REG_L x28, PT_T3(sp)
- REG_L x29, PT_T4(sp)
- REG_L x30, PT_T5(sp)
- REG_L x31, PT_T6(sp)
-
- //load per-cpu overflow stack
- REG_L sp, -8(sp)
addi sp, sp, -(PT_SIZE_ON_STACK)
//save context to overflow stack
@@ -268,6 +225,43 @@ SYM_CODE_START(ret_from_fork)
tail syscall_exit_to_user_mode
SYM_CODE_END(ret_from_fork)
+#ifdef CONFIG_IRQ_STACKS
+/*
+ * void call_on_irq_stack(struct pt_regs *regs,
+ * void (*func)(struct pt_regs *));
+ *
+ * Calls func(regs) using the per-CPU IRQ stack.
+ */
+SYM_FUNC_START(call_on_irq_stack)
+ /* Create a frame record to save ra and s0 (fp) */
+ addi sp, sp, -STACKFRAME_SIZE_ON_STACK
+ REG_S ra, STACKFRAME_RA(sp)
+ REG_S s0, STACKFRAME_FP(sp)
+ addi s0, sp, STACKFRAME_SIZE_ON_STACK
+
+ /* Switch to the per-CPU shadow call stack */
+ scs_save_current
+ scs_load_irq_stack t0
+
+ /* Switch to the per-CPU IRQ stack and call the handler */
+ load_per_cpu t0, irq_stack_ptr, t1
+ li t1, IRQ_STACK_SIZE
+ add sp, t0, t1
+ jalr a1
+
+ /* Switch back to the thread shadow call stack */
+ scs_load_current
+
+ /* Switch back to the thread stack and restore ra and s0 */
+ addi sp, s0, -STACKFRAME_SIZE_ON_STACK
+ REG_L ra, STACKFRAME_RA(sp)
+ REG_L s0, STACKFRAME_FP(sp)
+ addi sp, sp, STACKFRAME_SIZE_ON_STACK
+
+ ret
+SYM_FUNC_END(call_on_irq_stack)
+#endif /* CONFIG_IRQ_STACKS */
+
/*
* Integer register context switch
* The callee-saved registers must be saved and restored.
@@ -297,6 +291,8 @@ SYM_FUNC_START(__switch_to)
REG_S s9, TASK_THREAD_S9_RA(a3)
REG_S s10, TASK_THREAD_S10_RA(a3)
REG_S s11, TASK_THREAD_S11_RA(a3)
+ /* Save the kernel shadow call stack pointer */
+ scs_save_current
/* Restore context from next->thread */
REG_L ra, TASK_THREAD_RA_RA(a4)
REG_L sp, TASK_THREAD_SP_RA(a4)
@@ -314,6 +310,8 @@ SYM_FUNC_START(__switch_to)
REG_L s11, TASK_THREAD_S11_RA(a4)
/* The offset of thread_info in task_struct is zero. */
move tp, a1
+ /* Switch to the next shadow call stack */
+ scs_load_current
ret
SYM_FUNC_END(__switch_to)
diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S
index 3710ea5d160f..18f97ec0f7ed 100644
--- a/arch/riscv/kernel/head.S
+++ b/arch/riscv/kernel/head.S
@@ -14,6 +14,7 @@
#include <asm/cpu_ops_sbi.h>
#include <asm/hwcap.h>
#include <asm/image.h>
+#include <asm/scs.h>
#include <asm/xip_fixup.h>
#include "efi-header.S"
@@ -110,10 +111,7 @@ relocate_enable_mmu:
csrw CSR_TVEC, a0
/* Reload the global pointer */
-.option push
-.option norelax
- la gp, __global_pointer$
-.option pop
+ load_global_pointer
/*
* Switch to kernel page tables. A full fence is necessary in order to
@@ -134,10 +132,7 @@ secondary_start_sbi:
csrw CSR_IP, zero
/* Load the global pointer */
- .option push
- .option norelax
- la gp, __global_pointer$
- .option pop
+ load_global_pointer
/*
* Disable FPU & VECTOR to detect illegal usage of
@@ -159,6 +154,7 @@ secondary_start_sbi:
XIP_FIXUP_OFFSET a3
add a3, a3, a1
REG_L sp, (a3)
+ scs_load_current
.Lsecondary_start_common:
@@ -228,10 +224,7 @@ pmp_done:
#endif /* CONFIG_RISCV_M_MODE */
/* Load the global pointer */
-.option push
-.option norelax
- la gp, __global_pointer$
-.option pop
+ load_global_pointer
/*
* Disable FPU & VECTOR to detect illegal usage of
@@ -298,6 +291,7 @@ clear_bss_done:
la sp, init_thread_union + THREAD_SIZE
XIP_FIXUP_OFFSET sp
addi sp, sp, -PT_SIZE_ON_STACK
+ scs_load_init_stack
#ifdef CONFIG_BUILTIN_DTB
la a0, __dtb_start
XIP_FIXUP_OFFSET a0
@@ -316,6 +310,7 @@ clear_bss_done:
la tp, init_task
la sp, init_thread_union + THREAD_SIZE
addi sp, sp, -PT_SIZE_ON_STACK
+ scs_load_current
#ifdef CONFIG_KASAN
call kasan_early_init
diff --git a/arch/riscv/kernel/irq.c b/arch/riscv/kernel/irq.c
index 9cc0a7669271..9ceda02507ca 100644
--- a/arch/riscv/kernel/irq.c
+++ b/arch/riscv/kernel/irq.c
@@ -9,6 +9,7 @@
#include <linux/irqchip.h>
#include <linux/irqdomain.h>
#include <linux/module.h>
+#include <linux/scs.h>
#include <linux/seq_file.h>
#include <asm/sbi.h>
#include <asm/smp.h>
@@ -34,6 +35,24 @@ EXPORT_SYMBOL_GPL(riscv_get_intc_hwnode);
#ifdef CONFIG_IRQ_STACKS
#include <asm/irq_stack.h>
+DECLARE_PER_CPU(ulong *, irq_shadow_call_stack_ptr);
+
+#ifdef CONFIG_SHADOW_CALL_STACK
+DEFINE_PER_CPU(ulong *, irq_shadow_call_stack_ptr);
+#endif
+
+static void init_irq_scs(void)
+{
+ int cpu;
+
+ if (!scs_is_enabled())
+ return;
+
+ for_each_possible_cpu(cpu)
+ per_cpu(irq_shadow_call_stack_ptr, cpu) =
+ scs_alloc(cpu_to_node(cpu));
+}
+
DEFINE_PER_CPU(ulong *, irq_stack_ptr);
#ifdef CONFIG_VMAP_STACK
@@ -61,40 +80,22 @@ static void init_irq_stacks(void)
#endif /* CONFIG_VMAP_STACK */
#ifdef CONFIG_SOFTIRQ_ON_OWN_STACK
+static void ___do_softirq(struct pt_regs *regs)
+{
+ __do_softirq();
+}
+
void do_softirq_own_stack(void)
{
-#ifdef CONFIG_IRQ_STACKS
- if (on_thread_stack()) {
- ulong *sp = per_cpu(irq_stack_ptr, smp_processor_id())
- + IRQ_STACK_SIZE/sizeof(ulong);
- __asm__ __volatile(
- "addi sp, sp, -"RISCV_SZPTR "\n"
- REG_S" ra, (sp) \n"
- "addi sp, sp, -"RISCV_SZPTR "\n"
- REG_S" s0, (sp) \n"
- "addi s0, sp, 2*"RISCV_SZPTR "\n"
- "move sp, %[sp] \n"
- "call __do_softirq \n"
- "addi sp, s0, -2*"RISCV_SZPTR"\n"
- REG_L" s0, (sp) \n"
- "addi sp, sp, "RISCV_SZPTR "\n"
- REG_L" ra, (sp) \n"
- "addi sp, sp, "RISCV_SZPTR "\n"
- :
- : [sp] "r" (sp)
- : "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7",
- "t0", "t1", "t2", "t3", "t4", "t5", "t6",
-#ifndef CONFIG_FRAME_POINTER
- "s0",
-#endif
- "memory");
- } else
-#endif
+ if (on_thread_stack())
+ call_on_irq_stack(NULL, ___do_softirq);
+ else
__do_softirq();
}
#endif /* CONFIG_SOFTIRQ_ON_OWN_STACK */
#else
+static void init_irq_scs(void) {}
static void init_irq_stacks(void) {}
#endif /* CONFIG_IRQ_STACKS */
@@ -106,6 +107,7 @@ int arch_show_interrupts(struct seq_file *p, int prec)
void __init init_IRQ(void)
{
+ init_irq_scs();
init_irq_stacks();
irqchip_init();
if (!handle_arch_irq)
diff --git a/arch/riscv/kernel/kexec_relocate.S b/arch/riscv/kernel/kexec_relocate.S
index 059c5e216ae7..de0a4b35d01e 100644
--- a/arch/riscv/kernel/kexec_relocate.S
+++ b/arch/riscv/kernel/kexec_relocate.S
@@ -17,27 +17,17 @@ SYM_CODE_START(riscv_kexec_relocate)
* s1: (const) Phys address to jump to after relocation
* s2: (const) Phys address of the FDT image
* s3: (const) The hartid of the current hart
- * s4: Pointer to the destination address for the relocation
- * s5: (const) Number of words per page
- * s6: (const) 1, used for subtraction
- * s7: (const) kernel_map.va_pa_offset, used when switching MMU off
- * s8: (const) Physical address of the main loop
- * s9: (debug) indirection page counter
- * s10: (debug) entry counter
- * s11: (debug) copied words counter
+ * s4: (const) kernel_map.va_pa_offset, used when switching MMU off
+ * s5: Pointer to the destination address for the relocation
+ * s6: (const) Physical address of the main loop
*/
mv s0, a0
mv s1, a1
mv s2, a2
mv s3, a3
- mv s4, zero
- li s5, (PAGE_SIZE / RISCV_SZPTR)
- li s6, 1
- mv s7, a4
- mv s8, zero
- mv s9, zero
- mv s10, zero
- mv s11, zero
+ mv s4, a4
+ mv s5, zero
+ mv s6, zero
/* Disable / cleanup interrupts */
csrw CSR_SIE, zero
@@ -52,21 +42,27 @@ SYM_CODE_START(riscv_kexec_relocate)
* the start of the loop below so that we jump there in
* any case.
*/
- la s8, 1f
- sub s8, s8, s7
- csrw CSR_STVEC, s8
+ la s6, 1f
+ sub s6, s6, s4
+ csrw CSR_STVEC, s6
+
+ /*
+ * With C-extension, here we get 42 Bytes and the next
+ * .align directive would pad zeros here up to 44 Bytes.
+ * So manually put a nop here to avoid zeros padding.
+ */
+ nop
/* Process entries in a loop */
.align 2
1:
- addi s10, s10, 1
REG_L t0, 0(s0) /* t0 = *image->entry */
addi s0, s0, RISCV_SZPTR /* image->entry++ */
/* IND_DESTINATION entry ? -> save destination address */
andi t1, t0, 0x1
beqz t1, 2f
- andi s4, t0, ~0x1
+ andi s5, t0, ~0x1
j 1b
2:
@@ -74,9 +70,8 @@ SYM_CODE_START(riscv_kexec_relocate)
andi t1, t0, 0x2
beqz t1, 2f
andi s0, t0, ~0x2
- addi s9, s9, 1
csrw CSR_SATP, zero
- jalr zero, s8, 0
+ jr s6
2:
/* IND_DONE entry ? -> jump to done label */
@@ -92,14 +87,13 @@ SYM_CODE_START(riscv_kexec_relocate)
andi t1, t0, 0x8
beqz t1, 1b /* Unknown entry type, ignore it */
andi t0, t0, ~0x8
- mv t3, s5 /* i = num words per page */
+ li t3, (PAGE_SIZE / RISCV_SZPTR) /* i = num words per page */
3: /* copy loop */
REG_L t1, (t0) /* t1 = *src_ptr */
- REG_S t1, (s4) /* *dst_ptr = *src_ptr */
+ REG_S t1, (s5) /* *dst_ptr = *src_ptr */
addi t0, t0, RISCV_SZPTR /* stc_ptr++ */
- addi s4, s4, RISCV_SZPTR /* dst_ptr++ */
- sub t3, t3, s6 /* i-- */
- addi s11, s11, 1 /* c++ */
+ addi s5, s5, RISCV_SZPTR /* dst_ptr++ */
+ addi t3, t3, -0x1 /* i-- */
beqz t3, 1b /* copy done ? */
j 3b
@@ -146,7 +140,7 @@ SYM_CODE_START(riscv_kexec_relocate)
*/
fence.i
- jalr zero, a2, 0
+ jr a2
SYM_CODE_END(riscv_kexec_relocate)
riscv_kexec_relocate_end:
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index 0624f44d43ec..535a837de55d 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -25,6 +25,7 @@
#include <asm/acpi.h>
#include <asm/alternative.h>
#include <asm/cacheflush.h>
+#include <asm/cpufeature.h>
#include <asm/cpu_ops.h>
#include <asm/early_ioremap.h>
#include <asm/pgtable.h>
@@ -289,10 +290,13 @@ void __init setup_arch(char **cmdline_p)
riscv_fill_hwcap();
init_rt_signal_env();
apply_boot_alternatives();
+
if (IS_ENABLED(CONFIG_RISCV_ISA_ZICBOM) &&
riscv_isa_extension_available(NULL, ZICBOM))
riscv_noncoherent_supported();
riscv_set_dma_cache_alignment();
+
+ riscv_user_isa_enable();
}
static int __init topology_init(void)
diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c
index 1b8da4e40a4d..d1b0a6fc3adf 100644
--- a/arch/riscv/kernel/smpboot.c
+++ b/arch/riscv/kernel/smpboot.c
@@ -25,6 +25,8 @@
#include <linux/of.h>
#include <linux/sched/task_stack.h>
#include <linux/sched/mm.h>
+
+#include <asm/cpufeature.h>
#include <asm/cpu_ops.h>
#include <asm/cpufeature.h>
#include <asm/irq.h>
@@ -253,6 +255,8 @@ asmlinkage __visible void smp_callin(void)
elf_hwcap &= ~COMPAT_HWCAP_ISA_V;
}
+ riscv_user_isa_enable();
+
/*
* Remote TLB flushes are ignored while the CPU is offline, so emit
* a local TLB flush right now just in case.
diff --git a/arch/riscv/kernel/suspend_entry.S b/arch/riscv/kernel/suspend_entry.S
index f7960c7c5f9e..d5cf8b575777 100644
--- a/arch/riscv/kernel/suspend_entry.S
+++ b/arch/riscv/kernel/suspend_entry.S
@@ -61,10 +61,7 @@ END(__cpu_suspend_enter)
SYM_TYPED_FUNC_START(__cpu_resume_enter)
/* Load the global pointer */
- .option push
- .option norelax
- la gp, __global_pointer$
- .option pop
+ load_global_pointer
#ifdef CONFIG_MMU
/* Save A0 and A1 */
diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c
index b651ec698a91..c712037dbe10 100644
--- a/arch/riscv/kernel/sys_riscv.c
+++ b/arch/riscv/kernel/sys_riscv.c
@@ -145,26 +145,38 @@ static void hwprobe_isa_ext0(struct riscv_hwprobe *pair,
for_each_cpu(cpu, cpus) {
struct riscv_isainfo *isainfo = &hart_isa[cpu];
- if (riscv_isa_extension_available(isainfo->isa, ZBA))
- pair->value |= RISCV_HWPROBE_EXT_ZBA;
- else
- missing |= RISCV_HWPROBE_EXT_ZBA;
-
- if (riscv_isa_extension_available(isainfo->isa, ZBB))
- pair->value |= RISCV_HWPROBE_EXT_ZBB;
- else
- missing |= RISCV_HWPROBE_EXT_ZBB;
-
- if (riscv_isa_extension_available(isainfo->isa, ZBS))
- pair->value |= RISCV_HWPROBE_EXT_ZBS;
- else
- missing |= RISCV_HWPROBE_EXT_ZBS;
+#define EXT_KEY(ext) \
+ do { \
+ if (__riscv_isa_extension_available(isainfo->isa, RISCV_ISA_EXT_##ext)) \
+ pair->value |= RISCV_HWPROBE_EXT_##ext; \
+ else \
+ missing |= RISCV_HWPROBE_EXT_##ext; \
+ } while (false)
+
+ /*
+ * Only use EXT_KEY() for extensions which can be exposed to userspace,
+ * regardless of the kernel's configuration, as no other checks, besides
+ * presence in the hart_isa bitmap, are made.
+ */
+ EXT_KEY(ZBA);
+ EXT_KEY(ZBB);
+ EXT_KEY(ZBS);
+ EXT_KEY(ZICBOZ);
+#undef EXT_KEY
}
/* Now turn off reporting features if any CPU is missing it. */
pair->value &= ~missing;
}
+static bool hwprobe_ext0_has(const struct cpumask *cpus, unsigned long ext)
+{
+ struct riscv_hwprobe pair;
+
+ hwprobe_isa_ext0(&pair, cpus);
+ return (pair.value & ext);
+}
+
static u64 hwprobe_misaligned(const struct cpumask *cpus)
{
int cpu;
@@ -215,6 +227,12 @@ static void hwprobe_one_pair(struct riscv_hwprobe *pair,
pair->value = hwprobe_misaligned(cpus);
break;
+ case RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE:
+ pair->value = 0;
+ if (hwprobe_ext0_has(cpus, RISCV_HWPROBE_EXT_ZICBOZ))
+ pair->value = riscv_cboz_block_size;
+ break;
+
/*
* For forward compatibility, unknown keys don't fail the whole
* call, but get their element key set to -1 and value set to 0
diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c
index fae8f610d867..c9d698518bae 100644
--- a/arch/riscv/kernel/traps.c
+++ b/arch/riscv/kernel/traps.c
@@ -360,34 +360,10 @@ static void noinstr handle_riscv_irq(struct pt_regs *regs)
asmlinkage void noinstr do_irq(struct pt_regs *regs)
{
irqentry_state_t state = irqentry_enter(regs);
-#ifdef CONFIG_IRQ_STACKS
- if (on_thread_stack()) {
- ulong *sp = per_cpu(irq_stack_ptr, smp_processor_id())
- + IRQ_STACK_SIZE/sizeof(ulong);
- __asm__ __volatile(
- "addi sp, sp, -"RISCV_SZPTR "\n"
- REG_S" ra, (sp) \n"
- "addi sp, sp, -"RISCV_SZPTR "\n"
- REG_S" s0, (sp) \n"
- "addi s0, sp, 2*"RISCV_SZPTR "\n"
- "move sp, %[sp] \n"
- "move a0, %[regs] \n"
- "call handle_riscv_irq \n"
- "addi sp, s0, -2*"RISCV_SZPTR"\n"
- REG_L" s0, (sp) \n"
- "addi sp, sp, "RISCV_SZPTR "\n"
- REG_L" ra, (sp) \n"
- "addi sp, sp, "RISCV_SZPTR "\n"
- :
- : [sp] "r" (sp), [regs] "r" (regs)
- : "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7",
- "t0", "t1", "t2", "t3", "t4", "t5", "t6",
-#ifndef CONFIG_FRAME_POINTER
- "s0",
-#endif
- "memory");
- } else
-#endif
+
+ if (IS_ENABLED(CONFIG_IRQ_STACKS) && on_thread_stack())
+ call_on_irq_stack(regs, handle_riscv_irq);
+ else
handle_riscv_irq(regs);
irqentry_exit(regs, state);
@@ -410,48 +386,14 @@ int is_valid_bugaddr(unsigned long pc)
#endif /* CONFIG_GENERIC_BUG */
#ifdef CONFIG_VMAP_STACK
-/*
- * Extra stack space that allows us to provide panic messages when the kernel
- * has overflowed its stack.
- */
-static DEFINE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)],
+DEFINE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)],
overflow_stack)__aligned(16);
-/*
- * A temporary stack for use by handle_kernel_stack_overflow. This is used so
- * we can call into C code to get the per-hart overflow stack. Usage of this
- * stack must be protected by spin_shadow_stack.
- */
-long shadow_stack[SHADOW_OVERFLOW_STACK_SIZE/sizeof(long)] __aligned(16);
-
-/*
- * A pseudo spinlock to protect the shadow stack from being used by multiple
- * harts concurrently. This isn't a real spinlock because the lock side must
- * be taken without a valid stack and only a single register, it's only taken
- * while in the process of panicing anyway so the performance and error
- * checking a proper spinlock gives us doesn't matter.
- */
-unsigned long spin_shadow_stack;
-
-asmlinkage unsigned long get_overflow_stack(void)
-{
- return (unsigned long)this_cpu_ptr(overflow_stack) +
- OVERFLOW_STACK_SIZE;
-}
asmlinkage void handle_bad_stack(struct pt_regs *regs)
{
unsigned long tsk_stk = (unsigned long)current->stack;
unsigned long ovf_stk = (unsigned long)this_cpu_ptr(overflow_stack);
- /*
- * We're done with the shadow stack by this point, as we're on the
- * overflow stack. Tell any other concurrent overflowing harts that
- * they can proceed with panicing by releasing the pseudo-spinlock.
- *
- * This pairs with an amoswap.aq in handle_kernel_stack_overflow.
- */
- smp_store_release(&spin_shadow_stack, 0);
-
console_verbose();
pr_emerg("Insufficient stack space to handle exception!\n");
diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile
index e8aa7c380007..9b517fe1b8a8 100644
--- a/arch/riscv/kernel/vdso/Makefile
+++ b/arch/riscv/kernel/vdso/Makefile
@@ -36,7 +36,7 @@ CPPFLAGS_vdso.lds += -DHAS_VGETTIMEOFDAY
endif
# Disable -pg to prevent insert call site
-CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS)
# Disable profiling and instrumentation for VDSO code
GCOV_PROFILE := n
diff --git a/arch/riscv/kernel/vdso/hwprobe.c b/arch/riscv/kernel/vdso/hwprobe.c
index d40bec6ac078..cadf725ef798 100644
--- a/arch/riscv/kernel/vdso/hwprobe.c
+++ b/arch/riscv/kernel/vdso/hwprobe.c
@@ -37,7 +37,7 @@ int __vdso_riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count,
/* This is something we can handle, fill out the pairs. */
while (p < end) {
- if (p->key <= RISCV_HWPROBE_MAX_KEY) {
+ if (riscv_hwprobe_key_is_valid(p->key)) {
p->value = avd->all_cpu_hwprobe_values[p->key];
} else {
diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c
index f1387272a551..55a34f2020a8 100644
--- a/arch/riscv/mm/cacheflush.c
+++ b/arch/riscv/mm/cacheflush.c
@@ -3,7 +3,9 @@
* Copyright (C) 2017 SiFive
*/
+#include <linux/acpi.h>
#include <linux/of.h>
+#include <asm/acpi.h>
#include <asm/cacheflush.h>
#ifdef CONFIG_SMP
@@ -124,13 +126,24 @@ void __init riscv_init_cbo_blocksizes(void)
unsigned long cbom_hartid, cboz_hartid;
u32 cbom_block_size = 0, cboz_block_size = 0;
struct device_node *node;
+ struct acpi_table_header *rhct;
+ acpi_status status;
+
+ if (acpi_disabled) {
+ for_each_of_cpu_node(node) {
+ /* set block-size for cbom and/or cboz extension if available */
+ cbo_get_block_size(node, "riscv,cbom-block-size",
+ &cbom_block_size, &cbom_hartid);
+ cbo_get_block_size(node, "riscv,cboz-block-size",
+ &cboz_block_size, &cboz_hartid);
+ }
+ } else {
+ status = acpi_get_table(ACPI_SIG_RHCT, 0, &rhct);
+ if (ACPI_FAILURE(status))
+ return;
- for_each_of_cpu_node(node) {
- /* set block-size for cbom and/or cboz extension if available */
- cbo_get_block_size(node, "riscv,cbom-block-size",
- &cbom_block_size, &cbom_hartid);
- cbo_get_block_size(node, "riscv,cboz-block-size",
- &cboz_block_size, &cboz_hartid);
+ acpi_get_cbo_block_size(rhct, &cbom_block_size, &cboz_block_size, NULL);
+ acpi_put_table((struct acpi_table_header *)rhct);
}
if (cbom_block_size)
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index d9a4e8702864..b128ed3c5a37 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -49,10 +49,12 @@ u64 satp_mode __ro_after_init = SATP_MODE_32;
#endif
EXPORT_SYMBOL(satp_mode);
+#ifdef CONFIG_64BIT
bool pgtable_l4_enabled = IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_XIP_KERNEL);
bool pgtable_l5_enabled = IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_XIP_KERNEL);
EXPORT_SYMBOL(pgtable_l4_enabled);
EXPORT_SYMBOL(pgtable_l5_enabled);
+#endif
phys_addr_t phys_ram_base __ro_after_init;
EXPORT_SYMBOL(phys_ram_base);
diff --git a/arch/riscv/mm/ptdump.c b/arch/riscv/mm/ptdump.c
index 20a9f991a6d7..e9090b38f811 100644
--- a/arch/riscv/mm/ptdump.c
+++ b/arch/riscv/mm/ptdump.c
@@ -384,6 +384,9 @@ static int __init ptdump_init(void)
kernel_ptd_info.base_addr = KERN_VIRT_START;
+ pg_level[1].name = pgtable_l5_enabled ? "P4D" : "PGD";
+ pg_level[2].name = pgtable_l4_enabled ? "PUD" : "PGD";
+
for (i = 0; i < ARRAY_SIZE(pg_level); i++)
for (j = 0; j < ARRAY_SIZE(pte_bits); j++)
pg_level[i].mask |= pte_bits[j].mask;
diff --git a/arch/riscv/purgatory/Makefile b/arch/riscv/purgatory/Makefile
index 9e6476719abb..280b0eb352b8 100644
--- a/arch/riscv/purgatory/Makefile
+++ b/arch/riscv/purgatory/Makefile
@@ -81,6 +81,14 @@ ifdef CONFIG_CFI_CLANG
PURGATORY_CFLAGS_REMOVE += $(CC_FLAGS_CFI)
endif
+ifdef CONFIG_RELOCATABLE
+PURGATORY_CFLAGS_REMOVE += -fPIE
+endif
+
+ifdef CONFIG_SHADOW_CALL_STACK
+PURGATORY_CFLAGS_REMOVE += $(CC_FLAGS_SCS)
+endif
+
CFLAGS_REMOVE_purgatory.o += $(PURGATORY_CFLAGS_REMOVE)
CFLAGS_purgatory.o += $(PURGATORY_CFLAGS)
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index b0d67ac8695f..3bec98d20283 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -236,6 +236,7 @@ config S390
select THREAD_INFO_IN_TASK
select TRACE_IRQFLAGS_SUPPORT
select TTY
+ select USER_STACKTRACE_SUPPORT
select VIRT_CPU_ACCOUNTING
select ZONE_DMA
# Note: keep the above list sorted alphabetically
diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c
index 7b7521762633..2ab4872fbee1 100644
--- a/arch/s390/boot/ipl_parm.c
+++ b/arch/s390/boot/ipl_parm.c
@@ -3,6 +3,7 @@
#include <linux/init.h>
#include <linux/ctype.h>
#include <linux/pgtable.h>
+#include <asm/page-states.h>
#include <asm/ebcdic.h>
#include <asm/sclp.h>
#include <asm/sections.h>
@@ -24,6 +25,7 @@ unsigned int __bootdata_preserved(zlib_dfltcc_support) = ZLIB_DFLTCC_FULL;
struct ipl_parameter_block __bootdata_preserved(ipl_block);
int __bootdata_preserved(ipl_block_valid);
int __bootdata_preserved(__kaslr_enabled);
+int __bootdata_preserved(cmma_flag) = 1;
unsigned long vmalloc_size = VMALLOC_DEFAULT_SIZE;
unsigned long memory_limit;
@@ -295,6 +297,12 @@ void parse_boot_command_line(void)
if (!strcmp(param, "nokaslr"))
__kaslr_enabled = 0;
+ if (!strcmp(param, "cmma")) {
+ rc = kstrtobool(val, &enabled);
+ if (!rc && !enabled)
+ cmma_flag = 0;
+ }
+
#if IS_ENABLED(CONFIG_KVM)
if (!strcmp(param, "prot_virt")) {
rc = kstrtobool(val, &enabled);
diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c
index 8826c4f18645..8104e0e3d188 100644
--- a/arch/s390/boot/startup.c
+++ b/arch/s390/boot/startup.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/string.h>
#include <linux/elf.h>
+#include <asm/page-states.h>
#include <asm/boot_data.h>
#include <asm/sections.h>
#include <asm/maccess.h>
@@ -57,6 +58,48 @@ static void detect_facilities(void)
machine.has_nx = 1;
}
+static int cmma_test_essa(void)
+{
+ unsigned long reg1, reg2, tmp = 0;
+ int rc = 1;
+ psw_t old;
+
+ /* Test ESSA_GET_STATE */
+ asm volatile(
+ " mvc 0(16,%[psw_old]),0(%[psw_pgm])\n"
+ " epsw %[reg1],%[reg2]\n"
+ " st %[reg1],0(%[psw_pgm])\n"
+ " st %[reg2],4(%[psw_pgm])\n"
+ " larl %[reg1],1f\n"
+ " stg %[reg1],8(%[psw_pgm])\n"
+ " .insn rrf,0xb9ab0000,%[tmp],%[tmp],%[cmd],0\n"
+ " la %[rc],0\n"
+ "1: mvc 0(16,%[psw_pgm]),0(%[psw_old])\n"
+ : [reg1] "=&d" (reg1),
+ [reg2] "=&a" (reg2),
+ [rc] "+&d" (rc),
+ [tmp] "=&d" (tmp),
+ "+Q" (S390_lowcore.program_new_psw),
+ "=Q" (old)
+ : [psw_old] "a" (&old),
+ [psw_pgm] "a" (&S390_lowcore.program_new_psw),
+ [cmd] "i" (ESSA_GET_STATE)
+ : "cc", "memory");
+ return rc;
+}
+
+static void cmma_init(void)
+{
+ if (!cmma_flag)
+ return;
+ if (cmma_test_essa()) {
+ cmma_flag = 0;
+ return;
+ }
+ if (test_facility(147))
+ cmma_flag = 2;
+}
+
static void setup_lpp(void)
{
S390_lowcore.current_pid = 0;
@@ -306,6 +349,7 @@ void startup_kernel(void)
setup_boot_command_line();
parse_boot_command_line();
detect_facilities();
+ cmma_init();
sanitize_prot_virt_host();
max_physmem_end = detect_max_physmem_end();
setup_ident_map_size(max_physmem_end);
diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c
index 3075d65e112c..e3a4500a5a75 100644
--- a/arch/s390/boot/vmem.c
+++ b/arch/s390/boot/vmem.c
@@ -2,6 +2,7 @@
#include <linux/sched/task.h>
#include <linux/pgtable.h>
#include <linux/kasan.h>
+#include <asm/page-states.h>
#include <asm/pgalloc.h>
#include <asm/facility.h>
#include <asm/sections.h>
@@ -70,6 +71,10 @@ static void kasan_populate_shadow(void)
crst_table_init((unsigned long *)kasan_early_shadow_pud, pud_val(pud_z));
crst_table_init((unsigned long *)kasan_early_shadow_pmd, pmd_val(pmd_z));
memset64((u64 *)kasan_early_shadow_pte, pte_val(pte_z), PTRS_PER_PTE);
+ __arch_set_page_dat(kasan_early_shadow_p4d, 1UL << CRST_ALLOC_ORDER);
+ __arch_set_page_dat(kasan_early_shadow_pud, 1UL << CRST_ALLOC_ORDER);
+ __arch_set_page_dat(kasan_early_shadow_pmd, 1UL << CRST_ALLOC_ORDER);
+ __arch_set_page_dat(kasan_early_shadow_pte, 1);
/*
* Current memory layout:
@@ -223,6 +228,7 @@ static void *boot_crst_alloc(unsigned long val)
table = (unsigned long *)physmem_alloc_top_down(RR_VMEM, size, size);
crst_table_init(table, val);
+ __arch_set_page_dat(table, 1UL << CRST_ALLOC_ORDER);
return table;
}
@@ -238,6 +244,7 @@ static pte_t *boot_pte_alloc(void)
if (!pte_leftover) {
pte_leftover = (void *)physmem_alloc_top_down(RR_VMEM, PAGE_SIZE, PAGE_SIZE);
pte = pte_leftover + _PAGE_TABLE_SIZE;
+ __arch_set_page_dat(pte, 1);
} else {
pte = pte_leftover;
pte_leftover = NULL;
@@ -418,6 +425,14 @@ void setup_vmem(unsigned long asce_limit)
unsigned long asce_bits;
int i;
+ /*
+ * Mark whole memory as no-dat. This must be done before any
+ * page tables are allocated, or kernel image builtin pages
+ * are marked as dat tables.
+ */
+ for_each_physmem_online_range(i, &start, &end)
+ __arch_set_page_nodat((void *)start, (end - start) >> PAGE_SHIFT);
+
if (asce_limit == _REGION1_SIZE) {
asce_type = _REGION2_ENTRY_EMPTY;
asce_bits = _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH;
@@ -429,6 +444,8 @@ void setup_vmem(unsigned long asce_limit)
crst_table_init((unsigned long *)swapper_pg_dir, asce_type);
crst_table_init((unsigned long *)invalid_pg_dir, _REGION3_ENTRY_EMPTY);
+ __arch_set_page_dat((void *)swapper_pg_dir, 1UL << CRST_ALLOC_ORDER);
+ __arch_set_page_dat((void *)invalid_pg_dir, 1UL << CRST_ALLOC_ORDER);
/*
* To allow prefixing the lowcore must be mapped with 4KB pages.
diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h
index 829d68e2c685..bb1b4bef1878 100644
--- a/arch/s390/include/asm/mmu.h
+++ b/arch/s390/include/asm/mmu.h
@@ -11,7 +11,6 @@ typedef struct {
cpumask_t cpu_attach_mask;
atomic_t flush_count;
unsigned int flush_mm;
- struct list_head pgtable_list;
struct list_head gmap_list;
unsigned long gmap_asce;
unsigned long asce;
@@ -39,7 +38,6 @@ typedef struct {
#define INIT_MM_CONTEXT(name) \
.context.lock = __SPIN_LOCK_UNLOCKED(name.context.lock), \
- .context.pgtable_list = LIST_HEAD_INIT(name.context.pgtable_list), \
.context.gmap_list = LIST_HEAD_INIT(name.context.gmap_list),
#endif
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index 757fe6f0d802..929af18b0908 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -22,7 +22,6 @@ static inline int init_new_context(struct task_struct *tsk,
unsigned long asce_type, init_entry;
spin_lock_init(&mm->context.lock);
- INIT_LIST_HEAD(&mm->context.pgtable_list);
INIT_LIST_HEAD(&mm->context.gmap_list);
cpumask_clear(&mm->context.cpu_attach_mask);
atomic_set(&mm->context.flush_count, 0);
diff --git a/arch/s390/include/asm/page-states.h b/arch/s390/include/asm/page-states.h
index c33c4deb545f..08fcbd628120 100644
--- a/arch/s390/include/asm/page-states.h
+++ b/arch/s390/include/asm/page-states.h
@@ -7,6 +7,9 @@
#ifndef PAGE_STATES_H
#define PAGE_STATES_H
+#include <asm/sections.h>
+#include <asm/page.h>
+
#define ESSA_GET_STATE 0
#define ESSA_SET_STABLE 1
#define ESSA_SET_UNUSED 2
@@ -18,4 +21,60 @@
#define ESSA_MAX ESSA_SET_STABLE_NODAT
+extern int __bootdata_preserved(cmma_flag);
+
+static __always_inline unsigned long essa(unsigned long paddr, unsigned char cmd)
+{
+ unsigned long rc;
+
+ asm volatile(
+ " .insn rrf,0xb9ab0000,%[rc],%[paddr],%[cmd],0"
+ : [rc] "=d" (rc)
+ : [paddr] "d" (paddr),
+ [cmd] "i" (cmd));
+ return rc;
+}
+
+static __always_inline void __set_page_state(void *addr, unsigned long num_pages, unsigned char cmd)
+{
+ unsigned long paddr = __pa(addr) & PAGE_MASK;
+
+ while (num_pages--) {
+ essa(paddr, cmd);
+ paddr += PAGE_SIZE;
+ }
+}
+
+static inline void __set_page_unused(void *addr, unsigned long num_pages)
+{
+ __set_page_state(addr, num_pages, ESSA_SET_UNUSED);
+}
+
+static inline void __set_page_stable_dat(void *addr, unsigned long num_pages)
+{
+ __set_page_state(addr, num_pages, ESSA_SET_STABLE);
+}
+
+static inline void __set_page_stable_nodat(void *addr, unsigned long num_pages)
+{
+ __set_page_state(addr, num_pages, ESSA_SET_STABLE_NODAT);
+}
+
+static inline void __arch_set_page_nodat(void *addr, unsigned long num_pages)
+{
+ if (!cmma_flag)
+ return;
+ if (cmma_flag < 2)
+ __set_page_stable_dat(addr, num_pages);
+ else
+ __set_page_stable_nodat(addr, num_pages);
+}
+
+static inline void __arch_set_page_dat(void *addr, unsigned long num_pages)
+{
+ if (!cmma_flag)
+ return;
+ __set_page_stable_dat(addr, num_pages);
+}
+
#endif
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
index cfec0743314e..73b9c3bf377f 100644
--- a/arch/s390/include/asm/page.h
+++ b/arch/s390/include/asm/page.h
@@ -164,7 +164,6 @@ static inline int page_reset_referenced(unsigned long addr)
struct page;
void arch_free_page(struct page *page, int order);
void arch_alloc_page(struct page *page, int order);
-void arch_set_page_dat(struct page *page, int order);
static inline int devmem_is_allowed(unsigned long pfn)
{
diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index b248694e0024..e91cd6bbc330 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -159,13 +159,6 @@ struct zpci_dev {
unsigned long *dma_table;
int tlb_refresh;
- spinlock_t iommu_bitmap_lock;
- unsigned long *iommu_bitmap;
- unsigned long *lazy_bitmap;
- unsigned long iommu_size;
- unsigned long iommu_pages;
- unsigned int next_bit;
-
struct iommu_device iommu_dev; /* IOMMU core handle */
char res_name[16];
@@ -180,10 +173,6 @@ struct zpci_dev {
struct zpci_fmb *fmb;
u16 fmb_update; /* update interval */
u16 fmb_length;
- /* software counters */
- atomic64_t allocated_pages;
- atomic64_t mapped_pages;
- atomic64_t unmapped_pages;
u8 version;
enum pci_bus_speed max_bus_speed;
diff --git a/arch/s390/include/asm/pci_clp.h b/arch/s390/include/asm/pci_clp.h
index d6189ed14f84..f0c677ddd270 100644
--- a/arch/s390/include/asm/pci_clp.h
+++ b/arch/s390/include/asm/pci_clp.h
@@ -50,6 +50,9 @@ struct clp_fh_list_entry {
#define CLP_UTIL_STR_LEN 64
#define CLP_PFIP_NR_SEGMENTS 4
+/* PCI function type numbers */
+#define PCI_FUNC_TYPE_ISM 0x5 /* ISM device */
+
extern bool zpci_unique_uid;
struct clp_rsp_slpc_pci {
diff --git a/arch/s390/include/asm/pci_dma.h b/arch/s390/include/asm/pci_dma.h
index 7119c04c51c5..42d7cc4262ca 100644
--- a/arch/s390/include/asm/pci_dma.h
+++ b/arch/s390/include/asm/pci_dma.h
@@ -82,117 +82,16 @@ enum zpci_ioat_dtype {
#define ZPCI_TABLE_VALID_MASK 0x20
#define ZPCI_TABLE_PROT_MASK 0x200
-static inline unsigned int calc_rtx(dma_addr_t ptr)
-{
- return ((unsigned long) ptr >> ZPCI_RT_SHIFT) & ZPCI_INDEX_MASK;
-}
-
-static inline unsigned int calc_sx(dma_addr_t ptr)
-{
- return ((unsigned long) ptr >> ZPCI_ST_SHIFT) & ZPCI_INDEX_MASK;
-}
-
-static inline unsigned int calc_px(dma_addr_t ptr)
-{
- return ((unsigned long) ptr >> PAGE_SHIFT) & ZPCI_PT_MASK;
-}
-
-static inline void set_pt_pfaa(unsigned long *entry, phys_addr_t pfaa)
-{
- *entry &= ZPCI_PTE_FLAG_MASK;
- *entry |= (pfaa & ZPCI_PTE_ADDR_MASK);
-}
-
-static inline void set_rt_sto(unsigned long *entry, phys_addr_t sto)
-{
- *entry &= ZPCI_RTE_FLAG_MASK;
- *entry |= (sto & ZPCI_RTE_ADDR_MASK);
- *entry |= ZPCI_TABLE_TYPE_RTX;
-}
-
-static inline void set_st_pto(unsigned long *entry, phys_addr_t pto)
-{
- *entry &= ZPCI_STE_FLAG_MASK;
- *entry |= (pto & ZPCI_STE_ADDR_MASK);
- *entry |= ZPCI_TABLE_TYPE_SX;
-}
-
-static inline void validate_rt_entry(unsigned long *entry)
-{
- *entry &= ~ZPCI_TABLE_VALID_MASK;
- *entry &= ~ZPCI_TABLE_OFFSET_MASK;
- *entry |= ZPCI_TABLE_VALID;
- *entry |= ZPCI_TABLE_LEN_RTX;
-}
-
-static inline void validate_st_entry(unsigned long *entry)
-{
- *entry &= ~ZPCI_TABLE_VALID_MASK;
- *entry |= ZPCI_TABLE_VALID;
-}
-
-static inline void invalidate_pt_entry(unsigned long *entry)
-{
- WARN_ON_ONCE((*entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_INVALID);
- *entry &= ~ZPCI_PTE_VALID_MASK;
- *entry |= ZPCI_PTE_INVALID;
-}
-
-static inline void validate_pt_entry(unsigned long *entry)
-{
- WARN_ON_ONCE((*entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID);
- *entry &= ~ZPCI_PTE_VALID_MASK;
- *entry |= ZPCI_PTE_VALID;
-}
-
-static inline void entry_set_protected(unsigned long *entry)
-{
- *entry &= ~ZPCI_TABLE_PROT_MASK;
- *entry |= ZPCI_TABLE_PROTECTED;
-}
-
-static inline void entry_clr_protected(unsigned long *entry)
-{
- *entry &= ~ZPCI_TABLE_PROT_MASK;
- *entry |= ZPCI_TABLE_UNPROTECTED;
-}
-
-static inline int reg_entry_isvalid(unsigned long entry)
-{
- return (entry & ZPCI_TABLE_VALID_MASK) == ZPCI_TABLE_VALID;
-}
-
-static inline int pt_entry_isvalid(unsigned long entry)
-{
- return (entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID;
-}
-
-static inline unsigned long *get_rt_sto(unsigned long entry)
-{
- if ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_RTX)
- return phys_to_virt(entry & ZPCI_RTE_ADDR_MASK);
- else
- return NULL;
-
-}
-
-static inline unsigned long *get_st_pto(unsigned long entry)
-{
- if ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_SX)
- return phys_to_virt(entry & ZPCI_STE_ADDR_MASK);
- else
- return NULL;
-}
-
-/* Prototypes */
-void dma_free_seg_table(unsigned long);
-unsigned long *dma_alloc_cpu_table(gfp_t gfp);
-void dma_cleanup_tables(unsigned long *);
-unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr,
- gfp_t gfp);
-void dma_update_cpu_trans(unsigned long *entry, phys_addr_t page_addr, int flags);
-
-extern const struct dma_map_ops s390_pci_dma_ops;
+struct zpci_iommu_ctrs {
+ atomic64_t mapped_pages;
+ atomic64_t unmapped_pages;
+ atomic64_t global_rpcits;
+ atomic64_t sync_map_rpcits;
+ atomic64_t sync_rpcits;
+};
+
+struct zpci_dev;
+struct zpci_iommu_ctrs *zpci_get_iommu_ctrs(struct zpci_dev *zdev);
#endif
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index 376b4b23bdaa..502d655fe6ae 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -25,7 +25,6 @@ void crst_table_free(struct mm_struct *, unsigned long *);
unsigned long *page_table_alloc(struct mm_struct *);
struct page *page_table_alloc_pgste(struct mm_struct *mm);
void page_table_free(struct mm_struct *, unsigned long *);
-void page_table_free_rcu(struct mmu_gather *, unsigned long *, unsigned long);
void page_table_free_pgste(struct page *page);
extern int page_table_allocate_pgste;
diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h
index 25cadc2b9cff..df316436d2e1 100644
--- a/arch/s390/include/asm/setup.h
+++ b/arch/s390/include/asm/setup.h
@@ -125,9 +125,6 @@ static inline void vmcp_cma_reserve(void) { }
void report_user_fault(struct pt_regs *regs, long signr, int is_mm_fault);
-void cmma_init(void);
-void cmma_init_nodat(void);
-
extern void (*_machine_restart)(char *command);
extern void (*_machine_halt)(void);
extern void (*_machine_power_off)(void);
diff --git a/arch/s390/include/asm/stacktrace.h b/arch/s390/include/asm/stacktrace.h
index 78f7b729b65f..31ec4f545e03 100644
--- a/arch/s390/include/asm/stacktrace.h
+++ b/arch/s390/include/asm/stacktrace.h
@@ -6,6 +6,13 @@
#include <linux/ptrace.h>
#include <asm/switch_to.h>
+struct stack_frame_user {
+ unsigned long back_chain;
+ unsigned long empty1[5];
+ unsigned long gprs[10];
+ unsigned long empty2[4];
+};
+
enum stack_type {
STACK_TYPE_UNKNOWN,
STACK_TYPE_TASK,
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index 383b1f91442c..d1455a601adc 100644
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -69,12 +69,9 @@ static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
tlb->mm->context.flush_mm = 1;
tlb->freed_tables = 1;
tlb->cleared_pmds = 1;
- /*
- * page_table_free_rcu takes care of the allocation bit masks
- * of the 2K table fragments in the 4K page table page,
- * then calls tlb_remove_table.
- */
- page_table_free_rcu(tlb, (unsigned long *) pte, address);
+ if (mm_alloc_pgste(tlb->mm))
+ gmap_unlink(tlb->mm, (unsigned long *)pte, address);
+ tlb_remove_ptdesc(tlb, pte);
}
/*
@@ -112,7 +109,7 @@ static inline void p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d,
__tlb_adjust_range(tlb, address, PAGE_SIZE);
tlb->mm->context.flush_mm = 1;
tlb->freed_tables = 1;
- tlb_remove_table(tlb, p4d);
+ tlb_remove_ptdesc(tlb, p4d);
}
/*
@@ -130,7 +127,7 @@ static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
tlb->mm->context.flush_mm = 1;
tlb->freed_tables = 1;
tlb->cleared_p4ds = 1;
- tlb_remove_table(tlb, pud);
+ tlb_remove_ptdesc(tlb, pud);
}
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index ff1f02b54771..eb43e5922a25 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -46,6 +46,7 @@ decompressor_handled_param(vmalloc);
decompressor_handled_param(dfltcc);
decompressor_handled_param(facilities);
decompressor_handled_param(nokaslr);
+decompressor_handled_param(cmma);
#if IS_ENABLED(CONFIG_KVM)
decompressor_handled_param(prot_virt);
#endif
diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c
index c27321cb0969..dfa77da2fd2e 100644
--- a/arch/s390/kernel/perf_event.c
+++ b/arch/s390/kernel/perf_event.c
@@ -15,7 +15,10 @@
#include <linux/export.h>
#include <linux/seq_file.h>
#include <linux/spinlock.h>
+#include <linux/uaccess.h>
+#include <linux/compat.h>
#include <linux/sysfs.h>
+#include <asm/stacktrace.h>
#include <asm/irq.h>
#include <asm/cpu_mf.h>
#include <asm/lowcore.h>
@@ -212,6 +215,44 @@ void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
}
}
+void perf_callchain_user(struct perf_callchain_entry_ctx *entry,
+ struct pt_regs *regs)
+{
+ struct stack_frame_user __user *sf;
+ unsigned long ip, sp;
+ bool first = true;
+
+ if (is_compat_task())
+ return;
+ perf_callchain_store(entry, instruction_pointer(regs));
+ sf = (void __user *)user_stack_pointer(regs);
+ pagefault_disable();
+ while (entry->nr < entry->max_stack) {
+ if (__get_user(sp, &sf->back_chain))
+ break;
+ if (__get_user(ip, &sf->gprs[8]))
+ break;
+ if (ip & 0x1) {
+ /*
+ * If the instruction address is invalid, and this
+ * is the first stack frame, assume r14 has not
+ * been written to the stack yet. Otherwise exit.
+ */
+ if (first && !(regs->gprs[14] & 0x1))
+ ip = regs->gprs[14];
+ else
+ break;
+ }
+ perf_callchain_store(entry, ip);
+ /* Sanity check: ABI requires SP to be aligned 8 bytes. */
+ if (!sp || sp & 0x7)
+ break;
+ sf = (void __user *)sp;
+ first = false;
+ }
+ pagefault_enable();
+}
+
/* Perf definitions for PMU event attributes in sysfs */
ssize_t cpumf_events_sysfs_show(struct device *dev,
struct device_attribute *attr, char *page)
diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c
index 0787010139f7..94f440e38303 100644
--- a/arch/s390/kernel/stacktrace.c
+++ b/arch/s390/kernel/stacktrace.c
@@ -6,9 +6,12 @@
*/
#include <linux/stacktrace.h>
+#include <linux/uaccess.h>
+#include <linux/compat.h>
#include <asm/stacktrace.h>
#include <asm/unwind.h>
#include <asm/kprobes.h>
+#include <asm/ptrace.h>
void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie,
struct task_struct *task, struct pt_regs *regs)
@@ -58,3 +61,43 @@ int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry,
return -EINVAL;
return 0;
}
+
+void arch_stack_walk_user(stack_trace_consume_fn consume_entry, void *cookie,
+ const struct pt_regs *regs)
+{
+ struct stack_frame_user __user *sf;
+ unsigned long ip, sp;
+ bool first = true;
+
+ if (is_compat_task())
+ return;
+ if (!consume_entry(cookie, instruction_pointer(regs)))
+ return;
+ sf = (void __user *)user_stack_pointer(regs);
+ pagefault_disable();
+ while (1) {
+ if (__get_user(sp, &sf->back_chain))
+ break;
+ if (__get_user(ip, &sf->gprs[8]))
+ break;
+ if (ip & 0x1) {
+ /*
+ * If the instruction address is invalid, and this
+ * is the first stack frame, assume r14 has not
+ * been written to the stack yet. Otherwise exit.
+ */
+ if (first && !(regs->gprs[14] & 0x1))
+ ip = regs->gprs[14];
+ else
+ break;
+ }
+ if (!consume_entry(cookie, ip))
+ break;
+ /* Sanity check: ABI requires SP to be aligned 8 bytes. */
+ if (!sp || sp & 0x7)
+ break;
+ sf = (void __user *)sp;
+ first = false;
+ }
+ pagefault_enable();
+}
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index 20786f6883b2..6f96b5a71c63 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -18,7 +18,7 @@
#include <linux/ksm.h>
#include <linux/mman.h>
#include <linux/pgtable.h>
-
+#include <asm/page-states.h>
#include <asm/pgalloc.h>
#include <asm/gmap.h>
#include <asm/page.h>
@@ -33,7 +33,7 @@ static struct page *gmap_alloc_crst(void)
page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER);
if (!page)
return NULL;
- arch_set_page_dat(page, CRST_ALLOC_ORDER);
+ __arch_set_page_dat(page_to_virt(page), 1UL << CRST_ALLOC_ORDER);
return page;
}
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 7eca10c32caa..43e612bc2bcd 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -164,14 +164,10 @@ void __init mem_init(void)
pv_init();
kfence_split_mapping();
- /* Setup guest page hinting */
- cmma_init();
/* this will put all low memory onto the freelists */
memblock_free_all();
setup_zero_pages(); /* Setup zeroed pages. */
-
- cmma_init_nodat();
}
void free_initmem(void)
diff --git a/arch/s390/mm/page-states.c b/arch/s390/mm/page-states.c
index a31acb2c4ef2..01f9b39e65f5 100644
--- a/arch/s390/mm/page-states.c
+++ b/arch/s390/mm/page-states.c
@@ -7,212 +7,18 @@
* Author(s): Martin Schwidefsky <[email protected]>
*/
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/types.h>
#include <linux/mm.h>
-#include <linux/memblock.h>
-#include <linux/gfp.h>
-#include <linux/init.h>
-#include <asm/asm-extable.h>
-#include <asm/facility.h>
#include <asm/page-states.h>
+#include <asm/sections.h>
+#include <asm/page.h>
-static int cmma_flag = 1;
-
-static int __init cmma(char *str)
-{
- bool enabled;
-
- if (!kstrtobool(str, &enabled))
- cmma_flag = enabled;
- return 1;
-}
-__setup("cmma=", cmma);
-
-static inline int cmma_test_essa(void)
-{
- unsigned long tmp = 0;
- int rc = -EOPNOTSUPP;
-
- /* test ESSA_GET_STATE */
- asm volatile(
- " .insn rrf,0xb9ab0000,%[tmp],%[tmp],%[cmd],0\n"
- "0: la %[rc],0\n"
- "1:\n"
- EX_TABLE(0b,1b)
- : [rc] "+&d" (rc), [tmp] "+&d" (tmp)
- : [cmd] "i" (ESSA_GET_STATE));
- return rc;
-}
-
-void __init cmma_init(void)
-{
- if (!cmma_flag)
- return;
- if (cmma_test_essa()) {
- cmma_flag = 0;
- return;
- }
- if (test_facility(147))
- cmma_flag = 2;
-}
-
-static inline void set_page_unused(struct page *page, int order)
-{
- int i, rc;
-
- for (i = 0; i < (1 << order); i++)
- asm volatile(".insn rrf,0xb9ab0000,%0,%1,%2,0"
- : "=&d" (rc)
- : "a" (page_to_phys(page + i)),
- "i" (ESSA_SET_UNUSED));
-}
-
-static inline void set_page_stable_dat(struct page *page, int order)
-{
- int i, rc;
-
- for (i = 0; i < (1 << order); i++)
- asm volatile(".insn rrf,0xb9ab0000,%0,%1,%2,0"
- : "=&d" (rc)
- : "a" (page_to_phys(page + i)),
- "i" (ESSA_SET_STABLE));
-}
-
-static inline void set_page_stable_nodat(struct page *page, int order)
-{
- int i, rc;
-
- for (i = 0; i < (1 << order); i++)
- asm volatile(".insn rrf,0xb9ab0000,%0,%1,%2,0"
- : "=&d" (rc)
- : "a" (page_to_phys(page + i)),
- "i" (ESSA_SET_STABLE_NODAT));
-}
-
-static void mark_kernel_pmd(pud_t *pud, unsigned long addr, unsigned long end)
-{
- unsigned long next;
- struct page *page;
- pmd_t *pmd;
-
- pmd = pmd_offset(pud, addr);
- do {
- next = pmd_addr_end(addr, end);
- if (pmd_none(*pmd) || pmd_large(*pmd))
- continue;
- page = phys_to_page(pmd_val(*pmd));
- set_bit(PG_arch_1, &page->flags);
- } while (pmd++, addr = next, addr != end);
-}
-
-static void mark_kernel_pud(p4d_t *p4d, unsigned long addr, unsigned long end)
-{
- unsigned long next;
- struct page *page;
- pud_t *pud;
- int i;
-
- pud = pud_offset(p4d, addr);
- do {
- next = pud_addr_end(addr, end);
- if (pud_none(*pud) || pud_large(*pud))
- continue;
- if (!pud_folded(*pud)) {
- page = phys_to_page(pud_val(*pud));
- for (i = 0; i < 4; i++)
- set_bit(PG_arch_1, &page[i].flags);
- }
- mark_kernel_pmd(pud, addr, next);
- } while (pud++, addr = next, addr != end);
-}
-
-static void mark_kernel_p4d(pgd_t *pgd, unsigned long addr, unsigned long end)
-{
- unsigned long next;
- struct page *page;
- p4d_t *p4d;
- int i;
-
- p4d = p4d_offset(pgd, addr);
- do {
- next = p4d_addr_end(addr, end);
- if (p4d_none(*p4d))
- continue;
- if (!p4d_folded(*p4d)) {
- page = phys_to_page(p4d_val(*p4d));
- for (i = 0; i < 4; i++)
- set_bit(PG_arch_1, &page[i].flags);
- }
- mark_kernel_pud(p4d, addr, next);
- } while (p4d++, addr = next, addr != end);
-}
-
-static void mark_kernel_pgd(void)
-{
- unsigned long addr, next, max_addr;
- struct page *page;
- pgd_t *pgd;
- int i;
-
- addr = 0;
- /*
- * Figure out maximum virtual address accessible with the
- * kernel ASCE. This is required to keep the page table walker
- * from accessing non-existent entries.
- */
- max_addr = (S390_lowcore.kernel_asce.val & _ASCE_TYPE_MASK) >> 2;
- max_addr = 1UL << (max_addr * 11 + 31);
- pgd = pgd_offset_k(addr);
- do {
- next = pgd_addr_end(addr, max_addr);
- if (pgd_none(*pgd))
- continue;
- if (!pgd_folded(*pgd)) {
- page = phys_to_page(pgd_val(*pgd));
- for (i = 0; i < 4; i++)
- set_bit(PG_arch_1, &page[i].flags);
- }
- mark_kernel_p4d(pgd, addr, next);
- } while (pgd++, addr = next, addr != max_addr);
-}
-
-void __init cmma_init_nodat(void)
-{
- struct page *page;
- unsigned long start, end, ix;
- int i;
-
- if (cmma_flag < 2)
- return;
- /* Mark pages used in kernel page tables */
- mark_kernel_pgd();
- page = virt_to_page(&swapper_pg_dir);
- for (i = 0; i < 4; i++)
- set_bit(PG_arch_1, &page[i].flags);
- page = virt_to_page(&invalid_pg_dir);
- for (i = 0; i < 4; i++)
- set_bit(PG_arch_1, &page[i].flags);
-
- /* Set all kernel pages not used for page tables to stable/no-dat */
- for_each_mem_pfn_range(i, MAX_NUMNODES, &start, &end, NULL) {
- page = pfn_to_page(start);
- for (ix = start; ix < end; ix++, page++) {
- if (__test_and_clear_bit(PG_arch_1, &page->flags))
- continue; /* skip page table pages */
- if (!list_empty(&page->lru))
- continue; /* skip free pages */
- set_page_stable_nodat(page, 0);
- }
- }
-}
+int __bootdata_preserved(cmma_flag);
void arch_free_page(struct page *page, int order)
{
if (!cmma_flag)
return;
- set_page_unused(page, order);
+ __set_page_unused(page_to_virt(page), 1UL << order);
}
void arch_alloc_page(struct page *page, int order)
@@ -220,14 +26,7 @@ void arch_alloc_page(struct page *page, int order)
if (!cmma_flag)
return;
if (cmma_flag < 2)
- set_page_stable_dat(page, order);
+ __set_page_stable_dat(page_to_virt(page), 1UL << order);
else
- set_page_stable_nodat(page, order);
-}
-
-void arch_set_page_dat(struct page *page, int order)
-{
- if (!cmma_flag)
- return;
- set_page_stable_dat(page, order);
+ __set_page_stable_nodat(page_to_virt(page), 1UL << order);
}
diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
index 5488ae17318e..008e487c94a6 100644
--- a/arch/s390/mm/pgalloc.c
+++ b/arch/s390/mm/pgalloc.c
@@ -10,6 +10,7 @@
#include <linux/slab.h>
#include <linux/mm.h>
#include <asm/mmu_context.h>
+#include <asm/page-states.h>
#include <asm/pgalloc.h>
#include <asm/gmap.h>
#include <asm/tlb.h>
@@ -43,11 +44,13 @@ __initcall(page_table_register_sysctl);
unsigned long *crst_table_alloc(struct mm_struct *mm)
{
struct ptdesc *ptdesc = pagetable_alloc(GFP_KERNEL, CRST_ALLOC_ORDER);
+ unsigned long *table;
if (!ptdesc)
return NULL;
- arch_set_page_dat(ptdesc_page(ptdesc), CRST_ALLOC_ORDER);
- return (unsigned long *) ptdesc_to_virt(ptdesc);
+ table = ptdesc_to_virt(ptdesc);
+ __arch_set_page_dat(table, 1UL << CRST_ALLOC_ORDER);
+ return table;
}
void crst_table_free(struct mm_struct *mm, unsigned long *table)
@@ -130,11 +133,6 @@ err_p4d:
return -ENOMEM;
}
-static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits)
-{
- return atomic_fetch_xor(bits, v) ^ bits;
-}
-
#ifdef CONFIG_PGSTE
struct page *page_table_alloc_pgste(struct mm_struct *mm)
@@ -145,7 +143,7 @@ struct page *page_table_alloc_pgste(struct mm_struct *mm)
ptdesc = pagetable_alloc(GFP_KERNEL, 0);
if (ptdesc) {
table = (u64 *)ptdesc_to_virt(ptdesc);
- arch_set_page_dat(virt_to_page(table), 0);
+ __arch_set_page_dat(table, 1);
memset64(table, _PAGE_INVALID, PTRS_PER_PTE);
memset64(table + PTRS_PER_PTE, 0, PTRS_PER_PTE);
}
@@ -159,125 +157,11 @@ void page_table_free_pgste(struct page *page)
#endif /* CONFIG_PGSTE */
-/*
- * A 2KB-pgtable is either upper or lower half of a normal page.
- * The second half of the page may be unused or used as another
- * 2KB-pgtable.
- *
- * Whenever possible the parent page for a new 2KB-pgtable is picked
- * from the list of partially allocated pages mm_context_t::pgtable_list.
- * In case the list is empty a new parent page is allocated and added to
- * the list.
- *
- * When a parent page gets fully allocated it contains 2KB-pgtables in both
- * upper and lower halves and is removed from mm_context_t::pgtable_list.
- *
- * When 2KB-pgtable is freed from to fully allocated parent page that
- * page turns partially allocated and added to mm_context_t::pgtable_list.
- *
- * If 2KB-pgtable is freed from the partially allocated parent page that
- * page turns unused and gets removed from mm_context_t::pgtable_list.
- * Furthermore, the unused parent page is released.
- *
- * As follows from the above, no unallocated or fully allocated parent
- * pages are contained in mm_context_t::pgtable_list.
- *
- * The upper byte (bits 24-31) of the parent page _refcount is used
- * for tracking contained 2KB-pgtables and has the following format:
- *
- * PP AA
- * 01234567 upper byte (bits 24-31) of struct page::_refcount
- * || ||
- * || |+--- upper 2KB-pgtable is allocated
- * || +---- lower 2KB-pgtable is allocated
- * |+------- upper 2KB-pgtable is pending for removal
- * +-------- lower 2KB-pgtable is pending for removal
- *
- * (See commit 620b4e903179 ("s390: use _refcount for pgtables") on why
- * using _refcount is possible).
- *
- * When 2KB-pgtable is allocated the corresponding AA bit is set to 1.
- * The parent page is either:
- * - added to mm_context_t::pgtable_list in case the second half of the
- * parent page is still unallocated;
- * - removed from mm_context_t::pgtable_list in case both hales of the
- * parent page are allocated;
- * These operations are protected with mm_context_t::lock.
- *
- * When 2KB-pgtable is deallocated the corresponding AA bit is set to 0
- * and the corresponding PP bit is set to 1 in a single atomic operation.
- * Thus, PP and AA bits corresponding to the same 2KB-pgtable are mutually
- * exclusive and may never be both set to 1!
- * The parent page is either:
- * - added to mm_context_t::pgtable_list in case the second half of the
- * parent page is still allocated;
- * - removed from mm_context_t::pgtable_list in case the second half of
- * the parent page is unallocated;
- * These operations are protected with mm_context_t::lock.
- *
- * It is important to understand that mm_context_t::lock only protects
- * mm_context_t::pgtable_list and AA bits, but not the parent page itself
- * and PP bits.
- *
- * Releasing the parent page happens whenever the PP bit turns from 1 to 0,
- * while both AA bits and the second PP bit are already unset. Then the
- * parent page does not contain any 2KB-pgtable fragment anymore, and it has
- * also been removed from mm_context_t::pgtable_list. It is safe to release
- * the page therefore.
- *
- * PGSTE memory spaces use full 4KB-pgtables and do not need most of the
- * logic described above. Both AA bits are set to 1 to denote a 4KB-pgtable
- * while the PP bits are never used, nor such a page is added to or removed
- * from mm_context_t::pgtable_list.
- *
- * pte_free_defer() overrides those rules: it takes the page off pgtable_list,
- * and prevents both 2K fragments from being reused. pte_free_defer() has to
- * guarantee that its pgtable cannot be reused before the RCU grace period
- * has elapsed (which page_table_free_rcu() does not actually guarantee).
- * But for simplicity, because page->rcu_head overlays page->lru, and because
- * the RCU callback might not be called before the mm_context_t has been freed,
- * pte_free_defer() in this implementation prevents both fragments from being
- * reused, and delays making the call to RCU until both fragments are freed.
- */
unsigned long *page_table_alloc(struct mm_struct *mm)
{
- unsigned long *table;
struct ptdesc *ptdesc;
- unsigned int mask, bit;
-
- /* Try to get a fragment of a 4K page as a 2K page table */
- if (!mm_alloc_pgste(mm)) {
- table = NULL;
- spin_lock_bh(&mm->context.lock);
- if (!list_empty(&mm->context.pgtable_list)) {
- ptdesc = list_first_entry(&mm->context.pgtable_list,
- struct ptdesc, pt_list);
- mask = atomic_read(&ptdesc->_refcount) >> 24;
- /*
- * The pending removal bits must also be checked.
- * Failure to do so might lead to an impossible
- * value of (i.e 0x13 or 0x23) written to _refcount.
- * Such values violate the assumption that pending and
- * allocation bits are mutually exclusive, and the rest
- * of the code unrails as result. That could lead to
- * a whole bunch of races and corruptions.
- */
- mask = (mask | (mask >> 4)) & 0x03U;
- if (mask != 0x03U) {
- table = (unsigned long *) ptdesc_to_virt(ptdesc);
- bit = mask & 1; /* =1 -> second 2K */
- if (bit)
- table += PTRS_PER_PTE;
- atomic_xor_bits(&ptdesc->_refcount,
- 0x01U << (bit + 24));
- list_del_init(&ptdesc->pt_list);
- }
- }
- spin_unlock_bh(&mm->context.lock);
- if (table)
- return table;
- }
- /* Allocate a fresh page */
+ unsigned long *table;
+
ptdesc = pagetable_alloc(GFP_KERNEL, 0);
if (!ptdesc)
return NULL;
@@ -285,177 +169,57 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
pagetable_free(ptdesc);
return NULL;
}
- arch_set_page_dat(ptdesc_page(ptdesc), 0);
- /* Initialize page table */
- table = (unsigned long *) ptdesc_to_virt(ptdesc);
- if (mm_alloc_pgste(mm)) {
- /* Return 4K page table with PGSTEs */
- INIT_LIST_HEAD(&ptdesc->pt_list);
- atomic_xor_bits(&ptdesc->_refcount, 0x03U << 24);
- memset64((u64 *)table, _PAGE_INVALID, PTRS_PER_PTE);
- memset64((u64 *)table + PTRS_PER_PTE, 0, PTRS_PER_PTE);
- } else {
- /* Return the first 2K fragment of the page */
- atomic_xor_bits(&ptdesc->_refcount, 0x01U << 24);
- memset64((u64 *)table, _PAGE_INVALID, 2 * PTRS_PER_PTE);
- spin_lock_bh(&mm->context.lock);
- list_add(&ptdesc->pt_list, &mm->context.pgtable_list);
- spin_unlock_bh(&mm->context.lock);
- }
+ table = ptdesc_to_virt(ptdesc);
+ __arch_set_page_dat(table, 1);
+ /* pt_list is used by gmap only */
+ INIT_LIST_HEAD(&ptdesc->pt_list);
+ memset64((u64 *)table, _PAGE_INVALID, PTRS_PER_PTE);
+ memset64((u64 *)table + PTRS_PER_PTE, 0, PTRS_PER_PTE);
return table;
}
-static void page_table_release_check(struct page *page, void *table,
- unsigned int half, unsigned int mask)
+static void pagetable_pte_dtor_free(struct ptdesc *ptdesc)
{
- char msg[128];
-
- if (!IS_ENABLED(CONFIG_DEBUG_VM))
- return;
- if (!mask && list_empty(&page->lru))
- return;
- snprintf(msg, sizeof(msg),
- "Invalid pgtable %p release half 0x%02x mask 0x%02x",
- table, half, mask);
- dump_page(page, msg);
-}
-
-static void pte_free_now(struct rcu_head *head)
-{
- struct ptdesc *ptdesc;
-
- ptdesc = container_of(head, struct ptdesc, pt_rcu_head);
pagetable_pte_dtor(ptdesc);
pagetable_free(ptdesc);
}
void page_table_free(struct mm_struct *mm, unsigned long *table)
{
- unsigned int mask, bit, half;
struct ptdesc *ptdesc = virt_to_ptdesc(table);
- if (!mm_alloc_pgste(mm)) {
- /* Free 2K page table fragment of a 4K page */
- bit = ((unsigned long) table & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t));
- spin_lock_bh(&mm->context.lock);
- /*
- * Mark the page for delayed release. The actual release
- * will happen outside of the critical section from this
- * function or from __tlb_remove_table()
- */
- mask = atomic_xor_bits(&ptdesc->_refcount, 0x11U << (bit + 24));
- mask >>= 24;
- if ((mask & 0x03U) && !folio_test_active(ptdesc_folio(ptdesc))) {
- /*
- * Other half is allocated, and neither half has had
- * its free deferred: add page to head of list, to make
- * this freed half available for immediate reuse.
- */
- list_add(&ptdesc->pt_list, &mm->context.pgtable_list);
- } else {
- /* If page is on list, now remove it. */
- list_del_init(&ptdesc->pt_list);
- }
- spin_unlock_bh(&mm->context.lock);
- mask = atomic_xor_bits(&ptdesc->_refcount, 0x10U << (bit + 24));
- mask >>= 24;
- if (mask != 0x00U)
- return;
- half = 0x01U << bit;
- } else {
- half = 0x03U;
- mask = atomic_xor_bits(&ptdesc->_refcount, 0x03U << 24);
- mask >>= 24;
- }
-
- page_table_release_check(ptdesc_page(ptdesc), table, half, mask);
- if (folio_test_clear_active(ptdesc_folio(ptdesc)))
- call_rcu(&ptdesc->pt_rcu_head, pte_free_now);
- else
- pte_free_now(&ptdesc->pt_rcu_head);
+ pagetable_pte_dtor_free(ptdesc);
}
-void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table,
- unsigned long vmaddr)
+void __tlb_remove_table(void *table)
{
- struct mm_struct *mm;
- unsigned int bit, mask;
struct ptdesc *ptdesc = virt_to_ptdesc(table);
+ struct page *page = ptdesc_page(ptdesc);
- mm = tlb->mm;
- if (mm_alloc_pgste(mm)) {
- gmap_unlink(mm, table, vmaddr);
- table = (unsigned long *) ((unsigned long)table | 0x03U);
- tlb_remove_ptdesc(tlb, table);
+ if (compound_order(page) == CRST_ALLOC_ORDER) {
+ /* pmd, pud, or p4d */
+ pagetable_free(ptdesc);
return;
}
- bit = ((unsigned long) table & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t));
- spin_lock_bh(&mm->context.lock);
- /*
- * Mark the page for delayed release. The actual release will happen
- * outside of the critical section from __tlb_remove_table() or from
- * page_table_free()
- */
- mask = atomic_xor_bits(&ptdesc->_refcount, 0x11U << (bit + 24));
- mask >>= 24;
- if ((mask & 0x03U) && !folio_test_active(ptdesc_folio(ptdesc))) {
- /*
- * Other half is allocated, and neither half has had
- * its free deferred: add page to end of list, to make
- * this freed half available for reuse once its pending
- * bit has been cleared by __tlb_remove_table().
- */
- list_add_tail(&ptdesc->pt_list, &mm->context.pgtable_list);
- } else {
- /* If page is on list, now remove it. */
- list_del_init(&ptdesc->pt_list);
- }
- spin_unlock_bh(&mm->context.lock);
- table = (unsigned long *) ((unsigned long) table | (0x01U << bit));
- tlb_remove_table(tlb, table);
+ pagetable_pte_dtor_free(ptdesc);
}
-void __tlb_remove_table(void *_table)
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static void pte_free_now(struct rcu_head *head)
{
- unsigned int mask = (unsigned long) _table & 0x03U, half = mask;
- void *table = (void *)((unsigned long) _table ^ mask);
- struct ptdesc *ptdesc = virt_to_ptdesc(table);
+ struct ptdesc *ptdesc = container_of(head, struct ptdesc, pt_rcu_head);
- switch (half) {
- case 0x00U: /* pmd, pud, or p4d */
- pagetable_free(ptdesc);
- return;
- case 0x01U: /* lower 2K of a 4K page table */
- case 0x02U: /* higher 2K of a 4K page table */
- mask = atomic_xor_bits(&ptdesc->_refcount, mask << (4 + 24));
- mask >>= 24;
- if (mask != 0x00U)
- return;
- break;
- case 0x03U: /* 4K page table with pgstes */
- mask = atomic_xor_bits(&ptdesc->_refcount, 0x03U << 24);
- mask >>= 24;
- break;
- }
-
- page_table_release_check(ptdesc_page(ptdesc), table, half, mask);
- if (folio_test_clear_active(ptdesc_folio(ptdesc)))
- call_rcu(&ptdesc->pt_rcu_head, pte_free_now);
- else
- pte_free_now(&ptdesc->pt_rcu_head);
+ pagetable_pte_dtor_free(ptdesc);
}
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
void pte_free_defer(struct mm_struct *mm, pgtable_t pgtable)
{
- struct page *page;
+ struct ptdesc *ptdesc = virt_to_ptdesc(pgtable);
- page = virt_to_page(pgtable);
- SetPageActive(page);
- page_table_free(mm, (unsigned long *)pgtable);
+ call_rcu(&ptdesc->pt_rcu_head, pte_free_now);
/*
- * page_table_free() does not do the pgste gmap_unlink() which
- * page_table_free_rcu() does: warn us if pgste ever reaches here.
+ * THPs are not allowed for KVM guests. Warn if pgste ever reaches here.
+ * Turn to the generic pte_free_defer() version once gmap is removed.
*/
WARN_ON_ONCE(mm_has_pgste(mm));
}
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index 2e8a1064f103..186a020857cf 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -50,8 +50,7 @@ void *vmem_crst_alloc(unsigned long val)
if (!table)
return NULL;
crst_table_init(table, val);
- if (slab_is_available())
- arch_set_page_dat(virt_to_page(table), CRST_ALLOC_ORDER);
+ __arch_set_page_dat(table, 1UL << CRST_ALLOC_ORDER);
return table;
}
@@ -67,6 +66,7 @@ pte_t __ref *vmem_pte_alloc(void)
if (!pte)
return NULL;
memset64((u64 *)pte, _PAGE_INVALID, PTRS_PER_PTE);
+ __arch_set_page_dat(pte, 1);
return pte;
}
diff --git a/arch/s390/pci/Makefile b/arch/s390/pci/Makefile
index 5ae31ca9dd44..0547a10406e7 100644
--- a/arch/s390/pci/Makefile
+++ b/arch/s390/pci/Makefile
@@ -3,7 +3,7 @@
# Makefile for the s390 PCI subsystem.
#
-obj-$(CONFIG_PCI) += pci.o pci_irq.o pci_dma.o pci_clp.o pci_sysfs.o \
+obj-$(CONFIG_PCI) += pci.o pci_irq.o pci_clp.o pci_sysfs.o \
pci_event.o pci_debug.o pci_insn.o pci_mmio.o \
pci_bus.o pci_kvm_hook.o
obj-$(CONFIG_PCI_IOV) += pci_iov.o
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 6fab5c085565..676ac74026a8 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -124,7 +124,11 @@ int zpci_register_ioat(struct zpci_dev *zdev, u8 dmaas,
WARN_ON_ONCE(iota & 0x3fff);
fib.pba = base;
- fib.pal = limit;
+ /* Work around off by one in ISM virt device */
+ if (zdev->pft == PCI_FUNC_TYPE_ISM && limit > base)
+ fib.pal = limit + (1 << 12);
+ else
+ fib.pal = limit;
fib.iota = iota | ZPCI_IOTA_RTTO_FLAG;
fib.gd = zdev->gisa;
cc = zpci_mod_fc(req, &fib, status);
@@ -153,6 +157,7 @@ int zpci_unregister_ioat(struct zpci_dev *zdev, u8 dmaas)
int zpci_fmb_enable_device(struct zpci_dev *zdev)
{
u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_SET_MEASURE);
+ struct zpci_iommu_ctrs *ctrs;
struct zpci_fib fib = {0};
u8 cc, status;
@@ -165,9 +170,15 @@ int zpci_fmb_enable_device(struct zpci_dev *zdev)
WARN_ON((u64) zdev->fmb & 0xf);
/* reset software counters */
- atomic64_set(&zdev->allocated_pages, 0);
- atomic64_set(&zdev->mapped_pages, 0);
- atomic64_set(&zdev->unmapped_pages, 0);
+ ctrs = zpci_get_iommu_ctrs(zdev);
+ if (ctrs) {
+ atomic64_set(&ctrs->mapped_pages, 0);
+ atomic64_set(&ctrs->unmapped_pages, 0);
+ atomic64_set(&ctrs->global_rpcits, 0);
+ atomic64_set(&ctrs->sync_map_rpcits, 0);
+ atomic64_set(&ctrs->sync_rpcits, 0);
+ }
+
fib.fmb_addr = virt_to_phys(zdev->fmb);
fib.gd = zdev->gisa;
@@ -582,7 +593,6 @@ int pcibios_device_add(struct pci_dev *pdev)
pdev->no_vf_scan = 1;
pdev->dev.groups = zpci_attr_groups;
- pdev->dev.dma_ops = &s390_pci_dma_ops;
zpci_map_resources(pdev);
for (i = 0; i < PCI_STD_NUM_BARS; i++) {
@@ -756,8 +766,6 @@ int zpci_hot_reset_device(struct zpci_dev *zdev)
if (zdev->dma_table)
rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
virt_to_phys(zdev->dma_table), &status);
- else
- rc = zpci_dma_init_device(zdev);
if (rc) {
zpci_disable_device(zdev);
return rc;
@@ -865,11 +873,6 @@ int zpci_deconfigure_device(struct zpci_dev *zdev)
if (zdev->zbus->bus)
zpci_bus_remove_device(zdev, false);
- if (zdev->dma_table) {
- rc = zpci_dma_exit_device(zdev);
- if (rc)
- return rc;
- }
if (zdev_enabled(zdev)) {
rc = zpci_disable_device(zdev);
if (rc)
@@ -918,8 +921,6 @@ void zpci_release_device(struct kref *kref)
if (zdev->zbus->bus)
zpci_bus_remove_device(zdev, false);
- if (zdev->dma_table)
- zpci_dma_exit_device(zdev);
if (zdev_enabled(zdev))
zpci_disable_device(zdev);
@@ -1109,10 +1110,6 @@ static int __init pci_base_init(void)
if (rc)
goto out_irq;
- rc = zpci_dma_init();
- if (rc)
- goto out_dma;
-
rc = clp_scan_pci_devices();
if (rc)
goto out_find;
@@ -1122,8 +1119,6 @@ static int __init pci_base_init(void)
return 0;
out_find:
- zpci_dma_exit();
-out_dma:
zpci_irq_exit();
out_irq:
zpci_mem_exit();
diff --git a/arch/s390/pci/pci_bus.c b/arch/s390/pci/pci_bus.c
index 32245b970a0c..daa5d7450c7d 100644
--- a/arch/s390/pci/pci_bus.c
+++ b/arch/s390/pci/pci_bus.c
@@ -47,11 +47,6 @@ static int zpci_bus_prepare_device(struct zpci_dev *zdev)
rc = zpci_enable_device(zdev);
if (rc)
return rc;
- rc = zpci_dma_init_device(zdev);
- if (rc) {
- zpci_disable_device(zdev);
- return rc;
- }
}
if (!zdev->has_resources) {
diff --git a/arch/s390/pci/pci_debug.c b/arch/s390/pci/pci_debug.c
index ca6bd98eec13..6dde2263c79d 100644
--- a/arch/s390/pci/pci_debug.c
+++ b/arch/s390/pci/pci_debug.c
@@ -53,9 +53,11 @@ static char *pci_fmt3_names[] = {
};
static char *pci_sw_names[] = {
- "Allocated pages",
"Mapped pages",
"Unmapped pages",
+ "Global RPCITs",
+ "Sync Map RPCITs",
+ "Sync RPCITs",
};
static void pci_fmb_show(struct seq_file *m, char *name[], int length,
@@ -69,10 +71,14 @@ static void pci_fmb_show(struct seq_file *m, char *name[], int length,
static void pci_sw_counter_show(struct seq_file *m)
{
- struct zpci_dev *zdev = m->private;
- atomic64_t *counter = &zdev->allocated_pages;
+ struct zpci_iommu_ctrs *ctrs = zpci_get_iommu_ctrs(m->private);
+ atomic64_t *counter;
int i;
+ if (!ctrs)
+ return;
+
+ counter = &ctrs->mapped_pages;
for (i = 0; i < ARRAY_SIZE(pci_sw_names); i++, counter++)
seq_printf(m, "%26s:\t%llu\n", pci_sw_names[i],
atomic64_read(counter));
diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
deleted file mode 100644
index 99209085c75b..000000000000
--- a/arch/s390/pci/pci_dma.c
+++ /dev/null
@@ -1,746 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright IBM Corp. 2012
- *
- * Author(s):
- * Jan Glauber <[email protected]>
- */
-
-#include <linux/kernel.h>
-#include <linux/slab.h>
-#include <linux/export.h>
-#include <linux/iommu-helper.h>
-#include <linux/dma-map-ops.h>
-#include <linux/vmalloc.h>
-#include <linux/pci.h>
-#include <asm/pci_dma.h>
-
-static struct kmem_cache *dma_region_table_cache;
-static struct kmem_cache *dma_page_table_cache;
-static int s390_iommu_strict;
-static u64 s390_iommu_aperture;
-static u32 s390_iommu_aperture_factor = 1;
-
-static int zpci_refresh_global(struct zpci_dev *zdev)
-{
- return zpci_refresh_trans((u64) zdev->fh << 32, zdev->start_dma,
- zdev->iommu_pages * PAGE_SIZE);
-}
-
-unsigned long *dma_alloc_cpu_table(gfp_t gfp)
-{
- unsigned long *table, *entry;
-
- table = kmem_cache_alloc(dma_region_table_cache, gfp);
- if (!table)
- return NULL;
-
- for (entry = table; entry < table + ZPCI_TABLE_ENTRIES; entry++)
- *entry = ZPCI_TABLE_INVALID;
- return table;
-}
-
-static void dma_free_cpu_table(void *table)
-{
- kmem_cache_free(dma_region_table_cache, table);
-}
-
-static unsigned long *dma_alloc_page_table(gfp_t gfp)
-{
- unsigned long *table, *entry;
-
- table = kmem_cache_alloc(dma_page_table_cache, gfp);
- if (!table)
- return NULL;
-
- for (entry = table; entry < table + ZPCI_PT_ENTRIES; entry++)
- *entry = ZPCI_PTE_INVALID;
- return table;
-}
-
-static void dma_free_page_table(void *table)
-{
- kmem_cache_free(dma_page_table_cache, table);
-}
-
-static unsigned long *dma_get_seg_table_origin(unsigned long *rtep, gfp_t gfp)
-{
- unsigned long old_rte, rte;
- unsigned long *sto;
-
- rte = READ_ONCE(*rtep);
- if (reg_entry_isvalid(rte)) {
- sto = get_rt_sto(rte);
- } else {
- sto = dma_alloc_cpu_table(gfp);
- if (!sto)
- return NULL;
-
- set_rt_sto(&rte, virt_to_phys(sto));
- validate_rt_entry(&rte);
- entry_clr_protected(&rte);
-
- old_rte = cmpxchg(rtep, ZPCI_TABLE_INVALID, rte);
- if (old_rte != ZPCI_TABLE_INVALID) {
- /* Somone else was faster, use theirs */
- dma_free_cpu_table(sto);
- sto = get_rt_sto(old_rte);
- }
- }
- return sto;
-}
-
-static unsigned long *dma_get_page_table_origin(unsigned long *step, gfp_t gfp)
-{
- unsigned long old_ste, ste;
- unsigned long *pto;
-
- ste = READ_ONCE(*step);
- if (reg_entry_isvalid(ste)) {
- pto = get_st_pto(ste);
- } else {
- pto = dma_alloc_page_table(gfp);
- if (!pto)
- return NULL;
- set_st_pto(&ste, virt_to_phys(pto));
- validate_st_entry(&ste);
- entry_clr_protected(&ste);
-
- old_ste = cmpxchg(step, ZPCI_TABLE_INVALID, ste);
- if (old_ste != ZPCI_TABLE_INVALID) {
- /* Somone else was faster, use theirs */
- dma_free_page_table(pto);
- pto = get_st_pto(old_ste);
- }
- }
- return pto;
-}
-
-unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr,
- gfp_t gfp)
-{
- unsigned long *sto, *pto;
- unsigned int rtx, sx, px;
-
- rtx = calc_rtx(dma_addr);
- sto = dma_get_seg_table_origin(&rto[rtx], gfp);
- if (!sto)
- return NULL;
-
- sx = calc_sx(dma_addr);
- pto = dma_get_page_table_origin(&sto[sx], gfp);
- if (!pto)
- return NULL;
-
- px = calc_px(dma_addr);
- return &pto[px];
-}
-
-void dma_update_cpu_trans(unsigned long *ptep, phys_addr_t page_addr, int flags)
-{
- unsigned long pte;
-
- pte = READ_ONCE(*ptep);
- if (flags & ZPCI_PTE_INVALID) {
- invalidate_pt_entry(&pte);
- } else {
- set_pt_pfaa(&pte, page_addr);
- validate_pt_entry(&pte);
- }
-
- if (flags & ZPCI_TABLE_PROTECTED)
- entry_set_protected(&pte);
- else
- entry_clr_protected(&pte);
-
- xchg(ptep, pte);
-}
-
-static int __dma_update_trans(struct zpci_dev *zdev, phys_addr_t pa,
- dma_addr_t dma_addr, size_t size, int flags)
-{
- unsigned int nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
- phys_addr_t page_addr = (pa & PAGE_MASK);
- unsigned long *entry;
- int i, rc = 0;
-
- if (!nr_pages)
- return -EINVAL;
-
- if (!zdev->dma_table)
- return -EINVAL;
-
- for (i = 0; i < nr_pages; i++) {
- entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr,
- GFP_ATOMIC);
- if (!entry) {
- rc = -ENOMEM;
- goto undo_cpu_trans;
- }
- dma_update_cpu_trans(entry, page_addr, flags);
- page_addr += PAGE_SIZE;
- dma_addr += PAGE_SIZE;
- }
-
-undo_cpu_trans:
- if (rc && ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)) {
- flags = ZPCI_PTE_INVALID;
- while (i-- > 0) {
- page_addr -= PAGE_SIZE;
- dma_addr -= PAGE_SIZE;
- entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr,
- GFP_ATOMIC);
- if (!entry)
- break;
- dma_update_cpu_trans(entry, page_addr, flags);
- }
- }
- return rc;
-}
-
-static int __dma_purge_tlb(struct zpci_dev *zdev, dma_addr_t dma_addr,
- size_t size, int flags)
-{
- unsigned long irqflags;
- int ret;
-
- /*
- * With zdev->tlb_refresh == 0, rpcit is not required to establish new
- * translations when previously invalid translation-table entries are
- * validated. With lazy unmap, rpcit is skipped for previously valid
- * entries, but a global rpcit is then required before any address can
- * be re-used, i.e. after each iommu bitmap wrap-around.
- */
- if ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID) {
- if (!zdev->tlb_refresh)
- return 0;
- } else {
- if (!s390_iommu_strict)
- return 0;
- }
-
- ret = zpci_refresh_trans((u64) zdev->fh << 32, dma_addr,
- PAGE_ALIGN(size));
- if (ret == -ENOMEM && !s390_iommu_strict) {
- /* enable the hypervisor to free some resources */
- if (zpci_refresh_global(zdev))
- goto out;
-
- spin_lock_irqsave(&zdev->iommu_bitmap_lock, irqflags);
- bitmap_andnot(zdev->iommu_bitmap, zdev->iommu_bitmap,
- zdev->lazy_bitmap, zdev->iommu_pages);
- bitmap_zero(zdev->lazy_bitmap, zdev->iommu_pages);
- spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, irqflags);
- ret = 0;
- }
-out:
- return ret;
-}
-
-static int dma_update_trans(struct zpci_dev *zdev, phys_addr_t pa,
- dma_addr_t dma_addr, size_t size, int flags)
-{
- int rc;
-
- rc = __dma_update_trans(zdev, pa, dma_addr, size, flags);
- if (rc)
- return rc;
-
- rc = __dma_purge_tlb(zdev, dma_addr, size, flags);
- if (rc && ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID))
- __dma_update_trans(zdev, pa, dma_addr, size, ZPCI_PTE_INVALID);
-
- return rc;
-}
-
-void dma_free_seg_table(unsigned long entry)
-{
- unsigned long *sto = get_rt_sto(entry);
- int sx;
-
- for (sx = 0; sx < ZPCI_TABLE_ENTRIES; sx++)
- if (reg_entry_isvalid(sto[sx]))
- dma_free_page_table(get_st_pto(sto[sx]));
-
- dma_free_cpu_table(sto);
-}
-
-void dma_cleanup_tables(unsigned long *table)
-{
- int rtx;
-
- if (!table)
- return;
-
- for (rtx = 0; rtx < ZPCI_TABLE_ENTRIES; rtx++)
- if (reg_entry_isvalid(table[rtx]))
- dma_free_seg_table(table[rtx]);
-
- dma_free_cpu_table(table);
-}
-
-static unsigned long __dma_alloc_iommu(struct device *dev,
- unsigned long start, int size)
-{
- struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
-
- return iommu_area_alloc(zdev->iommu_bitmap, zdev->iommu_pages,
- start, size, zdev->start_dma >> PAGE_SHIFT,
- dma_get_seg_boundary_nr_pages(dev, PAGE_SHIFT),
- 0);
-}
-
-static dma_addr_t dma_alloc_address(struct device *dev, int size)
-{
- struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
- unsigned long offset, flags;
-
- spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags);
- offset = __dma_alloc_iommu(dev, zdev->next_bit, size);
- if (offset == -1) {
- if (!s390_iommu_strict) {
- /* global flush before DMA addresses are reused */
- if (zpci_refresh_global(zdev))
- goto out_error;
-
- bitmap_andnot(zdev->iommu_bitmap, zdev->iommu_bitmap,
- zdev->lazy_bitmap, zdev->iommu_pages);
- bitmap_zero(zdev->lazy_bitmap, zdev->iommu_pages);
- }
- /* wrap-around */
- offset = __dma_alloc_iommu(dev, 0, size);
- if (offset == -1)
- goto out_error;
- }
- zdev->next_bit = offset + size;
- spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
-
- return zdev->start_dma + offset * PAGE_SIZE;
-
-out_error:
- spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
- return DMA_MAPPING_ERROR;
-}
-
-static void dma_free_address(struct device *dev, dma_addr_t dma_addr, int size)
-{
- struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
- unsigned long flags, offset;
-
- offset = (dma_addr - zdev->start_dma) >> PAGE_SHIFT;
-
- spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags);
- if (!zdev->iommu_bitmap)
- goto out;
-
- if (s390_iommu_strict)
- bitmap_clear(zdev->iommu_bitmap, offset, size);
- else
- bitmap_set(zdev->lazy_bitmap, offset, size);
-
-out:
- spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
-}
-
-static inline void zpci_err_dma(unsigned long rc, unsigned long addr)
-{
- struct {
- unsigned long rc;
- unsigned long addr;
- } __packed data = {rc, addr};
-
- zpci_err_hex(&data, sizeof(data));
-}
-
-static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page,
- unsigned long offset, size_t size,
- enum dma_data_direction direction,
- unsigned long attrs)
-{
- struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
- unsigned long pa = page_to_phys(page) + offset;
- int flags = ZPCI_PTE_VALID;
- unsigned long nr_pages;
- dma_addr_t dma_addr;
- int ret;
-
- /* This rounds up number of pages based on size and offset */
- nr_pages = iommu_num_pages(pa, size, PAGE_SIZE);
- dma_addr = dma_alloc_address(dev, nr_pages);
- if (dma_addr == DMA_MAPPING_ERROR) {
- ret = -ENOSPC;
- goto out_err;
- }
-
- /* Use rounded up size */
- size = nr_pages * PAGE_SIZE;
-
- if (direction == DMA_NONE || direction == DMA_TO_DEVICE)
- flags |= ZPCI_TABLE_PROTECTED;
-
- ret = dma_update_trans(zdev, pa, dma_addr, size, flags);
- if (ret)
- goto out_free;
-
- atomic64_add(nr_pages, &zdev->mapped_pages);
- return dma_addr + (offset & ~PAGE_MASK);
-
-out_free:
- dma_free_address(dev, dma_addr, nr_pages);
-out_err:
- zpci_err("map error:\n");
- zpci_err_dma(ret, pa);
- return DMA_MAPPING_ERROR;
-}
-
-static void s390_dma_unmap_pages(struct device *dev, dma_addr_t dma_addr,
- size_t size, enum dma_data_direction direction,
- unsigned long attrs)
-{
- struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
- int npages, ret;
-
- npages = iommu_num_pages(dma_addr, size, PAGE_SIZE);
- dma_addr = dma_addr & PAGE_MASK;
- ret = dma_update_trans(zdev, 0, dma_addr, npages * PAGE_SIZE,
- ZPCI_PTE_INVALID);
- if (ret) {
- zpci_err("unmap error:\n");
- zpci_err_dma(ret, dma_addr);
- return;
- }
-
- atomic64_add(npages, &zdev->unmapped_pages);
- dma_free_address(dev, dma_addr, npages);
-}
-
-static void *s390_dma_alloc(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t flag,
- unsigned long attrs)
-{
- struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
- struct page *page;
- phys_addr_t pa;
- dma_addr_t map;
-
- size = PAGE_ALIGN(size);
- page = alloc_pages(flag | __GFP_ZERO, get_order(size));
- if (!page)
- return NULL;
-
- pa = page_to_phys(page);
- map = s390_dma_map_pages(dev, page, 0, size, DMA_BIDIRECTIONAL, 0);
- if (dma_mapping_error(dev, map)) {
- __free_pages(page, get_order(size));
- return NULL;
- }
-
- atomic64_add(size / PAGE_SIZE, &zdev->allocated_pages);
- if (dma_handle)
- *dma_handle = map;
- return phys_to_virt(pa);
-}
-
-static void s390_dma_free(struct device *dev, size_t size,
- void *vaddr, dma_addr_t dma_handle,
- unsigned long attrs)
-{
- struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
-
- size = PAGE_ALIGN(size);
- atomic64_sub(size / PAGE_SIZE, &zdev->allocated_pages);
- s390_dma_unmap_pages(dev, dma_handle, size, DMA_BIDIRECTIONAL, 0);
- free_pages((unsigned long)vaddr, get_order(size));
-}
-
-/* Map a segment into a contiguous dma address area */
-static int __s390_dma_map_sg(struct device *dev, struct scatterlist *sg,
- size_t size, dma_addr_t *handle,
- enum dma_data_direction dir)
-{
- unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
- struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
- dma_addr_t dma_addr_base, dma_addr;
- int flags = ZPCI_PTE_VALID;
- struct scatterlist *s;
- phys_addr_t pa = 0;
- int ret;
-
- dma_addr_base = dma_alloc_address(dev, nr_pages);
- if (dma_addr_base == DMA_MAPPING_ERROR)
- return -ENOMEM;
-
- dma_addr = dma_addr_base;
- if (dir == DMA_NONE || dir == DMA_TO_DEVICE)
- flags |= ZPCI_TABLE_PROTECTED;
-
- for (s = sg; dma_addr < dma_addr_base + size; s = sg_next(s)) {
- pa = page_to_phys(sg_page(s));
- ret = __dma_update_trans(zdev, pa, dma_addr,
- s->offset + s->length, flags);
- if (ret)
- goto unmap;
-
- dma_addr += s->offset + s->length;
- }
- ret = __dma_purge_tlb(zdev, dma_addr_base, size, flags);
- if (ret)
- goto unmap;
-
- *handle = dma_addr_base;
- atomic64_add(nr_pages, &zdev->mapped_pages);
-
- return ret;
-
-unmap:
- dma_update_trans(zdev, 0, dma_addr_base, dma_addr - dma_addr_base,
- ZPCI_PTE_INVALID);
- dma_free_address(dev, dma_addr_base, nr_pages);
- zpci_err("map error:\n");
- zpci_err_dma(ret, pa);
- return ret;
-}
-
-static int s390_dma_map_sg(struct device *dev, struct scatterlist *sg,
- int nr_elements, enum dma_data_direction dir,
- unsigned long attrs)
-{
- struct scatterlist *s = sg, *start = sg, *dma = sg;
- unsigned int max = dma_get_max_seg_size(dev);
- unsigned int size = s->offset + s->length;
- unsigned int offset = s->offset;
- int count = 0, i, ret;
-
- for (i = 1; i < nr_elements; i++) {
- s = sg_next(s);
-
- s->dma_length = 0;
-
- if (s->offset || (size & ~PAGE_MASK) ||
- size + s->length > max) {
- ret = __s390_dma_map_sg(dev, start, size,
- &dma->dma_address, dir);
- if (ret)
- goto unmap;
-
- dma->dma_address += offset;
- dma->dma_length = size - offset;
-
- size = offset = s->offset;
- start = s;
- dma = sg_next(dma);
- count++;
- }
- size += s->length;
- }
- ret = __s390_dma_map_sg(dev, start, size, &dma->dma_address, dir);
- if (ret)
- goto unmap;
-
- dma->dma_address += offset;
- dma->dma_length = size - offset;
-
- return count + 1;
-unmap:
- for_each_sg(sg, s, count, i)
- s390_dma_unmap_pages(dev, sg_dma_address(s), sg_dma_len(s),
- dir, attrs);
-
- return ret;
-}
-
-static void s390_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
- int nr_elements, enum dma_data_direction dir,
- unsigned long attrs)
-{
- struct scatterlist *s;
- int i;
-
- for_each_sg(sg, s, nr_elements, i) {
- if (s->dma_length)
- s390_dma_unmap_pages(dev, s->dma_address, s->dma_length,
- dir, attrs);
- s->dma_address = 0;
- s->dma_length = 0;
- }
-}
-
-static unsigned long *bitmap_vzalloc(size_t bits, gfp_t flags)
-{
- size_t n = BITS_TO_LONGS(bits);
- size_t bytes;
-
- if (unlikely(check_mul_overflow(n, sizeof(unsigned long), &bytes)))
- return NULL;
-
- return vzalloc(bytes);
-}
-
-int zpci_dma_init_device(struct zpci_dev *zdev)
-{
- u8 status;
- int rc;
-
- /*
- * At this point, if the device is part of an IOMMU domain, this would
- * be a strong hint towards a bug in the IOMMU API (common) code and/or
- * simultaneous access via IOMMU and DMA API. So let's issue a warning.
- */
- WARN_ON(zdev->s390_domain);
-
- spin_lock_init(&zdev->iommu_bitmap_lock);
-
- zdev->dma_table = dma_alloc_cpu_table(GFP_KERNEL);
- if (!zdev->dma_table) {
- rc = -ENOMEM;
- goto out;
- }
-
- /*
- * Restrict the iommu bitmap size to the minimum of the following:
- * - s390_iommu_aperture which defaults to high_memory
- * - 3-level pagetable address limit minus start_dma offset
- * - DMA address range allowed by the hardware (clp query pci fn)
- *
- * Also set zdev->end_dma to the actual end address of the usable
- * range, instead of the theoretical maximum as reported by hardware.
- *
- * This limits the number of concurrently usable DMA mappings since
- * for each DMA mapped memory address we need a DMA address including
- * extra DMA addresses for multiple mappings of the same memory address.
- */
- zdev->start_dma = PAGE_ALIGN(zdev->start_dma);
- zdev->iommu_size = min3(s390_iommu_aperture,
- ZPCI_TABLE_SIZE_RT - zdev->start_dma,
- zdev->end_dma - zdev->start_dma + 1);
- zdev->end_dma = zdev->start_dma + zdev->iommu_size - 1;
- zdev->iommu_pages = zdev->iommu_size >> PAGE_SHIFT;
- zdev->iommu_bitmap = bitmap_vzalloc(zdev->iommu_pages, GFP_KERNEL);
- if (!zdev->iommu_bitmap) {
- rc = -ENOMEM;
- goto free_dma_table;
- }
- if (!s390_iommu_strict) {
- zdev->lazy_bitmap = bitmap_vzalloc(zdev->iommu_pages, GFP_KERNEL);
- if (!zdev->lazy_bitmap) {
- rc = -ENOMEM;
- goto free_bitmap;
- }
-
- }
- if (zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
- virt_to_phys(zdev->dma_table), &status)) {
- rc = -EIO;
- goto free_bitmap;
- }
-
- return 0;
-free_bitmap:
- vfree(zdev->iommu_bitmap);
- zdev->iommu_bitmap = NULL;
- vfree(zdev->lazy_bitmap);
- zdev->lazy_bitmap = NULL;
-free_dma_table:
- dma_free_cpu_table(zdev->dma_table);
- zdev->dma_table = NULL;
-out:
- return rc;
-}
-
-int zpci_dma_exit_device(struct zpci_dev *zdev)
-{
- int cc = 0;
-
- /*
- * At this point, if the device is part of an IOMMU domain, this would
- * be a strong hint towards a bug in the IOMMU API (common) code and/or
- * simultaneous access via IOMMU and DMA API. So let's issue a warning.
- */
- WARN_ON(zdev->s390_domain);
- if (zdev_enabled(zdev))
- cc = zpci_unregister_ioat(zdev, 0);
- /*
- * cc == 3 indicates the function is gone already. This can happen
- * if the function was deconfigured/disabled suddenly and we have not
- * received a new handle yet.
- */
- if (cc && cc != 3)
- return -EIO;
-
- dma_cleanup_tables(zdev->dma_table);
- zdev->dma_table = NULL;
- vfree(zdev->iommu_bitmap);
- zdev->iommu_bitmap = NULL;
- vfree(zdev->lazy_bitmap);
- zdev->lazy_bitmap = NULL;
- zdev->next_bit = 0;
- return 0;
-}
-
-static int __init dma_alloc_cpu_table_caches(void)
-{
- dma_region_table_cache = kmem_cache_create("PCI_DMA_region_tables",
- ZPCI_TABLE_SIZE, ZPCI_TABLE_ALIGN,
- 0, NULL);
- if (!dma_region_table_cache)
- return -ENOMEM;
-
- dma_page_table_cache = kmem_cache_create("PCI_DMA_page_tables",
- ZPCI_PT_SIZE, ZPCI_PT_ALIGN,
- 0, NULL);
- if (!dma_page_table_cache) {
- kmem_cache_destroy(dma_region_table_cache);
- return -ENOMEM;
- }
- return 0;
-}
-
-int __init zpci_dma_init(void)
-{
- s390_iommu_aperture = (u64)virt_to_phys(high_memory);
- if (!s390_iommu_aperture_factor)
- s390_iommu_aperture = ULONG_MAX;
- else
- s390_iommu_aperture *= s390_iommu_aperture_factor;
-
- return dma_alloc_cpu_table_caches();
-}
-
-void zpci_dma_exit(void)
-{
- kmem_cache_destroy(dma_page_table_cache);
- kmem_cache_destroy(dma_region_table_cache);
-}
-
-const struct dma_map_ops s390_pci_dma_ops = {
- .alloc = s390_dma_alloc,
- .free = s390_dma_free,
- .map_sg = s390_dma_map_sg,
- .unmap_sg = s390_dma_unmap_sg,
- .map_page = s390_dma_map_pages,
- .unmap_page = s390_dma_unmap_pages,
- .mmap = dma_common_mmap,
- .get_sgtable = dma_common_get_sgtable,
- .alloc_pages = dma_common_alloc_pages,
- .free_pages = dma_common_free_pages,
- /* dma_supported is unconditionally true without a callback */
-};
-EXPORT_SYMBOL_GPL(s390_pci_dma_ops);
-
-static int __init s390_iommu_setup(char *str)
-{
- if (!strcmp(str, "strict"))
- s390_iommu_strict = 1;
- return 1;
-}
-
-__setup("s390_iommu=", s390_iommu_setup);
-
-static int __init s390_iommu_aperture_setup(char *str)
-{
- if (kstrtou32(str, 10, &s390_iommu_aperture_factor))
- s390_iommu_aperture_factor = 1;
- return 1;
-}
-
-__setup("s390_iommu_aperture=", s390_iommu_aperture_setup);
diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c
index b9324ca2eb94..4d9773ef9e0a 100644
--- a/arch/s390/pci/pci_event.c
+++ b/arch/s390/pci/pci_event.c
@@ -59,9 +59,16 @@ static inline bool ers_result_indicates_abort(pci_ers_result_t ers_res)
}
}
-static bool is_passed_through(struct zpci_dev *zdev)
+static bool is_passed_through(struct pci_dev *pdev)
{
- return zdev->s390_domain;
+ struct zpci_dev *zdev = to_zpci(pdev);
+ bool ret;
+
+ mutex_lock(&zdev->kzdev_lock);
+ ret = !!zdev->kzdev;
+ mutex_unlock(&zdev->kzdev_lock);
+
+ return ret;
}
static bool is_driver_supported(struct pci_driver *driver)
@@ -176,7 +183,7 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev)
}
pdev->error_state = pci_channel_io_frozen;
- if (is_passed_through(to_zpci(pdev))) {
+ if (is_passed_through(pdev)) {
pr_info("%s: Cannot be recovered in the host because it is a pass-through device\n",
pci_name(pdev));
goto out_unlock;
@@ -239,7 +246,7 @@ static void zpci_event_io_failure(struct pci_dev *pdev, pci_channel_state_t es)
* we will inject the error event and let the guest recover the device
* itself.
*/
- if (is_passed_through(to_zpci(pdev)))
+ if (is_passed_through(pdev))
goto out;
driver = to_pci_driver(pdev->dev.driver);
if (driver && driver->err_handler && driver->err_handler->error_detected)
@@ -306,8 +313,6 @@ static void zpci_event_hard_deconfigured(struct zpci_dev *zdev, u32 fh)
/* Even though the device is already gone we still
* need to free zPCI resources as part of the disable.
*/
- if (zdev->dma_table)
- zpci_dma_exit_device(zdev);
if (zdev_enabled(zdev))
zpci_disable_device(zdev);
zdev->state = ZPCI_FN_STATE_STANDBY;
diff --git a/arch/s390/pci/pci_sysfs.c b/arch/s390/pci/pci_sysfs.c
index cae280e5c047..8a7abac51816 100644
--- a/arch/s390/pci/pci_sysfs.c
+++ b/arch/s390/pci/pci_sysfs.c
@@ -56,6 +56,7 @@ static ssize_t recover_store(struct device *dev, struct device_attribute *attr,
struct pci_dev *pdev = to_pci_dev(dev);
struct zpci_dev *zdev = to_zpci(pdev);
int ret = 0;
+ u8 status;
/* Can't use device_remove_self() here as that would lead us to lock
* the pci_rescan_remove_lock while holding the device' kernfs lock.
@@ -82,12 +83,6 @@ static ssize_t recover_store(struct device *dev, struct device_attribute *attr,
pci_lock_rescan_remove();
if (pci_dev_is_added(pdev)) {
pci_stop_and_remove_bus_device(pdev);
- if (zdev->dma_table) {
- ret = zpci_dma_exit_device(zdev);
- if (ret)
- goto out;
- }
-
if (zdev_enabled(zdev)) {
ret = zpci_disable_device(zdev);
/*
@@ -105,14 +100,16 @@ static ssize_t recover_store(struct device *dev, struct device_attribute *attr,
ret = zpci_enable_device(zdev);
if (ret)
goto out;
- ret = zpci_dma_init_device(zdev);
- if (ret) {
- zpci_disable_device(zdev);
- goto out;
+
+ if (zdev->dma_table) {
+ ret = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
+ virt_to_phys(zdev->dma_table), &status);
+ if (ret)
+ zpci_disable_device(zdev);
}
- pci_rescan_bus(zdev->zbus->bus);
}
out:
+ pci_rescan_bus(zdev->zbus->bus);
pci_unlock_rescan_remove();
if (kn)
sysfs_unbreak_active_protection(kn);
diff --git a/drivers/acpi/riscv/rhct.c b/drivers/acpi/riscv/rhct.c
index b280b3e9c7d9..caa2c16e1697 100644
--- a/drivers/acpi/riscv/rhct.c
+++ b/drivers/acpi/riscv/rhct.c
@@ -8,8 +8,9 @@
#define pr_fmt(fmt) "ACPI: RHCT: " fmt
#include <linux/acpi.h>
+#include <linux/bits.h>
-static struct acpi_table_header *acpi_get_rhct(void)
+static struct acpi_table_rhct *acpi_get_rhct(void)
{
static struct acpi_table_header *rhct;
acpi_status status;
@@ -26,7 +27,7 @@ static struct acpi_table_header *acpi_get_rhct(void)
}
}
- return rhct;
+ return (struct acpi_table_rhct *)rhct;
}
/*
@@ -48,7 +49,7 @@ int acpi_get_riscv_isa(struct acpi_table_header *table, unsigned int cpu, const
BUG_ON(acpi_disabled);
if (!table) {
- rhct = (struct acpi_table_rhct *)acpi_get_rhct();
+ rhct = acpi_get_rhct();
if (!rhct)
return -ENOENT;
} else {
@@ -81,3 +82,89 @@ int acpi_get_riscv_isa(struct acpi_table_header *table, unsigned int cpu, const
return -1;
}
+
+static void acpi_parse_hart_info_cmo_node(struct acpi_table_rhct *rhct,
+ struct acpi_rhct_hart_info *hart_info,
+ u32 *cbom_size, u32 *cboz_size, u32 *cbop_size)
+{
+ u32 size_hartinfo = sizeof(struct acpi_rhct_hart_info);
+ u32 size_hdr = sizeof(struct acpi_rhct_node_header);
+ struct acpi_rhct_node_header *ref_node;
+ struct acpi_rhct_cmo_node *cmo_node;
+ u32 *hart_info_node_offset;
+
+ hart_info_node_offset = ACPI_ADD_PTR(u32, hart_info, size_hartinfo);
+ for (int i = 0; i < hart_info->num_offsets; i++) {
+ ref_node = ACPI_ADD_PTR(struct acpi_rhct_node_header,
+ rhct, hart_info_node_offset[i]);
+ if (ref_node->type == ACPI_RHCT_NODE_TYPE_CMO) {
+ cmo_node = ACPI_ADD_PTR(struct acpi_rhct_cmo_node,
+ ref_node, size_hdr);
+ if (cbom_size && cmo_node->cbom_size <= 30) {
+ if (!*cbom_size)
+ *cbom_size = BIT(cmo_node->cbom_size);
+ else if (*cbom_size != BIT(cmo_node->cbom_size))
+ pr_warn("CBOM size is not the same across harts\n");
+ }
+
+ if (cboz_size && cmo_node->cboz_size <= 30) {
+ if (!*cboz_size)
+ *cboz_size = BIT(cmo_node->cboz_size);
+ else if (*cboz_size != BIT(cmo_node->cboz_size))
+ pr_warn("CBOZ size is not the same across harts\n");
+ }
+
+ if (cbop_size && cmo_node->cbop_size <= 30) {
+ if (!*cbop_size)
+ *cbop_size = BIT(cmo_node->cbop_size);
+ else if (*cbop_size != BIT(cmo_node->cbop_size))
+ pr_warn("CBOP size is not the same across harts\n");
+ }
+ }
+ }
+}
+
+/*
+ * During early boot, the caller should call acpi_get_table() and pass its pointer to
+ * these functions (and free up later). At run time, since this table can be used
+ * multiple times, pass NULL so that the table remains in memory.
+ */
+void acpi_get_cbo_block_size(struct acpi_table_header *table, u32 *cbom_size,
+ u32 *cboz_size, u32 *cbop_size)
+{
+ u32 size_hdr = sizeof(struct acpi_rhct_node_header);
+ struct acpi_rhct_node_header *node, *end;
+ struct acpi_rhct_hart_info *hart_info;
+ struct acpi_table_rhct *rhct;
+
+ if (acpi_disabled)
+ return;
+
+ if (table) {
+ rhct = (struct acpi_table_rhct *)table;
+ } else {
+ rhct = acpi_get_rhct();
+ if (!rhct)
+ return;
+ }
+
+ if (cbom_size)
+ *cbom_size = 0;
+
+ if (cboz_size)
+ *cboz_size = 0;
+
+ if (cbop_size)
+ *cbop_size = 0;
+
+ end = ACPI_ADD_PTR(struct acpi_rhct_node_header, rhct, rhct->header.length);
+ for (node = ACPI_ADD_PTR(struct acpi_rhct_node_header, rhct, rhct->node_offset);
+ node < end;
+ node = ACPI_ADD_PTR(struct acpi_rhct_node_header, node, node->length)) {
+ if (node->type == ACPI_RHCT_NODE_TYPE_HART_INFO) {
+ hart_info = ACPI_ADD_PTR(struct acpi_rhct_hart_info, node, size_hdr);
+ acpi_parse_hart_info_cmo_node(rhct, hart_info, cbom_size,
+ cboz_size, cbop_size);
+ }
+ }
+}
diff --git a/drivers/clocksource/timer-riscv.c b/drivers/clocksource/timer-riscv.c
index 50198657230e..06f5bad3c3e0 100644
--- a/drivers/clocksource/timer-riscv.c
+++ b/drivers/clocksource/timer-riscv.c
@@ -22,6 +22,7 @@
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/interrupt.h>
#include <linux/of_irq.h>
+#include <linux/limits.h>
#include <clocksource/timer-riscv.h>
#include <asm/smp.h>
#include <asm/hwcap.h>
@@ -31,12 +32,22 @@
static DEFINE_STATIC_KEY_FALSE(riscv_sstc_available);
static bool riscv_timer_cannot_wake_cpu;
+static void riscv_clock_event_stop(void)
+{
+ if (static_branch_likely(&riscv_sstc_available)) {
+ csr_write(CSR_STIMECMP, ULONG_MAX);
+ if (IS_ENABLED(CONFIG_32BIT))
+ csr_write(CSR_STIMECMPH, ULONG_MAX);
+ } else {
+ sbi_set_timer(U64_MAX);
+ }
+}
+
static int riscv_clock_next_event(unsigned long delta,
struct clock_event_device *ce)
{
u64 next_tval = get_cycles64() + delta;
- csr_set(CSR_IE, IE_TIE);
if (static_branch_likely(&riscv_sstc_available)) {
#if defined(CONFIG_32BIT)
csr_write(CSR_STIMECMP, next_tval & 0xFFFFFFFF);
@@ -94,6 +105,8 @@ static int riscv_timer_starting_cpu(unsigned int cpu)
ce->irq = riscv_clock_event_irq;
if (riscv_timer_cannot_wake_cpu)
ce->features |= CLOCK_EVT_FEAT_C3STOP;
+ if (static_branch_likely(&riscv_sstc_available))
+ ce->rating = 450;
clockevents_config_and_register(ce, riscv_timebase, 100, 0x7fffffff);
enable_percpu_irq(riscv_clock_event_irq,
@@ -119,7 +132,7 @@ static irqreturn_t riscv_timer_interrupt(int irq, void *dev_id)
{
struct clock_event_device *evdev = this_cpu_ptr(&riscv_clock_event);
- csr_clear(CSR_IE, IE_TIE);
+ riscv_clock_event_stop();
evdev->event_handler(evdev);
return IRQ_HANDLED;
diff --git a/drivers/gpio/gpio-mvebu.c b/drivers/gpio/gpio-mvebu.c
index 8342bfc2d3f9..a13f3c18ccd4 100644
--- a/drivers/gpio/gpio-mvebu.c
+++ b/drivers/gpio/gpio-mvebu.c
@@ -757,7 +757,6 @@ static const struct pwm_ops mvebu_pwm_ops = {
.free = mvebu_pwm_free,
.get_state = mvebu_pwm_get_state,
.apply = mvebu_pwm_apply,
- .owner = THIS_MODULE,
};
static void __maybe_unused mvebu_pwm_suspend(struct mvebu_gpio_chip *mvchip)
diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi86.c b/drivers/gpu/drm/bridge/ti-sn65dsi86.c
index 84148a79414b..c45c07840f64 100644
--- a/drivers/gpu/drm/bridge/ti-sn65dsi86.c
+++ b/drivers/gpu/drm/bridge/ti-sn65dsi86.c
@@ -1580,7 +1580,6 @@ static const struct pwm_ops ti_sn_pwm_ops = {
.free = ti_sn_pwm_free,
.apply = ti_sn_pwm_apply,
.get_state = ti_sn_pwm_get_state,
- .owner = THIS_MODULE,
};
static int ti_sn_pwm_probe(struct auxiliary_device *adev,
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index ee9e2a2edbf5..7673bb82945b 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -95,7 +95,7 @@ config IOMMU_DEBUGFS
choice
prompt "IOMMU default domain type"
depends on IOMMU_API
- default IOMMU_DEFAULT_DMA_LAZY if X86
+ default IOMMU_DEFAULT_DMA_LAZY if X86 || S390
default IOMMU_DEFAULT_DMA_STRICT
help
Choose the type of IOMMU domain used to manage DMA API usage by
@@ -150,7 +150,7 @@ config OF_IOMMU
# IOMMU-agnostic DMA-mapping layer
config IOMMU_DMA
- def_bool ARM64 || X86
+ def_bool ARM64 || X86 || S390
select DMA_OPS
select IOMMU_API
select IOMMU_IOVA
@@ -240,17 +240,6 @@ config SUN50I_IOMMU
help
Support for the IOMMU introduced in the Allwinner H6 SoCs.
-config TEGRA_IOMMU_GART
- bool "Tegra GART IOMMU Support"
- depends on ARCH_TEGRA_2x_SOC
- depends on TEGRA_MC
- select IOMMU_API
- help
- Enables support for remapping discontiguous physical memory
- shared with the operating system into contiguous I/O virtual
- space through the GART (Graphics Address Relocation Table)
- hardware included on Tegra SoCs.
-
config TEGRA_IOMMU_SMMU
bool "NVIDIA Tegra SMMU Support"
depends on ARCH_TEGRA
diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
index 769e43d780ce..95ad9dbfbda0 100644
--- a/drivers/iommu/Makefile
+++ b/drivers/iommu/Makefile
@@ -20,7 +20,6 @@ obj-$(CONFIG_OMAP_IOMMU) += omap-iommu.o
obj-$(CONFIG_OMAP_IOMMU_DEBUG) += omap-iommu-debug.o
obj-$(CONFIG_ROCKCHIP_IOMMU) += rockchip-iommu.o
obj-$(CONFIG_SUN50I_IOMMU) += sun50i-iommu.o
-obj-$(CONFIG_TEGRA_IOMMU_GART) += tegra-gart.o
obj-$(CONFIG_TEGRA_IOMMU_SMMU) += tegra-smmu.o
obj-$(CONFIG_EXYNOS_IOMMU) += exynos-iommu.o
obj-$(CONFIG_FSL_PAMU) += fsl_pamu.o fsl_pamu_domain.o
diff --git a/drivers/iommu/amd/Kconfig b/drivers/iommu/amd/Kconfig
index 8bd4c3b183ec..443b2c13c37b 100644
--- a/drivers/iommu/amd/Kconfig
+++ b/drivers/iommu/amd/Kconfig
@@ -23,15 +23,6 @@ config AMD_IOMMU
your BIOS for an option to enable it or if you have an IVRS ACPI
table.
-config AMD_IOMMU_V2
- tristate "AMD IOMMU Version 2 driver"
- depends on AMD_IOMMU
- select MMU_NOTIFIER
- help
- This option enables support for the AMD IOMMUv2 features of the IOMMU
- hardware. Select this option if you want to use devices that support
- the PCI PRI and PASID interface.
-
config AMD_IOMMU_DEBUGFS
bool "Enable AMD IOMMU internals in DebugFS"
depends on AMD_IOMMU && IOMMU_DEBUGFS
diff --git a/drivers/iommu/amd/Makefile b/drivers/iommu/amd/Makefile
index 773d8aa00283..f454fbb1569e 100644
--- a/drivers/iommu/amd/Makefile
+++ b/drivers/iommu/amd/Makefile
@@ -1,4 +1,3 @@
# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_AMD_IOMMU) += iommu.o init.o quirks.o io_pgtable.o io_pgtable_v2.o
obj-$(CONFIG_AMD_IOMMU_DEBUGFS) += debugfs.o
-obj-$(CONFIG_AMD_IOMMU_V2) += iommu_v2.o
diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index e2857109e966..86be1edd50ee 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -38,9 +38,6 @@ extern int amd_iommu_guest_ir;
extern enum io_pgtable_fmt amd_iommu_pgtable;
extern int amd_iommu_gpt_level;
-/* IOMMUv2 specific functions */
-struct iommu_domain;
-
bool amd_iommu_v2_supported(void);
struct amd_iommu *get_amd_iommu(unsigned int idx);
u8 amd_iommu_pc_get_max_banks(unsigned int idx);
@@ -51,10 +48,10 @@ int amd_iommu_pc_get_reg(struct amd_iommu *iommu, u8 bank, u8 cntr,
int amd_iommu_pc_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr,
u8 fxn, u64 *value);
-int amd_iommu_register_ppr_notifier(struct notifier_block *nb);
-int amd_iommu_unregister_ppr_notifier(struct notifier_block *nb);
-void amd_iommu_domain_direct_map(struct iommu_domain *dom);
-int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids);
+/* Device capabilities */
+int amd_iommu_pdev_enable_cap_pri(struct pci_dev *pdev);
+void amd_iommu_pdev_disable_cap_pri(struct pci_dev *pdev);
+
int amd_iommu_flush_page(struct iommu_domain *dom, u32 pasid, u64 address);
void amd_iommu_update_and_flush_device_table(struct protection_domain *domain);
void amd_iommu_domain_update(struct protection_domain *domain);
@@ -87,9 +84,25 @@ static inline bool is_rd890_iommu(struct pci_dev *pdev)
(pdev->device == PCI_DEVICE_ID_RD890_IOMMU);
}
-static inline bool iommu_feature(struct amd_iommu *iommu, u64 mask)
+static inline bool check_feature(u64 mask)
+{
+ return (amd_iommu_efr & mask);
+}
+
+static inline bool check_feature2(u64 mask)
+{
+ return (amd_iommu_efr2 & mask);
+}
+
+static inline int check_feature_gpt_level(void)
+{
+ return ((amd_iommu_efr >> FEATURE_GATS_SHIFT) & FEATURE_GATS_MASK);
+}
+
+static inline bool amd_iommu_gt_ppr_supported(void)
{
- return !!(iommu->features & mask);
+ return (check_feature(FEATURE_GT) &&
+ check_feature(FEATURE_PPR));
}
static inline u64 iommu_virt_to_phys(void *vaddr)
@@ -105,7 +118,6 @@ static inline void *iommu_phys_to_virt(unsigned long paddr)
static inline
void amd_iommu_domain_set_pt_root(struct protection_domain *domain, u64 root)
{
- atomic64_set(&domain->iop.pt_root, root);
domain->iop.root = (u64 *)(root & PAGE_MASK);
domain->iop.mode = root & 7; /* lowest 3 bits encode pgtable mode */
}
@@ -146,8 +158,5 @@ void amd_iommu_domain_set_pgtable(struct protection_domain *domain,
u64 *root, int mode);
struct dev_table_entry *get_dev_table(struct amd_iommu *iommu);
-extern u64 amd_iommu_efr;
-extern u64 amd_iommu_efr2;
-
extern bool amd_iommu_snp_en;
#endif
diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h
index dec4e5c2b66b..90b7d7950a9e 100644
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -462,6 +462,10 @@
#define PD_IOMMUV2_MASK BIT(3) /* domain has gcr3 table */
#define PD_GIOV_MASK BIT(4) /* domain enable GIOV support */
+/* Timeout stuff */
+#define LOOP_TIMEOUT 100000
+#define MMIO_STATUS_TIMEOUT 2000000
+
extern bool amd_iommu_dump;
#define DUMP_printk(format, arg...) \
do { \
@@ -516,19 +520,6 @@ extern struct kmem_cache *amd_iommu_irq_cache;
#define APERTURE_RANGE_INDEX(a) ((a) >> APERTURE_RANGE_SHIFT)
#define APERTURE_PAGE_INDEX(a) (((a) >> 21) & 0x3fULL)
-/*
- * This struct is used to pass information about
- * incoming PPR faults around.
- */
-struct amd_iommu_fault {
- u64 address; /* IO virtual address of the fault*/
- u32 pasid; /* Address space identifier */
- u32 sbdf; /* Originating PCI device id */
- u16 tag; /* PPR tag */
- u16 flags; /* Fault flags */
-
-};
-
struct amd_iommu;
struct iommu_domain;
@@ -555,7 +546,6 @@ struct amd_io_pgtable {
struct io_pgtable iop;
int mode;
u64 *root;
- atomic64_t pt_root; /* pgtable root and pgtable mode */
u64 *pgd; /* v2 pgtable pgd pointer */
};
@@ -688,9 +678,6 @@ struct amd_iommu {
/* Extended features 2 */
u64 features2;
- /* IOMMUv2 */
- bool is_iommu_v2;
-
/* PCI device id of the IOMMU device */
u16 devid;
@@ -811,6 +798,14 @@ struct devid_map {
bool cmd_line;
};
+#define AMD_IOMMU_DEVICE_FLAG_ATS_SUP 0x1 /* ATS feature supported */
+#define AMD_IOMMU_DEVICE_FLAG_PRI_SUP 0x2 /* PRI feature supported */
+#define AMD_IOMMU_DEVICE_FLAG_PASID_SUP 0x4 /* PASID context supported */
+/* Device may request execution on memory pages */
+#define AMD_IOMMU_DEVICE_FLAG_EXEC_SUP 0x8
+/* Device may request super-user privileges */
+#define AMD_IOMMU_DEVICE_FLAG_PRIV_SUP 0x10
+
/*
* This struct contains device specific data for the IOMMU
*/
@@ -823,13 +818,15 @@ struct iommu_dev_data {
struct protection_domain *domain; /* Domain the device is bound to */
struct device *dev;
u16 devid; /* PCI Device ID */
- bool iommu_v2; /* Device can make use of IOMMUv2 */
- struct {
- bool enabled;
- int qdep;
- } ats; /* ATS state */
- bool pri_tlp; /* PASID TLB required for
+
+ u32 flags; /* Holds AMD_IOMMU_DEVICE_FLAG_<*> */
+ int ats_qdep;
+ u8 ats_enabled :1; /* ATS state */
+ u8 pri_enabled :1; /* PRI state */
+ u8 pasid_enabled:1; /* PASID state */
+ u8 pri_tlp :1; /* PASID TLB required for
PPR completions */
+ u8 ppr :1; /* Enable device PPR support */
bool use_vapic; /* Enable device to use vapic mode */
bool defer_attach;
@@ -896,16 +893,15 @@ extern unsigned amd_iommu_aperture_order;
/* allocation bitmap for domain ids */
extern unsigned long *amd_iommu_pd_alloc_bitmap;
-/* Smallest max PASID supported by any IOMMU in the system */
-extern u32 amd_iommu_max_pasid;
-
-extern bool amd_iommu_v2_present;
-
extern bool amd_iommu_force_isolation;
/* Max levels of glxval supported */
extern int amd_iommu_max_glx_val;
+/* Global EFR and EFR2 registers */
+extern u64 amd_iommu_efr;
+extern u64 amd_iommu_efr2;
+
/*
* This function flushes all internal caches of
* the IOMMU used by this driver.
diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index 45efb7e5d725..64bcf3df37ee 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -83,8 +83,6 @@
#define ACPI_DEVFLAG_LINT1 0x80
#define ACPI_DEVFLAG_ATSDIS 0x10000000
-#define LOOP_TIMEOUT 2000000
-
#define IVRS_GET_SBDF_ID(seg, bus, dev, fn) (((seg & 0xffff) << 16) | ((bus & 0xff) << 8) \
| ((dev & 0x1f) << 3) | (fn & 0x7))
@@ -187,9 +185,6 @@ static int amd_iommus_present;
bool amd_iommu_np_cache __read_mostly;
bool amd_iommu_iotlb_sup __read_mostly = true;
-u32 amd_iommu_max_pasid __read_mostly = ~0;
-
-bool amd_iommu_v2_present __read_mostly;
static bool amd_iommu_pc_present __read_mostly;
bool amdr_ivrs_remap_support __read_mostly;
@@ -272,7 +267,7 @@ int amd_iommu_get_num_iommus(void)
* Iterate through all the IOMMUs to get common EFR
* masks among all IOMMUs and warn if found inconsistency.
*/
-static void get_global_efr(void)
+static __init void get_global_efr(void)
{
struct amd_iommu *iommu;
@@ -304,16 +299,6 @@ static void get_global_efr(void)
pr_info("Using global IVHD EFR:%#llx, EFR2:%#llx\n", amd_iommu_efr, amd_iommu_efr2);
}
-static bool check_feature_on_all_iommus(u64 mask)
-{
- return !!(amd_iommu_efr & mask);
-}
-
-static inline int check_feature_gpt_level(void)
-{
- return ((amd_iommu_efr >> FEATURE_GATS_SHIFT) & FEATURE_GATS_MASK);
-}
-
/*
* For IVHD type 0x11/0x40, EFR is also available via IVHD.
* Default to IVHD EFR since it is available sooner
@@ -399,7 +384,7 @@ static void iommu_set_cwwb_range(struct amd_iommu *iommu)
u64 start = iommu_virt_to_phys((void *)iommu->cmd_sem);
u64 entry = start & PM_ADDR_MASK;
- if (!check_feature_on_all_iommus(FEATURE_SNP))
+ if (!check_feature(FEATURE_SNP))
return;
/* Note:
@@ -869,7 +854,7 @@ static void *__init iommu_alloc_4k_pages(struct amd_iommu *iommu,
void *buf = (void *)__get_free_pages(gfp, order);
if (buf &&
- check_feature_on_all_iommus(FEATURE_SNP) &&
+ check_feature(FEATURE_SNP) &&
set_memory_4k((unsigned long)buf, (1 << order))) {
free_pages((unsigned long)buf, order);
buf = NULL;
@@ -985,14 +970,14 @@ static int iommu_ga_log_enable(struct amd_iommu *iommu)
iommu_feature_enable(iommu, CONTROL_GAINT_EN);
iommu_feature_enable(iommu, CONTROL_GALOG_EN);
- for (i = 0; i < LOOP_TIMEOUT; ++i) {
+ for (i = 0; i < MMIO_STATUS_TIMEOUT; ++i) {
status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
if (status & (MMIO_STATUS_GALOG_RUN_MASK))
break;
udelay(10);
}
- if (WARN_ON(i >= LOOP_TIMEOUT))
+ if (WARN_ON(i >= MMIO_STATUS_TIMEOUT))
return -EINVAL;
return 0;
@@ -1048,7 +1033,7 @@ static void iommu_enable_xt(struct amd_iommu *iommu)
static void iommu_enable_gt(struct amd_iommu *iommu)
{
- if (!iommu_feature(iommu, FEATURE_GT))
+ if (!check_feature(FEATURE_GT))
return;
iommu_feature_enable(iommu, CONTROL_GT_EN);
@@ -1987,7 +1972,7 @@ static void init_iommu_perf_ctr(struct amd_iommu *iommu)
u64 val;
struct pci_dev *pdev = iommu->dev;
- if (!iommu_feature(iommu, FEATURE_PC))
+ if (!check_feature(FEATURE_PC))
return;
amd_iommu_pc_present = true;
@@ -2014,8 +1999,7 @@ static ssize_t amd_iommu_show_features(struct device *dev,
struct device_attribute *attr,
char *buf)
{
- struct amd_iommu *iommu = dev_to_amd_iommu(dev);
- return sysfs_emit(buf, "%llx:%llx\n", iommu->features2, iommu->features);
+ return sysfs_emit(buf, "%llx:%llx\n", amd_iommu_efr, amd_iommu_efr2);
}
static DEVICE_ATTR(features, S_IRUGO, amd_iommu_show_features, NULL);
@@ -2051,9 +2035,9 @@ static void __init late_iommu_features_init(struct amd_iommu *iommu)
features = readq(iommu->mmio_base + MMIO_EXT_FEATURES);
features2 = readq(iommu->mmio_base + MMIO_EXT_FEATURES2);
- if (!iommu->features) {
- iommu->features = features;
- iommu->features2 = features2;
+ if (!amd_iommu_efr) {
+ amd_iommu_efr = features;
+ amd_iommu_efr2 = features2;
return;
}
@@ -2061,12 +2045,12 @@ static void __init late_iommu_features_init(struct amd_iommu *iommu)
* Sanity check and warn if EFR values from
* IVHD and MMIO conflict.
*/
- if (features != iommu->features ||
- features2 != iommu->features2) {
+ if (features != amd_iommu_efr ||
+ features2 != amd_iommu_efr2) {
pr_warn(FW_WARN
"EFR mismatch. Use IVHD EFR (%#llx : %#llx), EFR2 (%#llx : %#llx).\n",
- features, iommu->features,
- features2, iommu->features2);
+ features, amd_iommu_efr,
+ features2, amd_iommu_efr2);
}
}
@@ -2092,20 +2076,17 @@ static int __init iommu_init_pci(struct amd_iommu *iommu)
late_iommu_features_init(iommu);
- if (iommu_feature(iommu, FEATURE_GT)) {
+ if (check_feature(FEATURE_GT)) {
int glxval;
- u32 max_pasid;
u64 pasmax;
- pasmax = iommu->features & FEATURE_PASID_MASK;
+ pasmax = amd_iommu_efr & FEATURE_PASID_MASK;
pasmax >>= FEATURE_PASID_SHIFT;
- max_pasid = (1 << (pasmax + 1)) - 1;
+ iommu->iommu.max_pasids = (1 << (pasmax + 1)) - 1;
- amd_iommu_max_pasid = min(amd_iommu_max_pasid, max_pasid);
+ BUG_ON(iommu->iommu.max_pasids & ~PASID_MASK);
- BUG_ON(amd_iommu_max_pasid & ~PASID_MASK);
-
- glxval = iommu->features & FEATURE_GLXVAL_MASK;
+ glxval = amd_iommu_efr & FEATURE_GLXVAL_MASK;
glxval >>= FEATURE_GLXVAL_SHIFT;
if (amd_iommu_max_glx_val == -1)
@@ -2114,13 +2095,7 @@ static int __init iommu_init_pci(struct amd_iommu *iommu)
amd_iommu_max_glx_val = min(amd_iommu_max_glx_val, glxval);
}
- if (iommu_feature(iommu, FEATURE_GT) &&
- iommu_feature(iommu, FEATURE_PPR)) {
- iommu->is_iommu_v2 = true;
- amd_iommu_v2_present = true;
- }
-
- if (iommu_feature(iommu, FEATURE_PPR) && alloc_ppr_log(iommu))
+ if (check_feature(FEATURE_PPR) && alloc_ppr_log(iommu))
return -ENOMEM;
if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE)) {
@@ -2132,13 +2107,10 @@ static int __init iommu_init_pci(struct amd_iommu *iommu)
init_iommu_perf_ctr(iommu);
if (amd_iommu_pgtable == AMD_IOMMU_V2) {
- if (!iommu_feature(iommu, FEATURE_GIOSUP) ||
- !iommu_feature(iommu, FEATURE_GT)) {
+ if (!check_feature(FEATURE_GIOSUP) ||
+ !check_feature(FEATURE_GT)) {
pr_warn("Cannot enable v2 page table for DMA-API. Fallback to v1.\n");
amd_iommu_pgtable = AMD_IOMMU_V1;
- } else if (iommu_default_passthrough()) {
- pr_warn("V2 page table doesn't support passthrough mode. Fallback to v1.\n");
- amd_iommu_pgtable = AMD_IOMMU_V1;
}
}
@@ -2186,35 +2158,29 @@ static int __init iommu_init_pci(struct amd_iommu *iommu)
static void print_iommu_info(void)
{
+ int i;
static const char * const feat_str[] = {
"PreF", "PPR", "X2APIC", "NX", "GT", "[5]",
"IA", "GA", "HE", "PC"
};
- struct amd_iommu *iommu;
-
- for_each_iommu(iommu) {
- struct pci_dev *pdev = iommu->dev;
- int i;
- pci_info(pdev, "Found IOMMU cap 0x%x\n", iommu->cap_ptr);
+ if (amd_iommu_efr) {
+ pr_info("Extended features (%#llx, %#llx):", amd_iommu_efr, amd_iommu_efr2);
- if (iommu->cap & (1 << IOMMU_CAP_EFR)) {
- pr_info("Extended features (%#llx, %#llx):", iommu->features, iommu->features2);
-
- for (i = 0; i < ARRAY_SIZE(feat_str); ++i) {
- if (iommu_feature(iommu, (1ULL << i)))
- pr_cont(" %s", feat_str[i]);
- }
+ for (i = 0; i < ARRAY_SIZE(feat_str); ++i) {
+ if (check_feature(1ULL << i))
+ pr_cont(" %s", feat_str[i]);
+ }
- if (iommu->features & FEATURE_GAM_VAPIC)
- pr_cont(" GA_vAPIC");
+ if (check_feature(FEATURE_GAM_VAPIC))
+ pr_cont(" GA_vAPIC");
- if (iommu->features & FEATURE_SNP)
- pr_cont(" SNP");
+ if (check_feature(FEATURE_SNP))
+ pr_cont(" SNP");
- pr_cont("\n");
- }
+ pr_cont("\n");
}
+
if (irq_remapping_enabled) {
pr_info("Interrupt remapping enabled\n");
if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE)
@@ -2900,19 +2866,19 @@ static void enable_iommus_vapic(void)
* Need to set and poll check the GALOGRun bit to zero before
* we can set/ modify GA Log registers safely.
*/
- for (i = 0; i < LOOP_TIMEOUT; ++i) {
+ for (i = 0; i < MMIO_STATUS_TIMEOUT; ++i) {
status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
if (!(status & MMIO_STATUS_GALOG_RUN_MASK))
break;
udelay(10);
}
- if (WARN_ON(i >= LOOP_TIMEOUT))
+ if (WARN_ON(i >= MMIO_STATUS_TIMEOUT))
return;
}
if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) &&
- !check_feature_on_all_iommus(FEATURE_GAM_VAPIC)) {
+ !check_feature(FEATURE_GAM_VAPIC)) {
amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA;
return;
}
@@ -3698,9 +3664,8 @@ bool amd_iommu_v2_supported(void)
* (i.e. EFR[SNPSup]=1), IOMMUv2 page table cannot be used without
* setting up IOMMUv1 page table.
*/
- return amd_iommu_v2_present && !amd_iommu_snp_en;
+ return amd_iommu_gt_ppr_supported() && !amd_iommu_snp_en;
}
-EXPORT_SYMBOL(amd_iommu_v2_supported);
struct amd_iommu *get_amd_iommu(unsigned int idx)
{
@@ -3824,7 +3789,7 @@ int amd_iommu_snp_enable(void)
return -EINVAL;
}
- amd_iommu_snp_en = check_feature_on_all_iommus(FEATURE_SNP);
+ amd_iommu_snp_en = check_feature(FEATURE_SNP);
if (!amd_iommu_snp_en)
return -EINVAL;
diff --git a/drivers/iommu/amd/io_pgtable_v2.c b/drivers/iommu/amd/io_pgtable_v2.c
index e9ef2e0a62f6..f818a7e254d4 100644
--- a/drivers/iommu/amd/io_pgtable_v2.c
+++ b/drivers/iommu/amd/io_pgtable_v2.c
@@ -363,10 +363,10 @@ static void v2_free_pgtable(struct io_pgtable *iop)
if (!(pdom->flags & PD_IOMMUV2_MASK))
return;
- /*
- * Make changes visible to IOMMUs. No need to clear gcr3 entry
- * as gcr3 table is already freed.
- */
+ /* Clear gcr3 entry */
+ amd_iommu_domain_clear_gcr3(&pdom->domain, 0);
+
+ /* Make changes visible to IOMMUs */
amd_iommu_domain_update(pdom);
/* Free page table */
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index b399c5741378..fcc987f5d4ed 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -45,8 +45,6 @@
#define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28))
-#define LOOP_TIMEOUT 100000
-
/* IO virtual address start page frame number */
#define IOVA_START_PFN (1)
#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
@@ -68,7 +66,6 @@ LIST_HEAD(acpihid_map);
const struct iommu_ops amd_iommu_ops;
const struct iommu_dirty_ops amd_dirty_ops;
-static ATOMIC_NOTIFIER_HEAD(ppr_notifier);
int amd_iommu_max_glx_val = -1;
/*
@@ -81,7 +78,6 @@ struct iommu_cmd {
struct kmem_cache *amd_iommu_irq_cache;
static void detach_device(struct device *dev);
-static int domain_enable_v2(struct protection_domain *domain, int pasids);
/****************************************************************************
*
@@ -324,24 +320,141 @@ static struct iommu_group *acpihid_device_group(struct device *dev)
return entry->group;
}
-static bool pci_iommuv2_capable(struct pci_dev *pdev)
+static inline bool pdev_pasid_supported(struct iommu_dev_data *dev_data)
{
- static const int caps[] = {
- PCI_EXT_CAP_ID_PRI,
- PCI_EXT_CAP_ID_PASID,
- };
- int i, pos;
+ return (dev_data->flags & AMD_IOMMU_DEVICE_FLAG_PASID_SUP);
+}
- if (!pci_ats_supported(pdev))
- return false;
+static u32 pdev_get_caps(struct pci_dev *pdev)
+{
+ int features;
+ u32 flags = 0;
+
+ if (pci_ats_supported(pdev))
+ flags |= AMD_IOMMU_DEVICE_FLAG_ATS_SUP;
+
+ if (pci_pri_supported(pdev))
+ flags |= AMD_IOMMU_DEVICE_FLAG_PRI_SUP;
+
+ features = pci_pasid_features(pdev);
+ if (features >= 0) {
+ flags |= AMD_IOMMU_DEVICE_FLAG_PASID_SUP;
+
+ if (features & PCI_PASID_CAP_EXEC)
+ flags |= AMD_IOMMU_DEVICE_FLAG_EXEC_SUP;
- for (i = 0; i < 2; ++i) {
- pos = pci_find_ext_capability(pdev, caps[i]);
- if (pos == 0)
- return false;
+ if (features & PCI_PASID_CAP_PRIV)
+ flags |= AMD_IOMMU_DEVICE_FLAG_PRIV_SUP;
}
- return true;
+ return flags;
+}
+
+static inline int pdev_enable_cap_ats(struct pci_dev *pdev)
+{
+ struct iommu_dev_data *dev_data = dev_iommu_priv_get(&pdev->dev);
+ int ret = -EINVAL;
+
+ if (dev_data->ats_enabled)
+ return 0;
+
+ if (amd_iommu_iotlb_sup &&
+ (dev_data->flags & AMD_IOMMU_DEVICE_FLAG_ATS_SUP)) {
+ ret = pci_enable_ats(pdev, PAGE_SHIFT);
+ if (!ret) {
+ dev_data->ats_enabled = 1;
+ dev_data->ats_qdep = pci_ats_queue_depth(pdev);
+ }
+ }
+
+ return ret;
+}
+
+static inline void pdev_disable_cap_ats(struct pci_dev *pdev)
+{
+ struct iommu_dev_data *dev_data = dev_iommu_priv_get(&pdev->dev);
+
+ if (dev_data->ats_enabled) {
+ pci_disable_ats(pdev);
+ dev_data->ats_enabled = 0;
+ }
+}
+
+int amd_iommu_pdev_enable_cap_pri(struct pci_dev *pdev)
+{
+ struct iommu_dev_data *dev_data = dev_iommu_priv_get(&pdev->dev);
+ int ret = -EINVAL;
+
+ if (dev_data->pri_enabled)
+ return 0;
+
+ if (dev_data->flags & AMD_IOMMU_DEVICE_FLAG_PRI_SUP) {
+ /*
+ * First reset the PRI state of the device.
+ * FIXME: Hardcode number of outstanding requests for now
+ */
+ if (!pci_reset_pri(pdev) && !pci_enable_pri(pdev, 32)) {
+ dev_data->pri_enabled = 1;
+ dev_data->pri_tlp = pci_prg_resp_pasid_required(pdev);
+
+ ret = 0;
+ }
+ }
+
+ return ret;
+}
+
+void amd_iommu_pdev_disable_cap_pri(struct pci_dev *pdev)
+{
+ struct iommu_dev_data *dev_data = dev_iommu_priv_get(&pdev->dev);
+
+ if (dev_data->pri_enabled) {
+ pci_disable_pri(pdev);
+ dev_data->pri_enabled = 0;
+ }
+}
+
+static inline int pdev_enable_cap_pasid(struct pci_dev *pdev)
+{
+ struct iommu_dev_data *dev_data = dev_iommu_priv_get(&pdev->dev);
+ int ret = -EINVAL;
+
+ if (dev_data->pasid_enabled)
+ return 0;
+
+ if (dev_data->flags & AMD_IOMMU_DEVICE_FLAG_PASID_SUP) {
+ /* Only allow access to user-accessible pages */
+ ret = pci_enable_pasid(pdev, 0);
+ if (!ret)
+ dev_data->pasid_enabled = 1;
+ }
+
+ return ret;
+}
+
+static inline void pdev_disable_cap_pasid(struct pci_dev *pdev)
+{
+ struct iommu_dev_data *dev_data = dev_iommu_priv_get(&pdev->dev);
+
+ if (dev_data->pasid_enabled) {
+ pci_disable_pasid(pdev);
+ dev_data->pasid_enabled = 0;
+ }
+}
+
+static void pdev_enable_caps(struct pci_dev *pdev)
+{
+ pdev_enable_cap_ats(pdev);
+ pdev_enable_cap_pasid(pdev);
+ amd_iommu_pdev_enable_cap_pri(pdev);
+
+}
+
+static void pdev_disable_caps(struct pci_dev *pdev)
+{
+ pdev_disable_cap_ats(pdev);
+ pdev_disable_cap_pasid(pdev);
+ amd_iommu_pdev_disable_cap_pri(pdev);
}
/*
@@ -401,8 +514,8 @@ static int iommu_init_device(struct amd_iommu *iommu, struct device *dev)
* it'll be forced to go into translation mode.
*/
if ((iommu_default_passthrough() || !amd_iommu_force_isolation) &&
- dev_is_pci(dev) && pci_iommuv2_capable(to_pci_dev(dev))) {
- dev_data->iommu_v2 = iommu->is_iommu_v2;
+ dev_is_pci(dev) && amd_iommu_gt_ppr_supported()) {
+ dev_data->flags = pdev_get_caps(to_pci_dev(dev));
}
dev_iommu_priv_set(dev, dev_data);
@@ -703,24 +816,6 @@ static void iommu_poll_events(struct amd_iommu *iommu)
writel(head, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET);
}
-static void iommu_handle_ppr_entry(struct amd_iommu *iommu, u64 *raw)
-{
- struct amd_iommu_fault fault;
-
- if (PPR_REQ_TYPE(raw[0]) != PPR_REQ_FAULT) {
- pr_err_ratelimited("Unknown PPR request received\n");
- return;
- }
-
- fault.address = raw[1];
- fault.pasid = PPR_PASID(raw[0]);
- fault.sbdf = PCI_SEG_DEVID_TO_SBDF(iommu->pci_seg->id, PPR_DEVID(raw[0]));
- fault.tag = PPR_TAG(raw[0]);
- fault.flags = PPR_FLAGS(raw[0]);
-
- atomic_notifier_call_chain(&ppr_notifier, 0, &fault);
-}
-
static void iommu_poll_ppr_log(struct amd_iommu *iommu)
{
u32 head, tail;
@@ -766,8 +861,7 @@ static void iommu_poll_ppr_log(struct amd_iommu *iommu)
head = (head + PPR_ENTRY_SIZE) % PPR_LOG_SIZE;
writel(head, iommu->mmio_base + MMIO_PPR_HEAD_OFFSET);
- /* Handle PPR entry */
- iommu_handle_ppr_entry(iommu, entry);
+ /* TODO: PPR Handler will be added when we add IOPF support */
/* Refresh ring-buffer information */
head = readl(iommu->mmio_base + MMIO_PPR_HEAD_OFFSET);
@@ -1096,7 +1190,7 @@ static void build_inv_iotlb_pasid(struct iommu_cmd *cmd, u16 devid, u32 pasid,
}
static void build_complete_ppr(struct iommu_cmd *cmd, u16 devid, u32 pasid,
- int status, int tag, bool gn)
+ int status, int tag, u8 gn)
{
memset(cmd, 0, sizeof(*cmd));
@@ -1300,7 +1394,7 @@ static void amd_iommu_flush_irt_all(struct amd_iommu *iommu)
void iommu_flush_all_caches(struct amd_iommu *iommu)
{
- if (iommu_feature(iommu, FEATURE_IA)) {
+ if (check_feature(FEATURE_IA)) {
amd_iommu_flush_all(iommu);
} else {
amd_iommu_flush_dte_all(iommu);
@@ -1319,7 +1413,7 @@ static int device_flush_iotlb(struct iommu_dev_data *dev_data,
struct iommu_cmd cmd;
int qdep;
- qdep = dev_data->ats.qdep;
+ qdep = dev_data->ats_qdep;
iommu = rlookup_amd_iommu(dev_data->dev);
if (!iommu)
return -EINVAL;
@@ -1370,7 +1464,7 @@ static int device_flush_dte(struct iommu_dev_data *dev_data)
return ret;
}
- if (dev_data->ats.enabled)
+ if (dev_data->ats_enabled)
ret = device_flush_iotlb(dev_data, 0, ~0UL);
return ret;
@@ -1403,7 +1497,7 @@ static void __domain_flush_pages(struct protection_domain *domain,
list_for_each_entry(dev_data, &domain->dev_list, list) {
- if (!dev_data->ats.enabled)
+ if (!dev_data->ats_enabled)
continue;
ret |= device_flush_iotlb(dev_data, address, size);
@@ -1579,6 +1673,42 @@ static void free_gcr3_table(struct protection_domain *domain)
free_page((unsigned long)domain->gcr3_tbl);
}
+/*
+ * Number of GCR3 table levels required. Level must be 4-Kbyte
+ * page and can contain up to 512 entries.
+ */
+static int get_gcr3_levels(int pasids)
+{
+ int levels;
+
+ if (pasids == -1)
+ return amd_iommu_max_glx_val;
+
+ levels = get_count_order(pasids);
+
+ return levels ? (DIV_ROUND_UP(levels, 9) - 1) : levels;
+}
+
+/* Note: This function expects iommu_domain->lock to be held prior calling the function. */
+static int setup_gcr3_table(struct protection_domain *domain, int pasids)
+{
+ int levels = get_gcr3_levels(pasids);
+
+ if (levels > amd_iommu_max_glx_val)
+ return -EINVAL;
+
+ domain->gcr3_tbl = alloc_pgtable_page(domain->nid, GFP_ATOMIC);
+ if (domain->gcr3_tbl == NULL)
+ return -ENOMEM;
+
+ domain->glx = levels;
+ domain->flags |= PD_IOMMUV2_MASK;
+
+ amd_iommu_domain_update(domain);
+
+ return 0;
+}
+
static void set_dte_entry(struct amd_iommu *iommu, u16 devid,
struct protection_domain *domain, bool ats, bool ppr)
{
@@ -1607,10 +1737,8 @@ static void set_dte_entry(struct amd_iommu *iommu, u16 devid,
if (ats)
flags |= DTE_FLAG_IOTLB;
- if (ppr) {
- if (iommu_feature(iommu, FEATURE_EPHSUP))
- pte_root |= 1ULL << DEV_ENTRY_PPR;
- }
+ if (ppr)
+ pte_root |= 1ULL << DEV_ENTRY_PPR;
if (domain->dirty_tracking)
pte_root |= DTE_FLAG_HAD;
@@ -1690,7 +1818,7 @@ static void do_attach(struct iommu_dev_data *dev_data,
iommu = rlookup_amd_iommu(dev_data->dev);
if (!iommu)
return;
- ats = dev_data->ats.enabled;
+ ats = dev_data->ats_enabled;
/* Update data structures */
dev_data->domain = domain;
@@ -1706,7 +1834,7 @@ static void do_attach(struct iommu_dev_data *dev_data,
/* Update device table */
set_dte_entry(iommu, dev_data->devid, domain,
- ats, dev_data->iommu_v2);
+ ats, dev_data->ppr);
clone_aliases(iommu, dev_data->dev);
device_flush_dte(dev_data);
@@ -1741,48 +1869,6 @@ static void do_detach(struct iommu_dev_data *dev_data)
domain->dev_cnt -= 1;
}
-static void pdev_iommuv2_disable(struct pci_dev *pdev)
-{
- pci_disable_ats(pdev);
- pci_disable_pri(pdev);
- pci_disable_pasid(pdev);
-}
-
-static int pdev_pri_ats_enable(struct pci_dev *pdev)
-{
- int ret;
-
- /* Only allow access to user-accessible pages */
- ret = pci_enable_pasid(pdev, 0);
- if (ret)
- return ret;
-
- /* First reset the PRI state of the device */
- ret = pci_reset_pri(pdev);
- if (ret)
- goto out_err_pasid;
-
- /* Enable PRI */
- /* FIXME: Hardcode number of outstanding requests for now */
- ret = pci_enable_pri(pdev, 32);
- if (ret)
- goto out_err_pasid;
-
- ret = pci_enable_ats(pdev, PAGE_SHIFT);
- if (ret)
- goto out_err_pri;
-
- return 0;
-
-out_err_pri:
- pci_disable_pri(pdev);
-
-out_err_pasid:
- pci_disable_pasid(pdev);
-
- return ret;
-}
-
/*
* If a device is not yet associated with a domain, this function makes the
* device visible in the domain
@@ -1791,9 +1877,8 @@ static int attach_device(struct device *dev,
struct protection_domain *domain)
{
struct iommu_dev_data *dev_data;
- struct pci_dev *pdev;
unsigned long flags;
- int ret;
+ int ret = 0;
spin_lock_irqsave(&domain->lock, flags);
@@ -1801,45 +1886,13 @@ static int attach_device(struct device *dev,
spin_lock(&dev_data->lock);
- ret = -EBUSY;
- if (dev_data->domain != NULL)
+ if (dev_data->domain != NULL) {
+ ret = -EBUSY;
goto out;
-
- if (!dev_is_pci(dev))
- goto skip_ats_check;
-
- pdev = to_pci_dev(dev);
- if (domain->flags & PD_IOMMUV2_MASK) {
- struct iommu_domain *def_domain = iommu_get_dma_domain(dev);
-
- ret = -EINVAL;
-
- /*
- * In case of using AMD_IOMMU_V1 page table mode and the device
- * is enabling for PPR/ATS support (using v2 table),
- * we need to make sure that the domain type is identity map.
- */
- if ((amd_iommu_pgtable == AMD_IOMMU_V1) &&
- def_domain->type != IOMMU_DOMAIN_IDENTITY) {
- goto out;
- }
-
- if (dev_data->iommu_v2) {
- if (pdev_pri_ats_enable(pdev) != 0)
- goto out;
-
- dev_data->ats.enabled = true;
- dev_data->ats.qdep = pci_ats_queue_depth(pdev);
- dev_data->pri_tlp = pci_prg_resp_pasid_required(pdev);
- }
- } else if (amd_iommu_iotlb_sup &&
- pci_enable_ats(pdev, PAGE_SHIFT) == 0) {
- dev_data->ats.enabled = true;
- dev_data->ats.qdep = pci_ats_queue_depth(pdev);
}
-skip_ats_check:
- ret = 0;
+ if (dev_is_pci(dev))
+ pdev_enable_caps(to_pci_dev(dev));
do_attach(dev_data, domain);
@@ -1887,15 +1940,8 @@ static void detach_device(struct device *dev)
do_detach(dev_data);
- if (!dev_is_pci(dev))
- goto out;
-
- if (domain->flags & PD_IOMMUV2_MASK && dev_data->iommu_v2)
- pdev_iommuv2_disable(to_pci_dev(dev));
- else if (dev_data->ats.enabled)
- pci_disable_ats(to_pci_dev(dev));
-
- dev_data->ats.enabled = false;
+ if (dev_is_pci(dev))
+ pdev_disable_caps(to_pci_dev(dev));
out:
spin_unlock(&dev_data->lock);
@@ -1985,7 +2031,7 @@ static void update_device_table(struct protection_domain *domain)
if (!iommu)
continue;
set_dte_entry(iommu, dev_data->devid, domain,
- dev_data->ats.enabled, dev_data->iommu_v2);
+ dev_data->ats_enabled, dev_data->ppr);
clone_aliases(iommu, dev_data->dev);
}
}
@@ -2019,9 +2065,11 @@ void amd_iommu_domain_update(struct protection_domain *domain)
static void cleanup_domain(struct protection_domain *domain)
{
struct iommu_dev_data *entry;
- unsigned long flags;
- spin_lock_irqsave(&domain->lock, flags);
+ lockdep_assert_held(&domain->lock);
+
+ if (!domain->dev_cnt)
+ return;
while (!list_empty(&domain->dev_list)) {
entry = list_first_entry(&domain->dev_list,
@@ -2029,8 +2077,7 @@ static void cleanup_domain(struct protection_domain *domain)
BUG_ON(!entry->domain);
do_detach(entry);
}
-
- spin_unlock_irqrestore(&domain->lock, flags);
+ WARN_ON(domain->dev_cnt != 0);
}
static void protection_domain_free(struct protection_domain *domain)
@@ -2041,6 +2088,12 @@ static void protection_domain_free(struct protection_domain *domain)
if (domain->iop.pgtbl_cfg.tlb)
free_io_pgtable_ops(&domain->iop.iop.ops);
+ if (domain->flags & PD_IOMMUV2_MASK)
+ free_gcr3_table(domain);
+
+ if (domain->iop.root)
+ free_page((unsigned long)domain->iop.root);
+
if (domain->id)
domain_id_free(domain->id);
@@ -2053,18 +2106,10 @@ static int protection_domain_init_v1(struct protection_domain *domain, int mode)
BUG_ON(mode < PAGE_MODE_NONE || mode > PAGE_MODE_6_LEVEL);
- spin_lock_init(&domain->lock);
- domain->id = domain_id_alloc();
- if (!domain->id)
- return -ENOMEM;
- INIT_LIST_HEAD(&domain->dev_list);
-
if (mode != PAGE_MODE_NONE) {
pt_root = (void *)get_zeroed_page(GFP_KERNEL);
- if (!pt_root) {
- domain_id_free(domain->id);
+ if (!pt_root)
return -ENOMEM;
- }
}
amd_iommu_domain_set_pgtable(domain, pt_root, mode);
@@ -2074,20 +2119,12 @@ static int protection_domain_init_v1(struct protection_domain *domain, int mode)
static int protection_domain_init_v2(struct protection_domain *domain)
{
- spin_lock_init(&domain->lock);
- domain->id = domain_id_alloc();
- if (!domain->id)
- return -ENOMEM;
- INIT_LIST_HEAD(&domain->dev_list);
-
domain->flags |= PD_GIOV_MASK;
domain->domain.pgsize_bitmap = AMD_IOMMU_PGSIZES_V2;
- if (domain_enable_v2(domain, 1)) {
- domain_id_free(domain->id);
+ if (setup_gcr3_table(domain, 1))
return -ENOMEM;
- }
return 0;
}
@@ -2097,57 +2134,60 @@ static struct protection_domain *protection_domain_alloc(unsigned int type)
struct io_pgtable_ops *pgtbl_ops;
struct protection_domain *domain;
int pgtable;
- int mode = DEFAULT_PGTABLE_LEVEL;
int ret;
+ domain = kzalloc(sizeof(*domain), GFP_KERNEL);
+ if (!domain)
+ return NULL;
+
+ domain->id = domain_id_alloc();
+ if (!domain->id)
+ goto out_err;
+
+ spin_lock_init(&domain->lock);
+ INIT_LIST_HEAD(&domain->dev_list);
+ domain->nid = NUMA_NO_NODE;
+
+ switch (type) {
+ /* No need to allocate io pgtable ops in passthrough mode */
+ case IOMMU_DOMAIN_IDENTITY:
+ return domain;
+ case IOMMU_DOMAIN_DMA:
+ pgtable = amd_iommu_pgtable;
+ break;
/*
- * Force IOMMU v1 page table when iommu=pt and
- * when allocating domain for pass-through devices.
+ * Force IOMMU v1 page table when allocating
+ * domain for pass-through devices.
*/
- if (type == IOMMU_DOMAIN_IDENTITY) {
- pgtable = AMD_IOMMU_V1;
- mode = PAGE_MODE_NONE;
- } else if (type == IOMMU_DOMAIN_UNMANAGED) {
+ case IOMMU_DOMAIN_UNMANAGED:
pgtable = AMD_IOMMU_V1;
- } else if (type == IOMMU_DOMAIN_DMA || type == IOMMU_DOMAIN_DMA_FQ) {
- pgtable = amd_iommu_pgtable;
- } else {
- return NULL;
+ break;
+ default:
+ goto out_err;
}
- domain = kzalloc(sizeof(*domain), GFP_KERNEL);
- if (!domain)
- return NULL;
-
switch (pgtable) {
case AMD_IOMMU_V1:
- ret = protection_domain_init_v1(domain, mode);
+ ret = protection_domain_init_v1(domain, DEFAULT_PGTABLE_LEVEL);
break;
case AMD_IOMMU_V2:
ret = protection_domain_init_v2(domain);
break;
default:
ret = -EINVAL;
+ break;
}
if (ret)
goto out_err;
- /* No need to allocate io pgtable ops in passthrough mode */
- if (type == IOMMU_DOMAIN_IDENTITY)
- return domain;
-
- domain->nid = NUMA_NO_NODE;
-
pgtbl_ops = alloc_io_pgtable_ops(pgtable, &domain->iop.pgtbl_cfg, domain);
- if (!pgtbl_ops) {
- domain_id_free(domain->id);
+ if (!pgtbl_ops)
goto out_err;
- }
return domain;
out_err:
- kfree(domain);
+ protection_domain_free(domain);
return NULL;
}
@@ -2236,19 +2276,18 @@ amd_iommu_domain_alloc_user(struct device *dev, u32 flags,
static void amd_iommu_domain_free(struct iommu_domain *dom)
{
struct protection_domain *domain;
+ unsigned long flags;
- domain = to_pdomain(dom);
+ if (!dom)
+ return;
- if (domain->dev_cnt > 0)
- cleanup_domain(domain);
+ domain = to_pdomain(dom);
- BUG_ON(domain->dev_cnt != 0);
+ spin_lock_irqsave(&domain->lock, flags);
- if (!dom)
- return;
+ cleanup_domain(domain);
- if (domain->flags & PD_IOMMUV2_MASK)
- free_gcr3_table(domain);
+ spin_unlock_irqrestore(&domain->lock, flags);
protection_domain_free(domain);
}
@@ -2296,14 +2335,15 @@ static int amd_iommu_attach_device(struct iommu_domain *dom,
return ret;
}
-static void amd_iommu_iotlb_sync_map(struct iommu_domain *dom,
- unsigned long iova, size_t size)
+static int amd_iommu_iotlb_sync_map(struct iommu_domain *dom,
+ unsigned long iova, size_t size)
{
struct protection_domain *domain = to_pdomain(dom);
struct io_pgtable_ops *ops = &domain->iop.iop.ops;
if (ops->map_pages)
domain_flush_np_cache(domain, iova, size);
+ return 0;
}
static int amd_iommu_map_pages(struct iommu_domain *dom, unsigned long iova,
@@ -2541,7 +2581,6 @@ bool amd_iommu_is_attach_deferred(struct device *dev)
return dev_data->defer_attach;
}
-EXPORT_SYMBOL_GPL(amd_iommu_is_attach_deferred);
static void amd_iommu_flush_iotlb_all(struct iommu_domain *domain)
{
@@ -2581,7 +2620,7 @@ static int amd_iommu_def_domain_type(struct device *dev)
* and require remapping.
* - SNP is enabled, because it prohibits DTE[Mode]=0.
*/
- if (dev_data->iommu_v2 &&
+ if (pdev_pasid_supported(dev_data) &&
!cc_platform_has(CC_ATTR_MEM_ENCRYPT) &&
!amd_iommu_snp_en) {
return IOMMU_DOMAIN_IDENTITY;
@@ -2626,93 +2665,6 @@ const struct iommu_ops amd_iommu_ops = {
}
};
-/*****************************************************************************
- *
- * The next functions do a basic initialization of IOMMU for pass through
- * mode
- *
- * In passthrough mode the IOMMU is initialized and enabled but not used for
- * DMA-API translation.
- *
- *****************************************************************************/
-
-/* IOMMUv2 specific functions */
-int amd_iommu_register_ppr_notifier(struct notifier_block *nb)
-{
- return atomic_notifier_chain_register(&ppr_notifier, nb);
-}
-EXPORT_SYMBOL(amd_iommu_register_ppr_notifier);
-
-int amd_iommu_unregister_ppr_notifier(struct notifier_block *nb)
-{
- return atomic_notifier_chain_unregister(&ppr_notifier, nb);
-}
-EXPORT_SYMBOL(amd_iommu_unregister_ppr_notifier);
-
-void amd_iommu_domain_direct_map(struct iommu_domain *dom)
-{
- struct protection_domain *domain = to_pdomain(dom);
- unsigned long flags;
-
- spin_lock_irqsave(&domain->lock, flags);
-
- if (domain->iop.pgtbl_cfg.tlb)
- free_io_pgtable_ops(&domain->iop.iop.ops);
-
- spin_unlock_irqrestore(&domain->lock, flags);
-}
-EXPORT_SYMBOL(amd_iommu_domain_direct_map);
-
-/* Note: This function expects iommu_domain->lock to be held prior calling the function. */
-static int domain_enable_v2(struct protection_domain *domain, int pasids)
-{
- int levels;
-
- /* Number of GCR3 table levels required */
- for (levels = 0; (pasids - 1) & ~0x1ff; pasids >>= 9)
- levels += 1;
-
- if (levels > amd_iommu_max_glx_val)
- return -EINVAL;
-
- domain->gcr3_tbl = (void *)get_zeroed_page(GFP_ATOMIC);
- if (domain->gcr3_tbl == NULL)
- return -ENOMEM;
-
- domain->glx = levels;
- domain->flags |= PD_IOMMUV2_MASK;
-
- amd_iommu_domain_update(domain);
-
- return 0;
-}
-
-int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids)
-{
- struct protection_domain *pdom = to_pdomain(dom);
- unsigned long flags;
- int ret;
-
- spin_lock_irqsave(&pdom->lock, flags);
-
- /*
- * Save us all sanity checks whether devices already in the
- * domain support IOMMUv2. Just force that the domain has no
- * devices attached when it is switched into IOMMUv2 mode.
- */
- ret = -EBUSY;
- if (pdom->dev_cnt > 0 || pdom->flags & PD_IOMMUV2_MASK)
- goto out;
-
- if (!pdom->gcr3_tbl)
- ret = domain_enable_v2(pdom, pasids);
-
-out:
- spin_unlock_irqrestore(&pdom->lock, flags);
- return ret;
-}
-EXPORT_SYMBOL(amd_iommu_domain_enable_v2);
-
static int __flush_pasid(struct protection_domain *domain, u32 pasid,
u64 address, bool size)
{
@@ -2750,10 +2702,10 @@ static int __flush_pasid(struct protection_domain *domain, u32 pasid,
There might be non-IOMMUv2 capable devices in an IOMMUv2
* domain.
*/
- if (!dev_data->ats.enabled)
+ if (!dev_data->ats_enabled)
continue;
- qdep = dev_data->ats.qdep;
+ qdep = dev_data->ats_qdep;
iommu = rlookup_amd_iommu(dev_data->dev);
if (!iommu)
continue;
@@ -2794,7 +2746,6 @@ int amd_iommu_flush_page(struct iommu_domain *dom, u32 pasid,
return ret;
}
-EXPORT_SYMBOL(amd_iommu_flush_page);
static int __amd_iommu_flush_tlb(struct protection_domain *domain, u32 pasid)
{
@@ -2814,7 +2765,6 @@ int amd_iommu_flush_tlb(struct iommu_domain *dom, u32 pasid)
return ret;
}
-EXPORT_SYMBOL(amd_iommu_flush_tlb);
static u64 *__get_gcr3_pte(u64 *root, int level, u32 pasid, bool alloc)
{
@@ -2894,7 +2844,6 @@ int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, u32 pasid,
return ret;
}
-EXPORT_SYMBOL(amd_iommu_domain_set_gcr3);
int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, u32 pasid)
{
@@ -2908,7 +2857,6 @@ int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, u32 pasid)
return ret;
}
-EXPORT_SYMBOL(amd_iommu_domain_clear_gcr3);
int amd_iommu_complete_ppr(struct pci_dev *pdev, u32 pasid,
int status, int tag)
@@ -2927,49 +2875,6 @@ int amd_iommu_complete_ppr(struct pci_dev *pdev, u32 pasid,
return iommu_queue_command(iommu, &cmd);
}
-EXPORT_SYMBOL(amd_iommu_complete_ppr);
-
-int amd_iommu_device_info(struct pci_dev *pdev,
- struct amd_iommu_device_info *info)
-{
- int max_pasids;
- int pos;
-
- if (pdev == NULL || info == NULL)
- return -EINVAL;
-
- if (!amd_iommu_v2_supported())
- return -EINVAL;
-
- memset(info, 0, sizeof(*info));
-
- if (pci_ats_supported(pdev))
- info->flags |= AMD_IOMMU_DEVICE_FLAG_ATS_SUP;
-
- pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
- if (pos)
- info->flags |= AMD_IOMMU_DEVICE_FLAG_PRI_SUP;
-
- pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PASID);
- if (pos) {
- int features;
-
- max_pasids = 1 << (9 * (amd_iommu_max_glx_val + 1));
- max_pasids = min(max_pasids, (1 << 20));
-
- info->flags |= AMD_IOMMU_DEVICE_FLAG_PASID_SUP;
- info->max_pasids = min(pci_max_pasids(pdev), max_pasids);
-
- features = pci_pasid_features(pdev);
- if (features & PCI_PASID_CAP_EXEC)
- info->flags |= AMD_IOMMU_DEVICE_FLAG_EXEC_SUP;
- if (features & PCI_PASID_CAP_PRIV)
- info->flags |= AMD_IOMMU_DEVICE_FLAG_PRIV_SUP;
- }
-
- return 0;
-}
-EXPORT_SYMBOL(amd_iommu_device_info);
#ifdef CONFIG_IRQ_REMAP
diff --git a/drivers/iommu/amd/iommu_v2.c b/drivers/iommu/amd/iommu_v2.c
deleted file mode 100644
index 57c2fb1146e2..000000000000
--- a/drivers/iommu/amd/iommu_v2.c
+++ /dev/null
@@ -1,996 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2010-2012 Advanced Micro Devices, Inc.
- * Author: Joerg Roedel <[email protected]>
- */
-
-#define pr_fmt(fmt) "AMD-Vi: " fmt
-
-#include <linux/refcount.h>
-#include <linux/mmu_notifier.h>
-#include <linux/amd-iommu.h>
-#include <linux/mm_types.h>
-#include <linux/profile.h>
-#include <linux/module.h>
-#include <linux/sched.h>
-#include <linux/sched/mm.h>
-#include <linux/wait.h>
-#include <linux/pci.h>
-#include <linux/gfp.h>
-#include <linux/cc_platform.h>
-
-#include "amd_iommu.h"
-
-MODULE_LICENSE("GPL v2");
-MODULE_AUTHOR("Joerg Roedel <[email protected]>");
-
-#define PRI_QUEUE_SIZE 512
-
-struct pri_queue {
- atomic_t inflight;
- bool finish;
- int status;
-};
-
-struct pasid_state {
- struct list_head list; /* For global state-list */
- refcount_t count; /* Reference count */
- unsigned mmu_notifier_count; /* Counting nested mmu_notifier
- calls */
- struct mm_struct *mm; /* mm_struct for the faults */
- struct mmu_notifier mn; /* mmu_notifier handle */
- struct pri_queue pri[PRI_QUEUE_SIZE]; /* PRI tag states */
- struct device_state *device_state; /* Link to our device_state */
- u32 pasid; /* PASID index */
- bool invalid; /* Used during setup and
- teardown of the pasid */
- spinlock_t lock; /* Protect pri_queues and
- mmu_notifer_count */
- wait_queue_head_t wq; /* To wait for count == 0 */
-};
-
-struct device_state {
- struct list_head list;
- u32 sbdf;
- atomic_t count;
- struct pci_dev *pdev;
- struct pasid_state **states;
- struct iommu_domain *domain;
- int pasid_levels;
- int max_pasids;
- amd_iommu_invalid_ppr_cb inv_ppr_cb;
- amd_iommu_invalidate_ctx inv_ctx_cb;
- spinlock_t lock;
- wait_queue_head_t wq;
-};
-
-struct fault {
- struct work_struct work;
- struct device_state *dev_state;
- struct pasid_state *state;
- struct mm_struct *mm;
- u64 address;
- u32 pasid;
- u16 tag;
- u16 finish;
- u16 flags;
-};
-
-static LIST_HEAD(state_list);
-static DEFINE_SPINLOCK(state_lock);
-
-static struct workqueue_struct *iommu_wq;
-
-static void free_pasid_states(struct device_state *dev_state);
-
-static struct device_state *__get_device_state(u32 sbdf)
-{
- struct device_state *dev_state;
-
- list_for_each_entry(dev_state, &state_list, list) {
- if (dev_state->sbdf == sbdf)
- return dev_state;
- }
-
- return NULL;
-}
-
-static struct device_state *get_device_state(u32 sbdf)
-{
- struct device_state *dev_state;
- unsigned long flags;
-
- spin_lock_irqsave(&state_lock, flags);
- dev_state = __get_device_state(sbdf);
- if (dev_state != NULL)
- atomic_inc(&dev_state->count);
- spin_unlock_irqrestore(&state_lock, flags);
-
- return dev_state;
-}
-
-static void free_device_state(struct device_state *dev_state)
-{
- struct iommu_group *group;
-
- /* Get rid of any remaining pasid states */
- free_pasid_states(dev_state);
-
- /*
- * Wait until the last reference is dropped before freeing
- * the device state.
- */
- wait_event(dev_state->wq, !atomic_read(&dev_state->count));
-
- /*
- * First detach device from domain - No more PRI requests will arrive
- * from that device after it is unbound from the IOMMUv2 domain.
- */
- group = iommu_group_get(&dev_state->pdev->dev);
- if (WARN_ON(!group))
- return;
-
- iommu_detach_group(dev_state->domain, group);
-
- iommu_group_put(group);
-
- /* Everything is down now, free the IOMMUv2 domain */
- iommu_domain_free(dev_state->domain);
-
- /* Finally get rid of the device-state */
- kfree(dev_state);
-}
-
-static void put_device_state(struct device_state *dev_state)
-{
- if (atomic_dec_and_test(&dev_state->count))
- wake_up(&dev_state->wq);
-}
-
-/* Must be called under dev_state->lock */
-static struct pasid_state **__get_pasid_state_ptr(struct device_state *dev_state,
- u32 pasid, bool alloc)
-{
- struct pasid_state **root, **ptr;
- int level, index;
-
- level = dev_state->pasid_levels;
- root = dev_state->states;
-
- while (true) {
-
- index = (pasid >> (9 * level)) & 0x1ff;
- ptr = &root[index];
-
- if (level == 0)
- break;
-
- if (*ptr == NULL) {
- if (!alloc)
- return NULL;
-
- *ptr = (void *)get_zeroed_page(GFP_ATOMIC);
- if (*ptr == NULL)
- return NULL;
- }
-
- root = (struct pasid_state **)*ptr;
- level -= 1;
- }
-
- return ptr;
-}
-
-static int set_pasid_state(struct device_state *dev_state,
- struct pasid_state *pasid_state,
- u32 pasid)
-{
- struct pasid_state **ptr;
- unsigned long flags;
- int ret;
-
- spin_lock_irqsave(&dev_state->lock, flags);
- ptr = __get_pasid_state_ptr(dev_state, pasid, true);
-
- ret = -ENOMEM;
- if (ptr == NULL)
- goto out_unlock;
-
- ret = -ENOMEM;
- if (*ptr != NULL)
- goto out_unlock;
-
- *ptr = pasid_state;
-
- ret = 0;
-
-out_unlock:
- spin_unlock_irqrestore(&dev_state->lock, flags);
-
- return ret;
-}
-
-static void clear_pasid_state(struct device_state *dev_state, u32 pasid)
-{
- struct pasid_state **ptr;
- unsigned long flags;
-
- spin_lock_irqsave(&dev_state->lock, flags);
- ptr = __get_pasid_state_ptr(dev_state, pasid, true);
-
- if (ptr == NULL)
- goto out_unlock;
-
- *ptr = NULL;
-
-out_unlock:
- spin_unlock_irqrestore(&dev_state->lock, flags);
-}
-
-static struct pasid_state *get_pasid_state(struct device_state *dev_state,
- u32 pasid)
-{
- struct pasid_state **ptr, *ret = NULL;
- unsigned long flags;
-
- spin_lock_irqsave(&dev_state->lock, flags);
- ptr = __get_pasid_state_ptr(dev_state, pasid, false);
-
- if (ptr == NULL)
- goto out_unlock;
-
- ret = *ptr;
- if (ret)
- refcount_inc(&ret->count);
-
-out_unlock:
- spin_unlock_irqrestore(&dev_state->lock, flags);
-
- return ret;
-}
-
-static void free_pasid_state(struct pasid_state *pasid_state)
-{
- kfree(pasid_state);
-}
-
-static void put_pasid_state(struct pasid_state *pasid_state)
-{
- if (refcount_dec_and_test(&pasid_state->count))
- wake_up(&pasid_state->wq);
-}
-
-static void put_pasid_state_wait(struct pasid_state *pasid_state)
-{
- if (!refcount_dec_and_test(&pasid_state->count))
- wait_event(pasid_state->wq, !refcount_read(&pasid_state->count));
- free_pasid_state(pasid_state);
-}
-
-static void unbind_pasid(struct pasid_state *pasid_state)
-{
- struct iommu_domain *domain;
-
- domain = pasid_state->device_state->domain;
-
- /*
- * Mark pasid_state as invalid, no more faults will we added to the
- * work queue after this is visible everywhere.
- */
- pasid_state->invalid = true;
-
- /* Make sure this is visible */
- smp_wmb();
-
- /* After this the device/pasid can't access the mm anymore */
- amd_iommu_domain_clear_gcr3(domain, pasid_state->pasid);
-
- /* Make sure no more pending faults are in the queue */
- flush_workqueue(iommu_wq);
-}
-
-static void free_pasid_states_level1(struct pasid_state **tbl)
-{
- int i;
-
- for (i = 0; i < 512; ++i) {
- if (tbl[i] == NULL)
- continue;
-
- free_page((unsigned long)tbl[i]);
- }
-}
-
-static void free_pasid_states_level2(struct pasid_state **tbl)
-{
- struct pasid_state **ptr;
- int i;
-
- for (i = 0; i < 512; ++i) {
- if (tbl[i] == NULL)
- continue;
-
- ptr = (struct pasid_state **)tbl[i];
- free_pasid_states_level1(ptr);
- }
-}
-
-static void free_pasid_states(struct device_state *dev_state)
-{
- struct pasid_state *pasid_state;
- int i;
-
- for (i = 0; i < dev_state->max_pasids; ++i) {
- pasid_state = get_pasid_state(dev_state, i);
- if (pasid_state == NULL)
- continue;
-
- put_pasid_state(pasid_state);
-
- /* Clear the pasid state so that the pasid can be re-used */
- clear_pasid_state(dev_state, pasid_state->pasid);
-
- /*
- * This will call the mn_release function and
- * unbind the PASID
- */
- mmu_notifier_unregister(&pasid_state->mn, pasid_state->mm);
-
- put_pasid_state_wait(pasid_state); /* Reference taken in
- amd_iommu_bind_pasid */
-
- /* Drop reference taken in amd_iommu_bind_pasid */
- put_device_state(dev_state);
- }
-
- if (dev_state->pasid_levels == 2)
- free_pasid_states_level2(dev_state->states);
- else if (dev_state->pasid_levels == 1)
- free_pasid_states_level1(dev_state->states);
- else
- BUG_ON(dev_state->pasid_levels != 0);
-
- free_page((unsigned long)dev_state->states);
-}
-
-static struct pasid_state *mn_to_state(struct mmu_notifier *mn)
-{
- return container_of(mn, struct pasid_state, mn);
-}
-
-static void mn_arch_invalidate_secondary_tlbs(struct mmu_notifier *mn,
- struct mm_struct *mm,
- unsigned long start, unsigned long end)
-{
- struct pasid_state *pasid_state;
- struct device_state *dev_state;
-
- pasid_state = mn_to_state(mn);
- dev_state = pasid_state->device_state;
-
- if ((start ^ (end - 1)) < PAGE_SIZE)
- amd_iommu_flush_page(dev_state->domain, pasid_state->pasid,
- start);
- else
- amd_iommu_flush_tlb(dev_state->domain, pasid_state->pasid);
-}
-
-static void mn_release(struct mmu_notifier *mn, struct mm_struct *mm)
-{
- struct pasid_state *pasid_state;
- struct device_state *dev_state;
- bool run_inv_ctx_cb;
-
- might_sleep();
-
- pasid_state = mn_to_state(mn);
- dev_state = pasid_state->device_state;
- run_inv_ctx_cb = !pasid_state->invalid;
-
- if (run_inv_ctx_cb && dev_state->inv_ctx_cb)
- dev_state->inv_ctx_cb(dev_state->pdev, pasid_state->pasid);
-
- unbind_pasid(pasid_state);
-}
-
-static const struct mmu_notifier_ops iommu_mn = {
- .release = mn_release,
- .arch_invalidate_secondary_tlbs = mn_arch_invalidate_secondary_tlbs,
-};
-
-static void set_pri_tag_status(struct pasid_state *pasid_state,
- u16 tag, int status)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&pasid_state->lock, flags);
- pasid_state->pri[tag].status = status;
- spin_unlock_irqrestore(&pasid_state->lock, flags);
-}
-
-static void finish_pri_tag(struct device_state *dev_state,
- struct pasid_state *pasid_state,
- u16 tag)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&pasid_state->lock, flags);
- if (atomic_dec_and_test(&pasid_state->pri[tag].inflight) &&
- pasid_state->pri[tag].finish) {
- amd_iommu_complete_ppr(dev_state->pdev, pasid_state->pasid,
- pasid_state->pri[tag].status, tag);
- pasid_state->pri[tag].finish = false;
- pasid_state->pri[tag].status = PPR_SUCCESS;
- }
- spin_unlock_irqrestore(&pasid_state->lock, flags);
-}
-
-static void handle_fault_error(struct fault *fault)
-{
- int status;
-
- if (!fault->dev_state->inv_ppr_cb) {
- set_pri_tag_status(fault->state, fault->tag, PPR_INVALID);
- return;
- }
-
- status = fault->dev_state->inv_ppr_cb(fault->dev_state->pdev,
- fault->pasid,
- fault->address,
- fault->flags);
- switch (status) {
- case AMD_IOMMU_INV_PRI_RSP_SUCCESS:
- set_pri_tag_status(fault->state, fault->tag, PPR_SUCCESS);
- break;
- case AMD_IOMMU_INV_PRI_RSP_INVALID:
- set_pri_tag_status(fault->state, fault->tag, PPR_INVALID);
- break;
- case AMD_IOMMU_INV_PRI_RSP_FAIL:
- set_pri_tag_status(fault->state, fault->tag, PPR_FAILURE);
- break;
- default:
- BUG();
- }
-}
-
-static bool access_error(struct vm_area_struct *vma, struct fault *fault)
-{
- unsigned long requested = 0;
-
- if (fault->flags & PPR_FAULT_EXEC)
- requested |= VM_EXEC;
-
- if (fault->flags & PPR_FAULT_READ)
- requested |= VM_READ;
-
- if (fault->flags & PPR_FAULT_WRITE)
- requested |= VM_WRITE;
-
- return (requested & ~vma->vm_flags) != 0;
-}
-
-static void do_fault(struct work_struct *work)
-{
- struct fault *fault = container_of(work, struct fault, work);
- struct vm_area_struct *vma;
- vm_fault_t ret = VM_FAULT_ERROR;
- unsigned int flags = 0;
- struct mm_struct *mm;
- u64 address;
-
- mm = fault->state->mm;
- address = fault->address;
-
- if (fault->flags & PPR_FAULT_USER)
- flags |= FAULT_FLAG_USER;
- if (fault->flags & PPR_FAULT_WRITE)
- flags |= FAULT_FLAG_WRITE;
- flags |= FAULT_FLAG_REMOTE;
-
- mmap_read_lock(mm);
- vma = vma_lookup(mm, address);
- if (!vma)
- /* failed to get a vma in the right range */
- goto out;
-
- /* Check if we have the right permissions on the vma */
- if (access_error(vma, fault))
- goto out;
-
- ret = handle_mm_fault(vma, address, flags, NULL);
-out:
- mmap_read_unlock(mm);
-
- if (ret & VM_FAULT_ERROR)
- /* failed to service fault */
- handle_fault_error(fault);
-
- finish_pri_tag(fault->dev_state, fault->state, fault->tag);
-
- put_pasid_state(fault->state);
-
- kfree(fault);
-}
-
-static int ppr_notifier(struct notifier_block *nb, unsigned long e, void *data)
-{
- struct amd_iommu_fault *iommu_fault;
- struct pasid_state *pasid_state;
- struct device_state *dev_state;
- struct pci_dev *pdev = NULL;
- unsigned long flags;
- struct fault *fault;
- bool finish;
- u16 tag, devid, seg_id;
- int ret;
-
- iommu_fault = data;
- tag = iommu_fault->tag & 0x1ff;
- finish = (iommu_fault->tag >> 9) & 1;
-
- seg_id = PCI_SBDF_TO_SEGID(iommu_fault->sbdf);
- devid = PCI_SBDF_TO_DEVID(iommu_fault->sbdf);
- pdev = pci_get_domain_bus_and_slot(seg_id, PCI_BUS_NUM(devid),
- devid & 0xff);
- if (!pdev)
- return -ENODEV;
-
- ret = NOTIFY_DONE;
-
- /* In kdump kernel pci dev is not initialized yet -> send INVALID */
- if (amd_iommu_is_attach_deferred(&pdev->dev)) {
- amd_iommu_complete_ppr(pdev, iommu_fault->pasid,
- PPR_INVALID, tag);
- goto out;
- }
-
- dev_state = get_device_state(iommu_fault->sbdf);
- if (dev_state == NULL)
- goto out;
-
- pasid_state = get_pasid_state(dev_state, iommu_fault->pasid);
- if (pasid_state == NULL || pasid_state->invalid) {
- /* We know the device but not the PASID -> send INVALID */
- amd_iommu_complete_ppr(dev_state->pdev, iommu_fault->pasid,
- PPR_INVALID, tag);
- goto out_drop_state;
- }
-
- spin_lock_irqsave(&pasid_state->lock, flags);
- atomic_inc(&pasid_state->pri[tag].inflight);
- if (finish)
- pasid_state->pri[tag].finish = true;
- spin_unlock_irqrestore(&pasid_state->lock, flags);
-
- fault = kzalloc(sizeof(*fault), GFP_ATOMIC);
- if (fault == NULL) {
- /* We are OOM - send success and let the device re-fault */
- finish_pri_tag(dev_state, pasid_state, tag);
- goto out_drop_state;
- }
-
- fault->dev_state = dev_state;
- fault->address = iommu_fault->address;
- fault->state = pasid_state;
- fault->tag = tag;
- fault->finish = finish;
- fault->pasid = iommu_fault->pasid;
- fault->flags = iommu_fault->flags;
- INIT_WORK(&fault->work, do_fault);
-
- queue_work(iommu_wq, &fault->work);
-
- ret = NOTIFY_OK;
-
-out_drop_state:
-
- if (ret != NOTIFY_OK && pasid_state)
- put_pasid_state(pasid_state);
-
- put_device_state(dev_state);
-
-out:
- pci_dev_put(pdev);
- return ret;
-}
-
-static struct notifier_block ppr_nb = {
- .notifier_call = ppr_notifier,
-};
-
-int amd_iommu_bind_pasid(struct pci_dev *pdev, u32 pasid,
- struct task_struct *task)
-{
- struct pasid_state *pasid_state;
- struct device_state *dev_state;
- struct mm_struct *mm;
- u32 sbdf;
- int ret;
-
- might_sleep();
-
- if (!amd_iommu_v2_supported())
- return -ENODEV;
-
- sbdf = get_pci_sbdf_id(pdev);
- dev_state = get_device_state(sbdf);
-
- if (dev_state == NULL)
- return -EINVAL;
-
- ret = -EINVAL;
- if (pasid >= dev_state->max_pasids)
- goto out;
-
- ret = -ENOMEM;
- pasid_state = kzalloc(sizeof(*pasid_state), GFP_KERNEL);
- if (pasid_state == NULL)
- goto out;
-
-
- refcount_set(&pasid_state->count, 1);
- init_waitqueue_head(&pasid_state->wq);
- spin_lock_init(&pasid_state->lock);
-
- mm = get_task_mm(task);
- pasid_state->mm = mm;
- pasid_state->device_state = dev_state;
- pasid_state->pasid = pasid;
- pasid_state->invalid = true; /* Mark as valid only if we are
- done with setting up the pasid */
- pasid_state->mn.ops = &iommu_mn;
-
- if (pasid_state->mm == NULL)
- goto out_free;
-
- ret = mmu_notifier_register(&pasid_state->mn, mm);
- if (ret)
- goto out_free;
-
- ret = set_pasid_state(dev_state, pasid_state, pasid);
- if (ret)
- goto out_unregister;
-
- ret = amd_iommu_domain_set_gcr3(dev_state->domain, pasid,
- __pa(pasid_state->mm->pgd));
- if (ret)
- goto out_clear_state;
-
- /* Now we are ready to handle faults */
- pasid_state->invalid = false;
-
- /*
- * Drop the reference to the mm_struct here. We rely on the
- * mmu_notifier release call-back to inform us when the mm
- * is going away.
- */
- mmput(mm);
-
- return 0;
-
-out_clear_state:
- clear_pasid_state(dev_state, pasid);
-
-out_unregister:
- mmu_notifier_unregister(&pasid_state->mn, mm);
- mmput(mm);
-
-out_free:
- free_pasid_state(pasid_state);
-
-out:
- put_device_state(dev_state);
-
- return ret;
-}
-EXPORT_SYMBOL(amd_iommu_bind_pasid);
-
-void amd_iommu_unbind_pasid(struct pci_dev *pdev, u32 pasid)
-{
- struct pasid_state *pasid_state;
- struct device_state *dev_state;
- u32 sbdf;
-
- might_sleep();
-
- if (!amd_iommu_v2_supported())
- return;
-
- sbdf = get_pci_sbdf_id(pdev);
- dev_state = get_device_state(sbdf);
- if (dev_state == NULL)
- return;
-
- if (pasid >= dev_state->max_pasids)
- goto out;
-
- pasid_state = get_pasid_state(dev_state, pasid);
- if (pasid_state == NULL)
- goto out;
- /*
- * Drop reference taken here. We are safe because we still hold
- * the reference taken in the amd_iommu_bind_pasid function.
- */
- put_pasid_state(pasid_state);
-
- /* Clear the pasid state so that the pasid can be re-used */
- clear_pasid_state(dev_state, pasid_state->pasid);
-
- /*
- * Call mmu_notifier_unregister to drop our reference
- * to pasid_state->mm
- */
- mmu_notifier_unregister(&pasid_state->mn, pasid_state->mm);
-
- put_pasid_state_wait(pasid_state); /* Reference taken in
- amd_iommu_bind_pasid */
-out:
- /* Drop reference taken in this function */
- put_device_state(dev_state);
-
- /* Drop reference taken in amd_iommu_bind_pasid */
- put_device_state(dev_state);
-}
-EXPORT_SYMBOL(amd_iommu_unbind_pasid);
-
-int amd_iommu_init_device(struct pci_dev *pdev, int pasids)
-{
- struct device_state *dev_state;
- struct iommu_group *group;
- unsigned long flags;
- int ret, tmp;
- u32 sbdf;
-
- might_sleep();
-
- /*
- * When memory encryption is active the device is likely not in a
- * direct-mapped domain. Forbid using IOMMUv2 functionality for now.
- */
- if (cc_platform_has(CC_ATTR_MEM_ENCRYPT))
- return -ENODEV;
-
- if (!amd_iommu_v2_supported())
- return -ENODEV;
-
- if (pasids <= 0 || pasids > (PASID_MASK + 1))
- return -EINVAL;
-
- sbdf = get_pci_sbdf_id(pdev);
-
- dev_state = kzalloc(sizeof(*dev_state), GFP_KERNEL);
- if (dev_state == NULL)
- return -ENOMEM;
-
- spin_lock_init(&dev_state->lock);
- init_waitqueue_head(&dev_state->wq);
- dev_state->pdev = pdev;
- dev_state->sbdf = sbdf;
-
- tmp = pasids;
- for (dev_state->pasid_levels = 0; (tmp - 1) & ~0x1ff; tmp >>= 9)
- dev_state->pasid_levels += 1;
-
- atomic_set(&dev_state->count, 1);
- dev_state->max_pasids = pasids;
-
- ret = -ENOMEM;
- dev_state->states = (void *)get_zeroed_page(GFP_KERNEL);
- if (dev_state->states == NULL)
- goto out_free_dev_state;
-
- dev_state->domain = iommu_domain_alloc(&pci_bus_type);
- if (dev_state->domain == NULL)
- goto out_free_states;
-
- /* See iommu_is_default_domain() */
- dev_state->domain->type = IOMMU_DOMAIN_IDENTITY;
- amd_iommu_domain_direct_map(dev_state->domain);
-
- ret = amd_iommu_domain_enable_v2(dev_state->domain, pasids);
- if (ret)
- goto out_free_domain;
-
- group = iommu_group_get(&pdev->dev);
- if (!group) {
- ret = -EINVAL;
- goto out_free_domain;
- }
-
- ret = iommu_attach_group(dev_state->domain, group);
- if (ret != 0)
- goto out_drop_group;
-
- iommu_group_put(group);
-
- spin_lock_irqsave(&state_lock, flags);
-
- if (__get_device_state(sbdf) != NULL) {
- spin_unlock_irqrestore(&state_lock, flags);
- ret = -EBUSY;
- goto out_free_domain;
- }
-
- list_add_tail(&dev_state->list, &state_list);
-
- spin_unlock_irqrestore(&state_lock, flags);
-
- return 0;
-
-out_drop_group:
- iommu_group_put(group);
-
-out_free_domain:
- iommu_domain_free(dev_state->domain);
-
-out_free_states:
- free_page((unsigned long)dev_state->states);
-
-out_free_dev_state:
- kfree(dev_state);
-
- return ret;
-}
-EXPORT_SYMBOL(amd_iommu_init_device);
-
-void amd_iommu_free_device(struct pci_dev *pdev)
-{
- struct device_state *dev_state;
- unsigned long flags;
- u32 sbdf;
-
- if (!amd_iommu_v2_supported())
- return;
-
- sbdf = get_pci_sbdf_id(pdev);
-
- spin_lock_irqsave(&state_lock, flags);
-
- dev_state = __get_device_state(sbdf);
- if (dev_state == NULL) {
- spin_unlock_irqrestore(&state_lock, flags);
- return;
- }
-
- list_del(&dev_state->list);
-
- spin_unlock_irqrestore(&state_lock, flags);
-
- put_device_state(dev_state);
- free_device_state(dev_state);
-}
-EXPORT_SYMBOL(amd_iommu_free_device);
-
-int amd_iommu_set_invalid_ppr_cb(struct pci_dev *pdev,
- amd_iommu_invalid_ppr_cb cb)
-{
- struct device_state *dev_state;
- unsigned long flags;
- u32 sbdf;
- int ret;
-
- if (!amd_iommu_v2_supported())
- return -ENODEV;
-
- sbdf = get_pci_sbdf_id(pdev);
-
- spin_lock_irqsave(&state_lock, flags);
-
- ret = -EINVAL;
- dev_state = __get_device_state(sbdf);
- if (dev_state == NULL)
- goto out_unlock;
-
- dev_state->inv_ppr_cb = cb;
-
- ret = 0;
-
-out_unlock:
- spin_unlock_irqrestore(&state_lock, flags);
-
- return ret;
-}
-EXPORT_SYMBOL(amd_iommu_set_invalid_ppr_cb);
-
-int amd_iommu_set_invalidate_ctx_cb(struct pci_dev *pdev,
- amd_iommu_invalidate_ctx cb)
-{
- struct device_state *dev_state;
- unsigned long flags;
- u32 sbdf;
- int ret;
-
- if (!amd_iommu_v2_supported())
- return -ENODEV;
-
- sbdf = get_pci_sbdf_id(pdev);
-
- spin_lock_irqsave(&state_lock, flags);
-
- ret = -EINVAL;
- dev_state = __get_device_state(sbdf);
- if (dev_state == NULL)
- goto out_unlock;
-
- dev_state->inv_ctx_cb = cb;
-
- ret = 0;
-
-out_unlock:
- spin_unlock_irqrestore(&state_lock, flags);
-
- return ret;
-}
-EXPORT_SYMBOL(amd_iommu_set_invalidate_ctx_cb);
-
-static int __init amd_iommu_v2_init(void)
-{
- int ret;
-
- if (!amd_iommu_v2_supported()) {
- pr_info("AMD IOMMUv2 functionality not available on this system - This is not a bug.\n");
- /*
- * Load anyway to provide the symbols to other modules
- * which may use AMD IOMMUv2 optionally.
- */
- return 0;
- }
-
- ret = -ENOMEM;
- iommu_wq = alloc_workqueue("amd_iommu_v2", WQ_MEM_RECLAIM, 0);
- if (iommu_wq == NULL)
- goto out;
-
- amd_iommu_register_ppr_notifier(&ppr_nb);
-
- pr_info("AMD IOMMUv2 loaded and initialized\n");
-
- return 0;
-
-out:
- return ret;
-}
-
-static void __exit amd_iommu_v2_exit(void)
-{
- struct device_state *dev_state, *next;
- unsigned long flags;
- LIST_HEAD(freelist);
-
- if (!amd_iommu_v2_supported())
- return;
-
- amd_iommu_unregister_ppr_notifier(&ppr_nb);
-
- flush_workqueue(iommu_wq);
-
- /*
- * The loop below might call flush_workqueue(), so call
- * destroy_workqueue() after it
- */
- spin_lock_irqsave(&state_lock, flags);
-
- list_for_each_entry_safe(dev_state, next, &state_list, list) {
- WARN_ON_ONCE(1);
-
- put_device_state(dev_state);
- list_del(&dev_state->list);
- list_add_tail(&dev_state->list, &freelist);
- }
-
- spin_unlock_irqrestore(&state_lock, flags);
-
- /*
- * Since free_device_state waits on the count to be zero,
- * we need to free dev_state outside the spinlock.
- */
- list_for_each_entry_safe(dev_state, next, &freelist, list) {
- list_del(&dev_state->list);
- free_device_state(dev_state);
- }
-
- destroy_workqueue(iommu_wq);
-}
-
-module_init(amd_iommu_v2_init);
-module_exit(amd_iommu_v2_exit);
diff --git a/drivers/iommu/apple-dart.c b/drivers/iommu/apple-dart.c
index 0b8927508427..ee05f4824bfa 100644
--- a/drivers/iommu/apple-dart.c
+++ b/drivers/iommu/apple-dart.c
@@ -196,7 +196,6 @@ struct apple_dart_hw {
* @lock: lock for hardware operations involving this dart
* @pgsize: pagesize supported by this DART
* @supports_bypass: indicates if this DART supports bypass mode
- * @force_bypass: force bypass mode due to pagesize mismatch?
* @sid2group: maps stream ids to iommu_groups
* @iommu: iommu core device
*/
@@ -217,7 +216,6 @@ struct apple_dart {
u32 pgsize;
u32 num_streams;
u32 supports_bypass : 1;
- u32 force_bypass : 1;
struct iommu_group *sid2group[DART_MAX_STREAMS];
struct iommu_device iommu;
@@ -506,10 +504,11 @@ static void apple_dart_iotlb_sync(struct iommu_domain *domain,
apple_dart_domain_flush_tlb(to_dart_domain(domain));
}
-static void apple_dart_iotlb_sync_map(struct iommu_domain *domain,
- unsigned long iova, size_t size)
+static int apple_dart_iotlb_sync_map(struct iommu_domain *domain,
+ unsigned long iova, size_t size)
{
apple_dart_domain_flush_tlb(to_dart_domain(domain));
+ return 0;
}
static phys_addr_t apple_dart_iova_to_phys(struct iommu_domain *domain,
@@ -568,15 +567,17 @@ apple_dart_setup_translation(struct apple_dart_domain *domain,
stream_map->dart->hw->invalidate_tlb(stream_map);
}
-static int apple_dart_finalize_domain(struct iommu_domain *domain,
+static int apple_dart_finalize_domain(struct apple_dart_domain *dart_domain,
struct apple_dart_master_cfg *cfg)
{
- struct apple_dart_domain *dart_domain = to_dart_domain(domain);
struct apple_dart *dart = cfg->stream_maps[0].dart;
struct io_pgtable_cfg pgtbl_cfg;
int ret = 0;
int i, j;
+ if (dart->pgsize > PAGE_SIZE)
+ return -EINVAL;
+
mutex_lock(&dart_domain->init_lock);
if (dart_domain->finalized)
@@ -597,17 +598,18 @@ static int apple_dart_finalize_domain(struct iommu_domain *domain,
.iommu_dev = dart->dev,
};
- dart_domain->pgtbl_ops =
- alloc_io_pgtable_ops(dart->hw->fmt, &pgtbl_cfg, domain);
+ dart_domain->pgtbl_ops = alloc_io_pgtable_ops(dart->hw->fmt, &pgtbl_cfg,
+ &dart_domain->domain);
if (!dart_domain->pgtbl_ops) {
ret = -ENOMEM;
goto done;
}
- domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
- domain->geometry.aperture_start = 0;
- domain->geometry.aperture_end = (dma_addr_t)DMA_BIT_MASK(dart->ias);
- domain->geometry.force_aperture = true;
+ dart_domain->domain.pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
+ dart_domain->domain.geometry.aperture_start = 0;
+ dart_domain->domain.geometry.aperture_end =
+ (dma_addr_t)DMA_BIT_MASK(dart->ias);
+ dart_domain->domain.geometry.force_aperture = true;
dart_domain->finalized = true;
@@ -651,47 +653,72 @@ static int apple_dart_domain_add_streams(struct apple_dart_domain *domain,
true);
}
-static int apple_dart_attach_dev(struct iommu_domain *domain,
- struct device *dev)
+static int apple_dart_attach_dev_paging(struct iommu_domain *domain,
+ struct device *dev)
{
int ret, i;
struct apple_dart_stream_map *stream_map;
struct apple_dart_master_cfg *cfg = dev_iommu_priv_get(dev);
struct apple_dart_domain *dart_domain = to_dart_domain(domain);
- if (cfg->stream_maps[0].dart->force_bypass &&
- domain->type != IOMMU_DOMAIN_IDENTITY)
- return -EINVAL;
- if (!cfg->stream_maps[0].dart->supports_bypass &&
- domain->type == IOMMU_DOMAIN_IDENTITY)
- return -EINVAL;
+ ret = apple_dart_finalize_domain(dart_domain, cfg);
+ if (ret)
+ return ret;
- ret = apple_dart_finalize_domain(domain, cfg);
+ ret = apple_dart_domain_add_streams(dart_domain, cfg);
if (ret)
return ret;
- switch (domain->type) {
- default:
- ret = apple_dart_domain_add_streams(dart_domain, cfg);
- if (ret)
- return ret;
+ for_each_stream_map(i, cfg, stream_map)
+ apple_dart_setup_translation(dart_domain, stream_map);
+ return 0;
+}
- for_each_stream_map(i, cfg, stream_map)
- apple_dart_setup_translation(dart_domain, stream_map);
- break;
- case IOMMU_DOMAIN_BLOCKED:
- for_each_stream_map(i, cfg, stream_map)
- apple_dart_hw_disable_dma(stream_map);
- break;
- case IOMMU_DOMAIN_IDENTITY:
- for_each_stream_map(i, cfg, stream_map)
- apple_dart_hw_enable_bypass(stream_map);
- break;
- }
+static int apple_dart_attach_dev_identity(struct iommu_domain *domain,
+ struct device *dev)
+{
+ struct apple_dart_master_cfg *cfg = dev_iommu_priv_get(dev);
+ struct apple_dart_stream_map *stream_map;
+ int i;
- return ret;
+ if (!cfg->stream_maps[0].dart->supports_bypass)
+ return -EINVAL;
+
+ for_each_stream_map(i, cfg, stream_map)
+ apple_dart_hw_enable_bypass(stream_map);
+ return 0;
}
+static const struct iommu_domain_ops apple_dart_identity_ops = {
+ .attach_dev = apple_dart_attach_dev_identity,
+};
+
+static struct iommu_domain apple_dart_identity_domain = {
+ .type = IOMMU_DOMAIN_IDENTITY,
+ .ops = &apple_dart_identity_ops,
+};
+
+static int apple_dart_attach_dev_blocked(struct iommu_domain *domain,
+ struct device *dev)
+{
+ struct apple_dart_master_cfg *cfg = dev_iommu_priv_get(dev);
+ struct apple_dart_stream_map *stream_map;
+ int i;
+
+ for_each_stream_map(i, cfg, stream_map)
+ apple_dart_hw_disable_dma(stream_map);
+ return 0;
+}
+
+static const struct iommu_domain_ops apple_dart_blocked_ops = {
+ .attach_dev = apple_dart_attach_dev_blocked,
+};
+
+static struct iommu_domain apple_dart_blocked_domain = {
+ .type = IOMMU_DOMAIN_BLOCKED,
+ .ops = &apple_dart_blocked_ops,
+};
+
static struct iommu_device *apple_dart_probe_device(struct device *dev)
{
struct apple_dart_master_cfg *cfg = dev_iommu_priv_get(dev);
@@ -717,24 +744,26 @@ static void apple_dart_release_device(struct device *dev)
kfree(cfg);
}
-static struct iommu_domain *apple_dart_domain_alloc(unsigned int type)
+static struct iommu_domain *apple_dart_domain_alloc_paging(struct device *dev)
{
struct apple_dart_domain *dart_domain;
- if (type != IOMMU_DOMAIN_DMA && type != IOMMU_DOMAIN_UNMANAGED &&
- type != IOMMU_DOMAIN_IDENTITY && type != IOMMU_DOMAIN_BLOCKED)
- return NULL;
-
dart_domain = kzalloc(sizeof(*dart_domain), GFP_KERNEL);
if (!dart_domain)
return NULL;
mutex_init(&dart_domain->init_lock);
- /* no need to allocate pgtbl_ops or do any other finalization steps */
- if (type == IOMMU_DOMAIN_IDENTITY || type == IOMMU_DOMAIN_BLOCKED)
- dart_domain->finalized = true;
+ if (dev) {
+ struct apple_dart_master_cfg *cfg = dev_iommu_priv_get(dev);
+ int ret;
+ ret = apple_dart_finalize_domain(dart_domain, cfg);
+ if (ret) {
+ kfree(dart_domain);
+ return ERR_PTR(ret);
+ }
+ }
return &dart_domain->domain;
}
@@ -770,8 +799,6 @@ static int apple_dart_of_xlate(struct device *dev, struct of_phandle_args *args)
if (cfg_dart) {
if (cfg_dart->supports_bypass != dart->supports_bypass)
return -EINVAL;
- if (cfg_dart->force_bypass != dart->force_bypass)
- return -EINVAL;
if (cfg_dart->pgsize != dart->pgsize)
return -EINVAL;
}
@@ -913,7 +940,7 @@ static int apple_dart_def_domain_type(struct device *dev)
{
struct apple_dart_master_cfg *cfg = dev_iommu_priv_get(dev);
- if (cfg->stream_maps[0].dart->force_bypass)
+ if (cfg->stream_maps[0].dart->pgsize > PAGE_SIZE)
return IOMMU_DOMAIN_IDENTITY;
if (!cfg->stream_maps[0].dart->supports_bypass)
return IOMMU_DOMAIN_DMA;
@@ -947,7 +974,9 @@ static void apple_dart_get_resv_regions(struct device *dev,
}
static const struct iommu_ops apple_dart_iommu_ops = {
- .domain_alloc = apple_dart_domain_alloc,
+ .identity_domain = &apple_dart_identity_domain,
+ .blocked_domain = &apple_dart_blocked_domain,
+ .domain_alloc_paging = apple_dart_domain_alloc_paging,
.probe_device = apple_dart_probe_device,
.release_device = apple_dart_release_device,
.device_group = apple_dart_device_group,
@@ -957,7 +986,7 @@ static const struct iommu_ops apple_dart_iommu_ops = {
.pgsize_bitmap = -1UL, /* Restricted during dart probe */
.owner = THIS_MODULE,
.default_domain_ops = &(const struct iommu_domain_ops) {
- .attach_dev = apple_dart_attach_dev,
+ .attach_dev = apple_dart_attach_dev_paging,
.map_pages = apple_dart_map_pages,
.unmap_pages = apple_dart_unmap_pages,
.flush_iotlb_all = apple_dart_flush_iotlb_all,
@@ -1111,8 +1140,6 @@ static int apple_dart_probe(struct platform_device *pdev)
goto err_clk_disable;
}
- dart->force_bypass = dart->pgsize > PAGE_SIZE;
-
ret = apple_dart_hw_reset(dart);
if (ret)
goto err_clk_disable;
@@ -1136,7 +1163,8 @@ static int apple_dart_probe(struct platform_device *pdev)
dev_info(
&pdev->dev,
"DART [pagesize %x, %d streams, bypass support: %d, bypass forced: %d] initialized\n",
- dart->pgsize, dart->num_streams, dart->supports_bypass, dart->force_bypass);
+ dart->pgsize, dart->num_streams, dart->supports_bypass,
+ dart->pgsize > PAGE_SIZE);
return 0;
err_sysfs_remove:
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
index 8a16cd3ef487..353248ab18e7 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
@@ -25,11 +25,9 @@ struct arm_smmu_mmu_notifier {
#define mn_to_smmu(mn) container_of(mn, struct arm_smmu_mmu_notifier, mn)
struct arm_smmu_bond {
- struct iommu_sva sva;
struct mm_struct *mm;
struct arm_smmu_mmu_notifier *smmu_mn;
struct list_head list;
- refcount_t refs;
};
#define sva_to_bond(handle) \
@@ -38,6 +36,25 @@ struct arm_smmu_bond {
static DEFINE_MUTEX(sva_lock);
/*
+ * Write the CD to the CD tables for all masters that this domain is attached
+ * to. Note that this is only used to update existing CD entries in the target
+ * CD table, for which it's assumed that arm_smmu_write_ctx_desc can't fail.
+ */
+static void arm_smmu_update_ctx_desc_devices(struct arm_smmu_domain *smmu_domain,
+ int ssid,
+ struct arm_smmu_ctx_desc *cd)
+{
+ struct arm_smmu_master *master;
+ unsigned long flags;
+
+ spin_lock_irqsave(&smmu_domain->devices_lock, flags);
+ list_for_each_entry(master, &smmu_domain->devices, domain_head) {
+ arm_smmu_write_ctx_desc(master, ssid, cd);
+ }
+ spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
+}
+
+/*
* Check if the CPU ASID is available on the SMMU side. If a private context
* descriptor is using it, try to replace it.
*/
@@ -62,7 +79,7 @@ arm_smmu_share_asid(struct mm_struct *mm, u16 asid)
return cd;
}
- smmu_domain = container_of(cd, struct arm_smmu_domain, s1_cfg.cd);
+ smmu_domain = container_of(cd, struct arm_smmu_domain, cd);
smmu = smmu_domain->smmu;
ret = xa_alloc(&arm_smmu_asid_xa, &new_asid, cd,
@@ -80,7 +97,7 @@ arm_smmu_share_asid(struct mm_struct *mm, u16 asid)
* be some overlap between use of both ASIDs, until we invalidate the
* TLB.
*/
- arm_smmu_write_ctx_desc(smmu_domain, IOMMU_NO_PASID, cd);
+ arm_smmu_update_ctx_desc_devices(smmu_domain, IOMMU_NO_PASID, cd);
/* Invalidate TLB entries previously associated with that context */
arm_smmu_tlb_inv_asid(smmu, asid);
@@ -247,7 +264,7 @@ static void arm_smmu_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
* DMA may still be running. Keep the cd valid to avoid C_BAD_CD events,
* but disable translation.
*/
- arm_smmu_write_ctx_desc(smmu_domain, mm->pasid, &quiet_cd);
+ arm_smmu_update_ctx_desc_devices(smmu_domain, mm->pasid, &quiet_cd);
arm_smmu_tlb_inv_asid(smmu_domain->smmu, smmu_mn->cd->asid);
arm_smmu_atc_inv_domain(smmu_domain, mm->pasid, 0, 0);
@@ -273,8 +290,10 @@ arm_smmu_mmu_notifier_get(struct arm_smmu_domain *smmu_domain,
struct mm_struct *mm)
{
int ret;
+ unsigned long flags;
struct arm_smmu_ctx_desc *cd;
struct arm_smmu_mmu_notifier *smmu_mn;
+ struct arm_smmu_master *master;
list_for_each_entry(smmu_mn, &smmu_domain->mmu_notifiers, list) {
if (smmu_mn->mn.mm == mm) {
@@ -304,7 +323,16 @@ arm_smmu_mmu_notifier_get(struct arm_smmu_domain *smmu_domain,
goto err_free_cd;
}
- ret = arm_smmu_write_ctx_desc(smmu_domain, mm->pasid, cd);
+ spin_lock_irqsave(&smmu_domain->devices_lock, flags);
+ list_for_each_entry(master, &smmu_domain->devices, domain_head) {
+ ret = arm_smmu_write_ctx_desc(master, mm->pasid, cd);
+ if (ret) {
+ list_for_each_entry_from_reverse(master, &smmu_domain->devices, domain_head)
+ arm_smmu_write_ctx_desc(master, mm->pasid, NULL);
+ break;
+ }
+ }
+ spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
if (ret)
goto err_put_notifier;
@@ -329,7 +357,8 @@ static void arm_smmu_mmu_notifier_put(struct arm_smmu_mmu_notifier *smmu_mn)
return;
list_del(&smmu_mn->list);
- arm_smmu_write_ctx_desc(smmu_domain, mm->pasid, NULL);
+
+ arm_smmu_update_ctx_desc_devices(smmu_domain, mm->pasid, NULL);
/*
* If we went through clear(), we've already invalidated, and no
@@ -345,8 +374,7 @@ static void arm_smmu_mmu_notifier_put(struct arm_smmu_mmu_notifier *smmu_mn)
arm_smmu_free_shared_cd(cd);
}
-static struct iommu_sva *
-__arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm)
+static int __arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm)
{
int ret;
struct arm_smmu_bond *bond;
@@ -355,23 +383,13 @@ __arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm)
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
if (!master || !master->sva_enabled)
- return ERR_PTR(-ENODEV);
-
- /* If bind() was already called for this {dev, mm} pair, reuse it. */
- list_for_each_entry(bond, &master->bonds, list) {
- if (bond->mm == mm) {
- refcount_inc(&bond->refs);
- return &bond->sva;
- }
- }
+ return -ENODEV;
bond = kzalloc(sizeof(*bond), GFP_KERNEL);
if (!bond)
- return ERR_PTR(-ENOMEM);
+ return -ENOMEM;
bond->mm = mm;
- bond->sva.dev = dev;
- refcount_set(&bond->refs, 1);
bond->smmu_mn = arm_smmu_mmu_notifier_get(smmu_domain, mm);
if (IS_ERR(bond->smmu_mn)) {
@@ -380,11 +398,11 @@ __arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm)
}
list_add(&bond->list, &master->bonds);
- return &bond->sva;
+ return 0;
err_free_bond:
kfree(bond);
- return ERR_PTR(ret);
+ return ret;
}
bool arm_smmu_sva_supported(struct arm_smmu_device *smmu)
@@ -550,7 +568,7 @@ void arm_smmu_sva_remove_dev_pasid(struct iommu_domain *domain,
}
}
- if (!WARN_ON(!bond) && refcount_dec_and_test(&bond->refs)) {
+ if (!WARN_ON(!bond)) {
list_del(&bond->list);
arm_smmu_mmu_notifier_put(bond->smmu_mn);
kfree(bond);
@@ -562,13 +580,10 @@ static int arm_smmu_sva_set_dev_pasid(struct iommu_domain *domain,
struct device *dev, ioasid_t id)
{
int ret = 0;
- struct iommu_sva *handle;
struct mm_struct *mm = domain->mm;
mutex_lock(&sva_lock);
- handle = __arm_smmu_sva_bind(dev, mm);
- if (IS_ERR(handle))
- ret = PTR_ERR(handle);
+ ret = __arm_smmu_sva_bind(dev, mm);
mutex_unlock(&sva_lock);
return ret;
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index bd0a596f9863..7445454c2af2 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -971,14 +971,12 @@ void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
}
-static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
+static void arm_smmu_sync_cd(struct arm_smmu_master *master,
int ssid, bool leaf)
{
size_t i;
- unsigned long flags;
- struct arm_smmu_master *master;
struct arm_smmu_cmdq_batch cmds;
- struct arm_smmu_device *smmu = smmu_domain->smmu;
+ struct arm_smmu_device *smmu = master->smmu;
struct arm_smmu_cmdq_ent cmd = {
.opcode = CMDQ_OP_CFGI_CD,
.cfgi = {
@@ -988,15 +986,10 @@ static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
};
cmds.num = 0;
-
- spin_lock_irqsave(&smmu_domain->devices_lock, flags);
- list_for_each_entry(master, &smmu_domain->devices, domain_head) {
- for (i = 0; i < master->num_streams; i++) {
- cmd.cfgi.sid = master->streams[i].id;
- arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
- }
+ for (i = 0; i < master->num_streams; i++) {
+ cmd.cfgi.sid = master->streams[i].id;
+ arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
}
- spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
arm_smmu_cmdq_batch_submit(smmu, &cmds);
}
@@ -1026,34 +1019,33 @@ static void arm_smmu_write_cd_l1_desc(__le64 *dst,
WRITE_ONCE(*dst, cpu_to_le64(val));
}
-static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_domain *smmu_domain,
- u32 ssid)
+static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_master *master, u32 ssid)
{
__le64 *l1ptr;
unsigned int idx;
struct arm_smmu_l1_ctx_desc *l1_desc;
- struct arm_smmu_device *smmu = smmu_domain->smmu;
- struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
+ struct arm_smmu_device *smmu = master->smmu;
+ struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
- if (smmu_domain->s1_cfg.s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
- return cdcfg->cdtab + ssid * CTXDESC_CD_DWORDS;
+ if (cd_table->s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
+ return cd_table->cdtab + ssid * CTXDESC_CD_DWORDS;
idx = ssid >> CTXDESC_SPLIT;
- l1_desc = &cdcfg->l1_desc[idx];
+ l1_desc = &cd_table->l1_desc[idx];
if (!l1_desc->l2ptr) {
if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc))
return NULL;
- l1ptr = cdcfg->cdtab + idx * CTXDESC_L1_DESC_DWORDS;
+ l1ptr = cd_table->cdtab + idx * CTXDESC_L1_DESC_DWORDS;
arm_smmu_write_cd_l1_desc(l1ptr, l1_desc);
/* An invalid L1CD can be cached */
- arm_smmu_sync_cd(smmu_domain, ssid, false);
+ arm_smmu_sync_cd(master, ssid, false);
}
idx = ssid & (CTXDESC_L2_ENTRIES - 1);
return l1_desc->l2ptr + idx * CTXDESC_CD_DWORDS;
}
-int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
+int arm_smmu_write_ctx_desc(struct arm_smmu_master *master, int ssid,
struct arm_smmu_ctx_desc *cd)
{
/*
@@ -1070,11 +1062,12 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
u64 val;
bool cd_live;
__le64 *cdptr;
+ struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
- if (WARN_ON(ssid >= (1 << smmu_domain->s1_cfg.s1cdmax)))
+ if (WARN_ON(ssid >= (1 << cd_table->s1cdmax)))
return -E2BIG;
- cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid);
+ cdptr = arm_smmu_get_cd_ptr(master, ssid);
if (!cdptr)
return -ENOMEM;
@@ -1098,11 +1091,11 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
cdptr[3] = cpu_to_le64(cd->mair);
/*
- * STE is live, and the SMMU might read dwords of this CD in any
+ * STE may be live, and the SMMU might read dwords of this CD in any
* order. Ensure that it observes valid values before reading
* V=1.
*/
- arm_smmu_sync_cd(smmu_domain, ssid, true);
+ arm_smmu_sync_cd(master, ssid, true);
val = cd->tcr |
#ifdef __BIG_ENDIAN
@@ -1114,7 +1107,7 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) |
CTXDESC_CD_0_V;
- if (smmu_domain->stall_enabled)
+ if (cd_table->stall_enabled)
val |= CTXDESC_CD_0_S;
}
@@ -1128,44 +1121,45 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
* without first making the structure invalid.
*/
WRITE_ONCE(cdptr[0], cpu_to_le64(val));
- arm_smmu_sync_cd(smmu_domain, ssid, true);
+ arm_smmu_sync_cd(master, ssid, true);
return 0;
}
-static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain)
+static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master)
{
int ret;
size_t l1size;
size_t max_contexts;
- struct arm_smmu_device *smmu = smmu_domain->smmu;
- struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
- struct arm_smmu_ctx_desc_cfg *cdcfg = &cfg->cdcfg;
+ struct arm_smmu_device *smmu = master->smmu;
+ struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
- max_contexts = 1 << cfg->s1cdmax;
+ cd_table->stall_enabled = master->stall_enabled;
+ cd_table->s1cdmax = master->ssid_bits;
+ max_contexts = 1 << cd_table->s1cdmax;
if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
max_contexts <= CTXDESC_L2_ENTRIES) {
- cfg->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
- cdcfg->num_l1_ents = max_contexts;
+ cd_table->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
+ cd_table->num_l1_ents = max_contexts;
l1size = max_contexts * (CTXDESC_CD_DWORDS << 3);
} else {
- cfg->s1fmt = STRTAB_STE_0_S1FMT_64K_L2;
- cdcfg->num_l1_ents = DIV_ROUND_UP(max_contexts,
+ cd_table->s1fmt = STRTAB_STE_0_S1FMT_64K_L2;
+ cd_table->num_l1_ents = DIV_ROUND_UP(max_contexts,
CTXDESC_L2_ENTRIES);
- cdcfg->l1_desc = devm_kcalloc(smmu->dev, cdcfg->num_l1_ents,
- sizeof(*cdcfg->l1_desc),
+ cd_table->l1_desc = devm_kcalloc(smmu->dev, cd_table->num_l1_ents,
+ sizeof(*cd_table->l1_desc),
GFP_KERNEL);
- if (!cdcfg->l1_desc)
+ if (!cd_table->l1_desc)
return -ENOMEM;
- l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
+ l1size = cd_table->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
}
- cdcfg->cdtab = dmam_alloc_coherent(smmu->dev, l1size, &cdcfg->cdtab_dma,
+ cd_table->cdtab = dmam_alloc_coherent(smmu->dev, l1size, &cd_table->cdtab_dma,
GFP_KERNEL);
- if (!cdcfg->cdtab) {
+ if (!cd_table->cdtab) {
dev_warn(smmu->dev, "failed to allocate context descriptor\n");
ret = -ENOMEM;
goto err_free_l1;
@@ -1174,42 +1168,42 @@ static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain)
return 0;
err_free_l1:
- if (cdcfg->l1_desc) {
- devm_kfree(smmu->dev, cdcfg->l1_desc);
- cdcfg->l1_desc = NULL;
+ if (cd_table->l1_desc) {
+ devm_kfree(smmu->dev, cd_table->l1_desc);
+ cd_table->l1_desc = NULL;
}
return ret;
}
-static void arm_smmu_free_cd_tables(struct arm_smmu_domain *smmu_domain)
+static void arm_smmu_free_cd_tables(struct arm_smmu_master *master)
{
int i;
size_t size, l1size;
- struct arm_smmu_device *smmu = smmu_domain->smmu;
- struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
+ struct arm_smmu_device *smmu = master->smmu;
+ struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
- if (cdcfg->l1_desc) {
+ if (cd_table->l1_desc) {
size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
- for (i = 0; i < cdcfg->num_l1_ents; i++) {
- if (!cdcfg->l1_desc[i].l2ptr)
+ for (i = 0; i < cd_table->num_l1_ents; i++) {
+ if (!cd_table->l1_desc[i].l2ptr)
continue;
dmam_free_coherent(smmu->dev, size,
- cdcfg->l1_desc[i].l2ptr,
- cdcfg->l1_desc[i].l2ptr_dma);
+ cd_table->l1_desc[i].l2ptr,
+ cd_table->l1_desc[i].l2ptr_dma);
}
- devm_kfree(smmu->dev, cdcfg->l1_desc);
- cdcfg->l1_desc = NULL;
+ devm_kfree(smmu->dev, cd_table->l1_desc);
+ cd_table->l1_desc = NULL;
- l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
+ l1size = cd_table->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
} else {
- l1size = cdcfg->num_l1_ents * (CTXDESC_CD_DWORDS << 3);
+ l1size = cd_table->num_l1_ents * (CTXDESC_CD_DWORDS << 3);
}
- dmam_free_coherent(smmu->dev, l1size, cdcfg->cdtab, cdcfg->cdtab_dma);
- cdcfg->cdtab_dma = 0;
- cdcfg->cdtab = NULL;
+ dmam_free_coherent(smmu->dev, l1size, cd_table->cdtab, cd_table->cdtab_dma);
+ cd_table->cdtab_dma = 0;
+ cd_table->cdtab = NULL;
}
bool arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd)
@@ -1276,7 +1270,7 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
u64 val = le64_to_cpu(dst[0]);
bool ste_live = false;
struct arm_smmu_device *smmu = NULL;
- struct arm_smmu_s1_cfg *s1_cfg = NULL;
+ struct arm_smmu_ctx_desc_cfg *cd_table = NULL;
struct arm_smmu_s2_cfg *s2_cfg = NULL;
struct arm_smmu_domain *smmu_domain = NULL;
struct arm_smmu_cmdq_ent prefetch_cmd = {
@@ -1294,7 +1288,7 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
if (smmu_domain) {
switch (smmu_domain->stage) {
case ARM_SMMU_DOMAIN_S1:
- s1_cfg = &smmu_domain->s1_cfg;
+ cd_table = &master->cd_table;
break;
case ARM_SMMU_DOMAIN_S2:
case ARM_SMMU_DOMAIN_NESTED:
@@ -1325,7 +1319,7 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
val = STRTAB_STE_0_V;
/* Bypass/fault */
- if (!smmu_domain || !(s1_cfg || s2_cfg)) {
+ if (!smmu_domain || !(cd_table || s2_cfg)) {
if (!smmu_domain && disable_bypass)
val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
else
@@ -1344,7 +1338,7 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
return;
}
- if (s1_cfg) {
+ if (cd_table) {
u64 strw = smmu->features & ARM_SMMU_FEAT_E2H ?
STRTAB_STE_1_STRW_EL2 : STRTAB_STE_1_STRW_NSEL1;
@@ -1360,10 +1354,10 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
!master->stall_enabled)
dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
- val |= (s1_cfg->cdcfg.cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
+ val |= (cd_table->cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
- FIELD_PREP(STRTAB_STE_0_S1CDMAX, s1_cfg->s1cdmax) |
- FIELD_PREP(STRTAB_STE_0_S1FMT, s1_cfg->s1fmt);
+ FIELD_PREP(STRTAB_STE_0_S1CDMAX, cd_table->s1cdmax) |
+ FIELD_PREP(STRTAB_STE_0_S1FMT, cd_table->s1fmt);
}
if (s2_cfg) {
@@ -1869,7 +1863,7 @@ static void arm_smmu_tlb_inv_context(void *cookie)
* careful, 007.
*/
if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
- arm_smmu_tlb_inv_asid(smmu, smmu_domain->s1_cfg.cd.asid);
+ arm_smmu_tlb_inv_asid(smmu, smmu_domain->cd.asid);
} else {
cmd.opcode = CMDQ_OP_TLBI_S12_VMALL;
cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid;
@@ -1962,7 +1956,7 @@ static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size,
if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
cmd.opcode = smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA;
- cmd.tlbi.asid = smmu_domain->s1_cfg.cd.asid;
+ cmd.tlbi.asid = smmu_domain->cd.asid;
} else {
cmd.opcode = CMDQ_OP_TLBI_S2_IPA;
cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid;
@@ -2067,15 +2061,11 @@ static void arm_smmu_domain_free(struct iommu_domain *domain)
free_io_pgtable_ops(smmu_domain->pgtbl_ops);
- /* Free the CD and ASID, if we allocated them */
+ /* Free the ASID or VMID */
if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
- struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
-
/* Prevent SVA from touching the CD while we're freeing it */
mutex_lock(&arm_smmu_asid_lock);
- if (cfg->cdcfg.cdtab)
- arm_smmu_free_cd_tables(smmu_domain);
- arm_smmu_free_asid(&cfg->cd);
+ arm_smmu_free_asid(&smmu_domain->cd);
mutex_unlock(&arm_smmu_asid_lock);
} else {
struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
@@ -2087,66 +2077,43 @@ static void arm_smmu_domain_free(struct iommu_domain *domain)
}
static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
- struct arm_smmu_master *master,
struct io_pgtable_cfg *pgtbl_cfg)
{
int ret;
u32 asid;
struct arm_smmu_device *smmu = smmu_domain->smmu;
- struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
+ struct arm_smmu_ctx_desc *cd = &smmu_domain->cd;
typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr = &pgtbl_cfg->arm_lpae_s1_cfg.tcr;
- refcount_set(&cfg->cd.refs, 1);
+ refcount_set(&cd->refs, 1);
/* Prevent SVA from modifying the ASID until it is written to the CD */
mutex_lock(&arm_smmu_asid_lock);
- ret = xa_alloc(&arm_smmu_asid_xa, &asid, &cfg->cd,
+ ret = xa_alloc(&arm_smmu_asid_xa, &asid, cd,
XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
if (ret)
goto out_unlock;
- cfg->s1cdmax = master->ssid_bits;
-
- smmu_domain->stall_enabled = master->stall_enabled;
-
- ret = arm_smmu_alloc_cd_tables(smmu_domain);
- if (ret)
- goto out_free_asid;
-
- cfg->cd.asid = (u16)asid;
- cfg->cd.ttbr = pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
- cfg->cd.tcr = FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) |
+ cd->asid = (u16)asid;
+ cd->ttbr = pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
+ cd->tcr = FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) |
FIELD_PREP(CTXDESC_CD_0_TCR_TG0, tcr->tg) |
FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, tcr->irgn) |
FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, tcr->orgn) |
FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) |
FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) |
CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64;
- cfg->cd.mair = pgtbl_cfg->arm_lpae_s1_cfg.mair;
-
- /*
- * Note that this will end up calling arm_smmu_sync_cd() before
- * the master has been added to the devices list for this domain.
- * This isn't an issue because the STE hasn't been installed yet.
- */
- ret = arm_smmu_write_ctx_desc(smmu_domain, IOMMU_NO_PASID, &cfg->cd);
- if (ret)
- goto out_free_cd_tables;
+ cd->mair = pgtbl_cfg->arm_lpae_s1_cfg.mair;
mutex_unlock(&arm_smmu_asid_lock);
return 0;
-out_free_cd_tables:
- arm_smmu_free_cd_tables(smmu_domain);
-out_free_asid:
- arm_smmu_free_asid(&cfg->cd);
out_unlock:
mutex_unlock(&arm_smmu_asid_lock);
return ret;
}
static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
- struct arm_smmu_master *master,
struct io_pgtable_cfg *pgtbl_cfg)
{
int vmid;
@@ -2173,8 +2140,7 @@ static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
return 0;
}
-static int arm_smmu_domain_finalise(struct iommu_domain *domain,
- struct arm_smmu_master *master)
+static int arm_smmu_domain_finalise(struct iommu_domain *domain)
{
int ret;
unsigned long ias, oas;
@@ -2182,7 +2148,6 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain,
struct io_pgtable_cfg pgtbl_cfg;
struct io_pgtable_ops *pgtbl_ops;
int (*finalise_stage_fn)(struct arm_smmu_domain *,
- struct arm_smmu_master *,
struct io_pgtable_cfg *);
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
struct arm_smmu_device *smmu = smmu_domain->smmu;
@@ -2234,7 +2199,7 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain,
domain->geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
domain->geometry.force_aperture = true;
- ret = finalise_stage_fn(smmu_domain, master, &pgtbl_cfg);
+ ret = finalise_stage_fn(smmu_domain, &pgtbl_cfg);
if (ret < 0) {
free_io_pgtable_ops(pgtbl_ops);
return ret;
@@ -2403,6 +2368,14 @@ static void arm_smmu_detach_dev(struct arm_smmu_master *master)
master->domain = NULL;
master->ats_enabled = false;
arm_smmu_install_ste_for_dev(master);
+ /*
+ * Clearing the CD entry isn't strictly required to detach the domain
+ * since the table is uninstalled anyway, but it helps avoid confusion
+ * in the call to arm_smmu_write_ctx_desc on the next attach (which
+ * expects the entry to be empty).
+ */
+ if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 && master->cd_table.cdtab)
+ arm_smmu_write_ctx_desc(master, IOMMU_NO_PASID, NULL);
}
static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
@@ -2436,23 +2409,15 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
if (!smmu_domain->smmu) {
smmu_domain->smmu = smmu;
- ret = arm_smmu_domain_finalise(domain, master);
- if (ret) {
+ ret = arm_smmu_domain_finalise(domain);
+ if (ret)
smmu_domain->smmu = NULL;
- goto out_unlock;
- }
- } else if (smmu_domain->smmu != smmu) {
- ret = -EINVAL;
- goto out_unlock;
- } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
- master->ssid_bits != smmu_domain->s1_cfg.s1cdmax) {
+ } else if (smmu_domain->smmu != smmu)
ret = -EINVAL;
- goto out_unlock;
- } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
- smmu_domain->stall_enabled != master->stall_enabled) {
- ret = -EINVAL;
- goto out_unlock;
- }
+
+ mutex_unlock(&smmu_domain->init_mutex);
+ if (ret)
+ return ret;
master->domain = smmu_domain;
@@ -2466,16 +2431,42 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
if (smmu_domain->stage != ARM_SMMU_DOMAIN_BYPASS)
master->ats_enabled = arm_smmu_ats_supported(master);
- arm_smmu_install_ste_for_dev(master);
-
spin_lock_irqsave(&smmu_domain->devices_lock, flags);
list_add(&master->domain_head, &smmu_domain->devices);
spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
+ if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
+ if (!master->cd_table.cdtab) {
+ ret = arm_smmu_alloc_cd_tables(master);
+ if (ret) {
+ master->domain = NULL;
+ goto out_list_del;
+ }
+ }
+
+ /*
+ * Prevent SVA from concurrently modifying the CD or writing to
+ * the CD entry
+ */
+ mutex_lock(&arm_smmu_asid_lock);
+ ret = arm_smmu_write_ctx_desc(master, IOMMU_NO_PASID, &smmu_domain->cd);
+ mutex_unlock(&arm_smmu_asid_lock);
+ if (ret) {
+ master->domain = NULL;
+ goto out_list_del;
+ }
+ }
+
+ arm_smmu_install_ste_for_dev(master);
+
arm_smmu_enable_ats(master);
+ return 0;
+
+out_list_del:
+ spin_lock_irqsave(&smmu_domain->devices_lock, flags);
+ list_del(&master->domain_head);
+ spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
-out_unlock:
- mutex_unlock(&smmu_domain->init_mutex);
return ret;
}
@@ -2720,6 +2711,8 @@ static void arm_smmu_release_device(struct device *dev)
arm_smmu_detach_dev(master);
arm_smmu_disable_pasid(master);
arm_smmu_remove_master(master);
+ if (master->cd_table.cdtab)
+ arm_smmu_free_cd_tables(master);
kfree(master);
}
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index 9915850dd4db..961205ba86d2 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -595,13 +595,11 @@ struct arm_smmu_ctx_desc_cfg {
dma_addr_t cdtab_dma;
struct arm_smmu_l1_ctx_desc *l1_desc;
unsigned int num_l1_ents;
-};
-
-struct arm_smmu_s1_cfg {
- struct arm_smmu_ctx_desc_cfg cdcfg;
- struct arm_smmu_ctx_desc cd;
u8 s1fmt;
+ /* log2 of the maximum number of CDs supported by this table */
u8 s1cdmax;
+ /* Whether CD entries in this table have the stall bit set. */
+ u8 stall_enabled:1;
};
struct arm_smmu_s2_cfg {
@@ -697,6 +695,8 @@ struct arm_smmu_master {
struct arm_smmu_domain *domain;
struct list_head domain_head;
struct arm_smmu_stream *streams;
+ /* Locked by the iommu core using the group mutex */
+ struct arm_smmu_ctx_desc_cfg cd_table;
unsigned int num_streams;
bool ats_enabled;
bool stall_enabled;
@@ -719,13 +719,12 @@ struct arm_smmu_domain {
struct mutex init_mutex; /* Protects smmu pointer */
struct io_pgtable_ops *pgtbl_ops;
- bool stall_enabled;
atomic_t nr_ats_masters;
enum arm_smmu_domain_stage stage;
union {
- struct arm_smmu_s1_cfg s1_cfg;
- struct arm_smmu_s2_cfg s2_cfg;
+ struct arm_smmu_ctx_desc cd;
+ struct arm_smmu_s2_cfg s2_cfg;
};
struct iommu_domain domain;
@@ -745,7 +744,7 @@ extern struct xarray arm_smmu_asid_xa;
extern struct mutex arm_smmu_asid_lock;
extern struct arm_smmu_ctx_desc quiet_cd;
-int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
+int arm_smmu_write_ctx_desc(struct arm_smmu_master *smmu_master, int ssid,
struct arm_smmu_ctx_desc *cd);
void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid);
void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid,
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
index 7f52ac67495f..549ae4dba3a6 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
@@ -251,6 +251,7 @@ static const struct of_device_id qcom_smmu_client_of_match[] __maybe_unused = {
{ .compatible = "qcom,sc7280-mss-pil" },
{ .compatible = "qcom,sc8180x-mdss" },
{ .compatible = "qcom,sc8280xp-mdss" },
+ { .compatible = "qcom,sdm670-mdss" },
{ .compatible = "qcom,sdm845-mdss" },
{ .compatible = "qcom,sdm845-mss-pil" },
{ .compatible = "qcom,sm6350-mdss" },
@@ -532,6 +533,7 @@ static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = {
{ .compatible = "qcom,sm6350-smmu-500", .data = &qcom_smmu_500_impl0_data },
{ .compatible = "qcom,sm6375-smmu-v2", .data = &qcom_smmu_v2_data },
{ .compatible = "qcom,sm6375-smmu-500", .data = &qcom_smmu_500_impl0_data },
+ { .compatible = "qcom,sm7150-smmu-v2", .data = &qcom_smmu_v2_data },
{ .compatible = "qcom,sm8150-smmu-500", .data = &qcom_smmu_500_impl0_data },
{ .compatible = "qcom,sm8250-smmu-500", .data = &qcom_smmu_500_impl0_data },
{ .compatible = "qcom,sm8350-smmu-500", .data = &qcom_smmu_500_impl0_data },
diff --git a/drivers/iommu/arm/arm-smmu/qcom_iommu.c b/drivers/iommu/arm/arm-smmu/qcom_iommu.c
index 775a3cbaff4e..97b2122032b2 100644
--- a/drivers/iommu/arm/arm-smmu/qcom_iommu.c
+++ b/drivers/iommu/arm/arm-smmu/qcom_iommu.c
@@ -332,12 +332,10 @@ out_unlock:
return ret;
}
-static struct iommu_domain *qcom_iommu_domain_alloc(unsigned type)
+static struct iommu_domain *qcom_iommu_domain_alloc_paging(struct device *dev)
{
struct qcom_iommu_domain *qcom_domain;
- if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_DMA)
- return NULL;
/*
* Allocate the domain and initialise some of its data structures.
* We can't really do anything meaningful until we've added a
@@ -400,6 +398,44 @@ static int qcom_iommu_attach_dev(struct iommu_domain *domain, struct device *dev
return 0;
}
+static int qcom_iommu_identity_attach(struct iommu_domain *identity_domain,
+ struct device *dev)
+{
+ struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
+ struct qcom_iommu_domain *qcom_domain;
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
+ struct qcom_iommu_dev *qcom_iommu = to_iommu(dev);
+ unsigned int i;
+
+ if (domain == identity_domain || !domain)
+ return 0;
+
+ qcom_domain = to_qcom_iommu_domain(domain);
+ if (WARN_ON(!qcom_domain->iommu))
+ return -EINVAL;
+
+ pm_runtime_get_sync(qcom_iommu->dev);
+ for (i = 0; i < fwspec->num_ids; i++) {
+ struct qcom_iommu_ctx *ctx = to_ctx(qcom_domain, fwspec->ids[i]);
+
+ /* Disable the context bank: */
+ iommu_writel(ctx, ARM_SMMU_CB_SCTLR, 0);
+
+ ctx->domain = NULL;
+ }
+ pm_runtime_put_sync(qcom_iommu->dev);
+ return 0;
+}
+
+static struct iommu_domain_ops qcom_iommu_identity_ops = {
+ .attach_dev = qcom_iommu_identity_attach,
+};
+
+static struct iommu_domain qcom_iommu_identity_domain = {
+ .type = IOMMU_DOMAIN_IDENTITY,
+ .ops = &qcom_iommu_identity_ops,
+};
+
static int qcom_iommu_map(struct iommu_domain *domain, unsigned long iova,
phys_addr_t paddr, size_t pgsize, size_t pgcount,
int prot, gfp_t gfp, size_t *mapped)
@@ -565,8 +601,9 @@ static int qcom_iommu_of_xlate(struct device *dev, struct of_phandle_args *args)
}
static const struct iommu_ops qcom_iommu_ops = {
+ .identity_domain = &qcom_iommu_identity_domain,
.capable = qcom_iommu_capable,
- .domain_alloc = qcom_iommu_domain_alloc,
+ .domain_alloc_paging = qcom_iommu_domain_alloc_paging,
.probe_device = qcom_iommu_probe_device,
.device_group = generic_device_group,
.of_xlate = qcom_iommu_of_xlate,
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 4b1a88f514c9..85163a83df2f 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -43,14 +43,28 @@ enum iommu_dma_cookie_type {
IOMMU_DMA_MSI_COOKIE,
};
+enum iommu_dma_queue_type {
+ IOMMU_DMA_OPTS_PER_CPU_QUEUE,
+ IOMMU_DMA_OPTS_SINGLE_QUEUE,
+};
+
+struct iommu_dma_options {
+ enum iommu_dma_queue_type qt;
+ size_t fq_size;
+ unsigned int fq_timeout;
+};
+
struct iommu_dma_cookie {
enum iommu_dma_cookie_type type;
union {
/* Full allocator for IOMMU_DMA_IOVA_COOKIE */
struct {
struct iova_domain iovad;
-
- struct iova_fq __percpu *fq; /* Flush queue */
+ /* Flush queue */
+ union {
+ struct iova_fq *single_fq;
+ struct iova_fq __percpu *percpu_fq;
+ };
/* Number of TLB flushes that have been started */
atomic64_t fq_flush_start_cnt;
/* Number of TLB flushes that have been finished */
@@ -67,6 +81,8 @@ struct iommu_dma_cookie {
/* Domain for flush queue callback; NULL if flush queue not in use */
struct iommu_domain *fq_domain;
+ /* Options for dma-iommu use */
+ struct iommu_dma_options options;
struct mutex mutex;
};
@@ -84,10 +100,12 @@ static int __init iommu_dma_forcedac_setup(char *str)
early_param("iommu.forcedac", iommu_dma_forcedac_setup);
/* Number of entries per flush queue */
-#define IOVA_FQ_SIZE 256
+#define IOVA_DEFAULT_FQ_SIZE 256
+#define IOVA_SINGLE_FQ_SIZE 32768
/* Timeout (in ms) after which entries are flushed from the queue */
-#define IOVA_FQ_TIMEOUT 10
+#define IOVA_DEFAULT_FQ_TIMEOUT 10
+#define IOVA_SINGLE_FQ_TIMEOUT 1000
/* Flush queue entry for deferred flushing */
struct iova_fq_entry {
@@ -99,18 +117,19 @@ struct iova_fq_entry {
/* Per-CPU flush queue structure */
struct iova_fq {
- struct iova_fq_entry entries[IOVA_FQ_SIZE];
- unsigned int head, tail;
spinlock_t lock;
+ unsigned int head, tail;
+ unsigned int mod_mask;
+ struct iova_fq_entry entries[];
};
#define fq_ring_for_each(i, fq) \
- for ((i) = (fq)->head; (i) != (fq)->tail; (i) = ((i) + 1) % IOVA_FQ_SIZE)
+ for ((i) = (fq)->head; (i) != (fq)->tail; (i) = ((i) + 1) & (fq)->mod_mask)
static inline bool fq_full(struct iova_fq *fq)
{
assert_spin_locked(&fq->lock);
- return (((fq->tail + 1) % IOVA_FQ_SIZE) == fq->head);
+ return (((fq->tail + 1) & fq->mod_mask) == fq->head);
}
static inline unsigned int fq_ring_add(struct iova_fq *fq)
@@ -119,12 +138,12 @@ static inline unsigned int fq_ring_add(struct iova_fq *fq)
assert_spin_locked(&fq->lock);
- fq->tail = (idx + 1) % IOVA_FQ_SIZE;
+ fq->tail = (idx + 1) & fq->mod_mask;
return idx;
}
-static void fq_ring_free(struct iommu_dma_cookie *cookie, struct iova_fq *fq)
+static void fq_ring_free_locked(struct iommu_dma_cookie *cookie, struct iova_fq *fq)
{
u64 counter = atomic64_read(&cookie->fq_flush_finish_cnt);
unsigned int idx;
@@ -141,10 +160,19 @@ static void fq_ring_free(struct iommu_dma_cookie *cookie, struct iova_fq *fq)
fq->entries[idx].iova_pfn,
fq->entries[idx].pages);
- fq->head = (fq->head + 1) % IOVA_FQ_SIZE;
+ fq->head = (fq->head + 1) & fq->mod_mask;
}
}
+static void fq_ring_free(struct iommu_dma_cookie *cookie, struct iova_fq *fq)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&fq->lock, flags);
+ fq_ring_free_locked(cookie, fq);
+ spin_unlock_irqrestore(&fq->lock, flags);
+}
+
static void fq_flush_iotlb(struct iommu_dma_cookie *cookie)
{
atomic64_inc(&cookie->fq_flush_start_cnt);
@@ -160,14 +188,11 @@ static void fq_flush_timeout(struct timer_list *t)
atomic_set(&cookie->fq_timer_on, 0);
fq_flush_iotlb(cookie);
- for_each_possible_cpu(cpu) {
- unsigned long flags;
- struct iova_fq *fq;
-
- fq = per_cpu_ptr(cookie->fq, cpu);
- spin_lock_irqsave(&fq->lock, flags);
- fq_ring_free(cookie, fq);
- spin_unlock_irqrestore(&fq->lock, flags);
+ if (cookie->options.qt == IOMMU_DMA_OPTS_SINGLE_QUEUE) {
+ fq_ring_free(cookie, cookie->single_fq);
+ } else {
+ for_each_possible_cpu(cpu)
+ fq_ring_free(cookie, per_cpu_ptr(cookie->percpu_fq, cpu));
}
}
@@ -188,7 +213,11 @@ static void queue_iova(struct iommu_dma_cookie *cookie,
*/
smp_mb();
- fq = raw_cpu_ptr(cookie->fq);
+ if (cookie->options.qt == IOMMU_DMA_OPTS_SINGLE_QUEUE)
+ fq = cookie->single_fq;
+ else
+ fq = raw_cpu_ptr(cookie->percpu_fq);
+
spin_lock_irqsave(&fq->lock, flags);
/*
@@ -196,11 +225,11 @@ static void queue_iova(struct iommu_dma_cookie *cookie,
* flushed out on another CPU. This makes the fq_full() check below less
* likely to be true.
*/
- fq_ring_free(cookie, fq);
+ fq_ring_free_locked(cookie, fq);
if (fq_full(fq)) {
fq_flush_iotlb(cookie);
- fq_ring_free(cookie, fq);
+ fq_ring_free_locked(cookie, fq);
}
idx = fq_ring_add(fq);
@@ -216,34 +245,95 @@ static void queue_iova(struct iommu_dma_cookie *cookie,
if (!atomic_read(&cookie->fq_timer_on) &&
!atomic_xchg(&cookie->fq_timer_on, 1))
mod_timer(&cookie->fq_timer,
- jiffies + msecs_to_jiffies(IOVA_FQ_TIMEOUT));
+ jiffies + msecs_to_jiffies(cookie->options.fq_timeout));
}
-static void iommu_dma_free_fq(struct iommu_dma_cookie *cookie)
+static void iommu_dma_free_fq_single(struct iova_fq *fq)
{
- int cpu, idx;
+ int idx;
- if (!cookie->fq)
- return;
+ fq_ring_for_each(idx, fq)
+ put_pages_list(&fq->entries[idx].freelist);
+ vfree(fq);
+}
+
+static void iommu_dma_free_fq_percpu(struct iova_fq __percpu *percpu_fq)
+{
+ int cpu, idx;
- del_timer_sync(&cookie->fq_timer);
/* The IOVAs will be torn down separately, so just free our queued pages */
for_each_possible_cpu(cpu) {
- struct iova_fq *fq = per_cpu_ptr(cookie->fq, cpu);
+ struct iova_fq *fq = per_cpu_ptr(percpu_fq, cpu);
fq_ring_for_each(idx, fq)
put_pages_list(&fq->entries[idx].freelist);
}
- free_percpu(cookie->fq);
+ free_percpu(percpu_fq);
+}
+
+static void iommu_dma_free_fq(struct iommu_dma_cookie *cookie)
+{
+ if (!cookie->fq_domain)
+ return;
+
+ del_timer_sync(&cookie->fq_timer);
+ if (cookie->options.qt == IOMMU_DMA_OPTS_SINGLE_QUEUE)
+ iommu_dma_free_fq_single(cookie->single_fq);
+ else
+ iommu_dma_free_fq_percpu(cookie->percpu_fq);
+}
+
+static void iommu_dma_init_one_fq(struct iova_fq *fq, size_t fq_size)
+{
+ int i;
+
+ fq->head = 0;
+ fq->tail = 0;
+ fq->mod_mask = fq_size - 1;
+
+ spin_lock_init(&fq->lock);
+
+ for (i = 0; i < fq_size; i++)
+ INIT_LIST_HEAD(&fq->entries[i].freelist);
+}
+
+static int iommu_dma_init_fq_single(struct iommu_dma_cookie *cookie)
+{
+ size_t fq_size = cookie->options.fq_size;
+ struct iova_fq *queue;
+
+ queue = vmalloc(struct_size(queue, entries, fq_size));
+ if (!queue)
+ return -ENOMEM;
+ iommu_dma_init_one_fq(queue, fq_size);
+ cookie->single_fq = queue;
+
+ return 0;
+}
+
+static int iommu_dma_init_fq_percpu(struct iommu_dma_cookie *cookie)
+{
+ size_t fq_size = cookie->options.fq_size;
+ struct iova_fq __percpu *queue;
+ int cpu;
+
+ queue = __alloc_percpu(struct_size(queue, entries, fq_size),
+ __alignof__(*queue));
+ if (!queue)
+ return -ENOMEM;
+
+ for_each_possible_cpu(cpu)
+ iommu_dma_init_one_fq(per_cpu_ptr(queue, cpu), fq_size);
+ cookie->percpu_fq = queue;
+ return 0;
}
/* sysfs updates are serialised by the mutex of the group owning @domain */
int iommu_dma_init_fq(struct iommu_domain *domain)
{
struct iommu_dma_cookie *cookie = domain->iova_cookie;
- struct iova_fq __percpu *queue;
- int i, cpu;
+ int rc;
if (cookie->fq_domain)
return 0;
@@ -251,26 +341,16 @@ int iommu_dma_init_fq(struct iommu_domain *domain)
atomic64_set(&cookie->fq_flush_start_cnt, 0);
atomic64_set(&cookie->fq_flush_finish_cnt, 0);
- queue = alloc_percpu(struct iova_fq);
- if (!queue) {
+ if (cookie->options.qt == IOMMU_DMA_OPTS_SINGLE_QUEUE)
+ rc = iommu_dma_init_fq_single(cookie);
+ else
+ rc = iommu_dma_init_fq_percpu(cookie);
+
+ if (rc) {
pr_warn("iova flush queue initialization failed\n");
return -ENOMEM;
}
- for_each_possible_cpu(cpu) {
- struct iova_fq *fq = per_cpu_ptr(queue, cpu);
-
- fq->head = 0;
- fq->tail = 0;
-
- spin_lock_init(&fq->lock);
-
- for (i = 0; i < IOVA_FQ_SIZE; i++)
- INIT_LIST_HEAD(&fq->entries[i].freelist);
- }
-
- cookie->fq = queue;
-
timer_setup(&cookie->fq_timer, fq_flush_timeout, 0);
atomic_set(&cookie->fq_timer_on, 0);
/*
@@ -555,6 +635,28 @@ static bool dev_use_sg_swiotlb(struct device *dev, struct scatterlist *sg,
}
/**
+ * iommu_dma_init_options - Initialize dma-iommu options
+ * @options: The options to be initialized
+ * @dev: Device the options are set for
+ *
+ * This allows tuning dma-iommu specific to device properties
+ */
+static void iommu_dma_init_options(struct iommu_dma_options *options,
+ struct device *dev)
+{
+ /* Shadowing IOTLB flushes do better with a single large queue */
+ if (dev->iommu->shadow_on_flush) {
+ options->qt = IOMMU_DMA_OPTS_SINGLE_QUEUE;
+ options->fq_timeout = IOVA_SINGLE_FQ_TIMEOUT;
+ options->fq_size = IOVA_SINGLE_FQ_SIZE;
+ } else {
+ options->qt = IOMMU_DMA_OPTS_PER_CPU_QUEUE;
+ options->fq_size = IOVA_DEFAULT_FQ_SIZE;
+ options->fq_timeout = IOVA_DEFAULT_FQ_TIMEOUT;
+ }
+}
+
+/**
* iommu_dma_init_domain - Initialise a DMA mapping domain
* @domain: IOMMU domain previously prepared by iommu_get_dma_cookie()
* @base: IOVA at which the mappable address space starts
@@ -614,6 +716,8 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
if (ret)
goto done_unlock;
+ iommu_dma_init_options(&cookie->options, dev);
+
/* If the FQ fails we can simply fall back to strict mode */
if (domain->type == IOMMU_DOMAIN_DMA_FQ &&
(!device_iommu_capable(dev, IOMMU_CAP_DEFERRED_FLUSH) || iommu_dma_init_fq(domain)))
diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c
index c275fe71c4db..2c6e9094f1e9 100644
--- a/drivers/iommu/exynos-iommu.c
+++ b/drivers/iommu/exynos-iommu.c
@@ -24,6 +24,7 @@
typedef u32 sysmmu_iova_t;
typedef u32 sysmmu_pte_t;
+static struct iommu_domain exynos_identity_domain;
/* We do not consider super section mapping (16MB) */
#define SECT_ORDER 20
@@ -829,7 +830,7 @@ static int __maybe_unused exynos_sysmmu_suspend(struct device *dev)
struct exynos_iommu_owner *owner = dev_iommu_priv_get(master);
mutex_lock(&owner->rpm_lock);
- if (data->domain) {
+ if (&data->domain->domain != &exynos_identity_domain) {
dev_dbg(data->sysmmu, "saving state\n");
__sysmmu_disable(data);
}
@@ -847,7 +848,7 @@ static int __maybe_unused exynos_sysmmu_resume(struct device *dev)
struct exynos_iommu_owner *owner = dev_iommu_priv_get(master);
mutex_lock(&owner->rpm_lock);
- if (data->domain) {
+ if (&data->domain->domain != &exynos_identity_domain) {
dev_dbg(data->sysmmu, "restoring state\n");
__sysmmu_enable(data);
}
@@ -886,7 +887,7 @@ static inline void exynos_iommu_set_pte(sysmmu_pte_t *ent, sysmmu_pte_t val)
DMA_TO_DEVICE);
}
-static struct iommu_domain *exynos_iommu_domain_alloc(unsigned type)
+static struct iommu_domain *exynos_iommu_domain_alloc_paging(struct device *dev)
{
struct exynos_iommu_domain *domain;
dma_addr_t handle;
@@ -895,9 +896,6 @@ static struct iommu_domain *exynos_iommu_domain_alloc(unsigned type)
/* Check if correct PTE offsets are initialized */
BUG_ON(PG_ENT_SHIFT < 0 || !dma_dev);
- if (type != IOMMU_DOMAIN_DMA && type != IOMMU_DOMAIN_UNMANAGED)
- return NULL;
-
domain = kzalloc(sizeof(*domain), GFP_KERNEL);
if (!domain)
return NULL;
@@ -980,17 +978,20 @@ static void exynos_iommu_domain_free(struct iommu_domain *iommu_domain)
kfree(domain);
}
-static void exynos_iommu_detach_device(struct iommu_domain *iommu_domain,
- struct device *dev)
+static int exynos_iommu_identity_attach(struct iommu_domain *identity_domain,
+ struct device *dev)
{
- struct exynos_iommu_domain *domain = to_exynos_domain(iommu_domain);
struct exynos_iommu_owner *owner = dev_iommu_priv_get(dev);
- phys_addr_t pagetable = virt_to_phys(domain->pgtable);
+ struct exynos_iommu_domain *domain;
+ phys_addr_t pagetable;
struct sysmmu_drvdata *data, *next;
unsigned long flags;
- if (!has_sysmmu(dev) || owner->domain != iommu_domain)
- return;
+ if (owner->domain == identity_domain)
+ return 0;
+
+ domain = to_exynos_domain(owner->domain);
+ pagetable = virt_to_phys(domain->pgtable);
mutex_lock(&owner->rpm_lock);
@@ -1009,15 +1010,25 @@ static void exynos_iommu_detach_device(struct iommu_domain *iommu_domain,
list_del_init(&data->domain_node);
spin_unlock(&data->lock);
}
- owner->domain = NULL;
+ owner->domain = identity_domain;
spin_unlock_irqrestore(&domain->lock, flags);
mutex_unlock(&owner->rpm_lock);
- dev_dbg(dev, "%s: Detached IOMMU with pgtable %pa\n", __func__,
- &pagetable);
+ dev_dbg(dev, "%s: Restored IOMMU to IDENTITY from pgtable %pa\n",
+ __func__, &pagetable);
+ return 0;
}
+static struct iommu_domain_ops exynos_identity_ops = {
+ .attach_dev = exynos_iommu_identity_attach,
+};
+
+static struct iommu_domain exynos_identity_domain = {
+ .type = IOMMU_DOMAIN_IDENTITY,
+ .ops = &exynos_identity_ops,
+};
+
static int exynos_iommu_attach_device(struct iommu_domain *iommu_domain,
struct device *dev)
{
@@ -1026,12 +1037,11 @@ static int exynos_iommu_attach_device(struct iommu_domain *iommu_domain,
struct sysmmu_drvdata *data;
phys_addr_t pagetable = virt_to_phys(domain->pgtable);
unsigned long flags;
+ int err;
- if (!has_sysmmu(dev))
- return -ENODEV;
-
- if (owner->domain)
- exynos_iommu_detach_device(owner->domain, dev);
+ err = exynos_iommu_identity_attach(&exynos_identity_domain, dev);
+ if (err)
+ return err;
mutex_lock(&owner->rpm_lock);
@@ -1219,7 +1229,7 @@ static int lv2set_page(sysmmu_pte_t *pent, phys_addr_t paddr, size_t size,
*/
static int exynos_iommu_map(struct iommu_domain *iommu_domain,
unsigned long l_iova, phys_addr_t paddr, size_t size,
- int prot, gfp_t gfp)
+ size_t count, int prot, gfp_t gfp, size_t *mapped)
{
struct exynos_iommu_domain *domain = to_exynos_domain(iommu_domain);
sysmmu_pte_t *entry;
@@ -1253,6 +1263,8 @@ static int exynos_iommu_map(struct iommu_domain *iommu_domain,
if (ret)
pr_err("%s: Failed(%d) to map %#zx bytes @ %#x\n",
__func__, ret, size, iova);
+ else
+ *mapped = size;
spin_unlock_irqrestore(&domain->pgtablelock, flags);
@@ -1274,7 +1286,7 @@ static void exynos_iommu_tlb_invalidate_entry(struct exynos_iommu_domain *domain
}
static size_t exynos_iommu_unmap(struct iommu_domain *iommu_domain,
- unsigned long l_iova, size_t size,
+ unsigned long l_iova, size_t size, size_t count,
struct iommu_iotlb_gather *gather)
{
struct exynos_iommu_domain *domain = to_exynos_domain(iommu_domain);
@@ -1407,26 +1419,12 @@ static struct iommu_device *exynos_iommu_probe_device(struct device *dev)
return &data->iommu;
}
-static void exynos_iommu_set_platform_dma(struct device *dev)
-{
- struct exynos_iommu_owner *owner = dev_iommu_priv_get(dev);
-
- if (owner->domain) {
- struct iommu_group *group = iommu_group_get(dev);
-
- if (group) {
- exynos_iommu_detach_device(owner->domain, dev);
- iommu_group_put(group);
- }
- }
-}
-
static void exynos_iommu_release_device(struct device *dev)
{
struct exynos_iommu_owner *owner = dev_iommu_priv_get(dev);
struct sysmmu_drvdata *data;
- exynos_iommu_set_platform_dma(dev);
+ WARN_ON(exynos_iommu_identity_attach(&exynos_identity_domain, dev));
list_for_each_entry(data, &owner->controllers, owner_node)
device_link_del(data->link);
@@ -1457,6 +1455,7 @@ static int exynos_iommu_of_xlate(struct device *dev,
INIT_LIST_HEAD(&owner->controllers);
mutex_init(&owner->rpm_lock);
+ owner->domain = &exynos_identity_domain;
dev_iommu_priv_set(dev, owner);
}
@@ -1471,19 +1470,17 @@ static int exynos_iommu_of_xlate(struct device *dev,
}
static const struct iommu_ops exynos_iommu_ops = {
- .domain_alloc = exynos_iommu_domain_alloc,
+ .identity_domain = &exynos_identity_domain,
+ .domain_alloc_paging = exynos_iommu_domain_alloc_paging,
.device_group = generic_device_group,
-#ifdef CONFIG_ARM
- .set_platform_dma_ops = exynos_iommu_set_platform_dma,
-#endif
.probe_device = exynos_iommu_probe_device,
.release_device = exynos_iommu_release_device,
.pgsize_bitmap = SECT_SIZE | LPAGE_SIZE | SPAGE_SIZE,
.of_xlate = exynos_iommu_of_xlate,
.default_domain_ops = &(const struct iommu_domain_ops) {
.attach_dev = exynos_iommu_attach_device,
- .map = exynos_iommu_map,
- .unmap = exynos_iommu_unmap,
+ .map_pages = exynos_iommu_map,
+ .unmap_pages = exynos_iommu_unmap,
.iova_to_phys = exynos_iommu_iova_to_phys,
.free = exynos_iommu_domain_free,
}
diff --git a/drivers/iommu/fsl_pamu_domain.c b/drivers/iommu/fsl_pamu_domain.c
index 4ac0e247ec2b..e9d2bff4659b 100644
--- a/drivers/iommu/fsl_pamu_domain.c
+++ b/drivers/iommu/fsl_pamu_domain.c
@@ -196,6 +196,13 @@ static struct iommu_domain *fsl_pamu_domain_alloc(unsigned type)
{
struct fsl_dma_domain *dma_domain;
+ /*
+ * FIXME: This isn't creating an unmanaged domain since the
+ * default_domain_ops do not have any map/unmap function it doesn't meet
+ * the requirements for __IOMMU_DOMAIN_PAGING. The only purpose seems to
+ * allow drivers/soc/fsl/qbman/qman_portal.c to do
+ * fsl_pamu_configure_l1_stash()
+ */
if (type != IOMMU_DOMAIN_UNMANAGED)
return NULL;
@@ -283,16 +290,34 @@ static int fsl_pamu_attach_device(struct iommu_domain *domain,
return ret;
}
-static void fsl_pamu_set_platform_dma(struct device *dev)
+/*
+ * FIXME: fsl/pamu is completely broken in terms of how it works with the iommu
+ * API. Immediately after probe the HW is left in an IDENTITY translation and
+ * the driver provides a non-working UNMANAGED domain that it can switch over
+ * to. However it cannot switch back to an IDENTITY translation, instead it
+ * switches to what looks like BLOCKING.
+ */
+static int fsl_pamu_platform_attach(struct iommu_domain *platform_domain,
+ struct device *dev)
{
struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
- struct fsl_dma_domain *dma_domain = to_fsl_dma_domain(domain);
+ struct fsl_dma_domain *dma_domain;
const u32 *prop;
int len;
struct pci_dev *pdev = NULL;
struct pci_controller *pci_ctl;
/*
+ * Hack to keep things working as they always have, only leaving an
+ * UNMANAGED domain makes it BLOCKING.
+ */
+ if (domain == platform_domain || !domain ||
+ domain->type != IOMMU_DOMAIN_UNMANAGED)
+ return 0;
+
+ dma_domain = to_fsl_dma_domain(domain);
+
+ /*
* Use LIODN of the PCI controller while detaching a
* PCI device.
*/
@@ -312,8 +337,18 @@ static void fsl_pamu_set_platform_dma(struct device *dev)
detach_device(dev, dma_domain);
else
pr_debug("missing fsl,liodn property at %pOF\n", dev->of_node);
+ return 0;
}
+static struct iommu_domain_ops fsl_pamu_platform_ops = {
+ .attach_dev = fsl_pamu_platform_attach,
+};
+
+static struct iommu_domain fsl_pamu_platform_domain = {
+ .type = IOMMU_DOMAIN_PLATFORM,
+ .ops = &fsl_pamu_platform_ops,
+};
+
/* Set the domain stash attribute */
int fsl_pamu_configure_l1_stash(struct iommu_domain *domain, u32 cpu)
{
@@ -395,11 +430,11 @@ static struct iommu_device *fsl_pamu_probe_device(struct device *dev)
}
static const struct iommu_ops fsl_pamu_ops = {
+ .default_domain = &fsl_pamu_platform_domain,
.capable = fsl_pamu_capable,
.domain_alloc = fsl_pamu_domain_alloc,
.probe_device = fsl_pamu_probe_device,
.device_group = fsl_pamu_device_group,
- .set_platform_dma_ops = fsl_pamu_set_platform_dma,
.default_domain_ops = &(const struct iommu_domain_ops) {
.attach_dev = fsl_pamu_attach_device,
.iova_to_phys = fsl_pamu_iova_to_phys,
diff --git a/drivers/iommu/intel/debugfs.c b/drivers/iommu/intel/debugfs.c
index 1f925285104e..dee61e513be6 100644
--- a/drivers/iommu/intel/debugfs.c
+++ b/drivers/iommu/intel/debugfs.c
@@ -111,6 +111,8 @@ static const struct iommu_regset iommu_regs_64[] = {
IOMMU_REGSET_ENTRY(VCRSP),
};
+static struct dentry *intel_iommu_debug;
+
static int iommu_regset_show(struct seq_file *m, void *unused)
{
struct dmar_drhd_unit *drhd;
@@ -311,9 +313,14 @@ static inline unsigned long level_to_directory_size(int level)
static inline void
dump_page_info(struct seq_file *m, unsigned long iova, u64 *path)
{
- seq_printf(m, "0x%013lx |\t0x%016llx\t0x%016llx\t0x%016llx\t0x%016llx\t0x%016llx\n",
- iova >> VTD_PAGE_SHIFT, path[5], path[4],
- path[3], path[2], path[1]);
+ seq_printf(m, "0x%013lx |\t0x%016llx\t0x%016llx\t0x%016llx",
+ iova >> VTD_PAGE_SHIFT, path[5], path[4], path[3]);
+ if (path[2]) {
+ seq_printf(m, "\t0x%016llx", path[2]);
+ if (path[1])
+ seq_printf(m, "\t0x%016llx", path[1]);
+ }
+ seq_putc(m, '\n');
}
static void pgtable_walk_level(struct seq_file *m, struct dma_pte *pde,
@@ -340,58 +347,140 @@ static void pgtable_walk_level(struct seq_file *m, struct dma_pte *pde,
}
}
-static int __show_device_domain_translation(struct device *dev, void *data)
+static int domain_translation_struct_show(struct seq_file *m,
+ struct device_domain_info *info,
+ ioasid_t pasid)
{
- struct dmar_domain *domain;
- struct seq_file *m = data;
- u64 path[6] = { 0 };
-
- domain = to_dmar_domain(iommu_get_domain_for_dev(dev));
- if (!domain)
- return 0;
+ bool scalable, found = false;
+ struct dmar_drhd_unit *drhd;
+ struct intel_iommu *iommu;
+ u16 devfn, bus, seg;
- seq_printf(m, "Device %s @0x%llx\n", dev_name(dev),
- (u64)virt_to_phys(domain->pgd));
- seq_puts(m, "IOVA_PFN\t\tPML5E\t\t\tPML4E\t\t\tPDPE\t\t\tPDE\t\t\tPTE\n");
+ bus = info->bus;
+ devfn = info->devfn;
+ seg = info->segment;
- pgtable_walk_level(m, domain->pgd, domain->agaw + 2, 0, path);
- seq_putc(m, '\n');
+ rcu_read_lock();
+ for_each_active_iommu(iommu, drhd) {
+ struct context_entry *context;
+ u64 pgd, path[6] = { 0 };
+ u32 sts, agaw;
- /* Don't iterate */
- return 1;
-}
+ if (seg != iommu->segment)
+ continue;
-static int show_device_domain_translation(struct device *dev, void *data)
-{
- struct iommu_group *group;
+ sts = dmar_readl(iommu->reg + DMAR_GSTS_REG);
+ if (!(sts & DMA_GSTS_TES)) {
+ seq_printf(m, "DMA Remapping is not enabled on %s\n",
+ iommu->name);
+ continue;
+ }
+ if (dmar_readq(iommu->reg + DMAR_RTADDR_REG) & DMA_RTADDR_SMT)
+ scalable = true;
+ else
+ scalable = false;
- group = iommu_group_get(dev);
- if (group) {
/*
- * The group->mutex is held across the callback, which will
- * block calls to iommu_attach/detach_group/device. Hence,
+ * The iommu->lock is held across the callback, which will
+ * block calls to domain_attach/domain_detach. Hence,
* the domain of the device will not change during traversal.
*
- * All devices in an iommu group share a single domain, hence
- * we only dump the domain of the first device. Even though,
- * this code still possibly races with the iommu_unmap()
+ * Traversing page table possibly races with the iommu_unmap()
* interface. This could be solved by RCU-freeing the page
* table pages in the iommu_unmap() path.
*/
- iommu_group_for_each_dev(group, data,
- __show_device_domain_translation);
- iommu_group_put(group);
+ spin_lock(&iommu->lock);
+
+ context = iommu_context_addr(iommu, bus, devfn, 0);
+ if (!context || !context_present(context))
+ goto iommu_unlock;
+
+ if (scalable) { /* scalable mode */
+ struct pasid_entry *pasid_tbl, *pasid_tbl_entry;
+ struct pasid_dir_entry *dir_tbl, *dir_entry;
+ u16 dir_idx, tbl_idx, pgtt;
+ u64 pasid_dir_ptr;
+
+ pasid_dir_ptr = context->lo & VTD_PAGE_MASK;
+
+ /* Dump specified device domain mappings with PASID. */
+ dir_idx = pasid >> PASID_PDE_SHIFT;
+ tbl_idx = pasid & PASID_PTE_MASK;
+
+ dir_tbl = phys_to_virt(pasid_dir_ptr);
+ dir_entry = &dir_tbl[dir_idx];
+
+ pasid_tbl = get_pasid_table_from_pde(dir_entry);
+ if (!pasid_tbl)
+ goto iommu_unlock;
+
+ pasid_tbl_entry = &pasid_tbl[tbl_idx];
+ if (!pasid_pte_is_present(pasid_tbl_entry))
+ goto iommu_unlock;
+
+ /*
+ * According to PASID Granular Translation Type(PGTT),
+ * get the page table pointer.
+ */
+ pgtt = (u16)(pasid_tbl_entry->val[0] & GENMASK_ULL(8, 6)) >> 6;
+ agaw = (u8)(pasid_tbl_entry->val[0] & GENMASK_ULL(4, 2)) >> 2;
+
+ switch (pgtt) {
+ case PASID_ENTRY_PGTT_FL_ONLY:
+ pgd = pasid_tbl_entry->val[2];
+ break;
+ case PASID_ENTRY_PGTT_SL_ONLY:
+ case PASID_ENTRY_PGTT_NESTED:
+ pgd = pasid_tbl_entry->val[0];
+ break;
+ default:
+ goto iommu_unlock;
+ }
+ pgd &= VTD_PAGE_MASK;
+ } else { /* legacy mode */
+ pgd = context->lo & VTD_PAGE_MASK;
+ agaw = context->hi & 7;
+ }
+
+ seq_printf(m, "Device %04x:%02x:%02x.%x ",
+ iommu->segment, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
+
+ if (scalable)
+ seq_printf(m, "with pasid %x @0x%llx\n", pasid, pgd);
+ else
+ seq_printf(m, "@0x%llx\n", pgd);
+
+ seq_printf(m, "%-17s\t%-18s\t%-18s\t%-18s\t%-18s\t%-s\n",
+ "IOVA_PFN", "PML5E", "PML4E", "PDPE", "PDE", "PTE");
+ pgtable_walk_level(m, phys_to_virt(pgd), agaw + 2, 0, path);
+
+ found = true;
+iommu_unlock:
+ spin_unlock(&iommu->lock);
+ if (found)
+ break;
}
+ rcu_read_unlock();
return 0;
}
-static int domain_translation_struct_show(struct seq_file *m, void *unused)
+static int dev_domain_translation_struct_show(struct seq_file *m, void *unused)
+{
+ struct device_domain_info *info = (struct device_domain_info *)m->private;
+
+ return domain_translation_struct_show(m, info, IOMMU_NO_PASID);
+}
+DEFINE_SHOW_ATTRIBUTE(dev_domain_translation_struct);
+
+static int pasid_domain_translation_struct_show(struct seq_file *m, void *unused)
{
- return bus_for_each_dev(&pci_bus_type, NULL, m,
- show_device_domain_translation);
+ struct dev_pasid_info *dev_pasid = (struct dev_pasid_info *)m->private;
+ struct device_domain_info *info = dev_iommu_priv_get(dev_pasid->dev);
+
+ return domain_translation_struct_show(m, info, dev_pasid->pasid);
}
-DEFINE_SHOW_ATTRIBUTE(domain_translation_struct);
+DEFINE_SHOW_ATTRIBUTE(pasid_domain_translation_struct);
static void invalidation_queue_entry_show(struct seq_file *m,
struct intel_iommu *iommu)
@@ -666,16 +755,12 @@ static const struct file_operations dmar_perf_latency_fops = {
void __init intel_iommu_debugfs_init(void)
{
- struct dentry *intel_iommu_debug = debugfs_create_dir("intel",
- iommu_debugfs_dir);
+ intel_iommu_debug = debugfs_create_dir("intel", iommu_debugfs_dir);
debugfs_create_file("iommu_regset", 0444, intel_iommu_debug, NULL,
&iommu_regset_fops);
debugfs_create_file("dmar_translation_struct", 0444, intel_iommu_debug,
NULL, &dmar_translation_struct_fops);
- debugfs_create_file("domain_translation_struct", 0444,
- intel_iommu_debug, NULL,
- &domain_translation_struct_fops);
debugfs_create_file("invalidation_queue", 0444, intel_iommu_debug,
NULL, &invalidation_queue_fops);
#ifdef CONFIG_IRQ_REMAP
@@ -685,3 +770,51 @@ void __init intel_iommu_debugfs_init(void)
debugfs_create_file("dmar_perf_latency", 0644, intel_iommu_debug,
NULL, &dmar_perf_latency_fops);
}
+
+/*
+ * Create a debugfs directory for each device, and then create a
+ * debugfs file in this directory for users to dump the page table
+ * of the default domain. e.g.
+ * /sys/kernel/debug/iommu/intel/0000:00:01.0/domain_translation_struct
+ */
+void intel_iommu_debugfs_create_dev(struct device_domain_info *info)
+{
+ info->debugfs_dentry = debugfs_create_dir(dev_name(info->dev), intel_iommu_debug);
+
+ debugfs_create_file("domain_translation_struct", 0444, info->debugfs_dentry,
+ info, &dev_domain_translation_struct_fops);
+}
+
+/* Remove the device debugfs directory. */
+void intel_iommu_debugfs_remove_dev(struct device_domain_info *info)
+{
+ debugfs_remove_recursive(info->debugfs_dentry);
+}
+
+/*
+ * Create a debugfs directory per pair of {device, pasid}, then create the
+ * corresponding debugfs file in this directory for users to dump its page
+ * table. e.g.
+ * /sys/kernel/debug/iommu/intel/0000:00:01.0/1/domain_translation_struct
+ *
+ * The debugfs only dumps the page tables whose mappings are created and
+ * destroyed by the iommu_map/unmap() interfaces. Check the mapping type
+ * of the domain before creating debugfs directory.
+ */
+void intel_iommu_debugfs_create_dev_pasid(struct dev_pasid_info *dev_pasid)
+{
+ struct device_domain_info *info = dev_iommu_priv_get(dev_pasid->dev);
+ char dir_name[10];
+
+ sprintf(dir_name, "%x", dev_pasid->pasid);
+ dev_pasid->debugfs_dentry = debugfs_create_dir(dir_name, info->debugfs_dentry);
+
+ debugfs_create_file("domain_translation_struct", 0444, dev_pasid->debugfs_dentry,
+ dev_pasid, &pasid_domain_translation_struct_fops);
+}
+
+/* Remove the device pasid debugfs directory. */
+void intel_iommu_debugfs_remove_dev_pasid(struct dev_pasid_info *dev_pasid)
+{
+ debugfs_remove_recursive(dev_pasid->debugfs_dentry);
+}
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index d1037280abf7..3531b956556c 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -4016,9 +4016,9 @@ static int blocking_domain_attach_dev(struct iommu_domain *domain,
}
static struct iommu_domain blocking_domain = {
+ .type = IOMMU_DOMAIN_BLOCKED,
.ops = &(const struct iommu_domain_ops) {
.attach_dev = blocking_domain_attach_dev,
- .free = intel_iommu_domain_free
}
};
@@ -4028,8 +4028,6 @@ static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
struct iommu_domain *domain;
switch (type) {
- case IOMMU_DOMAIN_BLOCKED:
- return &blocking_domain;
case IOMMU_DOMAIN_DMA:
case IOMMU_DOMAIN_UNMANAGED:
dmar_domain = alloc_domain(type);
@@ -4111,7 +4109,7 @@ intel_iommu_domain_alloc_user(struct device *dev, u32 flags,
static void intel_iommu_domain_free(struct iommu_domain *domain)
{
- if (domain != &si_domain->domain && domain != &blocking_domain)
+ if (domain != &si_domain->domain)
domain_exit(to_dmar_domain(domain));
}
@@ -4465,6 +4463,8 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev)
}
}
+ intel_iommu_debugfs_create_dev(info);
+
return &iommu->iommu;
}
@@ -4474,6 +4474,7 @@ static void intel_iommu_release_device(struct device *dev)
dmar_remove_one_dev_info(dev);
intel_pasid_free_table(dev);
+ intel_iommu_debugfs_remove_dev(info);
dev_iommu_priv_set(dev, NULL);
kfree(info);
set_dma_ops(dev, NULL);
@@ -4718,8 +4719,8 @@ static bool risky_device(struct pci_dev *pdev)
return false;
}
-static void intel_iommu_iotlb_sync_map(struct iommu_domain *domain,
- unsigned long iova, size_t size)
+static int intel_iommu_iotlb_sync_map(struct iommu_domain *domain,
+ unsigned long iova, size_t size)
{
struct dmar_domain *dmar_domain = to_dmar_domain(domain);
unsigned long pages = aligned_nrpages(iova, size);
@@ -4729,6 +4730,7 @@ static void intel_iommu_iotlb_sync_map(struct iommu_domain *domain,
xa_for_each(&dmar_domain->iommu_array, i, info)
__mapping_notify_one(info->iommu, dmar_domain, pfn, pages);
+ return 0;
}
static void intel_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid)
@@ -4766,6 +4768,7 @@ static void intel_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid)
spin_unlock_irqrestore(&dmar_domain->lock, flags);
domain_detach_iommu(dmar_domain, iommu);
+ intel_iommu_debugfs_remove_dev_pasid(dev_pasid);
kfree(dev_pasid);
out_tear_down:
intel_pasid_tear_down_entry(iommu, dev, pasid, false);
@@ -4821,6 +4824,9 @@ static int intel_iommu_set_dev_pasid(struct iommu_domain *domain,
list_add(&dev_pasid->link_domain, &dmar_domain->dev_pasids);
spin_unlock_irqrestore(&dmar_domain->lock, flags);
+ if (domain->type & __IOMMU_DOMAIN_PAGING)
+ intel_iommu_debugfs_create_dev_pasid(dev_pasid);
+
return 0;
out_detach_iommu:
domain_detach_iommu(dmar_domain, iommu);
@@ -4925,6 +4931,7 @@ const struct iommu_dirty_ops intel_dirty_ops = {
};
const struct iommu_ops intel_iommu_ops = {
+ .blocked_domain = &blocking_domain,
.capable = intel_iommu_capable,
.hw_info = intel_iommu_hw_info,
.domain_alloc = intel_iommu_domain_alloc,
diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index d796d0d9b114..65d37a138c75 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -749,12 +749,18 @@ struct device_domain_info {
struct intel_iommu *iommu; /* IOMMU used by this device */
struct dmar_domain *domain; /* pointer to domain */
struct pasid_table *pasid_table; /* pasid table */
+#ifdef CONFIG_INTEL_IOMMU_DEBUGFS
+ struct dentry *debugfs_dentry; /* pointer to device directory dentry */
+#endif
};
struct dev_pasid_info {
struct list_head link_domain; /* link to domain siblings */
struct device *dev;
ioasid_t pasid;
+#ifdef CONFIG_INTEL_IOMMU_DEBUGFS
+ struct dentry *debugfs_dentry; /* pointer to pasid directory dentry */
+#endif
};
static inline void __iommu_flush_cache(
@@ -935,8 +941,16 @@ static inline void intel_svm_remove_dev_pasid(struct device *dev, ioasid_t pasid
#ifdef CONFIG_INTEL_IOMMU_DEBUGFS
void intel_iommu_debugfs_init(void);
+void intel_iommu_debugfs_create_dev(struct device_domain_info *info);
+void intel_iommu_debugfs_remove_dev(struct device_domain_info *info);
+void intel_iommu_debugfs_create_dev_pasid(struct dev_pasid_info *dev_pasid);
+void intel_iommu_debugfs_remove_dev_pasid(struct dev_pasid_info *dev_pasid);
#else
static inline void intel_iommu_debugfs_init(void) {}
+static inline void intel_iommu_debugfs_create_dev(struct device_domain_info *info) {}
+static inline void intel_iommu_debugfs_remove_dev(struct device_domain_info *info) {}
+static inline void intel_iommu_debugfs_create_dev_pasid(struct dev_pasid_info *dev_pasid) {}
+static inline void intel_iommu_debugfs_remove_dev_pasid(struct dev_pasid_info *dev_pasid) {}
#endif /* CONFIG_INTEL_IOMMU_DEBUGFS */
extern const struct attribute_group *intel_iommu_groups[];
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index c146378c7d03..f17a1113f3d6 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -37,7 +37,6 @@
#include "iommu-priv.h"
#include "iommu-sva.h"
-#include "iommu-priv.h"
static struct kset *iommu_group_kset;
static DEFINE_IDA(iommu_group_ida);
@@ -96,8 +95,8 @@ static const char * const iommu_group_resv_type_string[] = {
static int iommu_bus_notifier(struct notifier_block *nb,
unsigned long action, void *data);
static void iommu_release_device(struct device *dev);
-static struct iommu_domain *__iommu_domain_alloc(const struct bus_type *bus,
- unsigned type);
+static struct iommu_domain *
+__iommu_group_domain_alloc(struct iommu_group *group, unsigned int type);
static int __iommu_attach_device(struct iommu_domain *domain,
struct device *dev);
static int __iommu_attach_group(struct iommu_domain *domain,
@@ -184,6 +183,8 @@ static const char *iommu_domain_type_str(unsigned int t)
case IOMMU_DOMAIN_DMA:
case IOMMU_DOMAIN_DMA_FQ:
return "Translated";
+ case IOMMU_DOMAIN_PLATFORM:
+ return "Platform";
default:
return "Unknown";
}
@@ -290,6 +291,10 @@ void iommu_device_unregister(struct iommu_device *iommu)
spin_lock(&iommu_device_lock);
list_del(&iommu->list);
spin_unlock(&iommu_device_lock);
+
+ /* Pairs with the alloc in generic_single_device_group() */
+ iommu_group_put(iommu->singleton_group);
+ iommu->singleton_group = NULL;
}
EXPORT_SYMBOL_GPL(iommu_device_unregister);
@@ -404,6 +409,7 @@ static int iommu_init_device(struct device *dev, const struct iommu_ops *ops)
ret = PTR_ERR(iommu_dev);
goto err_module_put;
}
+ dev->iommu->iommu_dev = iommu_dev;
ret = iommu_device_link(iommu_dev, dev);
if (ret)
@@ -418,7 +424,6 @@ static int iommu_init_device(struct device *dev, const struct iommu_ops *ops)
}
dev->iommu_group = group;
- dev->iommu->iommu_dev = iommu_dev;
dev->iommu->max_pasids = dev_iommu_get_max_pasids(dev);
if (ops->is_attach_deferred)
dev->iommu->attach_deferred = ops->is_attach_deferred(dev);
@@ -432,6 +437,7 @@ err_release:
err_module_put:
module_put(ops->owner);
err_free:
+ dev->iommu->iommu_dev = NULL;
dev_iommu_free(dev);
return ret;
}
@@ -1637,6 +1643,27 @@ struct iommu_group *generic_device_group(struct device *dev)
EXPORT_SYMBOL_GPL(generic_device_group);
/*
+ * Generic device_group call-back function. It just allocates one
+ * iommu-group per iommu driver instance shared by every device
+ * probed by that iommu driver.
+ */
+struct iommu_group *generic_single_device_group(struct device *dev)
+{
+ struct iommu_device *iommu = dev->iommu->iommu_dev;
+
+ if (!iommu->singleton_group) {
+ struct iommu_group *group;
+
+ group = iommu_group_alloc();
+ if (IS_ERR(group))
+ return group;
+ iommu->singleton_group = group;
+ }
+ return iommu_group_ref_get(iommu->singleton_group);
+}
+EXPORT_SYMBOL_GPL(generic_single_device_group);
+
+/*
* Use standard PCI bus topology, isolation features, and DMA alias quirks
* to find or create an IOMMU group for a device.
*/
@@ -1717,26 +1744,29 @@ struct iommu_group *fsl_mc_device_group(struct device *dev)
}
EXPORT_SYMBOL_GPL(fsl_mc_device_group);
-static int iommu_get_def_domain_type(struct device *dev)
-{
- const struct iommu_ops *ops = dev_iommu_ops(dev);
-
- if (dev_is_pci(dev) && to_pci_dev(dev)->untrusted)
- return IOMMU_DOMAIN_DMA;
-
- if (ops->def_domain_type)
- return ops->def_domain_type(dev);
-
- return 0;
-}
-
static struct iommu_domain *
-__iommu_group_alloc_default_domain(const struct bus_type *bus,
- struct iommu_group *group, int req_type)
+__iommu_group_alloc_default_domain(struct iommu_group *group, int req_type)
{
if (group->default_domain && group->default_domain->type == req_type)
return group->default_domain;
- return __iommu_domain_alloc(bus, req_type);
+ return __iommu_group_domain_alloc(group, req_type);
+}
+
+/*
+ * Returns the iommu_ops for the devices in an iommu group.
+ *
+ * It is assumed that all devices in an iommu group are managed by a single
+ * IOMMU unit. Therefore, this returns the dev_iommu_ops of the first device
+ * in the group.
+ */
+static const struct iommu_ops *group_iommu_ops(struct iommu_group *group)
+{
+ struct group_device *device =
+ list_first_entry(&group->devices, struct group_device, list);
+
+ lockdep_assert_held(&group->mutex);
+
+ return dev_iommu_ops(device->dev);
}
/*
@@ -1746,25 +1776,34 @@ __iommu_group_alloc_default_domain(const struct bus_type *bus,
static struct iommu_domain *
iommu_group_alloc_default_domain(struct iommu_group *group, int req_type)
{
- const struct bus_type *bus =
- list_first_entry(&group->devices, struct group_device, list)
- ->dev->bus;
+ const struct iommu_ops *ops = group_iommu_ops(group);
struct iommu_domain *dom;
lockdep_assert_held(&group->mutex);
+ /*
+ * Allow legacy drivers to specify the domain that will be the default
+ * domain. This should always be either an IDENTITY/BLOCKED/PLATFORM
+ * domain. Do not use in new drivers.
+ */
+ if (ops->default_domain) {
+ if (req_type)
+ return NULL;
+ return ops->default_domain;
+ }
+
if (req_type)
- return __iommu_group_alloc_default_domain(bus, group, req_type);
+ return __iommu_group_alloc_default_domain(group, req_type);
/* The driver gave no guidance on what type to use, try the default */
- dom = __iommu_group_alloc_default_domain(bus, group, iommu_def_domain_type);
+ dom = __iommu_group_alloc_default_domain(group, iommu_def_domain_type);
if (dom)
return dom;
/* Otherwise IDENTITY and DMA_FQ defaults will try DMA */
if (iommu_def_domain_type == IOMMU_DOMAIN_DMA)
return NULL;
- dom = __iommu_group_alloc_default_domain(bus, group, IOMMU_DOMAIN_DMA);
+ dom = __iommu_group_alloc_default_domain(group, IOMMU_DOMAIN_DMA);
if (!dom)
return NULL;
@@ -1808,40 +1847,109 @@ static int iommu_bus_notifier(struct notifier_block *nb,
return 0;
}
-/* A target_type of 0 will select the best domain type and cannot fail */
+/*
+ * Combine the driver's chosen def_domain_type across all the devices in a
+ * group. Drivers must give a consistent result.
+ */
+static int iommu_get_def_domain_type(struct iommu_group *group,
+ struct device *dev, int cur_type)
+{
+ const struct iommu_ops *ops = group_iommu_ops(group);
+ int type;
+
+ if (!ops->def_domain_type)
+ return cur_type;
+
+ type = ops->def_domain_type(dev);
+ if (!type || cur_type == type)
+ return cur_type;
+ if (!cur_type)
+ return type;
+
+ dev_err_ratelimited(
+ dev,
+ "IOMMU driver error, requesting conflicting def_domain_type, %s and %s, for devices in group %u.\n",
+ iommu_domain_type_str(cur_type), iommu_domain_type_str(type),
+ group->id);
+
+ /*
+ * Try to recover, drivers are allowed to force IDENITY or DMA, IDENTITY
+ * takes precedence.
+ */
+ if (type == IOMMU_DOMAIN_IDENTITY)
+ return type;
+ return cur_type;
+}
+
+/*
+ * A target_type of 0 will select the best domain type. 0 can be returned in
+ * this case meaning the global default should be used.
+ */
static int iommu_get_default_domain_type(struct iommu_group *group,
int target_type)
{
- int best_type = target_type;
+ struct device *untrusted = NULL;
struct group_device *gdev;
- struct device *last_dev;
+ int driver_type = 0;
lockdep_assert_held(&group->mutex);
+ /*
+ * ARM32 drivers supporting CONFIG_ARM_DMA_USE_IOMMU can declare an
+ * identity_domain and it will automatically become their default
+ * domain. Later on ARM_DMA_USE_IOMMU will install its UNMANAGED domain.
+ * Override the selection to IDENTITY.
+ */
+ if (IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)) {
+ static_assert(!(IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU) &&
+ IS_ENABLED(CONFIG_IOMMU_DMA)));
+ driver_type = IOMMU_DOMAIN_IDENTITY;
+ }
+
for_each_group_device(group, gdev) {
- unsigned int type = iommu_get_def_domain_type(gdev->dev);
-
- if (best_type && type && best_type != type) {
- if (target_type) {
- dev_err_ratelimited(
- gdev->dev,
- "Device cannot be in %s domain\n",
- iommu_domain_type_str(target_type));
+ driver_type = iommu_get_def_domain_type(group, gdev->dev,
+ driver_type);
+
+ if (dev_is_pci(gdev->dev) && to_pci_dev(gdev->dev)->untrusted) {
+ /*
+ * No ARM32 using systems will set untrusted, it cannot
+ * work.
+ */
+ if (WARN_ON(IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)))
return -1;
- }
+ untrusted = gdev->dev;
+ }
+ }
- dev_warn(
- gdev->dev,
- "Device needs domain type %s, but device %s in the same iommu group requires type %s - using default\n",
- iommu_domain_type_str(type), dev_name(last_dev),
- iommu_domain_type_str(best_type));
- return 0;
+ /*
+ * If the common dma ops are not selected in kconfig then we cannot use
+ * IOMMU_DOMAIN_DMA at all. Force IDENTITY if nothing else has been
+ * selected.
+ */
+ if (!IS_ENABLED(CONFIG_IOMMU_DMA)) {
+ if (WARN_ON(driver_type == IOMMU_DOMAIN_DMA))
+ return -1;
+ if (!driver_type)
+ driver_type = IOMMU_DOMAIN_IDENTITY;
+ }
+
+ if (untrusted) {
+ if (driver_type && driver_type != IOMMU_DOMAIN_DMA) {
+ dev_err_ratelimited(
+ untrusted,
+ "Device is not trusted, but driver is overriding group %u to %s, refusing to probe.\n",
+ group->id, iommu_domain_type_str(driver_type));
+ return -1;
}
- if (!best_type)
- best_type = type;
- last_dev = gdev->dev;
+ driver_type = IOMMU_DOMAIN_DMA;
}
- return best_type;
+
+ if (target_type) {
+ if (driver_type && target_type != driver_type)
+ return -1;
+ return target_type;
+ }
+ return driver_type;
}
static void iommu_group_do_probe_finalize(struct device *dev)
@@ -1970,16 +2078,24 @@ void iommu_set_fault_handler(struct iommu_domain *domain,
}
EXPORT_SYMBOL_GPL(iommu_set_fault_handler);
-static struct iommu_domain *__iommu_domain_alloc(const struct bus_type *bus,
- unsigned type)
+static struct iommu_domain *__iommu_domain_alloc(const struct iommu_ops *ops,
+ struct device *dev,
+ unsigned int type)
{
struct iommu_domain *domain;
unsigned int alloc_type = type & IOMMU_DOMAIN_ALLOC_FLAGS;
- if (bus == NULL || bus->iommu_ops == NULL)
+ if (alloc_type == IOMMU_DOMAIN_IDENTITY && ops->identity_domain)
+ return ops->identity_domain;
+ else if (alloc_type == IOMMU_DOMAIN_BLOCKED && ops->blocked_domain)
+ return ops->blocked_domain;
+ else if (type & __IOMMU_DOMAIN_PAGING && ops->domain_alloc_paging)
+ domain = ops->domain_alloc_paging(dev);
+ else if (ops->domain_alloc)
+ domain = ops->domain_alloc(alloc_type);
+ else
return NULL;
- domain = bus->iommu_ops->domain_alloc(alloc_type);
if (!domain)
return NULL;
@@ -1989,10 +2105,10 @@ static struct iommu_domain *__iommu_domain_alloc(const struct bus_type *bus,
* may override this later
*/
if (!domain->pgsize_bitmap)
- domain->pgsize_bitmap = bus->iommu_ops->pgsize_bitmap;
+ domain->pgsize_bitmap = ops->pgsize_bitmap;
if (!domain->ops)
- domain->ops = bus->iommu_ops->default_domain_ops;
+ domain->ops = ops->default_domain_ops;
if (iommu_is_dma_domain(domain) && iommu_get_dma_cookie(domain)) {
iommu_domain_free(domain);
@@ -2001,9 +2117,22 @@ static struct iommu_domain *__iommu_domain_alloc(const struct bus_type *bus,
return domain;
}
+static struct iommu_domain *
+__iommu_group_domain_alloc(struct iommu_group *group, unsigned int type)
+{
+ struct device *dev =
+ list_first_entry(&group->devices, struct group_device, list)
+ ->dev;
+
+ return __iommu_domain_alloc(group_iommu_ops(group), dev, type);
+}
+
struct iommu_domain *iommu_domain_alloc(const struct bus_type *bus)
{
- return __iommu_domain_alloc(bus, IOMMU_DOMAIN_UNMANAGED);
+ if (bus == NULL || bus->iommu_ops == NULL)
+ return NULL;
+ return __iommu_domain_alloc(bus->iommu_ops, NULL,
+ IOMMU_DOMAIN_UNMANAGED);
}
EXPORT_SYMBOL_GPL(iommu_domain_alloc);
@@ -2012,7 +2141,8 @@ void iommu_domain_free(struct iommu_domain *domain)
if (domain->type == IOMMU_DOMAIN_SVA)
mmdrop(domain->mm);
iommu_put_dma_cookie(domain);
- domain->ops->free(domain);
+ if (domain->ops->free)
+ domain->ops->free(domain);
}
EXPORT_SYMBOL_GPL(iommu_domain_free);
@@ -2062,10 +2192,10 @@ static int __iommu_attach_device(struct iommu_domain *domain,
*/
int iommu_attach_device(struct iommu_domain *domain, struct device *dev)
{
- struct iommu_group *group;
+ /* Caller must be a probed driver on dev */
+ struct iommu_group *group = dev->iommu_group;
int ret;
- group = iommu_group_get(dev);
if (!group)
return -ENODEV;
@@ -2082,8 +2212,6 @@ int iommu_attach_device(struct iommu_domain *domain, struct device *dev)
out_unlock:
mutex_unlock(&group->mutex);
- iommu_group_put(group);
-
return ret;
}
EXPORT_SYMBOL_GPL(iommu_attach_device);
@@ -2098,9 +2226,9 @@ int iommu_deferred_attach(struct device *dev, struct iommu_domain *domain)
void iommu_detach_device(struct iommu_domain *domain, struct device *dev)
{
- struct iommu_group *group;
+ /* Caller must be a probed driver on dev */
+ struct iommu_group *group = dev->iommu_group;
- group = iommu_group_get(dev);
if (!group)
return;
@@ -2112,24 +2240,18 @@ void iommu_detach_device(struct iommu_domain *domain, struct device *dev)
out_unlock:
mutex_unlock(&group->mutex);
- iommu_group_put(group);
}
EXPORT_SYMBOL_GPL(iommu_detach_device);
struct iommu_domain *iommu_get_domain_for_dev(struct device *dev)
{
- struct iommu_domain *domain;
- struct iommu_group *group;
+ /* Caller must be a probed driver on dev */
+ struct iommu_group *group = dev->iommu_group;
- group = iommu_group_get(dev);
if (!group)
return NULL;
- domain = group->domain;
-
- iommu_group_put(group);
-
- return domain;
+ return group->domain;
}
EXPORT_SYMBOL_GPL(iommu_get_domain_for_dev);
@@ -2275,21 +2397,8 @@ static int __iommu_group_set_domain_internal(struct iommu_group *group,
if (group->domain == new_domain)
return 0;
- /*
- * New drivers should support default domains, so set_platform_dma()
- * op will never be called. Otherwise the NULL domain represents some
- * platform specific behavior.
- */
- if (!new_domain) {
- for_each_group_device(group, gdev) {
- const struct iommu_ops *ops = dev_iommu_ops(gdev->dev);
-
- if (!WARN_ON(!ops->set_platform_dma_ops))
- ops->set_platform_dma_ops(gdev->dev);
- }
- group->domain = NULL;
- return 0;
- }
+ if (WARN_ON(!new_domain))
+ return -EINVAL;
/*
* Changing the domain is done by calling attach_dev() on the new
@@ -2325,19 +2434,15 @@ err_revert:
*/
last_gdev = gdev;
for_each_group_device(group, gdev) {
- const struct iommu_ops *ops = dev_iommu_ops(gdev->dev);
-
/*
- * If set_platform_dma_ops is not present a NULL domain can
- * happen only for first probe, in which case we leave
- * group->domain as NULL and let release clean everything up.
+ * A NULL domain can happen only for first probe, in which case
+ * we leave group->domain as NULL and let release clean
+ * everything up.
*/
if (group->domain)
WARN_ON(__iommu_device_set_domain(
group, gdev->dev, group->domain,
IOMMU_SET_DOMAIN_MUST_SUCCEED));
- else if (ops->set_platform_dma_ops)
- ops->set_platform_dma_ops(gdev->dev);
if (gdev == last_gdev)
break;
}
@@ -2418,30 +2523,6 @@ out_set_count:
return pgsize;
}
-static int __iommu_map_pages(struct iommu_domain *domain, unsigned long iova,
- phys_addr_t paddr, size_t size, int prot,
- gfp_t gfp, size_t *mapped)
-{
- const struct iommu_domain_ops *ops = domain->ops;
- size_t pgsize, count;
- int ret;
-
- pgsize = iommu_pgsize(domain, iova, paddr, size, &count);
-
- pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx count %zu\n",
- iova, &paddr, pgsize, count);
-
- if (ops->map_pages) {
- ret = ops->map_pages(domain, iova, paddr, pgsize, count, prot,
- gfp, mapped);
- } else {
- ret = ops->map(domain, iova, paddr, pgsize, prot, gfp);
- *mapped = ret ? 0 : pgsize;
- }
-
- return ret;
-}
-
static int __iommu_map(struct iommu_domain *domain, unsigned long iova,
phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
{
@@ -2452,13 +2533,12 @@ static int __iommu_map(struct iommu_domain *domain, unsigned long iova,
phys_addr_t orig_paddr = paddr;
int ret = 0;
- if (unlikely(!(ops->map || ops->map_pages) ||
- domain->pgsize_bitmap == 0UL))
- return -ENODEV;
-
if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING)))
return -EINVAL;
+ if (WARN_ON(!ops->map_pages || domain->pgsize_bitmap == 0UL))
+ return -ENODEV;
+
/* find out the minimum page size supported */
min_pagesz = 1 << __ffs(domain->pgsize_bitmap);
@@ -2476,10 +2556,14 @@ static int __iommu_map(struct iommu_domain *domain, unsigned long iova,
pr_debug("map: iova 0x%lx pa %pa size 0x%zx\n", iova, &paddr, size);
while (size) {
- size_t mapped = 0;
+ size_t pgsize, count, mapped = 0;
+
+ pgsize = iommu_pgsize(domain, iova, paddr, size, &count);
- ret = __iommu_map_pages(domain, iova, paddr, size, prot, gfp,
- &mapped);
+ pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx count %zu\n",
+ iova, &paddr, pgsize, count);
+ ret = ops->map_pages(domain, iova, paddr, pgsize, count, prot,
+ gfp, &mapped);
/*
* Some pages may have been mapped, even if an error occurred,
* so we should account for those so they can be unmapped.
@@ -2516,25 +2600,21 @@ int iommu_map(struct iommu_domain *domain, unsigned long iova,
return -EINVAL;
ret = __iommu_map(domain, iova, paddr, size, prot, gfp);
- if (ret == 0 && ops->iotlb_sync_map)
- ops->iotlb_sync_map(domain, iova, size);
+ if (ret == 0 && ops->iotlb_sync_map) {
+ ret = ops->iotlb_sync_map(domain, iova, size);
+ if (ret)
+ goto out_err;
+ }
return ret;
-}
-EXPORT_SYMBOL_GPL(iommu_map);
-static size_t __iommu_unmap_pages(struct iommu_domain *domain,
- unsigned long iova, size_t size,
- struct iommu_iotlb_gather *iotlb_gather)
-{
- const struct iommu_domain_ops *ops = domain->ops;
- size_t pgsize, count;
+out_err:
+ /* undo mappings already done */
+ iommu_unmap(domain, iova, size);
- pgsize = iommu_pgsize(domain, iova, iova, size, &count);
- return ops->unmap_pages ?
- ops->unmap_pages(domain, iova, pgsize, count, iotlb_gather) :
- ops->unmap(domain, iova, pgsize, iotlb_gather);
+ return ret;
}
+EXPORT_SYMBOL_GPL(iommu_map);
static size_t __iommu_unmap(struct iommu_domain *domain,
unsigned long iova, size_t size,
@@ -2545,11 +2625,10 @@ static size_t __iommu_unmap(struct iommu_domain *domain,
unsigned long orig_iova = iova;
unsigned int min_pagesz;
- if (unlikely(!(ops->unmap || ops->unmap_pages) ||
- domain->pgsize_bitmap == 0UL))
+ if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING)))
return 0;
- if (unlikely(!(domain->type & __IOMMU_DOMAIN_PAGING)))
+ if (WARN_ON(!ops->unmap_pages || domain->pgsize_bitmap == 0UL))
return 0;
/* find out the minimum page size supported */
@@ -2573,9 +2652,10 @@ static size_t __iommu_unmap(struct iommu_domain *domain,
* or we hit an area that isn't mapped.
*/
while (unmapped < size) {
- unmapped_page = __iommu_unmap_pages(domain, iova,
- size - unmapped,
- iotlb_gather);
+ size_t pgsize, count;
+
+ pgsize = iommu_pgsize(domain, iova, iova, size - unmapped, &count);
+ unmapped_page = ops->unmap_pages(domain, iova, pgsize, count, iotlb_gather);
if (!unmapped_page)
break;
@@ -2658,8 +2738,11 @@ next:
sg = sg_next(sg);
}
- if (ops->iotlb_sync_map)
- ops->iotlb_sync_map(domain, iova, mapped);
+ if (ops->iotlb_sync_map) {
+ ret = ops->iotlb_sync_map(domain, iova, mapped);
+ if (ret)
+ goto out_err;
+ }
return mapped;
out_err:
@@ -2957,21 +3040,9 @@ static int iommu_setup_default_domain(struct iommu_group *group,
if (req_type < 0)
return -EINVAL;
- /*
- * There are still some drivers which don't support default domains, so
- * we ignore the failure and leave group->default_domain NULL.
- *
- * We assume that the iommu driver starts up the device in
- * 'set_platform_dma_ops' mode if it does not support default domains.
- */
dom = iommu_group_alloc_default_domain(group, req_type);
- if (!dom) {
- /* Once in default_domain mode we never leave */
- if (group->default_domain)
- return -ENODEV;
- group->default_domain = NULL;
- return 0;
- }
+ if (!dom)
+ return -ENODEV;
if (group->default_domain == dom)
return 0;
@@ -3114,24 +3185,6 @@ out_unlock:
return ret ?: count;
}
-static bool iommu_is_default_domain(struct iommu_group *group)
-{
- if (group->domain == group->default_domain)
- return true;
-
- /*
- * If the default domain was set to identity and it is still an identity
- * domain then we consider this a pass. This happens because of
- * amd_iommu_init_device() replacing the default idenytity domain with an
- * identity domain that has a different configuration for AMDGPU.
- */
- if (group->default_domain &&
- group->default_domain->type == IOMMU_DOMAIN_IDENTITY &&
- group->domain && group->domain->type == IOMMU_DOMAIN_IDENTITY)
- return true;
- return false;
-}
-
/**
* iommu_device_use_default_domain() - Device driver wants to handle device
* DMA through the kernel DMA API.
@@ -3142,7 +3195,8 @@ static bool iommu_is_default_domain(struct iommu_group *group)
*/
int iommu_device_use_default_domain(struct device *dev)
{
- struct iommu_group *group = iommu_group_get(dev);
+ /* Caller is the driver core during the pre-probe path */
+ struct iommu_group *group = dev->iommu_group;
int ret = 0;
if (!group)
@@ -3150,7 +3204,7 @@ int iommu_device_use_default_domain(struct device *dev)
mutex_lock(&group->mutex);
if (group->owner_cnt) {
- if (group->owner || !iommu_is_default_domain(group) ||
+ if (group->domain != group->default_domain || group->owner ||
!xa_empty(&group->pasid_array)) {
ret = -EBUSY;
goto unlock_out;
@@ -3161,8 +3215,6 @@ int iommu_device_use_default_domain(struct device *dev)
unlock_out:
mutex_unlock(&group->mutex);
- iommu_group_put(group);
-
return ret;
}
@@ -3176,7 +3228,8 @@ unlock_out:
*/
void iommu_device_unuse_default_domain(struct device *dev)
{
- struct iommu_group *group = iommu_group_get(dev);
+ /* Caller is the driver core during the post-probe path */
+ struct iommu_group *group = dev->iommu_group;
if (!group)
return;
@@ -3186,26 +3239,22 @@ void iommu_device_unuse_default_domain(struct device *dev)
group->owner_cnt--;
mutex_unlock(&group->mutex);
- iommu_group_put(group);
}
static int __iommu_group_alloc_blocking_domain(struct iommu_group *group)
{
- struct group_device *dev =
- list_first_entry(&group->devices, struct group_device, list);
-
if (group->blocking_domain)
return 0;
group->blocking_domain =
- __iommu_domain_alloc(dev->dev->bus, IOMMU_DOMAIN_BLOCKED);
+ __iommu_group_domain_alloc(group, IOMMU_DOMAIN_BLOCKED);
if (!group->blocking_domain) {
/*
* For drivers that do not yet understand IOMMU_DOMAIN_BLOCKED
* create an empty domain instead.
*/
- group->blocking_domain = __iommu_domain_alloc(
- dev->dev->bus, IOMMU_DOMAIN_UNMANAGED);
+ group->blocking_domain = __iommu_group_domain_alloc(
+ group, IOMMU_DOMAIN_UNMANAGED);
if (!group->blocking_domain)
return -EINVAL;
}
@@ -3273,13 +3322,13 @@ EXPORT_SYMBOL_GPL(iommu_group_claim_dma_owner);
*/
int iommu_device_claim_dma_owner(struct device *dev, void *owner)
{
- struct iommu_group *group;
+ /* Caller must be a probed driver on dev */
+ struct iommu_group *group = dev->iommu_group;
int ret = 0;
if (WARN_ON(!owner))
return -EINVAL;
- group = iommu_group_get(dev);
if (!group)
return -ENODEV;
@@ -3296,8 +3345,6 @@ int iommu_device_claim_dma_owner(struct device *dev, void *owner)
ret = __iommu_take_dma_ownership(group, owner);
unlock_out:
mutex_unlock(&group->mutex);
- iommu_group_put(group);
-
return ret;
}
EXPORT_SYMBOL_GPL(iommu_device_claim_dma_owner);
@@ -3335,7 +3382,8 @@ EXPORT_SYMBOL_GPL(iommu_group_release_dma_owner);
*/
void iommu_device_release_dma_owner(struct device *dev)
{
- struct iommu_group *group = iommu_group_get(dev);
+ /* Caller must be a probed driver on dev */
+ struct iommu_group *group = dev->iommu_group;
mutex_lock(&group->mutex);
if (group->owner_cnt > 1)
@@ -3343,7 +3391,6 @@ void iommu_device_release_dma_owner(struct device *dev)
else
__iommu_release_dma_ownership(group);
mutex_unlock(&group->mutex);
- iommu_group_put(group);
}
EXPORT_SYMBOL_GPL(iommu_device_release_dma_owner);
@@ -3404,14 +3451,14 @@ static void __iommu_remove_group_pasid(struct iommu_group *group,
int iommu_attach_device_pasid(struct iommu_domain *domain,
struct device *dev, ioasid_t pasid)
{
- struct iommu_group *group;
+ /* Caller must be a probed driver on dev */
+ struct iommu_group *group = dev->iommu_group;
void *curr;
int ret;
if (!domain->ops->set_dev_pasid)
return -EOPNOTSUPP;
- group = iommu_group_get(dev);
if (!group)
return -ENODEV;
@@ -3429,8 +3476,6 @@ int iommu_attach_device_pasid(struct iommu_domain *domain,
}
out_unlock:
mutex_unlock(&group->mutex);
- iommu_group_put(group);
-
return ret;
}
EXPORT_SYMBOL_GPL(iommu_attach_device_pasid);
@@ -3447,14 +3492,13 @@ EXPORT_SYMBOL_GPL(iommu_attach_device_pasid);
void iommu_detach_device_pasid(struct iommu_domain *domain, struct device *dev,
ioasid_t pasid)
{
- struct iommu_group *group = iommu_group_get(dev);
+ /* Caller must be a probed driver on dev */
+ struct iommu_group *group = dev->iommu_group;
mutex_lock(&group->mutex);
__iommu_remove_group_pasid(group, pasid);
WARN_ON(xa_erase(&group->pasid_array, pasid) != domain);
mutex_unlock(&group->mutex);
-
- iommu_group_put(group);
}
EXPORT_SYMBOL_GPL(iommu_detach_device_pasid);
@@ -3476,10 +3520,10 @@ struct iommu_domain *iommu_get_domain_for_dev_pasid(struct device *dev,
ioasid_t pasid,
unsigned int type)
{
+ /* Caller must be a probed driver on dev */
+ struct iommu_group *group = dev->iommu_group;
struct iommu_domain *domain;
- struct iommu_group *group;
- group = iommu_group_get(dev);
if (!group)
return NULL;
@@ -3488,7 +3532,6 @@ struct iommu_domain *iommu_get_domain_for_dev_pasid(struct device *dev,
if (type && domain && domain->type != type)
domain = ERR_PTR(-EBUSY);
xa_unlock(&group->pasid_array);
- iommu_group_put(group);
return domain;
}
diff --git a/drivers/iommu/iommufd/selftest.c b/drivers/iommu/iommufd/selftest.c
index d43a87737c1e..5d93434003d8 100644
--- a/drivers/iommu/iommufd/selftest.c
+++ b/drivers/iommu/iommufd/selftest.c
@@ -123,10 +123,6 @@ struct selftest_obj {
};
};
-static void mock_domain_blocking_free(struct iommu_domain *domain)
-{
-}
-
static int mock_domain_nop_attach(struct iommu_domain *domain,
struct device *dev)
{
@@ -139,7 +135,6 @@ static int mock_domain_nop_attach(struct iommu_domain *domain,
}
static const struct iommu_domain_ops mock_blocking_ops = {
- .free = mock_domain_blocking_free,
.attach_dev = mock_domain_nop_attach,
};
@@ -258,15 +253,6 @@ __mock_domain_alloc_nested(struct mock_iommu_domain *mock_parent,
return &mock_nested->domain;
}
-static struct iommu_domain *mock_domain_alloc(unsigned int iommu_domain_type)
-{
- if (iommu_domain_type == IOMMU_DOMAIN_BLOCKED)
- return &mock_blocking_domain;
- if (iommu_domain_type == IOMMU_DOMAIN_UNMANAGED)
- return mock_domain_alloc_paging(NULL);
- return NULL;
-}
-
static struct iommu_domain *
mock_domain_alloc_user(struct device *dev, u32 flags,
struct iommu_domain *parent,
@@ -446,14 +432,6 @@ static bool mock_domain_capable(struct device *dev, enum iommu_cap cap)
return false;
}
-static void mock_domain_set_plaform_dma_ops(struct device *dev)
-{
- /*
- * mock doesn't setup default domains because we can't hook into the
- * normal probe path
- */
-}
-
static struct iommu_device mock_iommu_device = {
};
@@ -463,13 +441,18 @@ static struct iommu_device *mock_probe_device(struct device *dev)
}
static const struct iommu_ops mock_ops = {
+ /*
+ * IOMMU_DOMAIN_BLOCKED cannot be returned from def_domain_type()
+ * because it is zero.
+ */
+ .default_domain = &mock_blocking_domain,
+ .blocked_domain = &mock_blocking_domain,
.owner = THIS_MODULE,
.pgsize_bitmap = MOCK_IO_PAGE_SIZE,
.hw_info = mock_domain_hw_info,
- .domain_alloc = mock_domain_alloc,
+ .domain_alloc_paging = mock_domain_alloc_paging,
.domain_alloc_user = mock_domain_alloc_user,
.capable = mock_domain_capable,
- .set_platform_dma_ops = mock_domain_set_plaform_dma_ops,
.device_group = generic_device_group,
.probe_device = mock_probe_device,
.default_domain_ops =
diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index 10b964600948..d30e453d0fb4 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -11,6 +11,7 @@
#include <linux/smp.h>
#include <linux/bitops.h>
#include <linux/cpu.h>
+#include <linux/workqueue.h>
/* The anchor node sits above the top of the usable address space */
#define IOVA_ANCHOR ~0UL
@@ -622,15 +623,21 @@ EXPORT_SYMBOL_GPL(reserve_iova);
/*
* As kmalloc's buffer size is fixed to power of 2, 127 is chosen to
* assure size of 'iova_magazine' to be 1024 bytes, so that no memory
- * will be wasted.
+ * will be wasted. Since only full magazines are inserted into the depot,
+ * we don't need to waste PFN capacity on a separate list head either.
*/
#define IOVA_MAG_SIZE 127
-#define MAX_GLOBAL_MAGS 32 /* magazines per bin */
+
+#define IOVA_DEPOT_DELAY msecs_to_jiffies(100)
struct iova_magazine {
- unsigned long size;
+ union {
+ unsigned long size;
+ struct iova_magazine *next;
+ };
unsigned long pfns[IOVA_MAG_SIZE];
};
+static_assert(!(sizeof(struct iova_magazine) & (sizeof(struct iova_magazine) - 1)));
struct iova_cpu_rcache {
spinlock_t lock;
@@ -640,9 +647,11 @@ struct iova_cpu_rcache {
struct iova_rcache {
spinlock_t lock;
- unsigned long depot_size;
- struct iova_magazine *depot[MAX_GLOBAL_MAGS];
+ unsigned int depot_size;
+ struct iova_magazine *depot;
struct iova_cpu_rcache __percpu *cpu_rcaches;
+ struct iova_domain *iovad;
+ struct delayed_work work;
};
static struct iova_magazine *iova_magazine_alloc(gfp_t flags)
@@ -717,6 +726,41 @@ static void iova_magazine_push(struct iova_magazine *mag, unsigned long pfn)
mag->pfns[mag->size++] = pfn;
}
+static struct iova_magazine *iova_depot_pop(struct iova_rcache *rcache)
+{
+ struct iova_magazine *mag = rcache->depot;
+
+ rcache->depot = mag->next;
+ mag->size = IOVA_MAG_SIZE;
+ rcache->depot_size--;
+ return mag;
+}
+
+static void iova_depot_push(struct iova_rcache *rcache, struct iova_magazine *mag)
+{
+ mag->next = rcache->depot;
+ rcache->depot = mag;
+ rcache->depot_size++;
+}
+
+static void iova_depot_work_func(struct work_struct *work)
+{
+ struct iova_rcache *rcache = container_of(work, typeof(*rcache), work.work);
+ struct iova_magazine *mag = NULL;
+ unsigned long flags;
+
+ spin_lock_irqsave(&rcache->lock, flags);
+ if (rcache->depot_size > num_online_cpus())
+ mag = iova_depot_pop(rcache);
+ spin_unlock_irqrestore(&rcache->lock, flags);
+
+ if (mag) {
+ iova_magazine_free_pfns(mag, rcache->iovad);
+ iova_magazine_free(mag);
+ schedule_delayed_work(&rcache->work, IOVA_DEPOT_DELAY);
+ }
+}
+
int iova_domain_init_rcaches(struct iova_domain *iovad)
{
unsigned int cpu;
@@ -734,7 +778,8 @@ int iova_domain_init_rcaches(struct iova_domain *iovad)
rcache = &iovad->rcaches[i];
spin_lock_init(&rcache->lock);
- rcache->depot_size = 0;
+ rcache->iovad = iovad;
+ INIT_DELAYED_WORK(&rcache->work, iova_depot_work_func);
rcache->cpu_rcaches = __alloc_percpu(sizeof(*cpu_rcache),
cache_line_size());
if (!rcache->cpu_rcaches) {
@@ -776,7 +821,6 @@ static bool __iova_rcache_insert(struct iova_domain *iovad,
struct iova_rcache *rcache,
unsigned long iova_pfn)
{
- struct iova_magazine *mag_to_free = NULL;
struct iova_cpu_rcache *cpu_rcache;
bool can_insert = false;
unsigned long flags;
@@ -794,13 +838,9 @@ static bool __iova_rcache_insert(struct iova_domain *iovad,
if (new_mag) {
spin_lock(&rcache->lock);
- if (rcache->depot_size < MAX_GLOBAL_MAGS) {
- rcache->depot[rcache->depot_size++] =
- cpu_rcache->loaded;
- } else {
- mag_to_free = cpu_rcache->loaded;
- }
+ iova_depot_push(rcache, cpu_rcache->loaded);
spin_unlock(&rcache->lock);
+ schedule_delayed_work(&rcache->work, IOVA_DEPOT_DELAY);
cpu_rcache->loaded = new_mag;
can_insert = true;
@@ -812,11 +852,6 @@ static bool __iova_rcache_insert(struct iova_domain *iovad,
spin_unlock_irqrestore(&cpu_rcache->lock, flags);
- if (mag_to_free) {
- iova_magazine_free_pfns(mag_to_free, iovad);
- iova_magazine_free(mag_to_free);
- }
-
return can_insert;
}
@@ -854,9 +889,9 @@ static unsigned long __iova_rcache_get(struct iova_rcache *rcache,
has_pfn = true;
} else {
spin_lock(&rcache->lock);
- if (rcache->depot_size > 0) {
+ if (rcache->depot) {
iova_magazine_free(cpu_rcache->loaded);
- cpu_rcache->loaded = rcache->depot[--rcache->depot_size];
+ cpu_rcache->loaded = iova_depot_pop(rcache);
has_pfn = true;
}
spin_unlock(&rcache->lock);
@@ -895,9 +930,8 @@ static void free_iova_rcaches(struct iova_domain *iovad)
struct iova_rcache *rcache;
struct iova_cpu_rcache *cpu_rcache;
unsigned int cpu;
- int i, j;
- for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
+ for (int i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
rcache = &iovad->rcaches[i];
if (!rcache->cpu_rcaches)
break;
@@ -907,8 +941,9 @@ static void free_iova_rcaches(struct iova_domain *iovad)
iova_magazine_free(cpu_rcache->prev);
}
free_percpu(rcache->cpu_rcaches);
- for (j = 0; j < rcache->depot_size; ++j)
- iova_magazine_free(rcache->depot[j]);
+ cancel_delayed_work_sync(&rcache->work);
+ while (rcache->depot)
+ iova_magazine_free(iova_depot_pop(rcache));
}
kfree(iovad->rcaches);
@@ -942,16 +977,16 @@ static void free_global_cached_iovas(struct iova_domain *iovad)
{
struct iova_rcache *rcache;
unsigned long flags;
- int i, j;
- for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
+ for (int i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
rcache = &iovad->rcaches[i];
spin_lock_irqsave(&rcache->lock, flags);
- for (j = 0; j < rcache->depot_size; ++j) {
- iova_magazine_free_pfns(rcache->depot[j], iovad);
- iova_magazine_free(rcache->depot[j]);
+ while (rcache->depot) {
+ struct iova_magazine *mag = iova_depot_pop(rcache);
+
+ iova_magazine_free_pfns(mag, iovad);
+ iova_magazine_free(mag);
}
- rcache->depot_size = 0;
spin_unlock_irqrestore(&rcache->lock, flags);
}
}
diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c
index 65ff69477c43..ace1fc4bd34b 100644
--- a/drivers/iommu/ipmmu-vmsa.c
+++ b/drivers/iommu/ipmmu-vmsa.c
@@ -64,7 +64,6 @@ struct ipmmu_vmsa_device {
struct ipmmu_vmsa_domain *domains[IPMMU_CTX_MAX];
s8 utlb_ctx[IPMMU_UTLB_MAX];
- struct iommu_group *group;
struct dma_iommu_mapping *mapping;
};
@@ -295,6 +294,18 @@ static void ipmmu_utlb_enable(struct ipmmu_vmsa_domain *domain,
mmu->utlb_ctx[utlb] = domain->context_id;
}
+/*
+ * Disable MMU translation for the microTLB.
+ */
+static void ipmmu_utlb_disable(struct ipmmu_vmsa_domain *domain,
+ unsigned int utlb)
+{
+ struct ipmmu_vmsa_device *mmu = domain->mmu;
+
+ ipmmu_imuctr_write(mmu, utlb, 0);
+ mmu->utlb_ctx[utlb] = IPMMU_CTX_INVALID;
+}
+
static void ipmmu_tlb_flush_all(void *cookie)
{
struct ipmmu_vmsa_domain *domain = cookie;
@@ -551,13 +562,10 @@ static irqreturn_t ipmmu_irq(int irq, void *dev)
* IOMMU Operations
*/
-static struct iommu_domain *ipmmu_domain_alloc(unsigned type)
+static struct iommu_domain *ipmmu_domain_alloc_paging(struct device *dev)
{
struct ipmmu_vmsa_domain *domain;
- if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_DMA)
- return NULL;
-
domain = kzalloc(sizeof(*domain), GFP_KERNEL);
if (!domain)
return NULL;
@@ -627,6 +635,36 @@ static int ipmmu_attach_device(struct iommu_domain *io_domain,
return 0;
}
+static int ipmmu_iommu_identity_attach(struct iommu_domain *identity_domain,
+ struct device *dev)
+{
+ struct iommu_domain *io_domain = iommu_get_domain_for_dev(dev);
+ struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
+ struct ipmmu_vmsa_domain *domain;
+ unsigned int i;
+
+ if (io_domain == identity_domain || !io_domain)
+ return 0;
+
+ domain = to_vmsa_domain(io_domain);
+ for (i = 0; i < fwspec->num_ids; ++i)
+ ipmmu_utlb_disable(domain, fwspec->ids[i]);
+
+ /*
+ * TODO: Optimize by disabling the context when no device is attached.
+ */
+ return 0;
+}
+
+static struct iommu_domain_ops ipmmu_iommu_identity_ops = {
+ .attach_dev = ipmmu_iommu_identity_attach,
+};
+
+static struct iommu_domain ipmmu_iommu_identity_domain = {
+ .type = IOMMU_DOMAIN_IDENTITY,
+ .ops = &ipmmu_iommu_identity_ops,
+};
+
static int ipmmu_map(struct iommu_domain *io_domain, unsigned long iova,
phys_addr_t paddr, size_t pgsize, size_t pgcount,
int prot, gfp_t gfp, size_t *mapped)
@@ -833,28 +871,18 @@ static void ipmmu_release_device(struct device *dev)
arm_iommu_release_mapping(mmu->mapping);
}
-static struct iommu_group *ipmmu_find_group(struct device *dev)
-{
- struct ipmmu_vmsa_device *mmu = to_ipmmu(dev);
- struct iommu_group *group;
-
- if (mmu->group)
- return iommu_group_ref_get(mmu->group);
-
- group = iommu_group_alloc();
- if (!IS_ERR(group))
- mmu->group = group;
-
- return group;
-}
-
static const struct iommu_ops ipmmu_ops = {
- .domain_alloc = ipmmu_domain_alloc,
+ .identity_domain = &ipmmu_iommu_identity_domain,
+ .domain_alloc_paging = ipmmu_domain_alloc_paging,
.probe_device = ipmmu_probe_device,
.release_device = ipmmu_release_device,
.probe_finalize = ipmmu_probe_finalize,
+ /*
+ * FIXME: The device grouping is a fixed property of the hardware's
+ * ability to isolate and control DMA, it should not depend on kconfig.
+ */
.device_group = IS_ENABLED(CONFIG_ARM) && !IS_ENABLED(CONFIG_IOMMU_DMA)
- ? generic_device_group : ipmmu_find_group,
+ ? generic_device_group : generic_single_device_group,
.pgsize_bitmap = SZ_1G | SZ_2M | SZ_4K,
.of_xlate = ipmmu_of_xlate,
.default_domain_ops = &(const struct iommu_domain_ops) {
diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c
index 79d89bad5132..f86af9815d6f 100644
--- a/drivers/iommu/msm_iommu.c
+++ b/drivers/iommu/msm_iommu.c
@@ -302,13 +302,10 @@ static void __program_context(void __iomem *base, int ctx,
SET_M(base, ctx, 1);
}
-static struct iommu_domain *msm_iommu_domain_alloc(unsigned type)
+static struct iommu_domain *msm_iommu_domain_alloc_paging(struct device *dev)
{
struct msm_priv *priv;
- if (type != IOMMU_DOMAIN_UNMANAGED)
- return NULL;
-
priv = kzalloc(sizeof(*priv), GFP_KERNEL);
if (!priv)
goto fail_nomem;
@@ -443,15 +440,20 @@ fail:
return ret;
}
-static void msm_iommu_set_platform_dma(struct device *dev)
+static int msm_iommu_identity_attach(struct iommu_domain *identity_domain,
+ struct device *dev)
{
struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
- struct msm_priv *priv = to_msm_priv(domain);
+ struct msm_priv *priv;
unsigned long flags;
struct msm_iommu_dev *iommu;
struct msm_iommu_ctx_dev *master;
- int ret;
+ int ret = 0;
+
+ if (domain == identity_domain || !domain)
+ return 0;
+ priv = to_msm_priv(domain);
free_io_pgtable_ops(priv->iop);
spin_lock_irqsave(&msm_iommu_lock, flags);
@@ -468,8 +470,18 @@ static void msm_iommu_set_platform_dma(struct device *dev)
}
fail:
spin_unlock_irqrestore(&msm_iommu_lock, flags);
+ return ret;
}
+static struct iommu_domain_ops msm_iommu_identity_ops = {
+ .attach_dev = msm_iommu_identity_attach,
+};
+
+static struct iommu_domain msm_iommu_identity_domain = {
+ .type = IOMMU_DOMAIN_IDENTITY,
+ .ops = &msm_iommu_identity_ops,
+};
+
static int msm_iommu_map(struct iommu_domain *domain, unsigned long iova,
phys_addr_t pa, size_t pgsize, size_t pgcount,
int prot, gfp_t gfp, size_t *mapped)
@@ -486,12 +498,13 @@ static int msm_iommu_map(struct iommu_domain *domain, unsigned long iova,
return ret;
}
-static void msm_iommu_sync_map(struct iommu_domain *domain, unsigned long iova,
- size_t size)
+static int msm_iommu_sync_map(struct iommu_domain *domain, unsigned long iova,
+ size_t size)
{
struct msm_priv *priv = to_msm_priv(domain);
__flush_iotlb_range(iova, size, SZ_4K, false, priv);
+ return 0;
}
static size_t msm_iommu_unmap(struct iommu_domain *domain, unsigned long iova,
@@ -675,10 +688,10 @@ fail:
}
static struct iommu_ops msm_iommu_ops = {
- .domain_alloc = msm_iommu_domain_alloc,
+ .identity_domain = &msm_iommu_identity_domain,
+ .domain_alloc_paging = msm_iommu_domain_alloc_paging,
.probe_device = msm_iommu_probe_device,
.device_group = generic_device_group,
- .set_platform_dma_ops = msm_iommu_set_platform_dma,
.pgsize_bitmap = MSM_IOMMU_PGSIZES,
.of_xlate = qcom_iommu_of_xlate,
.default_domain_ops = &(const struct iommu_domain_ops) {
diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index fab6c347ce57..75279500a4a8 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -688,13 +688,10 @@ update_iova_region:
return 0;
}
-static struct iommu_domain *mtk_iommu_domain_alloc(unsigned type)
+static struct iommu_domain *mtk_iommu_domain_alloc_paging(struct device *dev)
{
struct mtk_iommu_domain *dom;
- if (type != IOMMU_DOMAIN_DMA && type != IOMMU_DOMAIN_UNMANAGED)
- return NULL;
-
dom = kzalloc(sizeof(*dom), GFP_KERNEL);
if (!dom)
return NULL;
@@ -776,6 +773,28 @@ err_unlock:
return ret;
}
+static int mtk_iommu_identity_attach(struct iommu_domain *identity_domain,
+ struct device *dev)
+{
+ struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
+ struct mtk_iommu_data *data = dev_iommu_priv_get(dev);
+
+ if (domain == identity_domain || !domain)
+ return 0;
+
+ mtk_iommu_config(data, dev, false, 0);
+ return 0;
+}
+
+static struct iommu_domain_ops mtk_iommu_identity_ops = {
+ .attach_dev = mtk_iommu_identity_attach,
+};
+
+static struct iommu_domain mtk_iommu_identity_domain = {
+ .type = IOMMU_DOMAIN_IDENTITY,
+ .ops = &mtk_iommu_identity_ops,
+};
+
static int mtk_iommu_map(struct iommu_domain *domain, unsigned long iova,
phys_addr_t paddr, size_t pgsize, size_t pgcount,
int prot, gfp_t gfp, size_t *mapped)
@@ -817,12 +836,13 @@ static void mtk_iommu_iotlb_sync(struct iommu_domain *domain,
mtk_iommu_tlb_flush_range_sync(gather->start, length, dom->bank);
}
-static void mtk_iommu_sync_map(struct iommu_domain *domain, unsigned long iova,
- size_t size)
+static int mtk_iommu_sync_map(struct iommu_domain *domain, unsigned long iova,
+ size_t size)
{
struct mtk_iommu_domain *dom = to_mtk_domain(domain);
mtk_iommu_tlb_flush_range_sync(iova, size, dom->bank);
+ return 0;
}
static phys_addr_t mtk_iommu_iova_to_phys(struct iommu_domain *domain,
@@ -995,7 +1015,8 @@ static void mtk_iommu_get_resv_regions(struct device *dev,
}
static const struct iommu_ops mtk_iommu_ops = {
- .domain_alloc = mtk_iommu_domain_alloc,
+ .identity_domain = &mtk_iommu_identity_domain,
+ .domain_alloc_paging = mtk_iommu_domain_alloc_paging,
.probe_device = mtk_iommu_probe_device,
.release_device = mtk_iommu_release_device,
.device_group = mtk_iommu_device_group,
diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c
index 8a0a5e5d049f..67e044c1a7d9 100644
--- a/drivers/iommu/mtk_iommu_v1.c
+++ b/drivers/iommu/mtk_iommu_v1.c
@@ -270,13 +270,10 @@ static int mtk_iommu_v1_domain_finalise(struct mtk_iommu_v1_data *data)
return 0;
}
-static struct iommu_domain *mtk_iommu_v1_domain_alloc(unsigned type)
+static struct iommu_domain *mtk_iommu_v1_domain_alloc_paging(struct device *dev)
{
struct mtk_iommu_v1_domain *dom;
- if (type != IOMMU_DOMAIN_UNMANAGED)
- return NULL;
-
dom = kzalloc(sizeof(*dom), GFP_KERNEL);
if (!dom)
return NULL;
@@ -319,13 +316,24 @@ static int mtk_iommu_v1_attach_device(struct iommu_domain *domain, struct device
return 0;
}
-static void mtk_iommu_v1_set_platform_dma(struct device *dev)
+static int mtk_iommu_v1_identity_attach(struct iommu_domain *identity_domain,
+ struct device *dev)
{
struct mtk_iommu_v1_data *data = dev_iommu_priv_get(dev);
mtk_iommu_v1_config(data, dev, false);
+ return 0;
}
+static struct iommu_domain_ops mtk_iommu_v1_identity_ops = {
+ .attach_dev = mtk_iommu_v1_identity_attach,
+};
+
+static struct iommu_domain mtk_iommu_v1_identity_domain = {
+ .type = IOMMU_DOMAIN_IDENTITY,
+ .ops = &mtk_iommu_v1_identity_ops,
+};
+
static int mtk_iommu_v1_map(struct iommu_domain *domain, unsigned long iova,
phys_addr_t paddr, size_t pgsize, size_t pgcount,
int prot, gfp_t gfp, size_t *mapped)
@@ -441,11 +449,6 @@ static int mtk_iommu_v1_create_mapping(struct device *dev, struct of_phandle_arg
return 0;
}
-static int mtk_iommu_v1_def_domain_type(struct device *dev)
-{
- return IOMMU_DOMAIN_UNMANAGED;
-}
-
static struct iommu_device *mtk_iommu_v1_probe_device(struct device *dev)
{
struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
@@ -578,14 +581,13 @@ static int mtk_iommu_v1_hw_init(const struct mtk_iommu_v1_data *data)
}
static const struct iommu_ops mtk_iommu_v1_ops = {
- .domain_alloc = mtk_iommu_v1_domain_alloc,
+ .identity_domain = &mtk_iommu_v1_identity_domain,
+ .domain_alloc_paging = mtk_iommu_v1_domain_alloc_paging,
.probe_device = mtk_iommu_v1_probe_device,
.probe_finalize = mtk_iommu_v1_probe_finalize,
.release_device = mtk_iommu_v1_release_device,
- .def_domain_type = mtk_iommu_v1_def_domain_type,
.device_group = generic_device_group,
.pgsize_bitmap = MT2701_IOMMU_PAGE_SIZE,
- .set_platform_dma_ops = mtk_iommu_v1_set_platform_dma,
.owner = THIS_MODULE,
.default_domain_ops = &(const struct iommu_domain_ops) {
.attach_dev = mtk_iommu_v1_attach_device,
diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c
index 537e402f9bba..c66b070841dd 100644
--- a/drivers/iommu/omap-iommu.c
+++ b/drivers/iommu/omap-iommu.c
@@ -1225,18 +1225,15 @@ static int omap_iommu_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, obj);
if (omap_iommu_can_register(pdev)) {
- obj->group = iommu_group_alloc();
- if (IS_ERR(obj->group))
- return PTR_ERR(obj->group);
-
err = iommu_device_sysfs_add(&obj->iommu, obj->dev, NULL,
obj->name);
if (err)
- goto out_group;
+ return err;
err = iommu_device_register(&obj->iommu, &omap_iommu_ops, &pdev->dev);
if (err)
goto out_sysfs;
+ obj->has_iommu_driver = true;
}
pm_runtime_enable(obj->dev);
@@ -1252,8 +1249,6 @@ static int omap_iommu_probe(struct platform_device *pdev)
out_sysfs:
iommu_device_sysfs_remove(&obj->iommu);
-out_group:
- iommu_group_put(obj->group);
return err;
}
@@ -1261,10 +1256,7 @@ static void omap_iommu_remove(struct platform_device *pdev)
{
struct omap_iommu *obj = platform_get_drvdata(pdev);
- if (obj->group) {
- iommu_group_put(obj->group);
- obj->group = NULL;
-
+ if (obj->has_iommu_driver) {
iommu_device_sysfs_remove(&obj->iommu);
iommu_device_unregister(&obj->iommu);
}
@@ -1318,7 +1310,8 @@ static u32 iotlb_init_entry(struct iotlb_entry *e, u32 da, u32 pa, int pgsz)
}
static int omap_iommu_map(struct iommu_domain *domain, unsigned long da,
- phys_addr_t pa, size_t bytes, int prot, gfp_t gfp)
+ phys_addr_t pa, size_t bytes, size_t count,
+ int prot, gfp_t gfp, size_t *mapped)
{
struct omap_iommu_domain *omap_domain = to_omap_domain(domain);
struct device *dev = omap_domain->dev;
@@ -1356,13 +1349,15 @@ static int omap_iommu_map(struct iommu_domain *domain, unsigned long da,
oiommu = iommu->iommu_dev;
iopgtable_clear_entry(oiommu, da);
}
+ } else {
+ *mapped = bytes;
}
return ret;
}
static size_t omap_iommu_unmap(struct iommu_domain *domain, unsigned long da,
- size_t size, struct iommu_iotlb_gather *gather)
+ size_t size, size_t count, struct iommu_iotlb_gather *gather)
{
struct omap_iommu_domain *omap_domain = to_omap_domain(domain);
struct device *dev = omap_domain->dev;
@@ -1555,23 +1550,35 @@ static void _omap_iommu_detach_dev(struct omap_iommu_domain *omap_domain,
omap_domain->dev = NULL;
}
-static void omap_iommu_set_platform_dma(struct device *dev)
+static int omap_iommu_identity_attach(struct iommu_domain *identity_domain,
+ struct device *dev)
{
struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
- struct omap_iommu_domain *omap_domain = to_omap_domain(domain);
+ struct omap_iommu_domain *omap_domain;
+
+ if (domain == identity_domain || !domain)
+ return 0;
+ omap_domain = to_omap_domain(domain);
spin_lock(&omap_domain->lock);
_omap_iommu_detach_dev(omap_domain, dev);
spin_unlock(&omap_domain->lock);
+ return 0;
}
-static struct iommu_domain *omap_iommu_domain_alloc(unsigned type)
+static struct iommu_domain_ops omap_iommu_identity_ops = {
+ .attach_dev = omap_iommu_identity_attach,
+};
+
+static struct iommu_domain omap_iommu_identity_domain = {
+ .type = IOMMU_DOMAIN_IDENTITY,
+ .ops = &omap_iommu_identity_ops,
+};
+
+static struct iommu_domain *omap_iommu_domain_alloc_paging(struct device *dev)
{
struct omap_iommu_domain *omap_domain;
- if (type != IOMMU_DOMAIN_UNMANAGED)
- return NULL;
-
omap_domain = kzalloc(sizeof(*omap_domain), GFP_KERNEL);
if (!omap_domain)
return NULL;
@@ -1717,31 +1724,17 @@ static void omap_iommu_release_device(struct device *dev)
}
-static struct iommu_group *omap_iommu_device_group(struct device *dev)
-{
- struct omap_iommu_arch_data *arch_data = dev_iommu_priv_get(dev);
- struct iommu_group *group = ERR_PTR(-EINVAL);
-
- if (!arch_data)
- return ERR_PTR(-ENODEV);
-
- if (arch_data->iommu_dev)
- group = iommu_group_ref_get(arch_data->iommu_dev->group);
-
- return group;
-}
-
static const struct iommu_ops omap_iommu_ops = {
- .domain_alloc = omap_iommu_domain_alloc,
+ .identity_domain = &omap_iommu_identity_domain,
+ .domain_alloc_paging = omap_iommu_domain_alloc_paging,
.probe_device = omap_iommu_probe_device,
.release_device = omap_iommu_release_device,
- .device_group = omap_iommu_device_group,
- .set_platform_dma_ops = omap_iommu_set_platform_dma,
+ .device_group = generic_single_device_group,
.pgsize_bitmap = OMAP_IOMMU_PGSIZES,
.default_domain_ops = &(const struct iommu_domain_ops) {
.attach_dev = omap_iommu_attach_dev,
- .map = omap_iommu_map,
- .unmap = omap_iommu_unmap,
+ .map_pages = omap_iommu_map,
+ .unmap_pages = omap_iommu_unmap,
.iova_to_phys = omap_iommu_iova_to_phys,
.free = omap_iommu_domain_free,
}
diff --git a/drivers/iommu/omap-iommu.h b/drivers/iommu/omap-iommu.h
index 18ee713ede78..27697109ec79 100644
--- a/drivers/iommu/omap-iommu.h
+++ b/drivers/iommu/omap-iommu.h
@@ -80,7 +80,7 @@ struct omap_iommu {
u32 id;
struct iommu_device iommu;
- struct iommu_group *group;
+ bool has_iommu_driver;
u8 pwrst;
};
diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c
index 8ff69fbf9f65..2685861c0a12 100644
--- a/drivers/iommu/rockchip-iommu.c
+++ b/drivers/iommu/rockchip-iommu.c
@@ -113,7 +113,6 @@ struct rk_iommu {
struct iommu_device iommu;
struct list_head node; /* entry in rk_iommu_domain.iommus */
struct iommu_domain *domain; /* domain to which iommu is attached */
- struct iommu_group *group;
};
struct rk_iommudata {
@@ -817,7 +816,8 @@ unwind:
}
static int rk_iommu_map(struct iommu_domain *domain, unsigned long _iova,
- phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
+ phys_addr_t paddr, size_t size, size_t count,
+ int prot, gfp_t gfp, size_t *mapped)
{
struct rk_iommu_domain *rk_domain = to_rk_domain(domain);
unsigned long flags;
@@ -850,12 +850,14 @@ static int rk_iommu_map(struct iommu_domain *domain, unsigned long _iova,
paddr, size, prot);
spin_unlock_irqrestore(&rk_domain->dt_lock, flags);
+ if (!ret)
+ *mapped = size;
return ret;
}
static size_t rk_iommu_unmap(struct iommu_domain *domain, unsigned long _iova,
- size_t size, struct iommu_iotlb_gather *gather)
+ size_t size, size_t count, struct iommu_iotlb_gather *gather)
{
struct rk_iommu_domain *rk_domain = to_rk_domain(domain);
unsigned long flags;
@@ -989,13 +991,8 @@ static int rk_iommu_identity_attach(struct iommu_domain *identity_domain,
return 0;
}
-static void rk_iommu_identity_free(struct iommu_domain *domain)
-{
-}
-
static struct iommu_domain_ops rk_identity_ops = {
.attach_dev = rk_iommu_identity_attach,
- .free = rk_iommu_identity_free,
};
static struct iommu_domain rk_identity_domain = {
@@ -1003,13 +1000,6 @@ static struct iommu_domain rk_identity_domain = {
.ops = &rk_identity_ops,
};
-#ifdef CONFIG_ARM
-static void rk_iommu_set_platform_dma(struct device *dev)
-{
- WARN_ON(rk_iommu_identity_attach(&rk_identity_domain, dev));
-}
-#endif
-
static int rk_iommu_attach_device(struct iommu_domain *domain,
struct device *dev)
{
@@ -1055,16 +1045,10 @@ static int rk_iommu_attach_device(struct iommu_domain *domain,
return ret;
}
-static struct iommu_domain *rk_iommu_domain_alloc(unsigned type)
+static struct iommu_domain *rk_iommu_domain_alloc_paging(struct device *dev)
{
struct rk_iommu_domain *rk_domain;
- if (type == IOMMU_DOMAIN_IDENTITY)
- return &rk_identity_domain;
-
- if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_DMA)
- return NULL;
-
if (!dma_dev)
return NULL;
@@ -1155,15 +1139,6 @@ static void rk_iommu_release_device(struct device *dev)
device_link_del(data->link);
}
-static struct iommu_group *rk_iommu_device_group(struct device *dev)
-{
- struct rk_iommu *iommu;
-
- iommu = rk_iommu_from_dev(dev);
-
- return iommu_group_ref_get(iommu->group);
-}
-
static int rk_iommu_of_xlate(struct device *dev,
struct of_phandle_args *args)
{
@@ -1186,19 +1161,17 @@ static int rk_iommu_of_xlate(struct device *dev,
}
static const struct iommu_ops rk_iommu_ops = {
- .domain_alloc = rk_iommu_domain_alloc,
+ .identity_domain = &rk_identity_domain,
+ .domain_alloc_paging = rk_iommu_domain_alloc_paging,
.probe_device = rk_iommu_probe_device,
.release_device = rk_iommu_release_device,
- .device_group = rk_iommu_device_group,
-#ifdef CONFIG_ARM
- .set_platform_dma_ops = rk_iommu_set_platform_dma,
-#endif
+ .device_group = generic_single_device_group,
.pgsize_bitmap = RK_IOMMU_PGSIZE_BITMAP,
.of_xlate = rk_iommu_of_xlate,
.default_domain_ops = &(const struct iommu_domain_ops) {
.attach_dev = rk_iommu_attach_device,
- .map = rk_iommu_map,
- .unmap = rk_iommu_unmap,
+ .map_pages = rk_iommu_map,
+ .unmap_pages = rk_iommu_unmap,
.iova_to_phys = rk_iommu_iova_to_phys,
.free = rk_iommu_domain_free,
}
@@ -1280,15 +1253,9 @@ static int rk_iommu_probe(struct platform_device *pdev)
if (err)
return err;
- iommu->group = iommu_group_alloc();
- if (IS_ERR(iommu->group)) {
- err = PTR_ERR(iommu->group);
- goto err_unprepare_clocks;
- }
-
err = iommu_device_sysfs_add(&iommu->iommu, dev, NULL, dev_name(dev));
if (err)
- goto err_put_group;
+ goto err_unprepare_clocks;
err = iommu_device_register(&iommu->iommu, &rk_iommu_ops, dev);
if (err)
@@ -1325,8 +1292,6 @@ err_pm_disable:
pm_runtime_disable(dev);
err_remove_sysfs:
iommu_device_sysfs_remove(&iommu->iommu);
-err_put_group:
- iommu_group_put(iommu->group);
err_unprepare_clocks:
clk_bulk_unprepare(iommu->num_clocks, iommu->clocks);
return err;
diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index fbf59a8db29b..9a5196f523de 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -14,16 +14,300 @@
#include <linux/rcupdate.h>
#include <asm/pci_dma.h>
+#include "dma-iommu.h"
+
static const struct iommu_ops s390_iommu_ops;
+static struct kmem_cache *dma_region_table_cache;
+static struct kmem_cache *dma_page_table_cache;
+
+static u64 s390_iommu_aperture;
+static u32 s390_iommu_aperture_factor = 1;
+
struct s390_domain {
struct iommu_domain domain;
struct list_head devices;
+ struct zpci_iommu_ctrs ctrs;
unsigned long *dma_table;
spinlock_t list_lock;
struct rcu_head rcu;
};
+static inline unsigned int calc_rtx(dma_addr_t ptr)
+{
+ return ((unsigned long)ptr >> ZPCI_RT_SHIFT) & ZPCI_INDEX_MASK;
+}
+
+static inline unsigned int calc_sx(dma_addr_t ptr)
+{
+ return ((unsigned long)ptr >> ZPCI_ST_SHIFT) & ZPCI_INDEX_MASK;
+}
+
+static inline unsigned int calc_px(dma_addr_t ptr)
+{
+ return ((unsigned long)ptr >> PAGE_SHIFT) & ZPCI_PT_MASK;
+}
+
+static inline void set_pt_pfaa(unsigned long *entry, phys_addr_t pfaa)
+{
+ *entry &= ZPCI_PTE_FLAG_MASK;
+ *entry |= (pfaa & ZPCI_PTE_ADDR_MASK);
+}
+
+static inline void set_rt_sto(unsigned long *entry, phys_addr_t sto)
+{
+ *entry &= ZPCI_RTE_FLAG_MASK;
+ *entry |= (sto & ZPCI_RTE_ADDR_MASK);
+ *entry |= ZPCI_TABLE_TYPE_RTX;
+}
+
+static inline void set_st_pto(unsigned long *entry, phys_addr_t pto)
+{
+ *entry &= ZPCI_STE_FLAG_MASK;
+ *entry |= (pto & ZPCI_STE_ADDR_MASK);
+ *entry |= ZPCI_TABLE_TYPE_SX;
+}
+
+static inline void validate_rt_entry(unsigned long *entry)
+{
+ *entry &= ~ZPCI_TABLE_VALID_MASK;
+ *entry &= ~ZPCI_TABLE_OFFSET_MASK;
+ *entry |= ZPCI_TABLE_VALID;
+ *entry |= ZPCI_TABLE_LEN_RTX;
+}
+
+static inline void validate_st_entry(unsigned long *entry)
+{
+ *entry &= ~ZPCI_TABLE_VALID_MASK;
+ *entry |= ZPCI_TABLE_VALID;
+}
+
+static inline void invalidate_pt_entry(unsigned long *entry)
+{
+ WARN_ON_ONCE((*entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_INVALID);
+ *entry &= ~ZPCI_PTE_VALID_MASK;
+ *entry |= ZPCI_PTE_INVALID;
+}
+
+static inline void validate_pt_entry(unsigned long *entry)
+{
+ WARN_ON_ONCE((*entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID);
+ *entry &= ~ZPCI_PTE_VALID_MASK;
+ *entry |= ZPCI_PTE_VALID;
+}
+
+static inline void entry_set_protected(unsigned long *entry)
+{
+ *entry &= ~ZPCI_TABLE_PROT_MASK;
+ *entry |= ZPCI_TABLE_PROTECTED;
+}
+
+static inline void entry_clr_protected(unsigned long *entry)
+{
+ *entry &= ~ZPCI_TABLE_PROT_MASK;
+ *entry |= ZPCI_TABLE_UNPROTECTED;
+}
+
+static inline int reg_entry_isvalid(unsigned long entry)
+{
+ return (entry & ZPCI_TABLE_VALID_MASK) == ZPCI_TABLE_VALID;
+}
+
+static inline int pt_entry_isvalid(unsigned long entry)
+{
+ return (entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID;
+}
+
+static inline unsigned long *get_rt_sto(unsigned long entry)
+{
+ if ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_RTX)
+ return phys_to_virt(entry & ZPCI_RTE_ADDR_MASK);
+ else
+ return NULL;
+}
+
+static inline unsigned long *get_st_pto(unsigned long entry)
+{
+ if ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_SX)
+ return phys_to_virt(entry & ZPCI_STE_ADDR_MASK);
+ else
+ return NULL;
+}
+
+static int __init dma_alloc_cpu_table_caches(void)
+{
+ dma_region_table_cache = kmem_cache_create("PCI_DMA_region_tables",
+ ZPCI_TABLE_SIZE,
+ ZPCI_TABLE_ALIGN,
+ 0, NULL);
+ if (!dma_region_table_cache)
+ return -ENOMEM;
+
+ dma_page_table_cache = kmem_cache_create("PCI_DMA_page_tables",
+ ZPCI_PT_SIZE,
+ ZPCI_PT_ALIGN,
+ 0, NULL);
+ if (!dma_page_table_cache) {
+ kmem_cache_destroy(dma_region_table_cache);
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+static unsigned long *dma_alloc_cpu_table(gfp_t gfp)
+{
+ unsigned long *table, *entry;
+
+ table = kmem_cache_alloc(dma_region_table_cache, gfp);
+ if (!table)
+ return NULL;
+
+ for (entry = table; entry < table + ZPCI_TABLE_ENTRIES; entry++)
+ *entry = ZPCI_TABLE_INVALID;
+ return table;
+}
+
+static void dma_free_cpu_table(void *table)
+{
+ kmem_cache_free(dma_region_table_cache, table);
+}
+
+static void dma_free_page_table(void *table)
+{
+ kmem_cache_free(dma_page_table_cache, table);
+}
+
+static void dma_free_seg_table(unsigned long entry)
+{
+ unsigned long *sto = get_rt_sto(entry);
+ int sx;
+
+ for (sx = 0; sx < ZPCI_TABLE_ENTRIES; sx++)
+ if (reg_entry_isvalid(sto[sx]))
+ dma_free_page_table(get_st_pto(sto[sx]));
+
+ dma_free_cpu_table(sto);
+}
+
+static void dma_cleanup_tables(unsigned long *table)
+{
+ int rtx;
+
+ if (!table)
+ return;
+
+ for (rtx = 0; rtx < ZPCI_TABLE_ENTRIES; rtx++)
+ if (reg_entry_isvalid(table[rtx]))
+ dma_free_seg_table(table[rtx]);
+
+ dma_free_cpu_table(table);
+}
+
+static unsigned long *dma_alloc_page_table(gfp_t gfp)
+{
+ unsigned long *table, *entry;
+
+ table = kmem_cache_alloc(dma_page_table_cache, gfp);
+ if (!table)
+ return NULL;
+
+ for (entry = table; entry < table + ZPCI_PT_ENTRIES; entry++)
+ *entry = ZPCI_PTE_INVALID;
+ return table;
+}
+
+static unsigned long *dma_get_seg_table_origin(unsigned long *rtep, gfp_t gfp)
+{
+ unsigned long old_rte, rte;
+ unsigned long *sto;
+
+ rte = READ_ONCE(*rtep);
+ if (reg_entry_isvalid(rte)) {
+ sto = get_rt_sto(rte);
+ } else {
+ sto = dma_alloc_cpu_table(gfp);
+ if (!sto)
+ return NULL;
+
+ set_rt_sto(&rte, virt_to_phys(sto));
+ validate_rt_entry(&rte);
+ entry_clr_protected(&rte);
+
+ old_rte = cmpxchg(rtep, ZPCI_TABLE_INVALID, rte);
+ if (old_rte != ZPCI_TABLE_INVALID) {
+ /* Somone else was faster, use theirs */
+ dma_free_cpu_table(sto);
+ sto = get_rt_sto(old_rte);
+ }
+ }
+ return sto;
+}
+
+static unsigned long *dma_get_page_table_origin(unsigned long *step, gfp_t gfp)
+{
+ unsigned long old_ste, ste;
+ unsigned long *pto;
+
+ ste = READ_ONCE(*step);
+ if (reg_entry_isvalid(ste)) {
+ pto = get_st_pto(ste);
+ } else {
+ pto = dma_alloc_page_table(gfp);
+ if (!pto)
+ return NULL;
+ set_st_pto(&ste, virt_to_phys(pto));
+ validate_st_entry(&ste);
+ entry_clr_protected(&ste);
+
+ old_ste = cmpxchg(step, ZPCI_TABLE_INVALID, ste);
+ if (old_ste != ZPCI_TABLE_INVALID) {
+ /* Somone else was faster, use theirs */
+ dma_free_page_table(pto);
+ pto = get_st_pto(old_ste);
+ }
+ }
+ return pto;
+}
+
+static unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr, gfp_t gfp)
+{
+ unsigned long *sto, *pto;
+ unsigned int rtx, sx, px;
+
+ rtx = calc_rtx(dma_addr);
+ sto = dma_get_seg_table_origin(&rto[rtx], gfp);
+ if (!sto)
+ return NULL;
+
+ sx = calc_sx(dma_addr);
+ pto = dma_get_page_table_origin(&sto[sx], gfp);
+ if (!pto)
+ return NULL;
+
+ px = calc_px(dma_addr);
+ return &pto[px];
+}
+
+static void dma_update_cpu_trans(unsigned long *ptep, phys_addr_t page_addr, int flags)
+{
+ unsigned long pte;
+
+ pte = READ_ONCE(*ptep);
+ if (flags & ZPCI_PTE_INVALID) {
+ invalidate_pt_entry(&pte);
+ } else {
+ set_pt_pfaa(&pte, page_addr);
+ validate_pt_entry(&pte);
+ }
+
+ if (flags & ZPCI_TABLE_PROTECTED)
+ entry_set_protected(&pte);
+ else
+ entry_clr_protected(&pte);
+
+ xchg(ptep, pte);
+}
+
static struct s390_domain *to_s390_domain(struct iommu_domain *dom)
{
return container_of(dom, struct s390_domain, domain);
@@ -31,21 +315,22 @@ static struct s390_domain *to_s390_domain(struct iommu_domain *dom)
static bool s390_iommu_capable(struct device *dev, enum iommu_cap cap)
{
+ struct zpci_dev *zdev = to_zpci_dev(dev);
+
switch (cap) {
case IOMMU_CAP_CACHE_COHERENCY:
return true;
+ case IOMMU_CAP_DEFERRED_FLUSH:
+ return zdev->pft != PCI_FUNC_TYPE_ISM;
default:
return false;
}
}
-static struct iommu_domain *s390_domain_alloc(unsigned domain_type)
+static struct iommu_domain *s390_domain_alloc_paging(struct device *dev)
{
struct s390_domain *s390_domain;
- if (domain_type != IOMMU_DOMAIN_UNMANAGED)
- return NULL;
-
s390_domain = kzalloc(sizeof(*s390_domain), GFP_KERNEL);
if (!s390_domain)
return NULL;
@@ -84,14 +369,13 @@ static void s390_domain_free(struct iommu_domain *domain)
call_rcu(&s390_domain->rcu, s390_iommu_rcu_free_domain);
}
-static void __s390_iommu_detach_device(struct zpci_dev *zdev)
+static void s390_iommu_detach_device(struct iommu_domain *domain,
+ struct device *dev)
{
- struct s390_domain *s390_domain = zdev->s390_domain;
+ struct s390_domain *s390_domain = to_s390_domain(domain);
+ struct zpci_dev *zdev = to_zpci_dev(dev);
unsigned long flags;
- if (!s390_domain)
- return;
-
spin_lock_irqsave(&s390_domain->list_lock, flags);
list_del_rcu(&zdev->iommu_list);
spin_unlock_irqrestore(&s390_domain->list_lock, flags);
@@ -118,9 +402,7 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
return -EINVAL;
if (zdev->s390_domain)
- __s390_iommu_detach_device(zdev);
- else if (zdev->dma_table)
- zpci_dma_exit_device(zdev);
+ s390_iommu_detach_device(&zdev->s390_domain->domain, dev);
cc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
virt_to_phys(s390_domain->dma_table), &status);
@@ -130,7 +412,6 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
*/
if (cc && status != ZPCI_PCI_ST_FUNC_NOT_AVAIL)
return -EIO;
- zdev->dma_table = s390_domain->dma_table;
zdev->dma_table = s390_domain->dma_table;
zdev->s390_domain = s390_domain;
@@ -142,14 +423,6 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
return 0;
}
-static void s390_iommu_set_platform_dma(struct device *dev)
-{
- struct zpci_dev *zdev = to_zpci_dev(dev);
-
- __s390_iommu_detach_device(zdev);
- zpci_dma_init_device(zdev);
-}
-
static void s390_iommu_get_resv_regions(struct device *dev,
struct list_head *list)
{
@@ -190,6 +463,9 @@ static struct iommu_device *s390_iommu_probe_device(struct device *dev)
if (zdev->end_dma > ZPCI_TABLE_SIZE_RT - 1)
zdev->end_dma = ZPCI_TABLE_SIZE_RT - 1;
+ if (zdev->tlb_refresh)
+ dev->iommu->shadow_on_flush = 1;
+
return &zdev->iommu_dev;
}
@@ -202,7 +478,13 @@ static void s390_iommu_release_device(struct device *dev)
* to the device, but keep it attached to other devices in the group.
*/
if (zdev)
- __s390_iommu_detach_device(zdev);
+ s390_iommu_detach_device(&zdev->s390_domain->domain, dev);
+}
+
+static int zpci_refresh_all(struct zpci_dev *zdev)
+{
+ return zpci_refresh_trans((u64)zdev->fh << 32, zdev->start_dma,
+ zdev->end_dma - zdev->start_dma + 1);
}
static void s390_iommu_flush_iotlb_all(struct iommu_domain *domain)
@@ -212,8 +494,8 @@ static void s390_iommu_flush_iotlb_all(struct iommu_domain *domain)
rcu_read_lock();
list_for_each_entry_rcu(zdev, &s390_domain->devices, iommu_list) {
- zpci_refresh_trans((u64)zdev->fh << 32, zdev->start_dma,
- zdev->end_dma - zdev->start_dma + 1);
+ atomic64_inc(&s390_domain->ctrs.global_rpcits);
+ zpci_refresh_all(zdev);
}
rcu_read_unlock();
}
@@ -231,26 +513,40 @@ static void s390_iommu_iotlb_sync(struct iommu_domain *domain,
rcu_read_lock();
list_for_each_entry_rcu(zdev, &s390_domain->devices, iommu_list) {
+ atomic64_inc(&s390_domain->ctrs.sync_rpcits);
zpci_refresh_trans((u64)zdev->fh << 32, gather->start,
size);
}
rcu_read_unlock();
}
-static void s390_iommu_iotlb_sync_map(struct iommu_domain *domain,
- unsigned long iova, size_t size)
+static int s390_iommu_iotlb_sync_map(struct iommu_domain *domain,
+ unsigned long iova, size_t size)
{
struct s390_domain *s390_domain = to_s390_domain(domain);
struct zpci_dev *zdev;
+ int ret = 0;
rcu_read_lock();
list_for_each_entry_rcu(zdev, &s390_domain->devices, iommu_list) {
if (!zdev->tlb_refresh)
continue;
- zpci_refresh_trans((u64)zdev->fh << 32,
- iova, size);
+ atomic64_inc(&s390_domain->ctrs.sync_map_rpcits);
+ ret = zpci_refresh_trans((u64)zdev->fh << 32,
+ iova, size);
+ /*
+ * let the hypervisor discover invalidated entries
+ * allowing it to free IOVAs and unpin pages
+ */
+ if (ret == -ENOMEM) {
+ ret = zpci_refresh_all(zdev);
+ if (ret)
+ break;
+ }
}
rcu_read_unlock();
+
+ return ret;
}
static int s390_iommu_validate_trans(struct s390_domain *s390_domain,
@@ -330,16 +626,15 @@ static int s390_iommu_map_pages(struct iommu_domain *domain,
if (!IS_ALIGNED(iova | paddr, pgsize))
return -EINVAL;
- if (!(prot & IOMMU_READ))
- return -EINVAL;
-
if (!(prot & IOMMU_WRITE))
flags |= ZPCI_TABLE_PROTECTED;
rc = s390_iommu_validate_trans(s390_domain, paddr, iova,
- pgcount, flags, gfp);
- if (!rc)
+ pgcount, flags, gfp);
+ if (!rc) {
*mapped = size;
+ atomic64_add(pgcount, &s390_domain->ctrs.mapped_pages);
+ }
return rc;
}
@@ -395,12 +690,26 @@ static size_t s390_iommu_unmap_pages(struct iommu_domain *domain,
return 0;
iommu_iotlb_gather_add_range(gather, iova, size);
+ atomic64_add(pgcount, &s390_domain->ctrs.unmapped_pages);
return size;
}
+static void s390_iommu_probe_finalize(struct device *dev)
+{
+ iommu_setup_dma_ops(dev, 0, U64_MAX);
+}
+
+struct zpci_iommu_ctrs *zpci_get_iommu_ctrs(struct zpci_dev *zdev)
+{
+ if (!zdev || !zdev->s390_domain)
+ return NULL;
+ return &zdev->s390_domain->ctrs;
+}
+
int zpci_init_iommu(struct zpci_dev *zdev)
{
+ u64 aperture_size;
int rc = 0;
rc = iommu_device_sysfs_add(&zdev->iommu_dev, NULL, NULL,
@@ -412,6 +721,12 @@ int zpci_init_iommu(struct zpci_dev *zdev)
if (rc)
goto out_sysfs;
+ zdev->start_dma = PAGE_ALIGN(zdev->start_dma);
+ aperture_size = min3(s390_iommu_aperture,
+ ZPCI_TABLE_SIZE_RT - zdev->start_dma,
+ zdev->end_dma - zdev->start_dma + 1);
+ zdev->end_dma = zdev->start_dma + aperture_size - 1;
+
return 0;
out_sysfs:
@@ -427,13 +742,52 @@ void zpci_destroy_iommu(struct zpci_dev *zdev)
iommu_device_sysfs_remove(&zdev->iommu_dev);
}
+static int __init s390_iommu_setup(char *str)
+{
+ if (!strcmp(str, "strict")) {
+ pr_warn("s390_iommu=strict deprecated; use iommu.strict=1 instead\n");
+ iommu_set_dma_strict();
+ }
+ return 1;
+}
+
+__setup("s390_iommu=", s390_iommu_setup);
+
+static int __init s390_iommu_aperture_setup(char *str)
+{
+ if (kstrtou32(str, 10, &s390_iommu_aperture_factor))
+ s390_iommu_aperture_factor = 1;
+ return 1;
+}
+
+__setup("s390_iommu_aperture=", s390_iommu_aperture_setup);
+
+static int __init s390_iommu_init(void)
+{
+ int rc;
+
+ iommu_dma_forcedac = true;
+ s390_iommu_aperture = (u64)virt_to_phys(high_memory);
+ if (!s390_iommu_aperture_factor)
+ s390_iommu_aperture = ULONG_MAX;
+ else
+ s390_iommu_aperture *= s390_iommu_aperture_factor;
+
+ rc = dma_alloc_cpu_table_caches();
+ if (rc)
+ return rc;
+
+ return rc;
+}
+subsys_initcall(s390_iommu_init);
+
static const struct iommu_ops s390_iommu_ops = {
.capable = s390_iommu_capable,
- .domain_alloc = s390_domain_alloc,
+ .domain_alloc_paging = s390_domain_alloc_paging,
.probe_device = s390_iommu_probe_device,
+ .probe_finalize = s390_iommu_probe_finalize,
.release_device = s390_iommu_release_device,
.device_group = generic_device_group,
- .set_platform_dma_ops = s390_iommu_set_platform_dma,
.pgsize_bitmap = SZ_4K,
.get_resv_regions = s390_iommu_get_resv_regions,
.default_domain_ops = &(const struct iommu_domain_ops) {
diff --git a/drivers/iommu/sprd-iommu.c b/drivers/iommu/sprd-iommu.c
index 2fa9afebd4f5..2eb9fb46703b 100644
--- a/drivers/iommu/sprd-iommu.c
+++ b/drivers/iommu/sprd-iommu.c
@@ -70,7 +70,6 @@ struct sprd_iommu_device {
void __iomem *base;
struct device *dev;
struct iommu_device iommu;
- struct iommu_group *group;
struct clk *eb;
};
@@ -134,13 +133,10 @@ sprd_iommu_pgt_size(struct iommu_domain *domain)
SPRD_IOMMU_PAGE_SHIFT) * sizeof(u32);
}
-static struct iommu_domain *sprd_iommu_domain_alloc(unsigned int domain_type)
+static struct iommu_domain *sprd_iommu_domain_alloc_paging(struct device *dev)
{
struct sprd_iommu_domain *dom;
- if (domain_type != IOMMU_DOMAIN_DMA && domain_type != IOMMU_DOMAIN_UNMANAGED)
- return NULL;
-
dom = kzalloc(sizeof(*dom), GFP_KERNEL);
if (!dom)
return NULL;
@@ -345,8 +341,8 @@ static size_t sprd_iommu_unmap(struct iommu_domain *domain, unsigned long iova,
return size;
}
-static void sprd_iommu_sync_map(struct iommu_domain *domain,
- unsigned long iova, size_t size)
+static int sprd_iommu_sync_map(struct iommu_domain *domain,
+ unsigned long iova, size_t size)
{
struct sprd_iommu_domain *dom = to_sprd_domain(domain);
unsigned int reg;
@@ -358,6 +354,7 @@ static void sprd_iommu_sync_map(struct iommu_domain *domain,
/* clear IOMMU TLB buffer after page table updated */
sprd_iommu_write(dom->sdev, reg, 0xffffffff);
+ return 0;
}
static void sprd_iommu_sync(struct iommu_domain *domain,
@@ -399,13 +396,6 @@ static struct iommu_device *sprd_iommu_probe_device(struct device *dev)
return &sdev->iommu;
}
-static struct iommu_group *sprd_iommu_device_group(struct device *dev)
-{
- struct sprd_iommu_device *sdev = dev_iommu_priv_get(dev);
-
- return iommu_group_ref_get(sdev->group);
-}
-
static int sprd_iommu_of_xlate(struct device *dev, struct of_phandle_args *args)
{
struct platform_device *pdev;
@@ -421,9 +411,9 @@ static int sprd_iommu_of_xlate(struct device *dev, struct of_phandle_args *args)
static const struct iommu_ops sprd_iommu_ops = {
- .domain_alloc = sprd_iommu_domain_alloc,
+ .domain_alloc_paging = sprd_iommu_domain_alloc_paging,
.probe_device = sprd_iommu_probe_device,
- .device_group = sprd_iommu_device_group,
+ .device_group = generic_single_device_group,
.of_xlate = sprd_iommu_of_xlate,
.pgsize_bitmap = SPRD_IOMMU_PAGE_SIZE,
.owner = THIS_MODULE,
@@ -496,16 +486,9 @@ static int sprd_iommu_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, sdev);
sdev->dev = dev;
- /* All the client devices are in the same iommu-group */
- sdev->group = iommu_group_alloc();
- if (IS_ERR(sdev->group)) {
- ret = PTR_ERR(sdev->group);
- goto free_page;
- }
-
ret = iommu_device_sysfs_add(&sdev->iommu, dev, NULL, dev_name(dev));
if (ret)
- goto put_group;
+ goto free_page;
ret = iommu_device_register(&sdev->iommu, &sprd_iommu_ops, dev);
if (ret)
@@ -530,8 +513,6 @@ unregister_iommu:
iommu_device_unregister(&sdev->iommu);
remove_sysfs:
iommu_device_sysfs_remove(&sdev->iommu);
-put_group:
- iommu_group_put(sdev->group);
free_page:
dma_free_coherent(sdev->dev, SPRD_IOMMU_PAGE_SIZE, sdev->prot_page_va, sdev->prot_page_pa);
return ret;
@@ -543,9 +524,6 @@ static void sprd_iommu_remove(struct platform_device *pdev)
dma_free_coherent(sdev->dev, SPRD_IOMMU_PAGE_SIZE, sdev->prot_page_va, sdev->prot_page_pa);
- iommu_group_put(sdev->group);
- sdev->group = NULL;
-
platform_set_drvdata(pdev, NULL);
iommu_device_sysfs_remove(&sdev->iommu);
iommu_device_unregister(&sdev->iommu);
diff --git a/drivers/iommu/sun50i-iommu.c b/drivers/iommu/sun50i-iommu.c
index 74c5cb93e900..41484a5a399b 100644
--- a/drivers/iommu/sun50i-iommu.c
+++ b/drivers/iommu/sun50i-iommu.c
@@ -107,7 +107,6 @@ struct sun50i_iommu {
struct clk *clk;
struct iommu_domain *domain;
- struct iommu_group *group;
struct kmem_cache *pt_pool;
};
@@ -402,8 +401,8 @@ static void sun50i_iommu_flush_iotlb_all(struct iommu_domain *domain)
spin_unlock_irqrestore(&iommu->iommu_lock, flags);
}
-static void sun50i_iommu_iotlb_sync_map(struct iommu_domain *domain,
- unsigned long iova, size_t size)
+static int sun50i_iommu_iotlb_sync_map(struct iommu_domain *domain,
+ unsigned long iova, size_t size)
{
struct sun50i_iommu_domain *sun50i_domain = to_sun50i_domain(domain);
struct sun50i_iommu *iommu = sun50i_domain->iommu;
@@ -412,6 +411,8 @@ static void sun50i_iommu_iotlb_sync_map(struct iommu_domain *domain,
spin_lock_irqsave(&iommu->iommu_lock, flags);
sun50i_iommu_zap_range(iommu, iova, size);
spin_unlock_irqrestore(&iommu->iommu_lock, flags);
+
+ return 0;
}
static void sun50i_iommu_iotlb_sync(struct iommu_domain *domain,
@@ -589,7 +590,8 @@ static u32 *sun50i_dte_get_page_table(struct sun50i_iommu_domain *sun50i_domain,
}
static int sun50i_iommu_map(struct iommu_domain *domain, unsigned long iova,
- phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
+ phys_addr_t paddr, size_t size, size_t count,
+ int prot, gfp_t gfp, size_t *mapped)
{
struct sun50i_iommu_domain *sun50i_domain = to_sun50i_domain(domain);
struct sun50i_iommu *iommu = sun50i_domain->iommu;
@@ -616,13 +618,14 @@ static int sun50i_iommu_map(struct iommu_domain *domain, unsigned long iova,
*pte_addr = sun50i_mk_pte(paddr, prot);
sun50i_table_flush(sun50i_domain, pte_addr, 1);
+ *mapped = size;
out:
return ret;
}
static size_t sun50i_iommu_unmap(struct iommu_domain *domain, unsigned long iova,
- size_t size, struct iommu_iotlb_gather *gather)
+ size_t size, size_t count, struct iommu_iotlb_gather *gather)
{
struct sun50i_iommu_domain *sun50i_domain = to_sun50i_domain(domain);
phys_addr_t pt_phys;
@@ -667,14 +670,11 @@ static phys_addr_t sun50i_iommu_iova_to_phys(struct iommu_domain *domain,
sun50i_iova_get_page_offset(iova);
}
-static struct iommu_domain *sun50i_iommu_domain_alloc(unsigned type)
+static struct iommu_domain *
+sun50i_iommu_domain_alloc_paging(struct device *dev)
{
struct sun50i_iommu_domain *sun50i_domain;
- if (type != IOMMU_DOMAIN_DMA &&
- type != IOMMU_DOMAIN_UNMANAGED)
- return NULL;
-
sun50i_domain = kzalloc(sizeof(*sun50i_domain), GFP_KERNEL);
if (!sun50i_domain)
return NULL;
@@ -757,21 +757,32 @@ static void sun50i_iommu_detach_domain(struct sun50i_iommu *iommu,
iommu->domain = NULL;
}
-static void sun50i_iommu_detach_device(struct iommu_domain *domain,
- struct device *dev)
+static int sun50i_iommu_identity_attach(struct iommu_domain *identity_domain,
+ struct device *dev)
{
- struct sun50i_iommu_domain *sun50i_domain = to_sun50i_domain(domain);
struct sun50i_iommu *iommu = dev_iommu_priv_get(dev);
+ struct sun50i_iommu_domain *sun50i_domain;
dev_dbg(dev, "Detaching from IOMMU domain\n");
- if (iommu->domain != domain)
- return;
+ if (iommu->domain == identity_domain)
+ return 0;
+ sun50i_domain = to_sun50i_domain(iommu->domain);
if (refcount_dec_and_test(&sun50i_domain->refcnt))
sun50i_iommu_detach_domain(iommu, sun50i_domain);
+ return 0;
}
+static struct iommu_domain_ops sun50i_iommu_identity_ops = {
+ .attach_dev = sun50i_iommu_identity_attach,
+};
+
+static struct iommu_domain sun50i_iommu_identity_domain = {
+ .type = IOMMU_DOMAIN_IDENTITY,
+ .ops = &sun50i_iommu_identity_ops,
+};
+
static int sun50i_iommu_attach_device(struct iommu_domain *domain,
struct device *dev)
{
@@ -789,8 +800,7 @@ static int sun50i_iommu_attach_device(struct iommu_domain *domain,
if (iommu->domain == domain)
return 0;
- if (iommu->domain)
- sun50i_iommu_detach_device(iommu->domain, dev);
+ sun50i_iommu_identity_attach(&sun50i_iommu_identity_domain, dev);
sun50i_iommu_attach_domain(iommu, sun50i_domain);
@@ -808,13 +818,6 @@ static struct iommu_device *sun50i_iommu_probe_device(struct device *dev)
return &iommu->iommu;
}
-static struct iommu_group *sun50i_iommu_device_group(struct device *dev)
-{
- struct sun50i_iommu *iommu = sun50i_iommu_from_dev(dev);
-
- return iommu_group_ref_get(iommu->group);
-}
-
static int sun50i_iommu_of_xlate(struct device *dev,
struct of_phandle_args *args)
{
@@ -827,9 +830,10 @@ static int sun50i_iommu_of_xlate(struct device *dev,
}
static const struct iommu_ops sun50i_iommu_ops = {
+ .identity_domain = &sun50i_iommu_identity_domain,
.pgsize_bitmap = SZ_4K,
- .device_group = sun50i_iommu_device_group,
- .domain_alloc = sun50i_iommu_domain_alloc,
+ .device_group = generic_single_device_group,
+ .domain_alloc_paging = sun50i_iommu_domain_alloc_paging,
.of_xlate = sun50i_iommu_of_xlate,
.probe_device = sun50i_iommu_probe_device,
.default_domain_ops = &(const struct iommu_domain_ops) {
@@ -838,8 +842,8 @@ static const struct iommu_ops sun50i_iommu_ops = {
.iotlb_sync_map = sun50i_iommu_iotlb_sync_map,
.iotlb_sync = sun50i_iommu_iotlb_sync,
.iova_to_phys = sun50i_iommu_iova_to_phys,
- .map = sun50i_iommu_map,
- .unmap = sun50i_iommu_unmap,
+ .map_pages = sun50i_iommu_map,
+ .unmap_pages = sun50i_iommu_unmap,
.free = sun50i_iommu_domain_free,
}
};
@@ -985,6 +989,7 @@ static int sun50i_iommu_probe(struct platform_device *pdev)
if (!iommu)
return -ENOMEM;
spin_lock_init(&iommu->iommu_lock);
+ iommu->domain = &sun50i_iommu_identity_domain;
platform_set_drvdata(pdev, iommu);
iommu->dev = &pdev->dev;
@@ -995,42 +1000,36 @@ static int sun50i_iommu_probe(struct platform_device *pdev)
if (!iommu->pt_pool)
return -ENOMEM;
- iommu->group = iommu_group_alloc();
- if (IS_ERR(iommu->group)) {
- ret = PTR_ERR(iommu->group);
- goto err_free_cache;
- }
-
iommu->base = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(iommu->base)) {
ret = PTR_ERR(iommu->base);
- goto err_free_group;
+ goto err_free_cache;
}
irq = platform_get_irq(pdev, 0);
if (irq < 0) {
ret = irq;
- goto err_free_group;
+ goto err_free_cache;
}
iommu->clk = devm_clk_get(&pdev->dev, NULL);
if (IS_ERR(iommu->clk)) {
dev_err(&pdev->dev, "Couldn't get our clock.\n");
ret = PTR_ERR(iommu->clk);
- goto err_free_group;
+ goto err_free_cache;
}
iommu->reset = devm_reset_control_get(&pdev->dev, NULL);
if (IS_ERR(iommu->reset)) {
dev_err(&pdev->dev, "Couldn't get our reset line.\n");
ret = PTR_ERR(iommu->reset);
- goto err_free_group;
+ goto err_free_cache;
}
ret = iommu_device_sysfs_add(&iommu->iommu, &pdev->dev,
NULL, dev_name(&pdev->dev));
if (ret)
- goto err_free_group;
+ goto err_free_cache;
ret = iommu_device_register(&iommu->iommu, &sun50i_iommu_ops, &pdev->dev);
if (ret)
@@ -1049,9 +1048,6 @@ err_unregister:
err_remove_sysfs:
iommu_device_sysfs_remove(&iommu->iommu);
-err_free_group:
- iommu_group_put(iommu->group);
-
err_free_cache:
kmem_cache_destroy(iommu->pt_pool);
diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c
deleted file mode 100644
index a482ff838b53..000000000000
--- a/drivers/iommu/tegra-gart.c
+++ /dev/null
@@ -1,371 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * IOMMU API for Graphics Address Relocation Table on Tegra20
- *
- * Copyright (c) 2010-2012, NVIDIA CORPORATION. All rights reserved.
- *
- * Author: Hiroshi DOYU <[email protected]>
- */
-
-#define dev_fmt(fmt) "gart: " fmt
-
-#include <linux/io.h>
-#include <linux/iommu.h>
-#include <linux/moduleparam.h>
-#include <linux/platform_device.h>
-#include <linux/slab.h>
-#include <linux/spinlock.h>
-#include <linux/vmalloc.h>
-
-#include <soc/tegra/mc.h>
-
-#define GART_REG_BASE 0x24
-#define GART_CONFIG (0x24 - GART_REG_BASE)
-#define GART_ENTRY_ADDR (0x28 - GART_REG_BASE)
-#define GART_ENTRY_DATA (0x2c - GART_REG_BASE)
-
-#define GART_ENTRY_PHYS_ADDR_VALID BIT(31)
-
-#define GART_PAGE_SHIFT 12
-#define GART_PAGE_SIZE (1 << GART_PAGE_SHIFT)
-#define GART_PAGE_MASK GENMASK(30, GART_PAGE_SHIFT)
-
-/* bitmap of the page sizes currently supported */
-#define GART_IOMMU_PGSIZES (GART_PAGE_SIZE)
-
-struct gart_device {
- void __iomem *regs;
- u32 *savedata;
- unsigned long iovmm_base; /* offset to vmm_area start */
- unsigned long iovmm_end; /* offset to vmm_area end */
- spinlock_t pte_lock; /* for pagetable */
- spinlock_t dom_lock; /* for active domain */
- unsigned int active_devices; /* number of active devices */
- struct iommu_domain *active_domain; /* current active domain */
- struct iommu_device iommu; /* IOMMU Core handle */
- struct device *dev;
-};
-
-static struct gart_device *gart_handle; /* unique for a system */
-
-static bool gart_debug;
-
-/*
- * Any interaction between any block on PPSB and a block on APB or AHB
- * must have these read-back to ensure the APB/AHB bus transaction is
- * complete before initiating activity on the PPSB block.
- */
-#define FLUSH_GART_REGS(gart) readl_relaxed((gart)->regs + GART_CONFIG)
-
-#define for_each_gart_pte(gart, iova) \
- for (iova = gart->iovmm_base; \
- iova < gart->iovmm_end; \
- iova += GART_PAGE_SIZE)
-
-static inline void gart_set_pte(struct gart_device *gart,
- unsigned long iova, unsigned long pte)
-{
- writel_relaxed(iova, gart->regs + GART_ENTRY_ADDR);
- writel_relaxed(pte, gart->regs + GART_ENTRY_DATA);
-}
-
-static inline unsigned long gart_read_pte(struct gart_device *gart,
- unsigned long iova)
-{
- unsigned long pte;
-
- writel_relaxed(iova, gart->regs + GART_ENTRY_ADDR);
- pte = readl_relaxed(gart->regs + GART_ENTRY_DATA);
-
- return pte;
-}
-
-static void do_gart_setup(struct gart_device *gart, const u32 *data)
-{
- unsigned long iova;
-
- for_each_gart_pte(gart, iova)
- gart_set_pte(gart, iova, data ? *(data++) : 0);
-
- writel_relaxed(1, gart->regs + GART_CONFIG);
- FLUSH_GART_REGS(gart);
-}
-
-static inline bool gart_iova_range_invalid(struct gart_device *gart,
- unsigned long iova, size_t bytes)
-{
- return unlikely(iova < gart->iovmm_base || bytes != GART_PAGE_SIZE ||
- iova + bytes > gart->iovmm_end);
-}
-
-static inline bool gart_pte_valid(struct gart_device *gart, unsigned long iova)
-{
- return !!(gart_read_pte(gart, iova) & GART_ENTRY_PHYS_ADDR_VALID);
-}
-
-static int gart_iommu_attach_dev(struct iommu_domain *domain,
- struct device *dev)
-{
- struct gart_device *gart = gart_handle;
- int ret = 0;
-
- spin_lock(&gart->dom_lock);
-
- if (gart->active_domain && gart->active_domain != domain) {
- ret = -EINVAL;
- } else if (dev_iommu_priv_get(dev) != domain) {
- dev_iommu_priv_set(dev, domain);
- gart->active_domain = domain;
- gart->active_devices++;
- }
-
- spin_unlock(&gart->dom_lock);
-
- return ret;
-}
-
-static void gart_iommu_set_platform_dma(struct device *dev)
-{
- struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
- struct gart_device *gart = gart_handle;
-
- spin_lock(&gart->dom_lock);
-
- if (dev_iommu_priv_get(dev) == domain) {
- dev_iommu_priv_set(dev, NULL);
-
- if (--gart->active_devices == 0)
- gart->active_domain = NULL;
- }
-
- spin_unlock(&gart->dom_lock);
-}
-
-static struct iommu_domain *gart_iommu_domain_alloc(unsigned type)
-{
- struct iommu_domain *domain;
-
- if (type != IOMMU_DOMAIN_UNMANAGED)
- return NULL;
-
- domain = kzalloc(sizeof(*domain), GFP_KERNEL);
- if (domain) {
- domain->geometry.aperture_start = gart_handle->iovmm_base;
- domain->geometry.aperture_end = gart_handle->iovmm_end - 1;
- domain->geometry.force_aperture = true;
- }
-
- return domain;
-}
-
-static void gart_iommu_domain_free(struct iommu_domain *domain)
-{
- WARN_ON(gart_handle->active_domain == domain);
- kfree(domain);
-}
-
-static inline int __gart_iommu_map(struct gart_device *gart, unsigned long iova,
- unsigned long pa)
-{
- if (unlikely(gart_debug && gart_pte_valid(gart, iova))) {
- dev_err(gart->dev, "Page entry is in-use\n");
- return -EINVAL;
- }
-
- gart_set_pte(gart, iova, GART_ENTRY_PHYS_ADDR_VALID | pa);
-
- return 0;
-}
-
-static int gart_iommu_map(struct iommu_domain *domain, unsigned long iova,
- phys_addr_t pa, size_t bytes, int prot, gfp_t gfp)
-{
- struct gart_device *gart = gart_handle;
- int ret;
-
- if (gart_iova_range_invalid(gart, iova, bytes))
- return -EINVAL;
-
- spin_lock(&gart->pte_lock);
- ret = __gart_iommu_map(gart, iova, (unsigned long)pa);
- spin_unlock(&gart->pte_lock);
-
- return ret;
-}
-
-static inline int __gart_iommu_unmap(struct gart_device *gart,
- unsigned long iova)
-{
- if (unlikely(gart_debug && !gart_pte_valid(gart, iova))) {
- dev_err(gart->dev, "Page entry is invalid\n");
- return -EINVAL;
- }
-
- gart_set_pte(gart, iova, 0);
-
- return 0;
-}
-
-static size_t gart_iommu_unmap(struct iommu_domain *domain, unsigned long iova,
- size_t bytes, struct iommu_iotlb_gather *gather)
-{
- struct gart_device *gart = gart_handle;
- int err;
-
- if (gart_iova_range_invalid(gart, iova, bytes))
- return 0;
-
- spin_lock(&gart->pte_lock);
- err = __gart_iommu_unmap(gart, iova);
- spin_unlock(&gart->pte_lock);
-
- return err ? 0 : bytes;
-}
-
-static phys_addr_t gart_iommu_iova_to_phys(struct iommu_domain *domain,
- dma_addr_t iova)
-{
- struct gart_device *gart = gart_handle;
- unsigned long pte;
-
- if (gart_iova_range_invalid(gart, iova, GART_PAGE_SIZE))
- return -EINVAL;
-
- spin_lock(&gart->pte_lock);
- pte = gart_read_pte(gart, iova);
- spin_unlock(&gart->pte_lock);
-
- return pte & GART_PAGE_MASK;
-}
-
-static struct iommu_device *gart_iommu_probe_device(struct device *dev)
-{
- if (!dev_iommu_fwspec_get(dev))
- return ERR_PTR(-ENODEV);
-
- return &gart_handle->iommu;
-}
-
-static int gart_iommu_of_xlate(struct device *dev,
- struct of_phandle_args *args)
-{
- return 0;
-}
-
-static void gart_iommu_sync_map(struct iommu_domain *domain, unsigned long iova,
- size_t size)
-{
- FLUSH_GART_REGS(gart_handle);
-}
-
-static void gart_iommu_sync(struct iommu_domain *domain,
- struct iommu_iotlb_gather *gather)
-{
- size_t length = gather->end - gather->start + 1;
-
- gart_iommu_sync_map(domain, gather->start, length);
-}
-
-static const struct iommu_ops gart_iommu_ops = {
- .domain_alloc = gart_iommu_domain_alloc,
- .probe_device = gart_iommu_probe_device,
- .device_group = generic_device_group,
- .set_platform_dma_ops = gart_iommu_set_platform_dma,
- .pgsize_bitmap = GART_IOMMU_PGSIZES,
- .of_xlate = gart_iommu_of_xlate,
- .default_domain_ops = &(const struct iommu_domain_ops) {
- .attach_dev = gart_iommu_attach_dev,
- .map = gart_iommu_map,
- .unmap = gart_iommu_unmap,
- .iova_to_phys = gart_iommu_iova_to_phys,
- .iotlb_sync_map = gart_iommu_sync_map,
- .iotlb_sync = gart_iommu_sync,
- .free = gart_iommu_domain_free,
- }
-};
-
-int tegra_gart_suspend(struct gart_device *gart)
-{
- u32 *data = gart->savedata;
- unsigned long iova;
-
- /*
- * All GART users shall be suspended at this point. Disable
- * address translation to trap all GART accesses as invalid
- * memory accesses.
- */
- writel_relaxed(0, gart->regs + GART_CONFIG);
- FLUSH_GART_REGS(gart);
-
- for_each_gart_pte(gart, iova)
- *(data++) = gart_read_pte(gart, iova);
-
- return 0;
-}
-
-int tegra_gart_resume(struct gart_device *gart)
-{
- do_gart_setup(gart, gart->savedata);
-
- return 0;
-}
-
-struct gart_device *tegra_gart_probe(struct device *dev, struct tegra_mc *mc)
-{
- struct gart_device *gart;
- struct resource *res;
- int err;
-
- BUILD_BUG_ON(PAGE_SHIFT != GART_PAGE_SHIFT);
-
- /* the GART memory aperture is required */
- res = platform_get_resource(to_platform_device(dev), IORESOURCE_MEM, 1);
- if (!res) {
- dev_err(dev, "Memory aperture resource unavailable\n");
- return ERR_PTR(-ENXIO);
- }
-
- gart = kzalloc(sizeof(*gart), GFP_KERNEL);
- if (!gart)
- return ERR_PTR(-ENOMEM);
-
- gart_handle = gart;
-
- gart->dev = dev;
- gart->regs = mc->regs + GART_REG_BASE;
- gart->iovmm_base = res->start;
- gart->iovmm_end = res->end + 1;
- spin_lock_init(&gart->pte_lock);
- spin_lock_init(&gart->dom_lock);
-
- do_gart_setup(gart, NULL);
-
- err = iommu_device_sysfs_add(&gart->iommu, dev, NULL, "gart");
- if (err)
- goto free_gart;
-
- err = iommu_device_register(&gart->iommu, &gart_iommu_ops, dev);
- if (err)
- goto remove_sysfs;
-
- gart->savedata = vmalloc(resource_size(res) / GART_PAGE_SIZE *
- sizeof(u32));
- if (!gart->savedata) {
- err = -ENOMEM;
- goto unregister_iommu;
- }
-
- return gart;
-
-unregister_iommu:
- iommu_device_unregister(&gart->iommu);
-remove_sysfs:
- iommu_device_sysfs_remove(&gart->iommu);
-free_gart:
- kfree(gart);
-
- return ERR_PTR(err);
-}
-
-module_param(gart_debug, bool, 0644);
-MODULE_PARM_DESC(gart_debug, "Enable GART debugging");
diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c
index e445f80d0226..310871728ab4 100644
--- a/drivers/iommu/tegra-smmu.c
+++ b/drivers/iommu/tegra-smmu.c
@@ -272,13 +272,10 @@ static void tegra_smmu_free_asid(struct tegra_smmu *smmu, unsigned int id)
clear_bit(id, smmu->asids);
}
-static struct iommu_domain *tegra_smmu_domain_alloc(unsigned type)
+static struct iommu_domain *tegra_smmu_domain_alloc_paging(struct device *dev)
{
struct tegra_smmu_as *as;
- if (type != IOMMU_DOMAIN_UNMANAGED)
- return NULL;
-
as = kzalloc(sizeof(*as), GFP_KERNEL);
if (!as)
return NULL;
@@ -511,23 +508,39 @@ disable:
return err;
}
-static void tegra_smmu_set_platform_dma(struct device *dev)
+static int tegra_smmu_identity_attach(struct iommu_domain *identity_domain,
+ struct device *dev)
{
struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
- struct tegra_smmu_as *as = to_smmu_as(domain);
- struct tegra_smmu *smmu = as->smmu;
+ struct tegra_smmu_as *as;
+ struct tegra_smmu *smmu;
unsigned int index;
if (!fwspec)
- return;
+ return -ENODEV;
+ if (domain == identity_domain || !domain)
+ return 0;
+
+ as = to_smmu_as(domain);
+ smmu = as->smmu;
for (index = 0; index < fwspec->num_ids; index++) {
tegra_smmu_disable(smmu, fwspec->ids[index], as->id);
tegra_smmu_as_unprepare(smmu, as);
}
+ return 0;
}
+static struct iommu_domain_ops tegra_smmu_identity_ops = {
+ .attach_dev = tegra_smmu_identity_attach,
+};
+
+static struct iommu_domain tegra_smmu_identity_domain = {
+ .type = IOMMU_DOMAIN_IDENTITY,
+ .ops = &tegra_smmu_identity_ops,
+};
+
static void tegra_smmu_set_pde(struct tegra_smmu_as *as, unsigned long iova,
u32 value)
{
@@ -751,7 +764,8 @@ __tegra_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
}
static int tegra_smmu_map(struct iommu_domain *domain, unsigned long iova,
- phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
+ phys_addr_t paddr, size_t size, size_t count,
+ int prot, gfp_t gfp, size_t *mapped)
{
struct tegra_smmu_as *as = to_smmu_as(domain);
unsigned long flags;
@@ -761,11 +775,14 @@ static int tegra_smmu_map(struct iommu_domain *domain, unsigned long iova,
ret = __tegra_smmu_map(domain, iova, paddr, size, prot, gfp, &flags);
spin_unlock_irqrestore(&as->lock, flags);
+ if (!ret)
+ *mapped = size;
+
return ret;
}
static size_t tegra_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
- size_t size, struct iommu_iotlb_gather *gather)
+ size_t size, size_t count, struct iommu_iotlb_gather *gather)
{
struct tegra_smmu_as *as = to_smmu_as(domain);
unsigned long flags;
@@ -962,17 +979,28 @@ static int tegra_smmu_of_xlate(struct device *dev,
return iommu_fwspec_add_ids(dev, &id, 1);
}
+static int tegra_smmu_def_domain_type(struct device *dev)
+{
+ /*
+ * FIXME: For now we want to run all translation in IDENTITY mode, due
+ * to some device quirks. Better would be to just quirk the troubled
+ * devices.
+ */
+ return IOMMU_DOMAIN_IDENTITY;
+}
+
static const struct iommu_ops tegra_smmu_ops = {
- .domain_alloc = tegra_smmu_domain_alloc,
+ .identity_domain = &tegra_smmu_identity_domain,
+ .def_domain_type = &tegra_smmu_def_domain_type,
+ .domain_alloc_paging = tegra_smmu_domain_alloc_paging,
.probe_device = tegra_smmu_probe_device,
.device_group = tegra_smmu_device_group,
- .set_platform_dma_ops = tegra_smmu_set_platform_dma,
.of_xlate = tegra_smmu_of_xlate,
.pgsize_bitmap = SZ_4K,
.default_domain_ops = &(const struct iommu_domain_ops) {
.attach_dev = tegra_smmu_attach_dev,
- .map = tegra_smmu_map,
- .unmap = tegra_smmu_unmap,
+ .map_pages = tegra_smmu_map,
+ .unmap_pages = tegra_smmu_unmap,
.iova_to_phys = tegra_smmu_iova_to_phys,
.free = tegra_smmu_domain_free,
}
@@ -1056,8 +1084,6 @@ DEFINE_SHOW_ATTRIBUTE(tegra_smmu_clients);
static void tegra_smmu_debugfs_init(struct tegra_smmu *smmu)
{
smmu->debugfs = debugfs_create_dir("smmu", NULL);
- if (!smmu->debugfs)
- return;
debugfs_create_file("swgroups", S_IRUGO, smmu->debugfs, smmu,
&tegra_smmu_swgroups_fops);
diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c
index 17dcd826f5c2..379ebe03efb6 100644
--- a/drivers/iommu/virtio-iommu.c
+++ b/drivers/iommu/virtio-iommu.c
@@ -85,7 +85,7 @@ struct viommu_request {
void *writeback;
unsigned int write_offset;
unsigned int len;
- char buf[];
+ char buf[] __counted_by(len);
};
#define VIOMMU_FAULT_RESV_MASK 0xffffff00
@@ -230,7 +230,7 @@ static int __viommu_add_req(struct viommu_dev *viommu, void *buf, size_t len,
if (write_offset <= 0)
return -EINVAL;
- req = kzalloc(sizeof(*req) + len, GFP_ATOMIC);
+ req = kzalloc(struct_size(req, buf, len), GFP_ATOMIC);
if (!req)
return -ENOMEM;
diff --git a/drivers/leds/rgb/leds-qcom-lpg.c b/drivers/leds/rgb/leds-qcom-lpg.c
index bf03abb94e68..68d82a682bf6 100644
--- a/drivers/leds/rgb/leds-qcom-lpg.c
+++ b/drivers/leds/rgb/leds-qcom-lpg.c
@@ -1085,7 +1085,6 @@ static const struct pwm_ops lpg_pwm_ops = {
.request = lpg_pwm_request,
.apply = lpg_pwm_apply,
.get_state = lpg_pwm_get_state,
- .owner = THIS_MODULE,
};
static int lpg_add_pwm(struct lpg *lpg)
diff --git a/drivers/memory/tegra/mc.c b/drivers/memory/tegra/mc.c
index 67d6e70b4eab..a083921a8968 100644
--- a/drivers/memory/tegra/mc.c
+++ b/drivers/memory/tegra/mc.c
@@ -979,35 +979,6 @@ static int tegra_mc_probe(struct platform_device *pdev)
}
}
- if (IS_ENABLED(CONFIG_TEGRA_IOMMU_GART) && !mc->soc->smmu) {
- mc->gart = tegra_gart_probe(&pdev->dev, mc);
- if (IS_ERR(mc->gart)) {
- dev_err(&pdev->dev, "failed to probe GART: %ld\n",
- PTR_ERR(mc->gart));
- mc->gart = NULL;
- }
- }
-
- return 0;
-}
-
-static int __maybe_unused tegra_mc_suspend(struct device *dev)
-{
- struct tegra_mc *mc = dev_get_drvdata(dev);
-
- if (mc->soc->ops && mc->soc->ops->suspend)
- return mc->soc->ops->suspend(mc);
-
- return 0;
-}
-
-static int __maybe_unused tegra_mc_resume(struct device *dev)
-{
- struct tegra_mc *mc = dev_get_drvdata(dev);
-
- if (mc->soc->ops && mc->soc->ops->resume)
- return mc->soc->ops->resume(mc);
-
return 0;
}
@@ -1020,15 +991,10 @@ static void tegra_mc_sync_state(struct device *dev)
icc_sync_state(dev);
}
-static const struct dev_pm_ops tegra_mc_pm_ops = {
- SET_SYSTEM_SLEEP_PM_OPS(tegra_mc_suspend, tegra_mc_resume)
-};
-
static struct platform_driver tegra_mc_driver = {
.driver = {
.name = "tegra-mc",
.of_match_table = tegra_mc_of_match,
- .pm = &tegra_mc_pm_ops,
.suppress_bind_attrs = true,
.sync_state = tegra_mc_sync_state,
},
diff --git a/drivers/memory/tegra/tegra20.c b/drivers/memory/tegra/tegra20.c
index 544bfd216a22..aa4b97d5e732 100644
--- a/drivers/memory/tegra/tegra20.c
+++ b/drivers/memory/tegra/tegra20.c
@@ -688,32 +688,6 @@ static int tegra20_mc_probe(struct tegra_mc *mc)
return 0;
}
-static int tegra20_mc_suspend(struct tegra_mc *mc)
-{
- int err;
-
- if (IS_ENABLED(CONFIG_TEGRA_IOMMU_GART) && mc->gart) {
- err = tegra_gart_suspend(mc->gart);
- if (err < 0)
- return err;
- }
-
- return 0;
-}
-
-static int tegra20_mc_resume(struct tegra_mc *mc)
-{
- int err;
-
- if (IS_ENABLED(CONFIG_TEGRA_IOMMU_GART) && mc->gart) {
- err = tegra_gart_resume(mc->gart);
- if (err < 0)
- return err;
- }
-
- return 0;
-}
-
static irqreturn_t tegra20_mc_handle_irq(int irq, void *data)
{
struct tegra_mc *mc = data;
@@ -789,8 +763,6 @@ static irqreturn_t tegra20_mc_handle_irq(int irq, void *data)
static const struct tegra_mc_ops tegra20_mc_ops = {
.probe = tegra20_mc_probe,
- .suspend = tegra20_mc_suspend,
- .resume = tegra20_mc_resume,
.handle_irq = tegra20_mc_handle_irq,
};
diff --git a/drivers/misc/lkdtm/cfi.c b/drivers/misc/lkdtm/cfi.c
index fc28714ae3a6..6a33889d0902 100644
--- a/drivers/misc/lkdtm/cfi.c
+++ b/drivers/misc/lkdtm/cfi.c
@@ -68,12 +68,20 @@ static void lkdtm_CFI_FORWARD_PROTO(void)
#define no_pac_addr(addr) \
((__force __typeof__(addr))((uintptr_t)(addr) | PAGE_OFFSET))
+#ifdef CONFIG_RISCV
+/* https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-cc.adoc#frame-pointer-convention */
+#define FRAME_RA_OFFSET (-1)
+#else
+#define FRAME_RA_OFFSET 1
+#endif
+
/* The ultimate ROP gadget. */
static noinline __no_ret_protection
void set_return_addr_unchecked(unsigned long *expected, unsigned long *addr)
{
/* Use of volatile is to make sure final write isn't seen as a dead store. */
- unsigned long * volatile *ret_addr = (unsigned long **)__builtin_frame_address(0) + 1;
+ unsigned long * volatile *ret_addr =
+ (unsigned long **)__builtin_frame_address(0) + FRAME_RA_OFFSET;
/* Make sure we've found the right place on the stack before writing it. */
if (no_pac_addr(*ret_addr) == expected)
@@ -88,7 +96,8 @@ static noinline
void set_return_addr(unsigned long *expected, unsigned long *addr)
{
/* Use of volatile is to make sure final write isn't seen as a dead store. */
- unsigned long * volatile *ret_addr = (unsigned long **)__builtin_frame_address(0) + 1;
+ unsigned long * volatile *ret_addr =
+ (unsigned long **)__builtin_frame_address(0) + FRAME_RA_OFFSET;
/* Make sure we've found the right place on the stack before writing it. */
if (no_pac_addr(*ret_addr) == expected)
diff --git a/drivers/pwm/Kconfig b/drivers/pwm/Kconfig
index 8ebcddf91f7b..4b956d661755 100644
--- a/drivers/pwm/Kconfig
+++ b/drivers/pwm/Kconfig
@@ -173,8 +173,8 @@ config PWM_CLPS711X
will be called pwm-clps711x.
config PWM_CRC
- bool "Intel Crystalcove (CRC) PWM support"
- depends on X86 && INTEL_SOC_PMIC
+ tristate "Intel Crystalcove (CRC) PWM support"
+ depends on INTEL_SOC_PMIC
help
Generic PWM framework driver for Crystalcove (CRC) PMIC based PWM
control.
@@ -186,9 +186,19 @@ config PWM_CROS_EC
PWM driver for exposing a PWM attached to the ChromeOS Embedded
Controller.
+config PWM_DWC_CORE
+ tristate
+ depends on HAS_IOMEM
+ help
+ PWM driver for Synopsys DWC PWM Controller.
+
+ To compile this driver as a module, build the dependecies as
+ modules, this will be called pwm-dwc-core.
+
config PWM_DWC
- tristate "DesignWare PWM Controller"
- depends on PCI
+ tristate "DesignWare PWM Controller (PCI bus)"
+ depends on HAS_IOMEM && PCI
+ select PWM_DWC_CORE
help
PWM driver for Synopsys DWC PWM Controller attached to a PCI bus.
@@ -407,7 +417,7 @@ config PWM_MEDIATEK
config PWM_MICROCHIP_CORE
tristate "Microchip corePWM PWM support"
- depends on SOC_MICROCHIP_POLARFIRE || COMPILE_TEST
+ depends on ARCH_MICROCHIP_POLARFIRE || COMPILE_TEST
depends on HAS_IOMEM && OF
help
PWM driver for Microchip FPGA soft IP core.
diff --git a/drivers/pwm/Makefile b/drivers/pwm/Makefile
index c822389c2a24..c5ec9e168ee7 100644
--- a/drivers/pwm/Makefile
+++ b/drivers/pwm/Makefile
@@ -15,6 +15,7 @@ obj-$(CONFIG_PWM_CLK) += pwm-clk.o
obj-$(CONFIG_PWM_CLPS711X) += pwm-clps711x.o
obj-$(CONFIG_PWM_CRC) += pwm-crc.o
obj-$(CONFIG_PWM_CROS_EC) += pwm-cros-ec.o
+obj-$(CONFIG_PWM_DWC_CORE) += pwm-dwc-core.o
obj-$(CONFIG_PWM_DWC) += pwm-dwc.o
obj-$(CONFIG_PWM_EP93XX) += pwm-ep93xx.o
obj-$(CONFIG_PWM_FSL_FTM) += pwm-fsl-ftm.o
diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c
index dc66e3405bf5..29078486534d 100644
--- a/drivers/pwm/core.c
+++ b/drivers/pwm/core.c
@@ -89,13 +89,13 @@ static int pwm_device_request(struct pwm_device *pwm, const char *label)
if (test_bit(PWMF_REQUESTED, &pwm->flags))
return -EBUSY;
- if (!try_module_get(pwm->chip->ops->owner))
+ if (!try_module_get(pwm->chip->owner))
return -ENODEV;
if (pwm->chip->ops->request) {
err = pwm->chip->ops->request(pwm->chip, pwm);
if (err) {
- module_put(pwm->chip->ops->owner);
+ module_put(pwm->chip->owner);
return err;
}
}
@@ -208,36 +208,6 @@ static void of_pwmchip_remove(struct pwm_chip *chip)
of_node_put(chip->dev->of_node);
}
-/**
- * pwm_set_chip_data() - set private chip data for a PWM
- * @pwm: PWM device
- * @data: pointer to chip-specific data
- *
- * Returns: 0 on success or a negative error code on failure.
- */
-int pwm_set_chip_data(struct pwm_device *pwm, void *data)
-{
- if (!pwm)
- return -EINVAL;
-
- pwm->chip_data = data;
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(pwm_set_chip_data);
-
-/**
- * pwm_get_chip_data() - get private chip data for a PWM
- * @pwm: PWM device
- *
- * Returns: A pointer to the chip-private data for the PWM device.
- */
-void *pwm_get_chip_data(struct pwm_device *pwm)
-{
- return pwm ? pwm->chip_data : NULL;
-}
-EXPORT_SYMBOL_GPL(pwm_get_chip_data);
-
static bool pwm_ops_check(const struct pwm_chip *chip)
{
const struct pwm_ops *ops = chip->ops;
@@ -253,14 +223,16 @@ static bool pwm_ops_check(const struct pwm_chip *chip)
}
/**
- * pwmchip_add() - register a new PWM chip
+ * __pwmchip_add() - register a new PWM chip
* @chip: the PWM chip to add
+ * @owner: reference to the module providing the chip.
*
- * Register a new PWM chip.
+ * Register a new PWM chip. @owner is supposed to be THIS_MODULE, use the
+ * pwmchip_add wrapper to do this right.
*
* Returns: 0 on success or a negative error code on failure.
*/
-int pwmchip_add(struct pwm_chip *chip)
+int __pwmchip_add(struct pwm_chip *chip, struct module *owner)
{
struct pwm_device *pwm;
unsigned int i;
@@ -272,6 +244,8 @@ int pwmchip_add(struct pwm_chip *chip)
if (!pwm_ops_check(chip))
return -EINVAL;
+ chip->owner = owner;
+
chip->pwms = kcalloc(chip->npwm, sizeof(*pwm), GFP_KERNEL);
if (!chip->pwms)
return -ENOMEM;
@@ -306,7 +280,7 @@ int pwmchip_add(struct pwm_chip *chip)
return 0;
}
-EXPORT_SYMBOL_GPL(pwmchip_add);
+EXPORT_SYMBOL_GPL(__pwmchip_add);
/**
* pwmchip_remove() - remove a PWM chip
@@ -338,17 +312,17 @@ static void devm_pwmchip_remove(void *data)
pwmchip_remove(chip);
}
-int devm_pwmchip_add(struct device *dev, struct pwm_chip *chip)
+int __devm_pwmchip_add(struct device *dev, struct pwm_chip *chip, struct module *owner)
{
int ret;
- ret = pwmchip_add(chip);
+ ret = __pwmchip_add(chip, owner);
if (ret)
return ret;
return devm_add_action_or_reset(dev, devm_pwmchip_remove, chip);
}
-EXPORT_SYMBOL_GPL(devm_pwmchip_add);
+EXPORT_SYMBOL_GPL(__devm_pwmchip_add);
/**
* pwm_request_from_chip() - request a PWM device relative to a PWM chip
@@ -976,10 +950,9 @@ void pwm_put(struct pwm_device *pwm)
if (pwm->chip->ops->free)
pwm->chip->ops->free(pwm->chip, pwm);
- pwm_set_chip_data(pwm, NULL);
pwm->label = NULL;
- module_put(pwm->chip->ops->owner);
+ module_put(pwm->chip->owner);
out:
mutex_unlock(&pwm_lock);
}
diff --git a/drivers/pwm/pwm-ab8500.c b/drivers/pwm/pwm-ab8500.c
index 583a7d69c741..670d33daea84 100644
--- a/drivers/pwm/pwm-ab8500.c
+++ b/drivers/pwm/pwm-ab8500.c
@@ -181,7 +181,6 @@ static int ab8500_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm,
static const struct pwm_ops ab8500_pwm_ops = {
.apply = ab8500_pwm_apply,
.get_state = ab8500_pwm_get_state,
- .owner = THIS_MODULE,
};
static int ab8500_pwm_probe(struct platform_device *pdev)
diff --git a/drivers/pwm/pwm-apple.c b/drivers/pwm/pwm-apple.c
index 8e7d67fb5fbe..4d755b628d9e 100644
--- a/drivers/pwm/pwm-apple.c
+++ b/drivers/pwm/pwm-apple.c
@@ -99,7 +99,6 @@ static int apple_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm,
static const struct pwm_ops apple_pwm_ops = {
.apply = apple_pwm_apply,
.get_state = apple_pwm_get_state,
- .owner = THIS_MODULE,
};
static int apple_pwm_probe(struct platform_device *pdev)
diff --git a/drivers/pwm/pwm-atmel-hlcdc.c b/drivers/pwm/pwm-atmel-hlcdc.c
index e271d920151e..07920e034757 100644
--- a/drivers/pwm/pwm-atmel-hlcdc.c
+++ b/drivers/pwm/pwm-atmel-hlcdc.c
@@ -170,7 +170,6 @@ static int atmel_hlcdc_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
static const struct pwm_ops atmel_hlcdc_pwm_ops = {
.apply = atmel_hlcdc_pwm_apply,
- .owner = THIS_MODULE,
};
static const struct atmel_hlcdc_pwm_errata atmel_hlcdc_pwm_at91sam9x5_errata = {
diff --git a/drivers/pwm/pwm-atmel-tcb.c b/drivers/pwm/pwm-atmel-tcb.c
index c00dd37c5fbd..98b33c016c3c 100644
--- a/drivers/pwm/pwm-atmel-tcb.c
+++ b/drivers/pwm/pwm-atmel-tcb.c
@@ -364,7 +364,6 @@ static const struct pwm_ops atmel_tcb_pwm_ops = {
.request = atmel_tcb_pwm_request,
.free = atmel_tcb_pwm_free,
.apply = atmel_tcb_pwm_apply,
- .owner = THIS_MODULE,
};
static struct atmel_tcb_config tcb_rm9200_config = {
diff --git a/drivers/pwm/pwm-atmel.c b/drivers/pwm/pwm-atmel.c
index 1f73325d1bea..47bcc8a3bf9d 100644
--- a/drivers/pwm/pwm-atmel.c
+++ b/drivers/pwm/pwm-atmel.c
@@ -402,7 +402,6 @@ static int atmel_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm,
static const struct pwm_ops atmel_pwm_ops = {
.apply = atmel_pwm_apply,
.get_state = atmel_pwm_get_state,
- .owner = THIS_MODULE,
};
static const struct atmel_pwm_data atmel_sam9rl_pwm_data = {
@@ -547,7 +546,7 @@ disable_clk:
static struct platform_driver atmel_pwm_driver = {
.driver = {
.name = "atmel-pwm",
- .of_match_table = of_match_ptr(atmel_pwm_dt_ids),
+ .of_match_table = atmel_pwm_dt_ids,
},
.probe = atmel_pwm_probe,
};
diff --git a/drivers/pwm/pwm-bcm-iproc.c b/drivers/pwm/pwm-bcm-iproc.c
index 7d70b6f186a6..758254025683 100644
--- a/drivers/pwm/pwm-bcm-iproc.c
+++ b/drivers/pwm/pwm-bcm-iproc.c
@@ -183,7 +183,6 @@ static int iproc_pwmc_apply(struct pwm_chip *chip, struct pwm_device *pwm,
static const struct pwm_ops iproc_pwm_ops = {
.apply = iproc_pwmc_apply,
.get_state = iproc_pwmc_get_state,
- .owner = THIS_MODULE,
};
static int iproc_pwmc_probe(struct platform_device *pdev)
@@ -207,18 +206,10 @@ static int iproc_pwmc_probe(struct platform_device *pdev)
if (IS_ERR(ip->base))
return PTR_ERR(ip->base);
- ip->clk = devm_clk_get(&pdev->dev, NULL);
- if (IS_ERR(ip->clk)) {
- dev_err(&pdev->dev, "failed to get clock: %ld\n",
- PTR_ERR(ip->clk));
- return PTR_ERR(ip->clk);
- }
-
- ret = clk_prepare_enable(ip->clk);
- if (ret < 0) {
- dev_err(&pdev->dev, "failed to enable clock: %d\n", ret);
- return ret;
- }
+ ip->clk = devm_clk_get_enabled(&pdev->dev, NULL);
+ if (IS_ERR(ip->clk))
+ return dev_err_probe(&pdev->dev, PTR_ERR(ip->clk),
+ "failed to get clock\n");
/* Set full drive and normal polarity for all channels */
value = readl(ip->base + IPROC_PWM_CTRL_OFFSET);
@@ -230,22 +221,12 @@ static int iproc_pwmc_probe(struct platform_device *pdev)
writel(value, ip->base + IPROC_PWM_CTRL_OFFSET);
- ret = pwmchip_add(&ip->chip);
- if (ret < 0) {
- dev_err(&pdev->dev, "failed to add PWM chip: %d\n", ret);
- clk_disable_unprepare(ip->clk);
- }
-
- return ret;
-}
-
-static void iproc_pwmc_remove(struct platform_device *pdev)
-{
- struct iproc_pwmc *ip = platform_get_drvdata(pdev);
+ ret = devm_pwmchip_add(&pdev->dev, &ip->chip);
+ if (ret < 0)
+ return dev_err_probe(&pdev->dev, ret,
+ "failed to add PWM chip\n");
- pwmchip_remove(&ip->chip);
-
- clk_disable_unprepare(ip->clk);
+ return 0;
}
static const struct of_device_id bcm_iproc_pwmc_dt[] = {
@@ -260,7 +241,6 @@ static struct platform_driver iproc_pwmc_driver = {
.of_match_table = bcm_iproc_pwmc_dt,
},
.probe = iproc_pwmc_probe,
- .remove_new = iproc_pwmc_remove,
};
module_platform_driver(iproc_pwmc_driver);
diff --git a/drivers/pwm/pwm-bcm-kona.c b/drivers/pwm/pwm-bcm-kona.c
index e5b00cc9f7a7..15d6ed03c3ce 100644
--- a/drivers/pwm/pwm-bcm-kona.c
+++ b/drivers/pwm/pwm-bcm-kona.c
@@ -269,7 +269,6 @@ static int kona_pwmc_apply(struct pwm_chip *chip, struct pwm_device *pwm,
static const struct pwm_ops kona_pwm_ops = {
.apply = kona_pwmc_apply,
- .owner = THIS_MODULE,
};
static int kona_pwmc_probe(struct platform_device *pdev)
diff --git a/drivers/pwm/pwm-bcm2835.c b/drivers/pwm/pwm-bcm2835.c
index bdfc2a5ec0d6..9777babd5b95 100644
--- a/drivers/pwm/pwm-bcm2835.c
+++ b/drivers/pwm/pwm-bcm2835.c
@@ -129,7 +129,6 @@ static const struct pwm_ops bcm2835_pwm_ops = {
.request = bcm2835_pwm_request,
.free = bcm2835_pwm_free,
.apply = bcm2835_pwm_apply,
- .owner = THIS_MODULE,
};
static int bcm2835_pwm_probe(struct platform_device *pdev)
@@ -147,41 +146,42 @@ static int bcm2835_pwm_probe(struct platform_device *pdev)
if (IS_ERR(pc->base))
return PTR_ERR(pc->base);
- pc->clk = devm_clk_get(&pdev->dev, NULL);
+ pc->clk = devm_clk_get_enabled(&pdev->dev, NULL);
if (IS_ERR(pc->clk))
return dev_err_probe(&pdev->dev, PTR_ERR(pc->clk),
"clock not found\n");
- ret = clk_prepare_enable(pc->clk);
- if (ret)
- return ret;
-
pc->chip.dev = &pdev->dev;
pc->chip.ops = &bcm2835_pwm_ops;
pc->chip.npwm = 2;
- platform_set_drvdata(pdev, pc);
-
- ret = pwmchip_add(&pc->chip);
+ ret = devm_pwmchip_add(&pdev->dev, &pc->chip);
if (ret < 0)
- goto add_fail;
+ return dev_err_probe(&pdev->dev, ret,
+ "failed to add pwmchip\n");
return 0;
+}
+
+static int bcm2835_pwm_suspend(struct device *dev)
+{
+ struct bcm2835_pwm *pc = dev_get_drvdata(dev);
-add_fail:
clk_disable_unprepare(pc->clk);
- return ret;
+
+ return 0;
}
-static void bcm2835_pwm_remove(struct platform_device *pdev)
+static int bcm2835_pwm_resume(struct device *dev)
{
- struct bcm2835_pwm *pc = platform_get_drvdata(pdev);
-
- pwmchip_remove(&pc->chip);
+ struct bcm2835_pwm *pc = dev_get_drvdata(dev);
- clk_disable_unprepare(pc->clk);
+ return clk_prepare_enable(pc->clk);
}
+static DEFINE_SIMPLE_DEV_PM_OPS(bcm2835_pwm_pm_ops, bcm2835_pwm_suspend,
+ bcm2835_pwm_resume);
+
static const struct of_device_id bcm2835_pwm_of_match[] = {
{ .compatible = "brcm,bcm2835-pwm", },
{ /* sentinel */ }
@@ -192,9 +192,9 @@ static struct platform_driver bcm2835_pwm_driver = {
.driver = {
.name = "bcm2835-pwm",
.of_match_table = bcm2835_pwm_of_match,
+ .pm = pm_ptr(&bcm2835_pwm_pm_ops),
},
.probe = bcm2835_pwm_probe,
- .remove_new = bcm2835_pwm_remove,
};
module_platform_driver(bcm2835_pwm_driver);
diff --git a/drivers/pwm/pwm-berlin.c b/drivers/pwm/pwm-berlin.c
index 0971c666afd1..ba2d79991769 100644
--- a/drivers/pwm/pwm-berlin.c
+++ b/drivers/pwm/pwm-berlin.c
@@ -39,6 +39,8 @@
#define BERLIN_PWM_TCNT 0xc
#define BERLIN_PWM_MAX_TCNT 65535
+#define BERLIN_PWM_NUMPWMS 4
+
struct berlin_pwm_channel {
u32 enable;
u32 ctrl;
@@ -50,6 +52,7 @@ struct berlin_pwm_chip {
struct pwm_chip chip;
struct clk *clk;
void __iomem *base;
+ struct berlin_pwm_channel channel[BERLIN_PWM_NUMPWMS];
};
static inline struct berlin_pwm_chip *to_berlin_pwm_chip(struct pwm_chip *chip)
@@ -70,24 +73,6 @@ static inline void berlin_pwm_writel(struct berlin_pwm_chip *bpc,
writel_relaxed(value, bpc->base + channel * 0x10 + offset);
}
-static int berlin_pwm_request(struct pwm_chip *chip, struct pwm_device *pwm)
-{
- struct berlin_pwm_channel *channel;
-
- channel = kzalloc(sizeof(*channel), GFP_KERNEL);
- if (!channel)
- return -ENOMEM;
-
- return pwm_set_chip_data(pwm, channel);
-}
-
-static void berlin_pwm_free(struct pwm_chip *chip, struct pwm_device *pwm)
-{
- struct berlin_pwm_channel *channel = pwm_get_chip_data(pwm);
-
- kfree(channel);
-}
-
static int berlin_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
u64 duty_ns, u64 period_ns)
{
@@ -202,10 +187,7 @@ static int berlin_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
}
static const struct pwm_ops berlin_pwm_ops = {
- .request = berlin_pwm_request,
- .free = berlin_pwm_free,
.apply = berlin_pwm_apply,
- .owner = THIS_MODULE,
};
static const struct of_device_id berlin_pwm_match[] = {
@@ -227,39 +209,23 @@ static int berlin_pwm_probe(struct platform_device *pdev)
if (IS_ERR(bpc->base))
return PTR_ERR(bpc->base);
- bpc->clk = devm_clk_get(&pdev->dev, NULL);
+ bpc->clk = devm_clk_get_enabled(&pdev->dev, NULL);
if (IS_ERR(bpc->clk))
return PTR_ERR(bpc->clk);
- ret = clk_prepare_enable(bpc->clk);
- if (ret)
- return ret;
-
bpc->chip.dev = &pdev->dev;
bpc->chip.ops = &berlin_pwm_ops;
- bpc->chip.npwm = 4;
+ bpc->chip.npwm = BERLIN_PWM_NUMPWMS;
- ret = pwmchip_add(&bpc->chip);
- if (ret < 0) {
- dev_err(&pdev->dev, "failed to add PWM chip: %d\n", ret);
- clk_disable_unprepare(bpc->clk);
- return ret;
- }
+ ret = devm_pwmchip_add(&pdev->dev, &bpc->chip);
+ if (ret < 0)
+ return dev_err_probe(&pdev->dev, ret, "failed to add PWM chip\n");
platform_set_drvdata(pdev, bpc);
return 0;
}
-static void berlin_pwm_remove(struct platform_device *pdev)
-{
- struct berlin_pwm_chip *bpc = platform_get_drvdata(pdev);
-
- pwmchip_remove(&bpc->chip);
-
- clk_disable_unprepare(bpc->clk);
-}
-
#ifdef CONFIG_PM_SLEEP
static int berlin_pwm_suspend(struct device *dev)
{
@@ -267,11 +233,7 @@ static int berlin_pwm_suspend(struct device *dev)
unsigned int i;
for (i = 0; i < bpc->chip.npwm; i++) {
- struct berlin_pwm_channel *channel;
-
- channel = pwm_get_chip_data(&bpc->chip.pwms[i]);
- if (!channel)
- continue;
+ struct berlin_pwm_channel *channel = &bpc->channel[i];
channel->enable = berlin_pwm_readl(bpc, i, BERLIN_PWM_ENABLE);
channel->ctrl = berlin_pwm_readl(bpc, i, BERLIN_PWM_CONTROL);
@@ -295,11 +257,7 @@ static int berlin_pwm_resume(struct device *dev)
return ret;
for (i = 0; i < bpc->chip.npwm; i++) {
- struct berlin_pwm_channel *channel;
-
- channel = pwm_get_chip_data(&bpc->chip.pwms[i]);
- if (!channel)
- continue;
+ struct berlin_pwm_channel *channel = &bpc->channel[i];
berlin_pwm_writel(bpc, i, channel->ctrl, BERLIN_PWM_CONTROL);
berlin_pwm_writel(bpc, i, channel->duty, BERLIN_PWM_DUTY);
@@ -316,7 +274,6 @@ static SIMPLE_DEV_PM_OPS(berlin_pwm_pm_ops, berlin_pwm_suspend,
static struct platform_driver berlin_pwm_driver = {
.probe = berlin_pwm_probe,
- .remove_new = berlin_pwm_remove,
.driver = {
.name = "berlin-pwm",
.of_match_table = berlin_pwm_match,
diff --git a/drivers/pwm/pwm-brcmstb.c b/drivers/pwm/pwm-brcmstb.c
index a3faa9a3de7c..b723c2d4f485 100644
--- a/drivers/pwm/pwm-brcmstb.c
+++ b/drivers/pwm/pwm-brcmstb.c
@@ -220,7 +220,6 @@ static int brcmstb_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
static const struct pwm_ops brcmstb_pwm_ops = {
.apply = brcmstb_pwm_apply,
- .owner = THIS_MODULE,
};
static const struct of_device_id brcmstb_pwm_of_match[] = {
@@ -238,17 +237,10 @@ static int brcmstb_pwm_probe(struct platform_device *pdev)
if (!p)
return -ENOMEM;
- p->clk = devm_clk_get(&pdev->dev, NULL);
- if (IS_ERR(p->clk)) {
- dev_err(&pdev->dev, "failed to obtain clock\n");
- return PTR_ERR(p->clk);
- }
-
- ret = clk_prepare_enable(p->clk);
- if (ret < 0) {
- dev_err(&pdev->dev, "failed to enable clock: %d\n", ret);
- return ret;
- }
+ p->clk = devm_clk_get_enabled(&pdev->dev, NULL);
+ if (IS_ERR(p->clk))
+ return dev_err_probe(&pdev->dev, PTR_ERR(p->clk),
+ "failed to obtain clock\n");
platform_set_drvdata(pdev, p);
@@ -257,30 +249,14 @@ static int brcmstb_pwm_probe(struct platform_device *pdev)
p->chip.npwm = 2;
p->base = devm_platform_ioremap_resource(pdev, 0);
- if (IS_ERR(p->base)) {
- ret = PTR_ERR(p->base);
- goto out_clk;
- }
+ if (IS_ERR(p->base))
+ return PTR_ERR(p->base);
- ret = pwmchip_add(&p->chip);
- if (ret) {
- dev_err(&pdev->dev, "failed to add PWM chip: %d\n", ret);
- goto out_clk;
- }
+ ret = devm_pwmchip_add(&pdev->dev, &p->chip);
+ if (ret)
+ return dev_err_probe(&pdev->dev, ret, "failed to add PWM chip\n");
return 0;
-
-out_clk:
- clk_disable_unprepare(p->clk);
- return ret;
-}
-
-static void brcmstb_pwm_remove(struct platform_device *pdev)
-{
- struct brcmstb_pwm *p = platform_get_drvdata(pdev);
-
- pwmchip_remove(&p->chip);
- clk_disable_unprepare(p->clk);
}
#ifdef CONFIG_PM_SLEEP
@@ -288,7 +264,7 @@ static int brcmstb_pwm_suspend(struct device *dev)
{
struct brcmstb_pwm *p = dev_get_drvdata(dev);
- clk_disable(p->clk);
+ clk_disable_unprepare(p->clk);
return 0;
}
@@ -297,9 +273,7 @@ static int brcmstb_pwm_resume(struct device *dev)
{
struct brcmstb_pwm *p = dev_get_drvdata(dev);
- clk_enable(p->clk);
-
- return 0;
+ return clk_prepare_enable(p->clk);
}
#endif
@@ -308,7 +282,6 @@ static SIMPLE_DEV_PM_OPS(brcmstb_pwm_pm_ops, brcmstb_pwm_suspend,
static struct platform_driver brcmstb_pwm_driver = {
.probe = brcmstb_pwm_probe,
- .remove_new = brcmstb_pwm_remove,
.driver = {
.name = "pwm-brcmstb",
.of_match_table = brcmstb_pwm_of_match,
diff --git a/drivers/pwm/pwm-clk.c b/drivers/pwm/pwm-clk.c
index 0ee4d2aee4df..9dd88b386907 100644
--- a/drivers/pwm/pwm-clk.c
+++ b/drivers/pwm/pwm-clk.c
@@ -77,7 +77,6 @@ static int pwm_clk_apply(struct pwm_chip *chip, struct pwm_device *pwm,
static const struct pwm_ops pwm_clk_ops = {
.apply = pwm_clk_apply,
- .owner = THIS_MODULE,
};
static int pwm_clk_probe(struct platform_device *pdev)
diff --git a/drivers/pwm/pwm-clps711x.c b/drivers/pwm/pwm-clps711x.c
index b0d91142da8d..42179b3f7ec3 100644
--- a/drivers/pwm/pwm-clps711x.c
+++ b/drivers/pwm/pwm-clps711x.c
@@ -72,7 +72,6 @@ static int clps711x_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
static const struct pwm_ops clps711x_pwm_ops = {
.request = clps711x_pwm_request,
.apply = clps711x_pwm_apply,
- .owner = THIS_MODULE,
};
static struct pwm_device *clps711x_pwm_xlate(struct pwm_chip *chip,
diff --git a/drivers/pwm/pwm-crc.c b/drivers/pwm/pwm-crc.c
index b9f063dc6b5f..2b0b659eee97 100644
--- a/drivers/pwm/pwm-crc.c
+++ b/drivers/pwm/pwm-crc.c
@@ -184,5 +184,8 @@ static struct platform_driver crystalcove_pwm_driver = {
.name = "crystal_cove_pwm",
},
};
+module_platform_driver(crystalcove_pwm_driver);
-builtin_platform_driver(crystalcove_pwm_driver);
+MODULE_ALIAS("platform:crystal_cove_pwm");
+MODULE_DESCRIPTION("Intel Crystalcove (CRC) PWM support");
+MODULE_LICENSE("GPL");
diff --git a/drivers/pwm/pwm-cros-ec.c b/drivers/pwm/pwm-cros-ec.c
index baaac0c33aa0..4fbd23e4ef69 100644
--- a/drivers/pwm/pwm-cros-ec.c
+++ b/drivers/pwm/pwm-cros-ec.c
@@ -22,12 +22,14 @@
* @ec: Pointer to EC device
* @chip: PWM controller chip
* @use_pwm_type: Use PWM types instead of generic channels
+ * @channel: array with per-channel data
*/
struct cros_ec_pwm_device {
struct device *dev;
struct cros_ec_device *ec;
struct pwm_chip chip;
bool use_pwm_type;
+ struct cros_ec_pwm *channel;
};
/**
@@ -43,26 +45,6 @@ static inline struct cros_ec_pwm_device *pwm_to_cros_ec_pwm(struct pwm_chip *chi
return container_of(chip, struct cros_ec_pwm_device, chip);
}
-static int cros_ec_pwm_request(struct pwm_chip *chip, struct pwm_device *pwm)
-{
- struct cros_ec_pwm *channel;
-
- channel = kzalloc(sizeof(*channel), GFP_KERNEL);
- if (!channel)
- return -ENOMEM;
-
- pwm_set_chip_data(pwm, channel);
-
- return 0;
-}
-
-static void cros_ec_pwm_free(struct pwm_chip *chip, struct pwm_device *pwm)
-{
- struct cros_ec_pwm *channel = pwm_get_chip_data(pwm);
-
- kfree(channel);
-}
-
static int cros_ec_dt_type_to_pwm_type(u8 dt_index, u8 *pwm_type)
{
switch (dt_index) {
@@ -158,7 +140,7 @@ static int cros_ec_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
const struct pwm_state *state)
{
struct cros_ec_pwm_device *ec_pwm = pwm_to_cros_ec_pwm(chip);
- struct cros_ec_pwm *channel = pwm_get_chip_data(pwm);
+ struct cros_ec_pwm *channel = &ec_pwm->channel[pwm->hwpwm];
u16 duty_cycle;
int ret;
@@ -188,7 +170,7 @@ static int cros_ec_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm,
struct pwm_state *state)
{
struct cros_ec_pwm_device *ec_pwm = pwm_to_cros_ec_pwm(chip);
- struct cros_ec_pwm *channel = pwm_get_chip_data(pwm);
+ struct cros_ec_pwm *channel = &ec_pwm->channel[pwm->hwpwm];
int ret;
ret = cros_ec_pwm_get_duty(ec_pwm, pwm->hwpwm);
@@ -237,11 +219,8 @@ cros_ec_pwm_xlate(struct pwm_chip *chip, const struct of_phandle_args *args)
}
static const struct pwm_ops cros_ec_pwm_ops = {
- .request = cros_ec_pwm_request,
- .free = cros_ec_pwm_free,
.get_state = cros_ec_pwm_get_state,
.apply = cros_ec_pwm_apply,
- .owner = THIS_MODULE,
};
/*
@@ -286,10 +265,8 @@ static int cros_ec_pwm_probe(struct platform_device *pdev)
struct pwm_chip *chip;
int ret;
- if (!ec) {
- dev_err(dev, "no parent EC device\n");
- return -EINVAL;
- }
+ if (!ec)
+ return dev_err_probe(dev, -EINVAL, "no parent EC device\n");
ec_pwm = devm_kzalloc(dev, sizeof(*ec_pwm), GFP_KERNEL);
if (!ec_pwm)
@@ -310,32 +287,23 @@ static int cros_ec_pwm_probe(struct platform_device *pdev)
chip->npwm = CROS_EC_PWM_DT_COUNT;
} else {
ret = cros_ec_num_pwms(ec_pwm);
- if (ret < 0) {
- dev_err(dev, "Couldn't find PWMs: %d\n", ret);
- return ret;
- }
+ if (ret < 0)
+ return dev_err_probe(dev, ret, "Couldn't find PWMs\n");
chip->npwm = ret;
}
- dev_dbg(dev, "Probed %u PWMs\n", chip->npwm);
-
- ret = pwmchip_add(chip);
- if (ret < 0) {
- dev_err(dev, "cannot register PWM: %d\n", ret);
- return ret;
- }
-
- platform_set_drvdata(pdev, ec_pwm);
+ ec_pwm->channel = devm_kcalloc(dev, chip->npwm, sizeof(*ec_pwm->channel),
+ GFP_KERNEL);
+ if (!ec_pwm->channel)
+ return -ENOMEM;
- return ret;
-}
+ dev_dbg(dev, "Probed %u PWMs\n", chip->npwm);
-static void cros_ec_pwm_remove(struct platform_device *dev)
-{
- struct cros_ec_pwm_device *ec_pwm = platform_get_drvdata(dev);
- struct pwm_chip *chip = &ec_pwm->chip;
+ ret = devm_pwmchip_add(dev, chip);
+ if (ret < 0)
+ return dev_err_probe(dev, ret, "cannot register PWM\n");
- pwmchip_remove(chip);
+ return 0;
}
#ifdef CONFIG_OF
@@ -349,7 +317,6 @@ MODULE_DEVICE_TABLE(of, cros_ec_pwm_of_match);
static struct platform_driver cros_ec_pwm_driver = {
.probe = cros_ec_pwm_probe,
- .remove_new = cros_ec_pwm_remove,
.driver = {
.name = "cros-ec-pwm",
.of_match_table = of_match_ptr(cros_ec_pwm_of_match),
diff --git a/drivers/pwm/pwm-dwc-core.c b/drivers/pwm/pwm-dwc-core.c
new file mode 100644
index 000000000000..ea63dd741f5c
--- /dev/null
+++ b/drivers/pwm/pwm-dwc-core.c
@@ -0,0 +1,184 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * DesignWare PWM Controller driver core
+ *
+ * Copyright (C) 2018-2020 Intel Corporation
+ *
+ * Author: Felipe Balbi (Intel)
+ * Author: Jarkko Nikula <[email protected]>
+ * Author: Raymond Tan <[email protected]>
+ */
+
+#define DEFAULT_SYMBOL_NAMESPACE dwc_pwm
+
+#include <linux/bitops.h>
+#include <linux/export.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/pm_runtime.h>
+#include <linux/pwm.h>
+
+#include "pwm-dwc.h"
+
+static void __dwc_pwm_set_enable(struct dwc_pwm *dwc, int pwm, int enabled)
+{
+ u32 reg;
+
+ reg = dwc_pwm_readl(dwc, DWC_TIM_CTRL(pwm));
+
+ if (enabled)
+ reg |= DWC_TIM_CTRL_EN;
+ else
+ reg &= ~DWC_TIM_CTRL_EN;
+
+ dwc_pwm_writel(dwc, reg, DWC_TIM_CTRL(pwm));
+}
+
+static int __dwc_pwm_configure_timer(struct dwc_pwm *dwc,
+ struct pwm_device *pwm,
+ const struct pwm_state *state)
+{
+ u64 tmp;
+ u32 ctrl;
+ u32 high;
+ u32 low;
+
+ /*
+ * Calculate width of low and high period in terms of input clock
+ * periods and check are the result within HW limits between 1 and
+ * 2^32 periods.
+ */
+ tmp = DIV_ROUND_CLOSEST_ULL(state->duty_cycle, dwc->clk_ns);
+ if (tmp < 1 || tmp > (1ULL << 32))
+ return -ERANGE;
+ low = tmp - 1;
+
+ tmp = DIV_ROUND_CLOSEST_ULL(state->period - state->duty_cycle,
+ dwc->clk_ns);
+ if (tmp < 1 || tmp > (1ULL << 32))
+ return -ERANGE;
+ high = tmp - 1;
+
+ /*
+ * Specification says timer usage flow is to disable timer, then
+ * program it followed by enable. It also says Load Count is loaded
+ * into timer after it is enabled - either after a disable or
+ * a reset. Based on measurements it happens also without disable
+ * whenever Load Count is updated. But follow the specification.
+ */
+ __dwc_pwm_set_enable(dwc, pwm->hwpwm, false);
+
+ /*
+ * Write Load Count and Load Count 2 registers. Former defines the
+ * width of low period and latter the width of high period in terms
+ * multiple of input clock periods:
+ * Width = ((Count + 1) * input clock period).
+ */
+ dwc_pwm_writel(dwc, low, DWC_TIM_LD_CNT(pwm->hwpwm));
+ dwc_pwm_writel(dwc, high, DWC_TIM_LD_CNT2(pwm->hwpwm));
+
+ /*
+ * Set user-defined mode, timer reloads from Load Count registers
+ * when it counts down to 0.
+ * Set PWM mode, it makes output to toggle and width of low and high
+ * periods are set by Load Count registers.
+ */
+ ctrl = DWC_TIM_CTRL_MODE_USER | DWC_TIM_CTRL_PWM;
+ dwc_pwm_writel(dwc, ctrl, DWC_TIM_CTRL(pwm->hwpwm));
+
+ /*
+ * Enable timer. Output starts from low period.
+ */
+ __dwc_pwm_set_enable(dwc, pwm->hwpwm, state->enabled);
+
+ return 0;
+}
+
+static int dwc_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
+ const struct pwm_state *state)
+{
+ struct dwc_pwm *dwc = to_dwc_pwm(chip);
+
+ if (state->polarity != PWM_POLARITY_INVERSED)
+ return -EINVAL;
+
+ if (state->enabled) {
+ if (!pwm->state.enabled)
+ pm_runtime_get_sync(chip->dev);
+ return __dwc_pwm_configure_timer(dwc, pwm, state);
+ } else {
+ if (pwm->state.enabled) {
+ __dwc_pwm_set_enable(dwc, pwm->hwpwm, false);
+ pm_runtime_put_sync(chip->dev);
+ }
+ }
+
+ return 0;
+}
+
+static int dwc_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm,
+ struct pwm_state *state)
+{
+ struct dwc_pwm *dwc = to_dwc_pwm(chip);
+ u64 duty, period;
+ u32 ctrl, ld, ld2;
+
+ pm_runtime_get_sync(chip->dev);
+
+ ctrl = dwc_pwm_readl(dwc, DWC_TIM_CTRL(pwm->hwpwm));
+ ld = dwc_pwm_readl(dwc, DWC_TIM_LD_CNT(pwm->hwpwm));
+ ld2 = dwc_pwm_readl(dwc, DWC_TIM_LD_CNT2(pwm->hwpwm));
+
+ state->enabled = !!(ctrl & DWC_TIM_CTRL_EN);
+
+ /*
+ * If we're not in PWM, technically the output is a 50-50
+ * based on the timer load-count only.
+ */
+ if (ctrl & DWC_TIM_CTRL_PWM) {
+ duty = (ld + 1) * dwc->clk_ns;
+ period = (ld2 + 1) * dwc->clk_ns;
+ period += duty;
+ } else {
+ duty = (ld + 1) * dwc->clk_ns;
+ period = duty * 2;
+ }
+
+ state->polarity = PWM_POLARITY_INVERSED;
+ state->period = period;
+ state->duty_cycle = duty;
+
+ pm_runtime_put_sync(chip->dev);
+
+ return 0;
+}
+
+static const struct pwm_ops dwc_pwm_ops = {
+ .apply = dwc_pwm_apply,
+ .get_state = dwc_pwm_get_state,
+};
+
+struct dwc_pwm *dwc_pwm_alloc(struct device *dev)
+{
+ struct dwc_pwm *dwc;
+
+ dwc = devm_kzalloc(dev, sizeof(*dwc), GFP_KERNEL);
+ if (!dwc)
+ return NULL;
+
+ dwc->clk_ns = 10;
+ dwc->chip.dev = dev;
+ dwc->chip.ops = &dwc_pwm_ops;
+ dwc->chip.npwm = DWC_TIMERS_TOTAL;
+
+ dev_set_drvdata(dev, dwc);
+ return dwc;
+}
+EXPORT_SYMBOL_GPL(dwc_pwm_alloc);
+
+MODULE_AUTHOR("Felipe Balbi (Intel)");
+MODULE_AUTHOR("Jarkko Nikula <[email protected]>");
+MODULE_AUTHOR("Raymond Tan <[email protected]>");
+MODULE_DESCRIPTION("DesignWare PWM Controller");
+MODULE_LICENSE("GPL");
diff --git a/drivers/pwm/pwm-dwc.c b/drivers/pwm/pwm-dwc.c
index 3bbb26c862c3..bd9cadb497d7 100644
--- a/drivers/pwm/pwm-dwc.c
+++ b/drivers/pwm/pwm-dwc.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * DesignWare PWM Controller driver
+ * DesignWare PWM Controller driver (PCI part)
*
* Copyright (C) 2018-2020 Intel Corporation
*
@@ -13,6 +13,8 @@
* periods are one or more input clock periods long.
*/
+#define DEFAULT_MOUDLE_NAMESPACE dwc_pwm
+
#include <linux/bitops.h>
#include <linux/export.h>
#include <linux/kernel.h>
@@ -21,198 +23,7 @@
#include <linux/pm_runtime.h>
#include <linux/pwm.h>
-#define DWC_TIM_LD_CNT(n) ((n) * 0x14)
-#define DWC_TIM_LD_CNT2(n) (((n) * 4) + 0xb0)
-#define DWC_TIM_CUR_VAL(n) (((n) * 0x14) + 0x04)
-#define DWC_TIM_CTRL(n) (((n) * 0x14) + 0x08)
-#define DWC_TIM_EOI(n) (((n) * 0x14) + 0x0c)
-#define DWC_TIM_INT_STS(n) (((n) * 0x14) + 0x10)
-
-#define DWC_TIMERS_INT_STS 0xa0
-#define DWC_TIMERS_EOI 0xa4
-#define DWC_TIMERS_RAW_INT_STS 0xa8
-#define DWC_TIMERS_COMP_VERSION 0xac
-
-#define DWC_TIMERS_TOTAL 8
-#define DWC_CLK_PERIOD_NS 10
-
-/* Timer Control Register */
-#define DWC_TIM_CTRL_EN BIT(0)
-#define DWC_TIM_CTRL_MODE BIT(1)
-#define DWC_TIM_CTRL_MODE_FREE (0 << 1)
-#define DWC_TIM_CTRL_MODE_USER (1 << 1)
-#define DWC_TIM_CTRL_INT_MASK BIT(2)
-#define DWC_TIM_CTRL_PWM BIT(3)
-
-struct dwc_pwm_ctx {
- u32 cnt;
- u32 cnt2;
- u32 ctrl;
-};
-
-struct dwc_pwm {
- struct pwm_chip chip;
- void __iomem *base;
- struct dwc_pwm_ctx ctx[DWC_TIMERS_TOTAL];
-};
-#define to_dwc_pwm(p) (container_of((p), struct dwc_pwm, chip))
-
-static inline u32 dwc_pwm_readl(struct dwc_pwm *dwc, u32 offset)
-{
- return readl(dwc->base + offset);
-}
-
-static inline void dwc_pwm_writel(struct dwc_pwm *dwc, u32 value, u32 offset)
-{
- writel(value, dwc->base + offset);
-}
-
-static void __dwc_pwm_set_enable(struct dwc_pwm *dwc, int pwm, int enabled)
-{
- u32 reg;
-
- reg = dwc_pwm_readl(dwc, DWC_TIM_CTRL(pwm));
-
- if (enabled)
- reg |= DWC_TIM_CTRL_EN;
- else
- reg &= ~DWC_TIM_CTRL_EN;
-
- dwc_pwm_writel(dwc, reg, DWC_TIM_CTRL(pwm));
-}
-
-static int __dwc_pwm_configure_timer(struct dwc_pwm *dwc,
- struct pwm_device *pwm,
- const struct pwm_state *state)
-{
- u64 tmp;
- u32 ctrl;
- u32 high;
- u32 low;
-
- /*
- * Calculate width of low and high period in terms of input clock
- * periods and check are the result within HW limits between 1 and
- * 2^32 periods.
- */
- tmp = DIV_ROUND_CLOSEST_ULL(state->duty_cycle, DWC_CLK_PERIOD_NS);
- if (tmp < 1 || tmp > (1ULL << 32))
- return -ERANGE;
- low = tmp - 1;
-
- tmp = DIV_ROUND_CLOSEST_ULL(state->period - state->duty_cycle,
- DWC_CLK_PERIOD_NS);
- if (tmp < 1 || tmp > (1ULL << 32))
- return -ERANGE;
- high = tmp - 1;
-
- /*
- * Specification says timer usage flow is to disable timer, then
- * program it followed by enable. It also says Load Count is loaded
- * into timer after it is enabled - either after a disable or
- * a reset. Based on measurements it happens also without disable
- * whenever Load Count is updated. But follow the specification.
- */
- __dwc_pwm_set_enable(dwc, pwm->hwpwm, false);
-
- /*
- * Write Load Count and Load Count 2 registers. Former defines the
- * width of low period and latter the width of high period in terms
- * multiple of input clock periods:
- * Width = ((Count + 1) * input clock period).
- */
- dwc_pwm_writel(dwc, low, DWC_TIM_LD_CNT(pwm->hwpwm));
- dwc_pwm_writel(dwc, high, DWC_TIM_LD_CNT2(pwm->hwpwm));
-
- /*
- * Set user-defined mode, timer reloads from Load Count registers
- * when it counts down to 0.
- * Set PWM mode, it makes output to toggle and width of low and high
- * periods are set by Load Count registers.
- */
- ctrl = DWC_TIM_CTRL_MODE_USER | DWC_TIM_CTRL_PWM;
- dwc_pwm_writel(dwc, ctrl, DWC_TIM_CTRL(pwm->hwpwm));
-
- /*
- * Enable timer. Output starts from low period.
- */
- __dwc_pwm_set_enable(dwc, pwm->hwpwm, state->enabled);
-
- return 0;
-}
-
-static int dwc_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
- const struct pwm_state *state)
-{
- struct dwc_pwm *dwc = to_dwc_pwm(chip);
-
- if (state->polarity != PWM_POLARITY_INVERSED)
- return -EINVAL;
-
- if (state->enabled) {
- if (!pwm->state.enabled)
- pm_runtime_get_sync(chip->dev);
- return __dwc_pwm_configure_timer(dwc, pwm, state);
- } else {
- if (pwm->state.enabled) {
- __dwc_pwm_set_enable(dwc, pwm->hwpwm, false);
- pm_runtime_put_sync(chip->dev);
- }
- }
-
- return 0;
-}
-
-static int dwc_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm,
- struct pwm_state *state)
-{
- struct dwc_pwm *dwc = to_dwc_pwm(chip);
- u64 duty, period;
-
- pm_runtime_get_sync(chip->dev);
-
- state->enabled = !!(dwc_pwm_readl(dwc,
- DWC_TIM_CTRL(pwm->hwpwm)) & DWC_TIM_CTRL_EN);
-
- duty = dwc_pwm_readl(dwc, DWC_TIM_LD_CNT(pwm->hwpwm));
- duty += 1;
- duty *= DWC_CLK_PERIOD_NS;
- state->duty_cycle = duty;
-
- period = dwc_pwm_readl(dwc, DWC_TIM_LD_CNT2(pwm->hwpwm));
- period += 1;
- period *= DWC_CLK_PERIOD_NS;
- period += duty;
- state->period = period;
-
- state->polarity = PWM_POLARITY_INVERSED;
-
- pm_runtime_put_sync(chip->dev);
-
- return 0;
-}
-
-static const struct pwm_ops dwc_pwm_ops = {
- .apply = dwc_pwm_apply,
- .get_state = dwc_pwm_get_state,
- .owner = THIS_MODULE,
-};
-
-static struct dwc_pwm *dwc_pwm_alloc(struct device *dev)
-{
- struct dwc_pwm *dwc;
-
- dwc = devm_kzalloc(dev, sizeof(*dwc), GFP_KERNEL);
- if (!dwc)
- return NULL;
-
- dwc->chip.dev = dev;
- dwc->chip.ops = &dwc_pwm_ops;
- dwc->chip.npwm = DWC_TIMERS_TOTAL;
-
- dev_set_drvdata(dev, dwc);
- return dwc;
-}
+#include "pwm-dwc.h"
static int dwc_pwm_probe(struct pci_dev *pci, const struct pci_device_id *id)
{
diff --git a/drivers/pwm/pwm-dwc.h b/drivers/pwm/pwm-dwc.h
new file mode 100644
index 000000000000..64795247c54c
--- /dev/null
+++ b/drivers/pwm/pwm-dwc.h
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * DesignWare PWM Controller driver
+ *
+ * Copyright (C) 2018-2020 Intel Corporation
+ *
+ * Author: Felipe Balbi (Intel)
+ * Author: Jarkko Nikula <[email protected]>
+ * Author: Raymond Tan <[email protected]>
+ */
+
+MODULE_IMPORT_NS(dwc_pwm);
+
+#define DWC_TIM_LD_CNT(n) ((n) * 0x14)
+#define DWC_TIM_LD_CNT2(n) (((n) * 4) + 0xb0)
+#define DWC_TIM_CUR_VAL(n) (((n) * 0x14) + 0x04)
+#define DWC_TIM_CTRL(n) (((n) * 0x14) + 0x08)
+#define DWC_TIM_EOI(n) (((n) * 0x14) + 0x0c)
+#define DWC_TIM_INT_STS(n) (((n) * 0x14) + 0x10)
+
+#define DWC_TIMERS_INT_STS 0xa0
+#define DWC_TIMERS_EOI 0xa4
+#define DWC_TIMERS_RAW_INT_STS 0xa8
+#define DWC_TIMERS_COMP_VERSION 0xac
+
+#define DWC_TIMERS_TOTAL 8
+
+/* Timer Control Register */
+#define DWC_TIM_CTRL_EN BIT(0)
+#define DWC_TIM_CTRL_MODE BIT(1)
+#define DWC_TIM_CTRL_MODE_FREE (0 << 1)
+#define DWC_TIM_CTRL_MODE_USER (1 << 1)
+#define DWC_TIM_CTRL_INT_MASK BIT(2)
+#define DWC_TIM_CTRL_PWM BIT(3)
+
+struct dwc_pwm_ctx {
+ u32 cnt;
+ u32 cnt2;
+ u32 ctrl;
+};
+
+struct dwc_pwm {
+ struct pwm_chip chip;
+ void __iomem *base;
+ unsigned int clk_ns;
+ struct dwc_pwm_ctx ctx[DWC_TIMERS_TOTAL];
+};
+#define to_dwc_pwm(p) (container_of((p), struct dwc_pwm, chip))
+
+static inline u32 dwc_pwm_readl(struct dwc_pwm *dwc, u32 offset)
+{
+ return readl(dwc->base + offset);
+}
+
+static inline void dwc_pwm_writel(struct dwc_pwm *dwc, u32 value, u32 offset)
+{
+ writel(value, dwc->base + offset);
+}
+
+extern struct dwc_pwm *dwc_pwm_alloc(struct device *dev);
diff --git a/drivers/pwm/pwm-ep93xx.c b/drivers/pwm/pwm-ep93xx.c
index c45a75e65c86..51e072572a87 100644
--- a/drivers/pwm/pwm-ep93xx.c
+++ b/drivers/pwm/pwm-ep93xx.c
@@ -159,7 +159,6 @@ static const struct pwm_ops ep93xx_pwm_ops = {
.request = ep93xx_pwm_request,
.free = ep93xx_pwm_free,
.apply = ep93xx_pwm_apply,
- .owner = THIS_MODULE,
};
static int ep93xx_pwm_probe(struct platform_device *pdev)
diff --git a/drivers/pwm/pwm-fsl-ftm.c b/drivers/pwm/pwm-fsl-ftm.c
index b7c6045c5d08..d1b6d1aa4773 100644
--- a/drivers/pwm/pwm-fsl-ftm.c
+++ b/drivers/pwm/pwm-fsl-ftm.c
@@ -350,7 +350,6 @@ static const struct pwm_ops fsl_pwm_ops = {
.request = fsl_pwm_request,
.free = fsl_pwm_free,
.apply = fsl_pwm_apply,
- .owner = THIS_MODULE,
};
static int fsl_pwm_init(struct fsl_pwm_chip *fpc)
diff --git a/drivers/pwm/pwm-hibvt.c b/drivers/pwm/pwm-hibvt.c
index f7ba6fe9a349..c435776e2f78 100644
--- a/drivers/pwm/pwm-hibvt.c
+++ b/drivers/pwm/pwm-hibvt.c
@@ -185,7 +185,6 @@ static const struct pwm_ops hibvt_pwm_ops = {
.get_state = hibvt_pwm_get_state,
.apply = hibvt_pwm_apply,
- .owner = THIS_MODULE,
};
static int hibvt_pwm_probe(struct platform_device *pdev)
diff --git a/drivers/pwm/pwm-img.c b/drivers/pwm/pwm-img.c
index 326af85888e7..116fa060e302 100644
--- a/drivers/pwm/pwm-img.c
+++ b/drivers/pwm/pwm-img.c
@@ -208,7 +208,6 @@ static int img_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
static const struct pwm_ops img_pwm_ops = {
.apply = img_pwm_apply,
- .owner = THIS_MODULE,
};
static const struct img_pwm_soc_data pistachio_pwm = {
diff --git a/drivers/pwm/pwm-imx-tpm.c b/drivers/pwm/pwm-imx-tpm.c
index 98ab65c89685..dc6aafeb9f7b 100644
--- a/drivers/pwm/pwm-imx-tpm.c
+++ b/drivers/pwm/pwm-imx-tpm.c
@@ -332,7 +332,6 @@ static const struct pwm_ops imx_tpm_pwm_ops = {
.free = pwm_imx_tpm_free,
.get_state = pwm_imx_tpm_get_state,
.apply = pwm_imx_tpm_apply,
- .owner = THIS_MODULE,
};
static int pwm_imx_tpm_probe(struct platform_device *pdev)
@@ -351,18 +350,11 @@ static int pwm_imx_tpm_probe(struct platform_device *pdev)
if (IS_ERR(tpm->base))
return PTR_ERR(tpm->base);
- tpm->clk = devm_clk_get(&pdev->dev, NULL);
+ tpm->clk = devm_clk_get_enabled(&pdev->dev, NULL);
if (IS_ERR(tpm->clk))
return dev_err_probe(&pdev->dev, PTR_ERR(tpm->clk),
"failed to get PWM clock\n");
- ret = clk_prepare_enable(tpm->clk);
- if (ret) {
- dev_err(&pdev->dev,
- "failed to prepare or enable clock: %d\n", ret);
- return ret;
- }
-
tpm->chip.dev = &pdev->dev;
tpm->chip.ops = &imx_tpm_pwm_ops;
@@ -372,22 +364,11 @@ static int pwm_imx_tpm_probe(struct platform_device *pdev)
mutex_init(&tpm->lock);
- ret = pwmchip_add(&tpm->chip);
- if (ret) {
- dev_err(&pdev->dev, "failed to add PWM chip: %d\n", ret);
- clk_disable_unprepare(tpm->clk);
- }
-
- return ret;
-}
-
-static void pwm_imx_tpm_remove(struct platform_device *pdev)
-{
- struct imx_tpm_pwm_chip *tpm = platform_get_drvdata(pdev);
-
- pwmchip_remove(&tpm->chip);
+ ret = devm_pwmchip_add(&pdev->dev, &tpm->chip);
+ if (ret)
+ return dev_err_probe(&pdev->dev, ret, "failed to add PWM chip\n");
- clk_disable_unprepare(tpm->clk);
+ return 0;
}
static int __maybe_unused pwm_imx_tpm_suspend(struct device *dev)
@@ -437,7 +418,6 @@ static struct platform_driver imx_tpm_pwm_driver = {
.pm = &imx_tpm_pwm_pm,
},
.probe = pwm_imx_tpm_probe,
- .remove_new = pwm_imx_tpm_remove,
};
module_platform_driver(imx_tpm_pwm_driver);
diff --git a/drivers/pwm/pwm-imx1.c b/drivers/pwm/pwm-imx1.c
index 0651983bed19..d175d895f22a 100644
--- a/drivers/pwm/pwm-imx1.c
+++ b/drivers/pwm/pwm-imx1.c
@@ -146,7 +146,6 @@ static int pwm_imx1_apply(struct pwm_chip *chip, struct pwm_device *pwm,
static const struct pwm_ops pwm_imx1_ops = {
.apply = pwm_imx1_apply,
- .owner = THIS_MODULE,
};
static const struct of_device_id pwm_imx1_dt_ids[] = {
diff --git a/drivers/pwm/pwm-imx27.c b/drivers/pwm/pwm-imx27.c
index 29a3089c534c..7d9bc43f12b0 100644
--- a/drivers/pwm/pwm-imx27.c
+++ b/drivers/pwm/pwm-imx27.c
@@ -296,7 +296,6 @@ static int pwm_imx27_apply(struct pwm_chip *chip, struct pwm_device *pwm,
static const struct pwm_ops pwm_imx27_ops = {
.apply = pwm_imx27_apply,
.get_state = pwm_imx27_get_state,
- .owner = THIS_MODULE,
};
static const struct of_device_id pwm_imx27_dt_ids[] = {
diff --git a/drivers/pwm/pwm-intel-lgm.c b/drivers/pwm/pwm-intel-lgm.c
index 0cd7dd548e82..54ecae7f937e 100644
--- a/drivers/pwm/pwm-intel-lgm.c
+++ b/drivers/pwm/pwm-intel-lgm.c
@@ -107,7 +107,6 @@ static int lgm_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm,
static const struct pwm_ops lgm_pwm_ops = {
.get_state = lgm_pwm_get_state,
.apply = lgm_pwm_apply,
- .owner = THIS_MODULE,
};
static void lgm_pwm_init(struct lgm_pwm_chip *pc)
diff --git a/drivers/pwm/pwm-iqs620a.c b/drivers/pwm/pwm-iqs620a.c
index 47b3141135f3..378ab036edfe 100644
--- a/drivers/pwm/pwm-iqs620a.c
+++ b/drivers/pwm/pwm-iqs620a.c
@@ -166,7 +166,6 @@ static int iqs620_pwm_notifier(struct notifier_block *notifier,
static const struct pwm_ops iqs620_pwm_ops = {
.apply = iqs620_pwm_apply,
.get_state = iqs620_pwm_get_state,
- .owner = THIS_MODULE,
};
static void iqs620_pwm_notifier_unregister(void *context)
diff --git a/drivers/pwm/pwm-jz4740.c b/drivers/pwm/pwm-jz4740.c
index ef1293f2a897..e9375de60ad6 100644
--- a/drivers/pwm/pwm-jz4740.c
+++ b/drivers/pwm/pwm-jz4740.c
@@ -27,6 +27,7 @@ struct soc_info {
struct jz4740_pwm_chip {
struct pwm_chip chip;
struct regmap *map;
+ struct clk *clk[];
};
static inline struct jz4740_pwm_chip *to_jz4740(struct pwm_chip *chip)
@@ -70,14 +71,15 @@ static int jz4740_pwm_request(struct pwm_chip *chip, struct pwm_device *pwm)
return err;
}
- pwm_set_chip_data(pwm, clk);
+ jz->clk[pwm->hwpwm] = clk;
return 0;
}
static void jz4740_pwm_free(struct pwm_chip *chip, struct pwm_device *pwm)
{
- struct clk *clk = pwm_get_chip_data(pwm);
+ struct jz4740_pwm_chip *jz = to_jz4740(chip);
+ struct clk *clk = jz->clk[pwm->hwpwm];
clk_disable_unprepare(clk);
clk_put(clk);
@@ -121,9 +123,9 @@ static void jz4740_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm)
static int jz4740_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
const struct pwm_state *state)
{
- struct jz4740_pwm_chip *jz4740 = to_jz4740(pwm->chip);
+ struct jz4740_pwm_chip *jz = to_jz4740(pwm->chip);
unsigned long long tmp = 0xffffull * NSEC_PER_SEC;
- struct clk *clk = pwm_get_chip_data(pwm);
+ struct clk *clk = jz->clk[pwm->hwpwm];
unsigned long period, duty;
long rate;
int err;
@@ -173,16 +175,16 @@ static int jz4740_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
}
/* Reset counter to 0 */
- regmap_write(jz4740->map, TCU_REG_TCNTc(pwm->hwpwm), 0);
+ regmap_write(jz->map, TCU_REG_TCNTc(pwm->hwpwm), 0);
/* Set duty */
- regmap_write(jz4740->map, TCU_REG_TDHRc(pwm->hwpwm), duty);
+ regmap_write(jz->map, TCU_REG_TDHRc(pwm->hwpwm), duty);
/* Set period */
- regmap_write(jz4740->map, TCU_REG_TDFRc(pwm->hwpwm), period);
+ regmap_write(jz->map, TCU_REG_TDFRc(pwm->hwpwm), period);
/* Set abrupt shutdown */
- regmap_set_bits(jz4740->map, TCU_REG_TCSRc(pwm->hwpwm),
+ regmap_set_bits(jz->map, TCU_REG_TCSRc(pwm->hwpwm),
TCU_TCSR_PWM_SD);
/*
@@ -199,10 +201,10 @@ static int jz4740_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
* state instead of its inactive state.
*/
if ((state->polarity == PWM_POLARITY_NORMAL) ^ state->enabled)
- regmap_update_bits(jz4740->map, TCU_REG_TCSRc(pwm->hwpwm),
+ regmap_update_bits(jz->map, TCU_REG_TCSRc(pwm->hwpwm),
TCU_TCSR_PWM_INITL_HIGH, 0);
else
- regmap_update_bits(jz4740->map, TCU_REG_TCSRc(pwm->hwpwm),
+ regmap_update_bits(jz->map, TCU_REG_TCSRc(pwm->hwpwm),
TCU_TCSR_PWM_INITL_HIGH,
TCU_TCSR_PWM_INITL_HIGH);
@@ -216,34 +218,34 @@ static const struct pwm_ops jz4740_pwm_ops = {
.request = jz4740_pwm_request,
.free = jz4740_pwm_free,
.apply = jz4740_pwm_apply,
- .owner = THIS_MODULE,
};
static int jz4740_pwm_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
- struct jz4740_pwm_chip *jz4740;
+ struct jz4740_pwm_chip *jz;
const struct soc_info *info;
info = device_get_match_data(dev);
if (!info)
return -EINVAL;
- jz4740 = devm_kzalloc(dev, sizeof(*jz4740), GFP_KERNEL);
- if (!jz4740)
+ jz = devm_kzalloc(dev, struct_size(jz, clk, info->num_pwms),
+ GFP_KERNEL);
+ if (!jz)
return -ENOMEM;
- jz4740->map = device_node_to_regmap(dev->parent->of_node);
- if (IS_ERR(jz4740->map)) {
- dev_err(dev, "regmap not found: %ld\n", PTR_ERR(jz4740->map));
- return PTR_ERR(jz4740->map);
+ jz->map = device_node_to_regmap(dev->parent->of_node);
+ if (IS_ERR(jz->map)) {
+ dev_err(dev, "regmap not found: %ld\n", PTR_ERR(jz->map));
+ return PTR_ERR(jz->map);
}
- jz4740->chip.dev = dev;
- jz4740->chip.ops = &jz4740_pwm_ops;
- jz4740->chip.npwm = info->num_pwms;
+ jz->chip.dev = dev;
+ jz->chip.ops = &jz4740_pwm_ops;
+ jz->chip.npwm = info->num_pwms;
- return devm_pwmchip_add(dev, &jz4740->chip);
+ return devm_pwmchip_add(dev, &jz->chip);
}
static const struct soc_info jz4740_soc_info = {
diff --git a/drivers/pwm/pwm-keembay.c b/drivers/pwm/pwm-keembay.c
index ac02d8bb4a0b..ac824ecc3f64 100644
--- a/drivers/pwm/pwm-keembay.c
+++ b/drivers/pwm/pwm-keembay.c
@@ -178,7 +178,6 @@ static int keembay_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
}
static const struct pwm_ops keembay_pwm_ops = {
- .owner = THIS_MODULE,
.apply = keembay_pwm_apply,
.get_state = keembay_pwm_get_state,
};
diff --git a/drivers/pwm/pwm-lp3943.c b/drivers/pwm/pwm-lp3943.c
index 4b133a17f4be..32350a357278 100644
--- a/drivers/pwm/pwm-lp3943.c
+++ b/drivers/pwm/pwm-lp3943.c
@@ -23,6 +23,7 @@ struct lp3943_pwm {
struct pwm_chip chip;
struct lp3943 *lp3943;
struct lp3943_platform_data *pdata;
+ struct lp3943_pwm_map pwm_map[LP3943_NUM_PWMS];
};
static inline struct lp3943_pwm *to_lp3943_pwm(struct pwm_chip *chip)
@@ -35,13 +36,9 @@ lp3943_pwm_request_map(struct lp3943_pwm *lp3943_pwm, int hwpwm)
{
struct lp3943_platform_data *pdata = lp3943_pwm->pdata;
struct lp3943 *lp3943 = lp3943_pwm->lp3943;
- struct lp3943_pwm_map *pwm_map;
+ struct lp3943_pwm_map *pwm_map = &lp3943_pwm->pwm_map[hwpwm];
int i, offset;
- pwm_map = kzalloc(sizeof(*pwm_map), GFP_KERNEL);
- if (!pwm_map)
- return ERR_PTR(-ENOMEM);
-
pwm_map->output = pdata->pwms[hwpwm]->output;
pwm_map->num_outputs = pdata->pwms[hwpwm]->num_outputs;
@@ -49,10 +46,8 @@ lp3943_pwm_request_map(struct lp3943_pwm *lp3943_pwm, int hwpwm)
offset = pwm_map->output[i];
/* Return an error if the pin is already assigned */
- if (test_and_set_bit(offset, &lp3943->pin_used)) {
- kfree(pwm_map);
+ if (test_and_set_bit(offset, &lp3943->pin_used))
return ERR_PTR(-EBUSY);
- }
}
return pwm_map;
@@ -67,7 +62,7 @@ static int lp3943_pwm_request(struct pwm_chip *chip, struct pwm_device *pwm)
if (IS_ERR(pwm_map))
return PTR_ERR(pwm_map);
- return pwm_set_chip_data(pwm, pwm_map);
+ return 0;
}
static void lp3943_pwm_free_map(struct lp3943_pwm *lp3943_pwm,
@@ -80,14 +75,12 @@ static void lp3943_pwm_free_map(struct lp3943_pwm *lp3943_pwm,
offset = pwm_map->output[i];
clear_bit(offset, &lp3943->pin_used);
}
-
- kfree(pwm_map);
}
static void lp3943_pwm_free(struct pwm_chip *chip, struct pwm_device *pwm)
{
struct lp3943_pwm *lp3943_pwm = to_lp3943_pwm(chip);
- struct lp3943_pwm_map *pwm_map = pwm_get_chip_data(pwm);
+ struct lp3943_pwm_map *pwm_map = &lp3943_pwm->pwm_map[pwm->hwpwm];
lp3943_pwm_free_map(lp3943_pwm, pwm_map);
}
@@ -159,7 +152,7 @@ static int lp3943_pwm_set_mode(struct lp3943_pwm *lp3943_pwm,
static int lp3943_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm)
{
struct lp3943_pwm *lp3943_pwm = to_lp3943_pwm(chip);
- struct lp3943_pwm_map *pwm_map = pwm_get_chip_data(pwm);
+ struct lp3943_pwm_map *pwm_map = &lp3943_pwm->pwm_map[pwm->hwpwm];
u8 val;
if (pwm->hwpwm == 0)
@@ -178,7 +171,7 @@ static int lp3943_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm)
static void lp3943_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm)
{
struct lp3943_pwm *lp3943_pwm = to_lp3943_pwm(chip);
- struct lp3943_pwm_map *pwm_map = pwm_get_chip_data(pwm);
+ struct lp3943_pwm_map *pwm_map = &lp3943_pwm->pwm_map[pwm->hwpwm];
/*
* LP3943 outputs are open-drain, so the pin should be configured
@@ -216,7 +209,6 @@ static const struct pwm_ops lp3943_pwm_ops = {
.request = lp3943_pwm_request,
.free = lp3943_pwm_free,
.apply = lp3943_pwm_apply,
- .owner = THIS_MODULE,
};
static int lp3943_pwm_parse_dt(struct device *dev,
diff --git a/drivers/pwm/pwm-lpc18xx-sct.c b/drivers/pwm/pwm-lpc18xx-sct.c
index 7a19a840bca5..ef7d0da137ed 100644
--- a/drivers/pwm/pwm-lpc18xx-sct.c
+++ b/drivers/pwm/pwm-lpc18xx-sct.c
@@ -341,7 +341,6 @@ static const struct pwm_ops lpc18xx_pwm_ops = {
.apply = lpc18xx_pwm_apply,
.request = lpc18xx_pwm_request,
.free = lpc18xx_pwm_free,
- .owner = THIS_MODULE,
};
static const struct of_device_id lpc18xx_pwm_of_match[] = {
diff --git a/drivers/pwm/pwm-lpc32xx.c b/drivers/pwm/pwm-lpc32xx.c
index 806f0bb3ad6d..78f664e41e6e 100644
--- a/drivers/pwm/pwm-lpc32xx.c
+++ b/drivers/pwm/pwm-lpc32xx.c
@@ -115,7 +115,6 @@ static int lpc32xx_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
static const struct pwm_ops lpc32xx_pwm_ops = {
.apply = lpc32xx_pwm_apply,
- .owner = THIS_MODULE,
};
static int lpc32xx_pwm_probe(struct platform_device *pdev)
diff --git a/drivers/pwm/pwm-lpss.c b/drivers/pwm/pwm-lpss.c
index 23fe332b2394..a6ea3ce7e019 100644
--- a/drivers/pwm/pwm-lpss.c
+++ b/drivers/pwm/pwm-lpss.c
@@ -243,7 +243,6 @@ static int pwm_lpss_get_state(struct pwm_chip *chip, struct pwm_device *pwm,
static const struct pwm_ops pwm_lpss_ops = {
.apply = pwm_lpss_apply,
.get_state = pwm_lpss_get_state,
- .owner = THIS_MODULE,
};
struct pwm_lpss_chip *devm_pwm_lpss_probe(struct device *dev, void __iomem *base,
diff --git a/drivers/pwm/pwm-mediatek.c b/drivers/pwm/pwm-mediatek.c
index 6adb0ed01906..373abfd25acb 100644
--- a/drivers/pwm/pwm-mediatek.c
+++ b/drivers/pwm/pwm-mediatek.c
@@ -229,7 +229,6 @@ static int pwm_mediatek_apply(struct pwm_chip *chip, struct pwm_device *pwm,
static const struct pwm_ops pwm_mediatek_ops = {
.apply = pwm_mediatek_apply,
- .owner = THIS_MODULE,
};
static int pwm_mediatek_probe(struct platform_device *pdev)
diff --git a/drivers/pwm/pwm-meson.c b/drivers/pwm/pwm-meson.c
index 25519cddc2a9..5bea53243ed2 100644
--- a/drivers/pwm/pwm-meson.c
+++ b/drivers/pwm/pwm-meson.c
@@ -335,7 +335,6 @@ static const struct pwm_ops meson_pwm_ops = {
.free = meson_pwm_free,
.apply = meson_pwm_apply,
.get_state = meson_pwm_get_state,
- .owner = THIS_MODULE,
};
static const char * const pwm_meson8b_parent_names[] = {
diff --git a/drivers/pwm/pwm-microchip-core.c b/drivers/pwm/pwm-microchip-core.c
index e7525c98105e..c0c53968f3e9 100644
--- a/drivers/pwm/pwm-microchip-core.c
+++ b/drivers/pwm/pwm-microchip-core.c
@@ -435,7 +435,6 @@ static int mchp_core_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm
static const struct pwm_ops mchp_core_pwm_ops = {
.apply = mchp_core_pwm_apply,
.get_state = mchp_core_pwm_get_state,
- .owner = THIS_MODULE,
};
static const struct of_device_id mchp_core_of_match[] = {
diff --git a/drivers/pwm/pwm-mtk-disp.c b/drivers/pwm/pwm-mtk-disp.c
index a83bd6e18b07..a72f7be36996 100644
--- a/drivers/pwm/pwm-mtk-disp.c
+++ b/drivers/pwm/pwm-mtk-disp.c
@@ -227,7 +227,6 @@ static int mtk_disp_pwm_get_state(struct pwm_chip *chip,
static const struct pwm_ops mtk_disp_pwm_ops = {
.apply = mtk_disp_pwm_apply,
.get_state = mtk_disp_pwm_get_state,
- .owner = THIS_MODULE,
};
static int mtk_disp_pwm_probe(struct platform_device *pdev)
@@ -247,34 +246,25 @@ static int mtk_disp_pwm_probe(struct platform_device *pdev)
mdp->clk_main = devm_clk_get(&pdev->dev, "main");
if (IS_ERR(mdp->clk_main))
- return PTR_ERR(mdp->clk_main);
+ return dev_err_probe(&pdev->dev, PTR_ERR(mdp->clk_main),
+ "Failed to get main clock\n");
mdp->clk_mm = devm_clk_get(&pdev->dev, "mm");
if (IS_ERR(mdp->clk_mm))
- return PTR_ERR(mdp->clk_mm);
+ return dev_err_probe(&pdev->dev, PTR_ERR(mdp->clk_mm),
+ "Failed to get mm clock\n");
mdp->chip.dev = &pdev->dev;
mdp->chip.ops = &mtk_disp_pwm_ops;
mdp->chip.npwm = 1;
- ret = pwmchip_add(&mdp->chip);
- if (ret < 0) {
- dev_err(&pdev->dev, "pwmchip_add() failed: %pe\n", ERR_PTR(ret));
- return ret;
- }
-
- platform_set_drvdata(pdev, mdp);
+ ret = devm_pwmchip_add(&pdev->dev, &mdp->chip);
+ if (ret < 0)
+ return dev_err_probe(&pdev->dev, ret, "pwmchip_add() failed\n");
return 0;
}
-static void mtk_disp_pwm_remove(struct platform_device *pdev)
-{
- struct mtk_disp_pwm *mdp = platform_get_drvdata(pdev);
-
- pwmchip_remove(&mdp->chip);
-}
-
static const struct mtk_pwm_data mt2701_pwm_data = {
.enable_mask = BIT(16),
.con0 = 0xa8,
@@ -320,7 +310,6 @@ static struct platform_driver mtk_disp_pwm_driver = {
.of_match_table = mtk_disp_pwm_of_match,
},
.probe = mtk_disp_pwm_probe,
- .remove_new = mtk_disp_pwm_remove,
};
module_platform_driver(mtk_disp_pwm_driver);
diff --git a/drivers/pwm/pwm-mxs.c b/drivers/pwm/pwm-mxs.c
index 766dbc58dad8..1b5e787d78f1 100644
--- a/drivers/pwm/pwm-mxs.c
+++ b/drivers/pwm/pwm-mxs.c
@@ -115,7 +115,6 @@ static int mxs_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
static const struct pwm_ops mxs_pwm_ops = {
.apply = mxs_pwm_apply,
- .owner = THIS_MODULE,
};
static int mxs_pwm_probe(struct platform_device *pdev)
diff --git a/drivers/pwm/pwm-ntxec.c b/drivers/pwm/pwm-ntxec.c
index 7514ea384ec5..78606039eda2 100644
--- a/drivers/pwm/pwm-ntxec.c
+++ b/drivers/pwm/pwm-ntxec.c
@@ -126,7 +126,6 @@ static int ntxec_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm_dev,
}
static const struct pwm_ops ntxec_pwm_ops = {
- .owner = THIS_MODULE,
.apply = ntxec_pwm_apply,
/*
* No .get_state callback, because the current state cannot be read
diff --git a/drivers/pwm/pwm-omap-dmtimer.c b/drivers/pwm/pwm-omap-dmtimer.c
index 4889fbd8a431..13161e08dd6e 100644
--- a/drivers/pwm/pwm-omap-dmtimer.c
+++ b/drivers/pwm/pwm-omap-dmtimer.c
@@ -311,7 +311,6 @@ unlock_mutex:
static const struct pwm_ops pwm_omap_dmtimer_ops = {
.apply = pwm_omap_dmtimer_apply,
- .owner = THIS_MODULE,
};
static int pwm_omap_dmtimer_probe(struct platform_device *pdev)
@@ -466,7 +465,7 @@ MODULE_DEVICE_TABLE(of, pwm_omap_dmtimer_of_match);
static struct platform_driver pwm_omap_dmtimer_driver = {
.driver = {
.name = "omap-dmtimer-pwm",
- .of_match_table = of_match_ptr(pwm_omap_dmtimer_of_match),
+ .of_match_table = pwm_omap_dmtimer_of_match,
},
.probe = pwm_omap_dmtimer_probe,
.remove_new = pwm_omap_dmtimer_remove,
diff --git a/drivers/pwm/pwm-pca9685.c b/drivers/pwm/pwm-pca9685.c
index 3038a68412a7..e79b1de8c4d8 100644
--- a/drivers/pwm/pwm-pca9685.c
+++ b/drivers/pwm/pwm-pca9685.c
@@ -505,7 +505,6 @@ static const struct pwm_ops pca9685_pwm_ops = {
.get_state = pca9685_pwm_get_state,
.request = pca9685_pwm_request,
.free = pca9685_pwm_free,
- .owner = THIS_MODULE,
};
static const struct regmap_config pca9685_regmap_i2c_config = {
diff --git a/drivers/pwm/pwm-pxa.c b/drivers/pwm/pwm-pxa.c
index 1e475ed10180..76685f926c75 100644
--- a/drivers/pwm/pwm-pxa.c
+++ b/drivers/pwm/pwm-pxa.c
@@ -24,7 +24,7 @@
#include <linux/clk.h>
#include <linux/io.h>
#include <linux/pwm.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
#include <asm/div64.h>
@@ -135,7 +135,6 @@ static int pxa_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
static const struct pwm_ops pxa_pwm_ops = {
.apply = pxa_pwm_apply,
- .owner = THIS_MODULE,
};
#ifdef CONFIG_OF
diff --git a/drivers/pwm/pwm-raspberrypi-poe.c b/drivers/pwm/pwm-raspberrypi-poe.c
index 2939b71a7ba7..1ad814fdec6b 100644
--- a/drivers/pwm/pwm-raspberrypi-poe.c
+++ b/drivers/pwm/pwm-raspberrypi-poe.c
@@ -135,7 +135,6 @@ static int raspberrypi_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
static const struct pwm_ops raspberrypi_pwm_ops = {
.get_state = raspberrypi_pwm_get_state,
.apply = raspberrypi_pwm_apply,
- .owner = THIS_MODULE,
};
static int raspberrypi_pwm_probe(struct platform_device *pdev)
diff --git a/drivers/pwm/pwm-rcar.c b/drivers/pwm/pwm-rcar.c
index 5b5f357c44de..13269f55fccf 100644
--- a/drivers/pwm/pwm-rcar.c
+++ b/drivers/pwm/pwm-rcar.c
@@ -198,7 +198,6 @@ static const struct pwm_ops rcar_pwm_ops = {
.request = rcar_pwm_request,
.free = rcar_pwm_free,
.apply = rcar_pwm_apply,
- .owner = THIS_MODULE,
};
static int rcar_pwm_probe(struct platform_device *pdev)
diff --git a/drivers/pwm/pwm-renesas-tpu.c b/drivers/pwm/pwm-renesas-tpu.c
index d7311614c846..4239f2c3e8b2 100644
--- a/drivers/pwm/pwm-renesas-tpu.c
+++ b/drivers/pwm/pwm-renesas-tpu.c
@@ -85,6 +85,7 @@ struct tpu_device {
void __iomem *base;
struct clk *clk;
+ struct tpu_pwm_device tpd[TPU_CHANNEL_MAX];
};
#define to_tpu_device(c) container_of(c, struct tpu_device, chip)
@@ -215,9 +216,7 @@ static int tpu_pwm_request(struct pwm_chip *chip, struct pwm_device *pwm)
if (pwm->hwpwm >= TPU_CHANNEL_MAX)
return -EINVAL;
- tpd = kzalloc(sizeof(*tpd), GFP_KERNEL);
- if (tpd == NULL)
- return -ENOMEM;
+ tpd = &tpu->tpd[pwm->hwpwm];
tpd->tpu = tpu;
tpd->channel = pwm->hwpwm;
@@ -228,24 +227,22 @@ static int tpu_pwm_request(struct pwm_chip *chip, struct pwm_device *pwm)
tpd->timer_on = false;
- pwm_set_chip_data(pwm, tpd);
-
return 0;
}
static void tpu_pwm_free(struct pwm_chip *chip, struct pwm_device *pwm)
{
- struct tpu_pwm_device *tpd = pwm_get_chip_data(pwm);
+ struct tpu_device *tpu = to_tpu_device(chip);
+ struct tpu_pwm_device *tpd = &tpu->tpd[pwm->hwpwm];
tpu_pwm_timer_stop(tpd);
- kfree(tpd);
}
static int tpu_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
u64 duty_ns, u64 period_ns, bool enabled)
{
- struct tpu_pwm_device *tpd = pwm_get_chip_data(pwm);
struct tpu_device *tpu = to_tpu_device(chip);
+ struct tpu_pwm_device *tpd = &tpu->tpd[pwm->hwpwm];
unsigned int prescaler;
bool duty_only = false;
u32 clk_rate;
@@ -353,7 +350,8 @@ static int tpu_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
static int tpu_pwm_set_polarity(struct pwm_chip *chip, struct pwm_device *pwm,
enum pwm_polarity polarity)
{
- struct tpu_pwm_device *tpd = pwm_get_chip_data(pwm);
+ struct tpu_device *tpu = to_tpu_device(chip);
+ struct tpu_pwm_device *tpd = &tpu->tpd[pwm->hwpwm];
tpd->polarity = polarity;
@@ -362,7 +360,8 @@ static int tpu_pwm_set_polarity(struct pwm_chip *chip, struct pwm_device *pwm,
static int tpu_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm)
{
- struct tpu_pwm_device *tpd = pwm_get_chip_data(pwm);
+ struct tpu_device *tpu = to_tpu_device(chip);
+ struct tpu_pwm_device *tpd = &tpu->tpd[pwm->hwpwm];
int ret;
ret = tpu_pwm_timer_start(tpd);
@@ -384,7 +383,8 @@ static int tpu_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm)
static void tpu_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm)
{
- struct tpu_pwm_device *tpd = pwm_get_chip_data(pwm);
+ struct tpu_device *tpu = to_tpu_device(chip);
+ struct tpu_pwm_device *tpd = &tpu->tpd[pwm->hwpwm];
/* The timer must be running to modify the pin output configuration. */
tpu_pwm_timer_start(tpd);
@@ -431,7 +431,6 @@ static const struct pwm_ops tpu_pwm_ops = {
.request = tpu_pwm_request,
.free = tpu_pwm_free,
.apply = tpu_pwm_apply,
- .owner = THIS_MODULE,
};
/* -----------------------------------------------------------------------------
diff --git a/drivers/pwm/pwm-rockchip.c b/drivers/pwm/pwm-rockchip.c
index 03ee18fb82d5..cce4381e188a 100644
--- a/drivers/pwm/pwm-rockchip.c
+++ b/drivers/pwm/pwm-rockchip.c
@@ -228,7 +228,6 @@ out:
static const struct pwm_ops rockchip_pwm_ops = {
.get_state = rockchip_pwm_get_state,
.apply = rockchip_pwm_apply,
- .owner = THIS_MODULE,
};
static const struct rockchip_pwm_data pwm_data_v1 = {
diff --git a/drivers/pwm/pwm-rz-mtu3.c b/drivers/pwm/pwm-rz-mtu3.c
index a56cecb0e46e..bdda315b3bd3 100644
--- a/drivers/pwm/pwm-rz-mtu3.c
+++ b/drivers/pwm/pwm-rz-mtu3.c
@@ -438,7 +438,6 @@ static const struct pwm_ops rz_mtu3_pwm_ops = {
.free = rz_mtu3_pwm_free,
.get_state = rz_mtu3_pwm_get_state,
.apply = rz_mtu3_pwm_apply,
- .owner = THIS_MODULE,
};
static int rz_mtu3_pwm_pm_runtime_suspend(struct device *dev)
diff --git a/drivers/pwm/pwm-samsung.c b/drivers/pwm/pwm-samsung.c
index e8828f57ab15..568491ed6829 100644
--- a/drivers/pwm/pwm-samsung.c
+++ b/drivers/pwm/pwm-samsung.c
@@ -77,6 +77,7 @@ struct samsung_pwm_channel {
* @base_clk: base clock used to drive the timers
* @tclk0: external clock 0 (can be ERR_PTR if not present)
* @tclk1: external clock 1 (can be ERR_PTR if not present)
+ * @channel: per channel driver data
*/
struct samsung_pwm_chip {
struct pwm_chip chip;
@@ -88,6 +89,7 @@ struct samsung_pwm_chip {
struct clk *base_clk;
struct clk *tclk0;
struct clk *tclk1;
+ struct samsung_pwm_channel channel[SAMSUNG_PWM_NUM];
};
#ifndef CONFIG_CLKSRC_SAMSUNG_PWM
@@ -117,21 +119,21 @@ static inline unsigned int to_tcon_channel(unsigned int channel)
return (channel == 0) ? 0 : (channel + 1);
}
-static void __pwm_samsung_manual_update(struct samsung_pwm_chip *chip,
+static void __pwm_samsung_manual_update(struct samsung_pwm_chip *our_chip,
struct pwm_device *pwm)
{
unsigned int tcon_chan = to_tcon_channel(pwm->hwpwm);
u32 tcon;
- tcon = readl(chip->base + REG_TCON);
+ tcon = readl(our_chip->base + REG_TCON);
tcon |= TCON_MANUALUPDATE(tcon_chan);
- writel(tcon, chip->base + REG_TCON);
+ writel(tcon, our_chip->base + REG_TCON);
tcon &= ~TCON_MANUALUPDATE(tcon_chan);
- writel(tcon, chip->base + REG_TCON);
+ writel(tcon, our_chip->base + REG_TCON);
}
-static void pwm_samsung_set_divisor(struct samsung_pwm_chip *pwm,
+static void pwm_samsung_set_divisor(struct samsung_pwm_chip *our_chip,
unsigned int channel, u8 divisor)
{
u8 shift = TCFG1_SHIFT(channel);
@@ -139,39 +141,39 @@ static void pwm_samsung_set_divisor(struct samsung_pwm_chip *pwm,
u32 reg;
u8 bits;
- bits = (fls(divisor) - 1) - pwm->variant.div_base;
+ bits = (fls(divisor) - 1) - our_chip->variant.div_base;
spin_lock_irqsave(&samsung_pwm_lock, flags);
- reg = readl(pwm->base + REG_TCFG1);
+ reg = readl(our_chip->base + REG_TCFG1);
reg &= ~(TCFG1_MUX_MASK << shift);
reg |= bits << shift;
- writel(reg, pwm->base + REG_TCFG1);
+ writel(reg, our_chip->base + REG_TCFG1);
spin_unlock_irqrestore(&samsung_pwm_lock, flags);
}
-static int pwm_samsung_is_tdiv(struct samsung_pwm_chip *chip, unsigned int chan)
+static int pwm_samsung_is_tdiv(struct samsung_pwm_chip *our_chip, unsigned int chan)
{
- struct samsung_pwm_variant *variant = &chip->variant;
+ struct samsung_pwm_variant *variant = &our_chip->variant;
u32 reg;
- reg = readl(chip->base + REG_TCFG1);
+ reg = readl(our_chip->base + REG_TCFG1);
reg >>= TCFG1_SHIFT(chan);
reg &= TCFG1_MUX_MASK;
return (BIT(reg) & variant->tclk_mask) == 0;
}
-static unsigned long pwm_samsung_get_tin_rate(struct samsung_pwm_chip *chip,
+static unsigned long pwm_samsung_get_tin_rate(struct samsung_pwm_chip *our_chip,
unsigned int chan)
{
unsigned long rate;
u32 reg;
- rate = clk_get_rate(chip->base_clk);
+ rate = clk_get_rate(our_chip->base_clk);
- reg = readl(chip->base + REG_TCFG0);
+ reg = readl(our_chip->base + REG_TCFG0);
if (chan >= 2)
reg >>= TCFG0_PRESCALER1_SHIFT;
reg &= TCFG0_PRESCALER_MASK;
@@ -179,28 +181,28 @@ static unsigned long pwm_samsung_get_tin_rate(struct samsung_pwm_chip *chip,
return rate / (reg + 1);
}
-static unsigned long pwm_samsung_calc_tin(struct samsung_pwm_chip *chip,
+static unsigned long pwm_samsung_calc_tin(struct samsung_pwm_chip *our_chip,
unsigned int chan, unsigned long freq)
{
- struct samsung_pwm_variant *variant = &chip->variant;
+ struct samsung_pwm_variant *variant = &our_chip->variant;
unsigned long rate;
struct clk *clk;
u8 div;
- if (!pwm_samsung_is_tdiv(chip, chan)) {
- clk = (chan < 2) ? chip->tclk0 : chip->tclk1;
+ if (!pwm_samsung_is_tdiv(our_chip, chan)) {
+ clk = (chan < 2) ? our_chip->tclk0 : our_chip->tclk1;
if (!IS_ERR(clk)) {
rate = clk_get_rate(clk);
if (rate)
return rate;
}
- dev_warn(chip->chip.dev,
+ dev_warn(our_chip->chip.dev,
"tclk of PWM %d is inoperational, using tdiv\n", chan);
}
- rate = pwm_samsung_get_tin_rate(chip, chan);
- dev_dbg(chip->chip.dev, "tin parent at %lu\n", rate);
+ rate = pwm_samsung_get_tin_rate(our_chip, chan);
+ dev_dbg(our_chip->chip.dev, "tin parent at %lu\n", rate);
/*
* Compare minimum PWM frequency that can be achieved with possible
@@ -220,7 +222,7 @@ static unsigned long pwm_samsung_calc_tin(struct samsung_pwm_chip *chip,
div = variant->div_base;
}
- pwm_samsung_set_divisor(chip, chan, BIT(div));
+ pwm_samsung_set_divisor(our_chip, chan, BIT(div));
return rate >> div;
}
@@ -228,7 +230,6 @@ static unsigned long pwm_samsung_calc_tin(struct samsung_pwm_chip *chip,
static int pwm_samsung_request(struct pwm_chip *chip, struct pwm_device *pwm)
{
struct samsung_pwm_chip *our_chip = to_samsung_pwm_chip(chip);
- struct samsung_pwm_channel *our_chan;
if (!(our_chip->variant.output_mask & BIT(pwm->hwpwm))) {
dev_warn(chip->dev,
@@ -237,20 +238,11 @@ static int pwm_samsung_request(struct pwm_chip *chip, struct pwm_device *pwm)
return -EINVAL;
}
- our_chan = kzalloc(sizeof(*our_chan), GFP_KERNEL);
- if (!our_chan)
- return -ENOMEM;
-
- pwm_set_chip_data(pwm, our_chan);
+ memset(&our_chip->channel[pwm->hwpwm], 0, sizeof(our_chip->channel[pwm->hwpwm]));
return 0;
}
-static void pwm_samsung_free(struct pwm_chip *chip, struct pwm_device *pwm)
-{
- kfree(pwm_get_chip_data(pwm));
-}
-
static int pwm_samsung_enable(struct pwm_chip *chip, struct pwm_device *pwm)
{
struct samsung_pwm_chip *our_chip = to_samsung_pwm_chip(chip);
@@ -302,14 +294,14 @@ static void pwm_samsung_disable(struct pwm_chip *chip, struct pwm_device *pwm)
spin_unlock_irqrestore(&samsung_pwm_lock, flags);
}
-static void pwm_samsung_manual_update(struct samsung_pwm_chip *chip,
+static void pwm_samsung_manual_update(struct samsung_pwm_chip *our_chip,
struct pwm_device *pwm)
{
unsigned long flags;
spin_lock_irqsave(&samsung_pwm_lock, flags);
- __pwm_samsung_manual_update(chip, pwm);
+ __pwm_samsung_manual_update(our_chip, pwm);
spin_unlock_irqrestore(&samsung_pwm_lock, flags);
}
@@ -318,7 +310,7 @@ static int __pwm_samsung_config(struct pwm_chip *chip, struct pwm_device *pwm,
int duty_ns, int period_ns, bool force_period)
{
struct samsung_pwm_chip *our_chip = to_samsung_pwm_chip(chip);
- struct samsung_pwm_channel *chan = pwm_get_chip_data(pwm);
+ struct samsung_pwm_channel *chan = &our_chip->channel[pwm->hwpwm];
u32 tin_ns = chan->tin_ns, tcnt, tcmp, oldtcmp;
tcnt = readl(our_chip->base + REG_TCNTB(pwm->hwpwm));
@@ -393,7 +385,7 @@ static int pwm_samsung_config(struct pwm_chip *chip, struct pwm_device *pwm,
return __pwm_samsung_config(chip, pwm, duty_ns, period_ns, false);
}
-static void pwm_samsung_set_invert(struct samsung_pwm_chip *chip,
+static void pwm_samsung_set_invert(struct samsung_pwm_chip *our_chip,
unsigned int channel, bool invert)
{
unsigned int tcon_chan = to_tcon_channel(channel);
@@ -402,17 +394,17 @@ static void pwm_samsung_set_invert(struct samsung_pwm_chip *chip,
spin_lock_irqsave(&samsung_pwm_lock, flags);
- tcon = readl(chip->base + REG_TCON);
+ tcon = readl(our_chip->base + REG_TCON);
if (invert) {
- chip->inverter_mask |= BIT(channel);
+ our_chip->inverter_mask |= BIT(channel);
tcon |= TCON_INVERT(tcon_chan);
} else {
- chip->inverter_mask &= ~BIT(channel);
+ our_chip->inverter_mask &= ~BIT(channel);
tcon &= ~TCON_INVERT(tcon_chan);
}
- writel(tcon, chip->base + REG_TCON);
+ writel(tcon, our_chip->base + REG_TCON);
spin_unlock_irqrestore(&samsung_pwm_lock, flags);
}
@@ -473,9 +465,7 @@ static int pwm_samsung_apply(struct pwm_chip *chip, struct pwm_device *pwm,
static const struct pwm_ops pwm_samsung_ops = {
.request = pwm_samsung_request,
- .free = pwm_samsung_free,
.apply = pwm_samsung_apply,
- .owner = THIS_MODULE,
};
#ifdef CONFIG_OF
@@ -517,9 +507,9 @@ static const struct of_device_id samsung_pwm_matches[] = {
};
MODULE_DEVICE_TABLE(of, samsung_pwm_matches);
-static int pwm_samsung_parse_dt(struct samsung_pwm_chip *chip)
+static int pwm_samsung_parse_dt(struct samsung_pwm_chip *our_chip)
{
- struct device_node *np = chip->chip.dev->of_node;
+ struct device_node *np = our_chip->chip.dev->of_node;
const struct of_device_id *match;
struct property *prop;
const __be32 *cur;
@@ -529,22 +519,22 @@ static int pwm_samsung_parse_dt(struct samsung_pwm_chip *chip)
if (!match)
return -ENODEV;
- memcpy(&chip->variant, match->data, sizeof(chip->variant));
+ memcpy(&our_chip->variant, match->data, sizeof(our_chip->variant));
of_property_for_each_u32(np, "samsung,pwm-outputs", prop, cur, val) {
if (val >= SAMSUNG_PWM_NUM) {
- dev_err(chip->chip.dev,
+ dev_err(our_chip->chip.dev,
"%s: invalid channel index in samsung,pwm-outputs property\n",
__func__);
continue;
}
- chip->variant.output_mask |= BIT(val);
+ our_chip->variant.output_mask |= BIT(val);
}
return 0;
}
#else
-static int pwm_samsung_parse_dt(struct samsung_pwm_chip *chip)
+static int pwm_samsung_parse_dt(struct samsung_pwm_chip *our_chip)
{
return -ENODEV;
}
@@ -553,21 +543,21 @@ static int pwm_samsung_parse_dt(struct samsung_pwm_chip *chip)
static int pwm_samsung_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
- struct samsung_pwm_chip *chip;
+ struct samsung_pwm_chip *our_chip;
unsigned int chan;
int ret;
- chip = devm_kzalloc(&pdev->dev, sizeof(*chip), GFP_KERNEL);
- if (chip == NULL)
+ our_chip = devm_kzalloc(&pdev->dev, sizeof(*our_chip), GFP_KERNEL);
+ if (our_chip == NULL)
return -ENOMEM;
- chip->chip.dev = &pdev->dev;
- chip->chip.ops = &pwm_samsung_ops;
- chip->chip.npwm = SAMSUNG_PWM_NUM;
- chip->inverter_mask = BIT(SAMSUNG_PWM_NUM) - 1;
+ our_chip->chip.dev = &pdev->dev;
+ our_chip->chip.ops = &pwm_samsung_ops;
+ our_chip->chip.npwm = SAMSUNG_PWM_NUM;
+ our_chip->inverter_mask = BIT(SAMSUNG_PWM_NUM) - 1;
if (IS_ENABLED(CONFIG_OF) && pdev->dev.of_node) {
- ret = pwm_samsung_parse_dt(chip);
+ ret = pwm_samsung_parse_dt(our_chip);
if (ret)
return ret;
} else {
@@ -576,58 +566,58 @@ static int pwm_samsung_probe(struct platform_device *pdev)
return -EINVAL;
}
- memcpy(&chip->variant, pdev->dev.platform_data,
- sizeof(chip->variant));
+ memcpy(&our_chip->variant, pdev->dev.platform_data,
+ sizeof(our_chip->variant));
}
- chip->base = devm_platform_ioremap_resource(pdev, 0);
- if (IS_ERR(chip->base))
- return PTR_ERR(chip->base);
+ our_chip->base = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(our_chip->base))
+ return PTR_ERR(our_chip->base);
- chip->base_clk = devm_clk_get(&pdev->dev, "timers");
- if (IS_ERR(chip->base_clk)) {
+ our_chip->base_clk = devm_clk_get(&pdev->dev, "timers");
+ if (IS_ERR(our_chip->base_clk)) {
dev_err(dev, "failed to get timer base clk\n");
- return PTR_ERR(chip->base_clk);
+ return PTR_ERR(our_chip->base_clk);
}
- ret = clk_prepare_enable(chip->base_clk);
+ ret = clk_prepare_enable(our_chip->base_clk);
if (ret < 0) {
dev_err(dev, "failed to enable base clock\n");
return ret;
}
for (chan = 0; chan < SAMSUNG_PWM_NUM; ++chan)
- if (chip->variant.output_mask & BIT(chan))
- pwm_samsung_set_invert(chip, chan, true);
+ if (our_chip->variant.output_mask & BIT(chan))
+ pwm_samsung_set_invert(our_chip, chan, true);
/* Following clocks are optional. */
- chip->tclk0 = devm_clk_get(&pdev->dev, "pwm-tclk0");
- chip->tclk1 = devm_clk_get(&pdev->dev, "pwm-tclk1");
+ our_chip->tclk0 = devm_clk_get(&pdev->dev, "pwm-tclk0");
+ our_chip->tclk1 = devm_clk_get(&pdev->dev, "pwm-tclk1");
- platform_set_drvdata(pdev, chip);
+ platform_set_drvdata(pdev, our_chip);
- ret = pwmchip_add(&chip->chip);
+ ret = pwmchip_add(&our_chip->chip);
if (ret < 0) {
dev_err(dev, "failed to register PWM chip\n");
- clk_disable_unprepare(chip->base_clk);
+ clk_disable_unprepare(our_chip->base_clk);
return ret;
}
dev_dbg(dev, "base_clk at %lu, tclk0 at %lu, tclk1 at %lu\n",
- clk_get_rate(chip->base_clk),
- !IS_ERR(chip->tclk0) ? clk_get_rate(chip->tclk0) : 0,
- !IS_ERR(chip->tclk1) ? clk_get_rate(chip->tclk1) : 0);
+ clk_get_rate(our_chip->base_clk),
+ !IS_ERR(our_chip->tclk0) ? clk_get_rate(our_chip->tclk0) : 0,
+ !IS_ERR(our_chip->tclk1) ? clk_get_rate(our_chip->tclk1) : 0);
return 0;
}
static void pwm_samsung_remove(struct platform_device *pdev)
{
- struct samsung_pwm_chip *chip = platform_get_drvdata(pdev);
+ struct samsung_pwm_chip *our_chip = platform_get_drvdata(pdev);
- pwmchip_remove(&chip->chip);
+ pwmchip_remove(&our_chip->chip);
- clk_disable_unprepare(chip->base_clk);
+ clk_disable_unprepare(our_chip->base_clk);
}
#ifdef CONFIG_PM_SLEEP
@@ -639,9 +629,9 @@ static int pwm_samsung_resume(struct device *dev)
for (i = 0; i < SAMSUNG_PWM_NUM; i++) {
struct pwm_device *pwm = &chip->pwms[i];
- struct samsung_pwm_channel *chan = pwm_get_chip_data(pwm);
+ struct samsung_pwm_channel *chan = &our_chip->channel[i];
- if (!chan)
+ if (!(pwm->flags & PWMF_REQUESTED))
continue;
if (our_chip->variant.output_mask & BIT(i))
diff --git a/drivers/pwm/pwm-sifive.c b/drivers/pwm/pwm-sifive.c
index eabddb7c7820..089e50bdbbf0 100644
--- a/drivers/pwm/pwm-sifive.c
+++ b/drivers/pwm/pwm-sifive.c
@@ -203,7 +203,6 @@ static const struct pwm_ops pwm_sifive_ops = {
.free = pwm_sifive_free,
.get_state = pwm_sifive_get_state,
.apply = pwm_sifive_apply,
- .owner = THIS_MODULE,
};
static int pwm_sifive_clock_notifier(struct notifier_block *nb,
diff --git a/drivers/pwm/pwm-sl28cpld.c b/drivers/pwm/pwm-sl28cpld.c
index 9e42e3a74ad6..88b01ff9e460 100644
--- a/drivers/pwm/pwm-sl28cpld.c
+++ b/drivers/pwm/pwm-sl28cpld.c
@@ -200,7 +200,6 @@ static int sl28cpld_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
static const struct pwm_ops sl28cpld_pwm_ops = {
.apply = sl28cpld_pwm_apply,
.get_state = sl28cpld_pwm_get_state,
- .owner = THIS_MODULE,
};
static int sl28cpld_pwm_probe(struct platform_device *pdev)
diff --git a/drivers/pwm/pwm-spear.c b/drivers/pwm/pwm-spear.c
index 4e1cfd8d7c03..ff991319feef 100644
--- a/drivers/pwm/pwm-spear.c
+++ b/drivers/pwm/pwm-spear.c
@@ -189,7 +189,6 @@ static int spear_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
static const struct pwm_ops spear_pwm_ops = {
.apply = spear_pwm_apply,
- .owner = THIS_MODULE,
};
static int spear_pwm_probe(struct platform_device *pdev)
@@ -207,26 +206,21 @@ static int spear_pwm_probe(struct platform_device *pdev)
if (IS_ERR(pc->mmio_base))
return PTR_ERR(pc->mmio_base);
- pc->clk = devm_clk_get(&pdev->dev, NULL);
+ pc->clk = devm_clk_get_prepared(&pdev->dev, NULL);
if (IS_ERR(pc->clk))
- return PTR_ERR(pc->clk);
-
- platform_set_drvdata(pdev, pc);
+ return dev_err_probe(&pdev->dev, PTR_ERR(pc->clk),
+ "Failed to get clock\n");
pc->chip.dev = &pdev->dev;
pc->chip.ops = &spear_pwm_ops;
pc->chip.npwm = NUM_PWM;
- ret = clk_prepare(pc->clk);
- if (ret)
- return ret;
-
if (of_device_is_compatible(np, "st,spear1340-pwm")) {
ret = clk_enable(pc->clk);
- if (ret) {
- clk_unprepare(pc->clk);
- return ret;
- }
+ if (ret)
+ return dev_err_probe(&pdev->dev, ret,
+ "Failed to enable clk\n");
+
/*
* Following enables PWM chip, channels would still be
* enabled individually through their control register
@@ -238,23 +232,11 @@ static int spear_pwm_probe(struct platform_device *pdev)
clk_disable(pc->clk);
}
- ret = pwmchip_add(&pc->chip);
- if (ret < 0) {
- clk_unprepare(pc->clk);
- dev_err(&pdev->dev, "pwmchip_add() failed: %d\n", ret);
- }
+ ret = devm_pwmchip_add(&pdev->dev, &pc->chip);
+ if (ret < 0)
+ return dev_err_probe(&pdev->dev, ret, "pwmchip_add() failed\n");
- return ret;
-}
-
-static void spear_pwm_remove(struct platform_device *pdev)
-{
- struct spear_pwm_chip *pc = platform_get_drvdata(pdev);
-
- pwmchip_remove(&pc->chip);
-
- /* clk was prepared in probe, hence unprepare it here */
- clk_unprepare(pc->clk);
+ return 0;
}
static const struct of_device_id spear_pwm_of_match[] = {
@@ -271,7 +253,6 @@ static struct platform_driver spear_pwm_driver = {
.of_match_table = spear_pwm_of_match,
},
.probe = spear_pwm_probe,
- .remove_new = spear_pwm_remove,
};
module_platform_driver(spear_pwm_driver);
diff --git a/drivers/pwm/pwm-sprd.c b/drivers/pwm/pwm-sprd.c
index 1499c8c1fe37..77939e161006 100644
--- a/drivers/pwm/pwm-sprd.c
+++ b/drivers/pwm/pwm-sprd.c
@@ -40,6 +40,11 @@ struct sprd_pwm_chip {
struct sprd_pwm_chn chn[SPRD_PWM_CHN_NUM];
};
+static inline struct sprd_pwm_chip* sprd_pwm_from_chip(struct pwm_chip *chip)
+{
+ return container_of(chip, struct sprd_pwm_chip, chip);
+}
+
/*
* The list of clocks required by PWM channels, and each channel has 2 clocks:
* enable clock and pwm clock.
@@ -69,8 +74,7 @@ static void sprd_pwm_write(struct sprd_pwm_chip *spc, u32 hwid,
static int sprd_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm,
struct pwm_state *state)
{
- struct sprd_pwm_chip *spc =
- container_of(chip, struct sprd_pwm_chip, chip);
+ struct sprd_pwm_chip *spc = sprd_pwm_from_chip(chip);
struct sprd_pwm_chn *chn = &spc->chn[pwm->hwpwm];
u32 val, duty, prescale;
u64 tmp;
@@ -162,8 +166,7 @@ static int sprd_pwm_config(struct sprd_pwm_chip *spc, struct pwm_device *pwm,
static int sprd_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
const struct pwm_state *state)
{
- struct sprd_pwm_chip *spc =
- container_of(chip, struct sprd_pwm_chip, chip);
+ struct sprd_pwm_chip *spc = sprd_pwm_from_chip(chip);
struct sprd_pwm_chn *chn = &spc->chn[pwm->hwpwm];
struct pwm_state *cstate = &pwm->state;
int ret;
@@ -210,7 +213,6 @@ static int sprd_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
static const struct pwm_ops sprd_pwm_ops = {
.apply = sprd_pwm_apply,
.get_state = sprd_pwm_get_state,
- .owner = THIS_MODULE,
};
static int sprd_pwm_clk_init(struct sprd_pwm_chip *spc)
@@ -240,10 +242,8 @@ static int sprd_pwm_clk_init(struct sprd_pwm_chip *spc)
chn->clk_rate = clk_get_rate(clk_pwm);
}
- if (!i) {
- dev_err(spc->dev, "no available PWM channels\n");
- return -ENODEV;
- }
+ if (!i)
+ return dev_err_probe(spc->dev, -ENODEV, "no available PWM channels\n");
spc->num_pwms = i;
@@ -264,7 +264,6 @@ static int sprd_pwm_probe(struct platform_device *pdev)
return PTR_ERR(spc->base);
spc->dev = &pdev->dev;
- platform_set_drvdata(pdev, spc);
ret = sprd_pwm_clk_init(spc);
if (ret)
@@ -274,20 +273,13 @@ static int sprd_pwm_probe(struct platform_device *pdev)
spc->chip.ops = &sprd_pwm_ops;
spc->chip.npwm = spc->num_pwms;
- ret = pwmchip_add(&spc->chip);
+ ret = devm_pwmchip_add(&pdev->dev, &spc->chip);
if (ret)
dev_err(&pdev->dev, "failed to add PWM chip\n");
return ret;
}
-static void sprd_pwm_remove(struct platform_device *pdev)
-{
- struct sprd_pwm_chip *spc = platform_get_drvdata(pdev);
-
- pwmchip_remove(&spc->chip);
-}
-
static const struct of_device_id sprd_pwm_of_match[] = {
{ .compatible = "sprd,ums512-pwm", },
{ },
@@ -300,7 +292,6 @@ static struct platform_driver sprd_pwm_driver = {
.of_match_table = sprd_pwm_of_match,
},
.probe = sprd_pwm_probe,
- .remove_new = sprd_pwm_remove,
};
module_platform_driver(sprd_pwm_driver);
diff --git a/drivers/pwm/pwm-sti.c b/drivers/pwm/pwm-sti.c
index b1d1373648a3..dc92cea31cd0 100644
--- a/drivers/pwm/pwm-sti.c
+++ b/drivers/pwm/pwm-sti.c
@@ -79,6 +79,7 @@ struct sti_pwm_compat_data {
unsigned int cpt_num_devs;
unsigned int max_pwm_cnt;
unsigned int max_prescale;
+ struct sti_cpt_ddata *ddata;
};
struct sti_pwm_chip {
@@ -314,7 +315,7 @@ static int sti_pwm_capture(struct pwm_chip *chip, struct pwm_device *pwm,
{
struct sti_pwm_chip *pc = to_sti_pwmchip(chip);
struct sti_pwm_compat_data *cdata = pc->cdata;
- struct sti_cpt_ddata *ddata = pwm_get_chip_data(pwm);
+ struct sti_cpt_ddata *ddata = &cdata->ddata[pwm->hwpwm];
struct device *dev = pc->dev;
unsigned int effective_ticks;
unsigned long long high, low;
@@ -420,7 +421,6 @@ static const struct pwm_ops sti_pwm_ops = {
.capture = sti_pwm_capture,
.apply = sti_pwm_apply,
.free = sti_pwm_free,
- .owner = THIS_MODULE,
};
static irqreturn_t sti_pwm_interrupt(int irq, void *data)
@@ -440,7 +440,7 @@ static irqreturn_t sti_pwm_interrupt(int irq, void *data)
while (cpt_int_stat) {
devicenum = ffs(cpt_int_stat) - 1;
- ddata = pwm_get_chip_data(&pc->chip.pwms[devicenum]);
+ ddata = &pc->cdata->ddata[devicenum];
/*
* Capture input:
@@ -638,30 +638,28 @@ static int sti_pwm_probe(struct platform_device *pdev)
dev_err(dev, "failed to prepare clock\n");
return ret;
}
+
+ cdata->ddata = devm_kzalloc(dev, cdata->cpt_num_devs * sizeof(*cdata->ddata), GFP_KERNEL);
+ if (!cdata->ddata)
+ return -ENOMEM;
}
pc->chip.dev = dev;
pc->chip.ops = &sti_pwm_ops;
pc->chip.npwm = pc->cdata->pwm_num_devs;
- ret = pwmchip_add(&pc->chip);
- if (ret < 0) {
- clk_unprepare(pc->pwm_clk);
- clk_unprepare(pc->cpt_clk);
- return ret;
- }
-
for (i = 0; i < cdata->cpt_num_devs; i++) {
- struct sti_cpt_ddata *ddata;
-
- ddata = devm_kzalloc(dev, sizeof(*ddata), GFP_KERNEL);
- if (!ddata)
- return -ENOMEM;
+ struct sti_cpt_ddata *ddata = &cdata->ddata[i];
init_waitqueue_head(&ddata->wait);
mutex_init(&ddata->lock);
+ }
- pwm_set_chip_data(&pc->chip.pwms[i], ddata);
+ ret = pwmchip_add(&pc->chip);
+ if (ret < 0) {
+ clk_unprepare(pc->pwm_clk);
+ clk_unprepare(pc->cpt_clk);
+ return ret;
}
platform_set_drvdata(pdev, pc);
diff --git a/drivers/pwm/pwm-stm32-lp.c b/drivers/pwm/pwm-stm32-lp.c
index bb3a045a7334..b67974cc1872 100644
--- a/drivers/pwm/pwm-stm32-lp.c
+++ b/drivers/pwm/pwm-stm32-lp.c
@@ -189,7 +189,6 @@ static int stm32_pwm_lp_get_state(struct pwm_chip *chip,
}
static const struct pwm_ops stm32_pwm_lp_ops = {
- .owner = THIS_MODULE,
.apply = stm32_pwm_lp_apply,
.get_state = stm32_pwm_lp_get_state,
};
diff --git a/drivers/pwm/pwm-stm32.c b/drivers/pwm/pwm-stm32.c
index 3d6be7749e23..3303a754ea02 100644
--- a/drivers/pwm/pwm-stm32.c
+++ b/drivers/pwm/pwm-stm32.c
@@ -487,7 +487,6 @@ static int stm32_pwm_apply_locked(struct pwm_chip *chip, struct pwm_device *pwm,
}
static const struct pwm_ops stm32pwm_ops = {
- .owner = THIS_MODULE,
.apply = stm32_pwm_apply_locked,
.capture = IS_ENABLED(CONFIG_DMA_ENGINE) ? stm32_pwm_capture : NULL,
};
diff --git a/drivers/pwm/pwm-stmpe.c b/drivers/pwm/pwm-stmpe.c
index e205405c4828..a46f5b4dd816 100644
--- a/drivers/pwm/pwm-stmpe.c
+++ b/drivers/pwm/pwm-stmpe.c
@@ -287,7 +287,6 @@ static int stmpe_24xx_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
static const struct pwm_ops stmpe_24xx_pwm_ops = {
.apply = stmpe_24xx_pwm_apply,
- .owner = THIS_MODULE,
};
static int __init stmpe_pwm_probe(struct platform_device *pdev)
diff --git a/drivers/pwm/pwm-sun4i.c b/drivers/pwm/pwm-sun4i.c
index c84fcf1a13dc..1a439025540d 100644
--- a/drivers/pwm/pwm-sun4i.c
+++ b/drivers/pwm/pwm-sun4i.c
@@ -325,7 +325,6 @@ static int sun4i_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
static const struct pwm_ops sun4i_pwm_ops = {
.apply = sun4i_pwm_apply,
.get_state = sun4i_pwm_get_state,
- .owner = THIS_MODULE,
};
static const struct sun4i_pwm_data sun4i_pwm_dual_nobypass = {
diff --git a/drivers/pwm/pwm-sunplus.c b/drivers/pwm/pwm-sunplus.c
index 7705c7b86c3a..773e2f80526e 100644
--- a/drivers/pwm/pwm-sunplus.c
+++ b/drivers/pwm/pwm-sunplus.c
@@ -163,7 +163,6 @@ static int sunplus_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm,
static const struct pwm_ops sunplus_pwm_ops = {
.apply = sunplus_pwm_apply,
.get_state = sunplus_pwm_get_state,
- .owner = THIS_MODULE,
};
static void sunplus_pwm_clk_release(void *data)
diff --git a/drivers/pwm/pwm-tegra.c b/drivers/pwm/pwm-tegra.c
index a169a34e0778..39ea51e08c94 100644
--- a/drivers/pwm/pwm-tegra.c
+++ b/drivers/pwm/pwm-tegra.c
@@ -268,7 +268,6 @@ static int tegra_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
static const struct pwm_ops tegra_pwm_ops = {
.apply = tegra_pwm_apply,
- .owner = THIS_MODULE,
};
static int tegra_pwm_probe(struct platform_device *pdev)
diff --git a/drivers/pwm/pwm-tiecap.c b/drivers/pwm/pwm-tiecap.c
index 8c94b266c1b2..11e3549cf103 100644
--- a/drivers/pwm/pwm-tiecap.c
+++ b/drivers/pwm/pwm-tiecap.c
@@ -205,7 +205,6 @@ static int ecap_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
static const struct pwm_ops ecap_pwm_ops = {
.apply = ecap_pwm_apply,
- .owner = THIS_MODULE,
};
static const struct of_device_id ecap_of_match[] = {
diff --git a/drivers/pwm/pwm-tiehrpwm.c b/drivers/pwm/pwm-tiehrpwm.c
index ecbfd7e954ec..66ac2655845f 100644
--- a/drivers/pwm/pwm-tiehrpwm.c
+++ b/drivers/pwm/pwm-tiehrpwm.c
@@ -437,7 +437,6 @@ static int ehrpwm_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
static const struct pwm_ops ehrpwm_pwm_ops = {
.free = ehrpwm_pwm_free,
.apply = ehrpwm_pwm_apply,
- .owner = THIS_MODULE,
};
static const struct of_device_id ehrpwm_of_match[] = {
diff --git a/drivers/pwm/pwm-twl-led.c b/drivers/pwm/pwm-twl-led.c
index 8fb84b441853..625233f4703a 100644
--- a/drivers/pwm/pwm-twl-led.c
+++ b/drivers/pwm/pwm-twl-led.c
@@ -189,7 +189,6 @@ static int twl4030_pwmled_apply(struct pwm_chip *chip, struct pwm_device *pwm,
static const struct pwm_ops twl4030_pwmled_ops = {
.apply = twl4030_pwmled_apply,
- .owner = THIS_MODULE,
};
static int twl6030_pwmled_config(struct pwm_chip *chip, struct pwm_device *pwm,
@@ -342,7 +341,6 @@ static const struct pwm_ops twl6030_pwmled_ops = {
.apply = twl6030_pwmled_apply,
.request = twl6030_pwmled_request,
.free = twl6030_pwmled_free,
- .owner = THIS_MODULE,
};
static int twl_pwmled_probe(struct platform_device *pdev)
diff --git a/drivers/pwm/pwm-twl.c b/drivers/pwm/pwm-twl.c
index 86567add79db..603d31f27470 100644
--- a/drivers/pwm/pwm-twl.c
+++ b/drivers/pwm/pwm-twl.c
@@ -333,12 +333,10 @@ static const struct pwm_ops twl4030_pwm_ops = {
.apply = twl4030_pwm_apply,
.request = twl4030_pwm_request,
.free = twl4030_pwm_free,
- .owner = THIS_MODULE,
};
static const struct pwm_ops twl6030_pwm_ops = {
.apply = twl6030_pwm_apply,
- .owner = THIS_MODULE,
};
static int twl_pwm_probe(struct platform_device *pdev)
diff --git a/drivers/pwm/pwm-visconti.c b/drivers/pwm/pwm-visconti.c
index 7f7591a2384c..8d736d558122 100644
--- a/drivers/pwm/pwm-visconti.c
+++ b/drivers/pwm/pwm-visconti.c
@@ -129,7 +129,6 @@ static int visconti_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm,
static const struct pwm_ops visconti_pwm_ops = {
.apply = visconti_pwm_apply,
.get_state = visconti_pwm_get_state,
- .owner = THIS_MODULE,
};
static int visconti_pwm_probe(struct platform_device *pdev)
diff --git a/drivers/pwm/pwm-vt8500.c b/drivers/pwm/pwm-vt8500.c
index 6d46db51daac..5568d5312d3c 100644
--- a/drivers/pwm/pwm-vt8500.c
+++ b/drivers/pwm/pwm-vt8500.c
@@ -221,7 +221,6 @@ static int vt8500_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
static const struct pwm_ops vt8500_pwm_ops = {
.apply = vt8500_pwm_apply,
- .owner = THIS_MODULE,
};
static const struct of_device_id vt8500_pwm_dt_ids[] = {
@@ -236,10 +235,8 @@ static int vt8500_pwm_probe(struct platform_device *pdev)
struct device_node *np = pdev->dev.of_node;
int ret;
- if (!np) {
- dev_err(&pdev->dev, "invalid devicetree node\n");
- return -EINVAL;
- }
+ if (!np)
+ return dev_err_probe(&pdev->dev, -EINVAL, "invalid devicetree node\n");
vt8500 = devm_kzalloc(&pdev->dev, sizeof(*vt8500), GFP_KERNEL);
if (vt8500 == NULL)
@@ -249,45 +246,23 @@ static int vt8500_pwm_probe(struct platform_device *pdev)
vt8500->chip.ops = &vt8500_pwm_ops;
vt8500->chip.npwm = VT8500_NR_PWMS;
- vt8500->clk = devm_clk_get(&pdev->dev, NULL);
- if (IS_ERR(vt8500->clk)) {
- dev_err(&pdev->dev, "clock source not specified\n");
- return PTR_ERR(vt8500->clk);
- }
+ vt8500->clk = devm_clk_get_prepared(&pdev->dev, NULL);
+ if (IS_ERR(vt8500->clk))
+ return dev_err_probe(&pdev->dev, PTR_ERR(vt8500->clk), "clock source not specified\n");
vt8500->base = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(vt8500->base))
return PTR_ERR(vt8500->base);
- ret = clk_prepare(vt8500->clk);
- if (ret < 0) {
- dev_err(&pdev->dev, "failed to prepare clock\n");
- return ret;
- }
-
- ret = pwmchip_add(&vt8500->chip);
- if (ret < 0) {
- dev_err(&pdev->dev, "failed to add PWM chip\n");
- clk_unprepare(vt8500->clk);
- return ret;
- }
-
- platform_set_drvdata(pdev, vt8500);
- return ret;
-}
-
-static void vt8500_pwm_remove(struct platform_device *pdev)
-{
- struct vt8500_chip *vt8500 = platform_get_drvdata(pdev);
-
- pwmchip_remove(&vt8500->chip);
+ ret = devm_pwmchip_add(&pdev->dev, &vt8500->chip);
+ if (ret < 0)
+ return dev_err_probe(&pdev->dev, ret, "failed to add PWM chip\n");
- clk_unprepare(vt8500->clk);
+ return 0;
}
static struct platform_driver vt8500_pwm_driver = {
.probe = vt8500_pwm_probe,
- .remove_new = vt8500_pwm_remove,
.driver = {
.name = "vt8500-pwm",
.of_match_table = vt8500_pwm_dt_ids,
diff --git a/drivers/pwm/pwm-xilinx.c b/drivers/pwm/pwm-xilinx.c
index 85153ee90809..5f3c2a6fed11 100644
--- a/drivers/pwm/pwm-xilinx.c
+++ b/drivers/pwm/pwm-xilinx.c
@@ -198,7 +198,6 @@ static int xilinx_pwm_get_state(struct pwm_chip *chip,
static const struct pwm_ops xilinx_pwm_ops = {
.apply = xilinx_pwm_apply,
.get_state = xilinx_pwm_get_state,
- .owner = THIS_MODULE,
};
static const struct regmap_config xilinx_pwm_regmap_config = {
diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c
index d09e08b71cfb..5dd33155d5d5 100644
--- a/drivers/s390/crypto/ap_bus.c
+++ b/drivers/s390/crypto/ap_bus.c
@@ -352,7 +352,7 @@ EXPORT_SYMBOL(ap_test_config_ctrl_domain);
/*
* ap_queue_info(): Check and get AP queue info.
* Returns: 1 if APQN exists and info is filled,
- * 0 if APQN seems to exit but there is no info
+ * 0 if APQN seems to exist but there is no info
* available (eg. caused by an asynch pending error)
* -1 invalid APQN, TAPQ error or AP queue status which
* indicates there is no APQN.
@@ -373,36 +373,33 @@ static int ap_queue_info(ap_qid_t qid, int *q_type, unsigned int *q_fac,
/* call TAPQ on this APQN */
status = ap_test_queue(qid, ap_apft_available(), &tapq_info);
- /* handle pending async error with return 'no info available' */
- if (status.async)
- return 0;
-
switch (status.response_code) {
case AP_RESPONSE_NORMAL:
case AP_RESPONSE_RESET_IN_PROGRESS:
case AP_RESPONSE_DECONFIGURED:
case AP_RESPONSE_CHECKSTOPPED:
case AP_RESPONSE_BUSY:
- /*
- * According to the architecture in all these cases the
- * info should be filled. All bits 0 is not possible as
- * there is at least one of the mode bits set.
- */
- if (WARN_ON_ONCE(!tapq_info.value))
- return 0;
- *q_type = tapq_info.at;
- *q_fac = tapq_info.fac;
- *q_depth = tapq_info.qd;
- *q_ml = tapq_info.ml;
- *q_decfg = status.response_code == AP_RESPONSE_DECONFIGURED;
- *q_cstop = status.response_code == AP_RESPONSE_CHECKSTOPPED;
- return 1;
+ /* For all these RCs the tapq info should be available */
+ break;
default:
- /*
- * A response code which indicates, there is no info available.
- */
- return -1;
+ /* On a pending async error the info should be available */
+ if (!status.async)
+ return -1;
+ break;
}
+
+ /* There should be at least one of the mode bits set */
+ if (WARN_ON_ONCE(!tapq_info.value))
+ return 0;
+
+ *q_type = tapq_info.at;
+ *q_fac = tapq_info.fac;
+ *q_depth = tapq_info.qd;
+ *q_ml = tapq_info.ml;
+ *q_decfg = status.response_code == AP_RESPONSE_DECONFIGURED;
+ *q_cstop = status.response_code == AP_RESPONSE_CHECKSTOPPED;
+
+ return 1;
}
void ap_wait(enum ap_sm_wait wait)
@@ -1022,6 +1019,10 @@ EXPORT_SYMBOL(ap_driver_unregister);
void ap_bus_force_rescan(void)
{
+ /* Only trigger AP bus scans after the initial scan is done */
+ if (atomic64_read(&ap_scan_bus_count) <= 0)
+ return;
+
/* processing a asynchronous bus rescan */
del_timer(&ap_config_timer);
queue_work(system_long_wq, &ap_scan_work);
diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h
index 359a35f894d5..b0771ca0849b 100644
--- a/drivers/s390/crypto/ap_bus.h
+++ b/drivers/s390/crypto/ap_bus.h
@@ -206,7 +206,6 @@ struct ap_queue {
bool config; /* configured state */
bool chkstop; /* checkstop state */
ap_qid_t qid; /* AP queue id. */
- bool interrupt; /* indicate if interrupts are enabled */
bool se_bound; /* SE bound state */
unsigned int assoc_idx; /* SE association index */
int queue_count; /* # messages currently on AP queue. */
diff --git a/drivers/s390/crypto/ap_queue.c b/drivers/s390/crypto/ap_queue.c
index 993240370ecf..3934a0cc13e7 100644
--- a/drivers/s390/crypto/ap_queue.c
+++ b/drivers/s390/crypto/ap_queue.c
@@ -200,13 +200,13 @@ static enum ap_sm_wait ap_sm_read(struct ap_queue *aq)
return AP_SM_WAIT_AGAIN;
}
aq->sm_state = AP_SM_STATE_IDLE;
- return AP_SM_WAIT_NONE;
+ break;
case AP_RESPONSE_NO_PENDING_REPLY:
if (aq->queue_count > 0)
- return aq->interrupt ?
+ return status.irq_enabled ?
AP_SM_WAIT_INTERRUPT : AP_SM_WAIT_HIGH_TIMEOUT;
aq->sm_state = AP_SM_STATE_IDLE;
- return AP_SM_WAIT_NONE;
+ break;
default:
aq->dev_state = AP_DEV_STATE_ERROR;
aq->last_err_rc = status.response_code;
@@ -215,6 +215,16 @@ static enum ap_sm_wait ap_sm_read(struct ap_queue *aq)
AP_QID_CARD(aq->qid), AP_QID_QUEUE(aq->qid));
return AP_SM_WAIT_NONE;
}
+ /* Check and maybe enable irq support (again) on this queue */
+ if (!status.irq_enabled && status.queue_empty) {
+ void *lsi_ptr = ap_airq_ptr();
+
+ if (lsi_ptr && ap_queue_enable_irq(aq, lsi_ptr) == 0) {
+ aq->sm_state = AP_SM_STATE_SETIRQ_WAIT;
+ return AP_SM_WAIT_AGAIN;
+ }
+ }
+ return AP_SM_WAIT_NONE;
}
/**
@@ -254,7 +264,7 @@ static enum ap_sm_wait ap_sm_write(struct ap_queue *aq)
fallthrough;
case AP_RESPONSE_Q_FULL:
aq->sm_state = AP_SM_STATE_QUEUE_FULL;
- return aq->interrupt ?
+ return status.irq_enabled ?
AP_SM_WAIT_INTERRUPT : AP_SM_WAIT_HIGH_TIMEOUT;
case AP_RESPONSE_RESET_IN_PROGRESS:
aq->sm_state = AP_SM_STATE_RESET_WAIT;
@@ -307,7 +317,6 @@ static enum ap_sm_wait ap_sm_reset(struct ap_queue *aq)
case AP_RESPONSE_NORMAL:
case AP_RESPONSE_RESET_IN_PROGRESS:
aq->sm_state = AP_SM_STATE_RESET_WAIT;
- aq->interrupt = false;
aq->rapq_fbit = 0;
aq->se_bound = false;
return AP_SM_WAIT_LOW_TIMEOUT;
@@ -383,7 +392,6 @@ static enum ap_sm_wait ap_sm_setirq_wait(struct ap_queue *aq)
if (status.irq_enabled == 1) {
/* Irqs are now enabled */
- aq->interrupt = true;
aq->sm_state = (aq->queue_count > 0) ?
AP_SM_STATE_WORKING : AP_SM_STATE_IDLE;
}
@@ -626,16 +634,21 @@ static ssize_t interrupt_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct ap_queue *aq = to_ap_queue(dev);
+ struct ap_queue_status status;
int rc = 0;
spin_lock_bh(&aq->lock);
- if (aq->sm_state == AP_SM_STATE_SETIRQ_WAIT)
+ if (aq->sm_state == AP_SM_STATE_SETIRQ_WAIT) {
rc = sysfs_emit(buf, "Enable Interrupt pending.\n");
- else if (aq->interrupt)
- rc = sysfs_emit(buf, "Interrupts enabled.\n");
- else
- rc = sysfs_emit(buf, "Interrupts disabled.\n");
+ } else {
+ status = ap_tapq(aq->qid, NULL);
+ if (status.irq_enabled)
+ rc = sysfs_emit(buf, "Interrupts enabled.\n");
+ else
+ rc = sysfs_emit(buf, "Interrupts disabled.\n");
+ }
spin_unlock_bh(&aq->lock);
+
return rc;
}
@@ -1032,7 +1045,6 @@ struct ap_queue *ap_queue_create(ap_qid_t qid, int device_type)
if (ap_sb_available() && is_prot_virt_guest())
aq->ap_dev.device.groups = ap_queue_dev_sb_attr_groups;
aq->qid = qid;
- aq->interrupt = false;
spin_lock_init(&aq->lock);
INIT_LIST_HEAD(&aq->pendingq);
INIT_LIST_HEAD(&aq->requestq);
diff --git a/drivers/s390/crypto/zcrypt_card.c b/drivers/s390/crypto/zcrypt_card.c
index c815722d0ac8..050462d95222 100644
--- a/drivers/s390/crypto/zcrypt_card.c
+++ b/drivers/s390/crypto/zcrypt_card.c
@@ -52,7 +52,7 @@ static ssize_t online_show(struct device *dev,
{
struct zcrypt_card *zc = dev_get_drvdata(dev);
struct ap_card *ac = to_ap_card(dev);
- int online = ac->config && zc->online ? 1 : 0;
+ int online = ac->config && !ac->chkstop && zc->online ? 1 : 0;
return sysfs_emit(buf, "%d\n", online);
}
@@ -70,7 +70,7 @@ static ssize_t online_store(struct device *dev,
if (sscanf(buf, "%d\n", &online) != 1 || online < 0 || online > 1)
return -EINVAL;
- if (online && !ac->config)
+ if (online && (!ac->config || ac->chkstop))
return -ENODEV;
zc->online = online;
diff --git a/drivers/s390/crypto/zcrypt_queue.c b/drivers/s390/crypto/zcrypt_queue.c
index 112a80e8e6c2..67d8e0ae0eec 100644
--- a/drivers/s390/crypto/zcrypt_queue.c
+++ b/drivers/s390/crypto/zcrypt_queue.c
@@ -42,7 +42,7 @@ static ssize_t online_show(struct device *dev,
{
struct zcrypt_queue *zq = dev_get_drvdata(dev);
struct ap_queue *aq = to_ap_queue(dev);
- int online = aq->config && zq->online ? 1 : 0;
+ int online = aq->config && !aq->chkstop && zq->online ? 1 : 0;
return sysfs_emit(buf, "%d\n", online);
}
@@ -59,7 +59,8 @@ static ssize_t online_store(struct device *dev,
if (sscanf(buf, "%d\n", &online) != 1 || online < 0 || online > 1)
return -EINVAL;
- if (online && (!aq->config || !aq->card->config))
+ if (online && (!aq->config || !aq->card->config ||
+ aq->chkstop || aq->card->chkstop))
return -ENODEV;
if (online && !zc->online)
return -EINVAL;
diff --git a/drivers/staging/greybus/pwm.c b/drivers/staging/greybus/pwm.c
index 57cc1960d059..a3cb68cfa0f9 100644
--- a/drivers/staging/greybus/pwm.c
+++ b/drivers/staging/greybus/pwm.c
@@ -258,7 +258,6 @@ static const struct pwm_ops gb_pwm_ops = {
.request = gb_pwm_request,
.free = gb_pwm_free,
.apply = gb_pwm_apply,
- .owner = THIS_MODULE,
};
static int gb_pwm_probe(struct gbphy_device *gbphy_dev,
diff --git a/drivers/watchdog/apple_wdt.c b/drivers/watchdog/apple_wdt.c
index eddeb0fede89..d4f739932f0b 100644
--- a/drivers/watchdog/apple_wdt.c
+++ b/drivers/watchdog/apple_wdt.c
@@ -173,6 +173,8 @@ static int apple_wdt_probe(struct platform_device *pdev)
if (!wdt->clk_rate)
return -EINVAL;
+ platform_set_drvdata(pdev, wdt);
+
wdt->wdd.ops = &apple_wdt_ops;
wdt->wdd.info = &apple_wdt_info;
wdt->wdd.max_timeout = U32_MAX / wdt->clk_rate;
@@ -190,6 +192,28 @@ static int apple_wdt_probe(struct platform_device *pdev)
return devm_watchdog_register_device(dev, &wdt->wdd);
}
+static int apple_wdt_resume(struct device *dev)
+{
+ struct apple_wdt *wdt = dev_get_drvdata(dev);
+
+ if (watchdog_active(&wdt->wdd) || watchdog_hw_running(&wdt->wdd))
+ apple_wdt_start(&wdt->wdd);
+
+ return 0;
+}
+
+static int apple_wdt_suspend(struct device *dev)
+{
+ struct apple_wdt *wdt = dev_get_drvdata(dev);
+
+ if (watchdog_active(&wdt->wdd) || watchdog_hw_running(&wdt->wdd))
+ apple_wdt_stop(&wdt->wdd);
+
+ return 0;
+}
+
+static DEFINE_SIMPLE_DEV_PM_OPS(apple_wdt_pm_ops, apple_wdt_suspend, apple_wdt_resume);
+
static const struct of_device_id apple_wdt_of_match[] = {
{ .compatible = "apple,wdt" },
{},
@@ -200,6 +224,7 @@ static struct platform_driver apple_wdt_driver = {
.driver = {
.name = "apple-watchdog",
.of_match_table = apple_wdt_of_match,
+ .pm = pm_sleep_ptr(&apple_wdt_pm_ops),
},
.probe = apple_wdt_probe,
};
diff --git a/drivers/watchdog/aspeed_wdt.c b/drivers/watchdog/aspeed_wdt.c
index b72a858bbac7..b4773a6aaf8c 100644
--- a/drivers/watchdog/aspeed_wdt.c
+++ b/drivers/watchdog/aspeed_wdt.c
@@ -79,6 +79,8 @@ MODULE_DEVICE_TABLE(of, aspeed_wdt_of_table);
#define WDT_TIMEOUT_STATUS_BOOT_SECONDARY BIT(1)
#define WDT_CLEAR_TIMEOUT_STATUS 0x14
#define WDT_CLEAR_TIMEOUT_AND_BOOT_CODE_SELECTION BIT(0)
+#define WDT_RESET_MASK1 0x1c
+#define WDT_RESET_MASK2 0x20
/*
* WDT_RESET_WIDTH controls the characteristics of the external pulse (if
@@ -402,6 +404,8 @@ static int aspeed_wdt_probe(struct platform_device *pdev)
if ((of_device_is_compatible(np, "aspeed,ast2500-wdt")) ||
(of_device_is_compatible(np, "aspeed,ast2600-wdt"))) {
+ u32 reset_mask[2];
+ size_t nrstmask = of_device_is_compatible(np, "aspeed,ast2600-wdt") ? 2 : 1;
u32 reg = readl(wdt->base + WDT_RESET_WIDTH);
reg &= wdt->cfg->ext_pulse_width_mask;
@@ -419,6 +423,13 @@ static int aspeed_wdt_probe(struct platform_device *pdev)
reg |= WDT_OPEN_DRAIN_MAGIC;
writel(reg, wdt->base + WDT_RESET_WIDTH);
+
+ ret = of_property_read_u32_array(np, "aspeed,reset-mask", reset_mask, nrstmask);
+ if (!ret) {
+ writel(reset_mask[0], wdt->base + WDT_RESET_MASK1);
+ if (nrstmask > 1)
+ writel(reset_mask[1], wdt->base + WDT_RESET_MASK2);
+ }
}
if (!of_property_read_u32(np, "aspeed,ext-pulse-duration", &duration)) {
diff --git a/drivers/watchdog/at91sam9_wdt.c b/drivers/watchdog/at91sam9_wdt.c
index fed7be246442..b111b28acb94 100644
--- a/drivers/watchdog/at91sam9_wdt.c
+++ b/drivers/watchdog/at91sam9_wdt.c
@@ -348,25 +348,21 @@ static int __init at91wdt_probe(struct platform_device *pdev)
if (IS_ERR(wdt->base))
return PTR_ERR(wdt->base);
- wdt->sclk = devm_clk_get(&pdev->dev, NULL);
- if (IS_ERR(wdt->sclk))
- return PTR_ERR(wdt->sclk);
-
- err = clk_prepare_enable(wdt->sclk);
- if (err) {
+ wdt->sclk = devm_clk_get_enabled(&pdev->dev, NULL);
+ if (IS_ERR(wdt->sclk)) {
dev_err(&pdev->dev, "Could not enable slow clock\n");
- return err;
+ return PTR_ERR(wdt->sclk);
}
if (pdev->dev.of_node) {
err = of_at91wdt_init(pdev->dev.of_node, wdt);
if (err)
- goto err_clk;
+ return err;
}
err = at91_wdt_init(pdev, wdt);
if (err)
- goto err_clk;
+ return err;
platform_set_drvdata(pdev, wdt);
@@ -374,11 +370,6 @@ static int __init at91wdt_probe(struct platform_device *pdev)
wdt->wdd.timeout, wdt->nowayout);
return 0;
-
-err_clk:
- clk_disable_unprepare(wdt->sclk);
-
- return err;
}
static int __exit at91wdt_remove(struct platform_device *pdev)
@@ -388,7 +379,6 @@ static int __exit at91wdt_remove(struct platform_device *pdev)
pr_warn("I quit now, hardware will probably reboot!\n");
del_timer(&wdt->timer);
- clk_disable_unprepare(wdt->sclk);
return 0;
}
diff --git a/drivers/watchdog/ath79_wdt.c b/drivers/watchdog/ath79_wdt.c
index b7b705060438..e5cc30622b12 100644
--- a/drivers/watchdog/ath79_wdt.c
+++ b/drivers/watchdog/ath79_wdt.c
@@ -257,19 +257,13 @@ static int ath79_wdt_probe(struct platform_device *pdev)
if (IS_ERR(wdt_base))
return PTR_ERR(wdt_base);
- wdt_clk = devm_clk_get(&pdev->dev, "wdt");
+ wdt_clk = devm_clk_get_enabled(&pdev->dev, "wdt");
if (IS_ERR(wdt_clk))
return PTR_ERR(wdt_clk);
- err = clk_prepare_enable(wdt_clk);
- if (err)
- return err;
-
wdt_freq = clk_get_rate(wdt_clk);
- if (!wdt_freq) {
- err = -EINVAL;
- goto err_clk_disable;
- }
+ if (!wdt_freq)
+ return -EINVAL;
max_timeout = (0xfffffffful / wdt_freq);
if (timeout < 1 || timeout > max_timeout) {
@@ -286,20 +280,15 @@ static int ath79_wdt_probe(struct platform_device *pdev)
if (err) {
dev_err(&pdev->dev,
"unable to register misc device, err=%d\n", err);
- goto err_clk_disable;
+ return err;
}
return 0;
-
-err_clk_disable:
- clk_disable_unprepare(wdt_clk);
- return err;
}
static void ath79_wdt_remove(struct platform_device *pdev)
{
misc_deregister(&ath79_wdt_miscdev);
- clk_disable_unprepare(wdt_clk);
}
static void ath79_wdt_shutdown(struct platform_device *pdev)
diff --git a/drivers/watchdog/gpio_wdt.c b/drivers/watchdog/gpio_wdt.c
index 0923201ce874..a7b814ea740b 100644
--- a/drivers/watchdog/gpio_wdt.c
+++ b/drivers/watchdog/gpio_wdt.c
@@ -5,12 +5,13 @@
* Author: 2013, Alexander Shiyan <[email protected]>
*/
-#include <linux/err.h>
#include <linux/delay.h>
-#include <linux/module.h>
+#include <linux/err.h>
#include <linux/gpio/consumer.h>
-#include <linux/of.h>
+#include <linux/mod_devicetable.h>
+#include <linux/module.h>
#include <linux/platform_device.h>
+#include <linux/property.h>
#include <linux/watchdog.h>
static bool nowayout = WATCHDOG_NOWAYOUT;
@@ -106,7 +107,6 @@ static const struct watchdog_ops gpio_wdt_ops = {
static int gpio_wdt_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
- struct device_node *np = dev->of_node;
struct gpio_wdt_priv *priv;
enum gpiod_flags gflags;
unsigned int hw_margin;
@@ -119,7 +119,7 @@ static int gpio_wdt_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, priv);
- ret = of_property_read_string(np, "hw_algo", &algo);
+ ret = device_property_read_string(dev, "hw_algo", &algo);
if (ret)
return ret;
if (!strcmp(algo, "toggle")) {
@@ -136,16 +136,14 @@ static int gpio_wdt_probe(struct platform_device *pdev)
if (IS_ERR(priv->gpiod))
return PTR_ERR(priv->gpiod);
- ret = of_property_read_u32(np,
- "hw_margin_ms", &hw_margin);
+ ret = device_property_read_u32(dev, "hw_margin_ms", &hw_margin);
if (ret)
return ret;
/* Disallow values lower than 2 and higher than 65535 ms */
if (hw_margin < 2 || hw_margin > 65535)
return -EINVAL;
- priv->always_running = of_property_read_bool(np,
- "always-running");
+ priv->always_running = device_property_read_bool(dev, "always-running");
watchdog_set_drvdata(&priv->wdd, priv);
diff --git a/drivers/watchdog/imx7ulp_wdt.c b/drivers/watchdog/imx7ulp_wdt.c
index c703586c6e5f..b21d7a74a42d 100644
--- a/drivers/watchdog/imx7ulp_wdt.c
+++ b/drivers/watchdog/imx7ulp_wdt.c
@@ -23,6 +23,7 @@
#define LPO_CLK_SHIFT 8
#define WDOG_CS_CLK (LPO_CLK << LPO_CLK_SHIFT)
#define WDOG_CS_EN BIT(7)
+#define WDOG_CS_INT_EN BIT(6)
#define WDOG_CS_UPDATE BIT(5)
#define WDOG_CS_WAIT BIT(1)
#define WDOG_CS_STOP BIT(0)
@@ -62,6 +63,7 @@ struct imx7ulp_wdt_device {
void __iomem *base;
struct clk *clk;
bool post_rcs_wait;
+ bool ext_reset;
const struct imx_wdt_hw_feature *hw;
};
@@ -285,6 +287,9 @@ static int imx7ulp_wdt_init(struct imx7ulp_wdt_device *wdt, unsigned int timeout
if (wdt->hw->prescaler_enable)
val |= WDOG_CS_PRES;
+ if (wdt->ext_reset)
+ val |= WDOG_CS_INT_EN;
+
do {
ret = _imx7ulp_wdt_init(wdt, timeout, val);
toval = readl(wdt->base + WDOG_TOVAL);
@@ -321,6 +326,9 @@ static int imx7ulp_wdt_probe(struct platform_device *pdev)
return PTR_ERR(imx7ulp_wdt->clk);
}
+ /* The WDOG may need to do external reset through dedicated pin */
+ imx7ulp_wdt->ext_reset = of_property_read_bool(dev->of_node, "fsl,ext-reset-output");
+
imx7ulp_wdt->post_rcs_wait = true;
if (of_device_is_compatible(dev->of_node,
"fsl,imx8ulp-wdt")) {
diff --git a/drivers/watchdog/imx_sc_wdt.c b/drivers/watchdog/imx_sc_wdt.c
index 8ac021748d16..e51fe1b78518 100644
--- a/drivers/watchdog/imx_sc_wdt.c
+++ b/drivers/watchdog/imx_sc_wdt.c
@@ -34,6 +34,7 @@
#define SC_IRQ_WDOG 1
#define SC_IRQ_GROUP_WDOG 1
+#define SC_TIMER_ERR_BUSY 10
static bool nowayout = WATCHDOG_NOWAYOUT;
module_param(nowayout, bool, 0000);
@@ -61,7 +62,9 @@ static int imx_sc_wdt_start(struct watchdog_device *wdog)
arm_smccc_smc(IMX_SIP_TIMER, IMX_SIP_TIMER_START_WDOG,
0, 0, 0, 0, 0, 0, &res);
- if (res.a0)
+
+ /* Ignore if already enabled(SC_TIMER_ERR_BUSY) */
+ if (res.a0 && res.a0 != SC_TIMER_ERR_BUSY)
return -EACCES;
arm_smccc_smc(IMX_SIP_TIMER, IMX_SIP_TIMER_SET_WDOG_ACT,
diff --git a/drivers/watchdog/it87_wdt.c b/drivers/watchdog/it87_wdt.c
index bb1122909396..e888b1bdd1f2 100644
--- a/drivers/watchdog/it87_wdt.c
+++ b/drivers/watchdog/it87_wdt.c
@@ -13,9 +13,9 @@
* http://www.ite.com.tw/
*
* Support of the watchdog timers, which are available on
- * IT8607, IT8620, IT8622, IT8625, IT8628, IT8655, IT8665, IT8686,
- * IT8702, IT8712, IT8716, IT8718, IT8720, IT8721, IT8726, IT8728,
- * IT8772, IT8783 and IT8784.
+ * IT8607, IT8613, IT8620, IT8622, IT8625, IT8628, IT8655, IT8665,
+ * IT8686, IT8702, IT8712, IT8716, IT8718, IT8720, IT8721, IT8726,
+ * IT8728, IT8772, IT8783 and IT8784.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -50,6 +50,7 @@
/* Chip Id numbers */
#define NO_DEV_ID 0xffff
#define IT8607_ID 0x8607
+#define IT8613_ID 0x8613
#define IT8620_ID 0x8620
#define IT8622_ID 0x8622
#define IT8625_ID 0x8625
@@ -277,6 +278,7 @@ static int __init it87_wdt_init(void)
max_units = 65535;
break;
case IT8607_ID:
+ case IT8613_ID:
case IT8620_ID:
case IT8622_ID:
case IT8625_ID:
diff --git a/drivers/watchdog/ixp4xx_wdt.c b/drivers/watchdog/ixp4xx_wdt.c
index 607ce4b8df57..ec0c08652ec2 100644
--- a/drivers/watchdog/ixp4xx_wdt.c
+++ b/drivers/watchdog/ixp4xx_wdt.c
@@ -105,6 +105,25 @@ static const struct watchdog_ops ixp4xx_wdt_ops = {
.owner = THIS_MODULE,
};
+/*
+ * The A0 version of the IXP422 had a bug in the watchdog making
+ * is useless, but we still need to use it to restart the system
+ * as it is the only way, so in this special case we register a
+ * "dummy" watchdog that doesn't really work, but will support
+ * the restart operation.
+ */
+static int ixp4xx_wdt_dummy(struct watchdog_device *wdd)
+{
+ return 0;
+}
+
+static const struct watchdog_ops ixp4xx_wdt_restart_only_ops = {
+ .start = ixp4xx_wdt_dummy,
+ .stop = ixp4xx_wdt_dummy,
+ .restart = ixp4xx_wdt_restart,
+ .owner = THIS_MODULE,
+};
+
static const struct watchdog_info ixp4xx_wdt_info = {
.options = WDIOF_KEEPALIVEPING
| WDIOF_MAGICCLOSE
@@ -114,14 +133,17 @@ static const struct watchdog_info ixp4xx_wdt_info = {
static int ixp4xx_wdt_probe(struct platform_device *pdev)
{
+ static const struct watchdog_ops *iwdt_ops;
struct device *dev = &pdev->dev;
struct ixp4xx_wdt *iwdt;
struct clk *clk;
int ret;
if (!(read_cpuid_id() & 0xf) && !cpu_is_ixp46x()) {
- dev_err(dev, "Rev. A0 IXP42x CPU detected - watchdog disabled\n");
- return -ENODEV;
+ dev_info(dev, "Rev. A0 IXP42x CPU detected - only restart supported\n");
+ iwdt_ops = &ixp4xx_wdt_restart_only_ops;
+ } else {
+ iwdt_ops = &ixp4xx_wdt_ops;
}
iwdt = devm_kzalloc(dev, sizeof(*iwdt), GFP_KERNEL);
@@ -141,7 +163,7 @@ static int ixp4xx_wdt_probe(struct platform_device *pdev)
iwdt->rate = IXP4XX_TIMER_FREQ;
iwdt->wdd.info = &ixp4xx_wdt_info;
- iwdt->wdd.ops = &ixp4xx_wdt_ops;
+ iwdt->wdd.ops = iwdt_ops;
iwdt->wdd.min_timeout = 1;
iwdt->wdd.max_timeout = U32_MAX / iwdt->rate;
iwdt->wdd.parent = dev;
diff --git a/drivers/watchdog/marvell_gti_wdt.c b/drivers/watchdog/marvell_gti_wdt.c
index d7eb8286e11e..098bb141a521 100644
--- a/drivers/watchdog/marvell_gti_wdt.c
+++ b/drivers/watchdog/marvell_gti_wdt.c
@@ -8,8 +8,8 @@
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/module.h>
-#include <linux/of_platform.h>
#include <linux/platform_device.h>
+#include <linux/of.h>
#include <linux/watchdog.h>
/*
@@ -190,6 +190,13 @@ static int gti_wdt_set_pretimeout(struct watchdog_device *wdev,
struct gti_wdt_priv *priv = watchdog_get_drvdata(wdev);
struct watchdog_device *wdog_dev = &priv->wdev;
+ if (!timeout) {
+ /* Disable Interrupt */
+ writeq(GTI_CWD_INT_ENA_CLR_VAL(priv->wdt_timer_idx),
+ priv->base + GTI_CWD_INT_ENA_CLR);
+ return 0;
+ }
+
/* pretimeout should 1/3 of max_timeout */
if (timeout * 3 <= wdog_dev->max_timeout)
return gti_wdt_settimeout(wdev, timeout * 3);
@@ -271,7 +278,7 @@ static int gti_wdt_probe(struct platform_device *pdev)
&wdt_idx);
if (!err) {
if (wdt_idx >= priv->data->gti_num_timers)
- return dev_err_probe(&pdev->dev, err,
+ return dev_err_probe(&pdev->dev, -EINVAL,
"GTI wdog timer index not valid");
priv->wdt_timer_idx = wdt_idx;
@@ -292,6 +299,7 @@ static int gti_wdt_probe(struct platform_device *pdev)
/* Maximum timeout is 3 times the pretimeout */
wdog_dev->max_timeout = max_pretimeout * 3;
+ wdog_dev->max_hw_heartbeat_ms = max_pretimeout * 1000;
/* Minimum first timeout (pretimeout) is 1, so min_timeout as 3 */
wdog_dev->min_timeout = 3;
wdog_dev->timeout = wdog_dev->pretimeout;
@@ -308,7 +316,7 @@ static int gti_wdt_probe(struct platform_device *pdev)
irq = platform_get_irq(pdev, 0);
if (irq < 0)
- return dev_err_probe(&pdev->dev, irq, "IRQ resource not found\n");
+ return irq;
err = devm_request_irq(dev, irq, gti_wdt_interrupt, 0,
pdev->name, &priv->wdev);
diff --git a/drivers/watchdog/mlx_wdt.c b/drivers/watchdog/mlx_wdt.c
index 9c5b6616fc87..667e2c5b3431 100644
--- a/drivers/watchdog/mlx_wdt.c
+++ b/drivers/watchdog/mlx_wdt.c
@@ -39,6 +39,7 @@
* @tleft_idx: index for direct access to time left register;
* @ping_idx: index for direct access to ping register;
* @reset_idx: index for direct access to reset cause register;
+ * @regmap_val_sz: size of value in register map;
* @wd_type: watchdog HW type;
*/
struct mlxreg_wdt {
diff --git a/drivers/watchdog/of_xilinx_wdt.c b/drivers/watchdog/of_xilinx_wdt.c
index 05657dc1d36a..352853e6fe71 100644
--- a/drivers/watchdog/of_xilinx_wdt.c
+++ b/drivers/watchdog/of_xilinx_wdt.c
@@ -187,7 +187,7 @@ static int xwdt_probe(struct platform_device *pdev)
watchdog_set_nowayout(xilinx_wdt_wdd, enable_once);
- xdev->clk = devm_clk_get_enabled(dev, NULL);
+ xdev->clk = devm_clk_get_prepared(dev, NULL);
if (IS_ERR(xdev->clk)) {
if (PTR_ERR(xdev->clk) != -ENOENT)
return PTR_ERR(xdev->clk);
@@ -218,18 +218,25 @@ static int xwdt_probe(struct platform_device *pdev)
spin_lock_init(&xdev->spinlock);
watchdog_set_drvdata(xilinx_wdt_wdd, xdev);
+ rc = clk_enable(xdev->clk);
+ if (rc) {
+ dev_err(dev, "unable to enable clock\n");
+ return rc;
+ }
+
rc = xwdt_selftest(xdev);
if (rc == XWT_TIMER_FAILED) {
dev_err(dev, "SelfTest routine error\n");
+ clk_disable(xdev->clk);
return rc;
}
+ clk_disable(xdev->clk);
+
rc = devm_watchdog_register_device(dev, xilinx_wdt_wdd);
if (rc)
return rc;
- clk_disable(xdev->clk);
-
dev_info(dev, "Xilinx Watchdog Timer with timeout %ds\n",
xilinx_wdt_wdd->timeout);
diff --git a/drivers/watchdog/sbsa_gwdt.c b/drivers/watchdog/sbsa_gwdt.c
index 421ebcda62e6..5f23913ce3b4 100644
--- a/drivers/watchdog/sbsa_gwdt.c
+++ b/drivers/watchdog/sbsa_gwdt.c
@@ -152,14 +152,14 @@ static int sbsa_gwdt_set_timeout(struct watchdog_device *wdd,
timeout = clamp_t(unsigned int, timeout, 1, wdd->max_hw_heartbeat_ms / 1000);
if (action)
- sbsa_gwdt_reg_write(gwdt->clk * timeout, gwdt);
+ sbsa_gwdt_reg_write((u64)gwdt->clk * timeout, gwdt);
else
/*
* In the single stage mode, The first signal (WS0) is ignored,
* the timeout is (WOR * 2), so the WOR should be configured
* to half value of timeout.
*/
- sbsa_gwdt_reg_write(gwdt->clk / 2 * timeout, gwdt);
+ sbsa_gwdt_reg_write(((u64)gwdt->clk / 2) * timeout, gwdt);
return 0;
}
diff --git a/drivers/watchdog/st_lpc_wdt.c b/drivers/watchdog/st_lpc_wdt.c
index d2aa43c00221..4c5b8d98a4f3 100644
--- a/drivers/watchdog/st_lpc_wdt.c
+++ b/drivers/watchdog/st_lpc_wdt.c
@@ -15,7 +15,6 @@
#include <linux/mfd/syscon.h>
#include <linux/module.h>
#include <linux/of.h>
-#include <linux/of_platform.h>
#include <linux/platform_device.h>
#include <linux/regmap.h>
#include <linux/watchdog.h>
@@ -42,7 +41,7 @@ struct st_wdog {
void __iomem *base;
struct device *dev;
struct regmap *regmap;
- struct st_wdog_syscfg *syscfg;
+ const struct st_wdog_syscfg *syscfg;
struct clk *clk;
unsigned long clkrate;
bool warm_reset;
@@ -150,7 +149,6 @@ static void st_clk_disable_unprepare(void *data)
static int st_wdog_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
- const struct of_device_id *match;
struct device_node *np = dev->of_node;
struct st_wdog *st_wdog;
struct regmap *regmap;
@@ -173,12 +171,7 @@ static int st_wdog_probe(struct platform_device *pdev)
if (!st_wdog)
return -ENOMEM;
- match = of_match_device(st_wdog_match, dev);
- if (!match) {
- dev_err(dev, "Couldn't match device\n");
- return -ENODEV;
- }
- st_wdog->syscfg = (struct st_wdog_syscfg *)match->data;
+ st_wdog->syscfg = (struct st_wdog_syscfg *)device_get_match_data(dev);
base = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(base))
diff --git a/drivers/watchdog/sunplus_wdt.c b/drivers/watchdog/sunplus_wdt.c
index e2d8c532bcb1..9d3ca848e8b6 100644
--- a/drivers/watchdog/sunplus_wdt.c
+++ b/drivers/watchdog/sunplus_wdt.c
@@ -136,11 +136,6 @@ static const struct watchdog_ops sp_wdt_ops = {
.restart = sp_wdt_restart,
};
-static void sp_clk_disable_unprepare(void *data)
-{
- clk_disable_unprepare(data);
-}
-
static void sp_reset_control_assert(void *data)
{
reset_control_assert(data);
@@ -156,17 +151,9 @@ static int sp_wdt_probe(struct platform_device *pdev)
if (!priv)
return -ENOMEM;
- priv->clk = devm_clk_get(dev, NULL);
+ priv->clk = devm_clk_get_enabled(dev, NULL);
if (IS_ERR(priv->clk))
- return dev_err_probe(dev, PTR_ERR(priv->clk), "Failed to get clock\n");
-
- ret = clk_prepare_enable(priv->clk);
- if (ret)
- return dev_err_probe(dev, ret, "Failed to enable clock\n");
-
- ret = devm_add_action_or_reset(dev, sp_clk_disable_unprepare, priv->clk);
- if (ret)
- return ret;
+ return dev_err_probe(dev, PTR_ERR(priv->clk), "Failed to enable clock\n");
/* The timer and watchdog shared the STC reset */
priv->rstc = devm_reset_control_get_shared(dev, NULL);
diff --git a/drivers/watchdog/wdat_wdt.c b/drivers/watchdog/wdat_wdt.c
index 0ba99bed59fc..650fdc7996e1 100644
--- a/drivers/watchdog/wdat_wdt.c
+++ b/drivers/watchdog/wdat_wdt.c
@@ -269,7 +269,7 @@ static int wdat_wdt_stop(struct watchdog_device *wdd)
static int wdat_wdt_ping(struct watchdog_device *wdd)
{
- return wdat_wdt_run_action(to_wdat_wdt(wdd), ACPI_WDAT_RESET, 0, NULL);
+ return wdat_wdt_run_action(to_wdat_wdt(wdd), ACPI_WDAT_RESET, wdd->timeout, NULL);
}
static int wdat_wdt_set_timeout(struct watchdog_device *wdd,
diff --git a/fs/exfat/file.c b/fs/exfat/file.c
index 02c4e2937879..bfdfafe00993 100644
--- a/fs/exfat/file.c
+++ b/fs/exfat/file.c
@@ -295,6 +295,7 @@ int exfat_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
if (attr->ia_valid & ATTR_SIZE)
inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
+ setattr_copy(&nop_mnt_idmap, inode, attr);
exfat_truncate_inode_atime(inode);
if (attr->ia_valid & ATTR_SIZE) {
diff --git a/fs/exfat/inode.c b/fs/exfat/inode.c
index 875234179d1f..e7ff58b8e68c 100644
--- a/fs/exfat/inode.c
+++ b/fs/exfat/inode.c
@@ -56,18 +56,18 @@ int __exfat_write_inode(struct inode *inode, int sync)
&ep->dentry.file.create_time,
&ep->dentry.file.create_date,
&ep->dentry.file.create_time_cs);
+ ts = inode_get_mtime(inode);
exfat_set_entry_time(sbi, &ts,
&ep->dentry.file.modify_tz,
&ep->dentry.file.modify_time,
&ep->dentry.file.modify_date,
&ep->dentry.file.modify_time_cs);
- inode_set_mtime_to_ts(inode, ts);
+ ts = inode_get_atime(inode);
exfat_set_entry_time(sbi, &ts,
&ep->dentry.file.access_tz,
&ep->dentry.file.access_time,
&ep->dentry.file.access_date,
NULL);
- inode_set_atime_to_ts(inode, ts);
/* File size should be zero if there is no cluster allocated */
on_disk_size = i_size_read(inode);
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index 7df2503cef6c..01ac733a6320 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -125,7 +125,7 @@ config PNFS_BLOCK
config PNFS_FLEXFILE_LAYOUT
tristate
- depends on NFS_V4_1 && NFS_V3
+ depends on NFS_V4_1
default NFS_V4
config NFS_V4_1_IMPLEMENTATION_ID_DOMAIN
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index cf7365581031..fa1a14def45c 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -448,6 +448,7 @@ int nfs_inode_set_delegation(struct inode *inode, const struct cred *cred,
delegation->cred = get_cred(cred);
delegation->inode = inode;
delegation->flags = 1<<NFS_DELEGATION_REFERENCED;
+ delegation->test_gen = 0;
spin_lock_init(&delegation->lock);
spin_lock(&clp->cl_lock);
@@ -1294,6 +1295,8 @@ static int nfs_server_reap_expired_delegations(struct nfs_server *server,
struct inode *inode;
const struct cred *cred;
nfs4_stateid stateid;
+ unsigned long gen = ++server->delegation_gen;
+
restart:
rcu_read_lock();
restart_locked:
@@ -1303,7 +1306,8 @@ restart_locked:
test_bit(NFS_DELEGATION_RETURNING,
&delegation->flags) ||
test_bit(NFS_DELEGATION_TEST_EXPIRED,
- &delegation->flags) == 0)
+ &delegation->flags) == 0 ||
+ delegation->test_gen == gen)
continue;
inode = nfs_delegation_grab_inode(delegation);
if (inode == NULL)
@@ -1312,6 +1316,7 @@ restart_locked:
cred = get_cred_rcu(delegation->cred);
nfs4_stateid_copy(&stateid, &delegation->stateid);
spin_unlock(&delegation->lock);
+ delegation->test_gen = gen;
clear_bit(NFS_DELEGATION_TEST_EXPIRED, &delegation->flags);
rcu_read_unlock();
nfs_delegation_test_free_expired(inode, &stateid, cred);
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index 1c378992b7c0..a6f495d012cf 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -21,6 +21,7 @@ struct nfs_delegation {
fmode_t type;
unsigned long pagemod_limit;
__u64 change_attr;
+ unsigned long test_gen;
unsigned long flags;
refcount_t refcount;
spinlock_t lock;
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index e6a51fd94fea..13dffe4201e6 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -2532,7 +2532,7 @@ EXPORT_SYMBOL_GPL(nfs_unlink);
int nfs_symlink(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, const char *symname)
{
- struct page *page;
+ struct folio *folio;
char *kaddr;
struct iattr attr;
unsigned int pathlen = strlen(symname);
@@ -2547,24 +2547,24 @@ int nfs_symlink(struct mnt_idmap *idmap, struct inode *dir,
attr.ia_mode = S_IFLNK | S_IRWXUGO;
attr.ia_valid = ATTR_MODE;
- page = alloc_page(GFP_USER);
- if (!page)
+ folio = folio_alloc(GFP_USER, 0);
+ if (!folio)
return -ENOMEM;
- kaddr = page_address(page);
+ kaddr = folio_address(folio);
memcpy(kaddr, symname, pathlen);
if (pathlen < PAGE_SIZE)
memset(kaddr + pathlen, 0, PAGE_SIZE - pathlen);
trace_nfs_symlink_enter(dir, dentry);
- error = NFS_PROTO(dir)->symlink(dir, dentry, page, pathlen, &attr);
+ error = NFS_PROTO(dir)->symlink(dir, dentry, folio, pathlen, &attr);
trace_nfs_symlink_exit(dir, dentry, error);
if (error != 0) {
dfprintk(VFS, "NFS: symlink(%s/%lu, %pd, %s) error %d\n",
dir->i_sb->s_id, dir->i_ino,
dentry, symname, error);
d_drop(dentry);
- __free_page(page);
+ folio_put(folio);
return error;
}
@@ -2574,18 +2574,13 @@ int nfs_symlink(struct mnt_idmap *idmap, struct inode *dir,
* No big deal if we can't add this page to the page cache here.
* READLINK will get the missing page from the server if needed.
*/
- if (!add_to_page_cache_lru(page, d_inode(dentry)->i_mapping, 0,
- GFP_KERNEL)) {
- SetPageUptodate(page);
- unlock_page(page);
- /*
- * add_to_page_cache_lru() grabs an extra page refcount.
- * Drop it here to avoid leaking this page later.
- */
- put_page(page);
- } else
- __free_page(page);
+ if (filemap_add_folio(d_inode(dentry)->i_mapping, folio, 0,
+ GFP_KERNEL) == 0) {
+ folio_mark_uptodate(folio);
+ folio_unlock(folio);
+ }
+ folio_put(folio);
return 0;
}
EXPORT_SYMBOL_GPL(nfs_symlink);
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 4bf208a0a8e9..2de66e4e8280 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -543,9 +543,10 @@ out:
}
static int
-nfs3_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page,
+nfs3_proc_symlink(struct inode *dir, struct dentry *dentry, struct folio *folio,
unsigned int len, struct iattr *sattr)
{
+ struct page *page = &folio->page;
struct nfs3_createdata *data;
struct dentry *d_alias;
int status = -ENOMEM;
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 827d00e2f094..581698f1b7b2 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -209,6 +209,7 @@ struct nfs4_exception {
struct inode *inode;
nfs4_stateid *stateid;
long timeout;
+ unsigned short retrans;
unsigned char task_is_privileged : 1;
unsigned char delay : 1,
recovering : 1,
@@ -546,6 +547,7 @@ extern unsigned short max_session_slots;
extern unsigned short max_session_cb_slots;
extern unsigned short send_implementation_id;
extern bool recover_lost_locks;
+extern short nfs_delay_retrans;
#define NFS4_CLIENT_ID_UNIQ_LEN (64)
extern char nfs4_client_id_uniquifier[NFS4_CLIENT_ID_UNIQ_LEN];
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index a654d7234f51..8a943fffaad5 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -585,6 +585,21 @@ wait_on_recovery:
return 0;
}
+/*
+ * Track the number of NFS4ERR_DELAY related retransmissions and return
+ * EAGAIN if the 'softerr' mount option is set, and we've exceeded the limit
+ * set by 'nfs_delay_retrans'.
+ */
+static int nfs4_exception_should_retrans(const struct nfs_server *server,
+ struct nfs4_exception *exception)
+{
+ if (server->flags & NFS_MOUNT_SOFTERR && nfs_delay_retrans >= 0) {
+ if (exception->retrans++ >= (unsigned short)nfs_delay_retrans)
+ return -EAGAIN;
+ }
+ return 0;
+}
+
/* This is the error handling routine for processes that are allowed
* to sleep.
*/
@@ -595,6 +610,11 @@ int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_
ret = nfs4_do_handle_exception(server, errorcode, exception);
if (exception->delay) {
+ int ret2 = nfs4_exception_should_retrans(server, exception);
+ if (ret2 < 0) {
+ exception->retry = 0;
+ return ret2;
+ }
ret = nfs4_delay(&exception->timeout,
exception->interruptible);
goto out_retry;
@@ -623,6 +643,11 @@ nfs4_async_handle_exception(struct rpc_task *task, struct nfs_server *server,
ret = nfs4_do_handle_exception(server, errorcode, exception);
if (exception->delay) {
+ int ret2 = nfs4_exception_should_retrans(server, exception);
+ if (ret2 < 0) {
+ exception->retry = 0;
+ return ret2;
+ }
rpc_delay(task, nfs4_update_delay(&exception->timeout));
goto out_retry;
}
@@ -5011,9 +5036,10 @@ static void nfs4_free_createdata(struct nfs4_createdata *data)
}
static int _nfs4_proc_symlink(struct inode *dir, struct dentry *dentry,
- struct page *page, unsigned int len, struct iattr *sattr,
+ struct folio *folio, unsigned int len, struct iattr *sattr,
struct nfs4_label *label)
{
+ struct page *page = &folio->page;
struct nfs4_createdata *data;
int status = -ENAMETOOLONG;
@@ -5038,7 +5064,7 @@ out:
}
static int nfs4_proc_symlink(struct inode *dir, struct dentry *dentry,
- struct page *page, unsigned int len, struct iattr *sattr)
+ struct folio *folio, unsigned int len, struct iattr *sattr)
{
struct nfs4_exception exception = {
.interruptible = true,
@@ -5049,7 +5075,7 @@ static int nfs4_proc_symlink(struct inode *dir, struct dentry *dentry,
label = nfs4_label_init_security(dir, dentry, sattr, &l);
do {
- err = _nfs4_proc_symlink(dir, dentry, page, len, sattr, label);
+ err = _nfs4_proc_symlink(dir, dentry, folio, len, sattr, label);
trace_nfs4_symlink(dir, &dentry->d_name, err);
err = nfs4_handle_exception(NFS_SERVER(dir), err,
&exception);
@@ -5622,7 +5648,7 @@ static void nfs4_proc_write_setup(struct nfs_pgio_header *hdr,
msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE];
nfs4_init_sequence(&hdr->args.seq_args, &hdr->res.seq_res, 0, 0);
- nfs4_state_protect_write(server->nfs_client, clnt, msg, hdr);
+ nfs4_state_protect_write(hdr->ds_clp ? hdr->ds_clp : server->nfs_client, clnt, msg, hdr);
}
static void nfs4_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data)
@@ -5663,7 +5689,8 @@ static void nfs4_proc_commit_setup(struct nfs_commit_data *data, struct rpc_mess
data->res.server = server;
msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT];
nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1, 0);
- nfs4_state_protect(server->nfs_client, NFS_SP4_MACH_CRED_COMMIT, clnt, msg);
+ nfs4_state_protect(data->ds_clp ? data->ds_clp : server->nfs_client,
+ NFS_SP4_MACH_CRED_COMMIT, clnt, msg);
}
static int _nfs4_proc_commit(struct file *dst, struct nfs_commitargs *args,
@@ -8934,6 +8961,7 @@ void nfs4_test_session_trunk(struct rpc_clnt *clnt, struct rpc_xprt *xprt,
sp4_how = (adata->clp->cl_sp4_flags == 0 ? SP4_NONE : SP4_MACH_CRED);
+try_again:
/* Test connection for session trunking. Async exchange_id call */
task = nfs4_run_exchange_id(adata->clp, adata->cred, sp4_how, xprt);
if (IS_ERR(task))
@@ -8946,11 +8974,15 @@ void nfs4_test_session_trunk(struct rpc_clnt *clnt, struct rpc_xprt *xprt,
if (status == 0)
rpc_clnt_xprt_switch_add_xprt(clnt, xprt);
- else if (rpc_clnt_xprt_switch_has_addr(clnt,
+ else if (status != -NFS4ERR_DELAY && rpc_clnt_xprt_switch_has_addr(clnt,
(struct sockaddr *)&xprt->addr))
rpc_clnt_xprt_switch_remove_xprt(clnt, xprt);
rpc_put_task(task);
+ if (status == -NFS4ERR_DELAY) {
+ ssleep(1);
+ goto try_again;
+ }
}
EXPORT_SYMBOL_GPL(nfs4_test_session_trunk);
@@ -9621,6 +9653,9 @@ nfs4_layoutget_handle_exception(struct rpc_task *task,
nfs4_sequence_free_slot(&lgp->res.seq_res);
+ exception->state = NULL;
+ exception->stateid = NULL;
+
switch (nfs4err) {
case 0:
goto out;
@@ -9716,7 +9751,8 @@ static const struct rpc_call_ops nfs4_layoutget_call_ops = {
};
struct pnfs_layout_segment *
-nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout)
+nfs4_proc_layoutget(struct nfs4_layoutget *lgp,
+ struct nfs4_exception *exception)
{
struct inode *inode = lgp->args.inode;
struct nfs_server *server = NFS_SERVER(inode);
@@ -9736,13 +9772,10 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout)
RPC_TASK_MOVEABLE,
};
struct pnfs_layout_segment *lseg = NULL;
- struct nfs4_exception exception = {
- .inode = inode,
- .timeout = *timeout,
- };
int status = 0;
nfs4_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0, 0);
+ exception->retry = 0;
task = rpc_run_task(&task_setup_data);
if (IS_ERR(task))
@@ -9753,11 +9786,12 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout)
goto out;
if (task->tk_status < 0) {
- status = nfs4_layoutget_handle_exception(task, lgp, &exception);
- *timeout = exception.timeout;
+ exception->retry = 1;
+ status = nfs4_layoutget_handle_exception(task, lgp, exception);
} else if (lgp->res.layoutp->len == 0) {
+ exception->retry = 1;
status = -EAGAIN;
- *timeout = nfs4_update_delay(&exception.timeout);
+ nfs4_update_delay(&exception->timeout);
} else
lseg = pnfs_layout_process(lgp);
out:
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 84343aefbbd6..21a365357629 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1980,7 +1980,9 @@ pnfs_update_layout(struct inode *ino,
struct pnfs_layout_segment *lseg = NULL;
struct nfs4_layoutget *lgp;
nfs4_stateid stateid;
- long timeout = 0;
+ struct nfs4_exception exception = {
+ .inode = ino,
+ };
unsigned long giveup = jiffies + (clp->cl_lease_time << 1);
bool first;
@@ -2144,7 +2146,7 @@ lookup_again:
lgp->lo = lo;
pnfs_get_layout_hdr(lo);
- lseg = nfs4_proc_layoutget(lgp, &timeout);
+ lseg = nfs4_proc_layoutget(lgp, &exception);
trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET);
nfs_layoutget_end(lo);
@@ -2171,6 +2173,8 @@ lookup_again:
goto out_put_layout_hdr;
}
if (lseg) {
+ if (!exception.retry)
+ goto out_put_layout_hdr;
if (first)
pnfs_clear_first_layoutget(lo);
trace_pnfs_update_layout(ino, pos, count,
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index d886c8226d8f..db57a85500ee 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -35,6 +35,7 @@
#include <linux/nfs_page.h>
#include <linux/workqueue.h>
+struct nfs4_exception;
struct nfs4_opendata;
enum {
@@ -245,7 +246,9 @@ extern size_t max_response_pages(struct nfs_server *server);
extern int nfs4_proc_getdeviceinfo(struct nfs_server *server,
struct pnfs_device *dev,
const struct cred *cred);
-extern struct pnfs_layout_segment* nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout);
+extern struct pnfs_layout_segment *
+nfs4_proc_layoutget(struct nfs4_layoutget *lgp,
+ struct nfs4_exception *exception);
extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp, bool sync);
/* pnfs.c */
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index e3570c656b0f..ad3a321ae997 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -396,9 +396,10 @@ nfs_proc_link(struct inode *inode, struct inode *dir, const struct qstr *name)
}
static int
-nfs_proc_symlink(struct inode *dir, struct dentry *dentry, struct page *page,
+nfs_proc_symlink(struct inode *dir, struct dentry *dentry, struct folio *folio,
unsigned int len, struct iattr *sattr)
{
+ struct page *page = &folio->page;
struct nfs_fh *fh;
struct nfs_fattr *fattr;
struct nfs_symlinkargs arg = {
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 2667ab753d42..075b31c93f87 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1371,6 +1371,7 @@ unsigned short max_session_cb_slots = NFS4_DEF_CB_SLOT_TABLE_SIZE;
unsigned short send_implementation_id = 1;
char nfs4_client_id_uniquifier[NFS4_CLIENT_ID_UNIQ_LEN] = "";
bool recover_lost_locks = false;
+short nfs_delay_retrans = -1;
EXPORT_SYMBOL_GPL(nfs_callback_nr_threads);
EXPORT_SYMBOL_GPL(nfs_callback_set_tcpport);
@@ -1381,6 +1382,7 @@ EXPORT_SYMBOL_GPL(max_session_cb_slots);
EXPORT_SYMBOL_GPL(send_implementation_id);
EXPORT_SYMBOL_GPL(nfs4_client_id_uniquifier);
EXPORT_SYMBOL_GPL(recover_lost_locks);
+EXPORT_SYMBOL_GPL(nfs_delay_retrans);
#define NFS_CALLBACK_MAXPORTNR (65535U)
@@ -1429,5 +1431,9 @@ MODULE_PARM_DESC(recover_lost_locks,
"If the server reports that a lock might be lost, "
"try to recover it risking data corruption.");
-
+module_param_named(delay_retrans, nfs_delay_retrans, short, 0644);
+MODULE_PARM_DESC(delay_retrans,
+ "Unless negative, specifies the number of times the NFSv4 "
+ "client retries a request before returning an EAGAIN error, "
+ "after a reply of NFS4ERR_DELAY from the server.");
#endif /* CONFIG_NFS_V4 */
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 9d82d50ce0b1..b664caea8b4e 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -739,6 +739,8 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
&pgio);
pgio.pg_error = 0;
nfs_pageio_complete(&pgio);
+ if (err == -EAGAIN && mntflags & NFS_MOUNT_SOFTERR)
+ break;
} while (err < 0 && !nfs_error_is_fatal(err));
nfs_io_completion_put(ioc);
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 30c931b38853..be62acffad6c 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -21,7 +21,7 @@
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
#include "xfs_bmap_btree.h"
-#include "xfs_rtalloc.h"
+#include "xfs_rtbitmap.h"
#include "xfs_errortag.h"
#include "xfs_error.h"
#include "xfs_quota.h"
@@ -2989,7 +2989,7 @@ xfs_bmap_extsize_align(
* If realtime, and the result isn't a multiple of the realtime
* extent size we need to remove blocks until it is.
*/
- if (rt && (temp = (align_alen % mp->m_sb.sb_rextsize))) {
+ if (rt && (temp = xfs_extlen_to_rtxmod(mp, align_alen))) {
/*
* We're not covering the original request, or
* we won't be able to once we fix the length.
@@ -3016,7 +3016,7 @@ xfs_bmap_extsize_align(
else {
align_alen -= orig_off - align_off;
align_off = orig_off;
- align_alen -= align_alen % mp->m_sb.sb_rextsize;
+ align_alen -= xfs_extlen_to_rtxmod(mp, align_alen);
}
/*
* Result doesn't cover the request, fail it.
@@ -4826,12 +4826,8 @@ xfs_bmap_del_extent_delay(
ASSERT(got->br_startoff <= del->br_startoff);
ASSERT(got_endoff >= del_endoff);
- if (isrt) {
- uint64_t rtexts = XFS_FSB_TO_B(mp, del->br_blockcount);
-
- do_div(rtexts, mp->m_sb.sb_rextsize);
- xfs_mod_frextents(mp, rtexts);
- }
+ if (isrt)
+ xfs_mod_frextents(mp, xfs_rtb_to_rtx(mp, del->br_blockcount));
/*
* Update the inode delalloc counter now and wait to update the
@@ -5057,33 +5053,20 @@ xfs_bmap_del_extent_real(
flags = XFS_ILOG_CORE;
if (whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip)) {
- xfs_filblks_t len;
- xfs_extlen_t mod;
-
- len = div_u64_rem(del->br_blockcount, mp->m_sb.sb_rextsize,
- &mod);
- ASSERT(mod == 0);
-
if (!(bflags & XFS_BMAPI_REMAP)) {
- xfs_fsblock_t bno;
-
- bno = div_u64_rem(del->br_startblock,
- mp->m_sb.sb_rextsize, &mod);
- ASSERT(mod == 0);
-
- error = xfs_rtfree_extent(tp, bno, (xfs_extlen_t)len);
+ error = xfs_rtfree_blocks(tp, del->br_startblock,
+ del->br_blockcount);
if (error)
goto done;
}
do_fx = 0;
- nblks = len * mp->m_sb.sb_rextsize;
qfield = XFS_TRANS_DQ_RTBCOUNT;
} else {
do_fx = 1;
- nblks = del->br_blockcount;
qfield = XFS_TRANS_DQ_BCOUNT;
}
+ nblks = del->br_blockcount;
del_endblock = del->br_startblock + del->br_blockcount;
if (cur) {
@@ -5289,7 +5272,6 @@ __xfs_bunmapi(
int tmp_logflags; /* partial logging flags */
int wasdel; /* was a delayed alloc extent */
int whichfork; /* data or attribute fork */
- xfs_fsblock_t sum;
xfs_filblks_t len = *rlen; /* length to unmap in file */
xfs_fileoff_t end;
struct xfs_iext_cursor icur;
@@ -5384,8 +5366,8 @@ __xfs_bunmapi(
if (!isrt)
goto delete;
- sum = del.br_startblock + del.br_blockcount;
- div_u64_rem(sum, mp->m_sb.sb_rextsize, &mod);
+ mod = xfs_rtb_to_rtxoff(mp,
+ del.br_startblock + del.br_blockcount);
if (mod) {
/*
* Realtime extent not lined up at the end.
@@ -5432,7 +5414,8 @@ __xfs_bunmapi(
goto error0;
goto nodelete;
}
- div_u64_rem(del.br_startblock, mp->m_sb.sb_rextsize, &mod);
+
+ mod = xfs_rtb_to_rtxoff(mp, del.br_startblock);
if (mod) {
xfs_extlen_t off = mp->m_sb.sb_rextsize - mod;
@@ -6209,8 +6192,8 @@ xfs_bmap_validate_extent(
return __this_address;
if (XFS_IS_REALTIME_INODE(ip) && whichfork == XFS_DATA_FORK) {
- if (!xfs_verify_rtext(mp, irec->br_startblock,
- irec->br_blockcount))
+ if (!xfs_verify_rtbext(mp, irec->br_startblock,
+ irec->br_blockcount))
return __this_address;
} else {
if (!xfs_verify_fsbext(mp, irec->br_startblock,
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index 371dc07233e0..9a88aba1589f 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -98,7 +98,7 @@ typedef struct xfs_sb {
uint32_t sb_blocksize; /* logical block size, bytes */
xfs_rfsblock_t sb_dblocks; /* number of data blocks */
xfs_rfsblock_t sb_rblocks; /* number of realtime blocks */
- xfs_rtblock_t sb_rextents; /* number of realtime extents */
+ xfs_rtbxlen_t sb_rextents; /* number of realtime extents */
uuid_t sb_uuid; /* user-visible file system unique id */
xfs_fsblock_t sb_logstart; /* starting block of log if internal */
xfs_ino_t sb_rootino; /* root inode number */
@@ -691,6 +691,22 @@ struct xfs_agfl {
xfs_daddr_to_agno(mp, (d) + (len) - 1)))
/*
+ * Realtime bitmap information is accessed by the word, which is currently
+ * stored in host-endian format.
+ */
+union xfs_rtword_raw {
+ __u32 old;
+};
+
+/*
+ * Realtime summary counts are accessed by the word, which is currently
+ * stored in host-endian format.
+ */
+union xfs_suminfo_raw {
+ __u32 old;
+};
+
+/*
* XFS Timestamps
* ==============
*
@@ -1142,24 +1158,10 @@ static inline bool xfs_dinode_has_large_extent_counts(
#define XFS_BLOCKSIZE(mp) ((mp)->m_sb.sb_blocksize)
#define XFS_BLOCKMASK(mp) ((mp)->m_blockmask)
-#define XFS_BLOCKWSIZE(mp) ((mp)->m_blockwsize)
-#define XFS_BLOCKWMASK(mp) ((mp)->m_blockwmask)
/*
- * RT Summary and bit manipulation macros.
+ * RT bit manipulation macros.
*/
-#define XFS_SUMOFFS(mp,ls,bb) ((int)((ls) * (mp)->m_sb.sb_rbmblocks + (bb)))
-#define XFS_SUMOFFSTOBLOCK(mp,s) \
- (((s) * (uint)sizeof(xfs_suminfo_t)) >> (mp)->m_sb.sb_blocklog)
-#define XFS_SUMPTR(mp,bp,so) \
- ((xfs_suminfo_t *)((bp)->b_addr + \
- (((so) * (uint)sizeof(xfs_suminfo_t)) & XFS_BLOCKMASK(mp))))
-
-#define XFS_BITTOBLOCK(mp,bi) ((bi) >> (mp)->m_blkbit_log)
-#define XFS_BLOCKTOBIT(mp,bb) ((bb) << (mp)->m_blkbit_log)
-#define XFS_BITTOWORD(mp,bi) \
- ((int)(((bi) >> XFS_NBWORDLOG) & XFS_BLOCKWMASK(mp)))
-
#define XFS_RTMIN(a,b) ((a) < (b) ? (a) : (b))
#define XFS_RTMAX(a,b) ((a) > (b) ? (a) : (b))
diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c
index 396648acb5be..c269d704314d 100644
--- a/fs/xfs/libxfs/xfs_rtbitmap.c
+++ b/fs/xfs/libxfs/xfs_rtbitmap.c
@@ -16,6 +16,7 @@
#include "xfs_trans.h"
#include "xfs_rtalloc.h"
#include "xfs_error.h"
+#include "xfs_rtbitmap.h"
/*
* Realtime allocator bitmap functions shared with userspace.
@@ -46,25 +47,69 @@ const struct xfs_buf_ops xfs_rtbuf_ops = {
.verify_write = xfs_rtbuf_verify_write,
};
+/* Release cached rt bitmap and summary buffers. */
+void
+xfs_rtbuf_cache_relse(
+ struct xfs_rtalloc_args *args)
+{
+ if (args->rbmbp) {
+ xfs_trans_brelse(args->tp, args->rbmbp);
+ args->rbmbp = NULL;
+ args->rbmoff = NULLFILEOFF;
+ }
+ if (args->sumbp) {
+ xfs_trans_brelse(args->tp, args->sumbp);
+ args->sumbp = NULL;
+ args->sumoff = NULLFILEOFF;
+ }
+}
+
/*
* Get a buffer for the bitmap or summary file block specified.
* The buffer is returned read and locked.
*/
int
xfs_rtbuf_get(
- xfs_mount_t *mp, /* file system mount structure */
- xfs_trans_t *tp, /* transaction pointer */
- xfs_rtblock_t block, /* block number in bitmap or summary */
- int issum, /* is summary not bitmap */
- struct xfs_buf **bpp) /* output: buffer for the block */
+ struct xfs_rtalloc_args *args,
+ xfs_fileoff_t block, /* block number in bitmap or summary */
+ int issum) /* is summary not bitmap */
{
- struct xfs_buf *bp; /* block buffer, result */
- xfs_inode_t *ip; /* bitmap or summary inode */
- xfs_bmbt_irec_t map;
- int nmap = 1;
- int error; /* error value */
+ struct xfs_mount *mp = args->mp;
+ struct xfs_buf **cbpp; /* cached block buffer */
+ xfs_fileoff_t *coffp; /* cached block number */
+ struct xfs_buf *bp; /* block buffer, result */
+ struct xfs_inode *ip; /* bitmap or summary inode */
+ struct xfs_bmbt_irec map;
+ enum xfs_blft type;
+ int nmap = 1;
+ int error;
- ip = issum ? mp->m_rsumip : mp->m_rbmip;
+ if (issum) {
+ cbpp = &args->sumbp;
+ coffp = &args->sumoff;
+ ip = mp->m_rsumip;
+ type = XFS_BLFT_RTSUMMARY_BUF;
+ } else {
+ cbpp = &args->rbmbp;
+ coffp = &args->rbmoff;
+ ip = mp->m_rbmip;
+ type = XFS_BLFT_RTBITMAP_BUF;
+ }
+
+ /*
+ * If we have a cached buffer, and the block number matches, use that.
+ */
+ if (*cbpp && *coffp == block)
+ return 0;
+
+ /*
+ * Otherwise we have to have to get the buffer. If there was an old
+ * one, get rid of it first.
+ */
+ if (*cbpp) {
+ xfs_trans_brelse(args->tp, *cbpp);
+ *cbpp = NULL;
+ }
error = xfs_bmapi_read(ip, block, 1, &map, &nmap, 0);
if (error)
@@ -74,15 +119,15 @@ xfs_rtbuf_get(
return -EFSCORRUPTED;
ASSERT(map.br_startblock != NULLFSBLOCK);
- error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
+ error = xfs_trans_read_buf(mp, args->tp, mp->m_ddev_targp,
XFS_FSB_TO_DADDR(mp, map.br_startblock),
mp->m_bsize, 0, &bp, &xfs_rtbuf_ops);
if (error)
return error;
- xfs_trans_buf_set_type(tp, bp, issum ? XFS_BLFT_RTSUMMARY_BUF
- : XFS_BLFT_RTBITMAP_BUF);
- *bpp = bp;
+ xfs_trans_buf_set_type(args->tp, bp, type);
+ *cbpp = bp;
+ *coffp = block;
return 0;
}
@@ -92,47 +137,44 @@ xfs_rtbuf_get(
*/
int
xfs_rtfind_back(
- xfs_mount_t *mp, /* file system mount point */
- xfs_trans_t *tp, /* transaction pointer */
- xfs_rtblock_t start, /* starting block to look at */
- xfs_rtblock_t limit, /* last block to look at */
- xfs_rtblock_t *rtblock) /* out: start block found */
+ struct xfs_rtalloc_args *args,
+ xfs_rtxnum_t start, /* starting rtext to look at */
+ xfs_rtxnum_t limit, /* last rtext to look at */
+ xfs_rtxnum_t *rtx) /* out: start rtext found */
{
- xfs_rtword_t *b; /* current word in buffer */
- int bit; /* bit number in the word */
- xfs_rtblock_t block; /* bitmap block number */
- struct xfs_buf *bp; /* buf for the block */
- xfs_rtword_t *bufp; /* starting word in buffer */
- int error; /* error value */
- xfs_rtblock_t firstbit; /* first useful bit in the word */
- xfs_rtblock_t i; /* current bit number rel. to start */
- xfs_rtblock_t len; /* length of inspected area */
- xfs_rtword_t mask; /* mask of relevant bits for value */
- xfs_rtword_t want; /* mask for "good" values */
- xfs_rtword_t wdiff; /* difference from wanted value */
- int word; /* word number in the buffer */
+ struct xfs_mount *mp = args->mp;
+ int bit; /* bit number in the word */
+ xfs_fileoff_t block; /* bitmap block number */
+ int error; /* error value */
+ xfs_rtxnum_t firstbit; /* first useful bit in the word */
+ xfs_rtxnum_t i; /* current bit number rel. to start */
+ xfs_rtxnum_t len; /* length of inspected area */
+ xfs_rtword_t mask; /* mask of relevant bits for value */
+ xfs_rtword_t want; /* mask for "good" values */
+ xfs_rtword_t wdiff; /* difference from wanted value */
+ xfs_rtword_t incore;
+ unsigned int word; /* word number in the buffer */
/*
* Compute and read in starting bitmap block for starting block.
*/
- block = XFS_BITTOBLOCK(mp, start);
- error = xfs_rtbuf_get(mp, tp, block, 0, &bp);
- if (error) {
+ block = xfs_rtx_to_rbmblock(mp, start);
+ error = xfs_rtbitmap_read_buf(args, block);
+ if (error)
return error;
- }
- bufp = bp->b_addr;
+
/*
* Get the first word's index & point to it.
*/
- word = XFS_BITTOWORD(mp, start);
- b = &bufp[word];
+ word = xfs_rtx_to_rbmword(mp, start);
bit = (int)(start & (XFS_NBWORD - 1));
len = start - limit + 1;
/*
* Compute match value, based on the bit at start: if 1 (free)
* then all-ones, else all-zeroes.
*/
- want = (*b & ((xfs_rtword_t)1 << bit)) ? -1 : 0;
+ incore = xfs_rtbitmap_getword(args, word);
+ want = (incore & ((xfs_rtword_t)1 << bit)) ? -1 : 0;
/*
* If the starting position is not word-aligned, deal with the
* partial word.
@@ -149,13 +191,12 @@ xfs_rtfind_back(
* Calculate the difference between the value there
* and what we're looking for.
*/
- if ((wdiff = (*b ^ want) & mask)) {
+ if ((wdiff = (incore ^ want) & mask)) {
/*
* Different. Mark where we are and return.
*/
- xfs_trans_brelse(tp, bp);
i = bit - XFS_RTHIBIT(wdiff);
- *rtblock = start - i + 1;
+ *rtx = start - i + 1;
return 0;
}
i = bit - firstbit + 1;
@@ -167,19 +208,11 @@ xfs_rtfind_back(
/*
* If done with this block, get the previous one.
*/
- xfs_trans_brelse(tp, bp);
- error = xfs_rtbuf_get(mp, tp, --block, 0, &bp);
- if (error) {
+ error = xfs_rtbitmap_read_buf(args, --block);
+ if (error)
return error;
- }
- bufp = bp->b_addr;
- word = XFS_BLOCKWMASK(mp);
- b = &bufp[word];
- } else {
- /*
- * Go on to the previous word in the buffer.
- */
- b--;
+
+ word = mp->m_blockwsize - 1;
}
} else {
/*
@@ -195,13 +228,13 @@ xfs_rtfind_back(
/*
* Compute difference between actual and desired value.
*/
- if ((wdiff = *b ^ want)) {
+ incore = xfs_rtbitmap_getword(args, word);
+ if ((wdiff = incore ^ want)) {
/*
* Different, mark where we are and return.
*/
- xfs_trans_brelse(tp, bp);
i += XFS_NBWORD - 1 - XFS_RTHIBIT(wdiff);
- *rtblock = start - i + 1;
+ *rtx = start - i + 1;
return 0;
}
i += XFS_NBWORD;
@@ -213,19 +246,11 @@ xfs_rtfind_back(
/*
* If done with this block, get the previous one.
*/
- xfs_trans_brelse(tp, bp);
- error = xfs_rtbuf_get(mp, tp, --block, 0, &bp);
- if (error) {
+ error = xfs_rtbitmap_read_buf(args, --block);
+ if (error)
return error;
- }
- bufp = bp->b_addr;
- word = XFS_BLOCKWMASK(mp);
- b = &bufp[word];
- } else {
- /*
- * Go on to the previous word in the buffer.
- */
- b--;
+
+ word = mp->m_blockwsize - 1;
}
}
/*
@@ -242,13 +267,13 @@ xfs_rtfind_back(
/*
* Compute difference between actual and desired value.
*/
- if ((wdiff = (*b ^ want) & mask)) {
+ incore = xfs_rtbitmap_getword(args, word);
+ if ((wdiff = (incore ^ want) & mask)) {
/*
* Different, mark where we are and return.
*/
- xfs_trans_brelse(tp, bp);
i += XFS_NBWORD - 1 - XFS_RTHIBIT(wdiff);
- *rtblock = start - i + 1;
+ *rtx = start - i + 1;
return 0;
} else
i = len;
@@ -256,8 +281,7 @@ xfs_rtfind_back(
/*
* No match, return that we scanned the whole area.
*/
- xfs_trans_brelse(tp, bp);
- *rtblock = start - i + 1;
+ *rtx = start - i + 1;
return 0;
}
@@ -267,47 +291,44 @@ xfs_rtfind_back(
*/
int
xfs_rtfind_forw(
- xfs_mount_t *mp, /* file system mount point */
- xfs_trans_t *tp, /* transaction pointer */
- xfs_rtblock_t start, /* starting block to look at */
- xfs_rtblock_t limit, /* last block to look at */
- xfs_rtblock_t *rtblock) /* out: start block found */
+ struct xfs_rtalloc_args *args,
+ xfs_rtxnum_t start, /* starting rtext to look at */
+ xfs_rtxnum_t limit, /* last rtext to look at */
+ xfs_rtxnum_t *rtx) /* out: start rtext found */
{
- xfs_rtword_t *b; /* current word in buffer */
- int bit; /* bit number in the word */
- xfs_rtblock_t block; /* bitmap block number */
- struct xfs_buf *bp; /* buf for the block */
- xfs_rtword_t *bufp; /* starting word in buffer */
- int error; /* error value */
- xfs_rtblock_t i; /* current bit number rel. to start */
- xfs_rtblock_t lastbit; /* last useful bit in the word */
- xfs_rtblock_t len; /* length of inspected area */
- xfs_rtword_t mask; /* mask of relevant bits for value */
- xfs_rtword_t want; /* mask for "good" values */
- xfs_rtword_t wdiff; /* difference from wanted value */
- int word; /* word number in the buffer */
+ struct xfs_mount *mp = args->mp;
+ int bit; /* bit number in the word */
+ xfs_fileoff_t block; /* bitmap block number */
+ int error;
+ xfs_rtxnum_t i; /* current bit number rel. to start */
+ xfs_rtxnum_t lastbit;/* last useful bit in the word */
+ xfs_rtxnum_t len; /* length of inspected area */
+ xfs_rtword_t mask; /* mask of relevant bits for value */
+ xfs_rtword_t want; /* mask for "good" values */
+ xfs_rtword_t wdiff; /* difference from wanted value */
+ xfs_rtword_t incore;
+ unsigned int word; /* word number in the buffer */
/*
* Compute and read in starting bitmap block for starting block.
*/
- block = XFS_BITTOBLOCK(mp, start);
- error = xfs_rtbuf_get(mp, tp, block, 0, &bp);
- if (error) {
+ block = xfs_rtx_to_rbmblock(mp, start);
+ error = xfs_rtbitmap_read_buf(args, block);
+ if (error)
return error;
- }
- bufp = bp->b_addr;
+
/*
* Get the first word's index & point to it.
*/
- word = XFS_BITTOWORD(mp, start);
- b = &bufp[word];
+ word = xfs_rtx_to_rbmword(mp, start);
bit = (int)(start & (XFS_NBWORD - 1));
len = limit - start + 1;
/*
* Compute match value, based on the bit at start: if 1 (free)
* then all-ones, else all-zeroes.
*/
- want = (*b & ((xfs_rtword_t)1 << bit)) ? -1 : 0;
+ incore = xfs_rtbitmap_getword(args, word);
+ want = (incore & ((xfs_rtword_t)1 << bit)) ? -1 : 0;
/*
* If the starting position is not word-aligned, deal with the
* partial word.
@@ -323,13 +344,12 @@ xfs_rtfind_forw(
* Calculate the difference between the value there
* and what we're looking for.
*/
- if ((wdiff = (*b ^ want) & mask)) {
+ if ((wdiff = (incore ^ want) & mask)) {
/*
* Different. Mark where we are and return.
*/
- xfs_trans_brelse(tp, bp);
i = XFS_RTLOBIT(wdiff) - bit;
- *rtblock = start + i - 1;
+ *rtx = start + i - 1;
return 0;
}
i = lastbit - bit;
@@ -337,22 +357,15 @@ xfs_rtfind_forw(
* Go on to next block if that's where the next word is
* and we need the next word.
*/
- if (++word == XFS_BLOCKWSIZE(mp) && i < len) {
+ if (++word == mp->m_blockwsize && i < len) {
/*
* If done with this block, get the previous one.
*/
- xfs_trans_brelse(tp, bp);
- error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp);
- if (error) {
+ error = xfs_rtbitmap_read_buf(args, ++block);
+ if (error)
return error;
- }
- b = bufp = bp->b_addr;
+
word = 0;
- } else {
- /*
- * Go on to the previous word in the buffer.
- */
- b++;
}
} else {
/*
@@ -368,13 +381,13 @@ xfs_rtfind_forw(
/*
* Compute difference between actual and desired value.
*/
- if ((wdiff = *b ^ want)) {
+ incore = xfs_rtbitmap_getword(args, word);
+ if ((wdiff = incore ^ want)) {
/*
* Different, mark where we are and return.
*/
- xfs_trans_brelse(tp, bp);
i += XFS_RTLOBIT(wdiff);
- *rtblock = start + i - 1;
+ *rtx = start + i - 1;
return 0;
}
i += XFS_NBWORD;
@@ -382,22 +395,15 @@ xfs_rtfind_forw(
* Go on to next block if that's where the next word is
* and we need the next word.
*/
- if (++word == XFS_BLOCKWSIZE(mp) && i < len) {
+ if (++word == mp->m_blockwsize && i < len) {
/*
* If done with this block, get the next one.
*/
- xfs_trans_brelse(tp, bp);
- error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp);
- if (error) {
+ error = xfs_rtbitmap_read_buf(args, ++block);
+ if (error)
return error;
- }
- b = bufp = bp->b_addr;
+
word = 0;
- } else {
- /*
- * Go on to the next word in the buffer.
- */
- b++;
}
}
/*
@@ -412,13 +418,13 @@ xfs_rtfind_forw(
/*
* Compute difference between actual and desired value.
*/
- if ((wdiff = (*b ^ want) & mask)) {
+ incore = xfs_rtbitmap_getword(args, word);
+ if ((wdiff = (incore ^ want) & mask)) {
/*
* Different, mark where we are and return.
*/
- xfs_trans_brelse(tp, bp);
i += XFS_RTLOBIT(wdiff);
- *rtblock = start + i - 1;
+ *rtx = start + i - 1;
return 0;
} else
i = len;
@@ -426,11 +432,25 @@ xfs_rtfind_forw(
/*
* No match, return that we scanned the whole area.
*/
- xfs_trans_brelse(tp, bp);
- *rtblock = start + i - 1;
+ *rtx = start + i - 1;
return 0;
}
+/* Log rtsummary counter at @infoword. */
+static inline void
+xfs_trans_log_rtsummary(
+ struct xfs_rtalloc_args *args,
+ unsigned int infoword)
+{
+ struct xfs_buf *bp = args->sumbp;
+ size_t first, last;
+
+ first = (void *)xfs_rsumblock_infoptr(args, infoword) - bp->b_addr;
+ last = first + sizeof(xfs_suminfo_t) - 1;
+
+ xfs_trans_log_buf(args->tp, bp, first, last);
+}
+
/*
* Read and/or modify the summary information for a given extent size,
* bitmap block combination.
@@ -442,86 +462,77 @@ xfs_rtfind_forw(
*/
int
xfs_rtmodify_summary_int(
- xfs_mount_t *mp, /* file system mount structure */
- xfs_trans_t *tp, /* transaction pointer */
- int log, /* log2 of extent size */
- xfs_rtblock_t bbno, /* bitmap block number */
- int delta, /* change to make to summary info */
- struct xfs_buf **rbpp, /* in/out: summary block buffer */
- xfs_fsblock_t *rsb, /* in/out: summary block number */
- xfs_suminfo_t *sum) /* out: summary info for this block */
+ struct xfs_rtalloc_args *args,
+ int log, /* log2 of extent size */
+ xfs_fileoff_t bbno, /* bitmap block number */
+ int delta, /* change to make to summary info */
+ xfs_suminfo_t *sum) /* out: summary info for this block */
{
- struct xfs_buf *bp; /* buffer for the summary block */
- int error; /* error value */
- xfs_fsblock_t sb; /* summary fsblock */
- int so; /* index into the summary file */
- xfs_suminfo_t *sp; /* pointer to returned data */
+ struct xfs_mount *mp = args->mp;
+ int error;
+ xfs_fileoff_t sb; /* summary fsblock */
+ xfs_rtsumoff_t so; /* index into the summary file */
+ unsigned int infoword;
/*
* Compute entry number in the summary file.
*/
- so = XFS_SUMOFFS(mp, log, bbno);
+ so = xfs_rtsumoffs(mp, log, bbno);
/*
* Compute the block number in the summary file.
*/
- sb = XFS_SUMOFFSTOBLOCK(mp, so);
- /*
- * If we have an old buffer, and the block number matches, use that.
- */
- if (*rbpp && *rsb == sb)
- bp = *rbpp;
- /*
- * Otherwise we have to get the buffer.
- */
- else {
- /*
- * If there was an old one, get rid of it first.
- */
- if (*rbpp)
- xfs_trans_brelse(tp, *rbpp);
- error = xfs_rtbuf_get(mp, tp, sb, 1, &bp);
- if (error) {
- return error;
- }
- /*
- * Remember this buffer and block for the next call.
- */
- *rbpp = bp;
- *rsb = sb;
- }
+ sb = xfs_rtsumoffs_to_block(mp, so);
+
+ error = xfs_rtsummary_read_buf(args, sb);
+ if (error)
+ return error;
+
/*
* Point to the summary information, modify/log it, and/or copy it out.
*/
- sp = XFS_SUMPTR(mp, bp, so);
+ infoword = xfs_rtsumoffs_to_infoword(mp, so);
if (delta) {
- uint first = (uint)((char *)sp - (char *)bp->b_addr);
+ xfs_suminfo_t val = xfs_suminfo_add(args, infoword, delta);
- *sp += delta;
if (mp->m_rsum_cache) {
- if (*sp == 0 && log == mp->m_rsum_cache[bbno])
- mp->m_rsum_cache[bbno]++;
- if (*sp != 0 && log < mp->m_rsum_cache[bbno])
+ if (val == 0 && log + 1 == mp->m_rsum_cache[bbno])
mp->m_rsum_cache[bbno] = log;
+ if (val != 0 && log >= mp->m_rsum_cache[bbno])
+ mp->m_rsum_cache[bbno] = log + 1;
}
- xfs_trans_log_buf(tp, bp, first, first + sizeof(*sp) - 1);
+ xfs_trans_log_rtsummary(args, infoword);
+ if (sum)
+ *sum = val;
+ } else if (sum) {
+ *sum = xfs_suminfo_get(args, infoword);
}
- if (sum)
- *sum = *sp;
return 0;
}
int
xfs_rtmodify_summary(
- xfs_mount_t *mp, /* file system mount structure */
- xfs_trans_t *tp, /* transaction pointer */
- int log, /* log2 of extent size */
- xfs_rtblock_t bbno, /* bitmap block number */
- int delta, /* change to make to summary info */
- struct xfs_buf **rbpp, /* in/out: summary block buffer */
- xfs_fsblock_t *rsb) /* in/out: summary block number */
+ struct xfs_rtalloc_args *args,
+ int log, /* log2 of extent size */
+ xfs_fileoff_t bbno, /* bitmap block number */
+ int delta) /* in/out: summary block number */
+{
+ return xfs_rtmodify_summary_int(args, log, bbno, delta, NULL);
+}
+
+/* Log rtbitmap block from the word @from to the byte before @next. */
+static inline void
+xfs_trans_log_rtbitmap(
+ struct xfs_rtalloc_args *args,
+ unsigned int from,
+ unsigned int next)
{
- return xfs_rtmodify_summary_int(mp, tp, log, bbno,
- delta, rbpp, rsb, NULL);
+ struct xfs_buf *bp = args->rbmbp;
+ size_t first, last;
+
+ first = (void *)xfs_rbmblock_wordptr(args, from) - bp->b_addr;
+ last = ((void *)xfs_rbmblock_wordptr(args, next) - 1) - bp->b_addr;
+
+ xfs_trans_log_buf(args->tp, bp, first, last);
}
/*
@@ -530,41 +541,37 @@ xfs_rtmodify_summary(
*/
int
xfs_rtmodify_range(
- xfs_mount_t *mp, /* file system mount point */
- xfs_trans_t *tp, /* transaction pointer */
- xfs_rtblock_t start, /* starting block to modify */
- xfs_extlen_t len, /* length of extent to modify */
- int val) /* 1 for free, 0 for allocated */
+ struct xfs_rtalloc_args *args,
+ xfs_rtxnum_t start, /* starting rtext to modify */
+ xfs_rtxlen_t len, /* length of extent to modify */
+ int val) /* 1 for free, 0 for allocated */
{
- xfs_rtword_t *b; /* current word in buffer */
- int bit; /* bit number in the word */
- xfs_rtblock_t block; /* bitmap block number */
- struct xfs_buf *bp; /* buf for the block */
- xfs_rtword_t *bufp; /* starting word in buffer */
- int error; /* error value */
- xfs_rtword_t *first; /* first used word in the buffer */
- int i; /* current bit number rel. to start */
- int lastbit; /* last useful bit in word */
- xfs_rtword_t mask; /* mask o frelevant bits for value */
- int word; /* word number in the buffer */
+ struct xfs_mount *mp = args->mp;
+ int bit; /* bit number in the word */
+ xfs_fileoff_t block; /* bitmap block number */
+ int error;
+ int i; /* current bit number rel. to start */
+ int lastbit; /* last useful bit in word */
+ xfs_rtword_t mask; /* mask of relevant bits for value */
+ xfs_rtword_t incore;
+ unsigned int firstword; /* first word used in the buffer */
+ unsigned int word; /* word number in the buffer */
/*
* Compute starting bitmap block number.
*/
- block = XFS_BITTOBLOCK(mp, start);
+ block = xfs_rtx_to_rbmblock(mp, start);
/*
* Read the bitmap block, and point to its data.
*/
- error = xfs_rtbuf_get(mp, tp, block, 0, &bp);
- if (error) {
+ error = xfs_rtbitmap_read_buf(args, block);
+ if (error)
return error;
- }
- bufp = bp->b_addr;
+
/*
* Compute the starting word's address, and starting bit.
*/
- word = XFS_BITTOWORD(mp, start);
- first = b = &bufp[word];
+ firstword = word = xfs_rtx_to_rbmword(mp, start);
bit = (int)(start & (XFS_NBWORD - 1));
/*
* 0 (allocated) => all zeroes; 1 (free) => all ones.
@@ -583,34 +590,28 @@ xfs_rtmodify_range(
/*
* Set/clear the active bits.
*/
+ incore = xfs_rtbitmap_getword(args, word);
if (val)
- *b |= mask;
+ incore |= mask;
else
- *b &= ~mask;
+ incore &= ~mask;
+ xfs_rtbitmap_setword(args, word, incore);
i = lastbit - bit;
/*
* Go on to the next block if that's where the next word is
* and we need the next word.
*/
- if (++word == XFS_BLOCKWSIZE(mp) && i < len) {
+ if (++word == mp->m_blockwsize && i < len) {
/*
* Log the changed part of this block.
* Get the next one.
*/
- xfs_trans_log_buf(tp, bp,
- (uint)((char *)first - (char *)bufp),
- (uint)((char *)b - (char *)bufp));
- error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp);
- if (error) {
+ xfs_trans_log_rtbitmap(args, firstword, word);
+ error = xfs_rtbitmap_read_buf(args, ++block);
+ if (error)
return error;
- }
- first = b = bufp = bp->b_addr;
- word = 0;
- } else {
- /*
- * Go on to the next word in the buffer
- */
- b++;
+
+ firstword = word = 0;
}
} else {
/*
@@ -626,31 +627,23 @@ xfs_rtmodify_range(
/*
* Set the word value correctly.
*/
- *b = val;
+ xfs_rtbitmap_setword(args, word, val);
i += XFS_NBWORD;
/*
* Go on to the next block if that's where the next word is
* and we need the next word.
*/
- if (++word == XFS_BLOCKWSIZE(mp) && i < len) {
+ if (++word == mp->m_blockwsize && i < len) {
/*
* Log the changed part of this block.
* Get the next one.
*/
- xfs_trans_log_buf(tp, bp,
- (uint)((char *)first - (char *)bufp),
- (uint)((char *)b - (char *)bufp));
- error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp);
- if (error) {
+ xfs_trans_log_rtbitmap(args, firstword, word);
+ error = xfs_rtbitmap_read_buf(args, ++block);
+ if (error)
return error;
- }
- first = b = bufp = bp->b_addr;
- word = 0;
- } else {
- /*
- * Go on to the next word in the buffer
- */
- b++;
+
+ firstword = word = 0;
}
}
/*
@@ -665,18 +658,19 @@ xfs_rtmodify_range(
/*
* Set/clear the active bits.
*/
+ incore = xfs_rtbitmap_getword(args, word);
if (val)
- *b |= mask;
+ incore |= mask;
else
- *b &= ~mask;
- b++;
+ incore &= ~mask;
+ xfs_rtbitmap_setword(args, word, incore);
+ word++;
}
/*
* Log any remaining changed bytes.
*/
- if (b > first)
- xfs_trans_log_buf(tp, bp, (uint)((char *)first - (char *)bufp),
- (uint)((char *)b - (char *)bufp - 1));
+ if (word > firstword)
+ xfs_trans_log_rtbitmap(args, firstword, word);
return 0;
}
@@ -686,23 +680,21 @@ xfs_rtmodify_range(
*/
int
xfs_rtfree_range(
- xfs_mount_t *mp, /* file system mount point */
- xfs_trans_t *tp, /* transaction pointer */
- xfs_rtblock_t start, /* starting block to free */
- xfs_extlen_t len, /* length to free */
- struct xfs_buf **rbpp, /* in/out: summary block buffer */
- xfs_fsblock_t *rsb) /* in/out: summary block number */
+ struct xfs_rtalloc_args *args,
+ xfs_rtxnum_t start, /* starting rtext to free */
+ xfs_rtxlen_t len) /* in/out: summary block number */
{
- xfs_rtblock_t end; /* end of the freed extent */
- int error; /* error value */
- xfs_rtblock_t postblock; /* first block freed > end */
- xfs_rtblock_t preblock; /* first block freed < start */
+ struct xfs_mount *mp = args->mp;
+ xfs_rtxnum_t end; /* end of the freed extent */
+ int error; /* error value */
+ xfs_rtxnum_t postblock; /* first rtext freed > end */
+ xfs_rtxnum_t preblock; /* first rtext freed < start */
end = start + len - 1;
/*
* Modify the bitmap to mark this extent freed.
*/
- error = xfs_rtmodify_range(mp, tp, start, len, 1);
+ error = xfs_rtmodify_range(args, start, len, 1);
if (error) {
return error;
}
@@ -711,15 +703,15 @@ xfs_rtfree_range(
* We need to find the beginning and end of the extent so we can
* properly update the summary.
*/
- error = xfs_rtfind_back(mp, tp, start, 0, &preblock);
+ error = xfs_rtfind_back(args, start, 0, &preblock);
if (error) {
return error;
}
/*
* Find the next allocated block (end of allocated extent).
*/
- error = xfs_rtfind_forw(mp, tp, end, mp->m_sb.sb_rextents - 1,
- &postblock);
+ error = xfs_rtfind_forw(args, end, mp->m_sb.sb_rextents - 1,
+ &postblock);
if (error)
return error;
/*
@@ -727,9 +719,9 @@ xfs_rtfree_range(
* old extent, add summary data for them to be allocated.
*/
if (preblock < start) {
- error = xfs_rtmodify_summary(mp, tp,
- XFS_RTBLOCKLOG(start - preblock),
- XFS_BITTOBLOCK(mp, preblock), -1, rbpp, rsb);
+ error = xfs_rtmodify_summary(args,
+ XFS_RTBLOCKLOG(start - preblock),
+ xfs_rtx_to_rbmblock(mp, preblock), -1);
if (error) {
return error;
}
@@ -739,9 +731,9 @@ xfs_rtfree_range(
* old extent, add summary data for them to be allocated.
*/
if (postblock > end) {
- error = xfs_rtmodify_summary(mp, tp,
- XFS_RTBLOCKLOG(postblock - end),
- XFS_BITTOBLOCK(mp, end + 1), -1, rbpp, rsb);
+ error = xfs_rtmodify_summary(args,
+ XFS_RTBLOCKLOG(postblock - end),
+ xfs_rtx_to_rbmblock(mp, end + 1), -1);
if (error) {
return error;
}
@@ -750,10 +742,9 @@ xfs_rtfree_range(
* Increment the summary information corresponding to the entire
* (new) free extent.
*/
- error = xfs_rtmodify_summary(mp, tp,
- XFS_RTBLOCKLOG(postblock + 1 - preblock),
- XFS_BITTOBLOCK(mp, preblock), 1, rbpp, rsb);
- return error;
+ return xfs_rtmodify_summary(args,
+ XFS_RTBLOCKLOG(postblock + 1 - preblock),
+ xfs_rtx_to_rbmblock(mp, preblock), 1);
}
/*
@@ -762,43 +753,39 @@ xfs_rtfree_range(
*/
int
xfs_rtcheck_range(
- xfs_mount_t *mp, /* file system mount point */
- xfs_trans_t *tp, /* transaction pointer */
- xfs_rtblock_t start, /* starting block number of extent */
- xfs_extlen_t len, /* length of extent */
- int val, /* 1 for free, 0 for allocated */
- xfs_rtblock_t *new, /* out: first block not matching */
- int *stat) /* out: 1 for matches, 0 for not */
+ struct xfs_rtalloc_args *args,
+ xfs_rtxnum_t start, /* starting rtext number of extent */
+ xfs_rtxlen_t len, /* length of extent */
+ int val, /* 1 for free, 0 for allocated */
+ xfs_rtxnum_t *new, /* out: first rtext not matching */
+ int *stat) /* out: 1 for matches, 0 for not */
{
- xfs_rtword_t *b; /* current word in buffer */
- int bit; /* bit number in the word */
- xfs_rtblock_t block; /* bitmap block number */
- struct xfs_buf *bp; /* buf for the block */
- xfs_rtword_t *bufp; /* starting word in buffer */
- int error; /* error value */
- xfs_rtblock_t i; /* current bit number rel. to start */
- xfs_rtblock_t lastbit; /* last useful bit in word */
- xfs_rtword_t mask; /* mask of relevant bits for value */
- xfs_rtword_t wdiff; /* difference from wanted value */
- int word; /* word number in the buffer */
+ struct xfs_mount *mp = args->mp;
+ int bit; /* bit number in the word */
+ xfs_fileoff_t block; /* bitmap block number */
+ int error;
+ xfs_rtxnum_t i; /* current bit number rel. to start */
+ xfs_rtxnum_t lastbit; /* last useful bit in word */
+ xfs_rtword_t mask; /* mask of relevant bits for value */
+ xfs_rtword_t wdiff; /* difference from wanted value */
+ xfs_rtword_t incore;
+ unsigned int word; /* word number in the buffer */
/*
* Compute starting bitmap block number
*/
- block = XFS_BITTOBLOCK(mp, start);
+ block = xfs_rtx_to_rbmblock(mp, start);
/*
* Read the bitmap block.
*/
- error = xfs_rtbuf_get(mp, tp, block, 0, &bp);
- if (error) {
+ error = xfs_rtbitmap_read_buf(args, block);
+ if (error)
return error;
- }
- bufp = bp->b_addr;
+
/*
* Compute the starting word's address, and starting bit.
*/
- word = XFS_BITTOWORD(mp, start);
- b = &bufp[word];
+ word = xfs_rtx_to_rbmword(mp, start);
bit = (int)(start & (XFS_NBWORD - 1));
/*
* 0 (allocated) => all zero's; 1 (free) => all one's.
@@ -820,11 +807,11 @@ xfs_rtcheck_range(
/*
* Compute difference between actual and desired value.
*/
- if ((wdiff = (*b ^ val) & mask)) {
+ incore = xfs_rtbitmap_getword(args, word);
+ if ((wdiff = (incore ^ val) & mask)) {
/*
* Different, compute first wrong bit and return.
*/
- xfs_trans_brelse(tp, bp);
i = XFS_RTLOBIT(wdiff) - bit;
*new = start + i;
*stat = 0;
@@ -835,22 +822,15 @@ xfs_rtcheck_range(
* Go on to next block if that's where the next word is
* and we need the next word.
*/
- if (++word == XFS_BLOCKWSIZE(mp) && i < len) {
+ if (++word == mp->m_blockwsize && i < len) {
/*
* If done with this block, get the next one.
*/
- xfs_trans_brelse(tp, bp);
- error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp);
- if (error) {
+ error = xfs_rtbitmap_read_buf(args, ++block);
+ if (error)
return error;
- }
- b = bufp = bp->b_addr;
+
word = 0;
- } else {
- /*
- * Go on to the next word in the buffer.
- */
- b++;
}
} else {
/*
@@ -866,11 +846,11 @@ xfs_rtcheck_range(
/*
* Compute difference between actual and desired value.
*/
- if ((wdiff = *b ^ val)) {
+ incore = xfs_rtbitmap_getword(args, word);
+ if ((wdiff = incore ^ val)) {
/*
* Different, compute first wrong bit and return.
*/
- xfs_trans_brelse(tp, bp);
i += XFS_RTLOBIT(wdiff);
*new = start + i;
*stat = 0;
@@ -881,22 +861,15 @@ xfs_rtcheck_range(
* Go on to next block if that's where the next word is
* and we need the next word.
*/
- if (++word == XFS_BLOCKWSIZE(mp) && i < len) {
+ if (++word == mp->m_blockwsize && i < len) {
/*
* If done with this block, get the next one.
*/
- xfs_trans_brelse(tp, bp);
- error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp);
- if (error) {
+ error = xfs_rtbitmap_read_buf(args, ++block);
+ if (error)
return error;
- }
- b = bufp = bp->b_addr;
+
word = 0;
- } else {
- /*
- * Go on to the next word in the buffer.
- */
- b++;
}
}
/*
@@ -911,11 +884,11 @@ xfs_rtcheck_range(
/*
* Compute difference between actual and desired value.
*/
- if ((wdiff = (*b ^ val) & mask)) {
+ incore = xfs_rtbitmap_getword(args, word);
+ if ((wdiff = (incore ^ val) & mask)) {
/*
* Different, compute first wrong bit and return.
*/
- xfs_trans_brelse(tp, bp);
i += XFS_RTLOBIT(wdiff);
*new = start + i;
*stat = 0;
@@ -926,7 +899,6 @@ xfs_rtcheck_range(
/*
* Successful, return.
*/
- xfs_trans_brelse(tp, bp);
*new = start + i;
*stat = 1;
return 0;
@@ -936,58 +908,57 @@ xfs_rtcheck_range(
/*
* Check that the given extent (block range) is allocated already.
*/
-STATIC int /* error */
+STATIC int
xfs_rtcheck_alloc_range(
- xfs_mount_t *mp, /* file system mount point */
- xfs_trans_t *tp, /* transaction pointer */
- xfs_rtblock_t bno, /* starting block number of extent */
- xfs_extlen_t len) /* length of extent */
+ struct xfs_rtalloc_args *args,
+ xfs_rtxnum_t start, /* starting rtext number of extent */
+ xfs_rtxlen_t len) /* length of extent */
{
- xfs_rtblock_t new; /* dummy for xfs_rtcheck_range */
- int stat;
- int error;
+ xfs_rtxnum_t new; /* dummy for xfs_rtcheck_range */
+ int stat;
+ int error;
- error = xfs_rtcheck_range(mp, tp, bno, len, 0, &new, &stat);
+ error = xfs_rtcheck_range(args, start, len, 0, &new, &stat);
if (error)
return error;
ASSERT(stat);
return 0;
}
#else
-#define xfs_rtcheck_alloc_range(m,t,b,l) (0)
+#define xfs_rtcheck_alloc_range(a,b,l) (0)
#endif
/*
* Free an extent in the realtime subvolume. Length is expressed in
* realtime extents, as is the block number.
*/
-int /* error */
+int
xfs_rtfree_extent(
- xfs_trans_t *tp, /* transaction pointer */
- xfs_rtblock_t bno, /* starting block number to free */
- xfs_extlen_t len) /* length of extent freed */
+ struct xfs_trans *tp, /* transaction pointer */
+ xfs_rtxnum_t start, /* starting rtext number to free */
+ xfs_rtxlen_t len) /* length of extent freed */
{
- int error; /* error value */
- xfs_mount_t *mp; /* file system mount structure */
- xfs_fsblock_t sb; /* summary file block number */
- struct xfs_buf *sumbp = NULL; /* summary file block buffer */
- struct timespec64 atime;
-
- mp = tp->t_mountp;
+ struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_rtalloc_args args = {
+ .mp = mp,
+ .tp = tp,
+ };
+ int error;
+ struct timespec64 atime;
ASSERT(mp->m_rbmip->i_itemp != NULL);
ASSERT(xfs_isilocked(mp->m_rbmip, XFS_ILOCK_EXCL));
- error = xfs_rtcheck_alloc_range(mp, tp, bno, len);
+ error = xfs_rtcheck_alloc_range(&args, start, len);
if (error)
return error;
/*
* Free the range of realtime blocks.
*/
- error = xfs_rtfree_range(mp, tp, bno, len, &sumbp, &sb);
- if (error) {
- return error;
- }
+ error = xfs_rtfree_range(&args, start, len);
+ if (error)
+ goto out;
+
/*
* Mark more blocks free in the superblock.
*/
@@ -1002,11 +973,47 @@ xfs_rtfree_extent(
mp->m_rbmip->i_diflags |= XFS_DIFLAG_NEWRTBM;
atime = inode_get_atime(VFS_I(mp->m_rbmip));
- *((uint64_t *)&atime) = 0;
+ atime.tv_sec = 0;
inode_set_atime_to_ts(VFS_I(mp->m_rbmip), atime);
xfs_trans_log_inode(tp, mp->m_rbmip, XFS_ILOG_CORE);
}
- return 0;
+ error = 0;
+out:
+ xfs_rtbuf_cache_relse(&args);
+ return error;
+}
+
+/*
+ * Free some blocks in the realtime subvolume. rtbno and rtlen are in units of
+ * rt blocks, not rt extents; must be aligned to the rt extent size; and rtlen
+ * cannot exceed XFS_MAX_BMBT_EXTLEN.
+ */
+int
+xfs_rtfree_blocks(
+ struct xfs_trans *tp,
+ xfs_fsblock_t rtbno,
+ xfs_filblks_t rtlen)
+{
+ struct xfs_mount *mp = tp->t_mountp;
+ xfs_rtxnum_t start;
+ xfs_filblks_t len;
+ xfs_extlen_t mod;
+
+ ASSERT(rtlen <= XFS_MAX_BMBT_EXTLEN);
+
+ len = xfs_rtb_to_rtxrem(mp, rtlen, &mod);
+ if (mod) {
+ ASSERT(mod == 0);
+ return -EIO;
+ }
+
+ start = xfs_rtb_to_rtxrem(mp, rtbno, &mod);
+ if (mod) {
+ ASSERT(mod == 0);
+ return -EIO;
+ }
+
+ return xfs_rtfree_extent(tp, start, len);
}
/* Find all the free records within a given range. */
@@ -1019,10 +1026,14 @@ xfs_rtalloc_query_range(
xfs_rtalloc_query_range_fn fn,
void *priv)
{
+ struct xfs_rtalloc_args args = {
+ .mp = mp,
+ .tp = tp,
+ };
struct xfs_rtalloc_rec rec;
- xfs_rtblock_t rtstart;
- xfs_rtblock_t rtend;
- xfs_rtblock_t high_key;
+ xfs_rtxnum_t rtstart;
+ xfs_rtxnum_t rtend;
+ xfs_rtxnum_t high_key;
int is_free;
int error = 0;
@@ -1038,13 +1049,13 @@ xfs_rtalloc_query_range(
rtstart = low_rec->ar_startext;
while (rtstart <= high_key) {
/* Is the first block free? */
- error = xfs_rtcheck_range(mp, tp, rtstart, 1, 1, &rtend,
+ error = xfs_rtcheck_range(&args, rtstart, 1, 1, &rtend,
&is_free);
if (error)
break;
/* How long does the extent go for? */
- error = xfs_rtfind_forw(mp, tp, rtstart, high_key, &rtend);
+ error = xfs_rtfind_forw(&args, rtstart, high_key, &rtend);
if (error)
break;
@@ -1060,6 +1071,7 @@ xfs_rtalloc_query_range(
rtstart = rtend + 1;
}
+ xfs_rtbuf_cache_relse(&args);
return error;
}
@@ -1085,18 +1097,79 @@ int
xfs_rtalloc_extent_is_free(
struct xfs_mount *mp,
struct xfs_trans *tp,
- xfs_rtblock_t start,
- xfs_extlen_t len,
+ xfs_rtxnum_t start,
+ xfs_rtxlen_t len,
bool *is_free)
{
- xfs_rtblock_t end;
+ struct xfs_rtalloc_args args = {
+ .mp = mp,
+ .tp = tp,
+ };
+ xfs_rtxnum_t end;
int matches;
int error;
- error = xfs_rtcheck_range(mp, tp, start, len, 1, &end, &matches);
+ error = xfs_rtcheck_range(&args, start, len, 1, &end, &matches);
+ xfs_rtbuf_cache_relse(&args);
if (error)
return error;
*is_free = matches;
return 0;
}
+
+/*
+ * Compute the number of rtbitmap blocks needed to track the given number of rt
+ * extents.
+ */
+xfs_filblks_t
+xfs_rtbitmap_blockcount(
+ struct xfs_mount *mp,
+ xfs_rtbxlen_t rtextents)
+{
+ return howmany_64(rtextents, NBBY * mp->m_sb.sb_blocksize);
+}
+
+/*
+ * Compute the number of rtbitmap words needed to populate every block of a
+ * bitmap that is large enough to track the given number of rt extents.
+ */
+unsigned long long
+xfs_rtbitmap_wordcount(
+ struct xfs_mount *mp,
+ xfs_rtbxlen_t rtextents)
+{
+ xfs_filblks_t blocks;
+
+ blocks = xfs_rtbitmap_blockcount(mp, rtextents);
+ return XFS_FSB_TO_B(mp, blocks) >> XFS_WORDLOG;
+}
+
+/* Compute the number of rtsummary blocks needed to track the given rt space. */
+xfs_filblks_t
+xfs_rtsummary_blockcount(
+ struct xfs_mount *mp,
+ unsigned int rsumlevels,
+ xfs_extlen_t rbmblocks)
+{
+ unsigned long long rsumwords;
+
+ rsumwords = (unsigned long long)rsumlevels * rbmblocks;
+ return XFS_B_TO_FSB(mp, rsumwords << XFS_WORDLOG);
+}
+
+/*
+ * Compute the number of rtsummary info words needed to populate every block of
+ * a summary file that is large enough to track the given rt space.
+ */
+unsigned long long
+xfs_rtsummary_wordcount(
+ struct xfs_mount *mp,
+ unsigned int rsumlevels,
+ xfs_extlen_t rbmblocks)
+{
+ xfs_filblks_t blocks;
+
+ blocks = xfs_rtsummary_blockcount(mp, rsumlevels, rbmblocks);
+ return XFS_FSB_TO_B(mp, blocks) >> XFS_WORDLOG;
+}
diff --git a/fs/xfs/libxfs/xfs_rtbitmap.h b/fs/xfs/libxfs/xfs_rtbitmap.h
new file mode 100644
index 000000000000..c0637057d69c
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_rtbitmap.h
@@ -0,0 +1,383 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
+ * All Rights Reserved.
+ */
+#ifndef __XFS_RTBITMAP_H__
+#define __XFS_RTBITMAP_H__
+
+struct xfs_rtalloc_args {
+ struct xfs_mount *mp;
+ struct xfs_trans *tp;
+
+ struct xfs_buf *rbmbp; /* bitmap block buffer */
+ struct xfs_buf *sumbp; /* summary block buffer */
+
+ xfs_fileoff_t rbmoff; /* bitmap block number */
+ xfs_fileoff_t sumoff; /* summary block number */
+};
+
+static inline xfs_rtblock_t
+xfs_rtx_to_rtb(
+ struct xfs_mount *mp,
+ xfs_rtxnum_t rtx)
+{
+ if (mp->m_rtxblklog >= 0)
+ return rtx << mp->m_rtxblklog;
+
+ return rtx * mp->m_sb.sb_rextsize;
+}
+
+static inline xfs_extlen_t
+xfs_rtxlen_to_extlen(
+ struct xfs_mount *mp,
+ xfs_rtxlen_t rtxlen)
+{
+ if (mp->m_rtxblklog >= 0)
+ return rtxlen << mp->m_rtxblklog;
+
+ return rtxlen * mp->m_sb.sb_rextsize;
+}
+
+/* Compute the misalignment between an extent length and a realtime extent .*/
+static inline unsigned int
+xfs_extlen_to_rtxmod(
+ struct xfs_mount *mp,
+ xfs_extlen_t len)
+{
+ if (mp->m_rtxblklog >= 0)
+ return len & mp->m_rtxblkmask;
+
+ return len % mp->m_sb.sb_rextsize;
+}
+
+static inline xfs_rtxlen_t
+xfs_extlen_to_rtxlen(
+ struct xfs_mount *mp,
+ xfs_extlen_t len)
+{
+ if (mp->m_rtxblklog >= 0)
+ return len >> mp->m_rtxblklog;
+
+ return len / mp->m_sb.sb_rextsize;
+}
+
+/* Convert an rt block number into an rt extent number. */
+static inline xfs_rtxnum_t
+xfs_rtb_to_rtx(
+ struct xfs_mount *mp,
+ xfs_rtblock_t rtbno)
+{
+ if (likely(mp->m_rtxblklog >= 0))
+ return rtbno >> mp->m_rtxblklog;
+
+ return div_u64(rtbno, mp->m_sb.sb_rextsize);
+}
+
+/* Return the offset of an rt block number within an rt extent. */
+static inline xfs_extlen_t
+xfs_rtb_to_rtxoff(
+ struct xfs_mount *mp,
+ xfs_rtblock_t rtbno)
+{
+ if (likely(mp->m_rtxblklog >= 0))
+ return rtbno & mp->m_rtxblkmask;
+
+ return do_div(rtbno, mp->m_sb.sb_rextsize);
+}
+
+/*
+ * Crack an rt block number into an rt extent number and an offset within that
+ * rt extent. Returns the rt extent number directly and the offset in @off.
+ */
+static inline xfs_rtxnum_t
+xfs_rtb_to_rtxrem(
+ struct xfs_mount *mp,
+ xfs_rtblock_t rtbno,
+ xfs_extlen_t *off)
+{
+ if (likely(mp->m_rtxblklog >= 0)) {
+ *off = rtbno & mp->m_rtxblkmask;
+ return rtbno >> mp->m_rtxblklog;
+ }
+
+ return div_u64_rem(rtbno, mp->m_sb.sb_rextsize, off);
+}
+
+/*
+ * Convert an rt block number into an rt extent number, rounding up to the next
+ * rt extent if the rt block is not aligned to an rt extent boundary.
+ */
+static inline xfs_rtxnum_t
+xfs_rtb_to_rtxup(
+ struct xfs_mount *mp,
+ xfs_rtblock_t rtbno)
+{
+ if (likely(mp->m_rtxblklog >= 0)) {
+ if (rtbno & mp->m_rtxblkmask)
+ return (rtbno >> mp->m_rtxblklog) + 1;
+ return rtbno >> mp->m_rtxblklog;
+ }
+
+ if (do_div(rtbno, mp->m_sb.sb_rextsize))
+ rtbno++;
+ return rtbno;
+}
+
+/* Round this rtblock up to the nearest rt extent size. */
+static inline xfs_rtblock_t
+xfs_rtb_roundup_rtx(
+ struct xfs_mount *mp,
+ xfs_rtblock_t rtbno)
+{
+ return roundup_64(rtbno, mp->m_sb.sb_rextsize);
+}
+
+/* Round this rtblock down to the nearest rt extent size. */
+static inline xfs_rtblock_t
+xfs_rtb_rounddown_rtx(
+ struct xfs_mount *mp,
+ xfs_rtblock_t rtbno)
+{
+ return rounddown_64(rtbno, mp->m_sb.sb_rextsize);
+}
+
+/* Convert an rt extent number to a file block offset in the rt bitmap file. */
+static inline xfs_fileoff_t
+xfs_rtx_to_rbmblock(
+ struct xfs_mount *mp,
+ xfs_rtxnum_t rtx)
+{
+ return rtx >> mp->m_blkbit_log;
+}
+
+/* Convert an rt extent number to a word offset within an rt bitmap block. */
+static inline unsigned int
+xfs_rtx_to_rbmword(
+ struct xfs_mount *mp,
+ xfs_rtxnum_t rtx)
+{
+ return (rtx >> XFS_NBWORDLOG) & (mp->m_blockwsize - 1);
+}
+
+/* Convert a file block offset in the rt bitmap file to an rt extent number. */
+static inline xfs_rtxnum_t
+xfs_rbmblock_to_rtx(
+ struct xfs_mount *mp,
+ xfs_fileoff_t rbmoff)
+{
+ return rbmoff << mp->m_blkbit_log;
+}
+
+/* Return a pointer to a bitmap word within a rt bitmap block. */
+static inline union xfs_rtword_raw *
+xfs_rbmblock_wordptr(
+ struct xfs_rtalloc_args *args,
+ unsigned int index)
+{
+ union xfs_rtword_raw *words = args->rbmbp->b_addr;
+
+ return words + index;
+}
+
+/* Convert an ondisk bitmap word to its incore representation. */
+static inline xfs_rtword_t
+xfs_rtbitmap_getword(
+ struct xfs_rtalloc_args *args,
+ unsigned int index)
+{
+ union xfs_rtword_raw *word = xfs_rbmblock_wordptr(args, index);
+
+ return word->old;
+}
+
+/* Set an ondisk bitmap word from an incore representation. */
+static inline void
+xfs_rtbitmap_setword(
+ struct xfs_rtalloc_args *args,
+ unsigned int index,
+ xfs_rtword_t value)
+{
+ union xfs_rtword_raw *word = xfs_rbmblock_wordptr(args, index);
+
+ word->old = value;
+}
+
+/*
+ * Convert a rt extent length and rt bitmap block number to a xfs_suminfo_t
+ * offset within the rt summary file.
+ */
+static inline xfs_rtsumoff_t
+xfs_rtsumoffs(
+ struct xfs_mount *mp,
+ int log2_len,
+ xfs_fileoff_t rbmoff)
+{
+ return log2_len * mp->m_sb.sb_rbmblocks + rbmoff;
+}
+
+/*
+ * Convert an xfs_suminfo_t offset to a file block offset within the rt summary
+ * file.
+ */
+static inline xfs_fileoff_t
+xfs_rtsumoffs_to_block(
+ struct xfs_mount *mp,
+ xfs_rtsumoff_t rsumoff)
+{
+ return XFS_B_TO_FSBT(mp, rsumoff * sizeof(xfs_suminfo_t));
+}
+
+/*
+ * Convert an xfs_suminfo_t offset to an info word offset within an rt summary
+ * block.
+ */
+static inline unsigned int
+xfs_rtsumoffs_to_infoword(
+ struct xfs_mount *mp,
+ xfs_rtsumoff_t rsumoff)
+{
+ unsigned int mask = mp->m_blockmask >> XFS_SUMINFOLOG;
+
+ return rsumoff & mask;
+}
+
+/* Return a pointer to a summary info word within a rt summary block. */
+static inline union xfs_suminfo_raw *
+xfs_rsumblock_infoptr(
+ struct xfs_rtalloc_args *args,
+ unsigned int index)
+{
+ union xfs_suminfo_raw *info = args->sumbp->b_addr;
+
+ return info + index;
+}
+
+/* Get the current value of a summary counter. */
+static inline xfs_suminfo_t
+xfs_suminfo_get(
+ struct xfs_rtalloc_args *args,
+ unsigned int index)
+{
+ union xfs_suminfo_raw *info = xfs_rsumblock_infoptr(args, index);
+
+ return info->old;
+}
+
+/* Add to the current value of a summary counter and return the new value. */
+static inline xfs_suminfo_t
+xfs_suminfo_add(
+ struct xfs_rtalloc_args *args,
+ unsigned int index,
+ int delta)
+{
+ union xfs_suminfo_raw *info = xfs_rsumblock_infoptr(args, index);
+
+ info->old += delta;
+ return info->old;
+}
+
+/*
+ * Functions for walking free space rtextents in the realtime bitmap.
+ */
+struct xfs_rtalloc_rec {
+ xfs_rtxnum_t ar_startext;
+ xfs_rtbxlen_t ar_extcount;
+};
+
+typedef int (*xfs_rtalloc_query_range_fn)(
+ struct xfs_mount *mp,
+ struct xfs_trans *tp,
+ const struct xfs_rtalloc_rec *rec,
+ void *priv);
+
+#ifdef CONFIG_XFS_RT
+void xfs_rtbuf_cache_relse(struct xfs_rtalloc_args *args);
+
+int xfs_rtbuf_get(struct xfs_rtalloc_args *args, xfs_fileoff_t block,
+ int issum);
+
+static inline int
+xfs_rtbitmap_read_buf(
+ struct xfs_rtalloc_args *args,
+ xfs_fileoff_t block)
+{
+ return xfs_rtbuf_get(args, block, 0);
+}
+
+static inline int
+xfs_rtsummary_read_buf(
+ struct xfs_rtalloc_args *args,
+ xfs_fileoff_t block)
+{
+ return xfs_rtbuf_get(args, block, 1);
+}
+
+int xfs_rtcheck_range(struct xfs_rtalloc_args *args, xfs_rtxnum_t start,
+ xfs_rtxlen_t len, int val, xfs_rtxnum_t *new, int *stat);
+int xfs_rtfind_back(struct xfs_rtalloc_args *args, xfs_rtxnum_t start,
+ xfs_rtxnum_t limit, xfs_rtxnum_t *rtblock);
+int xfs_rtfind_forw(struct xfs_rtalloc_args *args, xfs_rtxnum_t start,
+ xfs_rtxnum_t limit, xfs_rtxnum_t *rtblock);
+int xfs_rtmodify_range(struct xfs_rtalloc_args *args, xfs_rtxnum_t start,
+ xfs_rtxlen_t len, int val);
+int xfs_rtmodify_summary_int(struct xfs_rtalloc_args *args, int log,
+ xfs_fileoff_t bbno, int delta, xfs_suminfo_t *sum);
+int xfs_rtmodify_summary(struct xfs_rtalloc_args *args, int log,
+ xfs_fileoff_t bbno, int delta);
+int xfs_rtfree_range(struct xfs_rtalloc_args *args, xfs_rtxnum_t start,
+ xfs_rtxlen_t len);
+int xfs_rtalloc_query_range(struct xfs_mount *mp, struct xfs_trans *tp,
+ const struct xfs_rtalloc_rec *low_rec,
+ const struct xfs_rtalloc_rec *high_rec,
+ xfs_rtalloc_query_range_fn fn, void *priv);
+int xfs_rtalloc_query_all(struct xfs_mount *mp, struct xfs_trans *tp,
+ xfs_rtalloc_query_range_fn fn,
+ void *priv);
+int xfs_rtalloc_extent_is_free(struct xfs_mount *mp, struct xfs_trans *tp,
+ xfs_rtxnum_t start, xfs_rtxlen_t len,
+ bool *is_free);
+/*
+ * Free an extent in the realtime subvolume. Length is expressed in
+ * realtime extents, as is the block number.
+ */
+int /* error */
+xfs_rtfree_extent(
+ struct xfs_trans *tp, /* transaction pointer */
+ xfs_rtxnum_t start, /* starting rtext number to free */
+ xfs_rtxlen_t len); /* length of extent freed */
+
+/* Same as above, but in units of rt blocks. */
+int xfs_rtfree_blocks(struct xfs_trans *tp, xfs_fsblock_t rtbno,
+ xfs_filblks_t rtlen);
+
+xfs_filblks_t xfs_rtbitmap_blockcount(struct xfs_mount *mp, xfs_rtbxlen_t
+ rtextents);
+unsigned long long xfs_rtbitmap_wordcount(struct xfs_mount *mp,
+ xfs_rtbxlen_t rtextents);
+
+xfs_filblks_t xfs_rtsummary_blockcount(struct xfs_mount *mp,
+ unsigned int rsumlevels, xfs_extlen_t rbmblocks);
+unsigned long long xfs_rtsummary_wordcount(struct xfs_mount *mp,
+ unsigned int rsumlevels, xfs_extlen_t rbmblocks);
+#else /* CONFIG_XFS_RT */
+# define xfs_rtfree_extent(t,b,l) (-ENOSYS)
+# define xfs_rtfree_blocks(t,rb,rl) (-ENOSYS)
+# define xfs_rtalloc_query_range(m,t,l,h,f,p) (-ENOSYS)
+# define xfs_rtalloc_query_all(m,t,f,p) (-ENOSYS)
+# define xfs_rtbitmap_read_buf(a,b) (-ENOSYS)
+# define xfs_rtsummary_read_buf(a,b) (-ENOSYS)
+# define xfs_rtbuf_cache_relse(a) (0)
+# define xfs_rtalloc_extent_is_free(m,t,s,l,i) (-ENOSYS)
+static inline xfs_filblks_t
+xfs_rtbitmap_blockcount(struct xfs_mount *mp, xfs_rtbxlen_t rtextents)
+{
+ /* shut up gcc */
+ return 0;
+}
+# define xfs_rtbitmap_wordcount(mp, r) (0)
+# define xfs_rtsummary_blockcount(mp, l, b) (0)
+# define xfs_rtsummary_wordcount(mp, l, b) (0)
+#endif /* CONFIG_XFS_RT */
+
+#endif /* __XFS_RTBITMAP_H__ */
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index 6264daaab37b..1f74d0cd1618 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -975,6 +975,8 @@ xfs_sb_mount_common(
mp->m_blockmask = sbp->sb_blocksize - 1;
mp->m_blockwsize = sbp->sb_blocksize >> XFS_WORDLOG;
mp->m_blockwmask = mp->m_blockwsize - 1;
+ mp->m_rtxblklog = log2_if_power2(sbp->sb_rextsize);
+ mp->m_rtxblkmask = mask64_if_power2(sbp->sb_rextsize);
mp->m_alloc_mxr[0] = xfs_allocbt_maxrecs(mp, sbp->sb_blocksize, 1);
mp->m_alloc_mxr[1] = xfs_allocbt_maxrecs(mp, sbp->sb_blocksize, 0);
diff --git a/fs/xfs/libxfs/xfs_sb.h b/fs/xfs/libxfs/xfs_sb.h
index a5e14740ec9a..19134b23c10b 100644
--- a/fs/xfs/libxfs/xfs_sb.h
+++ b/fs/xfs/libxfs/xfs_sb.h
@@ -25,7 +25,7 @@ extern uint64_t xfs_sb_version_to_features(struct xfs_sb *sbp);
extern int xfs_update_secondary_sbs(struct xfs_mount *mp);
-#define XFS_FS_GEOM_MAX_STRUCT_VER (4)
+#define XFS_FS_GEOM_MAX_STRUCT_VER (5)
extern void xfs_fs_geometry(struct xfs_mount *mp, struct xfs_fsop_geom *geo,
int struct_version);
extern int xfs_sb_read_secondary(struct xfs_mount *mp,
diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c
index 5b2f27cbdb80..6cd45e8c118d 100644
--- a/fs/xfs/libxfs/xfs_trans_resv.c
+++ b/fs/xfs/libxfs/xfs_trans_resv.c
@@ -19,6 +19,7 @@
#include "xfs_trans.h"
#include "xfs_qm.h"
#include "xfs_trans_space.h"
+#include "xfs_rtbitmap.h"
#define _ALLOC true
#define _FREE false
@@ -217,11 +218,12 @@ xfs_rtalloc_block_count(
struct xfs_mount *mp,
unsigned int num_ops)
{
- unsigned int blksz = XFS_FSB_TO_B(mp, 1);
- unsigned int rtbmp_bytes;
+ unsigned int rtbmp_blocks;
+ xfs_rtxlen_t rtxlen;
- rtbmp_bytes = (XFS_MAX_BMBT_EXTLEN / mp->m_sb.sb_rextsize) / NBBY;
- return (howmany(rtbmp_bytes, blksz) + 1) * num_ops;
+ rtxlen = xfs_extlen_to_rtxlen(mp, XFS_MAX_BMBT_EXTLEN);
+ rtbmp_blocks = xfs_rtbitmap_blockcount(mp, rtxlen);
+ return (rtbmp_blocks + 1) * num_ops;
}
/*
diff --git a/fs/xfs/libxfs/xfs_types.c b/fs/xfs/libxfs/xfs_types.c
index 5c2765934732..c299b16c9365 100644
--- a/fs/xfs/libxfs/xfs_types.c
+++ b/fs/xfs/libxfs/xfs_types.c
@@ -148,10 +148,10 @@ xfs_verify_rtbno(
/* Verify that a realtime device extent is fully contained inside the volume. */
bool
-xfs_verify_rtext(
+xfs_verify_rtbext(
struct xfs_mount *mp,
xfs_rtblock_t rtbno,
- xfs_rtblock_t len)
+ xfs_filblks_t len)
{
if (rtbno + len <= rtbno)
return false;
diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h
index 851220021484..533200c4ccc2 100644
--- a/fs/xfs/libxfs/xfs_types.h
+++ b/fs/xfs/libxfs/xfs_types.h
@@ -11,6 +11,7 @@ typedef uint32_t prid_t; /* project ID */
typedef uint32_t xfs_agblock_t; /* blockno in alloc. group */
typedef uint32_t xfs_agino_t; /* inode # within allocation grp */
typedef uint32_t xfs_extlen_t; /* extent length in blocks */
+typedef uint32_t xfs_rtxlen_t; /* file extent length in rtextents */
typedef uint32_t xfs_agnumber_t; /* allocation group number */
typedef uint64_t xfs_extnum_t; /* # of extents in a file */
typedef uint32_t xfs_aextnum_t; /* # extents in an attribute fork */
@@ -18,6 +19,7 @@ typedef int64_t xfs_fsize_t; /* bytes in a file */
typedef uint64_t xfs_ufsize_t; /* unsigned bytes in a file */
typedef int32_t xfs_suminfo_t; /* type of bitmap summary info */
+typedef uint32_t xfs_rtsumoff_t; /* offset of an rtsummary info word */
typedef uint32_t xfs_rtword_t; /* word type for bitmap manipulations */
typedef int64_t xfs_lsn_t; /* log sequence number */
@@ -31,6 +33,8 @@ typedef uint64_t xfs_rfsblock_t; /* blockno in filesystem (raw) */
typedef uint64_t xfs_rtblock_t; /* extent (block) in realtime area */
typedef uint64_t xfs_fileoff_t; /* block number in a file */
typedef uint64_t xfs_filblks_t; /* number of blocks in a file */
+typedef uint64_t xfs_rtxnum_t; /* rtextent number */
+typedef uint64_t xfs_rtbxlen_t; /* rtbitmap extent length in rtextents */
typedef int64_t xfs_srtblock_t; /* signed version of xfs_rtblock_t */
@@ -47,6 +51,7 @@ typedef void * xfs_failaddr_t;
#define NULLRFSBLOCK ((xfs_rfsblock_t)-1)
#define NULLRTBLOCK ((xfs_rtblock_t)-1)
#define NULLFILEOFF ((xfs_fileoff_t)-1)
+#define NULLRTEXTNO ((xfs_rtxnum_t)-1)
#define NULLAGBLOCK ((xfs_agblock_t)-1)
#define NULLAGNUMBER ((xfs_agnumber_t)-1)
@@ -145,6 +150,7 @@ typedef uint32_t xfs_dqid_t;
*/
#define XFS_NBBYLOG 3 /* log2(NBBY) */
#define XFS_WORDLOG 2 /* log2(sizeof(xfs_rtword_t)) */
+#define XFS_SUMINFOLOG 2 /* log2(sizeof(xfs_suminfo_t)) */
#define XFS_NBWORDLOG (XFS_NBBYLOG + XFS_WORDLOG)
#define XFS_NBWORD (1 << XFS_NBWORDLOG)
#define XFS_WORDMASK ((1 << XFS_WORDLOG) - 1)
@@ -229,8 +235,8 @@ bool xfs_verify_ino(struct xfs_mount *mp, xfs_ino_t ino);
bool xfs_internal_inum(struct xfs_mount *mp, xfs_ino_t ino);
bool xfs_verify_dir_ino(struct xfs_mount *mp, xfs_ino_t ino);
bool xfs_verify_rtbno(struct xfs_mount *mp, xfs_rtblock_t rtbno);
-bool xfs_verify_rtext(struct xfs_mount *mp, xfs_rtblock_t rtbno,
- xfs_rtblock_t len);
+bool xfs_verify_rtbext(struct xfs_mount *mp, xfs_rtblock_t rtbno,
+ xfs_filblks_t len);
bool xfs_verify_icount(struct xfs_mount *mp, unsigned long long icount);
bool xfs_verify_dablk(struct xfs_mount *mp, xfs_fileoff_t off);
void xfs_icount_range(struct xfs_mount *mp, unsigned long long *min,
diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c
index 75588915572e..06d8c1996a33 100644
--- a/fs/xfs/scrub/bmap.c
+++ b/fs/xfs/scrub/bmap.c
@@ -410,7 +410,7 @@ xchk_bmap_iextent(
/* Make sure the extent points to a valid place. */
if (info->is_rt &&
- !xfs_verify_rtext(mp, irec->br_startblock, irec->br_blockcount))
+ !xfs_verify_rtbext(mp, irec->br_startblock, irec->br_blockcount))
xchk_fblock_set_corrupt(info->sc, info->whichfork,
irec->br_startoff);
if (!info->is_rt &&
diff --git a/fs/xfs/scrub/fscounters.c b/fs/xfs/scrub/fscounters.c
index 05be757668bb..5799e9a94f1f 100644
--- a/fs/xfs/scrub/fscounters.c
+++ b/fs/xfs/scrub/fscounters.c
@@ -16,7 +16,7 @@
#include "xfs_health.h"
#include "xfs_btree.h"
#include "xfs_ag.h"
-#include "xfs_rtalloc.h"
+#include "xfs_rtbitmap.h"
#include "xfs_inode.h"
#include "xfs_icache.h"
#include "scrub/scrub.h"
diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c
index 59d7912fb75f..889f556bc98f 100644
--- a/fs/xfs/scrub/inode.c
+++ b/fs/xfs/scrub/inode.c
@@ -20,6 +20,7 @@
#include "xfs_reflink.h"
#include "xfs_rmap.h"
#include "xfs_bmap_util.h"
+#include "xfs_rtbitmap.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/btree.h"
@@ -225,7 +226,7 @@ xchk_inode_extsize(
*/
if ((flags & XFS_DIFLAG_RTINHERIT) &&
(flags & XFS_DIFLAG_EXTSZINHERIT) &&
- value % sc->mp->m_sb.sb_rextsize > 0)
+ xfs_extlen_to_rtxmod(sc->mp, value) > 0)
xchk_ino_set_warning(sc, ino);
}
diff --git a/fs/xfs/scrub/rtbitmap.c b/fs/xfs/scrub/rtbitmap.c
index 008ddb599e13..41a1d89ae8e6 100644
--- a/fs/xfs/scrub/rtbitmap.c
+++ b/fs/xfs/scrub/rtbitmap.c
@@ -11,7 +11,7 @@
#include "xfs_mount.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
-#include "xfs_rtalloc.h"
+#include "xfs_rtbitmap.h"
#include "xfs_inode.h"
#include "xfs_bmap.h"
#include "scrub/scrub.h"
@@ -48,12 +48,12 @@ xchk_rtbitmap_rec(
{
struct xfs_scrub *sc = priv;
xfs_rtblock_t startblock;
- xfs_rtblock_t blockcount;
+ xfs_filblks_t blockcount;
- startblock = rec->ar_startext * mp->m_sb.sb_rextsize;
- blockcount = rec->ar_extcount * mp->m_sb.sb_rextsize;
+ startblock = xfs_rtx_to_rtb(mp, rec->ar_startext);
+ blockcount = xfs_rtx_to_rtb(mp, rec->ar_extcount);
- if (!xfs_verify_rtext(mp, startblock, blockcount))
+ if (!xfs_verify_rtbext(mp, startblock, blockcount))
xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
return 0;
}
@@ -128,26 +128,22 @@ out:
void
xchk_xref_is_used_rt_space(
struct xfs_scrub *sc,
- xfs_rtblock_t fsbno,
+ xfs_rtblock_t rtbno,
xfs_extlen_t len)
{
- xfs_rtblock_t startext;
- xfs_rtblock_t endext;
- xfs_rtblock_t extcount;
+ xfs_rtxnum_t startext;
+ xfs_rtxnum_t endext;
bool is_free;
int error;
if (xchk_skip_xref(sc->sm))
return;
- startext = fsbno;
- endext = fsbno + len - 1;
- do_div(startext, sc->mp->m_sb.sb_rextsize);
- do_div(endext, sc->mp->m_sb.sb_rextsize);
- extcount = endext - startext + 1;
+ startext = xfs_rtb_to_rtx(sc->mp, rtbno);
+ endext = xfs_rtb_to_rtx(sc->mp, rtbno + len - 1);
xfs_ilock(sc->mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP);
- error = xfs_rtalloc_extent_is_free(sc->mp, sc->tp, startext, extcount,
- &is_free);
+ error = xfs_rtalloc_extent_is_free(sc->mp, sc->tp, startext,
+ endext - startext + 1, &is_free);
if (!xchk_should_check_xref(sc, &error, NULL))
goto out_unlock;
if (is_free)
diff --git a/fs/xfs/scrub/rtsummary.c b/fs/xfs/scrub/rtsummary.c
index 437ed9acbb27..8b15c47408d0 100644
--- a/fs/xfs/scrub/rtsummary.c
+++ b/fs/xfs/scrub/rtsummary.c
@@ -13,7 +13,7 @@
#include "xfs_inode.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
-#include "xfs_rtalloc.h"
+#include "xfs_rtbitmap.h"
#include "xfs_bit.h"
#include "xfs_bmap.h"
#include "scrub/scrub.h"
@@ -81,34 +81,45 @@ typedef unsigned int xchk_rtsumoff_t;
static inline int
xfsum_load(
struct xfs_scrub *sc,
- xchk_rtsumoff_t sumoff,
- xfs_suminfo_t *info)
+ xfs_rtsumoff_t sumoff,
+ union xfs_suminfo_raw *rawinfo)
{
- return xfile_obj_load(sc->xfile, info, sizeof(xfs_suminfo_t),
+ return xfile_obj_load(sc->xfile, rawinfo,
+ sizeof(union xfs_suminfo_raw),
sumoff << XFS_WORDLOG);
}
static inline int
xfsum_store(
struct xfs_scrub *sc,
- xchk_rtsumoff_t sumoff,
- const xfs_suminfo_t info)
+ xfs_rtsumoff_t sumoff,
+ const union xfs_suminfo_raw rawinfo)
{
- return xfile_obj_store(sc->xfile, &info, sizeof(xfs_suminfo_t),
+ return xfile_obj_store(sc->xfile, &rawinfo,
+ sizeof(union xfs_suminfo_raw),
sumoff << XFS_WORDLOG);
}
static inline int
xfsum_copyout(
struct xfs_scrub *sc,
- xchk_rtsumoff_t sumoff,
- xfs_suminfo_t *info,
+ xfs_rtsumoff_t sumoff,
+ union xfs_suminfo_raw *rawinfo,
unsigned int nr_words)
{
- return xfile_obj_load(sc->xfile, info, nr_words << XFS_WORDLOG,
+ return xfile_obj_load(sc->xfile, rawinfo, nr_words << XFS_WORDLOG,
sumoff << XFS_WORDLOG);
}
+static inline xfs_suminfo_t
+xchk_rtsum_inc(
+ struct xfs_mount *mp,
+ union xfs_suminfo_raw *v)
+{
+ v->old += 1;
+ return v->old;
+}
+
/* Update the summary file to reflect the free extent that we've accumulated. */
STATIC int
xchk_rtsum_record_free(
@@ -121,23 +132,24 @@ xchk_rtsum_record_free(
xfs_fileoff_t rbmoff;
xfs_rtblock_t rtbno;
xfs_filblks_t rtlen;
- xchk_rtsumoff_t offs;
+ xfs_rtsumoff_t offs;
unsigned int lenlog;
- xfs_suminfo_t v = 0;
+ union xfs_suminfo_raw v;
+ xfs_suminfo_t value;
int error = 0;
if (xchk_should_terminate(sc, &error))
return error;
/* Compute the relevant location in the rtsum file. */
- rbmoff = XFS_BITTOBLOCK(mp, rec->ar_startext);
+ rbmoff = xfs_rtx_to_rbmblock(mp, rec->ar_startext);
lenlog = XFS_RTBLOCKLOG(rec->ar_extcount);
- offs = XFS_SUMOFFS(mp, lenlog, rbmoff);
+ offs = xfs_rtsumoffs(mp, lenlog, rbmoff);
- rtbno = rec->ar_startext * mp->m_sb.sb_rextsize;
- rtlen = rec->ar_extcount * mp->m_sb.sb_rextsize;
+ rtbno = xfs_rtx_to_rtb(mp, rec->ar_startext);
+ rtlen = xfs_rtx_to_rtb(mp, rec->ar_extcount);
- if (!xfs_verify_rtext(mp, rtbno, rtlen)) {
+ if (!xfs_verify_rtbext(mp, rtbno, rtlen)) {
xchk_ino_xref_set_corrupt(sc, mp->m_rbmip->i_ino);
return -EFSCORRUPTED;
}
@@ -147,9 +159,9 @@ xchk_rtsum_record_free(
if (error)
return error;
- v++;
+ value = xchk_rtsum_inc(sc->mp, &v);
trace_xchk_rtsum_record_free(mp, rec->ar_startext, rec->ar_extcount,
- lenlog, offs, v);
+ lenlog, offs, value);
return xfsum_store(sc, offs, v);
}
@@ -160,12 +172,11 @@ xchk_rtsum_compute(
struct xfs_scrub *sc)
{
struct xfs_mount *mp = sc->mp;
- unsigned long long rtbmp_bytes;
+ unsigned long long rtbmp_blocks;
/* If the bitmap size doesn't match the computed size, bail. */
- rtbmp_bytes = howmany_64(mp->m_sb.sb_rextents, NBBY);
- if (roundup_64(rtbmp_bytes, mp->m_sb.sb_blocksize) !=
- mp->m_rbmip->i_disk_size)
+ rtbmp_blocks = xfs_rtbitmap_blockcount(mp, mp->m_sb.sb_rextents);
+ if (XFS_FSB_TO_B(mp, rtbmp_blocks) != mp->m_rbmip->i_disk_size)
return -EFSCORRUPTED;
return xfs_rtalloc_query_all(sc->mp, sc->tp, xchk_rtsum_record_free,
@@ -177,14 +188,18 @@ STATIC int
xchk_rtsum_compare(
struct xfs_scrub *sc)
{
+ struct xfs_rtalloc_args args = {
+ .mp = sc->mp,
+ .tp = sc->tp,
+ };
struct xfs_mount *mp = sc->mp;
- struct xfs_buf *bp;
struct xfs_bmbt_irec map;
xfs_fileoff_t off;
xchk_rtsumoff_t sumoff = 0;
int nmap;
for (off = 0; off < XFS_B_TO_FSB(mp, mp->m_rsumsize); off++) {
+ union xfs_suminfo_raw *ondisk_info;
int error = 0;
if (xchk_should_terminate(sc, &error))
@@ -205,22 +220,23 @@ xchk_rtsum_compare(
}
/* Read a block's worth of ondisk rtsummary file. */
- error = xfs_rtbuf_get(mp, sc->tp, off, 1, &bp);
+ error = xfs_rtsummary_read_buf(&args, off);
if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, off, &error))
return error;
/* Read a block's worth of computed rtsummary file. */
error = xfsum_copyout(sc, sumoff, sc->buf, mp->m_blockwsize);
if (error) {
- xfs_trans_brelse(sc->tp, bp);
+ xfs_rtbuf_cache_relse(&args);
return error;
}
- if (memcmp(bp->b_addr, sc->buf,
+ ondisk_info = xfs_rsumblock_infoptr(&args, 0);
+ if (memcmp(ondisk_info, sc->buf,
mp->m_blockwsize << XFS_WORDLOG) != 0)
xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, off);
- xfs_trans_brelse(sc->tp, bp);
+ xfs_rtbuf_cache_relse(&args);
sumoff += mp->m_blockwsize;
}
diff --git a/fs/xfs/scrub/trace.c b/fs/xfs/scrub/trace.c
index 46249e7b17e0..29afa4851235 100644
--- a/fs/xfs/scrub/trace.c
+++ b/fs/xfs/scrub/trace.c
@@ -13,6 +13,7 @@
#include "xfs_inode.h"
#include "xfs_btree.h"
#include "xfs_ag.h"
+#include "xfs_rtbitmap.h"
#include "scrub/scrub.h"
#include "scrub/xfile.h"
#include "scrub/xfarray.h"
diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h
index cbd4d01e253c..4a8bc6f3c8f2 100644
--- a/fs/xfs/scrub/trace.h
+++ b/fs/xfs/scrub/trace.h
@@ -1036,17 +1036,18 @@ TRACE_EVENT(xfarray_sort_stats,
#ifdef CONFIG_XFS_RT
TRACE_EVENT(xchk_rtsum_record_free,
- TP_PROTO(struct xfs_mount *mp, xfs_rtblock_t start,
- uint64_t len, unsigned int log, loff_t pos, xfs_suminfo_t v),
- TP_ARGS(mp, start, len, log, pos, v),
+ TP_PROTO(struct xfs_mount *mp, xfs_rtxnum_t start,
+ xfs_rtbxlen_t len, unsigned int log, loff_t pos,
+ xfs_suminfo_t value),
+ TP_ARGS(mp, start, len, log, pos, value),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(dev_t, rtdev)
- __field(xfs_rtblock_t, start)
+ __field(xfs_rtxnum_t, start)
__field(unsigned long long, len)
__field(unsigned int, log)
__field(loff_t, pos)
- __field(xfs_suminfo_t, v)
+ __field(xfs_suminfo_t, value)
),
TP_fast_assign(
__entry->dev = mp->m_super->s_dev;
@@ -1055,7 +1056,7 @@ TRACE_EVENT(xchk_rtsum_record_free,
__entry->len = len;
__entry->log = log;
__entry->pos = pos;
- __entry->v = v;
+ __entry->value = value;
),
TP_printk("dev %d:%d rtdev %d:%d rtx 0x%llx rtxcount 0x%llx log %u rsumpos 0x%llx sumcount %u",
MAJOR(__entry->dev), MINOR(__entry->dev),
@@ -1064,7 +1065,7 @@ TRACE_EVENT(xchk_rtsum_record_free,
__entry->len,
__entry->log,
__entry->pos,
- __entry->v)
+ __entry->value)
);
#endif /* CONFIG_XFS_RT */
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 40e0a1f1f753..731260a5af6d 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -28,6 +28,7 @@
#include "xfs_icache.h"
#include "xfs_iomap.h"
#include "xfs_reflink.h"
+#include "xfs_rtbitmap.h"
/* Kernel only BMAP related definitions and functions */
@@ -75,28 +76,28 @@ xfs_bmap_rtalloc(
{
struct xfs_mount *mp = ap->ip->i_mount;
xfs_fileoff_t orig_offset = ap->offset;
- xfs_rtblock_t rtb;
- xfs_extlen_t prod = 0; /* product factor for allocators */
+ xfs_rtxnum_t rtx;
+ xfs_rtxlen_t prod = 0; /* product factor for allocators */
xfs_extlen_t mod = 0; /* product factor for allocators */
- xfs_extlen_t ralen = 0; /* realtime allocation length */
+ xfs_rtxlen_t ralen = 0; /* realtime allocation length */
xfs_extlen_t align; /* minimum allocation alignment */
xfs_extlen_t orig_length = ap->length;
xfs_extlen_t minlen = mp->m_sb.sb_rextsize;
- xfs_extlen_t raminlen;
+ xfs_rtxlen_t raminlen;
bool rtlocked = false;
bool ignore_locality = false;
int error;
align = xfs_get_extsz_hint(ap->ip);
retry:
- prod = align / mp->m_sb.sb_rextsize;
+ prod = xfs_extlen_to_rtxlen(mp, align);
error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev,
align, 1, ap->eof, 0,
ap->conv, &ap->offset, &ap->length);
if (error)
return error;
ASSERT(ap->length);
- ASSERT(ap->length % mp->m_sb.sb_rextsize == 0);
+ ASSERT(xfs_extlen_to_rtxmod(mp, ap->length) == 0);
/*
* If we shifted the file offset downward to satisfy an extent size
@@ -116,17 +117,14 @@ retry:
prod = 1;
/*
* Set ralen to be the actual requested length in rtextents.
- */
- ralen = ap->length / mp->m_sb.sb_rextsize;
- /*
+ *
* If the old value was close enough to XFS_BMBT_MAX_EXTLEN that
* we rounded up to it, cut it back so it's valid again.
* Note that if it's a really large request (bigger than
* XFS_BMBT_MAX_EXTLEN), we don't hear about that number, and can't
* adjust the starting point to match it.
*/
- if (ralen * mp->m_sb.sb_rextsize >= XFS_MAX_BMBT_EXTLEN)
- ralen = XFS_MAX_BMBT_EXTLEN / mp->m_sb.sb_rextsize;
+ ralen = xfs_extlen_to_rtxlen(mp, min(ap->length, XFS_MAX_BMBT_EXTLEN));
/*
* Lock out modifications to both the RT bitmap and summary inodes
@@ -144,12 +142,10 @@ retry:
* pick an extent that will space things out in the rt area.
*/
if (ap->eof && ap->offset == 0) {
- xfs_rtblock_t rtx; /* realtime extent no */
-
error = xfs_rtpick_extent(mp, ap->tp, ralen, &rtx);
if (error)
return error;
- ap->blkno = rtx * mp->m_sb.sb_rextsize;
+ ap->blkno = xfs_rtx_to_rtb(mp, rtx);
} else {
ap->blkno = 0;
}
@@ -160,20 +156,18 @@ retry:
* Realtime allocation, done through xfs_rtallocate_extent.
*/
if (ignore_locality)
- ap->blkno = 0;
+ rtx = 0;
else
- do_div(ap->blkno, mp->m_sb.sb_rextsize);
- rtb = ap->blkno;
- ap->length = ralen;
- raminlen = max_t(xfs_extlen_t, 1, minlen / mp->m_sb.sb_rextsize);
- error = xfs_rtallocate_extent(ap->tp, ap->blkno, raminlen, ap->length,
- &ralen, ap->wasdel, prod, &rtb);
+ rtx = xfs_rtb_to_rtx(mp, ap->blkno);
+ raminlen = max_t(xfs_rtxlen_t, 1, xfs_extlen_to_rtxlen(mp, minlen));
+ error = xfs_rtallocate_extent(ap->tp, rtx, raminlen, ralen, &ralen,
+ ap->wasdel, prod, &rtx);
if (error)
return error;
- if (rtb != NULLRTBLOCK) {
- ap->blkno = rtb * mp->m_sb.sb_rextsize;
- ap->length = ralen * mp->m_sb.sb_rextsize;
+ if (rtx != NULLRTEXTNO) {
+ ap->blkno = xfs_rtx_to_rtb(mp, rtx);
+ ap->length = xfs_rtxlen_to_extlen(mp, ralen);
ap->ip->i_nblocks += ap->length;
xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE);
if (ap->wasdel)
@@ -690,7 +684,7 @@ xfs_can_free_eofblocks(
*/
end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_ISIZE(ip));
if (XFS_IS_REALTIME_INODE(ip) && mp->m_sb.sb_rextsize > 1)
- end_fsb = roundup_64(end_fsb, mp->m_sb.sb_rextsize);
+ end_fsb = xfs_rtb_roundup_rtx(mp, end_fsb);
last_fsb = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);
if (last_fsb <= end_fsb)
return false;
@@ -780,12 +774,10 @@ xfs_alloc_file_space(
{
xfs_mount_t *mp = ip->i_mount;
xfs_off_t count;
- xfs_filblks_t allocated_fsb;
xfs_filblks_t allocatesize_fsb;
xfs_extlen_t extsz, temp;
xfs_fileoff_t startoffset_fsb;
xfs_fileoff_t endoffset_fsb;
- int nimaps;
int rt;
xfs_trans_t *tp;
xfs_bmbt_irec_t imaps[1], *imapp;
@@ -808,7 +800,6 @@ xfs_alloc_file_space(
count = len;
imapp = &imaps[0];
- nimaps = 1;
startoffset_fsb = XFS_B_TO_FSBT(mp, offset);
endoffset_fsb = XFS_B_TO_FSB(mp, offset + count);
allocatesize_fsb = endoffset_fsb - startoffset_fsb;
@@ -819,6 +810,7 @@ xfs_alloc_file_space(
while (allocatesize_fsb && !error) {
xfs_fileoff_t s, e;
unsigned int dblocks, rblocks, resblks;
+ int nimaps = 1;
/*
* Determine space reservations for data/realtime.
@@ -884,15 +876,19 @@ xfs_alloc_file_space(
if (error)
break;
- allocated_fsb = imapp->br_blockcount;
-
- if (nimaps == 0) {
- error = -ENOSPC;
- break;
+ /*
+ * If the allocator cannot find a single free extent large
+ * enough to cover the start block of the requested range,
+ * xfs_bmapi_write will return 0 but leave *nimaps set to 0.
+ *
+ * In that case we simply need to keep looping with the same
+ * startoffset_fsb so that one of the following allocations
+ * will eventually reach the requested range.
+ */
+ if (nimaps) {
+ startoffset_fsb += imapp->br_blockcount;
+ allocatesize_fsb -= imapp->br_blockcount;
}
-
- startoffset_fsb += allocated_fsb;
- allocatesize_fsb -= allocated_fsb;
}
return error;
@@ -989,10 +985,8 @@ xfs_free_file_space(
/* We can only free complete realtime extents. */
if (XFS_IS_REALTIME_INODE(ip) && mp->m_sb.sb_rextsize > 1) {
- startoffset_fsb = roundup_64(startoffset_fsb,
- mp->m_sb.sb_rextsize);
- endoffset_fsb = rounddown_64(endoffset_fsb,
- mp->m_sb.sb_rextsize);
+ startoffset_fsb = xfs_rtb_roundup_rtx(mp, startoffset_fsb);
+ endoffset_fsb = xfs_rtb_rounddown_rtx(mp, endoffset_fsb);
}
/*
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 203700278ddb..e33e5e13b95f 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -214,6 +214,43 @@ xfs_ilock_iocb(
return 0;
}
+static int
+xfs_ilock_iocb_for_write(
+ struct kiocb *iocb,
+ unsigned int *lock_mode)
+{
+ ssize_t ret;
+ struct xfs_inode *ip = XFS_I(file_inode(iocb->ki_filp));
+
+ ret = xfs_ilock_iocb(iocb, *lock_mode);
+ if (ret)
+ return ret;
+
+ if (*lock_mode == XFS_IOLOCK_EXCL)
+ return 0;
+ if (!xfs_iflags_test(ip, XFS_IREMAPPING))
+ return 0;
+
+ xfs_iunlock(ip, *lock_mode);
+ *lock_mode = XFS_IOLOCK_EXCL;
+ return xfs_ilock_iocb(iocb, *lock_mode);
+}
+
+static unsigned int
+xfs_ilock_for_write_fault(
+ struct xfs_inode *ip)
+{
+ /* get a shared lock if no remapping in progress */
+ xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
+ if (!xfs_iflags_test(ip, XFS_IREMAPPING))
+ return XFS_MMAPLOCK_SHARED;
+
+ /* wait for remapping to complete */
+ xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
+ xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
+ return XFS_MMAPLOCK_EXCL;
+}
+
STATIC ssize_t
xfs_file_dio_read(
struct kiocb *iocb,
@@ -551,7 +588,7 @@ xfs_file_dio_write_aligned(
unsigned int iolock = XFS_IOLOCK_SHARED;
ssize_t ret;
- ret = xfs_ilock_iocb(iocb, iolock);
+ ret = xfs_ilock_iocb_for_write(iocb, &iolock);
if (ret)
return ret;
ret = xfs_file_write_checks(iocb, from, &iolock);
@@ -618,7 +655,7 @@ retry_exclusive:
flags = IOMAP_DIO_FORCE_WAIT;
}
- ret = xfs_ilock_iocb(iocb, iolock);
+ ret = xfs_ilock_iocb_for_write(iocb, &iolock);
if (ret)
return ret;
@@ -1180,7 +1217,7 @@ xfs_file_remap_range(
if (xfs_file_sync_writes(file_in) || xfs_file_sync_writes(file_out))
xfs_log_force_inode(dest);
out_unlock:
- xfs_iunlock2_io_mmap(src, dest);
+ xfs_iunlock2_remapping(src, dest);
if (ret)
trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_);
return remapped > 0 ? remapped : ret;
@@ -1328,6 +1365,7 @@ __xfs_filemap_fault(
struct inode *inode = file_inode(vmf->vma->vm_file);
struct xfs_inode *ip = XFS_I(inode);
vm_fault_t ret;
+ unsigned int lock_mode = 0;
trace_xfs_filemap_fault(ip, order, write_fault);
@@ -1336,25 +1374,24 @@ __xfs_filemap_fault(
file_update_time(vmf->vma->vm_file);
}
+ if (IS_DAX(inode) || write_fault)
+ lock_mode = xfs_ilock_for_write_fault(XFS_I(inode));
+
if (IS_DAX(inode)) {
pfn_t pfn;
- xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
ret = xfs_dax_fault(vmf, order, write_fault, &pfn);
if (ret & VM_FAULT_NEEDDSYNC)
ret = dax_finish_sync_fault(vmf, order, pfn);
- xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
+ } else if (write_fault) {
+ ret = iomap_page_mkwrite(vmf, &xfs_page_mkwrite_iomap_ops);
} else {
- if (write_fault) {
- xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
- ret = iomap_page_mkwrite(vmf,
- &xfs_page_mkwrite_iomap_ops);
- xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
- } else {
- ret = filemap_fault(vmf);
- }
+ ret = filemap_fault(vmf);
}
+ if (lock_mode)
+ xfs_iunlock(XFS_I(inode), lock_mode);
+
if (write_fault)
sb_end_pagefault(inode->i_sb);
return ret;
diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c
index 736e5545f584..5a72217f5feb 100644
--- a/fs/xfs/xfs_fsmap.c
+++ b/fs/xfs/xfs_fsmap.c
@@ -23,7 +23,7 @@
#include "xfs_refcount.h"
#include "xfs_refcount_btree.h"
#include "xfs_alloc_btree.h"
-#include "xfs_rtalloc.h"
+#include "xfs_rtbitmap.h"
#include "xfs_ag.h"
/* Convert an xfs_fsmap to an fsmap. */
@@ -483,11 +483,11 @@ xfs_getfsmap_rtdev_rtbitmap_helper(
xfs_rtblock_t rtbno;
xfs_daddr_t rec_daddr, len_daddr;
- rtbno = rec->ar_startext * mp->m_sb.sb_rextsize;
+ rtbno = xfs_rtx_to_rtb(mp, rec->ar_startext);
rec_daddr = XFS_FSB_TO_BB(mp, rtbno);
irec.rm_startblock = rtbno;
- rtbno = rec->ar_extcount * mp->m_sb.sb_rextsize;
+ rtbno = xfs_rtx_to_rtb(mp, rec->ar_extcount);
len_daddr = XFS_FSB_TO_BB(mp, rtbno);
irec.rm_blockcount = rtbno;
@@ -514,7 +514,7 @@ xfs_getfsmap_rtdev_rtbitmap(
uint64_t eofs;
int error;
- eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_rextents * mp->m_sb.sb_rextsize);
+ eofs = XFS_FSB_TO_BB(mp, xfs_rtx_to_rtb(mp, mp->m_sb.sb_rextents));
if (keys[0].fmr_physical >= eofs)
return 0;
start_rtb = XFS_BB_TO_FSBT(mp,
@@ -539,11 +539,8 @@ xfs_getfsmap_rtdev_rtbitmap(
* Set up query parameters to return free rtextents covering the range
* we want.
*/
- alow.ar_startext = start_rtb;
- ahigh.ar_startext = end_rtb;
- do_div(alow.ar_startext, mp->m_sb.sb_rextsize);
- if (do_div(ahigh.ar_startext, mp->m_sb.sb_rextsize))
- ahigh.ar_startext++;
+ alow.ar_startext = xfs_rtb_to_rtx(mp, start_rtb);
+ ahigh.ar_startext = xfs_rtb_to_rtxup(mp, end_rtb);
error = xfs_rtalloc_query_range(mp, tp, &alow, &ahigh,
xfs_getfsmap_rtdev_rtbitmap_helper, info);
if (error)
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 36f5cf802c07..c0f1c89786c2 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -918,6 +918,13 @@ xfs_droplink(
xfs_trans_t *tp,
xfs_inode_t *ip)
{
+ if (VFS_I(ip)->i_nlink == 0) {
+ xfs_alert(ip->i_mount,
+ "%s: Attempt to drop inode (%llu) with nlink zero.",
+ __func__, ip->i_ino);
+ return -EFSCORRUPTED;
+ }
+
xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
drop_nlink(VFS_I(ip));
@@ -3621,6 +3628,23 @@ xfs_iunlock2_io_mmap(
inode_unlock(VFS_I(ip1));
}
+/* Drop the MMAPLOCK and the IOLOCK after a remap completes. */
+void
+xfs_iunlock2_remapping(
+ struct xfs_inode *ip1,
+ struct xfs_inode *ip2)
+{
+ xfs_iflags_clear(ip1, XFS_IREMAPPING);
+
+ if (ip1 != ip2)
+ xfs_iunlock(ip1, XFS_MMAPLOCK_SHARED);
+ xfs_iunlock(ip2, XFS_MMAPLOCK_EXCL);
+
+ if (ip1 != ip2)
+ inode_unlock_shared(VFS_I(ip1));
+ inode_unlock(VFS_I(ip2));
+}
+
/*
* Reload the incore inode list for this inode. Caller should ensure that
* the link count cannot change, either by taking ILOCK_SHARED or otherwise
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 0c5bdb91152e..3dc47937da5d 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -347,6 +347,14 @@ static inline bool xfs_inode_has_large_extent_counts(struct xfs_inode *ip)
/* Quotacheck is running but inode has not been added to quota counts. */
#define XFS_IQUOTAUNCHECKED (1 << 14)
+/*
+ * Remap in progress. Callers that wish to update file data while
+ * holding a shared IOLOCK or MMAPLOCK must drop the lock and retake
+ * the lock in exclusive mode. Relocking the file will block until
+ * IREMAPPING is cleared.
+ */
+#define XFS_IREMAPPING (1U << 15)
+
/* All inode state flags related to inode reclaim. */
#define XFS_ALL_IRECLAIM_FLAGS (XFS_IRECLAIMABLE | \
XFS_IRECLAIM | \
@@ -595,6 +603,7 @@ void xfs_end_io(struct work_struct *work);
int xfs_ilock2_io_mmap(struct xfs_inode *ip1, struct xfs_inode *ip2);
void xfs_iunlock2_io_mmap(struct xfs_inode *ip1, struct xfs_inode *ip2);
+void xfs_iunlock2_remapping(struct xfs_inode *ip1, struct xfs_inode *ip2);
static inline bool
xfs_inode_unlinked_incomplete(
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 17c51804f9c6..cd7803fda8b1 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -19,6 +19,7 @@
#include "xfs_log.h"
#include "xfs_log_priv.h"
#include "xfs_error.h"
+#include "xfs_rtbitmap.h"
#include <linux/iversion.h>
@@ -107,7 +108,7 @@ xfs_inode_item_precommit(
*/
if ((ip->i_diflags & XFS_DIFLAG_RTINHERIT) &&
(ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) &&
- (ip->i_extsize % ip->i_mount->m_sb.sb_rextsize) > 0) {
+ xfs_extlen_to_rtxmod(ip->i_mount, ip->i_extsize) > 0) {
ip->i_diflags &= ~(XFS_DIFLAG_EXTSIZE |
XFS_DIFLAG_EXTSZINHERIT);
ip->i_extsize = 0;
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 55bb01173cde..a82470e027f7 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -38,6 +38,7 @@
#include "xfs_reflink.h"
#include "xfs_ioctl.h"
#include "xfs_xattr.h"
+#include "xfs_rtbitmap.h"
#include <linux/mount.h>
#include <linux/namei.h>
@@ -1004,7 +1005,7 @@ xfs_fill_fsxattr(
* later.
*/
if ((ip->i_diflags & XFS_DIFLAG_RTINHERIT) &&
- ip->i_extsize % mp->m_sb.sb_rextsize > 0) {
+ xfs_extlen_to_rtxmod(mp, ip->i_extsize) > 0) {
fa->fsx_xflags &= ~(FS_XFLAG_EXTSIZE |
FS_XFLAG_EXTSZINHERIT);
fa->fsx_extsize = 0;
@@ -1130,7 +1131,7 @@ xfs_ioctl_setattr_xflags(
/* If realtime flag is set then must have realtime device */
if (fa->fsx_xflags & FS_XFLAG_REALTIME) {
if (mp->m_sb.sb_rblocks == 0 || mp->m_sb.sb_rextsize == 0 ||
- (ip->i_extsize % mp->m_sb.sb_rextsize))
+ xfs_extlen_to_rtxmod(mp, ip->i_extsize))
return -EINVAL;
}
diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h
index e9d317a3dafe..d7873e0360f0 100644
--- a/fs/xfs/xfs_linux.h
+++ b/fs/xfs/xfs_linux.h
@@ -198,6 +198,18 @@ static inline uint64_t howmany_64(uint64_t x, uint32_t y)
return x;
}
+/* If @b is a power of 2, return log2(b). Else return -1. */
+static inline int8_t log2_if_power2(unsigned long b)
+{
+ return is_power_of_2(b) ? ilog2(b) : -1;
+}
+
+/* If @b is a power of 2, return a mask of the lower bits, else return zero. */
+static inline unsigned long long mask64_if_power2(unsigned long b)
+{
+ return is_power_of_2(b) ? b - 1 : 0;
+}
+
int xfs_rw_bdev(struct block_device *bdev, sector_t sector, unsigned int count,
char *data, enum req_op op);
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 219681d29fbc..503fe3c7edbf 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -101,9 +101,9 @@ typedef struct xfs_mount {
/*
* Optional cache of rt summary level per bitmap block with the
- * invariant that m_rsum_cache[bbno] <= the minimum i for which
- * rsum[i][bbno] != 0. Reads and writes are serialized by the rsumip
- * inode lock.
+ * invariant that m_rsum_cache[bbno] > the maximum i for which
+ * rsum[i][bbno] != 0, or 0 if rsum[i][bbno] == 0 for all i.
+ * Reads and writes are serialized by the rsumip inode lock.
*/
uint8_t *m_rsum_cache;
struct xfs_mru_cache *m_filestream; /* per-mount filestream data */
@@ -119,6 +119,7 @@ typedef struct xfs_mount {
uint8_t m_blkbb_log; /* blocklog - BBSHIFT */
uint8_t m_agno_log; /* log #ag's */
uint8_t m_sectbb_log; /* sectlog - BBSHIFT */
+ int8_t m_rtxblklog; /* log2 of rextsize, if possible */
uint m_blockmask; /* sb_blocksize-1 */
uint m_blockwsize; /* sb_blocksize in words */
uint m_blockwmask; /* blockwsize-1 */
@@ -152,6 +153,7 @@ typedef struct xfs_mount {
uint64_t m_features; /* active filesystem features */
uint64_t m_low_space[XFS_LOWSP_MAX];
uint64_t m_low_rtexts[XFS_LOWSP_MAX];
+ uint64_t m_rtxblkmask; /* rt extent block mask */
struct xfs_ino_geometry m_ino_geo; /* inode geometry */
struct xfs_trans_resv m_resv; /* precomputed res values */
/* low free space thresholds */
diff --git a/fs/xfs/xfs_ondisk.h b/fs/xfs/xfs_ondisk.h
index c4cc99b70dd3..21a7e350b4c5 100644
--- a/fs/xfs/xfs_ondisk.h
+++ b/fs/xfs/xfs_ondisk.h
@@ -72,6 +72,10 @@ xfs_check_ondisk_structs(void)
XFS_CHECK_STRUCT_SIZE(xfs_attr_leaf_map_t, 4);
XFS_CHECK_STRUCT_SIZE(xfs_attr_leaf_name_local_t, 4);
+ /* realtime structures */
+ XFS_CHECK_STRUCT_SIZE(union xfs_rtword_raw, 4);
+ XFS_CHECK_STRUCT_SIZE(union xfs_suminfo_raw, 4);
+
/*
* m68k has problems with xfs_attr_leaf_name_remote_t, but we pad it to
* 4 bytes anyway so it's not obviously a problem. Hence for the moment
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index eb9102453aff..658edee8381d 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -1540,6 +1540,10 @@ xfs_reflink_remap_prep(
if (ret)
goto out_unlock;
+ xfs_iflags_set(src, XFS_IREMAPPING);
+ if (inode_in != inode_out)
+ xfs_ilock_demote(src, XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL);
+
return 0;
out_unlock:
xfs_iunlock2_io_mmap(src, dest);
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 2e1a4e5cd03d..88c48de5c9c8 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -19,6 +19,7 @@
#include "xfs_icache.h"
#include "xfs_rtalloc.h"
#include "xfs_sb.h"
+#include "xfs_rtbitmap.h"
/*
* Read and return the summary information for a given extent size,
@@ -28,48 +29,48 @@
*/
static int
xfs_rtget_summary(
- xfs_mount_t *mp, /* file system mount structure */
- xfs_trans_t *tp, /* transaction pointer */
- int log, /* log2 of extent size */
- xfs_rtblock_t bbno, /* bitmap block number */
- struct xfs_buf **rbpp, /* in/out: summary block buffer */
- xfs_fsblock_t *rsb, /* in/out: summary block number */
- xfs_suminfo_t *sum) /* out: summary info for this block */
+ struct xfs_rtalloc_args *args,
+ int log, /* log2 of extent size */
+ xfs_fileoff_t bbno, /* bitmap block number */
+ xfs_suminfo_t *sum) /* out: summary info for this block */
{
- return xfs_rtmodify_summary_int(mp, tp, log, bbno, 0, rbpp, rsb, sum);
+ return xfs_rtmodify_summary_int(args, log, bbno, 0, sum);
}
/*
* Return whether there are any free extents in the size range given
* by low and high, for the bitmap block bbno.
*/
-STATIC int /* error */
+STATIC int
xfs_rtany_summary(
- xfs_mount_t *mp, /* file system mount structure */
- xfs_trans_t *tp, /* transaction pointer */
- int low, /* low log2 extent size */
- int high, /* high log2 extent size */
- xfs_rtblock_t bbno, /* bitmap block number */
- struct xfs_buf **rbpp, /* in/out: summary block buffer */
- xfs_fsblock_t *rsb, /* in/out: summary block number */
- int *stat) /* out: any good extents here? */
+ struct xfs_rtalloc_args *args,
+ int low, /* low log2 extent size */
+ int high, /* high log2 extent size */
+ xfs_fileoff_t bbno, /* bitmap block number */
+ int *maxlog) /* out: max log2 extent size free */
{
- int error; /* error value */
- int log; /* loop counter, log2 of ext. size */
- xfs_suminfo_t sum; /* summary data */
-
- /* There are no extents at levels < m_rsum_cache[bbno]. */
- if (mp->m_rsum_cache && low < mp->m_rsum_cache[bbno])
- low = mp->m_rsum_cache[bbno];
+ struct xfs_mount *mp = args->mp;
+ int error;
+ int log; /* loop counter, log2 of ext. size */
+ xfs_suminfo_t sum; /* summary data */
+
+ /* There are no extents at levels >= m_rsum_cache[bbno]. */
+ if (mp->m_rsum_cache) {
+ high = min(high, mp->m_rsum_cache[bbno] - 1);
+ if (low > high) {
+ *maxlog = -1;
+ return 0;
+ }
+ }
/*
* Loop over logs of extent sizes.
*/
- for (log = low; log <= high; log++) {
+ for (log = high; log >= low; log--) {
/*
* Get one summary datum.
*/
- error = xfs_rtget_summary(mp, tp, log, bbno, rbpp, rsb, &sum);
+ error = xfs_rtget_summary(args, log, bbno, &sum);
if (error) {
return error;
}
@@ -77,18 +78,18 @@ xfs_rtany_summary(
* If there are any, return success.
*/
if (sum) {
- *stat = 1;
+ *maxlog = log;
goto out;
}
}
/*
* Found nothing, return failure.
*/
- *stat = 0;
+ *maxlog = -1;
out:
- /* There were no extents at levels < log. */
- if (mp->m_rsum_cache && log > mp->m_rsum_cache[bbno])
- mp->m_rsum_cache[bbno] = log;
+ /* There were no extents at levels > log. */
+ if (mp->m_rsum_cache && log + 1 < mp->m_rsum_cache[bbno])
+ mp->m_rsum_cache[bbno] = log + 1;
return 0;
}
@@ -97,60 +98,54 @@ out:
* Copy and transform the summary file, given the old and new
* parameters in the mount structures.
*/
-STATIC int /* error */
+STATIC int
xfs_rtcopy_summary(
- xfs_mount_t *omp, /* old file system mount point */
- xfs_mount_t *nmp, /* new file system mount point */
- xfs_trans_t *tp) /* transaction pointer */
+ struct xfs_rtalloc_args *oargs,
+ struct xfs_rtalloc_args *nargs)
{
- xfs_rtblock_t bbno; /* bitmap block number */
- struct xfs_buf *bp; /* summary buffer */
- int error; /* error return value */
- int log; /* summary level number (log length) */
- xfs_suminfo_t sum; /* summary data */
- xfs_fsblock_t sumbno; /* summary block number */
+ xfs_fileoff_t bbno; /* bitmap block number */
+ int error;
+ int log; /* summary level number (log length) */
+ xfs_suminfo_t sum; /* summary data */
- bp = NULL;
- for (log = omp->m_rsumlevels - 1; log >= 0; log--) {
- for (bbno = omp->m_sb.sb_rbmblocks - 1;
+ for (log = oargs->mp->m_rsumlevels - 1; log >= 0; log--) {
+ for (bbno = oargs->mp->m_sb.sb_rbmblocks - 1;
(xfs_srtblock_t)bbno >= 0;
bbno--) {
- error = xfs_rtget_summary(omp, tp, log, bbno, &bp,
- &sumbno, &sum);
+ error = xfs_rtget_summary(oargs, log, bbno, &sum);
if (error)
- return error;
+ goto out;
if (sum == 0)
continue;
- error = xfs_rtmodify_summary(omp, tp, log, bbno, -sum,
- &bp, &sumbno);
+ error = xfs_rtmodify_summary(oargs, log, bbno, -sum);
if (error)
- return error;
- error = xfs_rtmodify_summary(nmp, tp, log, bbno, sum,
- &bp, &sumbno);
+ goto out;
+ error = xfs_rtmodify_summary(nargs, log, bbno, sum);
if (error)
- return error;
+ goto out;
ASSERT(sum > 0);
}
}
+ error = 0;
+out:
+ xfs_rtbuf_cache_relse(oargs);
return 0;
}
/*
* Mark an extent specified by start and len allocated.
* Updates all the summary information as well as the bitmap.
*/
-STATIC int /* error */
+STATIC int
xfs_rtallocate_range(
- xfs_mount_t *mp, /* file system mount point */
- xfs_trans_t *tp, /* transaction pointer */
- xfs_rtblock_t start, /* start block to allocate */
- xfs_extlen_t len, /* length to allocate */
- struct xfs_buf **rbpp, /* in/out: summary block buffer */
- xfs_fsblock_t *rsb) /* in/out: summary block number */
+ struct xfs_rtalloc_args *args,
+ xfs_rtxnum_t start, /* start rtext to allocate */
+ xfs_rtxlen_t len) /* in/out: summary block number */
{
- xfs_rtblock_t end; /* end of the allocated extent */
- int error; /* error value */
- xfs_rtblock_t postblock = 0; /* first block allocated > end */
- xfs_rtblock_t preblock = 0; /* first block allocated < start */
+ struct xfs_mount *mp = args->mp;
+ xfs_rtxnum_t end; /* end of the allocated rtext */
+ int error;
+ xfs_rtxnum_t postblock = 0; /* first rtext allocated > end */
+ xfs_rtxnum_t preblock = 0; /* first rtext allocated < start */
end = start + len - 1;
/*
@@ -158,15 +153,15 @@ xfs_rtallocate_range(
* We need to find the beginning and end of the extent so we can
* properly update the summary.
*/
- error = xfs_rtfind_back(mp, tp, start, 0, &preblock);
+ error = xfs_rtfind_back(args, start, 0, &preblock);
if (error) {
return error;
}
/*
* Find the next allocated block (end of free extent).
*/
- error = xfs_rtfind_forw(mp, tp, end, mp->m_sb.sb_rextents - 1,
- &postblock);
+ error = xfs_rtfind_forw(args, end, mp->m_sb.sb_rextents - 1,
+ &postblock);
if (error) {
return error;
}
@@ -174,9 +169,9 @@ xfs_rtallocate_range(
* Decrement the summary information corresponding to the entire
* (old) free extent.
*/
- error = xfs_rtmodify_summary(mp, tp,
- XFS_RTBLOCKLOG(postblock + 1 - preblock),
- XFS_BITTOBLOCK(mp, preblock), -1, rbpp, rsb);
+ error = xfs_rtmodify_summary(args,
+ XFS_RTBLOCKLOG(postblock + 1 - preblock),
+ xfs_rtx_to_rbmblock(mp, preblock), -1);
if (error) {
return error;
}
@@ -185,9 +180,9 @@ xfs_rtallocate_range(
* old extent, add summary data for them to be free.
*/
if (preblock < start) {
- error = xfs_rtmodify_summary(mp, tp,
- XFS_RTBLOCKLOG(start - preblock),
- XFS_BITTOBLOCK(mp, preblock), 1, rbpp, rsb);
+ error = xfs_rtmodify_summary(args,
+ XFS_RTBLOCKLOG(start - preblock),
+ xfs_rtx_to_rbmblock(mp, preblock), 1);
if (error) {
return error;
}
@@ -197,9 +192,9 @@ xfs_rtallocate_range(
* old extent, add summary data for them to be free.
*/
if (postblock > end) {
- error = xfs_rtmodify_summary(mp, tp,
- XFS_RTBLOCKLOG(postblock - end),
- XFS_BITTOBLOCK(mp, end + 1), 1, rbpp, rsb);
+ error = xfs_rtmodify_summary(args,
+ XFS_RTBLOCKLOG(postblock - end),
+ xfs_rtx_to_rbmblock(mp, end + 1), 1);
if (error) {
return error;
}
@@ -207,54 +202,69 @@ xfs_rtallocate_range(
/*
* Modify the bitmap to mark this extent allocated.
*/
- error = xfs_rtmodify_range(mp, tp, start, len, 0);
+ error = xfs_rtmodify_range(args, start, len, 0);
return error;
}
/*
+ * Make sure we don't run off the end of the rt volume. Be careful that
+ * adjusting maxlen downwards doesn't cause us to fail the alignment checks.
+ */
+static inline xfs_rtxlen_t
+xfs_rtallocate_clamp_len(
+ struct xfs_mount *mp,
+ xfs_rtxnum_t startrtx,
+ xfs_rtxlen_t rtxlen,
+ xfs_rtxlen_t prod)
+{
+ xfs_rtxlen_t ret;
+
+ ret = min(mp->m_sb.sb_rextents, startrtx + rtxlen) - startrtx;
+ return rounddown(ret, prod);
+}
+
+/*
* Attempt to allocate an extent minlen<=len<=maxlen starting from
* bitmap block bbno. If we don't get maxlen then use prod to trim
- * the length, if given. Returns error; returns starting block in *rtblock.
+ * the length, if given. Returns error; returns starting block in *rtx.
* The lengths are all in rtextents.
*/
-STATIC int /* error */
+STATIC int
xfs_rtallocate_extent_block(
- xfs_mount_t *mp, /* file system mount point */
- xfs_trans_t *tp, /* transaction pointer */
- xfs_rtblock_t bbno, /* bitmap block number */
- xfs_extlen_t minlen, /* minimum length to allocate */
- xfs_extlen_t maxlen, /* maximum length to allocate */
- xfs_extlen_t *len, /* out: actual length allocated */
- xfs_rtblock_t *nextp, /* out: next block to try */
- struct xfs_buf **rbpp, /* in/out: summary block buffer */
- xfs_fsblock_t *rsb, /* in/out: summary block number */
- xfs_extlen_t prod, /* extent product factor */
- xfs_rtblock_t *rtblock) /* out: start block allocated */
+ struct xfs_rtalloc_args *args,
+ xfs_fileoff_t bbno, /* bitmap block number */
+ xfs_rtxlen_t minlen, /* minimum length to allocate */
+ xfs_rtxlen_t maxlen, /* maximum length to allocate */
+ xfs_rtxlen_t *len, /* out: actual length allocated */
+ xfs_rtxnum_t *nextp, /* out: next rtext to try */
+ xfs_rtxlen_t prod, /* extent product factor */
+ xfs_rtxnum_t *rtx) /* out: start rtext allocated */
{
- xfs_rtblock_t besti; /* best rtblock found so far */
- xfs_rtblock_t bestlen; /* best length found so far */
- xfs_rtblock_t end; /* last rtblock in chunk */
- int error; /* error value */
- xfs_rtblock_t i; /* current rtblock trying */
- xfs_rtblock_t next; /* next rtblock to try */
- int stat; /* status from internal calls */
+ struct xfs_mount *mp = args->mp;
+ xfs_rtxnum_t besti; /* best rtext found so far */
+ xfs_rtxnum_t bestlen;/* best length found so far */
+ xfs_rtxnum_t end; /* last rtext in chunk */
+ int error;
+ xfs_rtxnum_t i; /* current rtext trying */
+ xfs_rtxnum_t next; /* next rtext to try */
+ int stat; /* status from internal calls */
/*
* Loop over all the extents starting in this bitmap block,
* looking for one that's long enough.
*/
- for (i = XFS_BLOCKTOBIT(mp, bbno), besti = -1, bestlen = 0,
- end = XFS_BLOCKTOBIT(mp, bbno + 1) - 1;
+ for (i = xfs_rbmblock_to_rtx(mp, bbno), besti = -1, bestlen = 0,
+ end = xfs_rbmblock_to_rtx(mp, bbno + 1) - 1;
i <= end;
i++) {
/* Make sure we don't scan off the end of the rt volume. */
- maxlen = min(mp->m_sb.sb_rextents, i + maxlen) - i;
+ maxlen = xfs_rtallocate_clamp_len(mp, i, maxlen, prod);
/*
* See if there's a free extent of maxlen starting at i.
* If it's not so then next will contain the first non-free.
*/
- error = xfs_rtcheck_range(mp, tp, i, maxlen, 1, &next, &stat);
+ error = xfs_rtcheck_range(args, i, maxlen, 1, &next, &stat);
if (error) {
return error;
}
@@ -262,13 +272,12 @@ xfs_rtallocate_extent_block(
/*
* i for maxlen is all free, allocate and return that.
*/
- error = xfs_rtallocate_range(mp, tp, i, maxlen, rbpp,
- rsb);
+ error = xfs_rtallocate_range(args, i, maxlen);
if (error) {
return error;
}
*len = maxlen;
- *rtblock = i;
+ *rtx = i;
return 0;
}
/*
@@ -278,7 +287,7 @@ xfs_rtallocate_extent_block(
* so far, remember it.
*/
if (minlen < maxlen) {
- xfs_rtblock_t thislen; /* this extent size */
+ xfs_rtxnum_t thislen; /* this extent size */
thislen = next - i;
if (thislen >= minlen && thislen > bestlen) {
@@ -290,7 +299,7 @@ xfs_rtallocate_extent_block(
* If not done yet, find the start of the next free space.
*/
if (next < end) {
- error = xfs_rtfind_forw(mp, tp, next, end, &i);
+ error = xfs_rtfind_forw(args, next, end, &i);
if (error) {
return error;
}
@@ -301,7 +310,7 @@ xfs_rtallocate_extent_block(
* Searched the whole thing & didn't find a maxlen free extent.
*/
if (minlen < maxlen && besti != -1) {
- xfs_extlen_t p; /* amount to trim length by */
+ xfs_rtxlen_t p; /* amount to trim length by */
/*
* If size should be a multiple of prod, make that so.
@@ -315,51 +324,49 @@ xfs_rtallocate_extent_block(
/*
* Allocate besti for bestlen & return that.
*/
- error = xfs_rtallocate_range(mp, tp, besti, bestlen, rbpp, rsb);
+ error = xfs_rtallocate_range(args, besti, bestlen);
if (error) {
return error;
}
*len = bestlen;
- *rtblock = besti;
+ *rtx = besti;
return 0;
}
/*
* Allocation failed. Set *nextp to the next block to try.
*/
*nextp = next;
- *rtblock = NULLRTBLOCK;
+ *rtx = NULLRTEXTNO;
return 0;
}
/*
* Allocate an extent of length minlen<=len<=maxlen, starting at block
* bno. If we don't get maxlen then use prod to trim the length, if given.
- * Returns error; returns starting block in *rtblock.
+ * Returns error; returns starting block in *rtx.
* The lengths are all in rtextents.
*/
-STATIC int /* error */
+STATIC int
xfs_rtallocate_extent_exact(
- xfs_mount_t *mp, /* file system mount point */
- xfs_trans_t *tp, /* transaction pointer */
- xfs_rtblock_t bno, /* starting block number to allocate */
- xfs_extlen_t minlen, /* minimum length to allocate */
- xfs_extlen_t maxlen, /* maximum length to allocate */
- xfs_extlen_t *len, /* out: actual length allocated */
- struct xfs_buf **rbpp, /* in/out: summary block buffer */
- xfs_fsblock_t *rsb, /* in/out: summary block number */
- xfs_extlen_t prod, /* extent product factor */
- xfs_rtblock_t *rtblock) /* out: start block allocated */
+ struct xfs_rtalloc_args *args,
+ xfs_rtxnum_t start, /* starting rtext number to allocate */
+ xfs_rtxlen_t minlen, /* minimum length to allocate */
+ xfs_rtxlen_t maxlen, /* maximum length to allocate */
+ xfs_rtxlen_t *len, /* out: actual length allocated */
+ xfs_rtxlen_t prod, /* extent product factor */
+ xfs_rtxnum_t *rtx) /* out: start rtext allocated */
{
- int error; /* error value */
- xfs_extlen_t i; /* extent length trimmed due to prod */
- int isfree; /* extent is free */
- xfs_rtblock_t next; /* next block to try (dummy) */
+ int error;
+ xfs_rtxlen_t i; /* extent length trimmed due to prod */
+ int isfree; /* extent is free */
+ xfs_rtxnum_t next; /* next rtext to try (dummy) */
- ASSERT(minlen % prod == 0 && maxlen % prod == 0);
+ ASSERT(minlen % prod == 0);
+ ASSERT(maxlen % prod == 0);
/*
* Check if the range in question (for maxlen) is free.
*/
- error = xfs_rtcheck_range(mp, tp, bno, maxlen, 1, &next, &isfree);
+ error = xfs_rtcheck_range(args, start, maxlen, 1, &next, &isfree);
if (error) {
return error;
}
@@ -367,23 +374,23 @@ xfs_rtallocate_extent_exact(
/*
* If it is, allocate it and return success.
*/
- error = xfs_rtallocate_range(mp, tp, bno, maxlen, rbpp, rsb);
+ error = xfs_rtallocate_range(args, start, maxlen);
if (error) {
return error;
}
*len = maxlen;
- *rtblock = bno;
+ *rtx = start;
return 0;
}
/*
* If not, allocate what there is, if it's at least minlen.
*/
- maxlen = next - bno;
+ maxlen = next - start;
if (maxlen < minlen) {
/*
* Failed, return failure status.
*/
- *rtblock = NULLRTBLOCK;
+ *rtx = NULLRTEXTNO;
return 0;
}
/*
@@ -395,81 +402,82 @@ xfs_rtallocate_extent_exact(
/*
* Now we can't do it, return failure status.
*/
- *rtblock = NULLRTBLOCK;
+ *rtx = NULLRTEXTNO;
return 0;
}
}
/*
* Allocate what we can and return it.
*/
- error = xfs_rtallocate_range(mp, tp, bno, maxlen, rbpp, rsb);
+ error = xfs_rtallocate_range(args, start, maxlen);
if (error) {
return error;
}
*len = maxlen;
- *rtblock = bno;
+ *rtx = start;
return 0;
}
/*
* Allocate an extent of length minlen<=len<=maxlen, starting as near
- * to bno as possible. If we don't get maxlen then use prod to trim
+ * to start as possible. If we don't get maxlen then use prod to trim
* the length, if given. The lengths are all in rtextents.
*/
-STATIC int /* error */
+STATIC int
xfs_rtallocate_extent_near(
- xfs_mount_t *mp, /* file system mount point */
- xfs_trans_t *tp, /* transaction pointer */
- xfs_rtblock_t bno, /* starting block number to allocate */
- xfs_extlen_t minlen, /* minimum length to allocate */
- xfs_extlen_t maxlen, /* maximum length to allocate */
- xfs_extlen_t *len, /* out: actual length allocated */
- struct xfs_buf **rbpp, /* in/out: summary block buffer */
- xfs_fsblock_t *rsb, /* in/out: summary block number */
- xfs_extlen_t prod, /* extent product factor */
- xfs_rtblock_t *rtblock) /* out: start block allocated */
+ struct xfs_rtalloc_args *args,
+ xfs_rtxnum_t start, /* starting rtext number to allocate */
+ xfs_rtxlen_t minlen, /* minimum length to allocate */
+ xfs_rtxlen_t maxlen, /* maximum length to allocate */
+ xfs_rtxlen_t *len, /* out: actual length allocated */
+ xfs_rtxlen_t prod, /* extent product factor */
+ xfs_rtxnum_t *rtx) /* out: start rtext allocated */
{
- int any; /* any useful extents from summary */
- xfs_rtblock_t bbno; /* bitmap block number */
- int error; /* error value */
- int i; /* bitmap block offset (loop control) */
- int j; /* secondary loop control */
- int log2len; /* log2 of minlen */
- xfs_rtblock_t n; /* next block to try */
- xfs_rtblock_t r; /* result block */
-
- ASSERT(minlen % prod == 0 && maxlen % prod == 0);
+ struct xfs_mount *mp = args->mp;
+ int maxlog; /* max useful extent from summary */
+ xfs_fileoff_t bbno; /* bitmap block number */
+ int error;
+ int i; /* bitmap block offset (loop control) */
+ int j; /* secondary loop control */
+ int log2len; /* log2 of minlen */
+ xfs_rtxnum_t n; /* next rtext to try */
+ xfs_rtxnum_t r; /* result rtext */
+
+ ASSERT(minlen % prod == 0);
+ ASSERT(maxlen % prod == 0);
+
/*
* If the block number given is off the end, silently set it to
* the last block.
*/
- if (bno >= mp->m_sb.sb_rextents)
- bno = mp->m_sb.sb_rextents - 1;
+ if (start >= mp->m_sb.sb_rextents)
+ start = mp->m_sb.sb_rextents - 1;
/* Make sure we don't run off the end of the rt volume. */
- maxlen = min(mp->m_sb.sb_rextents, bno + maxlen) - bno;
+ maxlen = xfs_rtallocate_clamp_len(mp, start, maxlen, prod);
if (maxlen < minlen) {
- *rtblock = NULLRTBLOCK;
+ *rtx = NULLRTEXTNO;
return 0;
}
/*
* Try the exact allocation first.
*/
- error = xfs_rtallocate_extent_exact(mp, tp, bno, minlen, maxlen, len,
- rbpp, rsb, prod, &r);
+ error = xfs_rtallocate_extent_exact(args, start, minlen, maxlen, len,
+ prod, &r);
if (error) {
return error;
}
/*
* If the exact allocation worked, return that.
*/
- if (r != NULLRTBLOCK) {
- *rtblock = r;
+ if (r != NULLRTEXTNO) {
+ *rtx = r;
return 0;
}
- bbno = XFS_BITTOBLOCK(mp, bno);
+ bbno = xfs_rtx_to_rbmblock(mp, start);
i = 0;
+ j = -1;
ASSERT(minlen != 0);
log2len = xfs_highbit32(minlen);
/*
@@ -480,8 +488,8 @@ xfs_rtallocate_extent_near(
* Get summary information of extents of all useful levels
* starting in this bitmap block.
*/
- error = xfs_rtany_summary(mp, tp, log2len, mp->m_rsumlevels - 1,
- bbno + i, rbpp, rsb, &any);
+ error = xfs_rtany_summary(args, log2len, mp->m_rsumlevels - 1,
+ bbno + i, &maxlog);
if (error) {
return error;
}
@@ -489,7 +497,10 @@ xfs_rtallocate_extent_near(
* If there are any useful extents starting here, try
* allocating one.
*/
- if (any) {
+ if (maxlog >= 0) {
+ xfs_extlen_t maxavail =
+ min_t(xfs_rtblock_t, maxlen,
+ (1ULL << (maxlog + 1)) - 1);
/*
* On the positive side of the starting location.
*/
@@ -498,17 +509,17 @@ xfs_rtallocate_extent_near(
* Try to allocate an extent starting in
* this block.
*/
- error = xfs_rtallocate_extent_block(mp, tp,
- bbno + i, minlen, maxlen, len, &n, rbpp,
- rsb, prod, &r);
+ error = xfs_rtallocate_extent_block(args,
+ bbno + i, minlen, maxavail, len,
+ &n, prod, &r);
if (error) {
return error;
}
/*
* If it worked, return it.
*/
- if (r != NULLRTBLOCK) {
- *rtblock = r;
+ if (r != NULLRTEXTNO) {
+ *rtx = r;
return 0;
}
}
@@ -516,68 +527,46 @@ xfs_rtallocate_extent_near(
* On the negative side of the starting location.
*/
else { /* i < 0 */
+ int maxblocks;
+
/*
- * Loop backwards through the bitmap blocks from
- * the starting point-1 up to where we are now.
- * There should be an extent which ends in this
- * bitmap block and is long enough.
+ * Loop backwards to find the end of the extent
+ * we found in the realtime summary.
+ *
+ * maxblocks is the maximum possible number of
+ * bitmap blocks from the start of the extent
+ * to the end of the extent.
*/
- for (j = -1; j > i; j--) {
- /*
- * Grab the summary information for
- * this bitmap block.
- */
- error = xfs_rtany_summary(mp, tp,
- log2len, mp->m_rsumlevels - 1,
- bbno + j, rbpp, rsb, &any);
- if (error) {
- return error;
- }
- /*
- * If there's no extent given in the
- * summary that means the extent we
- * found must carry over from an
- * earlier block. If there is an
- * extent given, we've already tried
- * that allocation, don't do it again.
- */
- if (any)
- continue;
- error = xfs_rtallocate_extent_block(mp,
- tp, bbno + j, minlen, maxlen,
- len, &n, rbpp, rsb, prod, &r);
+ if (maxlog == 0)
+ maxblocks = 0;
+ else if (maxlog < mp->m_blkbit_log)
+ maxblocks = 1;
+ else
+ maxblocks = 2 << (maxlog - mp->m_blkbit_log);
+
+ /*
+ * We need to check bbno + i + maxblocks down to
+ * bbno + i. We already checked bbno down to
+ * bbno + j + 1, so we don't need to check those
+ * again.
+ */
+ j = min(i + maxblocks, j);
+ for (; j >= i; j--) {
+ error = xfs_rtallocate_extent_block(args,
+ bbno + j, minlen,
+ maxavail, len, &n, prod,
+ &r);
if (error) {
return error;
}
/*
* If it works, return the extent.
*/
- if (r != NULLRTBLOCK) {
- *rtblock = r;
+ if (r != NULLRTEXTNO) {
+ *rtx = r;
return 0;
}
}
- /*
- * There weren't intervening bitmap blocks
- * with a long enough extent, or the
- * allocation didn't work for some reason
- * (i.e. it's a little * too short).
- * Try to allocate from the summary block
- * that we found.
- */
- error = xfs_rtallocate_extent_block(mp, tp,
- bbno + i, minlen, maxlen, len, &n, rbpp,
- rsb, prod, &r);
- if (error) {
- return error;
- }
- /*
- * If it works, return the extent.
- */
- if (r != NULLRTBLOCK) {
- *rtblock = r;
- return 0;
- }
}
}
/*
@@ -610,7 +599,7 @@ xfs_rtallocate_extent_near(
else
break;
}
- *rtblock = NULLRTBLOCK;
+ *rtx = NULLRTEXTNO;
return 0;
}
@@ -619,26 +608,25 @@ xfs_rtallocate_extent_near(
* specified. If we don't get maxlen then use prod to trim
* the length, if given. The lengths are all in rtextents.
*/
-STATIC int /* error */
+STATIC int
xfs_rtallocate_extent_size(
- xfs_mount_t *mp, /* file system mount point */
- xfs_trans_t *tp, /* transaction pointer */
- xfs_extlen_t minlen, /* minimum length to allocate */
- xfs_extlen_t maxlen, /* maximum length to allocate */
- xfs_extlen_t *len, /* out: actual length allocated */
- struct xfs_buf **rbpp, /* in/out: summary block buffer */
- xfs_fsblock_t *rsb, /* in/out: summary block number */
- xfs_extlen_t prod, /* extent product factor */
- xfs_rtblock_t *rtblock) /* out: start block allocated */
+ struct xfs_rtalloc_args *args,
+ xfs_rtxlen_t minlen, /* minimum length to allocate */
+ xfs_rtxlen_t maxlen, /* maximum length to allocate */
+ xfs_rtxlen_t *len, /* out: actual length allocated */
+ xfs_rtxlen_t prod, /* extent product factor */
+ xfs_rtxnum_t *rtx) /* out: start rtext allocated */
{
- int error; /* error value */
- int i; /* bitmap block number */
- int l; /* level number (loop control) */
- xfs_rtblock_t n; /* next block to be tried */
- xfs_rtblock_t r; /* result block number */
- xfs_suminfo_t sum; /* summary information for extents */
-
- ASSERT(minlen % prod == 0 && maxlen % prod == 0);
+ struct xfs_mount *mp = args->mp;
+ int error;
+ xfs_fileoff_t i; /* bitmap block number */
+ int l; /* level number (loop control) */
+ xfs_rtxnum_t n; /* next rtext to be tried */
+ xfs_rtxnum_t r; /* result rtext number */
+ xfs_suminfo_t sum; /* summary information for extents */
+
+ ASSERT(minlen % prod == 0);
+ ASSERT(maxlen % prod == 0);
ASSERT(maxlen != 0);
/*
@@ -656,8 +644,7 @@ xfs_rtallocate_extent_size(
/*
* Get the summary for this level/block.
*/
- error = xfs_rtget_summary(mp, tp, l, i, rbpp, rsb,
- &sum);
+ error = xfs_rtget_summary(args, l, i, &sum);
if (error) {
return error;
}
@@ -669,16 +656,16 @@ xfs_rtallocate_extent_size(
/*
* Try allocating the extent.
*/
- error = xfs_rtallocate_extent_block(mp, tp, i, maxlen,
- maxlen, len, &n, rbpp, rsb, prod, &r);
+ error = xfs_rtallocate_extent_block(args, i, maxlen,
+ maxlen, len, &n, prod, &r);
if (error) {
return error;
}
/*
* If it worked, return that.
*/
- if (r != NULLRTBLOCK) {
- *rtblock = r;
+ if (r != NULLRTEXTNO) {
+ *rtx = r;
return 0;
}
/*
@@ -686,8 +673,8 @@ xfs_rtallocate_extent_size(
* allocator is beyond the next bitmap block,
* skip to that bitmap block.
*/
- if (XFS_BITTOBLOCK(mp, n) > i + 1)
- i = XFS_BITTOBLOCK(mp, n) - 1;
+ if (xfs_rtx_to_rbmblock(mp, n) > i + 1)
+ i = xfs_rtx_to_rbmblock(mp, n) - 1;
}
}
/*
@@ -695,7 +682,7 @@ xfs_rtallocate_extent_size(
* we're asking for a fixed size extent.
*/
if (minlen > --maxlen) {
- *rtblock = NULLRTBLOCK;
+ *rtx = NULLRTEXTNO;
return 0;
}
ASSERT(minlen != 0);
@@ -715,8 +702,7 @@ xfs_rtallocate_extent_size(
/*
* Get the summary information for this level/block.
*/
- error = xfs_rtget_summary(mp, tp, l, i, rbpp, rsb,
- &sum);
+ error = xfs_rtget_summary(args, l, i, &sum);
if (error) {
return error;
}
@@ -730,18 +716,18 @@ xfs_rtallocate_extent_size(
* minlen/maxlen are in the possible range for
* this summary level.
*/
- error = xfs_rtallocate_extent_block(mp, tp, i,
+ error = xfs_rtallocate_extent_block(args, i,
XFS_RTMAX(minlen, 1 << l),
XFS_RTMIN(maxlen, (1 << (l + 1)) - 1),
- len, &n, rbpp, rsb, prod, &r);
+ len, &n, prod, &r);
if (error) {
return error;
}
/*
* If it worked, return that extent.
*/
- if (r != NULLRTBLOCK) {
- *rtblock = r;
+ if (r != NULLRTEXTNO) {
+ *rtx = r;
return 0;
}
/*
@@ -749,14 +735,14 @@ xfs_rtallocate_extent_size(
* allocator is beyond the next bitmap block,
* skip to that bitmap block.
*/
- if (XFS_BITTOBLOCK(mp, n) > i + 1)
- i = XFS_BITTOBLOCK(mp, n) - 1;
+ if (xfs_rtx_to_rbmblock(mp, n) > i + 1)
+ i = xfs_rtx_to_rbmblock(mp, n) - 1;
}
}
/*
* Got nothing, return failure.
*/
- *rtblock = NULLRTBLOCK;
+ *rtx = NULLRTEXTNO;
return 0;
}
@@ -886,12 +872,14 @@ xfs_alloc_rsum_cache(
xfs_extlen_t rbmblocks) /* number of rt bitmap blocks */
{
/*
- * The rsum cache is initialized to all zeroes, which is trivially a
- * lower bound on the minimum level with any free extents. We can
- * continue without the cache if it couldn't be allocated.
+ * The rsum cache is initialized to the maximum value, which is
+ * trivially an upper bound on the maximum level with any free extents.
+ * We can continue without the cache if it couldn't be allocated.
*/
- mp->m_rsum_cache = kvzalloc(rbmblocks, GFP_KERNEL);
- if (!mp->m_rsum_cache)
+ mp->m_rsum_cache = kvmalloc(rbmblocks, GFP_KERNEL);
+ if (mp->m_rsum_cache)
+ memset(mp->m_rsum_cache, -1, rbmblocks);
+ else
xfs_warn(mp, "could not allocate realtime summary cache");
}
@@ -907,13 +895,13 @@ xfs_growfs_rt(
xfs_mount_t *mp, /* mount point for filesystem */
xfs_growfs_rt_t *in) /* growfs rt input struct */
{
- xfs_rtblock_t bmbno; /* bitmap block number */
+ xfs_fileoff_t bmbno; /* bitmap block number */
struct xfs_buf *bp; /* temporary buffer */
int error; /* error return value */
xfs_mount_t *nmp; /* new (fake) mount structure */
xfs_rfsblock_t nrblocks; /* new number of realtime blocks */
xfs_extlen_t nrbmblocks; /* new number of rt bitmap blocks */
- xfs_rtblock_t nrextents; /* new number of realtime extents */
+ xfs_rtxnum_t nrextents; /* new number of realtime extents */
uint8_t nrextslog; /* new log2 of sb_rextents */
xfs_extlen_t nrsumblocks; /* new number of summary blocks */
uint nrsumlevels; /* new rt summary levels */
@@ -922,7 +910,6 @@ xfs_growfs_rt(
xfs_extlen_t rbmblocks; /* current number of rt bitmap blocks */
xfs_extlen_t rsumblocks; /* current number of rt summary blks */
xfs_sb_t *sbp; /* old superblock */
- xfs_fsblock_t sumbno; /* summary block number */
uint8_t *rsum_cache; /* old summary cache */
sbp = &mp->m_sb;
@@ -954,7 +941,7 @@ xfs_growfs_rt(
return -EINVAL;
/* Unsupported realtime features. */
- if (xfs_has_rmapbt(mp) || xfs_has_reflink(mp))
+ if (xfs_has_rmapbt(mp) || xfs_has_reflink(mp) || xfs_has_quota(mp))
return -EOPNOTSUPP;
nrblocks = in->newblocks;
@@ -976,11 +963,10 @@ xfs_growfs_rt(
*/
nrextents = nrblocks;
do_div(nrextents, in->extsize);
- nrbmblocks = howmany_64(nrextents, NBBY * sbp->sb_blocksize);
+ nrbmblocks = xfs_rtbitmap_blockcount(mp, nrextents);
nrextslog = xfs_highbit32(nrextents);
nrsumlevels = nrextslog + 1;
- nrsumsize = (uint)sizeof(xfs_suminfo_t) * nrsumlevels * nrbmblocks;
- nrsumblocks = XFS_B_TO_FSB(mp, nrsumsize);
+ nrsumblocks = xfs_rtsummary_blockcount(mp, nrsumlevels, nrbmblocks);
nrsumsize = XFS_FSB_TO_B(mp, nrsumblocks);
/*
* New summary size can't be more than half the size of
@@ -1023,6 +1009,12 @@ xfs_growfs_rt(
((sbp->sb_rextents & ((1 << mp->m_blkbit_log) - 1)) != 0);
bmbno < nrbmblocks;
bmbno++) {
+ struct xfs_rtalloc_args args = {
+ .mp = mp,
+ };
+ struct xfs_rtalloc_args nargs = {
+ .mp = nmp,
+ };
struct xfs_trans *tp;
xfs_rfsblock_t nrblocks_step;
@@ -1032,19 +1024,17 @@ xfs_growfs_rt(
* Calculate new sb and mount fields for this round.
*/
nsbp->sb_rextsize = in->extsize;
+ nmp->m_rtxblklog = -1; /* don't use shift or masking */
nsbp->sb_rbmblocks = bmbno + 1;
nrblocks_step = (bmbno + 1) * NBBY * nsbp->sb_blocksize *
nsbp->sb_rextsize;
nsbp->sb_rblocks = min(nrblocks, nrblocks_step);
- nsbp->sb_rextents = nsbp->sb_rblocks;
- do_div(nsbp->sb_rextents, nsbp->sb_rextsize);
+ nsbp->sb_rextents = xfs_rtb_to_rtx(nmp, nsbp->sb_rblocks);
ASSERT(nsbp->sb_rextents != 0);
nsbp->sb_rextslog = xfs_highbit32(nsbp->sb_rextents);
nrsumlevels = nmp->m_rsumlevels = nsbp->sb_rextslog + 1;
- nrsumsize =
- (uint)sizeof(xfs_suminfo_t) * nrsumlevels *
- nsbp->sb_rbmblocks;
- nrsumblocks = XFS_B_TO_FSB(mp, nrsumsize);
+ nrsumblocks = xfs_rtsummary_blockcount(mp, nrsumlevels,
+ nsbp->sb_rbmblocks);
nmp->m_rsumsize = nrsumsize = XFS_FSB_TO_B(mp, nrsumblocks);
/*
* Start a transaction, get the log reservation.
@@ -1053,6 +1043,9 @@ xfs_growfs_rt(
&tp);
if (error)
break;
+ args.tp = tp;
+ nargs.tp = tp;
+
/*
* Lock out other callers by grabbing the bitmap inode lock.
*/
@@ -1086,7 +1079,7 @@ xfs_growfs_rt(
*/
if (sbp->sb_rbmblocks != nsbp->sb_rbmblocks ||
mp->m_rsumlevels != nmp->m_rsumlevels) {
- error = xfs_rtcopy_summary(mp, nmp, tp);
+ error = xfs_rtcopy_summary(&args, &nargs);
if (error)
goto error_cancel;
}
@@ -1111,9 +1104,9 @@ xfs_growfs_rt(
/*
* Free new extent.
*/
- bp = NULL;
- error = xfs_rtfree_range(nmp, tp, sbp->sb_rextents,
- nsbp->sb_rextents - sbp->sb_rextents, &bp, &sumbno);
+ error = xfs_rtfree_range(&nargs, sbp->sb_rextents,
+ nsbp->sb_rextents - sbp->sb_rextents);
+ xfs_rtbuf_cache_relse(&nargs);
if (error) {
error_cancel:
xfs_trans_cancel(tp);
@@ -1171,59 +1164,60 @@ out_free:
* parameters. The length units are all in realtime extents, as is the
* result block number.
*/
-int /* error */
+int
xfs_rtallocate_extent(
- xfs_trans_t *tp, /* transaction pointer */
- xfs_rtblock_t bno, /* starting block number to allocate */
- xfs_extlen_t minlen, /* minimum length to allocate */
- xfs_extlen_t maxlen, /* maximum length to allocate */
- xfs_extlen_t *len, /* out: actual length allocated */
- int wasdel, /* was a delayed allocation extent */
- xfs_extlen_t prod, /* extent product factor */
- xfs_rtblock_t *rtblock) /* out: start block allocated */
+ struct xfs_trans *tp,
+ xfs_rtxnum_t start, /* starting rtext number to allocate */
+ xfs_rtxlen_t minlen, /* minimum length to allocate */
+ xfs_rtxlen_t maxlen, /* maximum length to allocate */
+ xfs_rtxlen_t *len, /* out: actual length allocated */
+ int wasdel, /* was a delayed allocation extent */
+ xfs_rtxlen_t prod, /* extent product factor */
+ xfs_rtxnum_t *rtblock) /* out: start rtext allocated */
{
- xfs_mount_t *mp = tp->t_mountp;
- int error; /* error value */
- xfs_rtblock_t r; /* result allocated block */
- xfs_fsblock_t sb; /* summary file block number */
- struct xfs_buf *sumbp; /* summary file block buffer */
-
- ASSERT(xfs_isilocked(mp->m_rbmip, XFS_ILOCK_EXCL));
+ struct xfs_rtalloc_args args = {
+ .mp = tp->t_mountp,
+ .tp = tp,
+ };
+ int error; /* error value */
+ xfs_rtxnum_t r; /* result allocated rtext */
+
+ ASSERT(xfs_isilocked(args.mp->m_rbmip, XFS_ILOCK_EXCL));
ASSERT(minlen > 0 && minlen <= maxlen);
/*
* If prod is set then figure out what to do to minlen and maxlen.
*/
if (prod > 1) {
- xfs_extlen_t i;
+ xfs_rtxlen_t i;
if ((i = maxlen % prod))
maxlen -= i;
if ((i = minlen % prod))
minlen += prod - i;
if (maxlen < minlen) {
- *rtblock = NULLRTBLOCK;
+ *rtblock = NULLRTEXTNO;
return 0;
}
}
retry:
- sumbp = NULL;
- if (bno == 0) {
- error = xfs_rtallocate_extent_size(mp, tp, minlen, maxlen, len,
- &sumbp, &sb, prod, &r);
+ if (start == 0) {
+ error = xfs_rtallocate_extent_size(&args, minlen,
+ maxlen, len, prod, &r);
} else {
- error = xfs_rtallocate_extent_near(mp, tp, bno, minlen, maxlen,
- len, &sumbp, &sb, prod, &r);
+ error = xfs_rtallocate_extent_near(&args, start, minlen,
+ maxlen, len, prod, &r);
}
+ xfs_rtbuf_cache_relse(&args);
if (error)
return error;
/*
* If it worked, update the superblock.
*/
- if (r != NULLRTBLOCK) {
+ if (r != NULLRTEXTNO) {
long slen = (long)*len;
ASSERT(*len >= minlen && *len <= maxlen);
@@ -1250,6 +1244,7 @@ xfs_rtmount_init(
struct xfs_buf *bp; /* buffer for last block of subvolume */
struct xfs_sb *sbp; /* filesystem superblock copy in mount */
xfs_daddr_t d; /* address of last block of subvolume */
+ unsigned int rsumblocks;
int error;
sbp = &mp->m_sb;
@@ -1261,10 +1256,9 @@ xfs_rtmount_init(
return -ENODEV;
}
mp->m_rsumlevels = sbp->sb_rextslog + 1;
- mp->m_rsumsize =
- (uint)sizeof(xfs_suminfo_t) * mp->m_rsumlevels *
- sbp->sb_rbmblocks;
- mp->m_rsumsize = roundup(mp->m_rsumsize, sbp->sb_blocksize);
+ rsumblocks = xfs_rtsummary_blockcount(mp, mp->m_rsumlevels,
+ mp->m_sb.sb_rbmblocks);
+ mp->m_rsumsize = XFS_FSB_TO_B(mp, rsumblocks);
mp->m_rbmip = mp->m_rsumip = NULL;
/*
* Check that the realtime section is an ok size.
@@ -1418,27 +1412,27 @@ xfs_rtunmount_inodes(
* of rtextents and the fraction.
* The fraction sequence is 0, 1/2, 1/4, 3/4, 1/8, ..., 7/8, 1/16, ...
*/
-int /* error */
+int /* error */
xfs_rtpick_extent(
xfs_mount_t *mp, /* file system mount point */
xfs_trans_t *tp, /* transaction pointer */
- xfs_extlen_t len, /* allocation length (rtextents) */
- xfs_rtblock_t *pick) /* result rt extent */
- {
- xfs_rtblock_t b; /* result block */
+ xfs_rtxlen_t len, /* allocation length (rtextents) */
+ xfs_rtxnum_t *pick) /* result rt extent */
+{
+ xfs_rtxnum_t b; /* result rtext */
int log2; /* log of sequence number */
uint64_t resid; /* residual after log removed */
uint64_t seq; /* sequence number of file creation */
- struct timespec64 ts; /* temporary timespec64 storage */
+ struct timespec64 ts; /* timespec in inode */
ASSERT(xfs_isilocked(mp->m_rbmip, XFS_ILOCK_EXCL));
+ ts = inode_get_atime(VFS_I(mp->m_rbmip));
if (!(mp->m_rbmip->i_diflags & XFS_DIFLAG_NEWRTBM)) {
mp->m_rbmip->i_diflags |= XFS_DIFLAG_NEWRTBM;
seq = 0;
} else {
- ts = inode_get_atime(VFS_I(mp->m_rbmip));
- seq = (uint64_t)ts.tv_sec;
+ seq = ts.tv_sec;
}
if ((log2 = xfs_highbit64(seq)) == -1)
b = 0;
@@ -1451,7 +1445,7 @@ xfs_rtpick_extent(
if (b + len > mp->m_sb.sb_rextents)
b = mp->m_sb.sb_rextents - len;
}
- ts.tv_sec = (time64_t)seq + 1;
+ ts.tv_sec = seq + 1;
inode_set_atime_to_ts(VFS_I(mp->m_rbmip), ts);
xfs_trans_log_inode(tp, mp->m_rbmip, XFS_ILOG_CORE);
*pick = b;
diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h
index 62c7ad79cbb6..f7cb9ffe51ca 100644
--- a/fs/xfs/xfs_rtalloc.h
+++ b/fs/xfs/xfs_rtalloc.h
@@ -11,22 +11,6 @@
struct xfs_mount;
struct xfs_trans;
-/*
- * XXX: Most of the realtime allocation functions deal in units of realtime
- * extents, not realtime blocks. This looks funny when paired with the type
- * name and screams for a larger cleanup.
- */
-struct xfs_rtalloc_rec {
- xfs_rtblock_t ar_startext;
- xfs_rtblock_t ar_extcount;
-};
-
-typedef int (*xfs_rtalloc_query_range_fn)(
- struct xfs_mount *mp,
- struct xfs_trans *tp,
- const struct xfs_rtalloc_rec *rec,
- void *priv);
-
#ifdef CONFIG_XFS_RT
/*
* Function prototypes for exported functions.
@@ -40,23 +24,14 @@ typedef int (*xfs_rtalloc_query_range_fn)(
int /* error */
xfs_rtallocate_extent(
struct xfs_trans *tp, /* transaction pointer */
- xfs_rtblock_t bno, /* starting block number to allocate */
- xfs_extlen_t minlen, /* minimum length to allocate */
- xfs_extlen_t maxlen, /* maximum length to allocate */
- xfs_extlen_t *len, /* out: actual length allocated */
+ xfs_rtxnum_t start, /* starting rtext number to allocate */
+ xfs_rtxlen_t minlen, /* minimum length to allocate */
+ xfs_rtxlen_t maxlen, /* maximum length to allocate */
+ xfs_rtxlen_t *len, /* out: actual length allocated */
int wasdel, /* was a delayed allocation extent */
- xfs_extlen_t prod, /* extent product factor */
- xfs_rtblock_t *rtblock); /* out: start block allocated */
+ xfs_rtxlen_t prod, /* extent product factor */
+ xfs_rtxnum_t *rtblock); /* out: start rtext allocated */
-/*
- * Free an extent in the realtime subvolume. Length is expressed in
- * realtime extents, as is the block number.
- */
-int /* error */
-xfs_rtfree_extent(
- struct xfs_trans *tp, /* transaction pointer */
- xfs_rtblock_t bno, /* starting block number to free */
- xfs_extlen_t len); /* length of extent freed */
/*
* Initialize realtime fields in the mount structure.
@@ -87,8 +62,8 @@ int /* error */
xfs_rtpick_extent(
struct xfs_mount *mp, /* file system mount point */
struct xfs_trans *tp, /* transaction pointer */
- xfs_extlen_t len, /* allocation length (rtextents) */
- xfs_rtblock_t *pick); /* result rt extent */
+ xfs_rtxlen_t len, /* allocation length (rtextents) */
+ xfs_rtxnum_t *pick); /* result rt extent */
/*
* Grow the realtime area of the filesystem.
@@ -98,55 +73,12 @@ xfs_growfs_rt(
struct xfs_mount *mp, /* file system mount structure */
xfs_growfs_rt_t *in); /* user supplied growfs struct */
-/*
- * From xfs_rtbitmap.c
- */
-int xfs_rtbuf_get(struct xfs_mount *mp, struct xfs_trans *tp,
- xfs_rtblock_t block, int issum, struct xfs_buf **bpp);
-int xfs_rtcheck_range(struct xfs_mount *mp, struct xfs_trans *tp,
- xfs_rtblock_t start, xfs_extlen_t len, int val,
- xfs_rtblock_t *new, int *stat);
-int xfs_rtfind_back(struct xfs_mount *mp, struct xfs_trans *tp,
- xfs_rtblock_t start, xfs_rtblock_t limit,
- xfs_rtblock_t *rtblock);
-int xfs_rtfind_forw(struct xfs_mount *mp, struct xfs_trans *tp,
- xfs_rtblock_t start, xfs_rtblock_t limit,
- xfs_rtblock_t *rtblock);
-int xfs_rtmodify_range(struct xfs_mount *mp, struct xfs_trans *tp,
- xfs_rtblock_t start, xfs_extlen_t len, int val);
-int xfs_rtmodify_summary_int(struct xfs_mount *mp, struct xfs_trans *tp,
- int log, xfs_rtblock_t bbno, int delta,
- struct xfs_buf **rbpp, xfs_fsblock_t *rsb,
- xfs_suminfo_t *sum);
-int xfs_rtmodify_summary(struct xfs_mount *mp, struct xfs_trans *tp, int log,
- xfs_rtblock_t bbno, int delta, struct xfs_buf **rbpp,
- xfs_fsblock_t *rsb);
-int xfs_rtfree_range(struct xfs_mount *mp, struct xfs_trans *tp,
- xfs_rtblock_t start, xfs_extlen_t len,
- struct xfs_buf **rbpp, xfs_fsblock_t *rsb);
-int xfs_rtalloc_query_range(struct xfs_mount *mp, struct xfs_trans *tp,
- const struct xfs_rtalloc_rec *low_rec,
- const struct xfs_rtalloc_rec *high_rec,
- xfs_rtalloc_query_range_fn fn, void *priv);
-int xfs_rtalloc_query_all(struct xfs_mount *mp, struct xfs_trans *tp,
- xfs_rtalloc_query_range_fn fn,
- void *priv);
-bool xfs_verify_rtbno(struct xfs_mount *mp, xfs_rtblock_t rtbno);
-int xfs_rtalloc_extent_is_free(struct xfs_mount *mp, struct xfs_trans *tp,
- xfs_rtblock_t start, xfs_extlen_t len,
- bool *is_free);
int xfs_rtalloc_reinit_frextents(struct xfs_mount *mp);
#else
-# define xfs_rtallocate_extent(t,b,min,max,l,f,p,rb) (ENOSYS)
-# define xfs_rtfree_extent(t,b,l) (ENOSYS)
-# define xfs_rtpick_extent(m,t,l,rb) (ENOSYS)
-# define xfs_growfs_rt(mp,in) (ENOSYS)
-# define xfs_rtalloc_query_range(t,l,h,f,p) (ENOSYS)
-# define xfs_rtalloc_query_all(m,t,f,p) (ENOSYS)
-# define xfs_rtbuf_get(m,t,b,i,p) (ENOSYS)
-# define xfs_verify_rtbno(m, r) (false)
-# define xfs_rtalloc_extent_is_free(m,t,s,l,i) (ENOSYS)
-# define xfs_rtalloc_reinit_frextents(m) (0)
+# define xfs_rtallocate_extent(t,b,min,max,l,f,p,rb) (-ENOSYS)
+# define xfs_rtpick_extent(m,t,l,rb) (-ENOSYS)
+# define xfs_growfs_rt(mp,in) (-ENOSYS)
+# define xfs_rtalloc_reinit_frextents(m) (0)
static inline int /* error */
xfs_rtmount_init(
xfs_mount_t *mp) /* file system mount structure */
@@ -157,7 +89,7 @@ xfs_rtmount_init(
xfs_warn(mp, "Not built with CONFIG_XFS_RT");
return -ENOSYS;
}
-# define xfs_rtmount_inodes(m) (((mp)->m_sb.sb_rblocks == 0)? 0 : (ENOSYS))
+# define xfs_rtmount_inodes(m) (((mp)->m_sb.sb_rblocks == 0)? 0 : (-ENOSYS))
# define xfs_rtunmount_inodes(m)
#endif /* CONFIG_XFS_RT */
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index f0ae07828153..764304595e8b 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -42,6 +42,7 @@
#include "xfs_xattr.h"
#include "xfs_iunlink_item.h"
#include "xfs_dahash_test.h"
+#include "xfs_rtbitmap.h"
#include "scrub/stats.h"
#include <linux/magic.h>
@@ -896,7 +897,7 @@ xfs_fs_statfs(
statp->f_blocks = sbp->sb_rblocks;
freertx = percpu_counter_sum_positive(&mp->m_frextents);
- statp->f_bavail = statp->f_bfree = freertx * sbp->sb_rextsize;
+ statp->f_bavail = statp->f_bfree = xfs_rtx_to_rtb(mp, freertx);
}
return 0;
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 8c0bfc9a33b1..305c9d07bf1b 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -24,6 +24,7 @@
#include "xfs_dquot_item.h"
#include "xfs_dquot.h"
#include "xfs_icache.h"
+#include "xfs_rtbitmap.h"
struct kmem_cache *xfs_trans_cache;
@@ -655,6 +656,10 @@ xfs_trans_unreserve_and_mod_sb(
mp->m_sb.sb_agcount += tp->t_agcount_delta;
mp->m_sb.sb_imax_pct += tp->t_imaxpct_delta;
mp->m_sb.sb_rextsize += tp->t_rextsize_delta;
+ if (tp->t_rextsize_delta) {
+ mp->m_rtxblklog = log2_if_power2(mp->m_sb.sb_rextsize);
+ mp->m_rtxblkmask = mask64_if_power2(mp->m_sb.sb_rextsize);
+ }
mp->m_sb.sb_rbmblocks += tp->t_rbmblocks_delta;
mp->m_sb.sb_rblocks += tp->t_rblocks_delta;
mp->m_sb.sb_rextents += tp->t_rextents_delta;
@@ -1196,7 +1201,7 @@ xfs_trans_alloc_inode(
retry:
error = xfs_trans_alloc(mp, resv, dblocks,
- rblocks / mp->m_sb.sb_rextsize,
+ xfs_extlen_to_rtxlen(mp, rblocks),
force ? XFS_TRANS_RESERVE : 0, &tp);
if (error)
return error;
diff --git a/include/dt-bindings/watchdog/aspeed-wdt.h b/include/dt-bindings/watchdog/aspeed-wdt.h
new file mode 100644
index 000000000000..7ae6d84b2bd9
--- /dev/null
+++ b/include/dt-bindings/watchdog/aspeed-wdt.h
@@ -0,0 +1,92 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+
+#ifndef DT_BINDINGS_ASPEED_WDT_H
+#define DT_BINDINGS_ASPEED_WDT_H
+
+#define AST2500_WDT_RESET_CPU (1 << 0)
+#define AST2500_WDT_RESET_COPROC (1 << 1)
+#define AST2500_WDT_RESET_SDRAM (1 << 2)
+#define AST2500_WDT_RESET_AHB (1 << 3)
+#define AST2500_WDT_RESET_I2C (1 << 4)
+#define AST2500_WDT_RESET_MAC0 (1 << 5)
+#define AST2500_WDT_RESET_MAC1 (1 << 6)
+#define AST2500_WDT_RESET_GRAPHICS (1 << 7)
+#define AST2500_WDT_RESET_USB2_HOST_HUB (1 << 8)
+#define AST2500_WDT_RESET_USB_HOST (1 << 9)
+#define AST2500_WDT_RESET_HID_EHCI (1 << 10)
+#define AST2500_WDT_RESET_VIDEO (1 << 11)
+#define AST2500_WDT_RESET_HAC (1 << 12)
+#define AST2500_WDT_RESET_LPC (1 << 13)
+#define AST2500_WDT_RESET_SDIO (1 << 14)
+#define AST2500_WDT_RESET_MIC (1 << 15)
+#define AST2500_WDT_RESET_CRT (1 << 16)
+#define AST2500_WDT_RESET_PWM (1 << 17)
+#define AST2500_WDT_RESET_PECI (1 << 18)
+#define AST2500_WDT_RESET_JTAG (1 << 19)
+#define AST2500_WDT_RESET_ADC (1 << 20)
+#define AST2500_WDT_RESET_GPIO (1 << 21)
+#define AST2500_WDT_RESET_MCTP (1 << 22)
+#define AST2500_WDT_RESET_XDMA (1 << 23)
+#define AST2500_WDT_RESET_SPI (1 << 24)
+#define AST2500_WDT_RESET_SOC_MISC (1 << 25)
+
+#define AST2500_WDT_RESET_DEFAULT 0x023ffff3
+
+#define AST2600_WDT_RESET1_CPU (1 << 0)
+#define AST2600_WDT_RESET1_SDRAM (1 << 1)
+#define AST2600_WDT_RESET1_AHB (1 << 2)
+#define AST2600_WDT_RESET1_SLI (1 << 3)
+#define AST2600_WDT_RESET1_SOC_MISC0 (1 << 4)
+#define AST2600_WDT_RESET1_COPROC (1 << 5)
+#define AST2600_WDT_RESET1_USB_A (1 << 6)
+#define AST2600_WDT_RESET1_USB_B (1 << 7)
+#define AST2600_WDT_RESET1_UHCI (1 << 8)
+#define AST2600_WDT_RESET1_GRAPHICS (1 << 9)
+#define AST2600_WDT_RESET1_CRT (1 << 10)
+#define AST2600_WDT_RESET1_VIDEO (1 << 11)
+#define AST2600_WDT_RESET1_HAC (1 << 12)
+#define AST2600_WDT_RESET1_DP (1 << 13)
+#define AST2600_WDT_RESET1_DP_MCU (1 << 14)
+#define AST2600_WDT_RESET1_GP_MCU (1 << 15)
+#define AST2600_WDT_RESET1_MAC0 (1 << 16)
+#define AST2600_WDT_RESET1_MAC1 (1 << 17)
+#define AST2600_WDT_RESET1_SDIO0 (1 << 18)
+#define AST2600_WDT_RESET1_JTAG0 (1 << 19)
+#define AST2600_WDT_RESET1_MCTP0 (1 << 20)
+#define AST2600_WDT_RESET1_MCTP1 (1 << 21)
+#define AST2600_WDT_RESET1_XDMA0 (1 << 22)
+#define AST2600_WDT_RESET1_XDMA1 (1 << 23)
+#define AST2600_WDT_RESET1_GPIO0 (1 << 24)
+#define AST2600_WDT_RESET1_RVAS (1 << 25)
+
+#define AST2600_WDT_RESET1_DEFAULT 0x030f1ff1
+
+#define AST2600_WDT_RESET2_CPU (1 << 0)
+#define AST2600_WDT_RESET2_SPI (1 << 1)
+#define AST2600_WDT_RESET2_AHB2 (1 << 2)
+#define AST2600_WDT_RESET2_SLI2 (1 << 3)
+#define AST2600_WDT_RESET2_SOC_MISC1 (1 << 4)
+#define AST2600_WDT_RESET2_MAC2 (1 << 5)
+#define AST2600_WDT_RESET2_MAC3 (1 << 6)
+#define AST2600_WDT_RESET2_SDIO1 (1 << 7)
+#define AST2600_WDT_RESET2_JTAG1 (1 << 8)
+#define AST2600_WDT_RESET2_GPIO1 (1 << 9)
+#define AST2600_WDT_RESET2_MDIO (1 << 10)
+#define AST2600_WDT_RESET2_LPC (1 << 11)
+#define AST2600_WDT_RESET2_PECI (1 << 12)
+#define AST2600_WDT_RESET2_PWM (1 << 13)
+#define AST2600_WDT_RESET2_ADC (1 << 14)
+#define AST2600_WDT_RESET2_FSI (1 << 15)
+#define AST2600_WDT_RESET2_I2C (1 << 16)
+#define AST2600_WDT_RESET2_I3C_GLOBAL (1 << 17)
+#define AST2600_WDT_RESET2_I3C0 (1 << 18)
+#define AST2600_WDT_RESET2_I3C1 (1 << 19)
+#define AST2600_WDT_RESET2_I3C2 (1 << 20)
+#define AST2600_WDT_RESET2_I3C3 (1 << 21)
+#define AST2600_WDT_RESET2_I3C4 (1 << 22)
+#define AST2600_WDT_RESET2_I3C5 (1 << 23)
+#define AST2600_WDT_RESET2_ESPI (1 << 26)
+
+#define AST2600_WDT_RESET2_DEFAULT 0x03fffff1
+
+#endif
diff --git a/include/linux/amd-iommu.h b/include/linux/amd-iommu.h
index 99a5201d9e62..dc7ed2f46886 100644
--- a/include/linux/amd-iommu.h
+++ b/include/linux/amd-iommu.h
@@ -33,126 +33,6 @@ struct pci_dev;
extern int amd_iommu_detect(void);
-/**
- * amd_iommu_init_device() - Init device for use with IOMMUv2 driver
- * @pdev: The PCI device to initialize
- * @pasids: Number of PASIDs to support for this device
- *
- * This function does all setup for the device pdev so that it can be
- * used with IOMMUv2.
- * Returns 0 on success or negative value on error.
- */
-extern int amd_iommu_init_device(struct pci_dev *pdev, int pasids);
-
-/**
- * amd_iommu_free_device() - Free all IOMMUv2 related device resources
- * and disable IOMMUv2 usage for this device
- * @pdev: The PCI device to disable IOMMUv2 usage for'
- */
-extern void amd_iommu_free_device(struct pci_dev *pdev);
-
-/**
- * amd_iommu_bind_pasid() - Bind a given task to a PASID on a device
- * @pdev: The PCI device to bind the task to
- * @pasid: The PASID on the device the task should be bound to
- * @task: the task to bind
- *
- * The function returns 0 on success or a negative value on error.
- */
-extern int amd_iommu_bind_pasid(struct pci_dev *pdev, u32 pasid,
- struct task_struct *task);
-
-/**
- * amd_iommu_unbind_pasid() - Unbind a PASID from its task on
- * a device
- * @pdev: The device of the PASID
- * @pasid: The PASID to unbind
- *
- * When this function returns the device is no longer using the PASID
- * and the PASID is no longer bound to its task.
- */
-extern void amd_iommu_unbind_pasid(struct pci_dev *pdev, u32 pasid);
-
-/**
- * amd_iommu_set_invalid_ppr_cb() - Register a call-back for failed
- * PRI requests
- * @pdev: The PCI device the call-back should be registered for
- * @cb: The call-back function
- *
- * The IOMMUv2 driver invokes this call-back when it is unable to
- * successfully handle a PRI request. The device driver can then decide
- * which PRI response the device should see. Possible return values for
- * the call-back are:
- *
- * - AMD_IOMMU_INV_PRI_RSP_SUCCESS - Send SUCCESS back to the device
- * - AMD_IOMMU_INV_PRI_RSP_INVALID - Send INVALID back to the device
- * - AMD_IOMMU_INV_PRI_RSP_FAIL - Send Failure back to the device,
- * the device is required to disable
- * PRI when it receives this response
- *
- * The function returns 0 on success or negative value on error.
- */
-#define AMD_IOMMU_INV_PRI_RSP_SUCCESS 0
-#define AMD_IOMMU_INV_PRI_RSP_INVALID 1
-#define AMD_IOMMU_INV_PRI_RSP_FAIL 2
-
-typedef int (*amd_iommu_invalid_ppr_cb)(struct pci_dev *pdev,
- u32 pasid,
- unsigned long address,
- u16);
-
-extern int amd_iommu_set_invalid_ppr_cb(struct pci_dev *pdev,
- amd_iommu_invalid_ppr_cb cb);
-
-#define PPR_FAULT_EXEC (1 << 1)
-#define PPR_FAULT_READ (1 << 2)
-#define PPR_FAULT_WRITE (1 << 5)
-#define PPR_FAULT_USER (1 << 6)
-#define PPR_FAULT_RSVD (1 << 7)
-#define PPR_FAULT_GN (1 << 8)
-
-/**
- * amd_iommu_device_info() - Get information about IOMMUv2 support of a
- * PCI device
- * @pdev: PCI device to query information from
- * @info: A pointer to an amd_iommu_device_info structure which will contain
- * the information about the PCI device
- *
- * Returns 0 on success, negative value on error
- */
-
-#define AMD_IOMMU_DEVICE_FLAG_ATS_SUP 0x1 /* ATS feature supported */
-#define AMD_IOMMU_DEVICE_FLAG_PRI_SUP 0x2 /* PRI feature supported */
-#define AMD_IOMMU_DEVICE_FLAG_PASID_SUP 0x4 /* PASID context supported */
-#define AMD_IOMMU_DEVICE_FLAG_EXEC_SUP 0x8 /* Device may request execution
- on memory pages */
-#define AMD_IOMMU_DEVICE_FLAG_PRIV_SUP 0x10 /* Device may request
- super-user privileges */
-
-struct amd_iommu_device_info {
- int max_pasids;
- u32 flags;
-};
-
-extern int amd_iommu_device_info(struct pci_dev *pdev,
- struct amd_iommu_device_info *info);
-
-/**
- * amd_iommu_set_invalidate_ctx_cb() - Register a call-back for invalidating
- * a pasid context. This call-back is
- * invoked when the IOMMUv2 driver needs to
- * invalidate a PASID context, for example
- * because the task that is bound to that
- * context is about to exit.
- *
- * @pdev: The PCI device the call-back should be registered for
- * @cb: The call-back function
- */
-
-typedef void (*amd_iommu_invalidate_ctx)(struct pci_dev *pdev, u32 pasid);
-
-extern int amd_iommu_set_invalidate_ctx_cb(struct pci_dev *pdev,
- amd_iommu_invalidate_ctx cb);
#else /* CONFIG_AMD_IOMMU */
static inline int amd_iommu_detect(void) { return -ENODEV; }
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 8fb1b41b4d15..ec289c1016f5 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -66,6 +66,7 @@ struct iommu_domain_geometry {
#define __IOMMU_DOMAIN_DMA_FQ (1U << 3) /* DMA-API uses flush queue */
#define __IOMMU_DOMAIN_SVA (1U << 4) /* Shared process address space */
+#define __IOMMU_DOMAIN_PLATFORM (1U << 5)
#define __IOMMU_DOMAIN_NESTED (1U << 6) /* User-managed address space nested
on a stage-2 translation */
@@ -86,6 +87,8 @@ struct iommu_domain_geometry {
* invalidation.
* IOMMU_DOMAIN_SVA - DMA addresses are shared process addresses
* represented by mm_struct's.
+ * IOMMU_DOMAIN_PLATFORM - Legacy domain for drivers that do their own
+ * dma_api stuff. Do not use in new drivers.
*/
#define IOMMU_DOMAIN_BLOCKED (0U)
#define IOMMU_DOMAIN_IDENTITY (__IOMMU_DOMAIN_PT)
@@ -96,6 +99,7 @@ struct iommu_domain_geometry {
__IOMMU_DOMAIN_DMA_API | \
__IOMMU_DOMAIN_DMA_FQ)
#define IOMMU_DOMAIN_SVA (__IOMMU_DOMAIN_SVA)
+#define IOMMU_DOMAIN_PLATFORM (__IOMMU_DOMAIN_PLATFORM)
#define IOMMU_DOMAIN_NESTED (__IOMMU_DOMAIN_NESTED)
struct iommu_domain {
@@ -340,13 +344,12 @@ static inline int __iommu_copy_struct_from_user(
* NULL while the @user_data can be optionally provided, the
* new domain must support __IOMMU_DOMAIN_PAGING.
* Upon failure, ERR_PTR must be returned.
+ * @domain_alloc_paging: Allocate an iommu_domain that can be used for
+ * UNMANAGED, DMA, and DMA_FQ domain types.
* @probe_device: Add device to iommu driver handling
* @release_device: Remove device from iommu driver handling
* @probe_finalize: Do final setup work after the device is added to an IOMMU
* group and attached to the groups domain
- * @set_platform_dma_ops: Returning control back to the platform DMA ops. This op
- * is to support old IOMMU drivers, new drivers should use
- * default domains, and the common IOMMU DMA ops.
* @device_group: find iommu group for a particular device
* @get_resv_regions: Request list of reserved regions for a device
* @of_xlate: add OF master IDs to iommu grouping
@@ -365,6 +368,13 @@ static inline int __iommu_copy_struct_from_user(
* will be blocked by the hardware.
* @pgsize_bitmap: bitmap of all possible supported page sizes
* @owner: Driver module providing these ops
+ * @identity_domain: An always available, always attachable identity
+ * translation.
+ * @blocked_domain: An always available, always attachable blocking
+ * translation.
+ * @default_domain: If not NULL this will always be set as the default domain.
+ * This should be an IDENTITY/BLOCKED/PLATFORM domain.
+ * Do not use in new drivers.
*/
struct iommu_ops {
bool (*capable)(struct device *dev, enum iommu_cap);
@@ -375,11 +385,11 @@ struct iommu_ops {
struct iommu_domain *(*domain_alloc_user)(
struct device *dev, u32 flags, struct iommu_domain *parent,
const struct iommu_user_data *user_data);
+ struct iommu_domain *(*domain_alloc_paging)(struct device *dev);
struct iommu_device *(*probe_device)(struct device *dev);
void (*release_device)(struct device *dev);
void (*probe_finalize)(struct device *dev);
- void (*set_platform_dma_ops)(struct device *dev);
struct iommu_group *(*device_group)(struct device *dev);
/* Request/Free a list of reserved regions for a device */
@@ -402,6 +412,9 @@ struct iommu_ops {
const struct iommu_domain_ops *default_domain_ops;
unsigned long pgsize_bitmap;
struct module *owner;
+ struct iommu_domain *identity_domain;
+ struct iommu_domain *blocked_domain;
+ struct iommu_domain *default_domain;
};
/**
@@ -420,10 +433,8 @@ struct iommu_ops {
* * ENODEV - device specific errors, not able to be attached
* * <others> - treated as ENODEV by the caller. Use is discouraged
* @set_dev_pasid: set an iommu domain to a pasid of device
- * @map: map a physically contiguous memory region to an iommu domain
* @map_pages: map a physically contiguous set of pages of the same size to
* an iommu domain.
- * @unmap: unmap a physically contiguous memory region from an iommu domain
* @unmap_pages: unmap a number of pages of the same size from an iommu domain
* @flush_iotlb_all: Synchronously flush all hardware TLBs for this domain
* @iotlb_sync_map: Sync mappings created recently using @map to the hardware
@@ -442,20 +453,16 @@ struct iommu_domain_ops {
int (*set_dev_pasid)(struct iommu_domain *domain, struct device *dev,
ioasid_t pasid);
- int (*map)(struct iommu_domain *domain, unsigned long iova,
- phys_addr_t paddr, size_t size, int prot, gfp_t gfp);
int (*map_pages)(struct iommu_domain *domain, unsigned long iova,
phys_addr_t paddr, size_t pgsize, size_t pgcount,
int prot, gfp_t gfp, size_t *mapped);
- size_t (*unmap)(struct iommu_domain *domain, unsigned long iova,
- size_t size, struct iommu_iotlb_gather *iotlb_gather);
size_t (*unmap_pages)(struct iommu_domain *domain, unsigned long iova,
size_t pgsize, size_t pgcount,
struct iommu_iotlb_gather *iotlb_gather);
void (*flush_iotlb_all)(struct iommu_domain *domain);
- void (*iotlb_sync_map)(struct iommu_domain *domain, unsigned long iova,
- size_t size);
+ int (*iotlb_sync_map)(struct iommu_domain *domain, unsigned long iova,
+ size_t size);
void (*iotlb_sync)(struct iommu_domain *domain,
struct iommu_iotlb_gather *iotlb_gather);
@@ -476,6 +483,7 @@ struct iommu_domain_ops {
* @list: Used by the iommu-core to keep a list of registered iommus
* @ops: iommu-ops for talking to this iommu
* @dev: struct device for sysfs handling
+ * @singleton_group: Used internally for drivers that have only one group
* @max_pasids: number of supported PASIDs
*/
struct iommu_device {
@@ -483,6 +491,7 @@ struct iommu_device {
const struct iommu_ops *ops;
struct fwnode_handle *fwnode;
struct device *dev;
+ struct iommu_group *singleton_group;
u32 max_pasids;
};
@@ -526,6 +535,7 @@ struct iommu_fault_param {
* @attach_deferred: the dma domain attachment is deferred
* @pci_32bit_workaround: Limit DMA allocations to 32-bit IOVAs
* @require_direct: device requires IOMMU_RESV_DIRECT regions
+ * @shadow_on_flush: IOTLB flushes are used to sync shadow tables
*
* TODO: migrate other per device data pointers under iommu_dev_data, e.g.
* struct iommu_group *iommu_group;
@@ -541,6 +551,7 @@ struct dev_iommu {
u32 attach_deferred:1;
u32 pci_32bit_workaround:1;
u32 require_direct:1;
+ u32 shadow_on_flush:1;
};
int iommu_device_register(struct iommu_device *iommu,
@@ -768,6 +779,7 @@ extern struct iommu_group *pci_device_group(struct device *dev);
extern struct iommu_group *generic_device_group(struct device *dev);
/* FSL-MC device grouping function */
struct iommu_group *fsl_mc_device_group(struct device *dev);
+extern struct iommu_group *generic_single_device_group(struct device *dev);
/**
* struct iommu_fwspec - per-device IOMMU instance data
@@ -1253,7 +1265,7 @@ static inline void iommu_free_global_pasid(ioasid_t pasid) {}
* Creates a mapping at @iova for the buffer described by a scatterlist
* stored in the given sg_table object in the provided IOMMU domain.
*/
-static inline size_t iommu_map_sgtable(struct iommu_domain *domain,
+static inline ssize_t iommu_map_sgtable(struct iommu_domain *domain,
unsigned long iova, struct sg_table *sgt, int prot)
{
return iommu_map_sg(domain, iova, sgt->sgl, sgt->orig_nents, prot,
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index cd628c4b011e..cd797e00fe35 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -239,6 +239,7 @@ struct nfs_server {
struct list_head delegations;
struct list_head ss_copies;
+ unsigned long delegation_gen;
unsigned long mig_gen;
unsigned long mig_status;
#define NFS_MIG_IN_TRANSITION (1)
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 12bbb5c63664..539b57fbf3ce 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1772,7 +1772,7 @@ struct nfs_rpc_ops {
void (*rename_rpc_prepare)(struct rpc_task *task, struct nfs_renamedata *);
int (*rename_done) (struct rpc_task *task, struct inode *old_dir, struct inode *new_dir);
int (*link) (struct inode *, struct inode *, const struct qstr *);
- int (*symlink) (struct inode *, struct dentry *, struct page *,
+ int (*symlink) (struct inode *, struct dentry *, struct folio *,
unsigned int, struct iattr *);
int (*mkdir) (struct inode *, struct dentry *, struct iattr *);
int (*rmdir) (struct inode *, const struct qstr *);
diff --git a/include/linux/pwm.h b/include/linux/pwm.h
index d2f9f690a9c1..e3b437587b32 100644
--- a/include/linux/pwm.h
+++ b/include/linux/pwm.h
@@ -71,7 +71,6 @@ struct pwm_state {
* @hwpwm: per-chip relative index of the PWM device
* @pwm: global index of the PWM device
* @chip: PWM chip providing this PWM device
- * @chip_data: chip-private data associated with the PWM device
* @args: PWM arguments
* @state: last applied state
* @last: last implemented state (for PWM_DEBUG)
@@ -82,7 +81,6 @@ struct pwm_device {
unsigned int hwpwm;
unsigned int pwm;
struct pwm_chip *chip;
- void *chip_data;
struct pwm_args args;
struct pwm_state state;
@@ -267,7 +265,6 @@ struct pwm_capture {
* @get_state: get the current PWM state. This function is only
* called once per PWM device when the PWM chip is
* registered.
- * @owner: helps prevent removal of modules exporting active PWMs
*/
struct pwm_ops {
int (*request)(struct pwm_chip *chip, struct pwm_device *pwm);
@@ -278,13 +275,13 @@ struct pwm_ops {
const struct pwm_state *state);
int (*get_state)(struct pwm_chip *chip, struct pwm_device *pwm,
struct pwm_state *state);
- struct module *owner;
};
/**
* struct pwm_chip - abstract a PWM controller
* @dev: device providing the PWMs
* @ops: callbacks for this PWM controller
+ * @owner: module providing this chip
* @base: number of first PWM controlled by this chip
* @npwm: number of PWMs controlled by this chip
* @of_xlate: request a PWM device given a device tree PWM specifier
@@ -295,6 +292,7 @@ struct pwm_ops {
struct pwm_chip {
struct device *dev;
const struct pwm_ops *ops;
+ struct module *owner;
int base;
unsigned int npwm;
@@ -383,13 +381,13 @@ static inline void pwm_disable(struct pwm_device *pwm)
/* PWM provider APIs */
int pwm_capture(struct pwm_device *pwm, struct pwm_capture *result,
unsigned long timeout);
-int pwm_set_chip_data(struct pwm_device *pwm, void *data);
-void *pwm_get_chip_data(struct pwm_device *pwm);
-int pwmchip_add(struct pwm_chip *chip);
+int __pwmchip_add(struct pwm_chip *chip, struct module *owner);
+#define pwmchip_add(chip) __pwmchip_add(chip, THIS_MODULE)
void pwmchip_remove(struct pwm_chip *chip);
-int devm_pwmchip_add(struct device *dev, struct pwm_chip *chip);
+int __devm_pwmchip_add(struct device *dev, struct pwm_chip *chip, struct module *owner);
+#define devm_pwmchip_add(dev, chip) __devm_pwmchip_add(dev, chip, THIS_MODULE)
struct pwm_device *pwm_request_from_chip(struct pwm_chip *chip,
unsigned int index,
@@ -445,16 +443,6 @@ static inline int pwm_capture(struct pwm_device *pwm,
return -EINVAL;
}
-static inline int pwm_set_chip_data(struct pwm_device *pwm, void *data)
-{
- return -EINVAL;
-}
-
-static inline void *pwm_get_chip_data(struct pwm_device *pwm)
-{
- return NULL;
-}
-
static inline int pwmchip_add(struct pwm_chip *chip)
{
return -EINVAL;
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index af7358277f1c..e9d4377d03c6 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -92,6 +92,7 @@ struct rpc_clnt {
};
const struct cred *cl_cred;
unsigned int cl_max_connect; /* max number of transports not to the same IP */
+ struct super_block *pipefs_sb;
};
/*
diff --git a/include/soc/tegra/mc.h b/include/soc/tegra/mc.h
index a5ef84944a06..71ae37d3bedd 100644
--- a/include/soc/tegra/mc.h
+++ b/include/soc/tegra/mc.h
@@ -96,7 +96,6 @@ struct tegra_smmu_soc {
struct tegra_mc;
struct tegra_smmu;
-struct gart_device;
#ifdef CONFIG_TEGRA_IOMMU_SMMU
struct tegra_smmu *tegra_smmu_probe(struct device *dev,
@@ -116,28 +115,6 @@ static inline void tegra_smmu_remove(struct tegra_smmu *smmu)
}
#endif
-#ifdef CONFIG_TEGRA_IOMMU_GART
-struct gart_device *tegra_gart_probe(struct device *dev, struct tegra_mc *mc);
-int tegra_gart_suspend(struct gart_device *gart);
-int tegra_gart_resume(struct gart_device *gart);
-#else
-static inline struct gart_device *
-tegra_gart_probe(struct device *dev, struct tegra_mc *mc)
-{
- return ERR_PTR(-ENODEV);
-}
-
-static inline int tegra_gart_suspend(struct gart_device *gart)
-{
- return -ENODEV;
-}
-
-static inline int tegra_gart_resume(struct gart_device *gart)
-{
- return -ENODEV;
-}
-#endif
-
struct tegra_mc_reset {
const char *name;
unsigned long id;
@@ -185,8 +162,6 @@ struct tegra_mc_ops {
*/
int (*probe)(struct tegra_mc *mc);
void (*remove)(struct tegra_mc *mc);
- int (*suspend)(struct tegra_mc *mc);
- int (*resume)(struct tegra_mc *mc);
irqreturn_t (*handle_irq)(int irq, void *data);
int (*probe_device)(struct tegra_mc *mc, struct device *dev);
};
@@ -225,7 +200,6 @@ struct tegra_mc {
struct tegra_bpmp *bpmp;
struct device *dev;
struct tegra_smmu *smmu;
- struct gart_device *gart;
void __iomem *regs;
void __iomem *bcast_ch_regs;
void __iomem **ch_regs;
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index 621037a0aa87..ce1bb2301c06 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -1006,6 +1006,9 @@ void kgdb_panic(const char *msg)
if (panic_timeout)
return;
+ debug_locks_off();
+ console_flush_on_panic(CONSOLE_FLUSH_PENDING);
+
if (dbg_kdb_mode)
kdb_printf("PANIC: %s\n", msg);
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index 438b868cbfa9..6b213c8252d6 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -272,11 +272,10 @@ char *kdbgetenv(const char *match)
* kdballocenv - This function is used to allocate bytes for
* environment entries.
* Parameters:
- * match A character string representing a numeric value
- * Outputs:
- * *value the unsigned long representation of the env variable 'match'
+ * bytes The number of bytes to allocate in the static buffer.
* Returns:
- * Zero on success, a kdb diagnostic on failure.
+ * A pointer to the allocated space in the buffer on success.
+ * NULL if bytes > size available in the envbuffer.
* Remarks:
* We use a static environment buffer (envbuffer) to hold the values
* of dynamically generated environment variables (see kdb_set). Buffer
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index 0d866eaa4cc8..b531c33e9545 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -500,6 +500,7 @@ static inline void rcu_expedite_gp(void) { }
static inline void rcu_unexpedite_gp(void) { }
static inline void rcu_async_hurry(void) { }
static inline void rcu_async_relax(void) { }
+static inline bool rcu_cpu_online(int cpu) { return true; }
#else /* #ifdef CONFIG_TINY_RCU */
bool rcu_gp_is_normal(void); /* Internal RCU use. */
bool rcu_gp_is_expedited(void); /* Internal RCU use. */
@@ -509,6 +510,7 @@ void rcu_unexpedite_gp(void);
void rcu_async_hurry(void);
void rcu_async_relax(void);
void rcupdate_announce_bootup_oddness(void);
+bool rcu_cpu_online(int cpu);
#ifdef CONFIG_TASKS_RCU_GENERIC
void show_rcu_tasks_gp_kthreads(void);
#else /* #ifdef CONFIG_TASKS_RCU_GENERIC */
diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h
index 1fa631168594..f54d5782eca0 100644
--- a/kernel/rcu/tasks.h
+++ b/kernel/rcu/tasks.h
@@ -895,10 +895,36 @@ static void rcu_tasks_pregp_step(struct list_head *hop)
synchronize_rcu();
}
+/* Check for quiescent states since the pregp's synchronize_rcu() */
+static bool rcu_tasks_is_holdout(struct task_struct *t)
+{
+ int cpu;
+
+ /* Has the task been seen voluntarily sleeping? */
+ if (!READ_ONCE(t->on_rq))
+ return false;
+
+ /*
+ * Idle tasks (or idle injection) within the idle loop are RCU-tasks
+ * quiescent states. But CPU boot code performed by the idle task
+ * isn't a quiescent state.
+ */
+ if (is_idle_task(t))
+ return false;
+
+ cpu = task_cpu(t);
+
+ /* Idle tasks on offline CPUs are RCU-tasks quiescent states. */
+ if (t == idle_task(cpu) && !rcu_cpu_online(cpu))
+ return false;
+
+ return true;
+}
+
/* Per-task initial processing. */
static void rcu_tasks_pertask(struct task_struct *t, struct list_head *hop)
{
- if (t != current && READ_ONCE(t->on_rq) && !is_idle_task(t)) {
+ if (t != current && rcu_tasks_is_holdout(t)) {
get_task_struct(t);
t->rcu_tasks_nvcsw = READ_ONCE(t->nvcsw);
WRITE_ONCE(t->rcu_tasks_holdout, true);
@@ -947,7 +973,7 @@ static void check_holdout_task(struct task_struct *t,
if (!READ_ONCE(t->rcu_tasks_holdout) ||
t->rcu_tasks_nvcsw != READ_ONCE(t->nvcsw) ||
- !READ_ONCE(t->on_rq) ||
+ !rcu_tasks_is_holdout(t) ||
(IS_ENABLED(CONFIG_NO_HZ_FULL) &&
!is_idle_task(t) && t->rcu_tasks_idle_cpu >= 0)) {
WRITE_ONCE(t->rcu_tasks_holdout, false);
@@ -1525,7 +1551,7 @@ static int trc_inspect_reader(struct task_struct *t, void *bhp_in)
} else {
// The task is not running, so C-language access is safe.
nesting = t->trc_reader_nesting;
- WARN_ON_ONCE(ofl && task_curr(t) && !is_idle_task(t));
+ WARN_ON_ONCE(ofl && task_curr(t) && (t != idle_task(task_cpu(t))));
if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB) && ofl)
n_heavy_reader_ofl_updates++;
}
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index d3a97e129020..3ac3c846105f 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -755,14 +755,19 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp)
}
/*
- * Return true if the specified CPU has passed through a quiescent
- * state by virtue of being in or having passed through an dynticks
- * idle state since the last call to dyntick_save_progress_counter()
- * for this same CPU, or by virtue of having been offline.
+ * Returns positive if the specified CPU has passed through a quiescent state
+ * by virtue of being in or having passed through an dynticks idle state since
+ * the last call to dyntick_save_progress_counter() for this same CPU, or by
+ * virtue of having been offline.
+ *
+ * Returns negative if the specified CPU needs a force resched.
+ *
+ * Returns zero otherwise.
*/
static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
{
unsigned long jtsq;
+ int ret = 0;
struct rcu_node *rnp = rdp->mynode;
/*
@@ -848,8 +853,8 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
(time_after(jiffies, READ_ONCE(rdp->last_fqs_resched) + jtsq * 3) ||
rcu_state.cbovld)) {
WRITE_ONCE(rdp->rcu_urgent_qs, true);
- resched_cpu(rdp->cpu);
WRITE_ONCE(rdp->last_fqs_resched, jiffies);
+ ret = -1;
}
/*
@@ -862,8 +867,8 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
if (time_after(jiffies, rcu_state.jiffies_resched)) {
if (time_after(jiffies,
READ_ONCE(rdp->last_fqs_resched) + jtsq)) {
- resched_cpu(rdp->cpu);
WRITE_ONCE(rdp->last_fqs_resched, jiffies);
+ ret = -1;
}
if (IS_ENABLED(CONFIG_IRQ_WORK) &&
!rdp->rcu_iw_pending && rdp->rcu_iw_gp_seq != rnp->gp_seq &&
@@ -892,7 +897,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
}
}
- return 0;
+ return ret;
}
/* Trace-event wrapper function for trace_rcu_future_grace_period. */
@@ -2271,15 +2276,15 @@ static void force_qs_rnp(int (*f)(struct rcu_data *rdp))
{
int cpu;
unsigned long flags;
- unsigned long mask;
- struct rcu_data *rdp;
struct rcu_node *rnp;
rcu_state.cbovld = rcu_state.cbovldnext;
rcu_state.cbovldnext = false;
rcu_for_each_leaf_node(rnp) {
+ unsigned long mask = 0;
+ unsigned long rsmask = 0;
+
cond_resched_tasks_rcu_qs();
- mask = 0;
raw_spin_lock_irqsave_rcu_node(rnp, flags);
rcu_state.cbovldnext |= !!rnp->cbovldmask;
if (rnp->qsmask == 0) {
@@ -2297,11 +2302,17 @@ static void force_qs_rnp(int (*f)(struct rcu_data *rdp))
continue;
}
for_each_leaf_node_cpu_mask(rnp, cpu, rnp->qsmask) {
+ struct rcu_data *rdp;
+ int ret;
+
rdp = per_cpu_ptr(&rcu_data, cpu);
- if (f(rdp)) {
+ ret = f(rdp);
+ if (ret > 0) {
mask |= rdp->grpmask;
rcu_disable_urgency_upon_qs(rdp);
}
+ if (ret < 0)
+ rsmask |= rdp->grpmask;
}
if (mask != 0) {
/* Idle/offline CPUs, report (releases rnp->lock). */
@@ -2310,6 +2321,9 @@ static void force_qs_rnp(int (*f)(struct rcu_data *rdp))
/* Nothing to do here, so just drop the lock. */
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
}
+
+ for_each_leaf_node_cpu_mask(rnp, cpu, rsmask)
+ resched_cpu(cpu);
}
}
@@ -4195,6 +4209,13 @@ static bool rcu_rdp_cpu_online(struct rcu_data *rdp)
return !!(rdp->grpmask & rcu_rnp_online_cpus(rdp->mynode));
}
+bool rcu_cpu_online(int cpu)
+{
+ struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
+
+ return rcu_rdp_cpu_online(rdp);
+}
+
#if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU)
/*
diff --git a/mm/memblock.c b/mm/memblock.c
index fd492e5bbdbc..5a88d6d24d79 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -424,7 +424,7 @@ static int __init_memblock memblock_double_array(struct memblock_type *type,
* of memory that aren't suitable for allocation
*/
if (!memblock_can_resize)
- return -1;
+ panic("memblock: cannot resize %s array\n", type->name);
/* Calculate new doubled size */
old_size = type->max * sizeof(struct memblock_region);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 9c210273d06b..daa9582ec861 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -111,7 +111,8 @@ static void rpc_clnt_remove_pipedir(struct rpc_clnt *clnt)
pipefs_sb = rpc_get_sb_net(net);
if (pipefs_sb) {
- __rpc_clnt_remove_pipedir(clnt);
+ if (pipefs_sb == clnt->pipefs_sb)
+ __rpc_clnt_remove_pipedir(clnt);
rpc_put_sb_net(net);
}
}
@@ -151,6 +152,8 @@ rpc_setup_pipedir(struct super_block *pipefs_sb, struct rpc_clnt *clnt)
{
struct dentry *dentry;
+ clnt->pipefs_sb = pipefs_sb;
+
if (clnt->cl_program->pipe_dir_name != NULL) {
dentry = rpc_setup_pipedir_sb(pipefs_sb, clnt);
if (IS_ERR(dentry))
@@ -2171,6 +2174,7 @@ call_connect_status(struct rpc_task *task)
task->tk_status = 0;
switch (status) {
case -ECONNREFUSED:
+ case -ECONNRESET:
/* A positive refusal suggests a rebind is needed. */
if (RPC_IS_SOFTCONN(task))
break;
@@ -2179,7 +2183,6 @@ call_connect_status(struct rpc_task *task)
goto out_retry;
}
fallthrough;
- case -ECONNRESET:
case -ECONNABORTED:
case -ENETDOWN:
case -ENETUNREACH:
@@ -2220,7 +2223,7 @@ call_connect_status(struct rpc_task *task)
}
xprt_switch_put(xps);
if (!task->tk_xprt)
- return;
+ goto out;
}
goto out_retry;
case -ENOBUFS:
@@ -2235,6 +2238,7 @@ out_next:
out_retry:
/* Check for timeouts before looping back to call_bind */
task->tk_action = call_bind;
+out:
rpc_check_timeout(task);
}
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index 5988a5c5ff3f..102c3818bc54 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -769,6 +769,10 @@ void rpcb_getport_async(struct rpc_task *task)
child = rpcb_call_async(rpcb_clnt, map, proc);
rpc_release_client(rpcb_clnt);
+ if (IS_ERR(child)) {
+ /* rpcb_map_release() has freed the arguments */
+ return;
+ }
xprt->stat.bind_count++;
rpc_put_task(child);
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index ab453ede54f0..2364c485540c 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -283,7 +283,7 @@ out_unlock:
xprt_clear_locked(xprt);
out_sleep:
task->tk_status = -EAGAIN;
- if (RPC_IS_SOFT(task))
+ if (RPC_IS_SOFT(task) || RPC_IS_SOFTCONN(task))
rpc_sleep_on_timeout(&xprt->sending, task, NULL,
xprt_request_timeout(req));
else
@@ -349,7 +349,7 @@ out_unlock:
xprt_clear_locked(xprt);
out_sleep:
task->tk_status = -EAGAIN;
- if (RPC_IS_SOFT(task))
+ if (RPC_IS_SOFT(task) || RPC_IS_SOFTCONN(task))
rpc_sleep_on_timeout(&xprt->sending, task, NULL,
xprt_request_timeout(req));
else
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index a15bf2ede89b..58f3dc8d0d71 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1181,6 +1181,7 @@ static void xs_sock_reset_state_flags(struct rpc_xprt *xprt)
{
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
+ transport->xprt_err = 0;
clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state);
clear_bit(XPRT_SOCK_WAKE_ERROR, &transport->sock_state);
clear_bit(XPRT_SOCK_WAKE_WRITE, &transport->sock_state);
@@ -2772,18 +2773,13 @@ static void xs_wake_error(struct sock_xprt *transport)
{
int sockerr;
- if (!test_bit(XPRT_SOCK_WAKE_ERROR, &transport->sock_state))
- return;
- mutex_lock(&transport->recv_mutex);
- if (transport->sock == NULL)
- goto out;
if (!test_and_clear_bit(XPRT_SOCK_WAKE_ERROR, &transport->sock_state))
- goto out;
+ return;
sockerr = xchg(&transport->xprt_err, 0);
- if (sockerr < 0)
+ if (sockerr < 0) {
xprt_wake_pending_tasks(&transport->xprt, sockerr);
-out:
- mutex_unlock(&transport->recv_mutex);
+ xs_tcp_force_close(&transport->xprt);
+ }
}
static void xs_wake_pending(struct sock_xprt *transport)
diff --git a/tools/testing/selftests/riscv/hwprobe/Makefile b/tools/testing/selftests/riscv/hwprobe/Makefile
index ebdbb3c22e54..f224b84591fb 100644
--- a/tools/testing/selftests/riscv/hwprobe/Makefile
+++ b/tools/testing/selftests/riscv/hwprobe/Makefile
@@ -2,9 +2,14 @@
# Copyright (C) 2021 ARM Limited
# Originally tools/testing/arm64/abi/Makefile
-TEST_GEN_PROGS := hwprobe
+CFLAGS += -I$(top_srcdir)/tools/include
+
+TEST_GEN_PROGS := hwprobe cbo
include ../../lib.mk
$(OUTPUT)/hwprobe: hwprobe.c sys_hwprobe.S
- $(CC) -o$@ $(CFLAGS) $(LDFLAGS) $^
+ $(CC) -static -o$@ $(CFLAGS) $(LDFLAGS) $^
+
+$(OUTPUT)/cbo: cbo.c sys_hwprobe.S
+ $(CC) -static -o$@ $(CFLAGS) $(LDFLAGS) $^
diff --git a/tools/testing/selftests/riscv/hwprobe/cbo.c b/tools/testing/selftests/riscv/hwprobe/cbo.c
new file mode 100644
index 000000000000..50a2cc8aef38
--- /dev/null
+++ b/tools/testing/selftests/riscv/hwprobe/cbo.c
@@ -0,0 +1,228 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2023 Ventana Micro Systems Inc.
+ *
+ * Run with 'taskset -c <cpu-list> cbo' to only execute hwprobe on a
+ * subset of cpus, as well as only executing the tests on those cpus.
+ */
+#define _GNU_SOURCE
+#include <stdbool.h>
+#include <stdint.h>
+#include <string.h>
+#include <sched.h>
+#include <signal.h>
+#include <assert.h>
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include <asm/ucontext.h>
+
+#include "hwprobe.h"
+#include "../../kselftest.h"
+
+#define MK_CBO(fn) cpu_to_le32((fn) << 20 | 10 << 15 | 2 << 12 | 0 << 7 | 15)
+
+static char mem[4096] __aligned(4096) = { [0 ... 4095] = 0xa5 };
+
+static bool illegal_insn;
+
+static void sigill_handler(int sig, siginfo_t *info, void *context)
+{
+ unsigned long *regs = (unsigned long *)&((ucontext_t *)context)->uc_mcontext;
+ uint32_t insn = *(uint32_t *)regs[0];
+
+ assert(insn == MK_CBO(regs[11]));
+
+ illegal_insn = true;
+ regs[0] += 4;
+}
+
+static void cbo_insn(char *base, int fn)
+{
+ uint32_t insn = MK_CBO(fn);
+
+ asm volatile(
+ "mv a0, %0\n"
+ "li a1, %1\n"
+ ".4byte %2\n"
+ : : "r" (base), "i" (fn), "i" (insn) : "a0", "a1", "memory");
+}
+
+static void cbo_inval(char *base) { cbo_insn(base, 0); }
+static void cbo_clean(char *base) { cbo_insn(base, 1); }
+static void cbo_flush(char *base) { cbo_insn(base, 2); }
+static void cbo_zero(char *base) { cbo_insn(base, 4); }
+
+static void test_no_zicbom(void *arg)
+{
+ ksft_print_msg("Testing Zicbom instructions remain privileged\n");
+
+ illegal_insn = false;
+ cbo_clean(&mem[0]);
+ ksft_test_result(illegal_insn, "No cbo.clean\n");
+
+ illegal_insn = false;
+ cbo_flush(&mem[0]);
+ ksft_test_result(illegal_insn, "No cbo.flush\n");
+
+ illegal_insn = false;
+ cbo_inval(&mem[0]);
+ ksft_test_result(illegal_insn, "No cbo.inval\n");
+}
+
+static void test_no_zicboz(void *arg)
+{
+ ksft_print_msg("No Zicboz, testing cbo.zero remains privileged\n");
+
+ illegal_insn = false;
+ cbo_zero(&mem[0]);
+ ksft_test_result(illegal_insn, "No cbo.zero\n");
+}
+
+static bool is_power_of_2(__u64 n)
+{
+ return n != 0 && (n & (n - 1)) == 0;
+}
+
+static void test_zicboz(void *arg)
+{
+ struct riscv_hwprobe pair = {
+ .key = RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE,
+ };
+ cpu_set_t *cpus = (cpu_set_t *)arg;
+ __u64 block_size;
+ int i, j;
+ long rc;
+
+ rc = riscv_hwprobe(&pair, 1, sizeof(cpu_set_t), (unsigned long *)cpus, 0);
+ block_size = pair.value;
+ ksft_test_result(rc == 0 && pair.key == RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE &&
+ is_power_of_2(block_size), "Zicboz block size\n");
+ ksft_print_msg("Zicboz block size: %ld\n", block_size);
+
+ illegal_insn = false;
+ cbo_zero(&mem[block_size]);
+ ksft_test_result(!illegal_insn, "cbo.zero\n");
+
+ if (illegal_insn || !is_power_of_2(block_size)) {
+ ksft_test_result_skip("cbo.zero check\n");
+ return;
+ }
+
+ assert(block_size <= 1024);
+
+ for (i = 0; i < 4096 / block_size; ++i) {
+ if (i % 2)
+ cbo_zero(&mem[i * block_size]);
+ }
+
+ for (i = 0; i < 4096 / block_size; ++i) {
+ char expected = i % 2 ? 0x0 : 0xa5;
+
+ for (j = 0; j < block_size; ++j) {
+ if (mem[i * block_size + j] != expected) {
+ ksft_test_result_fail("cbo.zero check\n");
+ ksft_print_msg("cbo.zero check: mem[%d] != 0x%x\n",
+ i * block_size + j, expected);
+ return;
+ }
+ }
+ }
+
+ ksft_test_result_pass("cbo.zero check\n");
+}
+
+static void check_no_zicboz_cpus(cpu_set_t *cpus)
+{
+ struct riscv_hwprobe pair = {
+ .key = RISCV_HWPROBE_KEY_IMA_EXT_0,
+ };
+ cpu_set_t one_cpu;
+ int i = 0, c = 0;
+ long rc;
+
+ while (i++ < CPU_COUNT(cpus)) {
+ while (!CPU_ISSET(c, cpus))
+ ++c;
+
+ CPU_ZERO(&one_cpu);
+ CPU_SET(c, &one_cpu);
+
+ rc = riscv_hwprobe(&pair, 1, sizeof(cpu_set_t), (unsigned long *)&one_cpu, 0);
+ assert(rc == 0 && pair.key == RISCV_HWPROBE_KEY_IMA_EXT_0);
+
+ if (pair.value & RISCV_HWPROBE_EXT_ZICBOZ)
+ ksft_exit_fail_msg("Zicboz is only present on a subset of harts.\n"
+ "Use taskset to select a set of harts where Zicboz\n"
+ "presence (present or not) is consistent for each hart\n");
+ ++c;
+ }
+}
+
+enum {
+ TEST_ZICBOZ,
+ TEST_NO_ZICBOZ,
+ TEST_NO_ZICBOM,
+};
+
+static struct test_info {
+ bool enabled;
+ unsigned int nr_tests;
+ void (*test_fn)(void *arg);
+} tests[] = {
+ [TEST_ZICBOZ] = { .nr_tests = 3, test_zicboz },
+ [TEST_NO_ZICBOZ] = { .nr_tests = 1, test_no_zicboz },
+ [TEST_NO_ZICBOM] = { .nr_tests = 3, test_no_zicbom },
+};
+
+int main(int argc, char **argv)
+{
+ struct sigaction act = {
+ .sa_sigaction = &sigill_handler,
+ .sa_flags = SA_SIGINFO,
+ };
+ struct riscv_hwprobe pair;
+ unsigned int plan = 0;
+ cpu_set_t cpus;
+ long rc;
+ int i;
+
+ if (argc > 1 && !strcmp(argv[1], "--sigill")) {
+ rc = sigaction(SIGILL, &act, NULL);
+ assert(rc == 0);
+ tests[TEST_NO_ZICBOZ].enabled = true;
+ tests[TEST_NO_ZICBOM].enabled = true;
+ }
+
+ rc = sched_getaffinity(0, sizeof(cpu_set_t), &cpus);
+ assert(rc == 0);
+
+ ksft_print_header();
+
+ pair.key = RISCV_HWPROBE_KEY_IMA_EXT_0;
+ rc = riscv_hwprobe(&pair, 1, sizeof(cpu_set_t), (unsigned long *)&cpus, 0);
+ if (rc < 0)
+ ksft_exit_fail_msg("hwprobe() failed with %d\n", rc);
+ assert(rc == 0 && pair.key == RISCV_HWPROBE_KEY_IMA_EXT_0);
+
+ if (pair.value & RISCV_HWPROBE_EXT_ZICBOZ) {
+ tests[TEST_ZICBOZ].enabled = true;
+ tests[TEST_NO_ZICBOZ].enabled = false;
+ } else {
+ check_no_zicboz_cpus(&cpus);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(tests); ++i)
+ plan += tests[i].enabled ? tests[i].nr_tests : 0;
+
+ if (plan == 0)
+ ksft_print_msg("No tests enabled.\n");
+ else
+ ksft_set_plan(plan);
+
+ for (i = 0; i < ARRAY_SIZE(tests); ++i) {
+ if (tests[i].enabled)
+ tests[i].test_fn(&cpus);
+ }
+
+ ksft_finished();
+}
diff --git a/tools/testing/selftests/riscv/hwprobe/hwprobe.c b/tools/testing/selftests/riscv/hwprobe/hwprobe.c
index 09f290a67420..c474891df307 100644
--- a/tools/testing/selftests/riscv/hwprobe/hwprobe.c
+++ b/tools/testing/selftests/riscv/hwprobe/hwprobe.c
@@ -1,14 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
-#include <stddef.h>
-#include <asm/hwprobe.h>
-
-/*
- * Rather than relying on having a new enough libc to define this, just do it
- * ourselves. This way we don't need to be coupled to a new-enough libc to
- * contain the call.
- */
-long riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count,
- size_t cpu_count, unsigned long *cpus, unsigned int flags);
+#include "hwprobe.h"
+#include "../../kselftest.h"
int main(int argc, char **argv)
{
@@ -16,6 +8,9 @@ int main(int argc, char **argv)
unsigned long cpus;
long out;
+ ksft_print_header();
+ ksft_set_plan(5);
+
/* Fake the CPU_SET ops. */
cpus = -1;
@@ -25,13 +20,16 @@ int main(int argc, char **argv)
*/
for (long i = 0; i < 8; i++)
pairs[i].key = i;
+
out = riscv_hwprobe(pairs, 8, 1, &cpus, 0);
if (out != 0)
- return -1;
+ ksft_exit_fail_msg("hwprobe() failed with %ld\n", out);
+
for (long i = 0; i < 4; ++i) {
/* Fail if the kernel claims not to recognize a base key. */
if ((i < 4) && (pairs[i].key != i))
- return -2;
+ ksft_exit_fail_msg("Failed to recognize base key: key != i, "
+ "key=%ld, i=%ld\n", pairs[i].key, i);
if (pairs[i].key != RISCV_HWPROBE_KEY_BASE_BEHAVIOR)
continue;
@@ -39,52 +37,30 @@ int main(int argc, char **argv)
if (pairs[i].value & RISCV_HWPROBE_BASE_BEHAVIOR_IMA)
continue;
- return -3;
+ ksft_exit_fail_msg("Unexpected pair: (%ld, %ld)\n", pairs[i].key, pairs[i].value);
}
- /*
- * This should also work with a NULL CPU set, but should not work
- * with an improperly supplied CPU set.
- */
out = riscv_hwprobe(pairs, 8, 0, 0, 0);
- if (out != 0)
- return -4;
+ ksft_test_result(out == 0, "NULL CPU set\n");
out = riscv_hwprobe(pairs, 8, 0, &cpus, 0);
- if (out == 0)
- return -5;
+ ksft_test_result(out != 0, "Bad CPU set\n");
out = riscv_hwprobe(pairs, 8, 1, 0, 0);
- if (out == 0)
- return -6;
+ ksft_test_result(out != 0, "NULL CPU set with non-zero count\n");
- /*
- * Check that keys work by providing one that we know exists, and
- * checking to make sure the resultig pair is what we asked for.
- */
pairs[0].key = RISCV_HWPROBE_KEY_BASE_BEHAVIOR;
out = riscv_hwprobe(pairs, 1, 1, &cpus, 0);
- if (out != 0)
- return -7;
- if (pairs[0].key != RISCV_HWPROBE_KEY_BASE_BEHAVIOR)
- return -8;
+ ksft_test_result(out == 0 && pairs[0].key == RISCV_HWPROBE_KEY_BASE_BEHAVIOR,
+ "Existing key is maintained\n");
- /*
- * Check that an unknown key gets overwritten with -1,
- * but doesn't block elements after it.
- */
pairs[0].key = 0x5555;
pairs[1].key = 1;
pairs[1].value = 0xAAAA;
out = riscv_hwprobe(pairs, 2, 0, 0, 0);
- if (out != 0)
- return -9;
-
- if (pairs[0].key != -1)
- return -10;
-
- if ((pairs[1].key != 1) || (pairs[1].value == 0xAAAA))
- return -11;
+ ksft_test_result(out == 0 && pairs[0].key == -1 &&
+ pairs[1].key == 1 && pairs[1].value != 0xAAAA,
+ "Unknown key overwritten with -1 and doesn't block other elements\n");
- return 0;
+ ksft_finished();
}
diff --git a/tools/testing/selftests/riscv/hwprobe/hwprobe.h b/tools/testing/selftests/riscv/hwprobe/hwprobe.h
new file mode 100644
index 000000000000..721b0ce73a56
--- /dev/null
+++ b/tools/testing/selftests/riscv/hwprobe/hwprobe.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_RISCV_HWPROBE_H
+#define SELFTEST_RISCV_HWPROBE_H
+#include <stddef.h>
+#include <asm/hwprobe.h>
+
+/*
+ * Rather than relying on having a new enough libc to define this, just do it
+ * ourselves. This way we don't need to be coupled to a new-enough libc to
+ * contain the call.
+ */
+long riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count,
+ size_t cpu_count, unsigned long *cpus, unsigned int flags);
+
+#endif