aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.mailmap4
-rw-r--r--Documentation/ABI/testing/sysfs-platform-mellanox-bootctl10
-rw-r--r--Documentation/admin-guide/device-mapper/dm-integrity.rst2
-rw-r--r--Documentation/admin-guide/device-mapper/index.rst1
-rw-r--r--Documentation/admin-guide/ext4.rst19
-rw-r--r--Documentation/admin-guide/xfs.rst2
-rw-r--r--Documentation/dev-tools/kcov.rst10
-rw-r--r--Documentation/dev-tools/kselftest.rst8
-rw-r--r--Documentation/dev-tools/kunit/index.rst1
-rw-r--r--Documentation/dev-tools/kunit/kunit-tool.rst57
-rw-r--r--Documentation/dev-tools/kunit/start.rst26
-rw-r--r--Documentation/dev-tools/kunit/usage.rst24
-rw-r--r--Documentation/devicetree/bindings/arm/sunxi.yaml2
-rw-r--r--Documentation/devicetree/bindings/bus/allwinner,sun50i-a64-de2.yaml2
-rw-r--r--Documentation/devicetree/bindings/bus/allwinner,sun8i-a23-rsb.yaml2
-rw-r--r--Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-ccu.yaml2
-rw-r--r--Documentation/devicetree/bindings/crypto/allwinner,sun4i-a10-crypto.yaml2
-rw-r--r--Documentation/devicetree/bindings/display/allwinner,sun6i-a31-mipi-dsi.yaml2
-rw-r--r--Documentation/devicetree/bindings/display/panel/ronbo,rb070d30.yaml2
-rw-r--r--Documentation/devicetree/bindings/dma/allwinner,sun4i-a10-dma.yaml2
-rw-r--r--Documentation/devicetree/bindings/dma/allwinner,sun50i-a64-dma.yaml2
-rw-r--r--Documentation/devicetree/bindings/dma/allwinner,sun6i-a31-dma.yaml2
-rw-r--r--Documentation/devicetree/bindings/i2c/allwinner,sun6i-a31-p2wi.yaml2
-rw-r--r--Documentation/devicetree/bindings/iio/adc/adi,ad7292.yaml5
-rw-r--r--Documentation/devicetree/bindings/iio/adc/allwinner,sun8i-a33-ths.yaml2
-rw-r--r--Documentation/devicetree/bindings/input/allwinner,sun4i-a10-lradc-keys.yaml2
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/allwinner,sun4i-a10-ic.yaml2
-rw-r--r--Documentation/devicetree/bindings/interrupt-controller/allwinner,sun7i-a20-sc-nmi.yaml2
-rw-r--r--Documentation/devicetree/bindings/media/allwinner,sun4i-a10-csi.yaml2
-rw-r--r--Documentation/devicetree/bindings/media/allwinner,sun4i-a10-ir.yaml2
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/nvidia,tegra124-mc.yaml3
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/nvidia,tegra30-emc.yaml9
-rw-r--r--Documentation/devicetree/bindings/memory-controllers/nvidia,tegra30-mc.yaml3
-rw-r--r--Documentation/devicetree/bindings/mfd/allwinner,sun4i-a10-ts.yaml2
-rw-r--r--Documentation/devicetree/bindings/mmc/allwinner,sun4i-a10-mmc.yaml2
-rw-r--r--Documentation/devicetree/bindings/mtd/allwinner,sun4i-a10-nand.yaml2
-rw-r--r--Documentation/devicetree/bindings/net/allwinner,sun4i-a10-emac.yaml2
-rw-r--r--Documentation/devicetree/bindings/net/allwinner,sun4i-a10-mdio.yaml2
-rw-r--r--Documentation/devicetree/bindings/net/allwinner,sun7i-a20-gmac.yaml2
-rw-r--r--Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml2
-rw-r--r--Documentation/devicetree/bindings/net/broadcom-bluetooth.txt7
-rw-r--r--Documentation/devicetree/bindings/net/can/allwinner,sun4i-a10-can.yaml2
-rw-r--r--Documentation/devicetree/bindings/net/can/tcan4x5x.txt4
-rw-r--r--Documentation/devicetree/bindings/net/dsa/ar9331.txt148
-rw-r--r--Documentation/devicetree/bindings/net/mediatek-dwmac.txt33
-rw-r--r--Documentation/devicetree/bindings/net/snps,dwmac.yaml1
-rw-r--r--Documentation/devicetree/bindings/net/ti,cpsw-switch.yaml22
-rw-r--r--Documentation/devicetree/bindings/net/ti,dp83867.txt12
-rw-r--r--Documentation/devicetree/bindings/nvmem/allwinner,sun4i-a10-sid.yaml2
-rw-r--r--Documentation/devicetree/bindings/phy/allwinner,sun6i-a31-mipi-dphy.yaml2
-rw-r--r--Documentation/devicetree/bindings/pinctrl/allwinner,sun4i-a10-pinctrl.yaml2
-rw-r--r--Documentation/devicetree/bindings/ptp/ptp-ines.txt35
-rw-r--r--Documentation/devicetree/bindings/ptp/timestamper.txt42
-rw-r--r--Documentation/devicetree/bindings/pwm/allwinner,sun4i-a10-pwm.yaml2
-rw-r--r--Documentation/devicetree/bindings/remoteproc/st,stm32-rproc.yaml2
-rw-r--r--Documentation/devicetree/bindings/reset/brcm,brcmstb-reset.txt2
-rw-r--r--Documentation/devicetree/bindings/rtc/allwinner,sun4i-a10-rtc.yaml2
-rw-r--r--Documentation/devicetree/bindings/rtc/allwinner,sun6i-a31-rtc.yaml2
-rw-r--r--Documentation/devicetree/bindings/serio/allwinner,sun4i-a10-ps2.yaml2
-rw-r--r--Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-codec.yaml2
-rw-r--r--Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-i2s.yaml2
-rw-r--r--Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-spdif.yaml2
-rw-r--r--Documentation/devicetree/bindings/sound/allwinner,sun50i-a64-codec-analog.yaml2
-rw-r--r--Documentation/devicetree/bindings/sound/allwinner,sun8i-a23-codec-analog.yaml2
-rw-r--r--Documentation/devicetree/bindings/sound/allwinner,sun8i-a33-codec.yaml2
-rw-r--r--Documentation/devicetree/bindings/spi/allwinner,sun4i-a10-spi.yaml2
-rw-r--r--Documentation/devicetree/bindings/spi/allwinner,sun6i-a31-spi.yaml2
-rw-r--r--Documentation/devicetree/bindings/spi/spi-controller.yaml4
-rw-r--r--Documentation/devicetree/bindings/timer/allwinner,sun4i-a10-timer.yaml2
-rw-r--r--Documentation/devicetree/bindings/timer/allwinner,sun5i-a13-hstimer.yaml2
-rw-r--r--Documentation/devicetree/bindings/usb/allwinner,sun4i-a10-musb.yaml2
-rw-r--r--Documentation/devicetree/bindings/watchdog/allwinner,sun4i-a10-wdt.yaml2
-rw-r--r--Documentation/features/debug/gcov-profile-all/arch-support.txt2
-rw-r--r--Documentation/filesystems/erofs.txt27
-rw-r--r--Documentation/filesystems/overlayfs.rst (renamed from Documentation/filesystems/overlayfs.txt)10
-rw-r--r--Documentation/kbuild/kconfig-language.rst5
-rw-r--r--Documentation/kbuild/makefiles.rst16
-rw-r--r--Documentation/networking/device_drivers/index.rst1
-rw-r--r--Documentation/networking/device_drivers/netronome/nfp.rst116
-rw-r--r--Documentation/networking/device_drivers/stmicro/stmmac.rst697
-rw-r--r--Documentation/networking/device_drivers/stmicro/stmmac.txt401
-rw-r--r--Documentation/networking/device_drivers/ti/cpsw_switchdev.txt2
-rw-r--r--Documentation/networking/devlink-health.txt86
-rw-r--r--Documentation/networking/devlink-info-versions.rst64
-rw-r--r--Documentation/networking/devlink-params-bnxt.txt18
-rw-r--r--Documentation/networking/devlink-params-mlx5.txt17
-rw-r--r--Documentation/networking/devlink-params-mlxsw.txt10
-rw-r--r--Documentation/networking/devlink-params-mv88e6xxx.txt7
-rw-r--r--Documentation/networking/devlink-params-nfp.txt5
-rw-r--r--Documentation/networking/devlink-params-ti-cpsw-switch.txt10
-rw-r--r--Documentation/networking/devlink-params.txt71
-rw-r--r--Documentation/networking/devlink-trap-netdevsim.rst20
-rw-r--r--Documentation/networking/devlink/bnxt.rst41
-rw-r--r--Documentation/networking/devlink/devlink-dpipe.rst252
-rw-r--r--Documentation/networking/devlink/devlink-health.rst114
-rw-r--r--Documentation/networking/devlink/devlink-info.rst94
-rw-r--r--Documentation/networking/devlink/devlink-params.rst108
-rw-r--r--Documentation/networking/devlink/devlink-region.rst60
-rw-r--r--Documentation/networking/devlink/devlink-resource.rst62
-rw-r--r--Documentation/networking/devlink/devlink-trap.rst (renamed from Documentation/networking/devlink-trap.rst)2
-rw-r--r--Documentation/networking/devlink/index.rst42
-rw-r--r--Documentation/networking/devlink/ionic.rst29
-rw-r--r--Documentation/networking/devlink/mlx4.rst56
-rw-r--r--Documentation/networking/devlink/mlx5.rst59
-rw-r--r--Documentation/networking/devlink/mlxsw.rst59
-rw-r--r--Documentation/networking/devlink/mv88e6xxx.rst28
-rw-r--r--Documentation/networking/devlink/netdevsim.rst72
-rw-r--r--Documentation/networking/devlink/nfp.rst65
-rw-r--r--Documentation/networking/devlink/qed.rst26
-rw-r--r--Documentation/networking/devlink/ti-cpsw-switch.rst31
-rw-r--r--Documentation/networking/dsa/sja1105.rst6
-rw-r--r--Documentation/networking/ethtool-netlink.rst520
-rw-r--r--Documentation/networking/index.rst5
-rw-r--r--Documentation/networking/ip-sysctl.txt6
-rw-r--r--Documentation/networking/j1939.rst2
-rw-r--r--Documentation/networking/netdev-FAQ.rst4
-rw-r--r--Documentation/networking/phy.rst18
-rw-r--r--Documentation/networking/sfp-phylink.rst3
-rw-r--r--Documentation/process/coding-style.rst2
-rw-r--r--Documentation/process/index.rst1
-rw-r--r--Documentation/riscv/index.rst1
-rw-r--r--Documentation/riscv/patch-acceptance.rst35
-rw-r--r--Documentation/scsi/smartpqi.txt2
-rw-r--r--Documentation/translations/it_IT/process/coding-style.rst2
-rw-r--r--Documentation/translations/zh_CN/process/coding-style.rst2
-rw-r--r--MAINTAINERS76
-rw-r--r--Makefile5
-rw-r--r--arch/arc/include/asm/entry-arcv2.h8
-rw-r--r--arch/arc/include/asm/hugepage.h1
-rw-r--r--arch/arc/kernel/asm-offsets.c10
-rw-r--r--arch/arc/kernel/unwind.c6
-rw-r--r--arch/arc/plat-eznps/Kconfig2
-rw-r--r--arch/arm/boot/dts/am335x-sancloud-bbe.dts2
-rw-r--r--arch/arm/boot/dts/am437x-gp-evm.dts2
-rw-r--r--arch/arm/boot/dts/am43x-epos-evm.dts2
-rw-r--r--arch/arm/boot/dts/bcm-cygnus.dtsi4
-rw-r--r--arch/arm/boot/dts/bcm2711.dtsi2
-rw-r--r--arch/arm/boot/dts/bcm283x.dtsi2
-rw-r--r--arch/arm/boot/dts/bcm5301x.dtsi4
-rw-r--r--arch/arm/boot/dts/e60k02.dtsi5
-rw-r--r--arch/arm/boot/dts/imx6ul-14x14-evk.dtsi28
-rw-r--r--arch/arm/configs/exynos_defconfig1
-rw-r--r--arch/arm/configs/imx_v6_v7_defconfig1
-rw-r--r--arch/arm/configs/omap2plus_defconfig4
-rw-r--r--arch/arm/configs/shmobile_defconfig1
-rw-r--r--arch/arm/crypto/curve25519-glue.c7
-rw-r--r--arch/arm/mach-bcm/bcm2711.c1
-rw-r--r--arch/arm/mach-imx/cpu.c8
-rw-r--r--arch/arm/mach-ixp4xx/fsg-setup.c20
-rw-r--r--arch/arm/mach-ixp4xx/goramo_mlr.c24
-rw-r--r--arch/arm/mach-ixp4xx/include/mach/platform.h22
-rw-r--r--arch/arm/mach-ixp4xx/ixdp425-setup.c20
-rw-r--r--arch/arm/mach-ixp4xx/nas100d-setup.c10
-rw-r--r--arch/arm/mach-ixp4xx/nslu2-setup.c10
-rw-r--r--arch/arm/mach-ixp4xx/omixp-setup.c20
-rw-r--r--arch/arm/mach-ixp4xx/vulcan-setup.c20
-rw-r--r--arch/arm/mach-mmp/pxa168.h2
-rw-r--r--arch/arm/mach-vexpress/spc.c12
-rw-r--r--arch/arm/net/bpf_jit_32.c30
-rw-r--r--arch/arm64/Kconfig1
-rw-r--r--arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi10
-rw-r--r--arch/arm64/include/asm/pgtable-prot.h5
-rw-r--r--arch/arm64/include/asm/pgtable.h10
-rw-r--r--arch/arm64/kernel/cpu_errata.c1
-rw-r--r--arch/arm64/kvm/sys_regs.c25
-rw-r--r--arch/arm64/kvm/sys_regs.h17
-rw-r--r--arch/arm64/mm/fault.c2
-rw-r--r--arch/arm64/mm/mmu.c4
-rw-r--r--arch/hexagon/include/asm/atomic.h8
-rw-r--r--arch/hexagon/include/asm/bitops.h8
-rw-r--r--arch/hexagon/include/asm/cmpxchg.h2
-rw-r--r--arch/hexagon/include/asm/futex.h6
-rw-r--r--arch/hexagon/include/asm/io.h1
-rw-r--r--arch/hexagon/include/asm/spinlock.h20
-rw-r--r--arch/hexagon/kernel/stacktrace.c4
-rw-r--r--arch/hexagon/kernel/vm_entry.S2
-rw-r--r--arch/ia64/mm/init.c4
-rw-r--r--arch/m68k/emu/nfeth.c2
-rw-r--r--arch/mips/Kconfig2
-rw-r--r--arch/mips/boot/compressed/Makefile3
-rw-r--r--arch/mips/boot/dts/qca/ar9331.dtsi119
-rw-r--r--arch/mips/boot/dts/qca/ar9331_dpt_module.dts13
-rw-r--r--arch/mips/cavium-octeon/executive/cvmx-bootmem.c9
-rw-r--r--arch/mips/include/asm/cpu-type.h3
-rw-r--r--arch/mips/include/asm/thread_info.h20
-rw-r--r--arch/mips/include/asm/vdso/gettimeofday.h13
-rw-r--r--arch/mips/kernel/cacheinfo.c27
-rw-r--r--arch/mips/net/ebpf_jit.c11
-rw-r--r--arch/mips/vdso/vgettimeofday.c20
-rw-r--r--arch/nios2/mm/ioremap.c8
-rw-r--r--arch/parisc/include/asm/cmpxchg.h10
-rw-r--r--arch/parisc/include/asm/kexec.h4
-rw-r--r--arch/parisc/kernel/Makefile2
-rw-r--r--arch/parisc/kernel/drivers.c2
-rw-r--r--arch/parisc/kernel/pdt.c3
-rw-r--r--arch/powerpc/include/asm/spinlock.h14
-rw-r--r--arch/powerpc/include/asm/uaccess.h9
-rw-r--r--arch/powerpc/kernel/irq.c4
-rw-r--r--arch/powerpc/kvm/book3s_hv.c3
-rw-r--r--arch/powerpc/kvm/book3s_hv_rmhandlers.S4
-rw-r--r--arch/powerpc/lib/string_32.S4
-rw-r--r--arch/powerpc/lib/string_64.S6
-rw-r--r--arch/powerpc/mm/mem.c11
-rw-r--r--arch/powerpc/mm/nohash/8xx.c2
-rw-r--r--arch/powerpc/mm/slice.c4
-rw-r--r--arch/powerpc/net/bpf_jit32.h4
-rw-r--r--arch/powerpc/net/bpf_jit_comp.c16
-rw-r--r--arch/powerpc/platforms/pseries/cmm.c10
-rw-r--r--arch/powerpc/platforms/pseries/setup.c7
-rw-r--r--arch/riscv/Kconfig3
-rw-r--r--arch/riscv/Kconfig.socs4
-rw-r--r--arch/riscv/boot/Makefile2
-rw-r--r--arch/riscv/boot/dts/sifive/fu540-c000.dtsi15
-rw-r--r--arch/riscv/include/asm/csr.h18
-rw-r--r--arch/riscv/include/asm/perf_event.h4
-rw-r--r--arch/riscv/include/asm/pgtable.h42
-rw-r--r--arch/riscv/include/uapi/asm/bpf_perf_event.h9
-rw-r--r--arch/riscv/kernel/entry.S1
-rw-r--r--arch/riscv/kernel/ftrace.c2
-rw-r--r--arch/riscv/kernel/head.S2
-rw-r--r--arch/riscv/kernel/irq.c6
-rw-r--r--arch/riscv/kernel/riscv_ksyms.c3
-rw-r--r--arch/riscv/lib/uaccess.S4
-rw-r--r--arch/riscv/mm/Makefile1
-rw-r--r--arch/riscv/mm/cacheflush.c1
-rw-r--r--arch/riscv/mm/init.c12
-rw-r--r--arch/riscv/net/bpf_jit_comp.c533
-rw-r--r--arch/s390/Kconfig1
-rw-r--r--arch/s390/include/asm/setup.h2
-rw-r--r--arch/s390/include/asm/timex.h4
-rw-r--r--arch/s390/include/asm/uv.h2
-rw-r--r--arch/s390/kernel/early.c16
-rw-r--r--arch/s390/kernel/mcount.S1
-rw-r--r--arch/s390/kernel/perf_cpum_sf.c22
-rw-r--r--arch/s390/kernel/smp.c13
-rw-r--r--arch/s390/kernel/unwind_bc.c15
-rw-r--r--arch/s390/lib/spinlock.c1
-rw-r--r--arch/s390/lib/test_unwind.c2
-rw-r--r--arch/s390/mm/init.c4
-rw-r--r--arch/s390/mm/kasan_init.c68
-rw-r--r--arch/s390/purgatory/.gitignore1
-rw-r--r--arch/s390/purgatory/Makefile19
-rw-r--r--arch/s390/purgatory/string.c3
-rw-r--r--arch/sh/drivers/platform_early.c11
-rw-r--r--arch/sh/kernel/kgdb.c1
-rw-r--r--arch/sh/mm/init.c4
-rw-r--r--arch/sparc/net/bpf_jit_comp_32.c8
-rw-r--r--arch/um/drivers/net_kern.c2
-rw-r--r--arch/um/drivers/vector_kern.c2
-rw-r--r--arch/x86/Kconfig1
-rw-r--r--arch/x86/boot/compressed/Makefile2
-rw-r--r--arch/x86/events/core.c19
-rw-r--r--arch/x86/events/intel/bts.c16
-rw-r--r--arch/x86/kernel/cpu/mce/amd.c4
-rw-r--r--arch/x86/kernel/cpu/mce/core.c2
-rw-r--r--arch/x86/kernel/early-quirks.c4
-rw-r--r--arch/x86/kernel/fpu/xstate.c2
-rw-r--r--arch/x86/kernel/ftrace.c14
-rw-r--r--arch/x86/kvm/cpuid.c6
-rw-r--r--arch/x86/mm/init_32.c4
-rw-r--r--arch/x86/mm/init_64.c4
-rw-r--r--arch/x86/net/bpf_jit_comp.c150
-rw-r--r--arch/x86/platform/efi/quirks.c6
-rw-r--r--arch/xtensa/platforms/iss/network.c2
-rw-r--r--block/bio.c43
-rw-r--r--block/blk-cgroup.c20
-rw-r--r--block/blk-core.c20
-rw-r--r--block/blk-flush.c5
-rw-r--r--block/blk-iocost.c13
-rw-r--r--block/blk-map.c2
-rw-r--r--block/blk-merge.c18
-rw-r--r--block/blk.h1
-rw-r--r--block/bsg-lib.c2
-rw-r--r--block/compat_ioctl.c16
-rw-r--r--crypto/adiantum.c4
-rw-r--r--crypto/asymmetric_keys/asym_tpm.c1
-rw-r--r--crypto/asymmetric_keys/public_key.c1
-rw-r--r--crypto/essiv.c2
-rw-r--r--drivers/acpi/device_pm.c12
-rw-r--r--drivers/android/binder.c4
-rw-r--r--drivers/ata/ahci_brcm.c133
-rw-r--r--drivers/ata/libahci_platform.c6
-rw-r--r--drivers/ata/libata-core.c24
-rw-r--r--drivers/ata/sata_fsl.c2
-rw-r--r--drivers/ata/sata_mv.c2
-rw-r--r--drivers/ata/sata_nv.c2
-rw-r--r--drivers/atm/eni.c4
-rw-r--r--drivers/atm/fore200e.c25
-rw-r--r--drivers/base/devtmpfs.c6
-rw-r--r--drivers/base/platform.c4
-rw-r--r--drivers/block/nbd.c6
-rw-r--r--drivers/block/null_blk_zoned.c5
-rw-r--r--drivers/block/pktcdvd.c2
-rw-r--r--drivers/block/xen-blkback/xenbus.c66
-rw-r--r--drivers/block/xen-blkfront.c4
-rw-r--r--drivers/bluetooth/btbcm.c46
-rw-r--r--drivers/bluetooth/btbcm.h16
-rw-r--r--drivers/bluetooth/btusb.c12
-rw-r--r--drivers/bluetooth/hci_bcm.c50
-rw-r--r--drivers/bus/ti-sysc.c7
-rw-r--r--drivers/char/agp/isoch.c9
-rw-r--r--drivers/char/pcmcia/synclink_cs.c2
-rw-r--r--drivers/char/random.c1
-rw-r--r--drivers/char/tpm/tpm-dev-common.c10
-rw-r--r--drivers/char/tpm/tpm-dev.h2
-rw-r--r--drivers/char/tpm/tpm.h1
-rw-r--r--drivers/char/tpm/tpm2-cmd.c1
-rw-r--r--drivers/char/tpm/tpm_ftpm_tee.c22
-rw-r--r--drivers/char/tpm/tpm_tis_core.c3
-rw-r--r--drivers/clk/at91/at91sam9260.c2
-rw-r--r--drivers/clk/at91/at91sam9rl.c2
-rw-r--r--drivers/clk/at91/at91sam9x5.c2
-rw-r--r--drivers/clk/at91/pmc.c2
-rw-r--r--drivers/clk/at91/sama5d2.c2
-rw-r--r--drivers/clk/at91/sama5d4.c2
-rw-r--r--drivers/clk/clk.c62
-rw-r--r--drivers/clk/imx/clk-composite-8m.c2
-rw-r--r--drivers/clk/imx/clk-imx7ulp.c1
-rw-r--r--drivers/clk/imx/clk-pll14xx.c2
-rw-r--r--drivers/clk/qcom/gcc-sc7180.c6
-rw-r--r--drivers/clk/qcom/gpucc-msm8998.c2
-rw-r--r--drivers/clocksource/timer-riscv.c2
-rw-r--r--drivers/cpufreq/vexpress-spc-cpufreq.c2
-rw-r--r--drivers/cpuidle/cpuidle.c3
-rw-r--r--drivers/cpuidle/driver.c10
-rw-r--r--drivers/crypto/chelsio/chtls/chtls_cm.c2
-rw-r--r--drivers/devfreq/Kconfig5
-rw-r--r--drivers/devfreq/devfreq.c273
-rw-r--r--drivers/dma-buf/sync_file.c2
-rw-r--r--drivers/dma/dma-jz4780.c3
-rw-r--r--drivers/dma/ioat/dma.c3
-rw-r--r--drivers/dma/k3dma.c12
-rw-r--r--drivers/dma/virt-dma.c3
-rw-r--r--drivers/edac/Kconfig2
-rw-r--r--drivers/firmware/broadcom/tee_bnxt_fw.c1
-rw-r--r--drivers/firmware/efi/earlycon.c40
-rw-r--r--drivers/firmware/efi/efi.c30
-rw-r--r--drivers/firmware/efi/libstub/gop.c80
-rw-r--r--drivers/firmware/efi/rci2-table.c3
-rw-r--r--drivers/gpio/Kconfig4
-rw-r--r--drivers/gpio/gpio-aspeed-sgpio.c2
-rw-r--r--drivers/gpio/gpio-mockup.c7
-rw-r--r--drivers/gpio/gpio-mpc8xxx.c1
-rw-r--r--drivers/gpio/gpio-pca953x.c26
-rw-r--r--drivers/gpio/gpio-xgs-iproc.c2
-rw-r--r--drivers/gpio/gpio-xtensa.c7
-rw-r--r--drivers/gpio/gpiolib-of.c27
-rw-r--r--drivers/gpio/gpiolib.c13
-rw-r--r--drivers/gpu/drm/amd/acp/Kconfig2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/Kconfig2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/df_v3_6.c38
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c22
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c15
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c29
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c32
-rw-r--r--drivers/gpu/drm/amd/amdkfd/Kconfig2
-rw-r--r--drivers/gpu/drm/amd/display/Kconfig2
-rw-r--r--drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c1
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c134
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_link.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c9
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce/dce_aux.c33
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn20/Makefile1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c15
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn20/dcn20_stream_encoder.c12
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn21/Makefile1
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c24
-rw-r--r--drivers/gpu/drm/amd/display/dc/dsc/Makefile1
-rw-r--r--drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h2
-rw-r--r--drivers/gpu/drm/amd/display/include/i2caux_interface.h2
-rw-r--r--drivers/gpu/drm/amd/display/modules/freesync/freesync.c32
-rw-r--r--drivers/gpu/drm/amd/display/modules/inc/mod_freesync.h1
-rw-r--r--drivers/gpu/drm/amd/powerplay/amdgpu_smu.c1
-rw-r--r--drivers/gpu/drm/amd/powerplay/arcturus_ppt.c8
-rw-r--r--drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h1
-rw-r--r--drivers/gpu/drm/amd/powerplay/navi10_ppt.c3
-rw-r--r--drivers/gpu/drm/amd/powerplay/vega20_ppt.c3
-rw-r--r--drivers/gpu/drm/arm/malidp_mw.c2
-rw-r--r--drivers/gpu/drm/exynos/exynos_drm_gsc.c1
-rw-r--r--drivers/gpu/drm/i915/display/intel_ddi.c5
-rw-r--r--drivers/gpu/drm/i915/display/intel_display.c2
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_power.c153
-rw-r--r--drivers/gpu/drm/i915/display/intel_dp.c3
-rw-r--r--drivers/gpu/drm/i915/display/intel_fbc.c2
-rw-r--r--drivers/gpu/drm/i915/display/intel_frontbuffer.c16
-rw-r--r--drivers/gpu/drm/i915/display/intel_frontbuffer.h34
-rw-r--r--drivers/gpu/drm/i915/display/intel_hdcp.c26
-rw-r--r--drivers/gpu/drm/i915/display/intel_hdcp.h5
-rw-r--r--drivers/gpu/drm/i915/display/intel_hdmi.c3
-rw-r--r--drivers/gpu/drm/i915/display/intel_overlay.c17
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_clflush.c3
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_context.c3
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_domain.c4
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c2
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_object.c26
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_object.h23
-rw-r--r--drivers/gpu/drm/i915/gem/i915_gem_object_types.h2
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_pm.c3
-rw-r--r--drivers/gpu/drm/i915/gt/intel_lrc.c51
-rw-r--r--drivers/gpu/drm/i915/gvt/dmabuf.c64
-rw-r--r--drivers/gpu/drm/i915/gvt/handlers.c16
-rw-r--r--drivers/gpu/drm/i915/gvt/hypercall.h2
-rw-r--r--drivers/gpu/drm/i915/gvt/kvmgt.c23
-rw-r--r--drivers/gpu/drm/i915/gvt/mpt.h15
-rw-r--r--drivers/gpu/drm/i915/gvt/vgpu.c4
-rw-r--r--drivers/gpu/drm/i915/i915_gem.c36
-rw-r--r--drivers/gpu/drm/i915/i915_perf.c204
-rw-r--r--drivers/gpu/drm/i915/i915_pmu.c73
-rw-r--r--drivers/gpu/drm/i915/i915_pmu.h2
-rw-r--r--drivers/gpu/drm/i915/i915_reg.h6
-rw-r--r--drivers/gpu/drm/i915/i915_request.c114
-rw-r--r--drivers/gpu/drm/i915/i915_scheduler.c1
-rw-r--r--drivers/gpu/drm/i915/i915_sw_fence_work.c3
-rw-r--r--drivers/gpu/drm/i915/i915_vma.c10
-rw-r--r--drivers/gpu/drm/i915/intel_pm.c4
-rw-r--r--drivers/gpu/drm/mcde/mcde_dsi.c6
-rw-r--r--drivers/gpu/drm/mediatek/mtk_drm_crtc.c18
-rw-r--r--drivers/gpu/drm/mediatek/mtk_dsi.c67
-rw-r--r--drivers/gpu/drm/meson/meson_venc_cvbs.c48
-rw-r--r--drivers/gpu/drm/mgag200/mgag200_drv.c3
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/atom.h1
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/disp.c108
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/head.c5
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_connector.c28
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_connector.h116
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_devfreq.c19
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_drv.c20
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_gem.c19
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_gem.h4
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_perfcnt.c23
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_perfcnt.h2
-rw-r--r--drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c2
-rw-r--r--drivers/hid/hid-asus.c3
-rw-r--r--drivers/hid/hid-core.c6
-rw-r--r--drivers/hid/hid-ids.h3
-rw-r--r--drivers/hid/hid-input.c16
-rw-r--r--drivers/hid/hid-ite.c3
-rw-r--r--drivers/hid/hid-multitouch.c5
-rw-r--r--drivers/hid/hid-quirks.c1
-rw-r--r--drivers/hid/hid-steam.c4
-rw-r--r--drivers/hid/hidraw.c4
-rw-r--r--drivers/hid/i2c-hid/i2c-hid-core.c16
-rw-r--r--drivers/hid/intel-ish-hid/ipc/hw-ish.h2
-rw-r--r--drivers/hid/intel-ish-hid/ipc/pci-ish.c2
-rw-r--r--drivers/hid/uhid.c2
-rw-r--r--drivers/hid/usbhid/hiddev.c97
-rw-r--r--drivers/hid/wacom_wac.c6
-rw-r--r--drivers/hwtracing/intel_th/core.c7
-rw-r--r--drivers/hwtracing/intel_th/intel_th.h2
-rw-r--r--drivers/hwtracing/intel_th/msu.c14
-rw-r--r--drivers/hwtracing/intel_th/pci.c10
-rw-r--r--drivers/i2c/i2c-core-base.c23
-rw-r--r--drivers/iio/accel/st_accel_core.c8
-rw-r--r--drivers/iio/adc/ad7124.c7
-rw-r--r--drivers/iio/adc/ad7606.c2
-rw-r--r--drivers/iio/adc/ad7949.c22
-rw-r--r--drivers/iio/adc/intel_mrfld_adc.c2
-rw-r--r--drivers/iio/adc/max1027.c8
-rw-r--r--drivers/iio/adc/max9611.c16
-rw-r--r--drivers/iio/humidity/hdc100x.c2
-rw-r--r--drivers/iio/imu/inv_mpu6050/inv_mpu_core.c23
-rw-r--r--drivers/iio/imu/inv_mpu6050/inv_mpu_iio.h16
-rw-r--r--drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h6
-rw-r--r--drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c50
-rw-r--r--drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c34
-rw-r--r--drivers/iio/temperature/ltc2983.c6
-rw-r--r--drivers/infiniband/core/cma.c1
-rw-r--r--drivers/infiniband/core/counters.c3
-rw-r--r--drivers/infiniband/core/ib_core_uverbs.c48
-rw-r--r--drivers/infiniband/hw/efa/efa_verbs.c2
-rw-r--r--drivers/infiniband/hw/hfi1/sdma.c2
-rw-r--r--drivers/infiniband/hw/hfi1/verbs.h4
-rw-r--r--drivers/infiniband/hw/mlx4/main.c9
-rw-r--r--drivers/infiniband/hw/mlx5/cmd.c16
-rw-r--r--drivers/infiniband/hw/mlx5/cmd.h2
-rw-r--r--drivers/infiniband/hw/mlx5/main.c133
-rw-r--r--drivers/infiniband/hw/mlx5/mlx5_ib.h19
-rw-r--r--drivers/infiniband/sw/rxe/rxe_recv.c2
-rw-r--r--drivers/infiniband/sw/rxe/rxe_req.c6
-rw-r--r--drivers/infiniband/sw/rxe/rxe_resp.c7
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_main.c2
-rw-r--r--drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c2
-rw-r--r--drivers/interconnect/qcom/Kconfig14
-rw-r--r--drivers/interconnect/qcom/msm8974.c8
-rw-r--r--drivers/interconnect/qcom/qcs404.c8
-rw-r--r--drivers/interconnect/qcom/sdm845.c4
-rw-r--r--drivers/iommu/dma-iommu.c23
-rw-r--r--drivers/iommu/intel-iommu.c12
-rw-r--r--drivers/iommu/intel-svm.c6
-rw-r--r--drivers/iommu/iommu.c8
-rw-r--r--drivers/iommu/iova.c2
-rw-r--r--drivers/irqchip/irq-sifive-plic.c2
-rw-r--r--drivers/md/dm-clone-metadata.c136
-rw-r--r--drivers/md/dm-clone-metadata.h17
-rw-r--r--drivers/md/dm-clone-target.c53
-rw-r--r--drivers/md/dm-mpath.c37
-rw-r--r--drivers/md/dm-thin-metadata.c29
-rw-r--r--drivers/md/dm-thin-metadata.h7
-rw-r--r--drivers/md/dm-thin.c42
-rw-r--r--drivers/md/md.c1
-rw-r--r--drivers/md/persistent-data/dm-btree-remove.c8
-rw-r--r--drivers/md/raid1.c2
-rw-r--r--drivers/md/raid5-ppl.c2
-rw-r--r--drivers/md/raid5.c2
-rw-r--r--drivers/media/cec/cec-adap.c40
-rw-r--r--drivers/media/platform/omap3isp/isppreview.c24
-rw-r--r--drivers/media/usb/pulse8-cec/pulse8-cec.c17
-rw-r--r--drivers/media/v4l2-core/v4l2-ioctl.c2
-rw-r--r--drivers/message/fusion/mptlan.c2
-rw-r--r--drivers/misc/habanalabs/command_submission.c5
-rw-r--r--drivers/misc/habanalabs/context.c2
-rw-r--r--drivers/misc/habanalabs/goya/goya.c15
-rw-r--r--drivers/misc/ocxl/context.c8
-rw-r--r--drivers/misc/ocxl/file.c23
-rw-r--r--drivers/misc/sgi-xp/xpnet.c2
-rw-r--r--drivers/mmc/host/mtk-sd.c2
-rw-r--r--drivers/mmc/host/sdhci-msm.c28
-rw-r--r--drivers/mmc/host/sdhci-of-esdhc.c17
-rw-r--r--drivers/mmc/host/sdhci-pci-core.c10
-rw-r--r--drivers/mmc/host/sdhci.c11
-rw-r--r--drivers/mmc/host/sdhci.h2
-rw-r--r--drivers/net/Kconfig43
-rw-r--r--drivers/net/Makefile1
-rw-r--r--drivers/net/appletalk/cops.c4
-rw-r--r--drivers/net/arcnet/arcdevice.h2
-rw-r--r--drivers/net/arcnet/arcnet.c2
-rw-r--r--drivers/net/bonding/bond_3ad.c122
-rw-r--r--drivers/net/bonding/bond_main.c42
-rw-r--r--drivers/net/caif/caif_serial.c4
-rw-r--r--drivers/net/can/flexcan.c73
-rw-r--r--drivers/net/can/m_can/tcan4x5x.c83
-rw-r--r--drivers/net/can/mscan/mscan.c21
-rw-r--r--drivers/net/can/usb/gs_usb.c4
-rw-r--r--drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c2
-rw-r--r--drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c8
-rw-r--r--drivers/net/can/xilinx_can.c7
-rw-r--r--drivers/net/dsa/Kconfig5
-rw-r--r--drivers/net/dsa/Makefile1
-rw-r--r--drivers/net/dsa/b53/b53_common.c87
-rw-r--r--drivers/net/dsa/b53/b53_priv.h4
-rw-r--r--drivers/net/dsa/bcm_sf2_cfp.c6
-rw-r--r--drivers/net/dsa/dsa_loop.c3
-rw-r--r--drivers/net/dsa/lan9303-core.c3
-rw-r--r--drivers/net/dsa/lantiq_gswip.c3
-rw-r--r--drivers/net/dsa/microchip/ksz8795.c3
-rw-r--r--drivers/net/dsa/microchip/ksz9477.c3
-rw-r--r--drivers/net/dsa/mt7530.c3
-rw-r--r--drivers/net/dsa/mv88e6060.c3
-rw-r--r--drivers/net/dsa/mv88e6xxx/chip.c17
-rw-r--r--drivers/net/dsa/mv88e6xxx/chip.h6
-rw-r--r--drivers/net/dsa/mv88e6xxx/global1.c5
-rw-r--r--drivers/net/dsa/mv88e6xxx/global1.h1
-rw-r--r--drivers/net/dsa/mv88e6xxx/global1_atu.c5
-rw-r--r--drivers/net/dsa/mv88e6xxx/global1_vtu.c5
-rw-r--r--drivers/net/dsa/mv88e6xxx/global2.c10
-rw-r--r--drivers/net/dsa/mv88e6xxx/port.c12
-rw-r--r--drivers/net/dsa/mv88e6xxx/serdes.c100
-rw-r--r--drivers/net/dsa/mv88e6xxx/serdes.h9
-rw-r--r--drivers/net/dsa/ocelot/Kconfig3
-rw-r--r--drivers/net/dsa/ocelot/felix.c268
-rw-r--r--drivers/net/dsa/ocelot/felix.h16
-rw-r--r--drivers/net/dsa/ocelot/felix_vsc9959.c501
-rw-r--r--drivers/net/dsa/qca/Kconfig9
-rw-r--r--drivers/net/dsa/qca/Makefile2
-rw-r--r--drivers/net/dsa/qca/ar9331.c856
-rw-r--r--drivers/net/dsa/qca8k.c3
-rw-r--r--drivers/net/dsa/rtl8366rb.c3
-rw-r--r--drivers/net/dsa/sja1105/sja1105_main.c131
-rw-r--r--drivers/net/dsa/sja1105/sja1105_ptp.c42
-rw-r--r--drivers/net/dsa/sja1105/sja1105_ptp.h1
-rw-r--r--drivers/net/dsa/sja1105/sja1105_static_config.c7
-rw-r--r--drivers/net/dsa/sja1105/sja1105_tas.c5
-rw-r--r--drivers/net/dsa/vitesse-vsc73xx-core.c5
-rw-r--r--drivers/net/ethernet/3com/3c509.c4
-rw-r--r--drivers/net/ethernet/3com/3c515.c4
-rw-r--r--drivers/net/ethernet/3com/3c574_cs.c4
-rw-r--r--drivers/net/ethernet/3com/3c589_cs.c4
-rw-r--r--drivers/net/ethernet/3com/3c59x.c8
-rw-r--r--drivers/net/ethernet/3com/typhoon.c2
-rw-r--r--drivers/net/ethernet/8390/8390.c4
-rw-r--r--drivers/net/ethernet/8390/8390.h4
-rw-r--r--drivers/net/ethernet/8390/8390p.c4
-rw-r--r--drivers/net/ethernet/8390/axnet_cs.c4
-rw-r--r--drivers/net/ethernet/8390/lib8390.c2
-rw-r--r--drivers/net/ethernet/adaptec/starfire.c4
-rw-r--r--drivers/net/ethernet/agere/et131x.c2
-rw-r--r--drivers/net/ethernet/allwinner/sun4i-emac.c2
-rw-r--r--drivers/net/ethernet/alteon/acenic.c4
-rw-r--r--drivers/net/ethernet/amazon/ena/ena_com.h2
-rw-r--r--drivers/net/ethernet/amazon/ena/ena_ethtool.c28
-rw-r--r--drivers/net/ethernet/amazon/ena/ena_netdev.c969
-rw-r--r--drivers/net/ethernet/amazon/ena/ena_netdev.h73
-rw-r--r--drivers/net/ethernet/amd/7990.c2
-rw-r--r--drivers/net/ethernet/amd/7990.h2
-rw-r--r--drivers/net/ethernet/amd/a2065.c13
-rw-r--r--drivers/net/ethernet/amd/am79c961a.c2
-rw-r--r--drivers/net/ethernet/amd/amd8111e.c2
-rw-r--r--drivers/net/ethernet/amd/ariadne.c2
-rw-r--r--drivers/net/ethernet/amd/atarilance.c4
-rw-r--r--drivers/net/ethernet/amd/au1000_eth.c2
-rw-r--r--drivers/net/ethernet/amd/declance.c2
-rw-r--r--drivers/net/ethernet/amd/lance.c4
-rw-r--r--drivers/net/ethernet/amd/ni65.c4
-rw-r--r--drivers/net/ethernet/amd/nmclan_cs.c4
-rw-r--r--drivers/net/ethernet/amd/pcnet32.c4
-rw-r--r--drivers/net/ethernet/amd/sunlance.c2
-rw-r--r--drivers/net/ethernet/amd/xgbe/xgbe-drv.c2
-rw-r--r--drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c4
-rw-r--r--drivers/net/ethernet/apm/xgene-v2/main.c2
-rw-r--r--drivers/net/ethernet/apm/xgene/xgene_enet_main.c2
-rw-r--r--drivers/net/ethernet/apple/macmace.c4
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/aq_nic.c4
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c3
-rw-r--r--drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c4
-rw-r--r--drivers/net/ethernet/atheros/ag71xx.c6
-rw-r--r--drivers/net/ethernet/atheros/alx/main.c2
-rw-r--r--drivers/net/ethernet/atheros/atl1c/atl1c_main.c2
-rw-r--r--drivers/net/ethernet/atheros/atl1e/atl1e_main.c2
-rw-r--r--drivers/net/ethernet/atheros/atlx/atl2.c2
-rw-r--r--drivers/net/ethernet/atheros/atlx/atlx.c2
-rw-r--r--drivers/net/ethernet/broadcom/b44.c11
-rw-r--r--drivers/net/ethernet/broadcom/bcmsysport.c10
-rw-r--r--drivers/net/ethernet/broadcom/bnx2.c2
-rw-r--r--drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c2
-rw-r--r--drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h4
-rw-r--r--drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c13
-rw-r--r--drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h5
-rw-r--r--drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h1
-rw-r--r--drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c12
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.c67
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c107
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h3
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c38
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.h4
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c8
-rw-r--r--drivers/net/ethernet/broadcom/genet/bcmgenet.c115
-rw-r--r--drivers/net/ethernet/broadcom/genet/bcmgenet.h4
-rw-r--r--drivers/net/ethernet/broadcom/sb1250-mac.c4
-rw-r--r--drivers/net/ethernet/broadcom/tg3.c14
-rw-r--r--drivers/net/ethernet/brocade/bna/bfa_ioc.c3
-rw-r--r--drivers/net/ethernet/cadence/macb.h15
-rw-r--r--drivers/net/ethernet/cadence/macb_main.c92
-rw-r--r--drivers/net/ethernet/calxeda/xgmac.c2
-rw-r--r--drivers/net/ethernet/cavium/liquidio/lio_main.c2
-rw-r--r--drivers/net/ethernet/cavium/liquidio/lio_vf_main.c2
-rw-r--r--drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c4
-rw-r--r--drivers/net/ethernet/cavium/liquidio/octeon_console.c16
-rw-r--r--drivers/net/ethernet/cavium/thunder/nicvf_main.c2
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4.h12
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c29
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c4
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c253
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c66
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c8
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_matchall.c4
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.c12
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c18
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h14
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/t4_hw.c21
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h1
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c4
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h1
-rw-r--r--drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c18
-rw-r--r--drivers/net/ethernet/cirrus/cs89x0.c2
-rw-r--r--drivers/net/ethernet/cisco/enic/enic_main.c2
-rw-r--r--drivers/net/ethernet/cortina/gemini.c4
-rw-r--r--drivers/net/ethernet/davicom/dm9000.c2
-rw-r--r--drivers/net/ethernet/dec/tulip/de2104x.c2
-rw-r--r--drivers/net/ethernet/dec/tulip/dmfe.c7
-rw-r--r--drivers/net/ethernet/dec/tulip/tulip_core.c4
-rw-r--r--drivers/net/ethernet/dec/tulip/uli526x.c4
-rw-r--r--drivers/net/ethernet/dec/tulip/winbond-840.c4
-rw-r--r--drivers/net/ethernet/dlink/dl2k.c4
-rw-r--r--drivers/net/ethernet/dlink/sundance.c4
-rw-r--r--drivers/net/ethernet/emulex/benet/be_ethtool.c2
-rw-r--r--drivers/net/ethernet/emulex/benet/be_main.c2
-rw-r--r--drivers/net/ethernet/ethoc.c2
-rw-r--r--drivers/net/ethernet/faraday/ftgmac100.c2
-rw-r--r--drivers/net/ethernet/fealnx.c4
-rw-r--r--drivers/net/ethernet/freescale/Makefile1
-rw-r--r--drivers/net/ethernet/freescale/dpaa/dpaa_eth.c41
-rw-r--r--drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.c34
-rw-r--r--drivers/net/ethernet/freescale/dpaa2/dprtc-cmd.h4
-rw-r--r--drivers/net/ethernet/freescale/dpaa2/dprtc.h2
-rw-r--r--drivers/net/ethernet/freescale/enetc/Kconfig1
-rw-r--r--drivers/net/ethernet/freescale/enetc/Makefile2
-rw-r--r--drivers/net/ethernet/freescale/enetc/enetc.c14
-rw-r--r--drivers/net/ethernet/freescale/enetc/enetc.h3
-rw-r--r--drivers/net/ethernet/freescale/enetc/enetc_ethtool.c1
-rw-r--r--drivers/net/ethernet/freescale/enetc/enetc_hw.h11
-rw-r--r--drivers/net/ethernet/freescale/enetc/enetc_mdio.c120
-rw-r--r--drivers/net/ethernet/freescale/enetc/enetc_mdio.h12
-rw-r--r--drivers/net/ethernet/freescale/enetc/enetc_pci_mdio.c43
-rw-r--r--drivers/net/ethernet/freescale/enetc/enetc_pf.c47
-rw-r--r--drivers/net/ethernet/freescale/enetc/enetc_pf.h4
-rw-r--r--drivers/net/ethernet/freescale/enetc/enetc_qos.c38
-rw-r--r--drivers/net/ethernet/freescale/fec_main.c11
-rw-r--r--drivers/net/ethernet/freescale/fec_mpc52xx.c2
-rw-r--r--drivers/net/ethernet/freescale/fman/mac.c4
-rw-r--r--drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c2
-rw-r--r--drivers/net/ethernet/freescale/gianfar.c12
-rw-r--r--drivers/net/ethernet/freescale/ucc_geth.c2
-rw-r--r--drivers/net/ethernet/fujitsu/fmvj18x_cs.c4
-rw-r--r--drivers/net/ethernet/google/gve/gve_main.c2
-rw-r--r--drivers/net/ethernet/google/gve/gve_rx.c2
-rw-r--r--drivers/net/ethernet/google/gve/gve_tx.c6
-rw-r--r--drivers/net/ethernet/hisilicon/hip04_eth.c4
-rw-r--r--drivers/net/ethernet/hisilicon/hix5hd2_gmac.c2
-rw-r--r--drivers/net/ethernet/hisilicon/hns/hns_enet.c2
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/Makefile2
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hnae3.h9
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_enet.c240
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_enet.h1
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_trace.h139
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c16
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h2
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c84
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c4
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c416
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h21
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c7
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c2
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c2
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c404
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h15
-rw-r--r--drivers/net/ethernet/huawei/hinic/hinic_ethtool.c8
-rw-r--r--drivers/net/ethernet/huawei/hinic/hinic_main.c2
-rw-r--r--drivers/net/ethernet/i825xx/82596.c4
-rw-r--r--drivers/net/ethernet/i825xx/ether1.c4
-rw-r--r--drivers/net/ethernet/i825xx/lib82596.c4
-rw-r--r--drivers/net/ethernet/i825xx/sun3_82586.c4
-rw-r--r--drivers/net/ethernet/ibm/ehea/ehea_main.c2
-rw-r--r--drivers/net/ethernet/ibm/emac/core.c2
-rw-r--r--drivers/net/ethernet/ibm/ibmvnic.c4
-rw-r--r--drivers/net/ethernet/intel/e100.c2
-rw-r--r--drivers/net/ethernet/intel/e1000/e1000_main.c4
-rw-r--r--drivers/net/ethernet/intel/e1000e/netdev.c19
-rw-r--r--drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c2
-rw-r--r--drivers/net/ethernet/intel/fm10k/fm10k_netdev.c2
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e.h2
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_ethtool.c2
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c2
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_main.c12
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_xsk.c8
-rw-r--r--drivers/net/ethernet/intel/iavf/iavf_ethtool.c2
-rw-r--r--drivers/net/ethernet/intel/iavf/iavf_main.c2
-rw-r--r--drivers/net/ethernet/intel/ice/ice.h8
-rw-r--r--drivers/net/ethernet/intel/ice/ice_base.c16
-rw-r--r--drivers/net/ethernet/intel/ice/ice_common.c104
-rw-r--r--drivers/net/ethernet/intel/ice/ice_dcb_lib.c6
-rw-r--r--drivers/net/ethernet/intel/ice/ice_devids.h18
-rw-r--r--drivers/net/ethernet/intel/ice/ice_ethtool.c67
-rw-r--r--drivers/net/ethernet/intel/ice/ice_hw_autogen.h9
-rw-r--r--drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h2
-rw-r--r--drivers/net/ethernet/intel/ice/ice_lib.c254
-rw-r--r--drivers/net/ethernet/intel/ice/ice_lib.h8
-rw-r--r--drivers/net/ethernet/intel/ice/ice_main.c66
-rw-r--r--drivers/net/ethernet/intel/ice/ice_nvm.c12
-rw-r--r--drivers/net/ethernet/intel/ice/ice_txrx.c28
-rw-r--r--drivers/net/ethernet/intel/ice/ice_txrx.h6
-rw-r--r--drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c485
-rw-r--r--drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h4
-rw-r--r--drivers/net/ethernet/intel/ice/ice_xsk.c7
-rw-r--r--drivers/net/ethernet/intel/igb/igb.h1
-rw-r--r--drivers/net/ethernet/intel/igb/igb_ethtool.c12
-rw-r--r--drivers/net/ethernet/intel/igb/igb_main.c51
-rw-r--r--drivers/net/ethernet/intel/igbvf/netdev.c2
-rw-r--r--drivers/net/ethernet/intel/igc/Makefile2
-rw-r--r--drivers/net/ethernet/intel/igc/igc.h47
-rw-r--r--drivers/net/ethernet/intel/igc/igc_defines.h97
-rw-r--r--drivers/net/ethernet/intel/igc/igc_ethtool.c38
-rw-r--r--drivers/net/ethernet/intel/igc/igc_main.c2829
-rw-r--r--drivers/net/ethernet/intel/igc/igc_ptp.c716
-rw-r--r--drivers/net/ethernet/intel/igc/igc_regs.h36
-rw-r--r--drivers/net/ethernet/intel/ixgb/ixgb_ethtool.c4
-rw-r--r--drivers/net/ethernet/intel/ixgb/ixgb_main.c4
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_debugfs.c4
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_main.c9
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c12
-rw-r--r--drivers/net/ethernet/intel/ixgbevf/ethtool.c4
-rw-r--r--drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c2
-rw-r--r--drivers/net/ethernet/jme.c2
-rw-r--r--drivers/net/ethernet/korina.c2
-rw-r--r--drivers/net/ethernet/lantiq_etop.c2
-rw-r--r--drivers/net/ethernet/marvell/mv643xx_eth.c6
-rw-r--r--drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c53
-rw-r--r--drivers/net/ethernet/marvell/pxa168_eth.c2
-rw-r--r--drivers/net/ethernet/marvell/skge.c2
-rw-r--r--drivers/net/ethernet/marvell/sky2.c2
-rw-r--r--drivers/net/ethernet/mediatek/mtk_eth_soc.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_ethtool.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_netdev.c16
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/alloc.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en.h8
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/fs.h16
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/health.c7
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h22
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_fs.c16
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c33
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_stats.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tc.c64
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eq.c117
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c6
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_core.c74
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_core.h1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/main.c16
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c135
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c244
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c79
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c10
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c23
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h86
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c96
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h27
-rw-r--r--drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.c7
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/reg.h45
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum.c53
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c25
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c1
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c226
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c634
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/trap.h4
-rw-r--r--drivers/net/ethernet/micrel/ks8842.c2
-rw-r--r--drivers/net/ethernet/micrel/ksz884x.c2
-rw-r--r--drivers/net/ethernet/microchip/enc28j60.c2
-rw-r--r--drivers/net/ethernet/microchip/encx24j600.c2
-rw-r--r--drivers/net/ethernet/microchip/lan743x_ptp.c3
-rw-r--r--drivers/net/ethernet/mscc/ocelot.c7
-rw-r--r--drivers/net/ethernet/mscc/ocelot.h7
-rw-r--r--drivers/net/ethernet/mscc/ocelot_board.c4
-rw-r--r--drivers/net/ethernet/myricom/myri10ge/myri10ge.c8
-rw-r--r--drivers/net/ethernet/natsemi/natsemi.c4
-rw-r--r--drivers/net/ethernet/natsemi/ns83820.c4
-rw-r--r--drivers/net/ethernet/natsemi/sonic.c2
-rw-r--r--drivers/net/ethernet/natsemi/sonic.h2
-rw-r--r--drivers/net/ethernet/neterion/s2io.c2
-rw-r--r--drivers/net/ethernet/neterion/s2io.h2
-rw-r--r--drivers/net/ethernet/neterion/vxge/vxge-main.c2
-rw-r--r--drivers/net/ethernet/netronome/Kconfig1
-rw-r--r--drivers/net/ethernet/netronome/nfp/abm/cls.c14
-rw-r--r--drivers/net/ethernet/netronome/nfp/bpf/jit.c10
-rw-r--r--drivers/net/ethernet/netronome/nfp/bpf/main.c2
-rw-r--r--drivers/net/ethernet/netronome/nfp/bpf/offload.c2
-rw-r--r--drivers/net/ethernet/netronome/nfp/ccm.h1
-rw-r--r--drivers/net/ethernet/netronome/nfp/crypto/crypto.h15
-rw-r--r--drivers/net/ethernet/netronome/nfp/crypto/fw.h8
-rw-r--r--drivers/net/ethernet/netronome/nfp/crypto/tls.c89
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/action.c65
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/cmsg.c11
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/cmsg.h106
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/main.h40
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/match.c260
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/metadata.c12
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/offload.c144
-rw-r--r--drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c498
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_net.h6
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_net_common.c35
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.c48
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h25
-rw-r--r--drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c116
-rw-r--r--drivers/net/ethernet/nvidia/forcedeth.c2
-rw-r--r--drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_ethtool.c2
-rw-r--r--drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c2
-rw-r--r--drivers/net/ethernet/packetengines/hamachi.c4
-rw-r--r--drivers/net/ethernet/packetengines/yellowfin.c4
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic.h21
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c113
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_dev.c58
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_dev.h7
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_if.h97
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_lif.c249
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_lif.h1
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_main.c6
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_stats.c1
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_txrx.c23
-rw-r--r--drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c4
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_sp_commands.c10
-rw-r--r--drivers/net/ethernet/qlogic/qede/qede.h2
-rw-r--r--drivers/net/ethernet/qlogic/qede/qede_filter.c2
-rw-r--r--drivers/net/ethernet/qlogic/qede/qede_main.c4
-rw-r--r--drivers/net/ethernet/qlogic/qede/qede_ptp.c1
-rw-r--r--drivers/net/ethernet/qlogic/qla3xxx.c10
-rw-r--r--drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c2
-rw-r--r--drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c4
-rw-r--r--drivers/net/ethernet/qualcomm/emac/emac.c2
-rw-r--r--drivers/net/ethernet/qualcomm/qca_spi.c2
-rw-r--r--drivers/net/ethernet/qualcomm/qca_uart.c2
-rw-r--r--drivers/net/ethernet/rdc/r6040.c2
-rw-r--r--drivers/net/ethernet/realtek/8139cp.c2
-rw-r--r--drivers/net/ethernet/realtek/8139too.c4
-rw-r--r--drivers/net/ethernet/realtek/Makefile2
-rw-r--r--drivers/net/ethernet/realtek/atp.c4
-rw-r--r--drivers/net/ethernet/realtek/r8169.h78
-rw-r--r--drivers/net/ethernet/realtek/r8169_firmware.c2
-rw-r--r--drivers/net/ethernet/realtek/r8169_main.c1461
-rw-r--r--drivers/net/ethernet/realtek/r8169_phy_config.c1307
-rw-r--r--drivers/net/ethernet/renesas/ravb_main.c2
-rw-r--r--drivers/net/ethernet/renesas/sh_eth.c2
-rw-r--r--drivers/net/ethernet/rocker/rocker_main.c4
-rw-r--r--drivers/net/ethernet/samsung/sxgbe/sxgbe_ethtool.c2
-rw-r--r--drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c4
-rw-r--r--drivers/net/ethernet/seeq/ether3.c4
-rw-r--r--drivers/net/ethernet/seeq/sgiseeq.c2
-rw-r--r--drivers/net/ethernet/sfc/Kconfig2
-rw-r--r--drivers/net/ethernet/sfc/Makefile9
-rw-r--r--drivers/net/ethernet/sfc/ef10.c426
-rw-r--r--drivers/net/ethernet/sfc/ef10_sriov.c5
-rw-r--r--drivers/net/ethernet/sfc/efx.c2503
-rw-r--r--drivers/net/ethernet/sfc/efx.h65
-rw-r--r--drivers/net/ethernet/sfc/efx_channels.c1234
-rw-r--r--drivers/net/ethernet/sfc/efx_channels.h55
-rw-r--r--drivers/net/ethernet/sfc/efx_common.c1102
-rw-r--r--drivers/net/ethernet/sfc/efx_common.h73
-rw-r--r--drivers/net/ethernet/sfc/ethtool.c446
-rw-r--r--drivers/net/ethernet/sfc/ethtool_common.c456
-rw-r--r--drivers/net/ethernet/sfc/ethtool_common.h30
-rw-r--r--drivers/net/ethernet/sfc/falcon/efx.c2
-rw-r--r--drivers/net/ethernet/sfc/farch.c1
-rw-r--r--drivers/net/ethernet/sfc/mcdi.h3
-rw-r--r--drivers/net/ethernet/sfc/mcdi_functions.c386
-rw-r--r--drivers/net/ethernet/sfc/mcdi_functions.h32
-rw-r--r--drivers/net/ethernet/sfc/mcdi_port.c558
-rw-r--r--drivers/net/ethernet/sfc/mcdi_port_common.c568
-rw-r--r--drivers/net/ethernet/sfc/mcdi_port_common.h57
-rw-r--r--drivers/net/ethernet/sfc/net_driver.h24
-rw-r--r--drivers/net/ethernet/sfc/nic.h7
-rw-r--r--drivers/net/ethernet/sfc/rx.c592
-rw-r--r--drivers/net/ethernet/sfc/rx_common.c851
-rw-r--r--drivers/net/ethernet/sfc/rx_common.h97
-rw-r--r--drivers/net/ethernet/sfc/selftest.c9
-rw-r--r--drivers/net/ethernet/sfc/selftest.h2
-rw-r--r--drivers/net/ethernet/sfc/siena.c2
-rw-r--r--drivers/net/ethernet/sfc/siena_sriov.c1
-rw-r--r--drivers/net/ethernet/sfc/tx.c398
-rw-r--r--drivers/net/ethernet/sfc/tx_common.c404
-rw-r--r--drivers/net/ethernet/sfc/tx_common.h36
-rw-r--r--drivers/net/ethernet/sgi/ioc3-eth.c4
-rw-r--r--drivers/net/ethernet/sgi/meth.c4
-rw-r--r--drivers/net/ethernet/silan/sc92031.c2
-rw-r--r--drivers/net/ethernet/sis/sis190.c2
-rw-r--r--drivers/net/ethernet/sis/sis900.c4
-rw-r--r--drivers/net/ethernet/smsc/epic100.c11
-rw-r--r--drivers/net/ethernet/smsc/smc911x.c4
-rw-r--r--drivers/net/ethernet/smsc/smc9194.c4
-rw-r--r--drivers/net/ethernet/smsc/smc91c92_cs.c4
-rw-r--r--drivers/net/ethernet/smsc/smc91x.c2
-rw-r--r--drivers/net/ethernet/socionext/netsec.c2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/common.h10
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c24
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c89
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c14
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c27
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac4.h12
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c4
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c4
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h11
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c47
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac5.c119
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac5.h24
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h6
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c22
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h29
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c77
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c31
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/hwif.h20
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/mmc_core.c16
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac.h1
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c4
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_main.c177
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c5
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c2
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c4
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c137
-rw-r--r--drivers/net/ethernet/sun/cassini.c2
-rw-r--r--drivers/net/ethernet/sun/niu.c2
-rw-r--r--drivers/net/ethernet/sun/sunbmac.c2
-rw-r--r--drivers/net/ethernet/sun/sungem.c2
-rw-r--r--drivers/net/ethernet/sun/sunhme.c2
-rw-r--r--drivers/net/ethernet/sun/sunqe.c2
-rw-r--r--drivers/net/ethernet/sun/sunvnet_common.c11
-rw-r--r--drivers/net/ethernet/sun/sunvnet_common.h2
-rw-r--r--drivers/net/ethernet/synopsys/dwc-xlgmac-net.c2
-rw-r--r--drivers/net/ethernet/ti/Kconfig1
-rw-r--r--drivers/net/ethernet/ti/Makefile1
-rw-r--r--drivers/net/ethernet/ti/cpmac.c2
-rw-r--r--drivers/net/ethernet/ti/cpsw_ethtool.c6
-rw-r--r--drivers/net/ethernet/ti/cpsw_priv.c2
-rw-r--r--drivers/net/ethernet/ti/cpsw_priv.h2
-rw-r--r--drivers/net/ethernet/ti/davinci_cpdma.c5
-rw-r--r--drivers/net/ethernet/ti/davinci_emac.c2
-rw-r--r--drivers/net/ethernet/ti/netcp_core.c2
-rw-r--r--drivers/net/ethernet/ti/netcp_ethss.c40
-rw-r--r--drivers/net/ethernet/ti/tlan.c6
-rw-r--r--drivers/net/ethernet/toshiba/ps3_gelic_net.c2
-rw-r--r--drivers/net/ethernet/toshiba/ps3_gelic_net.h2
-rw-r--r--drivers/net/ethernet/toshiba/spider_net.c2
-rw-r--r--drivers/net/ethernet/toshiba/tc35815.c4
-rw-r--r--drivers/net/ethernet/via/via-rhine.c4
-rw-r--r--drivers/net/ethernet/via/via-velocity.c14
-rw-r--r--drivers/net/ethernet/via/via-velocity.h1
-rw-r--r--drivers/net/ethernet/wiznet/w5100.c2
-rw-r--r--drivers/net/ethernet/wiznet/w5300.c2
-rw-r--r--drivers/net/ethernet/xilinx/xilinx_emaclite.c2
-rw-r--r--drivers/net/ethernet/xircom/xirc2ps_cs.c4
-rw-r--r--drivers/net/ethernet/xscale/Kconfig14
-rw-r--r--drivers/net/ethernet/xscale/Makefile3
-rw-r--r--drivers/net/ethernet/xscale/ixp46x_ts.h (renamed from arch/arm/mach-ixp4xx/include/mach/ixp46x_ts.h)0
-rw-r--r--drivers/net/ethernet/xscale/ixp4xx_eth.c213
-rw-r--r--drivers/net/ethernet/xscale/ptp_ixp46x.c (renamed from drivers/ptp/ptp_ixp46x.c)3
-rw-r--r--drivers/net/fjes/fjes_ethtool.c2
-rw-r--r--drivers/net/fjes/fjes_main.c7
-rw-r--r--drivers/net/geneve.c2
-rw-r--r--drivers/net/gtp.c125
-rw-r--r--drivers/net/hamradio/6pack.c4
-rw-r--r--drivers/net/hamradio/hdlcdrv.c2
-rw-r--r--drivers/net/hamradio/mkiss.c4
-rw-r--r--drivers/net/hyperv/hyperv_net.h3
-rw-r--r--drivers/net/hyperv/netvsc_drv.c6
-rw-r--r--drivers/net/hyperv/rndis_filter.c16
-rw-r--r--drivers/net/macvlan.c6
-rw-r--r--drivers/net/netdevsim/dev.c2
-rw-r--r--drivers/net/netdevsim/fib.c5
-rw-r--r--drivers/net/phy/Kconfig6
-rw-r--r--drivers/net/phy/Makefile3
-rw-r--r--drivers/net/phy/aquantia_main.c9
-rw-r--r--drivers/net/phy/bcm84881.c269
-rw-r--r--drivers/net/phy/dp83640.c217
-rw-r--r--drivers/net/phy/dp83867.c62
-rw-r--r--drivers/net/phy/dp83869.c2
-rw-r--r--drivers/net/phy/fixed_phy.c11
-rw-r--r--drivers/net/phy/lxt.c24
-rw-r--r--drivers/net/phy/marvell.c209
-rw-r--r--drivers/net/phy/marvell10g.c13
-rw-r--r--drivers/net/phy/mdio-i2c.c28
-rw-r--r--drivers/net/phy/mii_timestamper.c125
-rw-r--r--drivers/net/phy/mscc.c6
-rw-r--r--drivers/net/phy/phy.c4
-rw-r--r--drivers/net/phy/phy_device.c111
-rw-r--r--drivers/net/phy/phylink.c339
-rw-r--r--drivers/net/phy/realtek.c59
-rw-r--r--drivers/net/phy/sfp-bus.c124
-rw-r--r--drivers/net/phy/sfp.c199
-rw-r--r--drivers/net/phy/sfp.h2
-rw-r--r--drivers/net/phy/uPD60620.c7
-rw-r--r--drivers/net/ppp/ppp_async.c18
-rw-r--r--drivers/net/ppp/ppp_generic.c2
-rw-r--r--drivers/net/slip/slip.c2
-rw-r--r--drivers/net/tap.c14
-rw-r--r--drivers/net/usb/catc.c2
-rw-r--r--drivers/net/usb/ch9200.c24
-rw-r--r--drivers/net/usb/hso.c2
-rw-r--r--drivers/net/usb/ipheth.c2
-rw-r--r--drivers/net/usb/kaweth.c2
-rw-r--r--drivers/net/usb/lan78xx.c14
-rw-r--r--drivers/net/usb/pegasus.c2
-rw-r--r--drivers/net/usb/r8152.c14
-rw-r--r--drivers/net/usb/rtl8150.c2
-rw-r--r--drivers/net/usb/sierra_net.c2
-rw-r--r--drivers/net/usb/usbnet.c4
-rw-r--r--drivers/net/vmxnet3/vmxnet3_drv.c2
-rw-r--r--drivers/net/vmxnet3/vmxnet3_ethtool.c4
-rw-r--r--drivers/net/vxlan.c29
-rw-r--r--drivers/net/wan/Kconfig3
-rw-r--r--drivers/net/wan/cosa.c4
-rw-r--r--drivers/net/wan/farsync.c2
-rw-r--r--drivers/net/wan/fsl_ucc_hdlc.c16
-rw-r--r--drivers/net/wan/hdlc_cisco.c4
-rw-r--r--drivers/net/wan/ixp4xx_hss.c39
-rw-r--r--drivers/net/wan/lmc/lmc_main.c4
-rw-r--r--drivers/net/wan/sdla.c2
-rw-r--r--drivers/net/wan/x25_asy.c2
-rw-r--r--drivers/net/wimax/i2400m/netdev.c2
-rw-r--r--drivers/net/wireguard/Makefile18
-rw-r--r--drivers/net/wireguard/allowedips.c376
-rw-r--r--drivers/net/wireguard/allowedips.h59
-rw-r--r--drivers/net/wireguard/cookie.c236
-rw-r--r--drivers/net/wireguard/cookie.h59
-rw-r--r--drivers/net/wireguard/device.c458
-rw-r--r--drivers/net/wireguard/device.h65
-rw-r--r--drivers/net/wireguard/main.c63
-rw-r--r--drivers/net/wireguard/messages.h128
-rw-r--r--drivers/net/wireguard/netlink.c642
-rw-r--r--drivers/net/wireguard/netlink.h12
-rw-r--r--drivers/net/wireguard/noise.c828
-rw-r--r--drivers/net/wireguard/noise.h137
-rw-r--r--drivers/net/wireguard/peer.c240
-rw-r--r--drivers/net/wireguard/peer.h83
-rw-r--r--drivers/net/wireguard/peerlookup.c221
-rw-r--r--drivers/net/wireguard/peerlookup.h64
-rw-r--r--drivers/net/wireguard/queueing.c53
-rw-r--r--drivers/net/wireguard/queueing.h194
-rw-r--r--drivers/net/wireguard/ratelimiter.c223
-rw-r--r--drivers/net/wireguard/ratelimiter.h19
-rw-r--r--drivers/net/wireguard/receive.c595
-rw-r--r--drivers/net/wireguard/selftest/allowedips.c683
-rw-r--r--drivers/net/wireguard/selftest/counter.c104
-rw-r--r--drivers/net/wireguard/selftest/ratelimiter.c226
-rw-r--r--drivers/net/wireguard/send.c413
-rw-r--r--drivers/net/wireguard/socket.c438
-rw-r--r--drivers/net/wireguard/socket.h44
-rw-r--r--drivers/net/wireguard/timers.c243
-rw-r--r--drivers/net/wireguard/timers.h31
-rw-r--r--drivers/net/wireguard/version.h1
-rw-r--r--drivers/net/wireless/ath/ath10k/mac.c1
-rw-r--r--drivers/net/wireless/ath/ath9k/ath9k_pci_owl_loader.c2
-rw-r--r--drivers/net/wireless/ath/wil6210/ethtool.c43
-rw-r--r--drivers/net/wireless/intel/ipw2x00/ipw2100.c2
-rw-r--r--drivers/net/wireless/intel/iwlwifi/mvm/tx.c9
-rw-r--r--drivers/net/wireless/intel/iwlwifi/pcie/drv.c12
-rw-r--r--drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c25
-rw-r--r--drivers/net/wireless/intel/iwlwifi/pcie/trans.c30
-rw-r--r--drivers/net/wireless/intersil/hostap/hostap_main.c2
-rw-r--r--drivers/net/wireless/intersil/orinoco/main.c2
-rw-r--r--drivers/net/wireless/intersil/orinoco/orinoco.h2
-rw-r--r--drivers/net/wireless/intersil/prism54/islpci_eth.c2
-rw-r--r--drivers/net/wireless/intersil/prism54/islpci_eth.h2
-rw-r--r--drivers/net/wireless/marvell/libertas/debugfs.c2
-rw-r--r--drivers/net/wireless/marvell/mwifiex/main.c2
-rw-r--r--drivers/net/wireless/marvell/mwifiex/sta_ioctl.c13
-rw-r--r--drivers/net/wireless/marvell/mwifiex/tdls.c70
-rw-r--r--drivers/net/wireless/marvell/mwifiex/util.h4
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt76x0/eeprom.c5
-rw-r--r--drivers/net/wireless/quantenna/qtnfmac/core.c2
-rw-r--r--drivers/net/wireless/wl3501_cs.c2
-rw-r--r--drivers/net/wireless/zydas/zd1201.c2
-rw-r--r--drivers/net/xen-netback/interface.c34
-rw-r--r--drivers/net/xen-netback/netback.c20
-rw-r--r--drivers/net/xen-netback/xenbus.c349
-rw-r--r--drivers/nfc/nxp-nci/i2c.c2
-rw-r--r--drivers/nfc/pn544/pn544.c2
-rw-r--r--drivers/nfc/port100.c2
-rw-r--r--drivers/nfc/s3fwrn5/firmware.c5
-rw-r--r--drivers/nvme/host/core.c6
-rw-r--r--drivers/nvme/host/fc.c40
-rw-r--r--drivers/nvme/host/nvme.h6
-rw-r--r--drivers/nvme/host/pci.c23
-rw-r--r--drivers/nvme/host/rdma.c10
-rw-r--r--drivers/nvme/target/fcloop.c1
-rw-r--r--drivers/nvme/target/loop.c8
-rw-r--r--drivers/of/of_mdio.c33
-rw-r--r--drivers/of/platform.c6
-rw-r--r--drivers/pci/controller/pcie-rockchip-host.c4
-rw-r--r--drivers/perf/arm_smmuv3_pmu.c4
-rw-r--r--drivers/phy/marvell/phy-mvebu-cp110-comphy.c20
-rw-r--r--drivers/pinctrl/Kconfig1
-rw-r--r--drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c24
-rw-r--r--drivers/pinctrl/intel/pinctrl-baytrail.c200
-rw-r--r--drivers/pinctrl/intel/pinctrl-cherryview.c107
-rw-r--r--drivers/pinctrl/pinctrl-ingenic.c2
-rw-r--r--drivers/pinctrl/pinmux.c2
-rw-r--r--drivers/platform/mellanox/mlxbf-bootctl.c2
-rw-r--r--drivers/platform/mips/Kconfig2
-rw-r--r--drivers/platform/x86/hp-wmi.c2
-rw-r--r--drivers/platform/x86/pcengines-apuv2.c63
-rw-r--r--drivers/platform/x86/pmc_atom.c8
-rw-r--r--drivers/ptp/Kconfig26
-rw-r--r--drivers/ptp/Makefile4
-rw-r--r--drivers/ptp/idt8a340_reg.h2
-rw-r--r--drivers/ptp/ptp_clock.c37
-rw-r--r--drivers/ptp/ptp_clockmatrix.c79
-rw-r--r--drivers/ptp/ptp_ines.c852
-rw-r--r--drivers/ptp/ptp_private.h2
-rw-r--r--drivers/ptp/ptp_qoriq.c15
-rw-r--r--drivers/regulator/axp20x-regulator.c11
-rw-r--r--drivers/regulator/bd70528-regulator.c1
-rw-r--r--drivers/regulator/core.c16
-rw-r--r--drivers/regulator/max77650-regulator.c7
-rw-r--r--drivers/regulator/rn5t618-regulator.c1
-rw-r--r--drivers/regulator/s5m8767.c2
-rw-r--r--drivers/reset/core.c10
-rw-r--r--drivers/reset/reset-brcmstb.c6
-rw-r--r--drivers/rtc/rtc-mc146818-lib.c15
-rw-r--r--drivers/rtc/rtc-mt6397.c39
-rw-r--r--drivers/rtc/rtc-sun6i.c16
-rw-r--r--drivers/s390/block/dasd_eckd.c28
-rw-r--r--drivers/s390/block/dasd_fba.h2
-rw-r--r--drivers/s390/block/dasd_proc.c2
-rw-r--r--drivers/s390/cio/device_ops.c2
-rw-r--r--drivers/s390/net/qeth_core.h73
-rw-r--r--drivers/s390/net/qeth_core_main.c411
-rw-r--r--drivers/s390/net/qeth_core_mpc.h36
-rw-r--r--drivers/s390/net/qeth_core_sys.c2
-rw-r--r--drivers/s390/net/qeth_l2_main.c51
-rw-r--r--drivers/s390/net/qeth_l2_sys.c3
-rw-r--r--drivers/s390/net/qeth_l3.h6
-rw-r--r--drivers/s390/net/qeth_l3_main.c200
-rw-r--r--drivers/s390/net/qeth_l3_sys.c212
-rw-r--r--drivers/scsi/aacraid/aachba.c4
-rw-r--r--drivers/scsi/be2iscsi/be_cmds.h2
-rw-r--r--drivers/scsi/cxgbi/libcxgbi.c5
-rw-r--r--drivers/scsi/libiscsi.c4
-rw-r--r--drivers/scsi/libsas/sas_discover.c11
-rw-r--r--drivers/scsi/lpfc/lpfc_bsg.c15
-rw-r--r--drivers/scsi/lpfc/lpfc_debugfs.c3
-rw-r--r--drivers/scsi/lpfc/lpfc_init.c2
-rw-r--r--drivers/scsi/lpfc/lpfc_nvme.c2
-rw-r--r--drivers/scsi/lpfc/lpfc_sli.c10
-rw-r--r--drivers/scsi/mpt3sas/mpt3sas_base.c1
-rw-r--r--drivers/scsi/qla2xxx/qla_attr.c1
-rw-r--r--drivers/scsi/qla2xxx/qla_bsg.c2
-rw-r--r--drivers/scsi/qla2xxx/qla_def.h1
-rw-r--r--drivers/scsi/qla2xxx/qla_fw.h4
-rw-r--r--drivers/scsi/qla2xxx/qla_init.c21
-rw-r--r--drivers/scsi/qla2xxx/qla_iocb.c31
-rw-r--r--drivers/scsi/qla2xxx/qla_isr.c4
-rw-r--r--drivers/scsi/qla2xxx/qla_mbx.c3
-rw-r--r--drivers/scsi/qla2xxx/qla_nvme.c1
-rw-r--r--drivers/scsi/qla2xxx/qla_sup.c35
-rw-r--r--drivers/scsi/qla2xxx/qla_target.c4
-rw-r--r--drivers/scsi/qla2xxx/tcm_qla2xxx.c3
-rw-r--r--drivers/scsi/qla4xxx/ql4_os.c1
-rw-r--r--drivers/scsi/scsi_transport_iscsi.c7
-rw-r--r--drivers/scsi/smartpqi/smartpqi_init.c6
-rw-r--r--drivers/scsi/ufs/cdns-pltfrm.c6
-rw-r--r--drivers/scsi/ufs/ufs_bsg.c2
-rw-r--r--drivers/soc/Kconfig1
-rw-r--r--drivers/soc/Makefile1
-rw-r--r--drivers/soc/sifive/Kconfig10
-rw-r--r--drivers/soc/sifive/Makefile3
-rw-r--r--drivers/soc/sifive/sifive_l2_cache.c (renamed from arch/riscv/mm/sifive_l2_cache.c)0
-rw-r--r--drivers/spi/spi-cadence.c6
-rw-r--r--drivers/spi/spi-cavium-thunderx.c2
-rw-r--r--drivers/spi/spi-dw.c20
-rw-r--r--drivers/spi/spi-dw.h1
-rw-r--r--drivers/spi/spi-fsl-dspi.c24
-rw-r--r--drivers/spi/spi-fsl-spi.c22
-rw-r--r--drivers/spi/spi-nxp-fspi.c2
-rw-r--r--drivers/spi/spi-pxa2xx.c4
-rw-r--r--drivers/spi/spi-sprd.c2
-rw-r--r--drivers/spi/spi-ti-qspi.c6
-rw-r--r--drivers/spi/spi-uniphier.c31
-rw-r--r--drivers/spi/spi.c22
-rw-r--r--drivers/staging/axis-fifo/Kconfig2
-rw-r--r--drivers/staging/comedi/drivers/gsc_hpdi.c10
-rw-r--r--drivers/staging/exfat/exfat.h4
-rw-r--r--drivers/staging/exfat/exfat_core.c10
-rw-r--r--drivers/staging/exfat/exfat_super.c4
-rw-r--r--drivers/staging/fbtft/fb_uc1611.c12
-rw-r--r--drivers/staging/fbtft/fb_watterott.c13
-rw-r--r--drivers/staging/fbtft/fbtft-core.c6
-rw-r--r--drivers/staging/hp/Kconfig1
-rw-r--r--drivers/staging/isdn/gigaset/usb-gigaset.c23
-rw-r--r--drivers/staging/ks7010/ks_wlan_net.c4
-rw-r--r--drivers/staging/media/ipu3/include/intel-ipu3.h2
-rw-r--r--drivers/staging/octeon/Kconfig1
-rw-r--r--drivers/staging/qlge/qlge_ethtool.c2
-rw-r--r--drivers/staging/qlge/qlge_main.c2
-rw-r--r--drivers/staging/rtl8188eu/os_dep/usb_intf.c2
-rw-r--r--drivers/staging/rtl8192e/rtl8192e/rtl_core.c2
-rw-r--r--drivers/staging/rtl8192u/r8192U_core.c2
-rw-r--r--drivers/staging/rtl8712/usb_intf.c2
-rw-r--r--drivers/staging/unisys/visornic/visornic_main.c2
-rw-r--r--drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c2
-rw-r--r--drivers/staging/wfx/data_tx.c37
-rw-r--r--drivers/staging/wfx/data_tx.h5
-rw-r--r--drivers/staging/wfx/hif_tx_mib.h1
-rw-r--r--drivers/staging/wfx/main.c2
-rw-r--r--drivers/staging/wfx/queue.c1
-rw-r--r--drivers/staging/wfx/sta.c6
-rw-r--r--drivers/staging/wlan-ng/Kconfig1
-rw-r--r--drivers/staging/wlan-ng/p80211netdev.c4
-rw-r--r--drivers/target/iscsi/cxgbit/cxgbit_main.c2
-rw-r--r--drivers/target/target_core_iblock.c4
-rw-r--r--drivers/thermal/Kconfig2
-rw-r--r--drivers/tty/n_gsm.c2
-rw-r--r--drivers/tty/serial/atmel_serial.c43
-rw-r--r--drivers/tty/serial/msm_serial.c13
-rw-r--r--drivers/tty/serial/serial_core.c1
-rw-r--r--drivers/tty/serial/sprd_serial.c3
-rw-r--r--drivers/tty/synclink.c2
-rw-r--r--drivers/tty/synclink_gt.c2
-rw-r--r--drivers/tty/synclinkmp.c2
-rw-r--r--drivers/tty/tty_port.c3
-rw-r--r--drivers/usb/atm/ueagle-atm.c18
-rw-r--r--drivers/usb/atm/usbatm.c2
-rw-r--r--drivers/usb/common/usb-conn-gpio.c3
-rw-r--r--drivers/usb/core/hcd.c42
-rw-r--r--drivers/usb/core/urb.c1
-rw-r--r--drivers/usb/dwc3/dwc3-pci.c6
-rw-r--r--drivers/usb/dwc3/ep0.c8
-rw-r--r--drivers/usb/dwc3/gadget.c5
-rw-r--r--drivers/usb/gadget/function/f_ecm.c6
-rw-r--r--drivers/usb/gadget/function/f_fs.c2
-rw-r--r--drivers/usb/gadget/function/f_rndis.c1
-rw-r--r--drivers/usb/host/ehci-q.c13
-rw-r--r--drivers/usb/host/xhci-hub.c22
-rw-r--r--drivers/usb/host/xhci-mem.c4
-rw-r--r--drivers/usb/host/xhci-pci.c13
-rw-r--r--drivers/usb/host/xhci-ring.c6
-rw-r--r--drivers/usb/host/xhci.c9
-rw-r--r--drivers/usb/host/xhci.h1
-rw-r--r--drivers/usb/misc/adutux.c2
-rw-r--r--drivers/usb/misc/idmouse.c2
-rw-r--r--drivers/usb/mon/mon_bin.c32
-rw-r--r--drivers/usb/roles/class.c2
-rw-r--r--drivers/usb/serial/io_edgeport.c10
-rw-r--r--drivers/usb/storage/scsiglue.c3
-rw-r--r--drivers/usb/typec/class.c6
-rw-r--r--drivers/usb/typec/tcpm/Kconfig1
-rw-r--r--drivers/usb/usbip/usbip_common.c3
-rw-r--r--drivers/usb/usbip/vhci_rx.c13
-rw-r--r--drivers/virtio/virtio_balloon.c36
-rw-r--r--drivers/watchdog/Kconfig2
-rw-r--r--drivers/watchdog/imx7ulp_wdt.c2
-rw-r--r--drivers/watchdog/orion_wdt.c4
-rw-r--r--drivers/watchdog/rn5t618_wdt.c1
-rw-r--r--drivers/watchdog/w83627hf_wdt.c2
-rw-r--r--drivers/xen/balloon.c3
-rw-r--r--drivers/xen/grant-table.c4
-rw-r--r--drivers/xen/xenbus/xenbus.h2
-rw-r--r--drivers/xen/xenbus/xenbus_probe.c35
-rw-r--r--drivers/xen/xenbus/xenbus_probe_backend.c1
-rw-r--r--drivers/xen/xenbus/xenbus_probe_frontend.c24
-rw-r--r--fs/afs/dynroot.c3
-rw-r--r--fs/afs/mntpt.c6
-rw-r--r--fs/afs/proc.c7
-rw-r--r--fs/afs/server.c21
-rw-r--r--fs/afs/super.c2
-rw-r--r--fs/btrfs/Kconfig1
-rw-r--r--fs/btrfs/compression.c7
-rw-r--r--fs/btrfs/ctree.c2
-rw-r--r--fs/btrfs/ctree.h2
-rw-r--r--fs/btrfs/extent-tree.c27
-rw-r--r--fs/btrfs/extent_io.c6
-rw-r--r--fs/btrfs/file-item.c7
-rw-r--r--fs/btrfs/file.c4
-rw-r--r--fs/btrfs/inode.c18
-rw-r--r--fs/btrfs/ioctl.c26
-rw-r--r--fs/btrfs/qgroup.c4
-rw-r--r--fs/btrfs/relocation.c1
-rw-r--r--fs/btrfs/send.c6
-rw-r--r--fs/btrfs/tests/free-space-tree-tests.c4
-rw-r--r--fs/btrfs/tests/qgroup-tests.c4
-rw-r--r--fs/btrfs/tree-checker.c20
-rw-r--r--fs/btrfs/tree-log.c52
-rw-r--r--fs/btrfs/uuid-tree.c2
-rw-r--r--fs/btrfs/volumes.c4
-rw-r--r--fs/buffer.c25
-rw-r--r--fs/ceph/caps.c41
-rw-r--r--fs/ceph/debugfs.c13
-rw-r--r--fs/ceph/mds_client.c8
-rw-r--r--fs/ceph/mds_client.h9
-rw-r--r--fs/ceph/mdsmap.c12
-rw-r--r--fs/ceph/super.c28
-rw-r--r--fs/ceph/super.h16
-rw-r--r--fs/cifs/cifsglob.h3
-rw-r--r--fs/cifs/cifssmb.c3
-rw-r--r--fs/cifs/readdir.c63
-rw-r--r--fs/cifs/smb2file.c2
-rw-r--r--fs/cifs/smb2inode.c1
-rw-r--r--fs/cifs/smb2ops.c19
-rw-r--r--fs/cifs/smb2pdu.c2
-rw-r--r--fs/cifs/smb2proto.h2
-rw-r--r--fs/crypto/keyring.c2
-rw-r--r--fs/direct-io.c2
-rw-r--r--fs/drop_caches.c2
-rw-r--r--fs/erofs/xattr.c2
-rw-r--r--fs/ext4/block_validity.c6
-rw-r--r--fs/ext4/dir.c6
-rw-r--r--fs/ext4/ialloc.c4
-rw-r--r--fs/ext4/inode-test.c2
-rw-r--r--fs/ext4/inode.c4
-rw-r--r--fs/ext4/namei.c36
-rw-r--r--fs/ext4/super.c143
-rw-r--r--fs/hugetlbfs/inode.c4
-rw-r--r--fs/inode.c7
-rw-r--r--fs/io-wq.c46
-rw-r--r--fs/io-wq.h15
-rw-r--r--fs/io_uring.c1294
-rw-r--r--fs/locks.c2
-rw-r--r--fs/namespace.c12
-rw-r--r--fs/notify/fsnotify.c4
-rw-r--r--fs/nsfs.c3
-rw-r--r--fs/ocfs2/dlmglue.c1
-rw-r--r--fs/ocfs2/journal.c8
-rw-r--r--fs/overlayfs/copy_up.c53
-rw-r--r--fs/overlayfs/dir.c2
-rw-r--r--fs/overlayfs/export.c80
-rw-r--r--fs/overlayfs/inode.c8
-rw-r--r--fs/overlayfs/namei.c52
-rw-r--r--fs/overlayfs/overlayfs.h34
-rw-r--r--fs/overlayfs/ovl_entry.h2
-rw-r--r--fs/overlayfs/super.c24
-rw-r--r--fs/pipe.c38
-rw-r--r--fs/posix_acl.c7
-rw-r--r--fs/proc/stat.c4
-rw-r--r--fs/pstore/ram.c13
-rw-r--r--fs/quota/dquot.c1
-rw-r--r--fs/super.c4
-rw-r--r--fs/verity/enable.c2
-rw-r--r--fs/xfs/libxfs/xfs_alloc.c18
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c5
-rw-r--r--fs/xfs/libxfs/xfs_dir2.c21
-rw-r--r--fs/xfs/libxfs/xfs_dir2_priv.h29
-rw-r--r--fs/xfs/libxfs/xfs_dir2_sf.c6
-rw-r--r--fs/xfs/libxfs/xfs_ialloc.c64
-rw-r--r--fs/xfs/libxfs/xfs_ialloc.h1
-rw-r--r--fs/xfs/libxfs/xfs_trans_resv.c96
-rw-r--r--fs/xfs/xfs_bmap_util.c12
-rw-r--r--fs/xfs/xfs_buf_item.c2
-rw-r--r--fs/xfs/xfs_mount.c168
-rw-r--r--fs/xfs/xfs_trace.h21
-rw-r--r--include/linux/ahci_platform.h2
-rw-r--r--include/linux/bio.h1
-rw-r--r--include/linux/blk-cgroup.h2
-rw-r--r--include/linux/bpf-cgroup.h12
-rw-r--r--include/linux/bpf.h83
-rw-r--r--include/linux/can/dev.h34
-rw-r--r--include/linux/cpufreq.h11
-rw-r--r--include/linux/devfreq.h14
-rw-r--r--include/linux/device.h4
-rw-r--r--include/linux/dmaengine.h5
-rw-r--r--include/linux/dsa/sja1105.h6
-rw-r--r--include/linux/efi.h10
-rw-r--r--include/linux/ethtool_netlink.h17
-rw-r--r--include/linux/filter.h53
-rw-r--r--include/linux/fsl/enetc_mdio.h55
-rw-r--r--include/linux/fsl/ptp_qoriq.h1
-rw-r--r--include/linux/ftrace.h5
-rw-r--r--include/linux/i2c.h12
-rw-r--r--include/linux/if_ether.h8
-rw-r--r--include/linux/initrd.h2
-rw-r--r--include/linux/jbd2.h2
-rw-r--r--include/linux/kasan.h15
-rw-r--r--include/linux/kernel.h9
-rw-r--r--include/linux/kvm_host.h2
-rw-r--r--include/linux/libata.h1
-rw-r--r--include/linux/memory_hotplug.h7
-rw-r--r--include/linux/mfd/mt6397/rtc.h8
-rw-r--r--include/linux/mii.h50
-rw-r--r--include/linux/mii_timestamper.h121
-rw-r--r--include/linux/mlx4/device.h2
-rw-r--r--include/linux/mlx5/driver.h2
-rw-r--r--include/linux/mm.h3
-rw-r--r--include/linux/mod_devicetable.h4
-rw-r--r--include/linux/net.h1
-rw-r--r--include/linux/netdevice.h24
-rw-r--r--include/linux/nvme-fc-driver.h4
-rw-r--r--include/linux/of_mdio.h6
-rw-r--r--include/linux/phy.h101
-rw-r--r--include/linux/phy_led_triggers.h2
-rw-r--r--include/linux/phylink.h2
-rw-r--r--include/linux/platform_data/eth_ixp4xx.h19
-rw-r--r--include/linux/platform_data/ti-sysc.h1
-rw-r--r--include/linux/platform_data/wan_ixp4xx_hss.h17
-rw-r--r--include/linux/posix-clock.h19
-rw-r--r--include/linux/printk.h3
-rw-r--r--include/linux/ptp_clock_kernel.h9
-rw-r--r--include/linux/rculist_nulls.h37
-rw-r--r--include/linux/sched/cpufreq.h3
-rw-r--r--include/linux/sfp.h95
-rw-r--r--include/linux/skbuff.h11
-rw-r--r--include/linux/spi/spi.h4
-rw-r--r--include/linux/stmmac.h13
-rw-r--r--include/linux/sxgbe_platform.h2
-rw-r--r--include/linux/syscalls.h2
-rw-r--r--include/linux/tpm.h1
-rw-r--r--include/linux/usb/usbnet.h2
-rw-r--r--include/net/addrconf.h8
-rw-r--r--include/net/af_unix.h5
-rw-r--r--include/net/af_vsock.h2
-rw-r--r--include/net/devlink.h8
-rw-r--r--include/net/dsa.h19
-rw-r--r--include/net/dsfield.h2
-rw-r--r--include/net/dst.h15
-rw-r--r--include/net/dst_ops.h3
-rw-r--r--include/net/garp.h2
-rw-r--r--include/net/inet_hashtables.h12
-rw-r--r--include/net/ip6_fib.h1
-rw-r--r--include/net/ip_tunnels.h6
-rw-r--r--include/net/mptcp.h85
-rw-r--r--include/net/mrp.h2
-rw-r--r--include/net/neighbour.h1
-rw-r--r--include/net/netfilter/nf_conntrack_helper.h2
-rw-r--r--include/net/netfilter/nf_flow_table.h6
-rw-r--r--include/net/netfilter/nf_tables_core.h2
-rw-r--r--include/net/netlink.h8
-rw-r--r--include/net/netns/ipv4.h1
-rw-r--r--include/net/pkt_cls.h36
-rw-r--r--include/net/sch_generic.h5
-rw-r--r--include/net/sock.h38
-rw-r--r--include/net/tcp.h55
-rw-r--r--include/net/tls.h9
-rw-r--r--include/net/x25.h3
-rw-r--r--include/net/xdp_sock.h25
-rw-r--r--include/rdma/ib_verbs.h5
-rw-r--r--include/soc/mscc/ocelot.h2
-rw-r--r--include/soc/mscc/ocelot_ana.h (renamed from drivers/net/ethernet/mscc/ocelot_ana.h)0
-rw-r--r--include/soc/mscc/ocelot_dev.h (renamed from drivers/net/ethernet/mscc/ocelot_dev.h)0
-rw-r--r--include/soc/mscc/ocelot_qsys.h (renamed from drivers/net/ethernet/mscc/ocelot_qsys.h)0
-rw-r--r--include/sound/soc.h1
-rw-r--r--include/trace/events/preemptirq.h8
-rw-r--r--include/trace/events/sctp.h9
-rw-r--r--include/trace/events/sock.h5
-rw-r--r--include/uapi/linux/audit.h1
-rw-r--r--include/uapi/linux/bpf.h10
-rw-r--r--include/uapi/linux/btf.h3
-rw-r--r--include/uapi/linux/ethtool.h5
-rw-r--r--include/uapi/linux/ethtool_netlink.h204
-rw-r--r--include/uapi/linux/if_bonding.h10
-rw-r--r--include/uapi/linux/if_bridge.h10
-rw-r--r--include/uapi/linux/if_link.h1
-rw-r--r--include/uapi/linux/in.h2
-rw-r--r--include/uapi/linux/io_uring.h40
-rw-r--r--include/uapi/linux/kcov.h10
-rw-r--r--include/uapi/linux/mii.h12
-rw-r--r--include/uapi/linux/net_tstamp.h8
-rw-r--r--include/uapi/linux/netfilter/nf_tables.h4
-rw-r--r--include/uapi/linux/netfilter/xt_sctp.h6
-rw-r--r--include/uapi/linux/nl80211.h5
-rw-r--r--include/uapi/linux/openvswitch.h31
-rw-r--r--include/uapi/linux/pkt_sched.h17
-rw-r--r--include/uapi/linux/tcp.h5
-rw-r--r--include/uapi/linux/tipc_netlink.h2
-rw-r--r--include/uapi/linux/vm_sockets.h8
-rw-r--r--include/uapi/linux/wireguard.h196
-rw-r--r--include/xen/interface/io/ring.h29
-rw-r--r--include/xen/xenbus.h1
-rw-r--r--init/Kconfig7
-rw-r--r--init/do_mounts.c33
-rw-r--r--init/do_mounts_initrd.c11
-rw-r--r--init/main.c17
-rw-r--r--ipc/util.c2
-rw-r--r--kernel/bpf/Makefile1
-rw-r--r--kernel/bpf/btf.c1
-rw-r--r--kernel/bpf/cgroup.c110
-rw-r--r--kernel/bpf/core.c23
-rw-r--r--kernel/bpf/cpumap.c76
-rw-r--r--kernel/bpf/devmap.c78
-rw-r--r--kernel/bpf/dispatcher.c158
-rw-r--r--kernel/bpf/local_storage.c28
-rw-r--r--kernel/bpf/syscall.c63
-rw-r--r--kernel/bpf/trampoline.c88
-rw-r--r--kernel/bpf/verifier.c76
-rw-r--r--kernel/bpf/xskmap.c18
-rw-r--r--kernel/cgroup/cgroup.c5
-rw-r--r--kernel/cred.c6
-rw-r--r--kernel/events/core.c2
-rw-r--r--kernel/exit.c12
-rw-r--r--kernel/locking/mutex.c4
-rw-r--r--kernel/locking/spinlock_debug.c32
-rw-r--r--kernel/module.c6
-rw-r--r--kernel/sched/cpufreq.c18
-rw-r--r--kernel/sched/cpufreq_schedutil.c8
-rw-r--r--kernel/sched/fair.c13
-rw-r--r--kernel/sched/psi.c5
-rw-r--r--kernel/seccomp.c7
-rw-r--r--kernel/taskstats.c30
-rw-r--r--kernel/time/posix-clock.c31
-rw-r--r--kernel/trace/fgraph.c23
-rw-r--r--kernel/trace/ftrace.c25
-rw-r--r--kernel/trace/ring_buffer.c2
-rw-r--r--kernel/trace/trace.c14
-rw-r--r--kernel/trace/trace_events.c8
-rw-r--r--kernel/trace/trace_events_filter.c2
-rw-r--r--kernel/trace/trace_events_hist.c21
-rw-r--r--kernel/trace/trace_events_inject.c4
-rw-r--r--kernel/trace/trace_sched_wakeup.c4
-rw-r--r--kernel/trace/trace_seq.c2
-rw-r--r--kernel/trace/trace_stack.c5
-rw-r--r--kernel/trace/tracing_map.c4
-rw-r--r--kernel/workqueue.c4
-rw-r--r--lib/Kconfig.debug100
-rw-r--r--lib/iov_iter.c3
-rw-r--r--lib/raid6/unroll.awk2
-rw-r--r--lib/sbitmap.c2
-rw-r--r--mm/gup_benchmark.c8
-rw-r--r--mm/hugetlb.c51
-rw-r--r--mm/kasan/common.c36
-rw-r--r--mm/memory.c136
-rw-r--r--mm/memory_hotplug.c31
-rw-r--r--mm/memremap.c2
-rw-r--r--mm/migrate.c23
-rw-r--r--mm/mmap.c6
-rw-r--r--mm/oom_kill.c2
-rw-r--r--mm/vmalloc.c129
-rw-r--r--mm/vmscan.c2
-rw-r--r--mm/zsmalloc.c5
-rw-r--r--net/802/mrp.c6
-rw-r--r--net/8021q/vlan.h1
-rw-r--r--net/8021q/vlan_dev.c7
-rw-r--r--net/8021q/vlan_netlink.c19
-rw-r--r--net/Kconfig15
-rw-r--r--net/Makefile2
-rw-r--r--net/atm/lec.c2
-rw-r--r--net/ax25/af_ax25.c2
-rw-r--r--net/batman-adv/main.c2
-rw-r--r--net/bluetooth/bnep/netdev.c2
-rw-r--r--net/bpf/test_run.c54
-rw-r--r--net/bridge/br.c2
-rw-r--r--net/bridge/br_netfilter_hooks.c3
-rw-r--r--net/bridge/br_netlink.c13
-rw-r--r--net/bridge/br_nf_core.c3
-rw-r--r--net/bridge/br_private.h2
-rw-r--r--net/bridge/br_stp.c15
-rw-r--r--net/bridge/br_stp_bpdu.c4
-rw-r--r--net/bridge/netfilter/ebtables.c33
-rw-r--r--net/can/j1939/socket.c10
-rw-r--r--net/core/Makefile2
-rw-r--r--net/core/dev.c23
-rw-r--r--net/core/dev_ioctl.c1
-rw-r--r--net/core/devlink.c62
-rw-r--r--net/core/filter.c272
-rw-r--r--net/core/flow_dissector.c12
-rw-r--r--net/core/neighbour.c3
-rw-r--r--net/core/net-sysfs.c7
-rw-r--r--net/core/page_pool.c89
-rw-r--r--net/core/rtnetlink.c5
-rw-r--r--net/core/skbuff.c52
-rw-r--r--net/core/sock.c2
-rw-r--r--net/core/sysctl_net_core.c2
-rw-r--r--net/core/timestamping.c20
-rw-r--r--net/core/xdp.c4
-rw-r--r--net/dccp/proto.c2
-rw-r--r--net/decnet/af_decnet.c2
-rw-r--r--net/decnet/dn_route.c6
-rw-r--r--net/dsa/Kconfig6
-rw-r--r--net/dsa/Makefile1
-rw-r--r--net/dsa/dsa2.c34
-rw-r--r--net/dsa/dsa_priv.h19
-rw-r--r--net/dsa/master.c30
-rw-r--r--net/dsa/port.c39
-rw-r--r--net/dsa/slave.c41
-rw-r--r--net/dsa/tag_ar9331.c96
-rw-r--r--net/dsa/tag_ksz.c8
-rw-r--r--net/dsa/tag_sja1105.c18
-rw-r--r--net/ethtool/Makefile8
-rw-r--r--net/ethtool/bitset.c735
-rw-r--r--net/ethtool/bitset.h28
-rw-r--r--net/ethtool/common.c227
-rw-r--r--net/ethtool/common.h29
-rw-r--r--net/ethtool/ioctl.c (renamed from net/core/ethtool.c)166
-rw-r--r--net/ethtool/linkinfo.c167
-rw-r--r--net/ethtool/linkmodes.c375
-rw-r--r--net/ethtool/linkstate.c74
-rw-r--r--net/ethtool/netlink.c696
-rw-r--r--net/ethtool/netlink.h341
-rw-r--r--net/ethtool/strset.c426
-rw-r--r--net/hsr/hsr_debugfs.c52
-rw-r--r--net/hsr/hsr_device.c28
-rw-r--r--net/hsr/hsr_framereg.c73
-rw-r--r--net/hsr/hsr_framereg.h6
-rw-r--r--net/hsr/hsr_main.c7
-rw-r--r--net/hsr/hsr_main.h22
-rw-r--r--net/hsr/hsr_netlink.c1
-rw-r--r--net/ipv4/fib_trie.c121
-rw-r--r--net/ipv4/gre_offload.c2
-rw-r--r--net/ipv4/inet_connection_sock.c16
-rw-r--r--net/ipv4/inet_diag.c3
-rw-r--r--net/ipv4/inet_hashtables.c16
-rw-r--r--net/ipv4/ip_gre.c4
-rw-r--r--net/ipv4/ip_tunnel.c2
-rw-r--r--net/ipv4/ip_vti.c6
-rw-r--r--net/ipv4/netfilter/arp_tables.c27
-rw-r--r--net/ipv4/route.c9
-rw-r--r--net/ipv4/sysctl_net_ipv4.c9
-rw-r--r--net/ipv4/tcp.c18
-rw-r--r--net/ipv4/tcp_bbr.c3
-rw-r--r--net/ipv4/tcp_cubic.c83
-rw-r--r--net/ipv4/tcp_input.c18
-rw-r--r--net/ipv4/tcp_ipv4.c136
-rw-r--r--net/ipv4/tcp_metrics.c13
-rw-r--r--net/ipv4/tcp_output.c32
-rw-r--r--net/ipv4/udp.c2
-rw-r--r--net/ipv4/udp_offload.c2
-rw-r--r--net/ipv4/xfrm4_policy.c5
-rw-r--r--net/ipv6/addrconf.c8
-rw-r--r--net/ipv6/inet6_connection_sock.c2
-rw-r--r--net/ipv6/ip6_fib.c108
-rw-r--r--net/ipv6/ip6_gre.c6
-rw-r--r--net/ipv6/ip6_tunnel.c4
-rw-r--r--net/ipv6/ip6_vti.c2
-rw-r--r--net/ipv6/route.c108
-rw-r--r--net/ipv6/sit.c2
-rw-r--r--net/ipv6/tcp_ipv6.c111
-rw-r--r--net/ipv6/udp_offload.c2
-rw-r--r--net/ipv6/xfrm6_policy.c5
-rw-r--r--net/iucv/af_iucv.c2
-rw-r--r--net/l2tp/l2tp_core.c2
-rw-r--r--net/llc/llc_station.c4
-rw-r--r--net/mac80211/airtime.c2
-rw-r--r--net/mac80211/debugfs_sta.c76
-rw-r--r--net/mac80211/main.c4
-rw-r--r--net/mac80211/sta_info.c3
-rw-r--r--net/mac80211/sta_info.h1
-rw-r--r--net/mac80211/tx.c13
-rw-r--r--net/ncsi/internal.h20
-rw-r--r--net/ncsi/ncsi-cmd.c10
-rw-r--r--net/ncsi/ncsi-manage.c72
-rw-r--r--net/ncsi/ncsi-rsp.c6
-rw-r--r--net/netfilter/ipset/ip_set_core.c3
-rw-r--r--net/netfilter/ipvs/ip_vs_xmit.c2
-rw-r--r--net/netfilter/nf_conntrack_core.c8
-rw-r--r--net/netfilter/nf_conntrack_extend.c1
-rw-r--r--net/netfilter/nf_conntrack_netlink.c3
-rw-r--r--net/netfilter/nf_conntrack_proto_dccp.c3
-rw-r--r--net/netfilter/nf_conntrack_proto_sctp.c3
-rw-r--r--net/netfilter/nf_flow_table_core.c7
-rw-r--r--net/netfilter/nf_flow_table_ip.c4
-rw-r--r--net/netfilter/nf_flow_table_offload.c119
-rw-r--r--net/netfilter/nf_queue.c2
-rw-r--r--net/netfilter/nf_tables_api.c30
-rw-r--r--net/netfilter/nf_tables_offload.c6
-rw-r--r--net/netfilter/nfnetlink_cthelper.c2
-rw-r--r--net/netfilter/nft_bitwise.c4
-rw-r--r--net/netfilter/nft_cmp.c6
-rw-r--r--net/netfilter/nft_ct.c12
-rw-r--r--net/netfilter/nft_flow_offload.c3
-rw-r--r--net/netfilter/nft_masq.c2
-rw-r--r--net/netfilter/nft_meta.c440
-rw-r--r--net/netfilter/nft_nat.c6
-rw-r--r--net/netfilter/nft_range.c10
-rw-r--r--net/netfilter/nft_redir.c2
-rw-r--r--net/netfilter/nft_set_rbtree.c21
-rw-r--r--net/netfilter/nft_tproxy.c8
-rw-r--r--net/netfilter/nft_tunnel.c52
-rw-r--r--net/netfilter/xt_RATEEST.c2
-rw-r--r--net/netlink/af_netlink.c2
-rw-r--r--net/nfc/nci/uart.c2
-rw-r--r--net/openvswitch/actions.c30
-rw-r--r--net/openvswitch/datapath.c2
-rw-r--r--net/openvswitch/flow.h4
-rw-r--r--net/openvswitch/flow_netlink.c34
-rw-r--r--net/packet/af_packet.c31
-rw-r--r--net/phonet/pn_dev.c2
-rw-r--r--net/qrtr/qrtr.c2
-rw-r--r--net/rfkill/core.c7
-rw-r--r--net/rose/rose_route.c1
-rw-r--r--net/rxrpc/af_rxrpc.c2
-rw-r--r--net/rxrpc/ar-internal.h10
-rw-r--r--net/rxrpc/call_accept.c60
-rw-r--r--net/rxrpc/conn_event.c16
-rw-r--r--net/rxrpc/conn_service.c4
-rw-r--r--net/rxrpc/input.c18
-rw-r--r--net/rxrpc/rxkad.c5
-rw-r--r--net/rxrpc/security.c70
-rw-r--r--net/sched/Kconfig17
-rw-r--r--net/sched/Makefile1
-rw-r--r--net/sched/act_ct.c4
-rw-r--r--net/sched/act_mirred.c22
-rw-r--r--net/sched/cls_api.c31
-rw-r--r--net/sched/cls_flower.c14
-rw-r--r--net/sched/sch_cake.c61
-rw-r--r--net/sched/sch_ets.c828
-rw-r--r--net/sched/sch_fq.c23
-rw-r--r--net/sched/sch_generic.c2
-rw-r--r--net/sched/sch_prio.c10
-rw-r--r--net/sctp/associola.c10
-rw-r--r--net/sctp/chunk.c2
-rw-r--r--net/sctp/endpointola.c6
-rw-r--r--net/sctp/input.c5
-rw-r--r--net/sctp/output.c2
-rw-r--r--net/sctp/outqueue.c13
-rw-r--r--net/sctp/protocol.c5
-rw-r--r--net/sctp/sm_make_chunk.c7
-rw-r--r--net/sctp/sm_sideeffect.c44
-rw-r--r--net/sctp/sm_statefuns.c20
-rw-r--r--net/sctp/socket.c12
-rw-r--r--net/sctp/stream.c25
-rw-r--r--net/sctp/stream_interleave.c23
-rw-r--r--net/sctp/transport.c4
-rw-r--r--net/sctp/ulpqueue.c15
-rw-r--r--net/smc/af_smc.c14
-rw-r--r--net/smc/smc_core.c5
-rw-r--r--net/socket.c17
-rw-r--r--net/tipc/Makefile4
-rw-r--r--net/tipc/bcast.c35
-rw-r--r--net/tipc/bearer.c11
-rw-r--r--net/tipc/bearer.h6
-rw-r--r--net/tipc/discover.c6
-rw-r--r--net/tipc/eth_media.c3
-rw-r--r--net/tipc/ib_media.c5
-rw-r--r--net/tipc/link.c199
-rw-r--r--net/tipc/link.h9
-rw-r--r--net/tipc/name_table.c279
-rw-r--r--net/tipc/net.c56
-rw-r--r--net/tipc/net.h1
-rw-r--r--net/tipc/netlink.c6
-rw-r--r--net/tipc/netlink_compat.c4
-rw-r--r--net/tipc/node.c16
-rw-r--r--net/tipc/socket.c89
-rw-r--r--net/tipc/udp_media.c3
-rw-r--r--net/tls/tls_device.c5
-rw-r--r--net/unix/af_unix.c58
-rw-r--r--net/vmw_vsock/Kconfig12
-rw-r--r--net/vmw_vsock/Makefile1
-rw-r--r--net/vmw_vsock/af_vsock.c45
-rw-r--r--net/vmw_vsock/virtio_transport.c61
-rw-r--r--net/vmw_vsock/virtio_transport_common.c28
-rw-r--r--net/vmw_vsock/vmci_transport.c2
-rw-r--r--net/vmw_vsock/vsock_loopback.c180
-rw-r--r--net/wireless/core.c1
-rw-r--r--net/wireless/nl80211.c3
-rw-r--r--net/x25/af_x25.c8
-rw-r--r--net/x25/x25_in.c32
-rw-r--r--net/xdp/xsk.c101
-rw-r--r--net/xdp/xsk_queue.c15
-rw-r--r--net/xdp/xsk_queue.h371
-rw-r--r--samples/bpf/Makefile5
-rw-r--r--samples/bpf/syscall_tp_kern.c18
-rw-r--r--samples/bpf/trace_event_user.c4
-rw-r--r--samples/bpf/xdp1_user.c5
-rw-r--r--samples/bpf/xdp_adjust_tail_user.c5
-rw-r--r--samples/bpf/xdp_fwd_user.c17
-rw-r--r--samples/bpf/xdp_redirect_cpu_user.c63
-rw-r--r--samples/bpf/xdp_redirect_map_user.c5
-rw-r--r--samples/bpf/xdp_redirect_user.c5
-rw-r--r--samples/bpf/xdp_router_ipv4_user.c3
-rw-r--r--samples/bpf/xdp_rxq_info_user.c4
-rw-r--r--samples/bpf/xdp_sample_pkts_user.c12
-rw-r--r--samples/bpf/xdp_tx_iptunnel_user.c5
-rw-r--r--samples/bpf/xdpsock_user.c431
-rw-r--r--samples/seccomp/user-trap.c4
-rw-r--r--samples/trace_printk/trace-printk.c1
-rwxr-xr-xscripts/checkpatch.pl9
-rw-r--r--scripts/gcc-plugins/Kconfig9
-rw-r--r--scripts/kallsyms.c38
-rw-r--r--scripts/kconfig/expr.c7
-rwxr-xr-xscripts/mkcompile_h10
-rwxr-xr-xscripts/package/mkdebian4
-rw-r--r--security/apparmor/apparmorfs.c2
-rw-r--r--security/apparmor/domain.c82
-rw-r--r--security/apparmor/file.c12
-rw-r--r--security/apparmor/mount.c2
-rw-r--r--security/apparmor/policy.c4
-rw-r--r--security/integrity/ima/ima_policy.c4
-rw-r--r--security/keys/Kconfig4
-rw-r--r--security/keys/Makefile2
-rw-r--r--security/keys/compat.c5
-rw-r--r--security/keys/internal.h4
-rw-r--r--security/keys/trusted-keys/trusted_tpm2.c1
-rw-r--r--security/tomoyo/common.c9
-rw-r--r--security/tomoyo/domain.c15
-rw-r--r--security/tomoyo/group.c9
-rw-r--r--security/tomoyo/realpath.c32
-rw-r--r--security/tomoyo/util.c6
-rw-r--r--sound/core/pcm_native.c4
-rw-r--r--sound/firewire/fireface/ff-pcm.c2
-rw-r--r--sound/firewire/motu/motu-pcm.c8
-rw-r--r--sound/firewire/oxfw/oxfw-pcm.c2
-rw-r--r--sound/hda/hdac_stream.c4
-rw-r--r--sound/pci/echoaudio/echoaudio_dsp.c20
-rw-r--r--sound/pci/hda/hda_controller.c2
-rw-r--r--sound/pci/hda/hda_intel.c22
-rw-r--r--sound/pci/hda/patch_ca0132.c23
-rw-r--r--sound/pci/hda/patch_hdmi.c2
-rw-r--r--sound/pci/hda/patch_realtek.c52
-rw-r--r--sound/pci/ice1712/ice1724.c9
-rw-r--r--sound/soc/amd/acp-da7219-max98357a.c46
-rw-r--r--sound/soc/codecs/hdmi-codec.c2
-rw-r--r--sound/soc/codecs/max98090.c30
-rw-r--r--sound/soc/codecs/max98090.h1
-rw-r--r--sound/soc/codecs/rt5677-spi.h16
-rw-r--r--sound/soc/codecs/rt5682.c2
-rw-r--r--sound/soc/codecs/wm8904.c6
-rw-r--r--sound/soc/codecs/wm8962.c4
-rw-r--r--sound/soc/generic/simple-card.c6
-rw-r--r--sound/soc/intel/atom/sst/sst.c1
-rw-r--r--sound/soc/intel/boards/bytcr_rt5640.c8
-rw-r--r--sound/soc/intel/common/soc-acpi-intel-cml-match.c41
-rw-r--r--sound/soc/soc-compress.c6
-rw-r--r--sound/soc/soc-core.c13
-rw-r--r--sound/soc/soc-pcm.c11
-rw-r--r--sound/soc/soc-topology.c27
-rw-r--r--sound/soc/sof/intel/byt.c25
-rw-r--r--sound/soc/sof/loader.c7
-rw-r--r--sound/soc/sof/topology.c4
-rw-r--r--sound/usb/card.h1
-rw-r--r--sound/usb/pcm.c25
-rw-r--r--sound/usb/quirks-table.h3
-rw-r--r--sound/usb/quirks.c11
-rw-r--r--sound/usb/usbaudio.h3
-rw-r--r--tools/arch/arm/include/uapi/asm/kvm.h3
-rw-r--r--tools/arch/arm64/include/uapi/asm/kvm.h5
-rw-r--r--tools/arch/powerpc/include/uapi/asm/kvm.h3
-rw-r--r--tools/arch/x86/include/asm/cpufeatures.h3
-rw-r--r--tools/arch/x86/include/asm/msr-index.h18
-rw-r--r--tools/arch/x86/lib/memcpy_64.S20
-rw-r--r--tools/arch/x86/lib/memset_64.S16
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-gen.rst305
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-map.rst12
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-prog.rst18
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool.rst3
-rw-r--r--tools/bpf/bpftool/bash-completion/bpftool156
-rw-r--r--tools/bpf/bpftool/cgroup.c56
-rw-r--r--tools/bpf/bpftool/gen.c609
-rw-r--r--tools/bpf/bpftool/main.c3
-rw-r--r--tools/bpf/bpftool/main.h5
-rw-r--r--tools/bpf/bpftool/map.c384
-rw-r--r--tools/bpf/bpftool/net.c1
-rw-r--r--tools/bpf/bpftool/prog.c390
-rw-r--r--tools/bpf/bpftool/xlated_dumper.c2
-rw-r--r--tools/include/uapi/asm/bpf_perf_event.h2
-rw-r--r--tools/include/uapi/drm/drm.h3
-rw-r--r--tools/include/uapi/drm/i915_drm.h128
-rw-r--r--tools/include/uapi/linux/bpf.h10
-rw-r--r--tools/include/uapi/linux/btf.h7
-rw-r--r--tools/include/uapi/linux/fscrypt.h3
-rw-r--r--tools/include/uapi/linux/in.h2
-rw-r--r--tools/include/uapi/linux/kvm.h12
-rw-r--r--tools/include/uapi/linux/sched.h60
-rw-r--r--tools/include/uapi/linux/stat.h2
-rw-r--r--tools/lib/bpf/Makefile29
-rw-r--r--tools/lib/bpf/bpf.c17
-rw-r--r--tools/lib/bpf/bpf.h17
-rw-r--r--tools/lib/bpf/bpf_helpers.h11
-rw-r--r--tools/lib/bpf/btf.c48
-rw-r--r--tools/lib/bpf/btf.h29
-rw-r--r--tools/lib/bpf/btf_dump.c115
-rw-r--r--tools/lib/bpf/libbpf.c1741
-rw-r--r--tools/lib/bpf/libbpf.h107
-rw-r--r--tools/lib/bpf/libbpf.map16
-rw-r--r--tools/lib/bpf/libbpf.pc.template2
-rw-r--r--tools/lib/bpf/libbpf_common.h40
-rw-r--r--tools/lib/bpf/libbpf_internal.h21
-rw-r--r--tools/lib/traceevent/Makefile11
-rw-r--r--tools/lib/traceevent/plugins/Makefile5
-rw-r--r--tools/perf/Documentation/perf-kvm.txt5
-rw-r--r--tools/perf/arch/arm/tests/regs_load.S4
-rw-r--r--tools/perf/arch/arm64/tests/regs_load.S4
-rw-r--r--tools/perf/arch/x86/tests/regs_load.S8
-rw-r--r--tools/perf/builtin-inject.c13
-rw-r--r--tools/perf/builtin-record.c2
-rw-r--r--tools/perf/builtin-report.c8
-rw-r--r--tools/perf/builtin-stat.c288
-rw-r--r--tools/perf/builtin-top.c10
-rwxr-xr-xtools/perf/check-headers.sh4
-rw-r--r--tools/perf/lib/cpumap.c73
-rw-r--r--tools/perf/lib/evlist.c1
-rw-r--r--tools/perf/lib/evsel.c76
-rw-r--r--tools/perf/lib/include/internal/evlist.h1
-rw-r--r--tools/perf/lib/include/perf/cpumap.h2
-rw-r--r--tools/perf/lib/include/perf/evsel.h3
-rw-r--r--tools/perf/pmu-events/arch/s390/cf_z13/extended.json2
-rw-r--r--tools/perf/pmu-events/arch/s390/cf_z14/extended.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwellde/bdwde-metrics.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/ivytown/ivt-metrics.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/sandybridge/snb-metrics.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json2
-rw-r--r--tools/perf/tests/Build1
-rw-r--r--tools/perf/tests/builtin-test.c9
-rw-r--r--tools/perf/tests/cpumap.c16
-rw-r--r--tools/perf/tests/event-times.c4
-rw-r--r--tools/perf/tests/genelf.c51
-rw-r--r--tools/perf/tests/tests.h2
-rw-r--r--tools/perf/trace/beauty/clone.c1
-rw-r--r--tools/perf/util/cpumap.h1
-rw-r--r--tools/perf/util/evlist.c113
-rw-r--r--tools/perf/util/evlist.h11
-rw-r--r--tools/perf/util/evsel.c35
-rw-r--r--tools/perf/util/evsel.h9
-rw-r--r--tools/perf/util/genelf.c46
-rw-r--r--tools/perf/util/header.c23
-rw-r--r--tools/perf/util/include/linux/linkage.h89
-rw-r--r--tools/perf/util/machine.c1
-rw-r--r--tools/perf/util/metricgroup.c7
-rw-r--r--tools/perf/util/sort.c16
-rw-r--r--tools/perf/util/stat.c5
-rw-r--r--tools/perf/util/stat.h3
-rwxr-xr-xtools/testing/kunit/kunit.py18
-rw-r--r--tools/testing/kunit/kunit_kernel.py10
-rwxr-xr-xtools/testing/kunit/kunit_tool_test.py10
-rw-r--r--tools/testing/nvdimm/Kbuild1
-rw-r--r--tools/testing/nvdimm/test/iomap.c6
-rw-r--r--tools/testing/selftests/bpf/.gitignore4
-rw-r--r--tools/testing/selftests/bpf/Makefile87
-rw-r--r--tools/testing/selftests/bpf/prog_tests/attach_probe.c161
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c111
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c285
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c148
-rw-r--r--tools/testing/selftests/bpf/prog_tests/core_extern.c169
-rw-r--r--tools/testing/selftests/bpf/prog_tests/core_reloc.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cpu_mask.c78
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fentry_fexit.c101
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fentry_test.c69
-rw-r--r--tools/testing/selftests/bpf/prog_tests/mmap.c56
-rw-r--r--tools/testing/selftests/bpf/prog_tests/perf_buffer.c29
-rw-r--r--tools/testing/selftests/bpf/prog_tests/probe_user.c6
-rw-r--r--tools/testing/selftests/bpf/prog_tests/rdonly_maps.c11
-rw-r--r--tools/testing/selftests/bpf/prog_tests/select_reuseport.c (renamed from tools/testing/selftests/bpf/test_select_reuseport.c)514
-rw-r--r--tools/testing/selftests/bpf/prog_tests/skb_ctx.c7
-rw-r--r--tools/testing/selftests/bpf/prog_tests/skeleton.c63
-rw-r--r--tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c77
-rw-r--r--tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c82
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_perf.c25
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___equiv_zero_sz_arr.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_bad_zero_sz_arr.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___fixed_arr.c3
-rw-r--r--tools/testing/selftests/bpf/progs/core_reloc_types.h39
-rw-r--r--tools/testing/selftests/bpf/progs/test_attach_probe.c34
-rw-r--r--tools/testing/selftests/bpf/progs/test_core_extern.c62
-rw-r--r--tools/testing/selftests/bpf/progs/test_core_reloc_arrays.c8
-rw-r--r--tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_skb_ctx.c6
-rw-r--r--tools/testing/selftests/bpf/progs/test_skeleton.c46
-rw-r--r--tools/testing/selftests/bpf/test_cgroup_attach.c571
-rw-r--r--tools/testing/selftests/bpf/test_cpp.cpp10
-rwxr-xr-xtools/testing/selftests/bpf/test_ftrace.sh39
-rw-r--r--tools/testing/selftests/bpf/test_progs.h4
-rw-r--r--tools/testing/selftests/bpf/test_verifier.c43
-rw-r--r--tools/testing/selftests/bpf/verifier/ref_tracking.c6
-rw-r--r--tools/testing/selftests/bpf/verifier/runtime_jit.c151
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/qos_defprio.sh176
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh46
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/sch_ets.sh67
-rw-r--r--tools/testing/selftests/filesystems/epoll/Makefile2
-rwxr-xr-xtools/testing/selftests/firmware/fw_lib.sh6
-rw-r--r--tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc2
-rw-r--r--tools/testing/selftests/ftrace/test.d/ftrace/func_cpumask.tc5
-rw-r--r--tools/testing/selftests/ftrace/test.d/functions5
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc6
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-action-hist-xfail.tc4
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onchange-action-hist.tc2
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-snapshot-action-hist.tc4
-rwxr-xr-xtools/testing/selftests/kselftest/module.sh2
-rwxr-xr-xtools/testing/selftests/kselftest/prefix.pl1
-rw-r--r--tools/testing/selftests/kselftest/runner.sh1
-rw-r--r--tools/testing/selftests/livepatch/functions.sh15
-rwxr-xr-xtools/testing/selftests/livepatch/test-state.sh3
-rwxr-xr-xtools/testing/selftests/net/fcnal-test.sh466
-rw-r--r--tools/testing/selftests/net/forwarding/lib.sh41
-rwxr-xr-xtools/testing/selftests/net/forwarding/loopback.sh8
-rwxr-xr-xtools/testing/selftests/net/forwarding/router.sh189
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_bridge_vlan.sh2
-rwxr-xr-xtools/testing/selftests/net/forwarding/sch_ets.sh44
-rw-r--r--tools/testing/selftests/net/forwarding/sch_ets_core.sh300
-rw-r--r--tools/testing/selftests/net/forwarding/sch_ets_tests.sh227
-rw-r--r--tools/testing/selftests/net/nettest.c84
-rwxr-xr-xtools/testing/selftests/net/pmtu.sh6
-rw-r--r--tools/testing/selftests/net/so_txtime.c84
-rwxr-xr-xtools/testing/selftests/net/so_txtime.sh9
-rw-r--r--tools/testing/selftests/net/tls.c28
-rwxr-xr-xtools/testing/selftests/netfilter/nft_flowtable.sh39
-rwxr-xr-xtools/testing/selftests/netfilter/nft_nat.sh332
-rw-r--r--tools/testing/selftests/rseq/param_test.c18
-rw-r--r--tools/testing/selftests/rseq/rseq.h12
-rw-r--r--tools/testing/selftests/rseq/settings1
-rw-r--r--tools/testing/selftests/safesetid/Makefile5
-rw-r--r--tools/testing/selftests/safesetid/safesetid-test.c15
-rw-r--r--tools/testing/selftests/seccomp/seccomp_bpf.c15
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/filters/basic.json2
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/filters/tests.json22
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/filters/u32.json205
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/ets.json940
-rwxr-xr-xtools/testing/selftests/tpm2/test_smoke.sh6
-rw-r--r--tools/testing/selftests/tpm2/tpm2.py19
-rw-r--r--tools/testing/selftests/tpm2/tpm2_tests.py13
-rwxr-xr-xtools/testing/selftests/wireguard/netns.sh534
-rw-r--r--tools/testing/selftests/wireguard/qemu/.gitignore2
-rw-r--r--tools/testing/selftests/wireguard/qemu/Makefile387
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/aarch64.config5
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/aarch64_be.config6
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/arm.config9
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/armeb.config10
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/i686.config5
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/m68k.config9
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/mips.config11
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/mips64.config14
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/mips64el.config15
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/mipsel.config12
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/powerpc.config10
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config12
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/x86_64.config5
-rw-r--r--tools/testing/selftests/wireguard/qemu/debug.config67
-rw-r--r--tools/testing/selftests/wireguard/qemu/init.c285
-rw-r--r--tools/testing/selftests/wireguard/qemu/kernel.config88
-rw-r--r--tools/testing/vsock/.gitignore1
-rw-r--r--tools/testing/vsock/Makefile9
-rw-r--r--tools/testing/vsock/README3
-rw-r--r--tools/testing/vsock/control.c15
-rw-r--r--tools/testing/vsock/control.h2
-rw-r--r--tools/testing/vsock/timeout.h1
-rw-r--r--tools/testing/vsock/util.c375
-rw-r--r--tools/testing/vsock/util.h49
-rw-r--r--tools/testing/vsock/vsock_diag_test.c202
-rw-r--r--tools/testing/vsock/vsock_test.c379
-rwxr-xr-xusr/gen_initramfs_list.sh2
-rw-r--r--usr/include/Makefile2
-rw-r--r--virt/kvm/arm/arm.c4
-rw-r--r--virt/kvm/arm/mmu.c30
-rw-r--r--virt/kvm/arm/vgic/vgic-init.c20
2103 files changed, 63203 insertions, 23506 deletions
diff --git a/.mailmap b/.mailmap
index c24773db04a7..a7bc8cabd157 100644
--- a/.mailmap
+++ b/.mailmap
@@ -152,6 +152,7 @@ Linus Lüssing <[email protected]> <[email protected]>
Maciej W. Rozycki <[email protected]> <[email protected]>
Marcin Nowakowski <[email protected]> <[email protected]>
@@ -265,6 +266,7 @@ Vinod Koul <[email protected]> <[email protected]>
@@ -276,3 +278,5 @@ Gustavo Padovan <[email protected]>
Gustavo Padovan <[email protected]>
diff --git a/Documentation/ABI/testing/sysfs-platform-mellanox-bootctl b/Documentation/ABI/testing/sysfs-platform-mellanox-bootctl
index c65a80574869..401d202f478b 100644
--- a/Documentation/ABI/testing/sysfs-platform-mellanox-bootctl
+++ b/Documentation/ABI/testing/sysfs-platform-mellanox-bootctl
@@ -1,4 +1,4 @@
-What: /sys/bus/platform/devices/MLNXBF04:00/driver/lifecycle_state
+What: /sys/bus/platform/devices/MLNXBF04:00/lifecycle_state
Date: Oct 2019
KernelVersion: 5.5
Contact: "Liming Sun <[email protected]>"
@@ -10,7 +10,7 @@ Description:
GA Non-Secured - Non-Secure chip and not able to change state
RMA - Return Merchandise Authorization
-What: /sys/bus/platform/devices/MLNXBF04:00/driver/post_reset_wdog
+What: /sys/bus/platform/devices/MLNXBF04:00/post_reset_wdog
Date: Oct 2019
KernelVersion: 5.5
Contact: "Liming Sun <[email protected]>"
@@ -19,7 +19,7 @@ Description:
to reboot the chip and recover it to the old state if the new
boot partition fails.
-What: /sys/bus/platform/devices/MLNXBF04:00/driver/reset_action
+What: /sys/bus/platform/devices/MLNXBF04:00/reset_action
Date: Oct 2019
KernelVersion: 5.5
Contact: "Liming Sun <[email protected]>"
@@ -30,7 +30,7 @@ Description:
emmc - boot from the onchip eMMC
emmc_legacy - boot from the onchip eMMC in legacy (slow) mode
-What: /sys/bus/platform/devices/MLNXBF04:00/driver/second_reset_action
+What: /sys/bus/platform/devices/MLNXBF04:00/second_reset_action
Date: Oct 2019
KernelVersion: 5.5
Contact: "Liming Sun <[email protected]>"
@@ -44,7 +44,7 @@ Description:
swap_emmc - swap the primary / secondary boot partition
none - cancel the action
-What: /sys/bus/platform/devices/MLNXBF04:00/driver/secure_boot_fuse_state
+What: /sys/bus/platform/devices/MLNXBF04:00/secure_boot_fuse_state
Date: Oct 2019
KernelVersion: 5.5
Contact: "Liming Sun <[email protected]>"
diff --git a/Documentation/admin-guide/device-mapper/dm-integrity.rst b/Documentation/admin-guide/device-mapper/dm-integrity.rst
index 594095b54b29..c00f9f11e3f3 100644
--- a/Documentation/admin-guide/device-mapper/dm-integrity.rst
+++ b/Documentation/admin-guide/device-mapper/dm-integrity.rst
@@ -144,7 +144,7 @@ journal_crypt:algorithm(:key) (the key is optional)
Encrypt the journal using given algorithm to make sure that the
attacker can't read the journal. You can use a block cipher here
(such as "cbc(aes)") or a stream cipher (for example "chacha20",
- "salsa20", "ctr(aes)" or "ecb(arc4)").
+ "salsa20" or "ctr(aes)").
The journal contains history of last writes to the block device,
an attacker reading the journal could see the last sector nubmers
diff --git a/Documentation/admin-guide/device-mapper/index.rst b/Documentation/admin-guide/device-mapper/index.rst
index 4872fb6d2952..ec62fcc8eece 100644
--- a/Documentation/admin-guide/device-mapper/index.rst
+++ b/Documentation/admin-guide/device-mapper/index.rst
@@ -8,6 +8,7 @@ Device Mapper
cache-policies
cache
delay
+ dm-clone
dm-crypt
dm-dust
dm-flakey
diff --git a/Documentation/admin-guide/ext4.rst b/Documentation/admin-guide/ext4.rst
index 059ddcbe769d..9bc93f0ce0c9 100644
--- a/Documentation/admin-guide/ext4.rst
+++ b/Documentation/admin-guide/ext4.rst
@@ -181,14 +181,17 @@ When mounting an ext4 filesystem, the following option are accepted:
system after its metadata has been committed to the journal.
commit=nrsec (*)
- Ext4 can be told to sync all its data and metadata every 'nrsec'
- seconds. The default value is 5 seconds. This means that if you lose
- your power, you will lose as much as the latest 5 seconds of work (your
- filesystem will not be damaged though, thanks to the journaling). This
- default value (or any low value) will hurt performance, but it's good
- for data-safety. Setting it to 0 will have the same effect as leaving
- it at the default (5 seconds). Setting it to very large values will
- improve performance.
+ This setting limits the maximum age of the running transaction to
+ 'nrsec' seconds. The default value is 5 seconds. This means that if
+ you lose your power, you will lose as much as the latest 5 seconds of
+ metadata changes (your filesystem will not be damaged though, thanks
+ to the journaling). This default value (or any low value) will hurt
+ performance, but it's good for data-safety. Setting it to 0 will have
+ the same effect as leaving it at the default (5 seconds). Setting it
+ to very large values will improve performance. Note that due to
+ delayed allocation even older data can be lost on power failure since
+ writeback of those data begins only after time set in
+ /proc/sys/vm/dirty_expire_centisecs.
barrier=<0|1(*)>, barrier(*), nobarrier
This enables/disables the use of write barriers in the jbd code.
diff --git a/Documentation/admin-guide/xfs.rst b/Documentation/admin-guide/xfs.rst
index fb5b39f73059..ad911be5b5e9 100644
--- a/Documentation/admin-guide/xfs.rst
+++ b/Documentation/admin-guide/xfs.rst
@@ -253,7 +253,7 @@ The following sysctls are available for the XFS filesystem:
pool.
fs.xfs.speculative_prealloc_lifetime
- (Units: seconds Min: 1 Default: 300 Max: 86400)
+ (Units: seconds Min: 1 Default: 300 Max: 86400)
The interval at which the background scanning for inodes
with unused speculative preallocation runs. The scan
removes unused preallocation from clean inodes and releases
diff --git a/Documentation/dev-tools/kcov.rst b/Documentation/dev-tools/kcov.rst
index 36890b026e77..1c4e1825d769 100644
--- a/Documentation/dev-tools/kcov.rst
+++ b/Documentation/dev-tools/kcov.rst
@@ -251,11 +251,11 @@ selectively from different subsystems.
.. code-block:: c
struct kcov_remote_arg {
- unsigned trace_mode;
- unsigned area_size;
- unsigned num_handles;
- uint64_t common_handle;
- uint64_t handles[0];
+ __u32 trace_mode;
+ __u32 area_size;
+ __u32 num_handles;
+ __aligned_u64 common_handle;
+ __aligned_u64 handles[0];
};
#define KCOV_INIT_TRACE _IOR('c', 1, unsigned long)
diff --git a/Documentation/dev-tools/kselftest.rst b/Documentation/dev-tools/kselftest.rst
index ecdfdc9d4b03..61ae13c44f91 100644
--- a/Documentation/dev-tools/kselftest.rst
+++ b/Documentation/dev-tools/kselftest.rst
@@ -203,12 +203,12 @@ Test Module
Kselftest tests the kernel from userspace. Sometimes things need
testing from within the kernel, one method of doing this is to create a
test module. We can tie the module into the kselftest framework by
-using a shell script test runner. ``kselftest_module.sh`` is designed
+using a shell script test runner. ``kselftest/module.sh`` is designed
to facilitate this process. There is also a header file provided to
assist writing kernel modules that are for use with kselftest:
- ``tools/testing/kselftest/kselftest_module.h``
-- ``tools/testing/kselftest/kselftest_module.sh``
+- ``tools/testing/kselftest/kselftest/module.sh``
How to use
----------
@@ -247,7 +247,7 @@ A bare bones test module might look like this:
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
- #include "../tools/testing/selftests/kselftest_module.h"
+ #include "../tools/testing/selftests/kselftest/module.h"
KSTM_MODULE_GLOBALS();
@@ -276,7 +276,7 @@ Example test script
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0+
- $(dirname $0)/../kselftest_module.sh "foo" test_foo
+ $(dirname $0)/../kselftest/module.sh "foo" test_foo
Test Harness
diff --git a/Documentation/dev-tools/kunit/index.rst b/Documentation/dev-tools/kunit/index.rst
index 26ffb46bdf99..c60d760a0eed 100644
--- a/Documentation/dev-tools/kunit/index.rst
+++ b/Documentation/dev-tools/kunit/index.rst
@@ -9,6 +9,7 @@ KUnit - Unit Testing for the Linux Kernel
start
usage
+ kunit-tool
api/index
faq
diff --git a/Documentation/dev-tools/kunit/kunit-tool.rst b/Documentation/dev-tools/kunit/kunit-tool.rst
new file mode 100644
index 000000000000..50d46394e97e
--- /dev/null
+++ b/Documentation/dev-tools/kunit/kunit-tool.rst
@@ -0,0 +1,57 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=================
+kunit_tool How-To
+=================
+
+What is kunit_tool?
+===================
+
+kunit_tool is a script (``tools/testing/kunit/kunit.py``) that aids in building
+the Linux kernel as UML (`User Mode Linux
+<http://user-mode-linux.sourceforge.net/>`_), running KUnit tests, parsing
+the test results and displaying them in a user friendly manner.
+
+What is a kunitconfig?
+======================
+
+It's just a defconfig that kunit_tool looks for in the base directory.
+kunit_tool uses it to generate a .config as you might expect. In addition, it
+verifies that the generated .config contains the CONFIG options in the
+kunitconfig; the reason it does this is so that it is easy to be sure that a
+CONFIG that enables a test actually ends up in the .config.
+
+How do I use kunit_tool?
+========================
+
+If a kunitconfig is present at the root directory, all you have to do is:
+
+.. code-block:: bash
+
+ ./tools/testing/kunit/kunit.py run
+
+However, you most likely want to use it with the following options:
+
+.. code-block:: bash
+
+ ./tools/testing/kunit/kunit.py run --timeout=30 --jobs=`nproc --all`
+
+- ``--timeout`` sets a maximum amount of time to allow tests to run.
+- ``--jobs`` sets the number of threads to use to build the kernel.
+
+If you just want to use the defconfig that ships with the kernel, you can
+append the ``--defconfig`` flag as well:
+
+.. code-block:: bash
+
+ ./tools/testing/kunit/kunit.py run --timeout=30 --jobs=`nproc --all` --defconfig
+
+.. note::
+ This command is particularly helpful for getting started because it
+ just works. No kunitconfig needs to be present.
+
+For a list of all the flags supported by kunit_tool, you can run:
+
+.. code-block:: bash
+
+ ./tools/testing/kunit/kunit.py run --help
diff --git a/Documentation/dev-tools/kunit/start.rst b/Documentation/dev-tools/kunit/start.rst
index aeeddfafeea2..4e1d24db6b13 100644
--- a/Documentation/dev-tools/kunit/start.rst
+++ b/Documentation/dev-tools/kunit/start.rst
@@ -19,21 +19,21 @@ The wrapper can be run with:
.. code-block:: bash
- ./tools/testing/kunit/kunit.py run
+ ./tools/testing/kunit/kunit.py run --defconfig
-Creating a kunitconfig
-======================
-The Python script is a thin wrapper around Kbuild as such, it needs to be
-configured with a ``kunitconfig`` file. This file essentially contains the
+For more information on this wrapper (also called kunit_tool) checkout the
+:doc:`kunit-tool` page.
+
+Creating a .kunitconfig
+=======================
+The Python script is a thin wrapper around Kbuild. As such, it needs to be
+configured with a ``.kunitconfig`` file. This file essentially contains the
regular Kernel config, with the specific test targets as well.
.. code-block:: bash
- git clone -b master https://kunit.googlesource.com/kunitconfig $PATH_TO_KUNITCONFIG_REPO
cd $PATH_TO_LINUX_REPO
- ln -s $PATH_TO_KUNIT_CONFIG_REPO/kunitconfig kunitconfig
-
-You may want to add kunitconfig to your local gitignore.
+ cp arch/um/configs/kunit_defconfig .kunitconfig
Verifying KUnit Works
---------------------
@@ -59,8 +59,8 @@ If everything worked correctly, you should see the following:
followed by a list of tests that are run. All of them should be passing.
.. note::
- Because it is building a lot of sources for the first time, the ``Building
- kunit kernel`` step may take a while.
+ Because it is building a lot of sources for the first time, the
+ ``Building KUnit kernel`` step may take a while.
Writing your first test
=======================
@@ -148,7 +148,7 @@ and the following to ``drivers/misc/Makefile``:
obj-$(CONFIG_MISC_EXAMPLE_TEST) += example-test.o
-Now add it to your ``kunitconfig``:
+Now add it to your ``.kunitconfig``:
.. code-block:: none
@@ -159,7 +159,7 @@ Now you can run the test:
.. code-block:: bash
- ./tools/testing/kunit/kunit.py
+ ./tools/testing/kunit/kunit.py run
You should see the following failure:
diff --git a/Documentation/dev-tools/kunit/usage.rst b/Documentation/dev-tools/kunit/usage.rst
index c6e69634e274..b9a065ab681e 100644
--- a/Documentation/dev-tools/kunit/usage.rst
+++ b/Documentation/dev-tools/kunit/usage.rst
@@ -16,7 +16,7 @@ Organization of this document
=============================
This document is organized into two main sections: Testing and Isolating
-Behavior. The first covers what a unit test is and how to use KUnit to write
+Behavior. The first covers what unit tests are and how to use KUnit to write
them. The second covers how to use KUnit to isolate code and make it possible
to unit test code that was otherwise un-unit-testable.
@@ -174,13 +174,13 @@ Test Suites
~~~~~~~~~~~
Now obviously one unit test isn't very helpful; the power comes from having
-many test cases covering all of your behaviors. Consequently it is common to
-have many *similar* tests; in order to reduce duplication in these closely
-related tests most unit testing frameworks provide the concept of a *test
-suite*, in KUnit we call it a *test suite*; all it is is just a collection of
-test cases for a unit of code with a set up function that gets invoked before
-every test cases and then a tear down function that gets invoked after every
-test case completes.
+many test cases covering all of a unit's behaviors. Consequently it is common
+to have many *similar* tests; in order to reduce duplication in these closely
+related tests most unit testing frameworks - including KUnit - provide the
+concept of a *test suite*. A *test suite* is just a collection of test cases
+for a unit of code with a set up function that gets invoked before every test
+case and then a tear down function that gets invoked after every test case
+completes.
Example:
@@ -211,7 +211,7 @@ KUnit test framework.
.. note::
A test case will only be run if it is associated with a test suite.
-For a more information on these types of things see the :doc:`api/test`.
+For more information on these types of things see the :doc:`api/test`.
Isolating Behavior
==================
@@ -338,7 +338,7 @@ We can easily test this code by *faking out* the underlying EEPROM:
return count;
}
- ssize_t fake_eeprom_write(struct eeprom *this, size_t offset, const char *buffer, size_t count)
+ ssize_t fake_eeprom_write(struct eeprom *parent, size_t offset, const char *buffer, size_t count)
{
struct fake_eeprom *this = container_of(parent, struct fake_eeprom, parent);
@@ -454,7 +454,7 @@ KUnit on non-UML architectures
By default KUnit uses UML as a way to provide dependencies for code under test.
Under most circumstances KUnit's usage of UML should be treated as an
implementation detail of how KUnit works under the hood. Nevertheless, there
-are instances where being able to run architecture specific code, or test
+are instances where being able to run architecture specific code or test
against real hardware is desirable. For these reasons KUnit supports running on
other architectures.
@@ -557,7 +557,7 @@ run your tests on your hardware setup just by compiling for your architecture.
.. important::
Always prefer tests that run on UML to tests that only run under a particular
architecture, and always prefer tests that run under QEMU or another easy
- (and monitarily free) to obtain software environment to a specific piece of
+ (and monetarily free) to obtain software environment to a specific piece of
hardware.
Nevertheless, there are still valid reasons to write an architecture or hardware
diff --git a/Documentation/devicetree/bindings/arm/sunxi.yaml b/Documentation/devicetree/bindings/arm/sunxi.yaml
index 8a1e38a1d7ab..cffe8bb0bad1 100644
--- a/Documentation/devicetree/bindings/arm/sunxi.yaml
+++ b/Documentation/devicetree/bindings/arm/sunxi.yaml
@@ -8,7 +8,7 @@ title: Allwinner platforms device tree bindings
maintainers:
- Chen-Yu Tsai <[email protected]>
- - Maxime Ripard <[email protected]>
+ - Maxime Ripard <[email protected]>
properties:
$nodename:
diff --git a/Documentation/devicetree/bindings/bus/allwinner,sun50i-a64-de2.yaml b/Documentation/devicetree/bindings/bus/allwinner,sun50i-a64-de2.yaml
index d2a872286437..f0b3d30fbb76 100644
--- a/Documentation/devicetree/bindings/bus/allwinner,sun50i-a64-de2.yaml
+++ b/Documentation/devicetree/bindings/bus/allwinner,sun50i-a64-de2.yaml
@@ -8,7 +8,7 @@ title: Allwinner A64 Display Engine Bus Device Tree Bindings
maintainers:
- Chen-Yu Tsai <[email protected]>
- - Maxime Ripard <[email protected]>
+ - Maxime Ripard <[email protected]>
properties:
$nodename:
diff --git a/Documentation/devicetree/bindings/bus/allwinner,sun8i-a23-rsb.yaml b/Documentation/devicetree/bindings/bus/allwinner,sun8i-a23-rsb.yaml
index be32f087c529..9fe11ceecdba 100644
--- a/Documentation/devicetree/bindings/bus/allwinner,sun8i-a23-rsb.yaml
+++ b/Documentation/devicetree/bindings/bus/allwinner,sun8i-a23-rsb.yaml
@@ -8,7 +8,7 @@ title: Allwinner A23 RSB Device Tree Bindings
maintainers:
- Chen-Yu Tsai <[email protected]>
- - Maxime Ripard <[email protected]>
+ - Maxime Ripard <[email protected]>
properties:
"#address-cells":
diff --git a/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-ccu.yaml b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-ccu.yaml
index 64938fdaea55..4d382128b711 100644
--- a/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-ccu.yaml
+++ b/Documentation/devicetree/bindings/clock/allwinner,sun4i-a10-ccu.yaml
@@ -8,7 +8,7 @@ title: Allwinner Clock Control Unit Device Tree Bindings
maintainers:
- Chen-Yu Tsai <[email protected]>
- - Maxime Ripard <[email protected]>
+ - Maxime Ripard <[email protected]>
properties:
"#clock-cells":
diff --git a/Documentation/devicetree/bindings/crypto/allwinner,sun4i-a10-crypto.yaml b/Documentation/devicetree/bindings/crypto/allwinner,sun4i-a10-crypto.yaml
index 80b3e7350a73..33c7842917f6 100644
--- a/Documentation/devicetree/bindings/crypto/allwinner,sun4i-a10-crypto.yaml
+++ b/Documentation/devicetree/bindings/crypto/allwinner,sun4i-a10-crypto.yaml
@@ -8,7 +8,7 @@ title: Allwinner A10 Security System Device Tree Bindings
maintainers:
- Chen-Yu Tsai <[email protected]>
- - Maxime Ripard <[email protected]>
+ - Maxime Ripard <[email protected]>
properties:
compatible:
diff --git a/Documentation/devicetree/bindings/display/allwinner,sun6i-a31-mipi-dsi.yaml b/Documentation/devicetree/bindings/display/allwinner,sun6i-a31-mipi-dsi.yaml
index dafc0980c4fa..0f7074977c04 100644
--- a/Documentation/devicetree/bindings/display/allwinner,sun6i-a31-mipi-dsi.yaml
+++ b/Documentation/devicetree/bindings/display/allwinner,sun6i-a31-mipi-dsi.yaml
@@ -8,7 +8,7 @@ title: Allwinner A31 MIPI-DSI Controller Device Tree Bindings
maintainers:
- Chen-Yu Tsai <[email protected]>
- - Maxime Ripard <[email protected]>
+ - Maxime Ripard <[email protected]>
properties:
"#address-cells": true
diff --git a/Documentation/devicetree/bindings/display/panel/ronbo,rb070d30.yaml b/Documentation/devicetree/bindings/display/panel/ronbo,rb070d30.yaml
index 0e7987f1cdb7..d67617f6f74a 100644
--- a/Documentation/devicetree/bindings/display/panel/ronbo,rb070d30.yaml
+++ b/Documentation/devicetree/bindings/display/panel/ronbo,rb070d30.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
title: Ronbo RB070D30 DSI Display Panel
maintainers:
- - Maxime Ripard <[email protected]>
+ - Maxime Ripard <[email protected]>
properties:
compatible:
diff --git a/Documentation/devicetree/bindings/dma/allwinner,sun4i-a10-dma.yaml b/Documentation/devicetree/bindings/dma/allwinner,sun4i-a10-dma.yaml
index 15abc0f9429f..83808199657b 100644
--- a/Documentation/devicetree/bindings/dma/allwinner,sun4i-a10-dma.yaml
+++ b/Documentation/devicetree/bindings/dma/allwinner,sun4i-a10-dma.yaml
@@ -8,7 +8,7 @@ title: Allwinner A10 DMA Controller Device Tree Bindings
maintainers:
- Chen-Yu Tsai <[email protected]>
- - Maxime Ripard <[email protected]>
+ - Maxime Ripard <[email protected]>
allOf:
- $ref: "dma-controller.yaml#"
diff --git a/Documentation/devicetree/bindings/dma/allwinner,sun50i-a64-dma.yaml b/Documentation/devicetree/bindings/dma/allwinner,sun50i-a64-dma.yaml
index 387d599522c7..9e53472be194 100644
--- a/Documentation/devicetree/bindings/dma/allwinner,sun50i-a64-dma.yaml
+++ b/Documentation/devicetree/bindings/dma/allwinner,sun50i-a64-dma.yaml
@@ -8,7 +8,7 @@ title: Allwinner A64 DMA Controller Device Tree Bindings
maintainers:
- Chen-Yu Tsai <[email protected]>
- - Maxime Ripard <[email protected]>
+ - Maxime Ripard <[email protected]>
allOf:
- $ref: "dma-controller.yaml#"
diff --git a/Documentation/devicetree/bindings/dma/allwinner,sun6i-a31-dma.yaml b/Documentation/devicetree/bindings/dma/allwinner,sun6i-a31-dma.yaml
index 740b7f9b535b..c1676b96daac 100644
--- a/Documentation/devicetree/bindings/dma/allwinner,sun6i-a31-dma.yaml
+++ b/Documentation/devicetree/bindings/dma/allwinner,sun6i-a31-dma.yaml
@@ -8,7 +8,7 @@ title: Allwinner A31 DMA Controller Device Tree Bindings
maintainers:
- Chen-Yu Tsai <[email protected]>
- - Maxime Ripard <[email protected]>
+ - Maxime Ripard <[email protected]>
allOf:
- $ref: "dma-controller.yaml#"
diff --git a/Documentation/devicetree/bindings/i2c/allwinner,sun6i-a31-p2wi.yaml b/Documentation/devicetree/bindings/i2c/allwinner,sun6i-a31-p2wi.yaml
index 9346ef6ba61b..6097e8ac46c1 100644
--- a/Documentation/devicetree/bindings/i2c/allwinner,sun6i-a31-p2wi.yaml
+++ b/Documentation/devicetree/bindings/i2c/allwinner,sun6i-a31-p2wi.yaml
@@ -8,7 +8,7 @@ title: Allwinner A31 P2WI (Push/Pull 2 Wires Interface) Device Tree Bindings
maintainers:
- Chen-Yu Tsai <[email protected]>
- - Maxime Ripard <[email protected]>
+ - Maxime Ripard <[email protected]>
allOf:
- $ref: /schemas/i2c/i2c-controller.yaml#
diff --git a/Documentation/devicetree/bindings/iio/adc/adi,ad7292.yaml b/Documentation/devicetree/bindings/iio/adc/adi,ad7292.yaml
index b68be3aaf587..e1f6d64bdccd 100644
--- a/Documentation/devicetree/bindings/iio/adc/adi,ad7292.yaml
+++ b/Documentation/devicetree/bindings/iio/adc/adi,ad7292.yaml
@@ -1,4 +1,4 @@
-# SPDX-License-Identifier: GPL-2.0-only
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
%YAML 1.2
---
$id: http://devicetree.org/schemas/iio/adc/adi,ad7292.yaml#
@@ -53,7 +53,8 @@ patternProperties:
description: |
The channel number. It can have up to 8 channels numbered from 0 to 7.
items:
- maximum: 7
+ - minimum: 0
+ maximum: 7
diff-channels:
description: see Documentation/devicetree/bindings/iio/adc/adc.txt
diff --git a/Documentation/devicetree/bindings/iio/adc/allwinner,sun8i-a33-ths.yaml b/Documentation/devicetree/bindings/iio/adc/allwinner,sun8i-a33-ths.yaml
index d74962c0f5ae..15c514b83583 100644
--- a/Documentation/devicetree/bindings/iio/adc/allwinner,sun8i-a33-ths.yaml
+++ b/Documentation/devicetree/bindings/iio/adc/allwinner,sun8i-a33-ths.yaml
@@ -8,7 +8,7 @@ title: Allwinner A33 Thermal Sensor Device Tree Bindings
maintainers:
- Chen-Yu Tsai <[email protected]>
- - Maxime Ripard <[email protected]>
+ - Maxime Ripard <[email protected]>
properties:
"#io-channel-cells":
diff --git a/Documentation/devicetree/bindings/input/allwinner,sun4i-a10-lradc-keys.yaml b/Documentation/devicetree/bindings/input/allwinner,sun4i-a10-lradc-keys.yaml
index b3bd8ef7fbd6..5b3b71c9c018 100644
--- a/Documentation/devicetree/bindings/input/allwinner,sun4i-a10-lradc-keys.yaml
+++ b/Documentation/devicetree/bindings/input/allwinner,sun4i-a10-lradc-keys.yaml
@@ -8,7 +8,7 @@ title: Allwinner A10 LRADC Device Tree Bindings
maintainers:
- Chen-Yu Tsai <[email protected]>
- - Maxime Ripard <[email protected]>
+ - Maxime Ripard <[email protected]>
properties:
compatible:
diff --git a/Documentation/devicetree/bindings/interrupt-controller/allwinner,sun4i-a10-ic.yaml b/Documentation/devicetree/bindings/interrupt-controller/allwinner,sun4i-a10-ic.yaml
index 23a202d24e43..953d875b5e74 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/allwinner,sun4i-a10-ic.yaml
+++ b/Documentation/devicetree/bindings/interrupt-controller/allwinner,sun4i-a10-ic.yaml
@@ -8,7 +8,7 @@ title: Allwinner A10 Interrupt Controller Device Tree Bindings
maintainers:
- Chen-Yu Tsai <[email protected]>
- - Maxime Ripard <[email protected]>
+ - Maxime Ripard <[email protected]>
allOf:
- $ref: /schemas/interrupt-controller.yaml#
diff --git a/Documentation/devicetree/bindings/interrupt-controller/allwinner,sun7i-a20-sc-nmi.yaml b/Documentation/devicetree/bindings/interrupt-controller/allwinner,sun7i-a20-sc-nmi.yaml
index 8cd08cfb25be..cf09055da78b 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/allwinner,sun7i-a20-sc-nmi.yaml
+++ b/Documentation/devicetree/bindings/interrupt-controller/allwinner,sun7i-a20-sc-nmi.yaml
@@ -8,7 +8,7 @@ title: Allwinner A20 Non-Maskable Interrupt Controller Device Tree Bindings
maintainers:
- Chen-Yu Tsai <[email protected]>
- - Maxime Ripard <[email protected]>
+ - Maxime Ripard <[email protected]>
allOf:
- $ref: /schemas/interrupt-controller.yaml#
diff --git a/Documentation/devicetree/bindings/media/allwinner,sun4i-a10-csi.yaml b/Documentation/devicetree/bindings/media/allwinner,sun4i-a10-csi.yaml
index d3e423fcb6c2..0f6374ceaa69 100644
--- a/Documentation/devicetree/bindings/media/allwinner,sun4i-a10-csi.yaml
+++ b/Documentation/devicetree/bindings/media/allwinner,sun4i-a10-csi.yaml
@@ -8,7 +8,7 @@ title: Allwinner A10 CMOS Sensor Interface (CSI) Device Tree Bindings
maintainers:
- Chen-Yu Tsai <[email protected]>
- - Maxime Ripard <[email protected]>
+ - Maxime Ripard <[email protected]>
description: |-
The Allwinner A10 and later has a CMOS Sensor Interface to retrieve
diff --git a/Documentation/devicetree/bindings/media/allwinner,sun4i-a10-ir.yaml b/Documentation/devicetree/bindings/media/allwinner,sun4i-a10-ir.yaml
index dea36d68cdbe..7838804700d6 100644
--- a/Documentation/devicetree/bindings/media/allwinner,sun4i-a10-ir.yaml
+++ b/Documentation/devicetree/bindings/media/allwinner,sun4i-a10-ir.yaml
@@ -8,7 +8,7 @@ title: Allwinner A10 Infrared Controller Device Tree Bindings
maintainers:
- Chen-Yu Tsai <[email protected]>
- - Maxime Ripard <[email protected]>
+ - Maxime Ripard <[email protected]>
allOf:
- $ref: "rc.yaml#"
diff --git a/Documentation/devicetree/bindings/memory-controllers/nvidia,tegra124-mc.yaml b/Documentation/devicetree/bindings/memory-controllers/nvidia,tegra124-mc.yaml
index 30d9fb193d7f..22a94b6fdbde 100644
--- a/Documentation/devicetree/bindings/memory-controllers/nvidia,tegra124-mc.yaml
+++ b/Documentation/devicetree/bindings/memory-controllers/nvidia,tegra124-mc.yaml
@@ -60,7 +60,8 @@ patternProperties:
maximum: 1066000000
nvidia,emem-configuration:
- $ref: /schemas/types.yaml#/definitions/uint32-array
+ allOf:
+ - $ref: /schemas/types.yaml#/definitions/uint32-array
description: |
Values to be written to the EMEM register block. See section
"15.6.1 MC Registers" in the TRM.
diff --git a/Documentation/devicetree/bindings/memory-controllers/nvidia,tegra30-emc.yaml b/Documentation/devicetree/bindings/memory-controllers/nvidia,tegra30-emc.yaml
index 7fe0ca14e324..e4135bac6957 100644
--- a/Documentation/devicetree/bindings/memory-controllers/nvidia,tegra30-emc.yaml
+++ b/Documentation/devicetree/bindings/memory-controllers/nvidia,tegra30-emc.yaml
@@ -56,7 +56,8 @@ patternProperties:
maximum: 900000000
nvidia,emc-auto-cal-interval:
- $ref: /schemas/types.yaml#/definitions/uint32
+ allOf:
+ - $ref: /schemas/types.yaml#/definitions/uint32
description:
Pad calibration interval in microseconds.
minimum: 0
@@ -78,7 +79,8 @@ patternProperties:
Mode Register 0.
nvidia,emc-zcal-cnt-long:
- $ref: /schemas/types.yaml#/definitions/uint32
+ allOf:
+ - $ref: /schemas/types.yaml#/definitions/uint32
description:
Number of EMC clocks to wait before issuing any commands after
sending ZCAL_MRW_CMD.
@@ -96,7 +98,8 @@ patternProperties:
FBIO "read" FIFO periodic resetting enabled.
nvidia,emc-configuration:
- $ref: /schemas/types.yaml#/definitions/uint32-array
+ allOf:
+ - $ref: /schemas/types.yaml#/definitions/uint32-array
description:
EMC timing characterization data. These are the registers
(see section "18.13.2 EMC Registers" in the TRM) whose values
diff --git a/Documentation/devicetree/bindings/memory-controllers/nvidia,tegra30-mc.yaml b/Documentation/devicetree/bindings/memory-controllers/nvidia,tegra30-mc.yaml
index 84fd57bcf0dc..4b9196c83291 100644
--- a/Documentation/devicetree/bindings/memory-controllers/nvidia,tegra30-mc.yaml
+++ b/Documentation/devicetree/bindings/memory-controllers/nvidia,tegra30-mc.yaml
@@ -77,7 +77,8 @@ patternProperties:
maximum: 900000000
nvidia,emem-configuration:
- $ref: /schemas/types.yaml#/definitions/uint32-array
+ allOf:
+ - $ref: /schemas/types.yaml#/definitions/uint32-array
description: |
Values to be written to the EMEM register block. See section
"18.13.1 MC Registers" in the TRM.
diff --git a/Documentation/devicetree/bindings/mfd/allwinner,sun4i-a10-ts.yaml b/Documentation/devicetree/bindings/mfd/allwinner,sun4i-a10-ts.yaml
index 4b1a09acb98b..39afacc447b2 100644
--- a/Documentation/devicetree/bindings/mfd/allwinner,sun4i-a10-ts.yaml
+++ b/Documentation/devicetree/bindings/mfd/allwinner,sun4i-a10-ts.yaml
@@ -8,7 +8,7 @@ title: Allwinner A10 Resistive Touchscreen Controller Device Tree Bindings
maintainers:
- Chen-Yu Tsai <[email protected]>
- - Maxime Ripard <[email protected]>
+ - Maxime Ripard <[email protected]>
properties:
"#thermal-sensor-cells":
diff --git a/Documentation/devicetree/bindings/mmc/allwinner,sun4i-a10-mmc.yaml b/Documentation/devicetree/bindings/mmc/allwinner,sun4i-a10-mmc.yaml
index 64bca41031d5..e82c9a07b6fb 100644
--- a/Documentation/devicetree/bindings/mmc/allwinner,sun4i-a10-mmc.yaml
+++ b/Documentation/devicetree/bindings/mmc/allwinner,sun4i-a10-mmc.yaml
@@ -11,7 +11,7 @@ allOf:
maintainers:
- Chen-Yu Tsai <[email protected]>
- - Maxime Ripard <[email protected]>
+ - Maxime Ripard <[email protected]>
properties:
"#address-cells": true
diff --git a/Documentation/devicetree/bindings/mtd/allwinner,sun4i-a10-nand.yaml b/Documentation/devicetree/bindings/mtd/allwinner,sun4i-a10-nand.yaml
index b5b3cf5b1ac2..5d3fa412aabd 100644
--- a/Documentation/devicetree/bindings/mtd/allwinner,sun4i-a10-nand.yaml
+++ b/Documentation/devicetree/bindings/mtd/allwinner,sun4i-a10-nand.yaml
@@ -11,7 +11,7 @@ allOf:
maintainers:
- Chen-Yu Tsai <[email protected]>
- - Maxime Ripard <[email protected]>
+ - Maxime Ripard <[email protected]>
properties:
"#address-cells": true
diff --git a/Documentation/devicetree/bindings/net/allwinner,sun4i-a10-emac.yaml b/Documentation/devicetree/bindings/net/allwinner,sun4i-a10-emac.yaml
index ae4796ec50a0..8d8560a67abf 100644
--- a/Documentation/devicetree/bindings/net/allwinner,sun4i-a10-emac.yaml
+++ b/Documentation/devicetree/bindings/net/allwinner,sun4i-a10-emac.yaml
@@ -11,7 +11,7 @@ allOf:
maintainers:
- Chen-Yu Tsai <[email protected]>
- - Maxime Ripard <[email protected]>
+ - Maxime Ripard <[email protected]>
properties:
compatible:
diff --git a/Documentation/devicetree/bindings/net/allwinner,sun4i-a10-mdio.yaml b/Documentation/devicetree/bindings/net/allwinner,sun4i-a10-mdio.yaml
index e5562c525ed9..767193ec1d32 100644
--- a/Documentation/devicetree/bindings/net/allwinner,sun4i-a10-mdio.yaml
+++ b/Documentation/devicetree/bindings/net/allwinner,sun4i-a10-mdio.yaml
@@ -8,7 +8,7 @@ title: Allwinner A10 MDIO Controller Device Tree Bindings
maintainers:
- Chen-Yu Tsai <[email protected]>
- - Maxime Ripard <[email protected]>
+ - Maxime Ripard <[email protected]>
allOf:
- $ref: "mdio.yaml#"
diff --git a/Documentation/devicetree/bindings/net/allwinner,sun7i-a20-gmac.yaml b/Documentation/devicetree/bindings/net/allwinner,sun7i-a20-gmac.yaml
index f683b7104e3e..703d0d886884 100644
--- a/Documentation/devicetree/bindings/net/allwinner,sun7i-a20-gmac.yaml
+++ b/Documentation/devicetree/bindings/net/allwinner,sun7i-a20-gmac.yaml
@@ -11,7 +11,7 @@ allOf:
maintainers:
- Chen-Yu Tsai <[email protected]>
- - Maxime Ripard <[email protected]>
+ - Maxime Ripard <[email protected]>
properties:
compatible:
diff --git a/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml b/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml
index 11654d4b80fb..db36b4d86484 100644
--- a/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml
+++ b/Documentation/devicetree/bindings/net/allwinner,sun8i-a83t-emac.yaml
@@ -8,7 +8,7 @@ title: Allwinner A83t EMAC Device Tree Bindings
maintainers:
- Chen-Yu Tsai <[email protected]>
- - Maxime Ripard <[email protected]>
+ - Maxime Ripard <[email protected]>
properties:
compatible:
diff --git a/Documentation/devicetree/bindings/net/broadcom-bluetooth.txt b/Documentation/devicetree/bindings/net/broadcom-bluetooth.txt
index f16b99571af1..b5eadee4a9a7 100644
--- a/Documentation/devicetree/bindings/net/broadcom-bluetooth.txt
+++ b/Documentation/devicetree/bindings/net/broadcom-bluetooth.txt
@@ -30,6 +30,12 @@ Optional properties:
- "lpo": external low power 32.768 kHz clock
- vbat-supply: phandle to regulator supply for VBAT
- vddio-supply: phandle to regulator supply for VDDIO
+ - brcm,bt-pcm-int-params: configure PCM parameters via a 5-byte array
+ - sco-routing: 0 = PCM, 1 = Transport, 2 = Codec, 3 = I2S
+ - pcm-interface-rate: 128KBps, 256KBps, 512KBps, 1024KBps, 2048KBps
+ - pcm-frame-type: short, long
+ - pcm-sync-mode: slave, master
+ - pcm-clock-mode: slave, master
Example:
@@ -41,5 +47,6 @@ Example:
bluetooth {
compatible = "brcm,bcm43438-bt";
max-speed = <921600>;
+ brcm,bt-pcm-int-params = [01 02 00 01 01];
};
};
diff --git a/Documentation/devicetree/bindings/net/can/allwinner,sun4i-a10-can.yaml b/Documentation/devicetree/bindings/net/can/allwinner,sun4i-a10-can.yaml
index 770af7c46114..a95960ee3feb 100644
--- a/Documentation/devicetree/bindings/net/can/allwinner,sun4i-a10-can.yaml
+++ b/Documentation/devicetree/bindings/net/can/allwinner,sun4i-a10-can.yaml
@@ -8,7 +8,7 @@ title: Allwinner A10 CAN Controller Device Tree Bindings
maintainers:
- Chen-Yu Tsai <[email protected]>
- - Maxime Ripard <[email protected]>
+ - Maxime Ripard <[email protected]>
properties:
compatible:
diff --git a/Documentation/devicetree/bindings/net/can/tcan4x5x.txt b/Documentation/devicetree/bindings/net/can/tcan4x5x.txt
index 27e1b4cebfbd..6bdcc3f84bd3 100644
--- a/Documentation/devicetree/bindings/net/can/tcan4x5x.txt
+++ b/Documentation/devicetree/bindings/net/can/tcan4x5x.txt
@@ -10,7 +10,6 @@ Required properties:
- #size-cells: 0
- spi-max-frequency: Maximum frequency of the SPI bus the chip can
operate at should be less than or equal to 18 MHz.
- - device-wake-gpios: Wake up GPIO to wake up the TCAN device.
- interrupt-parent: the phandle to the interrupt controller which provides
the interrupt.
- interrupts: interrupt specification for data-ready.
@@ -23,6 +22,7 @@ Optional properties:
reset.
- device-state-gpios: Input GPIO that indicates if the device is in
a sleep state or if the device is active.
+ - device-wake-gpios: Wake up GPIO to wake up the TCAN device.
Example:
tcan4x5x: tcan4x5x@0 {
@@ -36,5 +36,5 @@ tcan4x5x: tcan4x5x@0 {
interrupts = <14 GPIO_ACTIVE_LOW>;
device-state-gpios = <&gpio3 21 GPIO_ACTIVE_HIGH>;
device-wake-gpios = <&gpio1 15 GPIO_ACTIVE_HIGH>;
- reset-gpios = <&gpio1 27 GPIO_ACTIVE_LOW>;
+ reset-gpios = <&gpio1 27 GPIO_ACTIVE_HIGH>;
};
diff --git a/Documentation/devicetree/bindings/net/dsa/ar9331.txt b/Documentation/devicetree/bindings/net/dsa/ar9331.txt
new file mode 100644
index 000000000000..320607cbbb17
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/dsa/ar9331.txt
@@ -0,0 +1,148 @@
+Atheros AR9331 built-in switch
+=============================
+
+It is a switch built-in to Atheros AR9331 WiSoC and addressable over internal
+MDIO bus. All PHYs are built-in as well.
+
+Required properties:
+
+ - compatible: should be: "qca,ar9331-switch"
+ - reg: Address on the MII bus for the switch.
+ - resets : Must contain an entry for each entry in reset-names.
+ - reset-names : Must include the following entries: "switch"
+ - interrupt-parent: Phandle to the parent interrupt controller
+ - interrupts: IRQ line for the switch
+ - interrupt-controller: Indicates the switch is itself an interrupt
+ controller. This is used for the PHY interrupts.
+ - #interrupt-cells: must be 1
+ - mdio: Container of PHY and devices on the switches MDIO bus.
+
+See Documentation/devicetree/bindings/net/dsa/dsa.txt for a list of additional
+required and optional properties.
+Examples:
+
+eth0: ethernet@19000000 {
+ compatible = "qca,ar9330-eth";
+ reg = <0x19000000 0x200>;
+ interrupts = <4>;
+
+ resets = <&rst 9>, <&rst 22>;
+ reset-names = "mac", "mdio";
+ clocks = <&pll ATH79_CLK_AHB>, <&pll ATH79_CLK_AHB>;
+ clock-names = "eth", "mdio";
+
+ phy-mode = "mii";
+ phy-handle = <&phy_port4>;
+};
+
+eth1: ethernet@1a000000 {
+ compatible = "qca,ar9330-eth";
+ reg = <0x1a000000 0x200>;
+ interrupts = <5>;
+ resets = <&rst 13>, <&rst 23>;
+ reset-names = "mac", "mdio";
+ clocks = <&pll ATH79_CLK_AHB>, <&pll ATH79_CLK_AHB>;
+ clock-names = "eth", "mdio";
+
+ phy-mode = "gmii";
+
+ fixed-link {
+ speed = <1000>;
+ full-duplex;
+ };
+
+ mdio {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ switch10: switch@10 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ compatible = "qca,ar9331-switch";
+ reg = <0x10>;
+ resets = <&rst 8>;
+ reset-names = "switch";
+
+ interrupt-parent = <&miscintc>;
+ interrupts = <12>;
+
+ interrupt-controller;
+ #interrupt-cells = <1>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ switch_port0: port@0 {
+ reg = <0x0>;
+ label = "cpu";
+ ethernet = <&eth1>;
+
+ phy-mode = "gmii";
+
+ fixed-link {
+ speed = <1000>;
+ full-duplex;
+ };
+ };
+
+ switch_port1: port@1 {
+ reg = <0x1>;
+ phy-handle = <&phy_port0>;
+ phy-mode = "internal";
+ };
+
+ switch_port2: port@2 {
+ reg = <0x2>;
+ phy-handle = <&phy_port1>;
+ phy-mode = "internal";
+ };
+
+ switch_port3: port@3 {
+ reg = <0x3>;
+ phy-handle = <&phy_port2>;
+ phy-mode = "internal";
+ };
+
+ switch_port4: port@4 {
+ reg = <0x4>;
+ phy-handle = <&phy_port3>;
+ phy-mode = "internal";
+ };
+ };
+
+ mdio {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ interrupt-parent = <&switch10>;
+
+ phy_port0: phy@0 {
+ reg = <0x0>;
+ interrupts = <0>;
+ };
+
+ phy_port1: phy@1 {
+ reg = <0x1>;
+ interrupts = <0>;
+ };
+
+ phy_port2: phy@2 {
+ reg = <0x2>;
+ interrupts = <0>;
+ };
+
+ phy_port3: phy@3 {
+ reg = <0x3>;
+ interrupts = <0>;
+ };
+
+ phy_port4: phy@4 {
+ reg = <0x4>;
+ interrupts = <0>;
+ };
+ };
+ };
+ };
+};
diff --git a/Documentation/devicetree/bindings/net/mediatek-dwmac.txt b/Documentation/devicetree/bindings/net/mediatek-dwmac.txt
index 8a08621a5b54..afbcaebf062e 100644
--- a/Documentation/devicetree/bindings/net/mediatek-dwmac.txt
+++ b/Documentation/devicetree/bindings/net/mediatek-dwmac.txt
@@ -14,7 +14,7 @@ Required properties:
Should be "macirq" for the main MAC IRQ
- clocks: Must contain a phandle for each entry in clock-names.
- clock-names: The name of the clock listed in the clocks property. These are
- "axi", "apb", "mac_main", "ptp_ref" for MT2712 SoC
+ "axi", "apb", "mac_main", "ptp_ref", "rmii_internal" for MT2712 SoC.
- mac-address: See ethernet.txt in the same directory
- phy-mode: See ethernet.txt in the same directory
- mediatek,pericfg: A phandle to the syscon node that control ethernet
@@ -23,8 +23,10 @@ Required properties:
Optional properties:
- mediatek,tx-delay-ps: TX clock delay macro value. Default is 0.
It should be defined for RGMII/MII interface.
+ It should be defined for RMII interface when the reference clock is from MT2712 SoC.
- mediatek,rx-delay-ps: RX clock delay macro value. Default is 0.
- It should be defined for RGMII/MII/RMII interface.
+ It should be defined for RGMII/MII interface.
+ It should be defined for RMII interface.
Both delay properties need to be a multiple of 170 for RGMII interface,
or will round down. Range 0~31*170.
Both delay properties need to be a multiple of 550 for MII/RMII interface,
@@ -34,13 +36,20 @@ or will round down. Range 0~31*550.
reference clock, which is from external PHYs, is connected to RXC pin
on MT2712 SoC.
Otherwise, is connected to TXC pin.
+- mediatek,rmii-clk-from-mac: boolean property, if present indicates that
+ MT2712 SoC provides the RMII reference clock, which outputs to TXC pin only.
- mediatek,txc-inverse: boolean property, if present indicates that
1. tx clock will be inversed in MII/RGMII case,
2. tx clock inside MAC will be inversed relative to reference clock
which is from external PHYs in RMII case, and it rarely happen.
+ 3. the reference clock, which outputs to TXC pin will be inversed in RMII case
+ when the reference clock is from MT2712 SoC.
- mediatek,rxc-inverse: boolean property, if present indicates that
1. rx clock will be inversed in MII/RGMII case.
- 2. reference clock will be inversed when arrived at MAC in RMII case.
+ 2. reference clock will be inversed when arrived at MAC in RMII case, when
+ the reference clock is from external PHYs.
+ 3. the inside clock, which be sent to MAC, will be inversed in RMII case when
+ the reference clock is from MT2712 SoC.
- assigned-clocks: mac_main and ptp_ref clocks
- assigned-clock-parents: parent clocks of the assigned clocks
@@ -50,29 +59,33 @@ Example:
reg = <0 0x1101c000 0 0x1300>;
interrupts = <GIC_SPI 237 IRQ_TYPE_LEVEL_LOW>;
interrupt-names = "macirq";
- phy-mode ="rgmii";
+ phy-mode ="rgmii-rxid";
mac-address = [00 55 7b b5 7d f7];
clock-names = "axi",
"apb",
"mac_main",
"ptp_ref",
- "ptp_top";
+ "rmii_internal";
clocks = <&pericfg CLK_PERI_GMAC>,
<&pericfg CLK_PERI_GMAC_PCLK>,
<&topckgen CLK_TOP_ETHER_125M_SEL>,
- <&topckgen CLK_TOP_ETHER_50M_SEL>;
+ <&topckgen CLK_TOP_ETHER_50M_SEL>,
+ <&topckgen CLK_TOP_ETHER_50M_RMII_SEL>;
assigned-clocks = <&topckgen CLK_TOP_ETHER_125M_SEL>,
- <&topckgen CLK_TOP_ETHER_50M_SEL>;
+ <&topckgen CLK_TOP_ETHER_50M_SEL>,
+ <&topckgen CLK_TOP_ETHER_50M_RMII_SEL>;
assigned-clock-parents = <&topckgen CLK_TOP_ETHERPLL_125M>,
- <&topckgen CLK_TOP_APLL1_D3>;
+ <&topckgen CLK_TOP_APLL1_D3>,
+ <&topckgen CLK_TOP_ETHERPLL_50M>;
+ power-domains = <&scpsys MT2712_POWER_DOMAIN_AUDIO>;
mediatek,pericfg = <&pericfg>;
mediatek,tx-delay-ps = <1530>;
mediatek,rx-delay-ps = <1530>;
mediatek,rmii-rxc;
mediatek,txc-inverse;
mediatek,rxc-inverse;
- snps,txpbl = <32>;
- snps,rxpbl = <32>;
+ snps,txpbl = <1>;
+ snps,rxpbl = <1>;
snps,reset-gpio = <&pio 87 GPIO_ACTIVE_LOW>;
snps,reset-active-low;
};
diff --git a/Documentation/devicetree/bindings/net/snps,dwmac.yaml b/Documentation/devicetree/bindings/net/snps,dwmac.yaml
index 4845e29411e4..e08cd4c4d568 100644
--- a/Documentation/devicetree/bindings/net/snps,dwmac.yaml
+++ b/Documentation/devicetree/bindings/net/snps,dwmac.yaml
@@ -347,6 +347,7 @@ allOf:
- st,spear600-gmac
then:
+ properties:
snps,tso:
$ref: /schemas/types.yaml#definitions/flag
description:
diff --git a/Documentation/devicetree/bindings/net/ti,cpsw-switch.yaml b/Documentation/devicetree/bindings/net/ti,cpsw-switch.yaml
index 81ae8cafabc1..ac8c76369a86 100644
--- a/Documentation/devicetree/bindings/net/ti,cpsw-switch.yaml
+++ b/Documentation/devicetree/bindings/net/ti,cpsw-switch.yaml
@@ -1,4 +1,4 @@
-# SPDX-License-Identifier: GPL-2.0
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
%YAML 1.2
---
$id: http://devicetree.org/schemas/net/ti,cpsw-switch.yaml#
@@ -44,7 +44,6 @@ properties:
description: CPSW functional clock
clock-names:
- maxItems: 1
items:
- const: fck
@@ -70,7 +69,6 @@ properties:
Phandle to the system control device node which provides access to
efuse IO range with MAC addresses
-
ethernet-ports:
type: object
properties:
@@ -82,8 +80,6 @@ properties:
patternProperties:
"^port@[0-9]+$":
type: object
- minItems: 1
- maxItems: 2
description: CPSW external ports
allOf:
@@ -91,23 +87,20 @@ properties:
properties:
reg:
- maxItems: 1
- enum: [1, 2]
+ items:
+ - enum: [1, 2]
description: CPSW port number
phys:
- $ref: /schemas/types.yaml#definitions/phandle-array
maxItems: 1
description: phandle on phy-gmii-sel PHY
label:
- $ref: /schemas/types.yaml#/definitions/string-array
- maxItems: 1
description: label associated with this port
ti,dual-emac-pvid:
- $ref: /schemas/types.yaml#/definitions/uint32
- maxItems: 1
+ allOf:
+ - $ref: /schemas/types.yaml#/definitions/uint32
minimum: 1
maximum: 1024
description:
@@ -136,7 +129,6 @@ properties:
description: CPTS reference clock
clock-names:
- maxItems: 1
items:
- const: cpts
@@ -201,7 +193,7 @@ examples:
phys = <&phy_gmii_sel 1>;
phy-handle = <&ethphy0_sw>;
phy-mode = "rgmii";
- ti,dual_emac_pvid = <1>;
+ ti,dual-emac-pvid = <1>;
};
cpsw_port2: port@2 {
@@ -211,7 +203,7 @@ examples:
phys = <&phy_gmii_sel 2>;
phy-handle = <&ethphy1_sw>;
phy-mode = "rgmii";
- ti,dual_emac_pvid = <2>;
+ ti,dual-emac-pvid = <2>;
};
};
diff --git a/Documentation/devicetree/bindings/net/ti,dp83867.txt b/Documentation/devicetree/bindings/net/ti,dp83867.txt
index 388ff48f53ae..44e2a4fab29e 100644
--- a/Documentation/devicetree/bindings/net/ti,dp83867.txt
+++ b/Documentation/devicetree/bindings/net/ti,dp83867.txt
@@ -8,8 +8,6 @@ Required properties:
- ti,tx-internal-delay - RGMII Transmit Clock Delay - see dt-bindings/net/ti-dp83867.h
for applicable values. Required only if interface type is
PHY_INTERFACE_MODE_RGMII_ID or PHY_INTERFACE_MODE_RGMII_TXID
- - ti,fifo-depth - Transmitt FIFO depth- see dt-bindings/net/ti-dp83867.h
- for applicable values
Note: If the interface type is PHY_INTERFACE_MODE_RGMII the TX/RX clock delays
will be left at their default values, as set by the PHY's pin strapping.
@@ -42,6 +40,14 @@ Optional property:
Some MACs work with differential SGMII clock.
See data manual for details.
+ - ti,fifo-depth - Transmitt FIFO depth- see dt-bindings/net/ti-dp83867.h
+ for applicable values (deprecated)
+
+ -tx-fifo-depth - As defined in the ethernet-controller.yaml. Values for
+ the depth can be found in dt-bindings/net/ti-dp83867.h
+ -rx-fifo-depth - As defined in the ethernet-controller.yaml. Values for
+ the depth can be found in dt-bindings/net/ti-dp83867.h
+
Note: ti,min-output-impedance and ti,max-output-impedance are mutually
exclusive. When both properties are present ti,max-output-impedance
takes precedence.
@@ -55,7 +61,7 @@ Example:
reg = <0>;
ti,rx-internal-delay = <DP83867_RGMIIDCTL_2_25_NS>;
ti,tx-internal-delay = <DP83867_RGMIIDCTL_2_75_NS>;
- ti,fifo-depth = <DP83867_PHYCR_FIFO_DEPTH_4_B_NIB>;
+ tx-fifo-depth = <DP83867_PHYCR_FIFO_DEPTH_4_B_NIB>;
};
Datasheet can be found:
diff --git a/Documentation/devicetree/bindings/nvmem/allwinner,sun4i-a10-sid.yaml b/Documentation/devicetree/bindings/nvmem/allwinner,sun4i-a10-sid.yaml
index 659b02002a35..daf1321d76ad 100644
--- a/Documentation/devicetree/bindings/nvmem/allwinner,sun4i-a10-sid.yaml
+++ b/Documentation/devicetree/bindings/nvmem/allwinner,sun4i-a10-sid.yaml
@@ -8,7 +8,7 @@ title: Allwinner A10 Security ID Device Tree Bindings
maintainers:
- Chen-Yu Tsai <[email protected]>
- - Maxime Ripard <[email protected]>
+ - Maxime Ripard <[email protected]>
allOf:
- $ref: "nvmem.yaml#"
diff --git a/Documentation/devicetree/bindings/phy/allwinner,sun6i-a31-mipi-dphy.yaml b/Documentation/devicetree/bindings/phy/allwinner,sun6i-a31-mipi-dphy.yaml
index fa46670de299..230d74f22136 100644
--- a/Documentation/devicetree/bindings/phy/allwinner,sun6i-a31-mipi-dphy.yaml
+++ b/Documentation/devicetree/bindings/phy/allwinner,sun6i-a31-mipi-dphy.yaml
@@ -8,7 +8,7 @@ title: Allwinner A31 MIPI D-PHY Controller Device Tree Bindings
maintainers:
- Chen-Yu Tsai <[email protected]>
- - Maxime Ripard <[email protected]>
+ - Maxime Ripard <[email protected]>
properties:
"#phy-cells":
diff --git a/Documentation/devicetree/bindings/pinctrl/allwinner,sun4i-a10-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/allwinner,sun4i-a10-pinctrl.yaml
index cd0503b6fe36..bfefd09d8c1e 100644
--- a/Documentation/devicetree/bindings/pinctrl/allwinner,sun4i-a10-pinctrl.yaml
+++ b/Documentation/devicetree/bindings/pinctrl/allwinner,sun4i-a10-pinctrl.yaml
@@ -8,7 +8,7 @@ title: Allwinner A10 Pin Controller Device Tree Bindings
maintainers:
- Chen-Yu Tsai <[email protected]>
- - Maxime Ripard <[email protected]>
+ - Maxime Ripard <[email protected]>
properties:
"#gpio-cells":
diff --git a/Documentation/devicetree/bindings/ptp/ptp-ines.txt b/Documentation/devicetree/bindings/ptp/ptp-ines.txt
new file mode 100644
index 000000000000..4c242bd1ce9c
--- /dev/null
+++ b/Documentation/devicetree/bindings/ptp/ptp-ines.txt
@@ -0,0 +1,35 @@
+ZHAW InES PTP time stamping IP core
+
+The IP core needs two different kinds of nodes. The control node
+lives somewhere in the memory map and specifies the address of the
+control registers. There can be up to three port handles placed as
+attributes of PHY nodes. These associate a particular MII bus with a
+port index within the IP core.
+
+Required properties of the control node:
+
+- compatible: "ines,ptp-ctrl"
+- reg: physical address and size of the register bank
+
+Required format of the port handle within the PHY node:
+
+- timestamper: provides control node reference and
+ the port channel within the IP core
+
+Example:
+
+ tstamper: timestamper@60000000 {
+ compatible = "ines,ptp-ctrl";
+ reg = <0x60000000 0x80>;
+ };
+
+ ethernet@80000000 {
+ ...
+ mdio {
+ ...
+ ethernet-phy@3 {
+ ...
+ timestamper = <&tstamper 0>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/ptp/timestamper.txt b/Documentation/devicetree/bindings/ptp/timestamper.txt
new file mode 100644
index 000000000000..fc550ce4d4ea
--- /dev/null
+++ b/Documentation/devicetree/bindings/ptp/timestamper.txt
@@ -0,0 +1,42 @@
+Time stamps from MII bus snooping devices
+
+This binding supports non-PHY devices that snoop the MII bus and
+provide time stamps. In contrast to PHY time stamping drivers (which
+can simply attach their interface directly to the PHY instance), stand
+alone MII time stamping drivers use this binding to specify the
+connection between the snooping device and a given network interface.
+
+Non-PHY MII time stamping drivers typically talk to the control
+interface over another bus like I2C, SPI, UART, or via a memory mapped
+peripheral. This controller device is associated with one or more
+time stamping channels, each of which snoops on a MII bus.
+
+The "timestamper" property lives in a phy node and links a time
+stamping channel from the controller device to that phy's MII bus.
+
+Example:
+
+ tstamper: timestamper@10000000 {
+ compatible = "ines,ptp-ctrl";
+ reg = <0x10000000 0x80>;
+ };
+
+ ethernet@20000000 {
+ mdio {
+ ethernet-phy@1 {
+ timestamper = <&tstamper 0>;
+ };
+ };
+ };
+
+ ethernet@30000000 {
+ mdio {
+ ethernet-phy@2 {
+ timestamper = <&tstamper 1>;
+ };
+ };
+ };
+
+In this example, time stamps from the MII bus attached to phy@1 will
+appear on time stamp channel 0 (zero), and those from phy@2 appear on
+channel 1.
diff --git a/Documentation/devicetree/bindings/pwm/allwinner,sun4i-a10-pwm.yaml b/Documentation/devicetree/bindings/pwm/allwinner,sun4i-a10-pwm.yaml
index 0ac52f83a58c..4a21fe77ee1d 100644
--- a/Documentation/devicetree/bindings/pwm/allwinner,sun4i-a10-pwm.yaml
+++ b/Documentation/devicetree/bindings/pwm/allwinner,sun4i-a10-pwm.yaml
@@ -8,7 +8,7 @@ title: Allwinner A10 PWM Device Tree Bindings
maintainers:
- Chen-Yu Tsai <[email protected]>
- - Maxime Ripard <[email protected]>
+ - Maxime Ripard <[email protected]>
properties:
"#pwm-cells":
diff --git a/Documentation/devicetree/bindings/remoteproc/st,stm32-rproc.yaml b/Documentation/devicetree/bindings/remoteproc/st,stm32-rproc.yaml
index acf18d170352..c0d83865e933 100644
--- a/Documentation/devicetree/bindings/remoteproc/st,stm32-rproc.yaml
+++ b/Documentation/devicetree/bindings/remoteproc/st,stm32-rproc.yaml
@@ -50,6 +50,8 @@ properties:
description: Should contain the WWDG1 watchdog reset interrupt
maxItems: 1
+ wakeup-source: true
+
mboxes:
description:
This property is required only if the rpmsg/virtio functionality is used.
diff --git a/Documentation/devicetree/bindings/reset/brcm,brcmstb-reset.txt b/Documentation/devicetree/bindings/reset/brcm,brcmstb-reset.txt
index 6e5341b4f891..ee59409640f2 100644
--- a/Documentation/devicetree/bindings/reset/brcm,brcmstb-reset.txt
+++ b/Documentation/devicetree/bindings/reset/brcm,brcmstb-reset.txt
@@ -22,6 +22,6 @@ Example:
};
&ethernet_switch {
- resets = <&reset>;
+ resets = <&reset 26>;
reset-names = "switch";
};
diff --git a/Documentation/devicetree/bindings/rtc/allwinner,sun4i-a10-rtc.yaml b/Documentation/devicetree/bindings/rtc/allwinner,sun4i-a10-rtc.yaml
index 46d69c32b89b..478b0234e8fa 100644
--- a/Documentation/devicetree/bindings/rtc/allwinner,sun4i-a10-rtc.yaml
+++ b/Documentation/devicetree/bindings/rtc/allwinner,sun4i-a10-rtc.yaml
@@ -11,7 +11,7 @@ allOf:
maintainers:
- Chen-Yu Tsai <[email protected]>
- - Maxime Ripard <[email protected]>
+ - Maxime Ripard <[email protected]>
properties:
compatible:
diff --git a/Documentation/devicetree/bindings/rtc/allwinner,sun6i-a31-rtc.yaml b/Documentation/devicetree/bindings/rtc/allwinner,sun6i-a31-rtc.yaml
index d7a57ec4a640..37c2a601c3fa 100644
--- a/Documentation/devicetree/bindings/rtc/allwinner,sun6i-a31-rtc.yaml
+++ b/Documentation/devicetree/bindings/rtc/allwinner,sun6i-a31-rtc.yaml
@@ -8,7 +8,7 @@ title: Allwinner A31 RTC Device Tree Bindings
maintainers:
- Chen-Yu Tsai <[email protected]>
- - Maxime Ripard <[email protected]>
+ - Maxime Ripard <[email protected]>
properties:
"#clock-cells":
diff --git a/Documentation/devicetree/bindings/serio/allwinner,sun4i-a10-ps2.yaml b/Documentation/devicetree/bindings/serio/allwinner,sun4i-a10-ps2.yaml
index ee9712f1c97d..2ecab8ed702a 100644
--- a/Documentation/devicetree/bindings/serio/allwinner,sun4i-a10-ps2.yaml
+++ b/Documentation/devicetree/bindings/serio/allwinner,sun4i-a10-ps2.yaml
@@ -8,7 +8,7 @@ title: Allwinner A10 PS2 Host Controller Device Tree Bindings
maintainers:
- Chen-Yu Tsai <[email protected]>
- - Maxime Ripard <[email protected]>
+ - Maxime Ripard <[email protected]>
description:
A20 PS2 is dual role controller (PS2 host and PS2 device). These
diff --git a/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-codec.yaml b/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-codec.yaml
index b8f89c7258eb..ea1d2efb2aaa 100644
--- a/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-codec.yaml
+++ b/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-codec.yaml
@@ -8,7 +8,7 @@ title: Allwinner A10 Codec Device Tree Bindings
maintainers:
- Chen-Yu Tsai <[email protected]>
- - Maxime Ripard <[email protected]>
+ - Maxime Ripard <[email protected]>
properties:
"#sound-dai-cells":
diff --git a/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-i2s.yaml b/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-i2s.yaml
index eb3992138eec..112ae00d63c1 100644
--- a/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-i2s.yaml
+++ b/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-i2s.yaml
@@ -8,7 +8,7 @@ title: Allwinner A10 I2S Controller Device Tree Bindings
maintainers:
- Chen-Yu Tsai <[email protected]>
- - Maxime Ripard <[email protected]>
+ - Maxime Ripard <[email protected]>
properties:
"#sound-dai-cells":
diff --git a/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-spdif.yaml b/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-spdif.yaml
index 38d4cede0860..444a432912bb 100644
--- a/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-spdif.yaml
+++ b/Documentation/devicetree/bindings/sound/allwinner,sun4i-a10-spdif.yaml
@@ -10,7 +10,7 @@ maintainers:
- Chen-Yu Tsai <[email protected]>
- Liam Girdwood <[email protected]>
- Mark Brown <[email protected]>
- - Maxime Ripard <[email protected]>
+ - Maxime Ripard <[email protected]>
properties:
"#sound-dai-cells":
diff --git a/Documentation/devicetree/bindings/sound/allwinner,sun50i-a64-codec-analog.yaml b/Documentation/devicetree/bindings/sound/allwinner,sun50i-a64-codec-analog.yaml
index f290eb72a878..3b764415c9ab 100644
--- a/Documentation/devicetree/bindings/sound/allwinner,sun50i-a64-codec-analog.yaml
+++ b/Documentation/devicetree/bindings/sound/allwinner,sun50i-a64-codec-analog.yaml
@@ -8,7 +8,7 @@ title: Allwinner A64 Analog Codec Device Tree Bindings
maintainers:
- Chen-Yu Tsai <[email protected]>
- - Maxime Ripard <[email protected]>
+ - Maxime Ripard <[email protected]>
properties:
compatible:
diff --git a/Documentation/devicetree/bindings/sound/allwinner,sun8i-a23-codec-analog.yaml b/Documentation/devicetree/bindings/sound/allwinner,sun8i-a23-codec-analog.yaml
index 85305b4c2729..9718358826ab 100644
--- a/Documentation/devicetree/bindings/sound/allwinner,sun8i-a23-codec-analog.yaml
+++ b/Documentation/devicetree/bindings/sound/allwinner,sun8i-a23-codec-analog.yaml
@@ -8,7 +8,7 @@ title: Allwinner A23 Analog Codec Device Tree Bindings
maintainers:
- Chen-Yu Tsai <[email protected]>
- - Maxime Ripard <[email protected]>
+ - Maxime Ripard <[email protected]>
properties:
compatible:
diff --git a/Documentation/devicetree/bindings/sound/allwinner,sun8i-a33-codec.yaml b/Documentation/devicetree/bindings/sound/allwinner,sun8i-a33-codec.yaml
index 5e7cc05bbff1..55d28268d2f4 100644
--- a/Documentation/devicetree/bindings/sound/allwinner,sun8i-a33-codec.yaml
+++ b/Documentation/devicetree/bindings/sound/allwinner,sun8i-a33-codec.yaml
@@ -8,7 +8,7 @@ title: Allwinner A33 Codec Device Tree Bindings
maintainers:
- Chen-Yu Tsai <[email protected]>
- - Maxime Ripard <[email protected]>
+ - Maxime Ripard <[email protected]>
properties:
"#sound-dai-cells":
diff --git a/Documentation/devicetree/bindings/spi/allwinner,sun4i-a10-spi.yaml b/Documentation/devicetree/bindings/spi/allwinner,sun4i-a10-spi.yaml
index 6d1329c28170..8036499112f5 100644
--- a/Documentation/devicetree/bindings/spi/allwinner,sun4i-a10-spi.yaml
+++ b/Documentation/devicetree/bindings/spi/allwinner,sun4i-a10-spi.yaml
@@ -11,7 +11,7 @@ allOf:
maintainers:
- Chen-Yu Tsai <[email protected]>
- - Maxime Ripard <[email protected]>
+ - Maxime Ripard <[email protected]>
properties:
"#address-cells": true
diff --git a/Documentation/devicetree/bindings/spi/allwinner,sun6i-a31-spi.yaml b/Documentation/devicetree/bindings/spi/allwinner,sun6i-a31-spi.yaml
index f36c46d236d7..0565dc49e449 100644
--- a/Documentation/devicetree/bindings/spi/allwinner,sun6i-a31-spi.yaml
+++ b/Documentation/devicetree/bindings/spi/allwinner,sun6i-a31-spi.yaml
@@ -11,7 +11,7 @@ allOf:
maintainers:
- Chen-Yu Tsai <[email protected]>
- - Maxime Ripard <[email protected]>
+ - Maxime Ripard <[email protected]>
properties:
"#address-cells": true
diff --git a/Documentation/devicetree/bindings/spi/spi-controller.yaml b/Documentation/devicetree/bindings/spi/spi-controller.yaml
index 732339275848..1e0ca6ccf64b 100644
--- a/Documentation/devicetree/bindings/spi/spi-controller.yaml
+++ b/Documentation/devicetree/bindings/spi/spi-controller.yaml
@@ -111,7 +111,7 @@ patternProperties:
spi-rx-bus-width:
allOf:
- $ref: /schemas/types.yaml#/definitions/uint32
- - enum: [ 1, 2, 4 ]
+ - enum: [ 1, 2, 4, 8 ]
- default: 1
description:
Bus width to the SPI bus used for MISO.
@@ -123,7 +123,7 @@ patternProperties:
spi-tx-bus-width:
allOf:
- $ref: /schemas/types.yaml#/definitions/uint32
- - enum: [ 1, 2, 4 ]
+ - enum: [ 1, 2, 4, 8 ]
- default: 1
description:
Bus width to the SPI bus used for MOSI.
diff --git a/Documentation/devicetree/bindings/timer/allwinner,sun4i-a10-timer.yaml b/Documentation/devicetree/bindings/timer/allwinner,sun4i-a10-timer.yaml
index 20adc1c8e9cc..23e989e09766 100644
--- a/Documentation/devicetree/bindings/timer/allwinner,sun4i-a10-timer.yaml
+++ b/Documentation/devicetree/bindings/timer/allwinner,sun4i-a10-timer.yaml
@@ -8,7 +8,7 @@ title: Allwinner A10 Timer Device Tree Bindings
maintainers:
- Chen-Yu Tsai <[email protected]>
- - Maxime Ripard <[email protected]>
+ - Maxime Ripard <[email protected]>
properties:
compatible:
diff --git a/Documentation/devicetree/bindings/timer/allwinner,sun5i-a13-hstimer.yaml b/Documentation/devicetree/bindings/timer/allwinner,sun5i-a13-hstimer.yaml
index dfa0c41fd261..40fc4bcb3145 100644
--- a/Documentation/devicetree/bindings/timer/allwinner,sun5i-a13-hstimer.yaml
+++ b/Documentation/devicetree/bindings/timer/allwinner,sun5i-a13-hstimer.yaml
@@ -8,7 +8,7 @@ title: Allwinner A13 High-Speed Timer Device Tree Bindings
maintainers:
- Chen-Yu Tsai <[email protected]>
- - Maxime Ripard <[email protected]>
+ - Maxime Ripard <[email protected]>
properties:
compatible:
diff --git a/Documentation/devicetree/bindings/usb/allwinner,sun4i-a10-musb.yaml b/Documentation/devicetree/bindings/usb/allwinner,sun4i-a10-musb.yaml
index 0af70fc8de5a..d9207bf9d894 100644
--- a/Documentation/devicetree/bindings/usb/allwinner,sun4i-a10-musb.yaml
+++ b/Documentation/devicetree/bindings/usb/allwinner,sun4i-a10-musb.yaml
@@ -8,7 +8,7 @@ title: Allwinner A10 mUSB OTG Controller Device Tree Bindings
maintainers:
- Chen-Yu Tsai <[email protected]>
- - Maxime Ripard <[email protected]>
+ - Maxime Ripard <[email protected]>
properties:
compatible:
diff --git a/Documentation/devicetree/bindings/watchdog/allwinner,sun4i-a10-wdt.yaml b/Documentation/devicetree/bindings/watchdog/allwinner,sun4i-a10-wdt.yaml
index 3a54f58683a0..e8f226376108 100644
--- a/Documentation/devicetree/bindings/watchdog/allwinner,sun4i-a10-wdt.yaml
+++ b/Documentation/devicetree/bindings/watchdog/allwinner,sun4i-a10-wdt.yaml
@@ -11,7 +11,7 @@ allOf:
maintainers:
- Chen-Yu Tsai <[email protected]>
- - Maxime Ripard <[email protected]>
+ - Maxime Ripard <[email protected]>
properties:
compatible:
diff --git a/Documentation/features/debug/gcov-profile-all/arch-support.txt b/Documentation/features/debug/gcov-profile-all/arch-support.txt
index 059d58a549c7..6fb2b0671994 100644
--- a/Documentation/features/debug/gcov-profile-all/arch-support.txt
+++ b/Documentation/features/debug/gcov-profile-all/arch-support.txt
@@ -23,7 +23,7 @@
| openrisc: | TODO |
| parisc: | TODO |
| powerpc: | ok |
- | riscv: | TODO |
+ | riscv: | ok |
| s390: | ok |
| sh: | ok |
| sparc: | TODO |
diff --git a/Documentation/filesystems/erofs.txt b/Documentation/filesystems/erofs.txt
index b0c085326e2e..db6d39c3ae71 100644
--- a/Documentation/filesystems/erofs.txt
+++ b/Documentation/filesystems/erofs.txt
@@ -24,11 +24,11 @@ Here is the main features of EROFS:
- Metadata & data could be mixed by design;
- 2 inode versions for different requirements:
- v1 v2
+ compact (v1) extended (v2)
Inode metadata size: 32 bytes 64 bytes
Max file size: 4 GB 16 EB (also limited by max. vol size)
Max uids/gids: 65536 4294967296
- File creation time: no yes (64 + 32-bit timestamp)
+ File change time: no yes (64 + 32-bit timestamp)
Max hardlinks: 65536 4294967296
Metadata reserved: 4 bytes 14 bytes
@@ -39,7 +39,7 @@ Here is the main features of EROFS:
- Support POSIX.1e ACLs by using xattrs;
- Support transparent file compression as an option:
- LZ4 algorithm with 4 KB fixed-output compression for high performance;
+ LZ4 algorithm with 4 KB fixed-sized output compression for high performance.
The following git tree provides the file system user-space tools under
development (ex, formatting tool mkfs.erofs):
@@ -85,7 +85,7 @@ All data areas should be aligned with the block size, but metadata areas
may not. All metadatas can be now observed in two different spaces (views):
1. Inode metadata space
Each valid inode should be aligned with an inode slot, which is a fixed
- value (32 bytes) and designed to be kept in line with v1 inode size.
+ value (32 bytes) and designed to be kept in line with compact inode size.
Each inode can be directly found with the following formula:
inode offset = meta_blkaddr * block_size + 32 * nid
@@ -117,10 +117,10 @@ may not. All metadatas can be now observed in two different spaces (views):
|-> aligned with 4B
Inode could be 32 or 64 bytes, which can be distinguished from a common
- field which all inode versions have -- i_advise:
+ field which all inode versions have -- i_format:
__________________ __________________
- | i_advise | | i_advise |
+ | i_format | | i_format |
|__________________| |__________________|
| ... | | ... |
| | | |
@@ -129,12 +129,13 @@ may not. All metadatas can be now observed in two different spaces (views):
|__________________| 64 bytes
Xattrs, extents, data inline are followed by the corresponding inode with
- proper alignes, and they could be optional for different data mappings,
- _currently_ there are totally 3 valid data mappings supported:
+ proper alignment, and they could be optional for different data mappings.
+ _currently_ total 4 valid data mappings are supported:
- 1) flat file data without data inline (no extent);
- 2) fixed-output size data compression (must have extents);
- 3) flat file data with tail-end data inline (no extent);
+ 0 flat file data without data inline (no extent);
+ 1 fixed-sized output data compression (with non-compacted indexes);
+ 2 flat file data with tail packing data inline (no extent);
+ 3 fixed-sized output data compression (with compacted indexes, v5.3+).
The size of the optional xattrs is indicated by i_xattr_count in inode
header. Large xattrs or xattrs shared by many different files can be
@@ -182,8 +183,8 @@ introduce another on-disk field at all.
Compression
-----------
-Currently, EROFS supports 4KB fixed-output clustersize transparent file
-compression, as illustrated below:
+Currently, EROFS supports 4KB fixed-sized output transparent file compression,
+as illustrated below:
|---- Variant-Length Extent ----|-------- VLE --------|----- VLE -----
clusterofs clusterofs clusterofs
diff --git a/Documentation/filesystems/overlayfs.txt b/Documentation/filesystems/overlayfs.rst
index 845d689e0fd7..e443be7928db 100644
--- a/Documentation/filesystems/overlayfs.txt
+++ b/Documentation/filesystems/overlayfs.rst
@@ -1,3 +1,5 @@
+.. SPDX-License-Identifier: GPL-2.0
+
Written by: Neil Brown
Please see MAINTAINERS file for where to send questions.
@@ -181,7 +183,7 @@ Kernel config options:
worried about backward compatibility with kernels that have the redirect_dir
feature and follow redirects even if turned off.
-Module options (can also be changed through /sys/module/overlay/parameters/*):
+Module options (can also be changed through /sys/module/overlay/parameters/):
- "redirect_dir=BOOL":
See OVERLAY_FS_REDIRECT_DIR kernel config option above.
@@ -263,7 +265,7 @@ top, lower2 the middle and lower3 the bottom layer.
Metadata only copy up
---------------------
+---------------------
When metadata only copy up feature is enabled, overlayfs will only copy
up metadata (as opposed to whole file), when a metadata specific operation
@@ -286,10 +288,10 @@ pointed by REDIRECT. This should not be possible on local system as setting
"trusted." xattrs will require CAP_SYS_ADMIN. But it should be possible
for untrusted layers like from a pen drive.
-Note: redirect_dir={off|nofollow|follow(*)} conflicts with metacopy=on, and
+Note: redirect_dir={off|nofollow|follow[*]} conflicts with metacopy=on, and
results in an error.
-(*) redirect_dir=follow only conflicts with metacopy=on if upperdir=... is
+[*] redirect_dir=follow only conflicts with metacopy=on if upperdir=... is
given.
Sharing and copying layers
diff --git a/Documentation/kbuild/kconfig-language.rst b/Documentation/kbuild/kconfig-language.rst
index 74bef19f69f0..231e6a64957f 100644
--- a/Documentation/kbuild/kconfig-language.rst
+++ b/Documentation/kbuild/kconfig-language.rst
@@ -196,14 +196,11 @@ applicable everywhere (see syntax).
or equal to the first symbol and smaller than or equal to the second
symbol.
-- help text: "help" or "---help---"
+- help text: "help"
This defines a help text. The end of the help text is determined by
the indentation level, this means it ends at the first line which has
a smaller indentation than the first line of the help text.
- "---help---" and "help" do not differ in behaviour, "---help---" is
- used to help visually separate configuration logic from help within
- the file as an aid to developers.
- misc options: "option" <symbol>[=<value>]
diff --git a/Documentation/kbuild/makefiles.rst b/Documentation/kbuild/makefiles.rst
index b9b50553bfc5..d7e6534a8505 100644
--- a/Documentation/kbuild/makefiles.rst
+++ b/Documentation/kbuild/makefiles.rst
@@ -297,9 +297,19 @@ more details, with real examples.
If CONFIG_EXT2_FS is set to either 'y' (built-in) or 'm' (modular)
the corresponding obj- variable will be set, and kbuild will descend
down in the ext2 directory.
- Kbuild only uses this information to decide that it needs to visit
- the directory, it is the Makefile in the subdirectory that
- specifies what is modular and what is built-in.
+
+ Kbuild uses this information not only to decide that it needs to visit
+ the directory, but also to decide whether or not to link objects from
+ the directory into vmlinux.
+
+ When Kbuild descends into the directory with 'y', all built-in objects
+ from that directory are combined into the built-in.a, which will be
+ eventually linked into vmlinux.
+
+ When Kbuild descends into the directory with 'm', in contrast, nothing
+ from that directory will be linked into vmlinux. If the Makefile in
+ that directory specifies obj-y, those objects will be left orphan.
+ It is very likely a bug of the Makefile or of dependencies in Kconfig.
It is good practice to use a `CONFIG_` variable when assigning directory
names. This allows kbuild to totally skip the directory if the
diff --git a/Documentation/networking/device_drivers/index.rst b/Documentation/networking/device_drivers/index.rst
index c1f7f75e5fd9..4bc6ff29976a 100644
--- a/Documentation/networking/device_drivers/index.rst
+++ b/Documentation/networking/device_drivers/index.rst
@@ -25,6 +25,7 @@ Contents:
mellanox/mlx5
netronome/nfp
pensando/ionic
+ stmicro/stmmac
.. only:: subproject and html
diff --git a/Documentation/networking/device_drivers/netronome/nfp.rst b/Documentation/networking/device_drivers/netronome/nfp.rst
index 6c08ac8b5147..ada611fb427c 100644
--- a/Documentation/networking/device_drivers/netronome/nfp.rst
+++ b/Documentation/networking/device_drivers/netronome/nfp.rst
@@ -131,3 +131,119 @@ abi_drv_reset
abi_drv_load_ifc
Defines a list of PF devices allowed to load FW on the device.
This variable is not currently user configurable.
+
+Statistics
+==========
+
+Following device statistics are available through the ``ethtool -S`` interface:
+
+.. flat-table:: NFP device statistics
+ :header-rows: 1
+ :widths: 3 1 11
+
+ * - Name
+ - ID
+ - Meaning
+
+ * - dev_rx_discards
+ - 1
+ - Packet can be discarded on the RX path for one of the following reasons:
+
+ * The NIC is not in promisc mode, and the destination MAC address
+ doesn't match the interfaces' MAC address.
+ * The received packet is larger than the max buffer size on the host.
+ I.e. it exceeds the Layer 3 MRU.
+ * There is no freelist descriptor available on the host for the packet.
+ It is likely that the NIC couldn't cache one in time.
+ * A BPF program discarded the packet.
+ * The datapath drop action was executed.
+ * The MAC discarded the packet due to lack of ingress buffer space
+ on the NIC.
+
+ * - dev_rx_errors
+ - 2
+ - A packet can be counted (and dropped) as RX error for the following
+ reasons:
+
+ * A problem with the VEB lookup (only when SR-IOV is used).
+ * A physical layer problem that causes Ethernet errors, like FCS or
+ alignment errors. The cause is usually faulty cables or SFPs.
+
+ * - dev_rx_bytes
+ - 3
+ - Total number of bytes received.
+
+ * - dev_rx_uc_bytes
+ - 4
+ - Unicast bytes received.
+
+ * - dev_rx_mc_bytes
+ - 5
+ - Multicast bytes received.
+
+ * - dev_rx_bc_bytes
+ - 6
+ - Broadcast bytes received.
+
+ * - dev_rx_pkts
+ - 7
+ - Total number of packets received.
+
+ * - dev_rx_mc_pkts
+ - 8
+ - Multicast packets received.
+
+ * - dev_rx_bc_pkts
+ - 9
+ - Broadcast packets received.
+
+ * - dev_tx_discards
+ - 10
+ - A packet can be discarded in the TX direction if the MAC is
+ being flow controlled and the NIC runs out of TX queue space.
+
+ * - dev_tx_errors
+ - 11
+ - A packet can be counted as TX error (and dropped) for one for the
+ following reasons:
+
+ * The packet is an LSO segment, but the Layer 3 or Layer 4 offset
+ could not be determined. Therefore LSO could not continue.
+ * An invalid packet descriptor was received over PCIe.
+ * The packet Layer 3 length exceeds the device MTU.
+ * An error on the MAC/physical layer. Usually due to faulty cables or
+ SFPs.
+ * A CTM buffer could not be allocated.
+ * The packet offset was incorrect and could not be fixed by the NIC.
+
+ * - dev_tx_bytes
+ - 12
+ - Total number of bytes transmitted.
+
+ * - dev_tx_uc_bytes
+ - 13
+ - Unicast bytes transmitted.
+
+ * - dev_tx_mc_bytes
+ - 14
+ - Multicast bytes transmitted.
+
+ * - dev_tx_bc_bytes
+ - 15
+ - Broadcast bytes transmitted.
+
+ * - dev_tx_pkts
+ - 16
+ - Total number of packets transmitted.
+
+ * - dev_tx_mc_pkts
+ - 17
+ - Multicast packets transmitted.
+
+ * - dev_tx_bc_pkts
+ - 18
+ - Broadcast packets transmitted.
+
+Note that statistics unknown to the driver will be displayed as
+``dev_unknown_stat$ID``, where ``$ID`` refers to the second column
+above.
diff --git a/Documentation/networking/device_drivers/stmicro/stmmac.rst b/Documentation/networking/device_drivers/stmicro/stmmac.rst
new file mode 100644
index 000000000000..c34bab3d2df0
--- /dev/null
+++ b/Documentation/networking/device_drivers/stmicro/stmmac.rst
@@ -0,0 +1,697 @@
+.. SPDX-License-Identifier: GPL-2.0+
+
+==============================================================
+Linux Driver for the Synopsys(R) Ethernet Controllers "stmmac"
+==============================================================
+
+Authors: Giuseppe Cavallaro <[email protected]>,
+Alexandre Torgue <[email protected]>, Jose Abreu <[email protected]>
+
+Contents
+========
+
+- In This Release
+- Feature List
+- Kernel Configuration
+- Command Line Parameters
+- Driver Information and Notes
+- Debug Information
+- Support
+
+In This Release
+===============
+
+This file describes the stmmac Linux Driver for all the Synopsys(R) Ethernet
+Controllers.
+
+Currently, this network device driver is for all STi embedded MAC/GMAC
+(i.e. 7xxx/5xxx SoCs), SPEAr (arm), Loongson1B (mips) and XILINX XC2V3000
+FF1152AMT0221 D1215994A VIRTEX FPGA board. The Synopsys Ethernet QoS 5.0 IPK
+is also supported.
+
+DesignWare(R) Cores Ethernet MAC 10/100/1000 Universal version 3.70a
+(and older) and DesignWare(R) Cores Ethernet Quality-of-Service version 4.0
+(and upper) have been used for developing this driver as well as
+DesignWare(R) Cores XGMAC - 10G Ethernet MAC.
+
+This driver supports both the platform bus and PCI.
+
+This driver includes support for the following Synopsys(R) DesignWare(R)
+Cores Ethernet Controllers and corresponding minimum and maximum versions:
+
++-------------------------------+--------------+--------------+--------------+
+| Controller Name | Min. Version | Max. Version | Abbrev. Name |
++===============================+==============+==============+==============+
+| Ethernet MAC Universal | N/A | 3.73a | GMAC |
++-------------------------------+--------------+--------------+--------------+
+| Ethernet Quality-of-Service | 4.00a | N/A | GMAC4+ |
++-------------------------------+--------------+--------------+--------------+
+| XGMAC - 10G Ethernet MAC | 2.10a | N/A | XGMAC2+ |
++-------------------------------+--------------+--------------+--------------+
+
+For questions related to hardware requirements, refer to the documentation
+supplied with your Ethernet adapter. All hardware requirements listed apply
+to use with Linux.
+
+Feature List
+============
+
+The following features are available in this driver:
+ - GMII/MII/RGMII/SGMII/RMII/XGMII Interface
+ - Half-Duplex / Full-Duplex Operation
+ - Energy Efficient Ethernet (EEE)
+ - IEEE 802.3x PAUSE Packets (Flow Control)
+ - RMON/MIB Counters
+ - IEEE 1588 Timestamping (PTP)
+ - Pulse-Per-Second Output (PPS)
+ - MDIO Clause 22 / Clause 45 Interface
+ - MAC Loopback
+ - ARP Offloading
+ - Automatic CRC / PAD Insertion and Checking
+ - Checksum Offload for Received and Transmitted Packets
+ - Standard or Jumbo Ethernet Packets
+ - Source Address Insertion / Replacement
+ - VLAN TAG Insertion / Replacement / Deletion / Filtering (HASH and PERFECT)
+ - Programmable TX and RX Watchdog and Coalesce Settings
+ - Destination Address Filtering (PERFECT)
+ - HASH Filtering (Multicast)
+ - Layer 3 / Layer 4 Filtering
+ - Remote Wake-Up Detection
+ - Receive Side Scaling (RSS)
+ - Frame Preemption for TX and RX
+ - Programmable Burst Length, Threshold, Queue Size
+ - Multiple Queues (up to 8)
+ - Multiple Scheduling Algorithms (TX: WRR, DWRR, WFQ, SP, CBS, EST, TBS;
+ RX: WRR, SP)
+ - Flexible RX Parser
+ - TCP / UDP Segmentation Offload (TSO, USO)
+ - Split Header (SPH)
+ - Safety Features (ECC Protection, Data Parity Protection)
+ - Selftests using Ethtool
+
+Kernel Configuration
+====================
+
+The kernel configuration option is ``CONFIG_STMMAC_ETH``:
+ - ``CONFIG_STMMAC_PLATFORM``: is to enable the platform driver.
+ - ``CONFIG_STMMAC_PCI``: is to enable the pci driver.
+
+Command Line Parameters
+=======================
+
+If the driver is built as a module the following optional parameters are used
+by entering them on the command line with the modprobe command using this
+syntax (e.g. for PCI module)::
+
+ modprobe stmmac_pci [<option>=<VAL1>,<VAL2>,...]
+
+Driver parameters can be also passed in command line by using::
+
+ stmmaceth=watchdog:100,chain_mode=1
+
+The default value for each parameter is generally the recommended setting,
+unless otherwise noted.
+
+watchdog
+--------
+:Valid Range: 5000-None
+:Default Value: 5000
+
+This parameter overrides the transmit timeout in milliseconds.
+
+debug
+-----
+:Valid Range: 0-16 (0=none,...,16=all)
+:Default Value: 0
+
+This parameter adjusts the level of debug messages displayed in the system
+logs.
+
+phyaddr
+-------
+:Valid Range: 0-31
+:Default Value: -1
+
+This parameter overrides the physical address of the PHY device.
+
+flow_ctrl
+---------
+:Valid Range: 0-3 (0=off,1=rx,2=tx,3=rx/tx)
+:Default Value: 3
+
+This parameter changes the default Flow Control ability.
+
+pause
+-----
+:Valid Range: 0-65535
+:Default Value: 65535
+
+This parameter changes the default Flow Control Pause time.
+
+tc
+--
+:Valid Range: 64-256
+:Default Value: 64
+
+This parameter changes the default HW FIFO Threshold control value.
+
+buf_sz
+------
+:Valid Range: 1536-16384
+:Default Value: 1536
+
+This parameter changes the default RX DMA packet buffer size.
+
+eee_timer
+---------
+:Valid Range: 0-None
+:Default Value: 1000
+
+This parameter changes the default LPI TX Expiration time in milliseconds.
+
+chain_mode
+----------
+:Valid Range: 0-1 (0=off,1=on)
+:Default Value: 0
+
+This parameter changes the default mode of operation from Ring Mode to
+Chain Mode.
+
+Driver Information and Notes
+============================
+
+Transmit Process
+----------------
+
+The xmit method is invoked when the kernel needs to transmit a packet; it sets
+the descriptors in the ring and informs the DMA engine that there is a packet
+ready to be transmitted.
+
+By default, the driver sets the ``NETIF_F_SG`` bit in the features field of
+the ``net_device`` structure, enabling the scatter-gather feature. This is
+true on chips and configurations where the checksum can be done in hardware.
+
+Once the controller has finished transmitting the packet, timer will be
+scheduled to release the transmit resources.
+
+Receive Process
+---------------
+
+When one or more packets are received, an interrupt happens. The interrupts
+are not queued, so the driver has to scan all the descriptors in the ring
+during the receive process.
+
+This is based on NAPI, so the interrupt handler signals only if there is work
+to be done, and it exits. Then the poll method will be scheduled at some
+future point.
+
+The incoming packets are stored, by the DMA, in a list of pre-allocated socket
+buffers in order to avoid the memcpy (zero-copy).
+
+Interrupt Mitigation
+--------------------
+
+The driver is able to mitigate the number of its DMA interrupts using NAPI for
+the reception on chips older than the 3.50. New chips have an HW RX Watchdog
+used for this mitigation.
+
+Mitigation parameters can be tuned by ethtool.
+
+WoL
+---
+
+Wake up on Lan feature through Magic and Unicast frames are supported for the
+GMAC, GMAC4/5 and XGMAC core.
+
+DMA Descriptors
+---------------
+
+Driver handles both normal and alternate descriptors. The latter has been only
+tested on DesignWare(R) Cores Ethernet MAC Universal version 3.41a and later.
+
+stmmac supports DMA descriptor to operate both in dual buffer (RING) and
+linked-list(CHAINED) mode. In RING each descriptor points to two data buffer
+pointers whereas in CHAINED mode they point to only one data buffer pointer.
+RING mode is the default.
+
+In CHAINED mode each descriptor will have pointer to next descriptor in the
+list, hence creating the explicit chaining in the descriptor itself, whereas
+such explicit chaining is not possible in RING mode.
+
+Extended Descriptors
+--------------------
+
+The extended descriptors give us information about the Ethernet payload when
+it is carrying PTP packets or TCP/UDP/ICMP over IP. These are not available on
+GMAC Synopsys(R) chips older than the 3.50. At probe time the driver will
+decide if these can be actually used. This support also is mandatory for PTPv2
+because the extra descriptors are used for saving the hardware timestamps and
+Extended Status.
+
+Ethtool Support
+---------------
+
+Ethtool is supported. For example, driver statistics (including RMON),
+internal errors can be taken using::
+
+ ethtool -S ethX
+
+Ethtool selftests are also supported. This allows to do some early sanity
+checks to the HW using MAC and PHY loopback mechanisms::
+
+ ethtool -t ethX
+
+Jumbo and Segmentation Offloading
+---------------------------------
+
+Jumbo frames are supported and tested for the GMAC. The GSO has been also
+added but it's performed in software. LRO is not supported.
+
+TSO Support
+-----------
+
+TSO (TCP Segmentation Offload) feature is supported by GMAC > 4.x and XGMAC
+chip family. When a packet is sent through TCP protocol, the TCP stack ensures
+that the SKB provided to the low level driver (stmmac in our case) matches
+with the maximum frame len (IP header + TCP header + payload <= 1500 bytes
+(for MTU set to 1500)). It means that if an application using TCP want to send
+a packet which will have a length (after adding headers) > 1514 the packet
+will be split in several TCP packets: The data payload is split and headers
+(TCP/IP ..) are added. It is done by software.
+
+When TSO is enabled, the TCP stack doesn't care about the maximum frame length
+and provide SKB packet to stmmac as it is. The GMAC IP will have to perform
+the segmentation by it self to match with maximum frame length.
+
+This feature can be enabled in device tree through ``snps,tso`` entry.
+
+Energy Efficient Ethernet
+-------------------------
+
+Energy Efficient Ethernet (EEE) enables IEEE 802.3 MAC sublayer along with a
+family of Physical layer to operate in the Low Power Idle (LPI) mode. The EEE
+mode supports the IEEE 802.3 MAC operation at 100Mbps, 1000Mbps and 1Gbps.
+
+The LPI mode allows power saving by switching off parts of the communication
+device functionality when there is no data to be transmitted & received.
+The system on both the side of the link can disable some functionalities and
+save power during the period of low-link utilization. The MAC controls whether
+the system should enter or exit the LPI mode and communicate this to PHY.
+
+As soon as the interface is opened, the driver verifies if the EEE can be
+supported. This is done by looking at both the DMA HW capability register and
+the PHY devices MCD registers.
+
+To enter in TX LPI mode the driver needs to have a software timer that enable
+and disable the LPI mode when there is nothing to be transmitted.
+
+Precision Time Protocol (PTP)
+-----------------------------
+
+The driver supports the IEEE 1588-2002, Precision Time Protocol (PTP), which
+enables precise synchronization of clocks in measurement and control systems
+implemented with technologies such as network communication.
+
+In addition to the basic timestamp features mentioned in IEEE 1588-2002
+Timestamps, new GMAC cores support the advanced timestamp features.
+IEEE 1588-2008 can be enabled when configuring the Kernel.
+
+SGMII/RGMII Support
+-------------------
+
+New GMAC devices provide own way to manage RGMII/SGMII. This information is
+available at run-time by looking at the HW capability register. This means
+that the stmmac can manage auto-negotiation and link status w/o using the
+PHYLIB stuff. In fact, the HW provides a subset of extended registers to
+restart the ANE, verify Full/Half duplex mode and Speed. Thanks to these
+registers, it is possible to look at the Auto-negotiated Link Parter Ability.
+
+Physical
+--------
+
+The driver is compatible with Physical Abstraction Layer to be connected with
+PHY and GPHY devices.
+
+Platform Information
+--------------------
+
+Several information can be passed through the platform and device-tree.
+
+::
+
+ struct plat_stmmacenet_data {
+
+1) Bus identifier::
+
+ int bus_id;
+
+2) PHY Physical Address. If set to -1 the driver will pick the first PHY it
+finds::
+
+ int phy_addr;
+
+3) PHY Device Interface::
+
+ int interface;
+
+4) Specific platform fields for the MDIO bus::
+
+ struct stmmac_mdio_bus_data *mdio_bus_data;
+
+5) Internal DMA parameters::
+
+ struct stmmac_dma_cfg *dma_cfg;
+
+6) Fixed CSR Clock Range selection::
+
+ int clk_csr;
+
+7) HW uses the GMAC core::
+
+ int has_gmac;
+
+8) If set the MAC will use Enhanced Descriptors::
+
+ int enh_desc;
+
+9) Core is able to perform TX Checksum and/or RX Checksum in HW::
+
+ int tx_coe;
+ int rx_coe;
+
+11) Some HWs are not able to perform the csum in HW for over-sized frames due
+to limited buffer sizes. Setting this flag the csum will be done in SW on
+JUMBO frames::
+
+ int bugged_jumbo;
+
+12) Core has the embedded power module::
+
+ int pmt;
+
+13) Force DMA to use the Store and Forward mode or Threshold mode::
+
+ int force_sf_dma_mode;
+ int force_thresh_dma_mode;
+
+15) Force to disable the RX Watchdog feature and switch to NAPI mode::
+
+ int riwt_off;
+
+16) Limit the maximum operating speed and MTU::
+
+ int max_speed;
+ int maxmtu;
+
+18) Number of Multicast/Unicast filters::
+
+ int multicast_filter_bins;
+ int unicast_filter_entries;
+
+20) Limit the maximum TX and RX FIFO size::
+
+ int tx_fifo_size;
+ int rx_fifo_size;
+
+21) Use the specified number of TX and RX Queues::
+
+ u32 rx_queues_to_use;
+ u32 tx_queues_to_use;
+
+22) Use the specified TX and RX scheduling algorithm::
+
+ u8 rx_sched_algorithm;
+ u8 tx_sched_algorithm;
+
+23) Internal TX and RX Queue parameters::
+
+ struct stmmac_rxq_cfg rx_queues_cfg[MTL_MAX_RX_QUEUES];
+ struct stmmac_txq_cfg tx_queues_cfg[MTL_MAX_TX_QUEUES];
+
+24) This callback is used for modifying some syscfg registers (on ST SoCs)
+according to the link speed negotiated by the physical layer::
+
+ void (*fix_mac_speed)(void *priv, unsigned int speed);
+
+25) Callbacks used for calling a custom initialization; This is sometimes
+necessary on some platforms (e.g. ST boxes) where the HW needs to have set
+some PIO lines or system cfg registers. init/exit callbacks should not use
+or modify platform data::
+
+ int (*init)(struct platform_device *pdev, void *priv);
+ void (*exit)(struct platform_device *pdev, void *priv);
+
+26) Perform HW setup of the bus. For example, on some ST platforms this field
+is used to configure the AMBA bridge to generate more efficient STBus traffic::
+
+ struct mac_device_info *(*setup)(void *priv);
+ void *bsp_priv;
+
+27) Internal clocks and rates::
+
+ struct clk *stmmac_clk;
+ struct clk *pclk;
+ struct clk *clk_ptp_ref;
+ unsigned int clk_ptp_rate;
+ unsigned int clk_ref_rate;
+ s32 ptp_max_adj;
+
+28) Main reset::
+
+ struct reset_control *stmmac_rst;
+
+29) AXI Internal Parameters::
+
+ struct stmmac_axi *axi;
+
+30) HW uses GMAC>4 cores::
+
+ int has_gmac4;
+
+31) HW is sun8i based::
+
+ bool has_sun8i;
+
+32) Enables TSO feature::
+
+ bool tso_en;
+
+33) Enables Receive Side Scaling (RSS) feature::
+
+ int rss_en;
+
+34) MAC Port selection::
+
+ int mac_port_sel_speed;
+
+35) Enables TX LPI Clock Gating::
+
+ bool en_tx_lpi_clockgating;
+
+36) HW uses XGMAC>2.10 cores::
+
+ int has_xgmac;
+
+::
+
+ }
+
+For MDIO bus data, we have:
+
+::
+
+ struct stmmac_mdio_bus_data {
+
+1) PHY mask passed when MDIO bus is registered::
+
+ unsigned int phy_mask;
+
+2) List of IRQs, one per PHY::
+
+ int *irqs;
+
+3) If IRQs is NULL, use this for probed PHY::
+
+ int probed_phy_irq;
+
+4) Set to true if PHY needs reset::
+
+ bool needs_reset;
+
+::
+
+ }
+
+For DMA engine configuration, we have:
+
+::
+
+ struct stmmac_dma_cfg {
+
+1) Programmable Burst Length (TX and RX)::
+
+ int pbl;
+
+2) If set, DMA TX / RX will use this value rather than pbl::
+
+ int txpbl;
+ int rxpbl;
+
+3) Enable 8xPBL::
+
+ bool pblx8;
+
+4) Enable Fixed or Mixed burst::
+
+ int fixed_burst;
+ int mixed_burst;
+
+5) Enable Address Aligned Beats::
+
+ bool aal;
+
+6) Enable Enhanced Addressing (> 32 bits)::
+
+ bool eame;
+
+::
+
+ }
+
+For DMA AXI parameters, we have:
+
+::
+
+ struct stmmac_axi {
+
+1) Enable AXI LPI::
+
+ bool axi_lpi_en;
+ bool axi_xit_frm;
+
+2) Set AXI Write / Read maximum outstanding requests::
+
+ u32 axi_wr_osr_lmt;
+ u32 axi_rd_osr_lmt;
+
+3) Set AXI 4KB bursts::
+
+ bool axi_kbbe;
+
+4) Set AXI maximum burst length map::
+
+ u32 axi_blen[AXI_BLEN];
+
+5) Set AXI Fixed burst / mixed burst::
+
+ bool axi_fb;
+ bool axi_mb;
+
+6) Set AXI rebuild incrx mode::
+
+ bool axi_rb;
+
+::
+
+ }
+
+For the RX Queues configuration, we have:
+
+::
+
+ struct stmmac_rxq_cfg {
+
+1) Mode to use (DCB or AVB)::
+
+ u8 mode_to_use;
+
+2) DMA channel to use::
+
+ u32 chan;
+
+3) Packet routing, if applicable::
+
+ u8 pkt_route;
+
+4) Use priority routing, and priority to route::
+
+ bool use_prio;
+ u32 prio;
+
+::
+
+ }
+
+For the TX Queues configuration, we have:
+
+::
+
+ struct stmmac_txq_cfg {
+
+1) Queue weight in scheduler::
+
+ u32 weight;
+
+2) Mode to use (DCB or AVB)::
+
+ u8 mode_to_use;
+
+3) Credit Base Shaper Parameters::
+
+ u32 send_slope;
+ u32 idle_slope;
+ u32 high_credit;
+ u32 low_credit;
+
+4) Use priority scheduling, and priority::
+
+ bool use_prio;
+ u32 prio;
+
+::
+
+ }
+
+Device Tree Information
+-----------------------
+
+Please refer to the following document:
+Documentation/devicetree/bindings/net/snps,dwmac.yaml
+
+HW Capabilities
+---------------
+
+Note that, starting from new chips, where it is available the HW capability
+register, many configurations are discovered at run-time for example to
+understand if EEE, HW csum, PTP, enhanced descriptor etc are actually
+available. As strategy adopted in this driver, the information from the HW
+capability register can replace what has been passed from the platform.
+
+Debug Information
+=================
+
+The driver exports many information i.e. internal statistics, debug
+information, MAC and DMA registers etc.
+
+These can be read in several ways depending on the type of the information
+actually needed.
+
+For example a user can be use the ethtool support to get statistics: e.g.
+using: ``ethtool -S ethX`` (that shows the Management counters (MMC) if
+supported) or sees the MAC/DMA registers: e.g. using: ``ethtool -d ethX``
+
+Compiling the Kernel with ``CONFIG_DEBUG_FS`` the driver will export the
+following debugfs entries:
+
+ - ``descriptors_status``: To show the DMA TX/RX descriptor rings
+ - ``dma_cap``: To show the HW Capabilities
+
+Developer can also use the ``debug`` module parameter to get further debug
+information (please see: NETIF Msg Level).
+
+Support
+=======
+
+If an issue is identified with the released source code on a supported kernel
+with a supported adapter, email the specific information related to the
diff --git a/Documentation/networking/device_drivers/stmicro/stmmac.txt b/Documentation/networking/device_drivers/stmicro/stmmac.txt
deleted file mode 100644
index 1ae979fd90d2..000000000000
--- a/Documentation/networking/device_drivers/stmicro/stmmac.txt
+++ /dev/null
@@ -1,401 +0,0 @@
- STMicroelectronics 10/100/1000 Synopsys Ethernet driver
-
-Copyright (C) 2007-2015 STMicroelectronics Ltd
-Author: Giuseppe Cavallaro <[email protected]>
-
-This is the driver for the MAC 10/100/1000 on-chip Ethernet controllers
-(Synopsys IP blocks).
-
-Currently this network device driver is for all STi embedded MAC/GMAC
-(i.e. 7xxx/5xxx SoCs), SPEAr (arm), Loongson1B (mips) and XLINX XC2V3000
-FF1152AMT0221 D1215994A VIRTEX FPGA board.
-
-DWC Ether MAC 10/100/1000 Universal version 3.70a (and older) and DWC Ether
-MAC 10/100 Universal version 4.0 have been used for developing this driver.
-
-This driver supports both the platform bus and PCI.
-
-Please, for more information also visit: www.stlinux.com
-
-1) Kernel Configuration
-The kernel configuration option is STMMAC_ETH:
- Device Drivers ---> Network device support ---> Ethernet (1000 Mbit) --->
- STMicroelectronics 10/100/1000 Ethernet driver (STMMAC_ETH)
-
-CONFIG_STMMAC_PLATFORM: is to enable the platform driver.
-CONFIG_STMMAC_PCI: is to enable the pci driver.
-
-2) Driver parameters list:
- debug: message level (0: no output, 16: all);
- phyaddr: to manually provide the physical address to the PHY device;
- buf_sz: DMA buffer size;
- tc: control the HW FIFO threshold;
- watchdog: transmit timeout (in milliseconds);
- flow_ctrl: Flow control ability [on/off];
- pause: Flow Control Pause Time;
- eee_timer: tx EEE timer;
- chain_mode: select chain mode instead of ring.
-
-3) Command line options
-Driver parameters can be also passed in command line by using:
- stmmaceth=watchdog:100,chain_mode=1
-
-4) Driver information and notes
-
-4.1) Transmit process
-The xmit method is invoked when the kernel needs to transmit a packet; it sets
-the descriptors in the ring and informs the DMA engine, that there is a packet
-ready to be transmitted.
-By default, the driver sets the NETIF_F_SG bit in the features field of the
-net_device structure, enabling the scatter-gather feature. This is true on
-chips and configurations where the checksum can be done in hardware.
-Once the controller has finished transmitting the packet, timer will be
-scheduled to release the transmit resources.
-
-4.2) Receive process
-When one or more packets are received, an interrupt happens. The interrupts
-are not queued, so the driver has to scan all the descriptors in the ring during
-the receive process.
-This is based on NAPI, so the interrupt handler signals only if there is work
-to be done, and it exits.
-Then the poll method will be scheduled at some future point.
-The incoming packets are stored, by the DMA, in a list of pre-allocated socket
-buffers in order to avoid the memcpy (zero-copy).
-
-4.3) Interrupt mitigation
-The driver is able to mitigate the number of its DMA interrupts
-using NAPI for the reception on chips older than the 3.50.
-New chips have an HW RX-Watchdog used for this mitigation.
-Mitigation parameters can be tuned by ethtool.
-
-4.4) WOL
-Wake up on Lan feature through Magic and Unicast frames are supported for the
-GMAC core.
-
-4.5) DMA descriptors
-Driver handles both normal and alternate descriptors. The latter has been only
-tested on DWC Ether MAC 10/100/1000 Universal version 3.41a and later.
-
-STMMAC supports DMA descriptor to operate both in dual buffer (RING)
-and linked-list(CHAINED) mode. In RING each descriptor points to two
-data buffer pointers whereas in CHAINED mode they point to only one data
-buffer pointer. RING mode is the default.
-
-In CHAINED mode each descriptor will have pointer to next descriptor in
-the list, hence creating the explicit chaining in the descriptor itself,
-whereas such explicit chaining is not possible in RING mode.
-
-4.5.1) Extended descriptors
-The extended descriptors give us information about the Ethernet payload
-when it is carrying PTP packets or TCP/UDP/ICMP over IP.
-These are not available on GMAC Synopsys chips older than the 3.50.
-At probe time the driver will decide if these can be actually used.
-This support also is mandatory for PTPv2 because the extra descriptors
-are used for saving the hardware timestamps and Extended Status.
-
-4.6) Ethtool support
-Ethtool is supported.
-
-For example, driver statistics (including RMON), internal errors can be taken
-using:
- # ethtool -S ethX
-command
-
-4.7) Jumbo and Segmentation Offloading
-Jumbo frames are supported and tested for the GMAC.
-The GSO has been also added but it's performed in software.
-LRO is not supported.
-
-4.8) Physical
-The driver is compatible with Physical Abstraction Layer to be connected with
-PHY and GPHY devices.
-
-4.9) Platform information
-Several information can be passed through the platform and device-tree.
-
-struct plat_stmmacenet_data {
- char *phy_bus_name;
- int bus_id;
- int phy_addr;
- int interface;
- struct stmmac_mdio_bus_data *mdio_bus_data;
- struct stmmac_dma_cfg *dma_cfg;
- int clk_csr;
- int has_gmac;
- int enh_desc;
- int tx_coe;
- int rx_coe;
- int bugged_jumbo;
- int pmt;
- int force_sf_dma_mode;
- int force_thresh_dma_mode;
- int riwt_off;
- int max_speed;
- int maxmtu;
- void (*fix_mac_speed)(void *priv, unsigned int speed);
- void (*bus_setup)(void __iomem *ioaddr);
- int (*init)(struct platform_device *pdev, void *priv);
- void (*exit)(struct platform_device *pdev, void *priv);
- void *bsp_priv;
- int has_gmac4;
- bool tso_en;
-};
-
-Where:
- o phy_bus_name: phy bus name to attach to the stmmac.
- o bus_id: bus identifier.
- o phy_addr: the physical address can be passed from the platform.
- If it is set to -1 the driver will automatically
- detect it at run-time by probing all the 32 addresses.
- o interface: PHY device's interface.
- o mdio_bus_data: specific platform fields for the MDIO bus.
- o dma_cfg: internal DMA parameters
- o pbl: the Programmable Burst Length is maximum number of beats to
- be transferred in one DMA transaction.
- GMAC also enables the 4xPBL by default. (8xPBL for GMAC 3.50 and newer)
- o txpbl/rxpbl: GMAC and newer supports independent DMA pbl for tx/rx.
- o pblx8: Enable 8xPBL (4xPBL for core rev < 3.50). Enabled by default.
- o fixed_burst/mixed_burst/aal
- o clk_csr: fixed CSR Clock range selection.
- o has_gmac: uses the GMAC core.
- o enh_desc: if sets the MAC will use the enhanced descriptor structure.
- o tx_coe: core is able to perform the tx csum in HW.
- o rx_coe: the supports three check sum offloading engine types:
- type_1, type_2 (full csum) and no RX coe.
- o bugged_jumbo: some HWs are not able to perform the csum in HW for
- over-sized frames due to limited buffer sizes.
- Setting this flag the csum will be done in SW on
- JUMBO frames.
- o pmt: core has the embedded power module (optional).
- o force_sf_dma_mode: force DMA to use the Store and Forward mode
- instead of the Threshold.
- o force_thresh_dma_mode: force DMA to use the Threshold mode other than
- the Store and Forward mode.
- o riwt_off: force to disable the RX watchdog feature and switch to NAPI mode.
- o fix_mac_speed: this callback is used for modifying some syscfg registers
- (on ST SoCs) according to the link speed negotiated by the
- physical layer .
- o bus_setup: perform HW setup of the bus. For example, on some ST platforms
- this field is used to configure the AMBA bridge to generate more
- efficient STBus traffic.
- o init/exit: callbacks used for calling a custom initialization;
- this is sometime necessary on some platforms (e.g. ST boxes)
- where the HW needs to have set some PIO lines or system cfg
- registers. init/exit callbacks should not use or modify
- platform data.
- o bsp_priv: another private pointer.
- o has_gmac4: uses GMAC4 core.
- o tso_en: Enables TSO (TCP Segmentation Offload) feature.
-
-For MDIO bus The we have:
-
- struct stmmac_mdio_bus_data {
- int (*phy_reset)(void *priv);
- unsigned int phy_mask;
- int *irqs;
- int probed_phy_irq;
- };
-
-Where:
- o phy_reset: hook to reset the phy device attached to the bus.
- o phy_mask: phy mask passed when register the MDIO bus within the driver.
- o irqs: list of IRQs, one per PHY.
- o probed_phy_irq: if irqs is NULL, use this for probed PHY.
-
-For DMA engine we have the following internal fields that should be
-tuned according to the HW capabilities.
-
-struct stmmac_dma_cfg {
- int pbl;
- int txpbl;
- int rxpbl;
- bool pblx8;
- int fixed_burst;
- int mixed_burst;
- bool aal;
-};
-
-Where:
- o pbl: Programmable Burst Length (tx and rx)
- o txpbl: Transmit Programmable Burst Length. Only for GMAC and newer.
- If set, DMA tx will use this value rather than pbl.
- o rxpbl: Receive Programmable Burst Length. Only for GMAC and newer.
- If set, DMA rx will use this value rather than pbl.
- o pblx8: Enable 8xPBL (4xPBL for core rev < 3.50). Enabled by default.
- o fixed_burst: program the DMA to use the fixed burst mode
- o mixed_burst: program the DMA to use the mixed burst mode
- o aal: Address-Aligned Beats
-
----
-
-Below an example how the structures above are using on ST platforms.
-
- static struct plat_stmmacenet_data stxYYY_ethernet_platform_data = {
- .has_gmac = 0,
- .enh_desc = 0,
- .fix_mac_speed = stxYYY_ethernet_fix_mac_speed,
- |
- |-> to write an internal syscfg
- | on this platform when the
- | link speed changes from 10 to
- | 100 and viceversa
- .init = &stmmac_claim_resource,
- |
- |-> On ST SoC this calls own "PAD"
- | manager framework to claim
- | all the resources necessary
- | (GPIO ...). The .custom_cfg field
- | is used to pass a custom config.
-};
-
-Below the usage of the stmmac_mdio_bus_data: on this SoC, in fact,
-there are two MAC cores: one MAC is for MDIO Bus/PHY emulation
-with fixed_link support.
-
-static struct stmmac_mdio_bus_data stmmac1_mdio_bus = {
- .phy_reset = phy_reset;
- |
- |-> function to provide the phy_reset on this board
- .phy_mask = 0,
-};
-
-static struct fixed_phy_status stmmac0_fixed_phy_status = {
- .link = 1,
- .speed = 100,
- .duplex = 1,
-};
-
-During the board's device_init we can configure the first
-MAC for fixed_link by calling:
- fixed_phy_add(PHY_POLL, 1, &stmmac0_fixed_phy_status);
-and the second one, with a real PHY device attached to the bus,
-by using the stmmac_mdio_bus_data structure (to provide the id, the
-reset procedure etc).
-
-Note that, starting from new chips, where it is available the HW capability
-register, many configurations are discovered at run-time for example to
-understand if EEE, HW csum, PTP, enhanced descriptor etc are actually
-available. As strategy adopted in this driver, the information from the HW
-capability register can replace what has been passed from the platform.
-
-4.10) Device-tree support.
-
-Please see the following document:
- Documentation/devicetree/bindings/net/stmmac.txt
-
-4.11) This is a summary of the content of some relevant files:
- o stmmac_main.c: implements the main network device driver;
- o stmmac_mdio.c: provides MDIO functions;
- o stmmac_pci: this is the PCI driver;
- o stmmac_platform.c: this the platform driver (OF supported);
- o stmmac_ethtool.c: implements the ethtool support;
- o stmmac.h: private driver structure;
- o common.h: common definitions and VFTs;
- o mmc_core.c/mmc.h: Management MAC Counters;
- o stmmac_hwtstamp.c: HW timestamp support for PTP;
- o stmmac_ptp.c: PTP 1588 clock;
- o stmmac_pcs.h: Physical Coding Sublayer common implementation;
- o dwmac-<XXX>.c: these are for the platform glue-logic file; e.g. dwmac-sti.c
- for STMicroelectronics SoCs.
-
-- GMAC 3.x
- o descs.h: descriptor structure definitions;
- o dwmac1000_core.c: dwmac GiGa core functions;
- o dwmac1000_dma.c: dma functions for the GMAC chip;
- o dwmac1000.h: specific header file for the dwmac GiGa;
- o dwmac100_core: dwmac 100 core code;
- o dwmac100_dma.c: dma functions for the dwmac 100 chip;
- o dwmac1000.h: specific header file for the MAC;
- o dwmac_lib.c: generic DMA functions;
- o enh_desc.c: functions for handling enhanced descriptors;
- o norm_desc.c: functions for handling normal descriptors;
- o chain_mode.c/ring_mode.c:: functions to manage RING/CHAINED modes;
-
-- GMAC4.x generation
- o dwmac4_core.c: dwmac GMAC4.x core functions;
- o dwmac4_desc.c: functions for handling GMAC4.x descriptors;
- o dwmac4_descs.h: descriptor definitions;
- o dwmac4_dma.c: dma functions for the GMAC4.x chip;
- o dwmac4_dma.h: dma definitions for the GMAC4.x chip;
- o dwmac4.h: core definitions for the GMAC4.x chip;
- o dwmac4_lib.c: generic GMAC4.x functions;
-
-4.12) TSO support (GMAC4.x)
-
-TSO (Tcp Segmentation Offload) feature is supported by GMAC 4.x chip family.
-When a packet is sent through TCP protocol, the TCP stack ensures that
-the SKB provided to the low level driver (stmmac in our case) matches with
-the maximum frame len (IP header + TCP header + payload <= 1500 bytes (for
-MTU set to 1500)). It means that if an application using TCP want to send a
-packet which will have a length (after adding headers) > 1514 the packet
-will be split in several TCP packets: The data payload is split and headers
-(TCP/IP ..) are added. It is done by software.
-
-When TSO is enabled, the TCP stack doesn't care about the maximum frame
-length and provide SKB packet to stmmac as it is. The GMAC IP will have to
-perform the segmentation by it self to match with maximum frame length.
-
-This feature can be enabled in device tree through "snps,tso" entry.
-
-5) Debug Information
-
-The driver exports many information i.e. internal statistics,
-debug information, MAC and DMA registers etc.
-
-These can be read in several ways depending on the
-type of the information actually needed.
-
-For example a user can be use the ethtool support
-to get statistics: e.g. using: ethtool -S ethX
-(that shows the Management counters (MMC) if supported)
-or sees the MAC/DMA registers: e.g. using: ethtool -d ethX
-
-Compiling the Kernel with CONFIG_DEBUG_FS the driver will export the following
-debugfs entries:
-
-/sys/kernel/debug/stmmaceth/descriptors_status
- To show the DMA TX/RX descriptor rings
-
-Developer can also use the "debug" module parameter to get further debug
-information (please see: NETIF Msg Level).
-
-6) Energy Efficient Ethernet
-
-Energy Efficient Ethernet(EEE) enables IEEE 802.3 MAC sublayer along
-with a family of Physical layer to operate in the Low power Idle(LPI)
-mode. The EEE mode supports the IEEE 802.3 MAC operation at 100Mbps,
-1000Mbps & 10Gbps.
-
-The LPI mode allows power saving by switching off parts of the
-communication device functionality when there is no data to be
-transmitted & received. The system on both the side of the link can
-disable some functionalities & save power during the period of low-link
-utilization. The MAC controls whether the system should enter or exit
-the LPI mode & communicate this to PHY.
-
-As soon as the interface is opened, the driver verifies if the EEE can
-be supported. This is done by looking at both the DMA HW capability
-register and the PHY devices MCD registers.
-To enter in Tx LPI mode the driver needs to have a software timer
-that enable and disable the LPI mode when there is nothing to be
-transmitted.
-
-7) Precision Time Protocol (PTP)
-The driver supports the IEEE 1588-2002, Precision Time Protocol (PTP),
-which enables precise synchronization of clocks in measurement and
-control systems implemented with technologies such as network
-communication.
-
-In addition to the basic timestamp features mentioned in IEEE 1588-2002
-Timestamps, new GMAC cores support the advanced timestamp features.
-IEEE 1588-2008 that can be enabled when configure the Kernel.
-
-8) SGMII/RGMII support
-New GMAC devices provide own way to manage RGMII/SGMII.
-This information is available at run-time by looking at the
-HW capability register. This means that the stmmac can manage
-auto-negotiation and link status w/o using the PHYLIB stuff.
-In fact, the HW provides a subset of extended registers to
-restart the ANE, verify Full/Half duplex mode and Speed.
-Thanks to these registers, it is possible to look at the
-Auto-negotiated Link Parter Ability.
diff --git a/Documentation/networking/device_drivers/ti/cpsw_switchdev.txt b/Documentation/networking/device_drivers/ti/cpsw_switchdev.txt
index 5c8cee17fca9..12855ab268b8 100644
--- a/Documentation/networking/device_drivers/ti/cpsw_switchdev.txt
+++ b/Documentation/networking/device_drivers/ti/cpsw_switchdev.txt
@@ -39,7 +39,7 @@ but without enabling "switch" mode, or to different bridges.
Devlink configuration parameters
====================
-See Documentation/networking/devlink-params-ti-cpsw-switch.txt
+See Documentation/networking/devlink/ti-cpsw-switch.rst
====================
# Bridging in dual mac mode
diff --git a/Documentation/networking/devlink-health.txt b/Documentation/networking/devlink-health.txt
deleted file mode 100644
index 1db3fbea0831..000000000000
--- a/Documentation/networking/devlink-health.txt
+++ /dev/null
@@ -1,86 +0,0 @@
-The health mechanism is targeted for Real Time Alerting, in order to know when
-something bad had happened to a PCI device
-- Provide alert debug information
-- Self healing
-- If problem needs vendor support, provide a way to gather all needed debugging
- information.
-
-The main idea is to unify and centralize driver health reports in the
-generic devlink instance and allow the user to set different
-attributes of the health reporting and recovery procedures.
-
-The devlink health reporter:
-Device driver creates a "health reporter" per each error/health type.
-Error/Health type can be a known/generic (eg pci error, fw error, rx/tx error)
-or unknown (driver specific).
-For each registered health reporter a driver can issue error/health reports
-asynchronously. All health reports handling is done by devlink.
-Device driver can provide specific callbacks for each "health reporter", e.g.
- - Recovery procedures
- - Diagnostics and object dump procedures
- - OOB initial parameters
-Different parts of the driver can register different types of health reporters
-with different handlers.
-
-Once an error is reported, devlink health will do the following actions:
- * A log is being send to the kernel trace events buffer
- * Health status and statistics are being updated for the reporter instance
- * Object dump is being taken and saved at the reporter instance (as long as
- there is no other dump which is already stored)
- * Auto recovery attempt is being done. Depends on:
- - Auto-recovery configuration
- - Grace period vs. time passed since last recover
-
-The user interface:
-User can access/change each reporter's parameters and driver specific callbacks
-via devlink, e.g per error type (per health reporter)
- - Configure reporter's generic parameters (like: disable/enable auto recovery)
- - Invoke recovery procedure
- - Run diagnostics
- - Object dump
-
-The devlink health interface (via netlink):
-DEVLINK_CMD_HEALTH_REPORTER_GET
- Retrieves status and configuration info per DEV and reporter.
-DEVLINK_CMD_HEALTH_REPORTER_SET
- Allows reporter-related configuration setting.
-DEVLINK_CMD_HEALTH_REPORTER_RECOVER
- Triggers a reporter's recovery procedure.
-DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE
- Retrieves diagnostics data from a reporter on a device.
-DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET
- Retrieves the last stored dump. Devlink health
- saves a single dump. If an dump is not already stored by the devlink
- for this reporter, devlink generates a new dump.
- dump output is defined by the reporter.
-DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR
- Clears the last saved dump file for the specified reporter.
-
-
- netlink
- +--------------------------+
- | |
- | + |
- | | |
- +--------------------------+
- |request for ops
- |(diagnose,
- mlx5_core devlink |recover,
- |dump)
-+--------+ +--------------------------+
-| | | reporter| |
-| | | +---------v----------+ |
-| | ops execution | | | |
-| <----------------------------------+ | |
-| | | | | |
-| | | + ^------------------+ |
-| | | | request for ops |
-| | | | (recover, dump) |
-| | | | |
-| | | +-+------------------+ |
-| | health report | | health handler | |
-| +-------------------------------> | |
-| | | +--------------------+ |
-| | health reporter create | |
-| +----------------------------> |
-+--------+ +--------------------------+
diff --git a/Documentation/networking/devlink-info-versions.rst b/Documentation/networking/devlink-info-versions.rst
deleted file mode 100644
index 4914f581b1fd..000000000000
--- a/Documentation/networking/devlink-info-versions.rst
+++ /dev/null
@@ -1,64 +0,0 @@
-.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
-
-=====================
-Devlink info versions
-=====================
-
-board.id
-========
-
-Unique identifier of the board design.
-
-board.rev
-=========
-
-Board design revision.
-
-asic.id
-=======
-
-ASIC design identifier.
-
-asic.rev
-========
-
-ASIC design revision.
-
-board.manufacture
-=================
-
-An identifier of the company or the facility which produced the part.
-
-fw
-==
-
-Overall firmware version, often representing the collection of
-fw.mgmt, fw.app, etc.
-
-fw.mgmt
-=======
-
-Control unit firmware version. This firmware is responsible for house
-keeping tasks, PHY control etc. but not the packet-by-packet data path
-operation.
-
-fw.app
-======
-
-Data path microcode controlling high-speed packet processing.
-
-fw.undi
-=======
-
-UNDI software, may include the UEFI driver, firmware or both.
-
-fw.ncsi
-=======
-
-Version of the software responsible for supporting/handling the
-Network Controller Sideband Interface.
-
-fw.psid
-=======
-
-Unique identifier of the firmware parameter set.
diff --git a/Documentation/networking/devlink-params-bnxt.txt b/Documentation/networking/devlink-params-bnxt.txt
deleted file mode 100644
index 481aa303d5b4..000000000000
--- a/Documentation/networking/devlink-params-bnxt.txt
+++ /dev/null
@@ -1,18 +0,0 @@
-enable_sriov [DEVICE, GENERIC]
- Configuration mode: Permanent
-
-ignore_ari [DEVICE, GENERIC]
- Configuration mode: Permanent
-
-msix_vec_per_pf_max [DEVICE, GENERIC]
- Configuration mode: Permanent
-
-msix_vec_per_pf_min [DEVICE, GENERIC]
- Configuration mode: Permanent
-
-gre_ver_check [DEVICE, DRIVER-SPECIFIC]
- Generic Routing Encapsulation (GRE) version check will
- be enabled in the device. If disabled, device skips
- version checking for incoming packets.
- Type: Boolean
- Configuration mode: Permanent
diff --git a/Documentation/networking/devlink-params-mlx5.txt b/Documentation/networking/devlink-params-mlx5.txt
deleted file mode 100644
index 5071467118bd..000000000000
--- a/Documentation/networking/devlink-params-mlx5.txt
+++ /dev/null
@@ -1,17 +0,0 @@
-flow_steering_mode [DEVICE, DRIVER-SPECIFIC]
- Controls the flow steering mode of the driver.
- Two modes are supported:
- 1. 'dmfs' - Device managed flow steering.
- 2. 'smfs - Software/Driver managed flow steering.
- In DMFS mode, the HW steering entities are created and
- managed through the Firmware.
- In SMFS mode, the HW steering entities are created and
- managed though by the driver directly into Hardware
- without firmware intervention.
- Type: String
- Configuration mode: runtime
-
-enable_roce [DEVICE, GENERIC]
- Enable handling of RoCE traffic in the device.
- Defaultly enabled.
- Configuration mode: driverinit
diff --git a/Documentation/networking/devlink-params-mlxsw.txt b/Documentation/networking/devlink-params-mlxsw.txt
deleted file mode 100644
index c63ea9fc7009..000000000000
--- a/Documentation/networking/devlink-params-mlxsw.txt
+++ /dev/null
@@ -1,10 +0,0 @@
-fw_load_policy [DEVICE, GENERIC]
- Configuration mode: driverinit
-
-acl_region_rehash_interval [DEVICE, DRIVER-SPECIFIC]
- Sets an interval for periodic ACL region rehashes.
- The value is in milliseconds, minimal value is "3000".
- Value "0" disables the periodic work.
- The first rehash will be run right after value is set.
- Type: u32
- Configuration mode: runtime
diff --git a/Documentation/networking/devlink-params-mv88e6xxx.txt b/Documentation/networking/devlink-params-mv88e6xxx.txt
deleted file mode 100644
index 21c4b3556ef2..000000000000
--- a/Documentation/networking/devlink-params-mv88e6xxx.txt
+++ /dev/null
@@ -1,7 +0,0 @@
-ATU_hash [DEVICE, DRIVER-SPECIFIC]
- Select one of four possible hashing algorithms for
- MAC addresses in the Address Translation Unit.
- A value of 3 seems to work better than the default of
- 1 when many MAC addresses have the same OUI.
- Configuration mode: runtime
- Type: u8. 0-3 valid.
diff --git a/Documentation/networking/devlink-params-nfp.txt b/Documentation/networking/devlink-params-nfp.txt
deleted file mode 100644
index 43e4d4034865..000000000000
--- a/Documentation/networking/devlink-params-nfp.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-fw_load_policy [DEVICE, GENERIC]
- Configuration mode: permanent
-
-reset_dev_on_drv_probe [DEVICE, GENERIC]
- Configuration mode: permanent
diff --git a/Documentation/networking/devlink-params-ti-cpsw-switch.txt b/Documentation/networking/devlink-params-ti-cpsw-switch.txt
deleted file mode 100644
index 4037458499f7..000000000000
--- a/Documentation/networking/devlink-params-ti-cpsw-switch.txt
+++ /dev/null
@@ -1,10 +0,0 @@
-ale_bypass [DEVICE, DRIVER-SPECIFIC]
- Allows to enable ALE_CONTROL(4).BYPASS mode for debug purposes.
- All packets will be sent to the Host port only if enabled.
- Type: bool
- Configuration mode: runtime
-
-switch_mode [DEVICE, DRIVER-SPECIFIC]
- Enable switch mode
- Type: bool
- Configuration mode: runtime
diff --git a/Documentation/networking/devlink-params.txt b/Documentation/networking/devlink-params.txt
deleted file mode 100644
index 04e234e9acc9..000000000000
--- a/Documentation/networking/devlink-params.txt
+++ /dev/null
@@ -1,71 +0,0 @@
-Devlink configuration parameters
-================================
-Following is the list of configuration parameters via devlink interface.
-Each parameter can be generic or driver specific and are device level
-parameters.
-
-Note that the driver-specific files should contain the generic params
-they support to, with supported config modes.
-
-Each parameter can be set in different configuration modes:
- runtime - set while driver is running, no reset required.
- driverinit - applied while driver initializes, requires restart
- driver by devlink reload command.
- permanent - written to device's non-volatile memory, hard reset
- required.
-
-Following is the list of parameters:
-====================================
-enable_sriov [DEVICE, GENERIC]
- Enable Single Root I/O Virtualisation (SRIOV) in
- the device.
- Type: Boolean
-
-ignore_ari [DEVICE, GENERIC]
- Ignore Alternative Routing-ID Interpretation (ARI)
- capability. If enabled, adapter will ignore ARI
- capability even when platforms has the support
- enabled and creates same number of partitions when
- platform does not support ARI.
- Type: Boolean
-
-msix_vec_per_pf_max [DEVICE, GENERIC]
- Provides the maximum number of MSIX interrupts that
- a device can create. Value is same across all
- physical functions (PFs) in the device.
- Type: u32
-
-msix_vec_per_pf_min [DEVICE, GENERIC]
- Provides the minimum number of MSIX interrupts required
- for the device initialization. Value is same across all
- physical functions (PFs) in the device.
- Type: u32
-
-fw_load_policy [DEVICE, GENERIC]
- Controls the device's firmware loading policy.
- Valid values:
- * DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_DRIVER (0)
- Load firmware version preferred by the driver.
- * DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_FLASH (1)
- Load firmware currently stored in flash.
- * DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_DISK (2)
- Load firmware currently available on host's disk.
- Type: u8
-
-reset_dev_on_drv_probe [DEVICE, GENERIC]
- Controls the device's reset policy on driver probe.
- Valid values:
- * DEVLINK_PARAM_RESET_DEV_ON_DRV_PROBE_VALUE_UNKNOWN (0)
- Unknown or invalid value.
- * DEVLINK_PARAM_RESET_DEV_ON_DRV_PROBE_VALUE_ALWAYS (1)
- Always reset device on driver probe.
- * DEVLINK_PARAM_RESET_DEV_ON_DRV_PROBE_VALUE_NEVER (2)
- Never reset device on driver probe.
- * DEVLINK_PARAM_RESET_DEV_ON_DRV_PROBE_VALUE_DISK (3)
- Reset only if device firmware can be found in the
- filesystem.
- Type: u8
-
-enable_roce [DEVICE, GENERIC]
- Enable handling of RoCE traffic in the device.
- Type: Boolean
diff --git a/Documentation/networking/devlink-trap-netdevsim.rst b/Documentation/networking/devlink-trap-netdevsim.rst
deleted file mode 100644
index b721c9415473..000000000000
--- a/Documentation/networking/devlink-trap-netdevsim.rst
+++ /dev/null
@@ -1,20 +0,0 @@
-.. SPDX-License-Identifier: GPL-2.0
-
-======================
-Devlink Trap netdevsim
-======================
-
-Driver-specific Traps
-=====================
-
-.. list-table:: List of Driver-specific Traps Registered by ``netdevsim``
- :widths: 5 5 90
-
- * - Name
- - Type
- - Description
- * - ``fid_miss``
- - ``exception``
- - When a packet enters the device it is classified to a filtering
- indentifier (FID) based on the ingress port and VLAN. This trap is used
- to trap packets for which a FID could not be found
diff --git a/Documentation/networking/devlink/bnxt.rst b/Documentation/networking/devlink/bnxt.rst
new file mode 100644
index 000000000000..79e746d22a53
--- /dev/null
+++ b/Documentation/networking/devlink/bnxt.rst
@@ -0,0 +1,41 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+====================
+bnxt devlink support
+====================
+
+This document describes the devlink features implemented by the ``bnxt``
+device driver.
+
+Parameters
+==========
+
+.. list-table:: Generic parameters implemented
+
+ * - Name
+ - Mode
+ * - ``enable_sriov``
+ - Permanent
+ * - ``ignore_ari``
+ - Permanent
+ * - ``msix_vec_per_pf_max``
+ - Permanent
+ * - ``msix_vec_per_pf_min``
+ - Permanent
+
+The ``bnxt`` driver also implements the following driver-specific
+parameters.
+
+.. list-table:: Driver-specific parameters implemented
+ :widths: 5 5 5 85
+
+ * - Name
+ - Type
+ - Mode
+ - Description
+ * - ``gre_ver_check``
+ - Boolean
+ - Permanent
+ - Generic Routing Encapsulation (GRE) version check will be enabled in
+ the device. If disabled, the device will skip the version check for
+ incoming packets.
diff --git a/Documentation/networking/devlink/devlink-dpipe.rst b/Documentation/networking/devlink/devlink-dpipe.rst
new file mode 100644
index 000000000000..468fe1001b74
--- /dev/null
+++ b/Documentation/networking/devlink/devlink-dpipe.rst
@@ -0,0 +1,252 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=============
+Devlink DPIPE
+=============
+
+Background
+==========
+
+While performing the hardware offloading process, much of the hardware
+specifics cannot be presented. These details are useful for debugging, and
+``devlink-dpipe`` provides a standardized way to provide visibility into the
+offloading process.
+
+For example, the routing longest prefix match (LPM) algorithm used by the
+Linux kernel may differ from the hardware implementation. The pipeline debug
+API (DPIPE) is aimed at providing the user visibility into the ASIC's
+pipeline in a generic way.
+
+The hardware offload process is expected to be done in a way that the user
+should not be able to distinguish between the hardware vs. software
+implementation. In this process, hardware specifics are neglected. In
+reality those details can have lots of meaning and should be exposed in some
+standard way.
+
+This problem is made even more complex when one wishes to offload the
+control path of the whole networking stack to a switch ASIC. Due to
+differences in the hardware and software models some processes cannot be
+represented correctly.
+
+One example is the kernel's LPM algorithm which in many cases differs
+greatly to the hardware implementation. The configuration API is the same,
+but one cannot rely on the Forward Information Base (FIB) to look like the
+Level Path Compression trie (LPC-trie) in hardware.
+
+In many situations trying to analyze systems failure solely based on the
+kernel's dump may not be enough. By combining this data with complementary
+information about the underlying hardware, this debugging can be made
+easier; additionally, the information can be useful when debugging
+performance issues.
+
+Overview
+========
+
+The ``devlink-dpipe`` interface closes this gap. The hardware's pipeline is
+modeled as a graph of match/action tables. Each table represents a specific
+hardware block. This model is not new, first being used by the P4 language.
+
+Traditionally it has been used as an alternative model for hardware
+configuration, but the ``devlink-dpipe`` interface uses it for visibility
+purposes as a standard complementary tool. The system's view from
+``devlink-dpipe`` should change according to the changes done by the
+standard configuration tools.
+
+For example, it’s quiet common to implement Access Control Lists (ACL)
+using Ternary Content Addressable Memory (TCAM). The TCAM memory can be
+divided into TCAM regions. Complex TC filters can have multiple rules with
+different priorities and different lookup keys. On the other hand hardware
+TCAM regions have a predefined lookup key. Offloading the TC filter rules
+using TCAM engine can result in multiple TCAM regions being interconnected
+in a chain (which may affect the data path latency). In response to a new TC
+filter new tables should be created describing those regions.
+
+Model
+=====
+
+The ``DPIPE`` model introduces several objects:
+
+ * headers
+ * tables
+ * entries
+
+A ``header`` describes packet formats and provides names for fields within
+the packet. A ``table`` describes hardware blocks. An ``entry`` describes
+the actual content of a specific table.
+
+The hardware pipeline is not port specific, but rather describes the whole
+ASIC. Thus it is tied to the top of the ``devlink`` infrastructure.
+
+Drivers can register and unregister tables at run time, in order to support
+dynamic behavior. This dynamic behavior is mandatory for describing hardware
+blocks like TCAM regions which can be allocated and freed dynamically.
+
+``devlink-dpipe`` generally is not intended for configuration. The exception
+is hardware counting for a specific table.
+
+The following commands are used to obtain the ``dpipe`` objects from
+userspace:
+
+ * ``table_get``: Receive a table's description.
+ * ``headers_get``: Receive a device's supported headers.
+ * ``entries_get``: Receive a table's current entries.
+ * ``counters_set``: Enable or disable counters on a table.
+
+Table
+-----
+
+The driver should implement the following operations for each table:
+
+ * ``matches_dump``: Dump the supported matches.
+ * ``actions_dump``: Dump the supported actions.
+ * ``entries_dump``: Dump the actual content of the table.
+ * ``counters_set_update``: Synchronize hardware with counters enabled or
+ disabled.
+
+Header/Field
+------------
+
+In a similar way to P4 headers and fields are used to describe a table's
+behavior. There is a slight difference between the standard protocol headers
+and specific ASIC metadata. The protocol headers should be declared in the
+``devlink`` core API. On the other hand ASIC meta data is driver specific
+and should be defined in the driver. Additionally, each driver-specific
+devlink documentation file should document the driver-specific ``dpipe``
+headers it implements. The headers and fields are identified by enumeration.
+
+In order to provide further visibility some ASIC metadata fields could be
+mapped to kernel objects. For example, internal router interface indexes can
+be directly mapped to the net device ifindex. FIB table indexes used by
+different Virtual Routing and Forwarding (VRF) tables can be mapped to
+internal routing table indexes.
+
+Match
+-----
+
+Matches are kept primitive and close to hardware operation. Match types like
+LPM are not supported due to the fact that this is exactly a process we wish
+to describe in full detail. Example of matches:
+
+ * ``field_exact``: Exact match on a specific field.
+ * ``field_exact_mask``: Exact match on a specific field after masking.
+ * ``field_range``: Match on a specific range.
+
+The id's of the header and the field should be specified in order to
+identify the specific field. Furthermore, the header index should be
+specified in order to distinguish multiple headers of the same type in a
+packet (tunneling).
+
+Action
+------
+
+Similar to match, the actions are kept primitive and close to hardware
+operation. For example:
+
+ * ``field_modify``: Modify the field value.
+ * ``field_inc``: Increment the field value.
+ * ``push_header``: Add a header.
+ * ``pop_header``: Remove a header.
+
+Entry
+-----
+
+Entries of a specific table can be dumped on demand. Each eentry is
+identified with an index and its properties are described by a list of
+match/action values and specific counter. By dumping the tables content the
+interactions between tables can be resolved.
+
+Abstraction Example
+===================
+
+The following is an example of the abstraction model of the L3 part of
+Mellanox Spectrum ASIC. The blocks are described in the order they appear in
+the pipeline. The table sizes in the following examples are not real
+hardware sizes and are provided for demonstration purposes.
+
+LPM
+---
+
+The LPM algorithm can be implemented as a list of hash tables. Each hash
+table contains routes with the same prefix length. The root of the list is
+/32, and in case of a miss the hardware will continue to the next hash
+table. The depth of the search will affect the data path latency.
+
+In case of a hit the entry contains information about the next stage of the
+pipeline which resolves the MAC address. The next stage can be either local
+host table for directly connected routes, or adjacency table for next-hops.
+The ``meta.lpm_prefix`` field is used to connect two LPM tables.
+
+.. code::
+
+ table lpm_prefix_16 {
+ size: 4096,
+ counters_enabled: true,
+ match: { meta.vr_id: exact,
+ ipv4.dst_addr: exact_mask,
+ ipv6.dst_addr: exact_mask,
+ meta.lpm_prefix: exact },
+ action: { meta.adj_index: set,
+ meta.adj_group_size: set,
+ meta.rif_port: set,
+ meta.lpm_prefix: set },
+ }
+
+Local Host
+----------
+
+In the case of local routes the LPM lookup already resolves the egress
+router interface (RIF), yet the exact MAC address is not known. The local
+host table is a hash table combining the output interface id with
+destination IP address as a key. The result is the MAC address.
+
+.. code::
+
+ table local_host {
+ size: 4096,
+ counters_enabled: true,
+ match: { meta.rif_port: exact,
+ ipv4.dst_addr: exact},
+ action: { ethernet.daddr: set }
+ }
+
+Adjacency
+---------
+
+In case of remote routes this table does the ECMP. The LPM lookup results in
+ECMP group size and index that serves as a global offset into this table.
+Concurrently a hash of the packet is generated. Based on the ECMP group size
+and the packet's hash a local offset is generated. Multiple LPM entries can
+point to the same adjacency group.
+
+.. code::
+
+ table adjacency {
+ size: 4096,
+ counters_enabled: true,
+ match: { meta.adj_index: exact,
+ meta.adj_group_size: exact,
+ meta.packet_hash_index: exact },
+ action: { ethernet.daddr: set,
+ meta.erif: set }
+ }
+
+ERIF
+----
+
+In case the egress RIF and destination MAC have been resolved by previous
+tables this table does multiple operations like TTL decrease and MTU check.
+Then the decision of forward/drop is taken and the port L3 statistics are
+updated based on the packet's type (broadcast, unicast, multicast).
+
+.. code::
+
+ table erif {
+ size: 800,
+ counters_enabled: true,
+ match: { meta.rif_port: exact,
+ meta.is_l3_unicast: exact,
+ meta.is_l3_broadcast: exact,
+ meta.is_l3_multicast, exact },
+ action: { meta.l3_drop: set,
+ meta.l3_forward: set }
+ }
diff --git a/Documentation/networking/devlink/devlink-health.rst b/Documentation/networking/devlink/devlink-health.rst
new file mode 100644
index 000000000000..0c99b11f05f9
--- /dev/null
+++ b/Documentation/networking/devlink/devlink-health.rst
@@ -0,0 +1,114 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==============
+Devlink Health
+==============
+
+Background
+==========
+
+The ``devlink`` health mechanism is targeted for Real Time Alerting, in
+order to know when something bad happened to a PCI device.
+
+ * Provide alert debug information.
+ * Self healing.
+ * If problem needs vendor support, provide a way to gather all needed
+ debugging information.
+
+Overview
+========
+
+The main idea is to unify and centralize driver health reports in the
+generic ``devlink`` instance and allow the user to set different
+attributes of the health reporting and recovery procedures.
+
+The ``devlink`` health reporter:
+Device driver creates a "health reporter" per each error/health type.
+Error/Health type can be a known/generic (eg pci error, fw error, rx/tx error)
+or unknown (driver specific).
+For each registered health reporter a driver can issue error/health reports
+asynchronously. All health reports handling is done by ``devlink``.
+Device driver can provide specific callbacks for each "health reporter", e.g.:
+
+ * Recovery procedures
+ * Diagnostics procedures
+ * Object dump procedures
+ * OOB initial parameters
+
+Different parts of the driver can register different types of health reporters
+with different handlers.
+
+Actions
+=======
+
+Once an error is reported, devlink health will perform the following actions:
+
+ * A log is being send to the kernel trace events buffer
+ * Health status and statistics are being updated for the reporter instance
+ * Object dump is being taken and saved at the reporter instance (as long as
+ there is no other dump which is already stored)
+ * Auto recovery attempt is being done. Depends on:
+ - Auto-recovery configuration
+ - Grace period vs. time passed since last recover
+
+User Interface
+==============
+
+User can access/change each reporter's parameters and driver specific callbacks
+via ``devlink``, e.g per error type (per health reporter):
+
+ * Configure reporter's generic parameters (like: disable/enable auto recovery)
+ * Invoke recovery procedure
+ * Run diagnostics
+ * Object dump
+
+.. list-table:: List of devlink health interfaces
+ :widths: 10 90
+
+ * - Name
+ - Description
+ * - ``DEVLINK_CMD_HEALTH_REPORTER_GET``
+ - Retrieves status and configuration info per DEV and reporter.
+ * - ``DEVLINK_CMD_HEALTH_REPORTER_SET``
+ - Allows reporter-related configuration setting.
+ * - ``DEVLINK_CMD_HEALTH_REPORTER_RECOVER``
+ - Triggers a reporter's recovery procedure.
+ * - ``DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE``
+ - Retrieves diagnostics data from a reporter on a device.
+ * - ``DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET``
+ - Retrieves the last stored dump. Devlink health
+ saves a single dump. If an dump is not already stored by the devlink
+ for this reporter, devlink generates a new dump.
+ dump output is defined by the reporter.
+ * - ``DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR``
+ - Clears the last saved dump file for the specified reporter.
+
+The following diagram provides a general overview of ``devlink-health``::
+
+ netlink
+ +--------------------------+
+ | |
+ | + |
+ | | |
+ +--------------------------+
+ |request for ops
+ |(diagnose,
+ mlx5_core devlink |recover,
+ |dump)
+ +--------+ +--------------------------+
+ | | | reporter| |
+ | | | +---------v----------+ |
+ | | ops execution | | | |
+ | <----------------------------------+ | |
+ | | | | | |
+ | | | + ^------------------+ |
+ | | | | request for ops |
+ | | | | (recover, dump) |
+ | | | | |
+ | | | +-+------------------+ |
+ | | health report | | health handler | |
+ | +-------------------------------> | |
+ | | | +--------------------+ |
+ | | health reporter create | |
+ | +----------------------------> |
+ +--------+ +--------------------------+
diff --git a/Documentation/networking/devlink/devlink-info.rst b/Documentation/networking/devlink/devlink-info.rst
new file mode 100644
index 000000000000..0385f15028b1
--- /dev/null
+++ b/Documentation/networking/devlink/devlink-info.rst
@@ -0,0 +1,94 @@
+.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+
+============
+Devlink Info
+============
+
+The ``devlink-info`` mechanism enables device drivers to report device
+information in a generic fashion. It is extensible, and enables exporting
+even device or driver specific information.
+
+devlink supports representing the following types of versions
+
+.. list-table:: List of version types
+ :widths: 5 95
+
+ * - Type
+ - Description
+ * - ``fixed``
+ - Represents fixed versions, which cannot change. For example,
+ component identifiers or the board version reported in the PCI VPD.
+ * - ``running``
+ - Represents the version of the currently running component. For
+ example the running version of firmware. These versions generally
+ only update after a reboot.
+ * - ``stored``
+ - Represents the version of a component as stored, such as after a
+ flash update. Stored values should update to reflect changes in the
+ flash even if a reboot has not yet occurred.
+
+Generic Versions
+================
+
+It is expected that drivers use the following generic names for exporting
+version information. Other information may be exposed using driver-specific
+names, but these should be documented in the driver-specific file.
+
+board.id
+--------
+
+Unique identifier of the board design.
+
+board.rev
+---------
+
+Board design revision.
+
+asic.id
+-------
+
+ASIC design identifier.
+
+asic.rev
+--------
+
+ASIC design revision.
+
+board.manufacture
+-----------------
+
+An identifier of the company or the facility which produced the part.
+
+fw
+--
+
+Overall firmware version, often representing the collection of
+fw.mgmt, fw.app, etc.
+
+fw.mgmt
+-------
+
+Control unit firmware version. This firmware is responsible for house
+keeping tasks, PHY control etc. but not the packet-by-packet data path
+operation.
+
+fw.app
+------
+
+Data path microcode controlling high-speed packet processing.
+
+fw.undi
+-------
+
+UNDI software, may include the UEFI driver, firmware or both.
+
+fw.ncsi
+-------
+
+Version of the software responsible for supporting/handling the
+Network Controller Sideband Interface.
+
+fw.psid
+-------
+
+Unique identifier of the firmware parameter set.
diff --git a/Documentation/networking/devlink/devlink-params.rst b/Documentation/networking/devlink/devlink-params.rst
new file mode 100644
index 000000000000..da2f85c0fa21
--- /dev/null
+++ b/Documentation/networking/devlink/devlink-params.rst
@@ -0,0 +1,108 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==============
+Devlink Params
+==============
+
+``devlink`` provides capability for a driver to expose device parameters for low
+level device functionality. Since devlink can operate at the device-wide
+level, it can be used to provide configuration that may affect multiple
+ports on a single device.
+
+This document describes a number of generic parameters that are supported
+across multiple drivers. Each driver is also free to add their own
+parameters. Each driver must document the specific parameters they support,
+whether generic or not.
+
+Configuration modes
+===================
+
+Parameters may be set in different configuration modes.
+
+.. list-table:: Possible configuration modes
+ :widths: 5 90
+
+ * - Name
+ - Description
+ * - ``runtime``
+ - set while the driver is running, and takes effect immediately. No
+ reset is required.
+ * - ``driverinit``
+ - applied while the driver initializes. Requires the user to restart
+ the driver using the ``devlink`` reload command.
+ * - ``permanent``
+ - written to the device's non-volatile memory. A hard reset is required
+ for it to take effect.
+
+Reloading
+---------
+
+In order for ``driverinit`` parameters to take effect, the driver must
+support reloading via the ``devlink-reload`` command. This command will
+request a reload of the device driver.
+
+Generic configuration parameters
+================================
+The following is a list of generic configuration parameters that drivers may
+add. Use of generic parameters is preferred over each driver creating their
+own name.
+
+.. list-table:: List of generic parameters
+ :widths: 5 5 90
+
+ * - Name
+ - Type
+ - Description
+ * - ``enable_sriov``
+ - Boolean
+ - Enable Single Root I/O Virtualization (SRIOV) in the device.
+ * - ``ignore_ari``
+ - Boolean
+ - Ignore Alternative Routing-ID Interpretation (ARI) capability. If
+ enabled, the adapter will ignore ARI capability even when the
+ platform has support enabled. The device will create the same number
+ of partitions as when the platform does not support ARI.
+ * - ``msix_vec_per_pf_max``
+ - u32
+ - Provides the maximum number of MSI-X interrupts that a device can
+ create. Value is the same across all physical functions (PFs) in the
+ device.
+ * - ``msix_vec_per_pf_min``
+ - u32
+ - Provides the minimum number of MSI-X interrupts required for the
+ device to initialize. Value is the same across all physical functions
+ (PFs) in the device.
+ * - ``fw_load_policy``
+ - u8
+ - Control the device's firmware loading policy.
+ - ``DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_DRIVER`` (0)
+ Load firmware version preferred by the driver.
+ - ``DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_FLASH`` (1)
+ Load firmware currently stored in flash.
+ - ``DEVLINK_PARAM_FW_LOAD_POLICY_VALUE_DISK`` (2)
+ Load firmware currently available on host's disk.
+ * - ``reset_dev_on_drv_probe``
+ - u8
+ - Controls the device's reset policy on driver probe.
+ - ``DEVLINK_PARAM_RESET_DEV_ON_DRV_PROBE_VALUE_UNKNOWN`` (0)
+ Unknown or invalid value.
+ - ``DEVLINK_PARAM_RESET_DEV_ON_DRV_PROBE_VALUE_ALWAYS`` (1)
+ Always reset device on driver probe.
+ - ``DEVLINK_PARAM_RESET_DEV_ON_DRV_PROBE_VALUE_NEVER`` (2)
+ Never reset device on driver probe.
+ - ``DEVLINK_PARAM_RESET_DEV_ON_DRV_PROBE_VALUE_DISK`` (3)
+ Reset the device only if firmware can be found in the filesystem.
+ * - ``enable_roce``
+ - Boolean
+ - Enable handling of RoCE traffic in the device.
+ * - ``internal_err_reset``
+ - Boolean
+ - When enabled, the device driver will reset the device on internal
+ errors.
+ * - ``max_macs``
+ - u32
+ - Specifies the maximum number of MAC addresses per ethernet port of
+ this device.
+ * - ``region_snapshot_enable``
+ - Boolean
+ - Enable capture of ``devlink-region`` snapshots.
diff --git a/Documentation/networking/devlink/devlink-region.rst b/Documentation/networking/devlink/devlink-region.rst
new file mode 100644
index 000000000000..1a7683e7acb2
--- /dev/null
+++ b/Documentation/networking/devlink/devlink-region.rst
@@ -0,0 +1,60 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==============
+Devlink Region
+==============
+
+``devlink`` regions enable access to driver defined address regions using
+devlink.
+
+Each device can create and register its own supported address regions. The
+region can then be accessed via the devlink region interface.
+
+Region snapshots are collected by the driver, and can be accessed via read
+or dump commands. This allows future analysis on the created snapshots.
+Regions may optionally support triggering snapshots on demand.
+
+The major benefit to creating a region is to provide access to internal
+address regions that are otherwise inaccessible to the user.
+
+Regions may also be used to provide an additional way to debug complex error
+states, but see also :doc:`devlink-health`
+
+example usage
+-------------
+
+.. code:: shell
+
+ $ devlink region help
+ $ devlink region show [ DEV/REGION ]
+ $ devlink region del DEV/REGION snapshot SNAPSHOT_ID
+ $ devlink region dump DEV/REGION [ snapshot SNAPSHOT_ID ]
+ $ devlink region read DEV/REGION [ snapshot SNAPSHOT_ID ]
+ address ADDRESS length length
+
+ # Show all of the exposed regions with region sizes:
+ $ devlink region show
+ pci/0000:00:05.0/cr-space: size 1048576 snapshot [1 2]
+ pci/0000:00:05.0/fw-health: size 64 snapshot [1 2]
+
+ # Delete a snapshot using:
+ $ devlink region del pci/0000:00:05.0/cr-space snapshot 1
+
+ # Trigger (request) a snapshot be taken:
+ $ devlink region trigger pci/0000:00:05.0/cr-space
+
+ # Dump a snapshot:
+ $ devlink region dump pci/0000:00:05.0/fw-health snapshot 1
+ 0000000000000000 0014 95dc 0014 9514 0035 1670 0034 db30
+ 0000000000000010 0000 0000 ffff ff04 0029 8c00 0028 8cc8
+ 0000000000000020 0016 0bb8 0016 1720 0000 0000 c00f 3ffc
+ 0000000000000030 bada cce5 bada cce5 bada cce5 bada cce5
+
+ # Read a specific part of a snapshot:
+ $ devlink region read pci/0000:00:05.0/fw-health snapshot 1 address 0
+ length 16
+ 0000000000000000 0014 95dc 0014 9514 0035 1670 0034 db30
+
+As regions are likely very device or driver specific, no generic regions are
+defined. See the driver-specific documentation files for information on the
+specific regions a driver supports.
diff --git a/Documentation/networking/devlink/devlink-resource.rst b/Documentation/networking/devlink/devlink-resource.rst
new file mode 100644
index 000000000000..93e92d2f0752
--- /dev/null
+++ b/Documentation/networking/devlink/devlink-resource.rst
@@ -0,0 +1,62 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+================
+Devlink Resource
+================
+
+``devlink`` provides the ability for drivers to register resources, which
+can allow administrators to see the device restrictions for a given
+resource, as well as how much of the given resource is currently
+in use. Additionally, these resources can optionally have configurable size.
+This could enable the administrator to limit the number of resources that
+are used.
+
+For example, the ``netdevsim`` driver enables ``/IPv4/fib`` and
+``/IPv4/fib-rules`` as resources to limit the number of IPv4 FIB entries and
+rules for a given device.
+
+Resource Ids
+============
+
+Each resource is represented by an id, and contains information about its
+current size and related sub resources. To access a sub resource, you
+specify the path of the resource. For example ``/IPv4/fib`` is the id for
+the ``fib`` sub-resource under the ``IPv4`` resource.
+
+example usage
+-------------
+
+The resources exposed by the driver can be observed, for example:
+
+.. code:: shell
+
+ $devlink resource show pci/0000:03:00.0
+ pci/0000:03:00.0:
+ name kvd size 245760 unit entry
+ resources:
+ name linear size 98304 occ 0 unit entry size_min 0 size_max 147456 size_gran 128
+ name hash_double size 60416 unit entry size_min 32768 size_max 180224 size_gran 128
+ name hash_single size 87040 unit entry size_min 65536 size_max 212992 size_gran 128
+
+Some resource's size can be changed. Examples:
+
+.. code:: shell
+
+ $devlink resource set pci/0000:03:00.0 path /kvd/hash_single size 73088
+ $devlink resource set pci/0000:03:00.0 path /kvd/hash_double size 74368
+
+The changes do not apply immediately, this can be validated by the 'size_new'
+attribute, which represents the pending change in size. For example:
+
+.. code:: shell
+
+ $devlink resource show pci/0000:03:00.0
+ pci/0000:03:00.0:
+ name kvd size 245760 unit entry size_valid false
+ resources:
+ name linear size 98304 size_new 147456 occ 0 unit entry size_min 0 size_max 147456 size_gran 128
+ name hash_double size 60416 unit entry size_min 32768 size_max 180224 size_gran 128
+ name hash_single size 87040 unit entry size_min 65536 size_max 212992 size_gran 128
+
+Note that changes in resource size may require a device reload to properly
+take effect.
diff --git a/Documentation/networking/devlink-trap.rst b/Documentation/networking/devlink/devlink-trap.rst
index 03311849bfb1..cbaa750de37d 100644
--- a/Documentation/networking/devlink-trap.rst
+++ b/Documentation/networking/devlink/devlink-trap.rst
@@ -233,7 +233,7 @@ help debug packet drops caused by these exceptions. The following list includes
links to the description of driver-specific traps registered by various device
drivers:
- * :doc:`devlink-trap-netdevsim`
+ * :doc:`netdevsim`
Generic Packet Trap Groups
==========================
diff --git a/Documentation/networking/devlink/index.rst b/Documentation/networking/devlink/index.rst
new file mode 100644
index 000000000000..087ff54d53fc
--- /dev/null
+++ b/Documentation/networking/devlink/index.rst
@@ -0,0 +1,42 @@
+Linux Devlink Documentation
+===========================
+
+devlink is an API to expose device information and resources not directly
+related to any device class, such as chip-wide/switch-ASIC-wide configuration.
+
+Interface documentation
+-----------------------
+
+The following pages describe various interfaces available through devlink in
+general.
+
+.. toctree::
+ :maxdepth: 1
+
+ devlink-dpipe
+ devlink-health
+ devlink-info
+ devlink-params
+ devlink-region
+ devlink-resource
+ devlink-trap
+
+Driver-specific documentation
+-----------------------------
+
+Each driver that implements ``devlink`` is expected to document what
+parameters, info versions, and other features it supports.
+
+.. toctree::
+ :maxdepth: 1
+
+ bnxt
+ ionic
+ mlx4
+ mlx5
+ mlxsw
+ mv88e6xxx
+ netdevsim
+ nfp
+ qed
+ ti-cpsw-switch
diff --git a/Documentation/networking/devlink/ionic.rst b/Documentation/networking/devlink/ionic.rst
new file mode 100644
index 000000000000..48da9c92d584
--- /dev/null
+++ b/Documentation/networking/devlink/ionic.rst
@@ -0,0 +1,29 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=====================
+ionic devlink support
+=====================
+
+This document describes the devlink features implemented by the ``ionic``
+device driver.
+
+Info versions
+=============
+
+The ``ionic`` driver reports the following versions
+
+.. list-table:: devlink info versions implemented
+ :widths: 5 5 90
+
+ * - Name
+ - Type
+ - Description
+ * - ``fw``
+ - running
+ - Version of firmware running on the device
+ * - ``asic.id``
+ - fixed
+ - The ASIC type for this device
+ * - ``asic.rev``
+ - fixed
+ - The revision of the ASIC for this device
diff --git a/Documentation/networking/devlink/mlx4.rst b/Documentation/networking/devlink/mlx4.rst
new file mode 100644
index 000000000000..7b2d17ea5471
--- /dev/null
+++ b/Documentation/networking/devlink/mlx4.rst
@@ -0,0 +1,56 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+====================
+mlx4 devlink support
+====================
+
+This document describes the devlink features implemented by the ``mlx4``
+device driver.
+
+Parameters
+==========
+
+.. list-table:: Generic parameters implemented
+
+ * - Name
+ - Mode
+ * - ``internal_err_reset``
+ - driverinit, runtime
+ * - ``max_macs``
+ - driverinit
+ * - ``region_snapshot_enable``
+ - driverinit, runtime
+
+The ``mlx4`` driver also implements the following driver-specific
+parameters.
+
+.. list-table:: Driver-specific parameters implemented
+ :widths: 5 5 5 85
+
+ * - Name
+ - Type
+ - Mode
+ - Description
+ * - ``enable_64b_cqe_eqe``
+ - Boolean
+ - driverinit
+ - Enable 64 byte CQEs/EQEs, if the FW supports it.
+ * - ``enable_4k_uar``
+ - Boolean
+ - driverinit
+ - Enable using the 4k UAR.
+
+The ``mlx4`` driver supports reloading via ``DEVLINK_CMD_RELOAD``
+
+Regions
+=======
+
+The ``mlx4`` driver supports dumping the firmware PCI crspace and health
+buffer during a critical firmware issue.
+
+In case a firmware command times out, firmware getting stuck, or a non zero
+value on the catastrophic buffer, a snapshot will be taken by the driver.
+
+The ``cr-space`` region will contain the firmware PCI crspace contents. The
+``fw-health`` region will contain the device firmware's health buffer.
+Snapshots for both of these regions are taken on the same event triggers.
diff --git a/Documentation/networking/devlink/mlx5.rst b/Documentation/networking/devlink/mlx5.rst
new file mode 100644
index 000000000000..629a6e69c036
--- /dev/null
+++ b/Documentation/networking/devlink/mlx5.rst
@@ -0,0 +1,59 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+====================
+mlx5 devlink support
+====================
+
+This document describes the devlink features implemented by the ``mlx5``
+device driver.
+
+Parameters
+==========
+
+.. list-table:: Generic parameters implemented
+
+ * - Name
+ - Mode
+ * - ``enable_roce``
+ - driverinit
+
+The ``mlx5`` driver also implements the following driver-specific
+parameters.
+
+.. list-table:: Driver-specific parameters implemented
+ :widths: 5 5 5 85
+
+ * - Name
+ - Type
+ - Mode
+ - Description
+ * - ``flow_steering_mode``
+ - string
+ - runtime
+ - Controls the flow steering mode of the driver
+
+ * ``dmfs`` Device managed flow steering. In DMFS mode, the HW
+ steering entities are created and managed through firmware.
+ * ``smfs`` Software managed flow steering. In SMFS mode, the HW
+ steering entities are created and manage through the driver without
+ firmware intervention.
+
+The ``mlx5`` driver supports reloading via ``DEVLINK_CMD_RELOAD``
+
+Info versions
+=============
+
+The ``mlx5`` driver reports the following versions
+
+.. list-table:: devlink info versions implemented
+ :widths: 5 5 90
+
+ * - Name
+ - Type
+ - Description
+ * - ``fw.psid``
+ - fixed
+ - Used to represent the board id of the device.
+ * - ``fw.version``
+ - stored, running
+ - Three digit major.minor.subminor firmware version number.
diff --git a/Documentation/networking/devlink/mlxsw.rst b/Documentation/networking/devlink/mlxsw.rst
new file mode 100644
index 000000000000..ccba9769c651
--- /dev/null
+++ b/Documentation/networking/devlink/mlxsw.rst
@@ -0,0 +1,59 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=====================
+mlxsw devlink support
+=====================
+
+This document describes the devlink features implemented by the ``mlxsw``
+device driver.
+
+Parameters
+==========
+
+.. list-table:: Generic parameters implemented
+
+ * - Name
+ - Mode
+ * - ``fw_load_policy``
+ - driverinit
+
+The ``mlxsw`` driver also implements the following driver-specific
+parameters.
+
+.. list-table:: Driver-specific parameters implemented
+ :widths: 5 5 5 85
+
+ * - Name
+ - Type
+ - Mode
+ - Description
+ * - ``acl_region_rehash_interval``
+ - u32
+ - runtime
+ - Sets an interval for periodic ACL region rehashes. The value is
+ specified in milliseconds, with a minimum of ``3000``. The value of
+ ``0`` disables periodic work entirely. The first rehash will be run
+ immediately after the value is set.
+
+The ``mlxsw`` driver supports reloading via ``DEVLINK_CMD_RELOAD``
+
+Info versions
+=============
+
+The ``mlx5`` driver reports the following versions
+
+.. list-table:: devlink info versions implemented
+ :widths: 5 5 90
+
+ * - Name
+ - Type
+ - Description
+ * - ``hw.revision``
+ - fixed
+ - The hardware revision for this board
+ * - ``fw.psid``
+ - fixed
+ - Firmware PSID
+ * - ``fw.version``
+ - running
+ - Three digit firmware version
diff --git a/Documentation/networking/devlink/mv88e6xxx.rst b/Documentation/networking/devlink/mv88e6xxx.rst
new file mode 100644
index 000000000000..c621212a47a1
--- /dev/null
+++ b/Documentation/networking/devlink/mv88e6xxx.rst
@@ -0,0 +1,28 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=========================
+mv88e6xxx devlink support
+=========================
+
+This document describes the devlink features implemented by the ``mv88e6xxx``
+device driver.
+
+Parameters
+==========
+
+The ``mv88e6xxx`` driver implements the following driver-specific parameters.
+
+.. list-table:: Driver-specific parameters implemented
+ :widths: 5 5 5 85
+
+ * - Name
+ - Type
+ - Mode
+ - Description
+ * - ``ATU_hash``
+ - u8
+ - runtime
+ - Select one of four possible hashing algorithms for MAC addresses in
+ the Address Translation Unit. A value of 3 may work better than the
+ default of 1 when many MAC addresses have the same OUI. Only the
+ values 0 to 3 are valid for this parameter.
diff --git a/Documentation/networking/devlink/netdevsim.rst b/Documentation/networking/devlink/netdevsim.rst
new file mode 100644
index 000000000000..2a266b7e7b38
--- /dev/null
+++ b/Documentation/networking/devlink/netdevsim.rst
@@ -0,0 +1,72 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=========================
+netdevsim devlink support
+=========================
+
+This document describes the ``devlink`` features supported by the
+``netdevsim`` device driver.
+
+Parameters
+==========
+
+.. list-table:: Generic parameters implemented
+
+ * - Name
+ - Mode
+ * - ``max_macs``
+ - driverinit
+
+The ``netdevsim`` driver also implements the following driver-specific
+parameters.
+
+.. list-table:: Driver-specific parameters implemented
+ :widths: 5 5 5 85
+
+ * - Name
+ - Type
+ - Mode
+ - Description
+ * - ``test1``
+ - Boolean
+ - driverinit
+ - Test parameter used to show how a driver-specific devlink parameter
+ can be implemented.
+
+The ``netdevsim`` driver supports reloading via ``DEVLINK_CMD_RELOAD``
+
+Regions
+=======
+
+The ``netdevsim`` driver exposes a ``dummy`` region as an example of how the
+devlink-region interfaces work. A snapshot is taken whenever the
+``take_snapshot`` debugfs file is written to.
+
+Resources
+=========
+
+The ``netdevsim`` driver exposes resources to control the number of FIB
+entries and FIB rule entries that the driver will allow.
+
+.. code:: shell
+
+ $ devlink resource set netdevsim/netdevsim0 path /IPv4/fib size 96
+ $ devlink resource set netdevsim/netdevsim0 path /IPv4/fib-rules size 16
+ $ devlink resource set netdevsim/netdevsim0 path /IPv6/fib size 64
+ $ devlink resource set netdevsim/netdevsim0 path /IPv6/fib-rules size 16
+ $ devlink dev reload netdevsim/netdevsim0
+
+Driver-specific Traps
+=====================
+
+.. list-table:: List of Driver-specific Traps Registered by ``netdevsim``
+ :widths: 5 5 90
+
+ * - Name
+ - Type
+ - Description
+ * - ``fid_miss``
+ - ``exception``
+ - When a packet enters the device it is classified to a filtering
+ indentifier (FID) based on the ingress port and VLAN. This trap is used
+ to trap packets for which a FID could not be found
diff --git a/Documentation/networking/devlink/nfp.rst b/Documentation/networking/devlink/nfp.rst
new file mode 100644
index 000000000000..a1717db0dfcc
--- /dev/null
+++ b/Documentation/networking/devlink/nfp.rst
@@ -0,0 +1,65 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===================
+nfp devlink support
+===================
+
+This document describes the devlink features implemented by the ``nfp``
+device driver.
+
+Parameters
+==========
+
+.. list-table:: Generic parameters implemented
+
+ * - Name
+ - Mode
+ * - ``fw_load_policy``
+ - permanent
+ * - ``reset_dev_on_drv_probe``
+ - permanent
+
+Info versions
+=============
+
+The ``nfp`` driver reports the following versions
+
+.. list-table:: devlink info versions implemented
+ :widths: 5 5 90
+
+ * - Name
+ - Type
+ - Description
+ * - ``board.id``
+ - fixed
+ - Part number identifying the board design
+ * - ``board.rev``
+ - fixed
+ - Revision of the board design
+ * - ``board.manufacture``
+ - fixed
+ - Vendor of the board design
+ * - ``board.model``
+ - fixed
+ - Model name of the board design
+ * - ``fw.bundle_id``
+ - stored, running
+ - Firmware bundle id
+ * - ``fw.mgmt``
+ - stored, running
+ - Version of the management firmware
+ * - ``fw.cpld``
+ - stored, running
+ - The CPLD firmware component version
+ * - ``fw.app``
+ - stored, running
+ - The APP firmware component version
+ * - ``fw.undi``
+ - stored, running
+ - The UNDI firmware component version
+ * - ``fw.ncsi``
+ - stored, running
+ - The NSCI firmware component version
+ * - ``chip.init``
+ - stored, running
+ - The CFGR firmware component version
diff --git a/Documentation/networking/devlink/qed.rst b/Documentation/networking/devlink/qed.rst
new file mode 100644
index 000000000000..e7e17acf1eca
--- /dev/null
+++ b/Documentation/networking/devlink/qed.rst
@@ -0,0 +1,26 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===================
+qed devlink support
+===================
+
+This document describes the devlink features implemented by the ``qed`` core
+device driver.
+
+Parameters
+==========
+
+The ``qed`` driver implements the following driver-specific parameters.
+
+.. list-table:: Driver-specific parameters implemented
+ :widths: 5 5 5 85
+
+ * - Name
+ - Type
+ - Mode
+ - Description
+ * - ``iwarp_cmt``
+ - Boolean
+ - runtime
+ - Enable iWARP functionality for 100g devices. Notee that this impacts
+ L2 performance, and is therefor not enabled by default.
diff --git a/Documentation/networking/devlink/ti-cpsw-switch.rst b/Documentation/networking/devlink/ti-cpsw-switch.rst
new file mode 100644
index 000000000000..dc399e32abaa
--- /dev/null
+++ b/Documentation/networking/devlink/ti-cpsw-switch.rst
@@ -0,0 +1,31 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==============================
+ti-cpsw-switch devlink support
+==============================
+
+This document describes the devlink features implemented by the ``ti-cpsw-switch``
+device driver.
+
+Parameters
+==========
+
+The ``ti-cpsw-switch`` driver implements the following driver-specific
+parameters.
+
+.. list-table:: Driver-specific parameters implemented
+ :widths: 5 5 5 85
+
+ * - Name
+ - Type
+ - Mode
+ - Description
+ * - ``ale_bypass``
+ - Boolean
+ - runtime
+ - Enables ALE_CONTROL(4).BYPASS mode for debugging purposes. In this
+ mode, all packets will be sent to the host port only.
+ * - ``switch_mode``
+ - Boolean
+ - runtime
+ - Enable switch mode
diff --git a/Documentation/networking/dsa/sja1105.rst b/Documentation/networking/dsa/sja1105.rst
index eef20d0bcf7c..64553d8d91cb 100644
--- a/Documentation/networking/dsa/sja1105.rst
+++ b/Documentation/networking/dsa/sja1105.rst
@@ -230,12 +230,6 @@ simultaneously on two ports. The driver checks the consistency of the schedules
against this restriction and errors out when appropriate. Schedule analysis is
needed to avoid this, which is outside the scope of the document.
-At the moment, the time-aware scheduler can only be triggered based on a
-standalone clock and not based on PTP time. This means the base-time argument
-from tc-taprio is ignored and the schedule starts right away. It also means it
-is more difficult to phase-align the scheduler with the other devices in the
-network.
-
Device Tree bindings and board design
=====================================
diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst
new file mode 100644
index 000000000000..c60afba69e3c
--- /dev/null
+++ b/Documentation/networking/ethtool-netlink.rst
@@ -0,0 +1,520 @@
+=============================
+Netlink interface for ethtool
+=============================
+
+
+Basic information
+=================
+
+Netlink interface for ethtool uses generic netlink family ``ethtool``
+(userspace application should use macros ``ETHTOOL_GENL_NAME`` and
+``ETHTOOL_GENL_VERSION`` defined in ``<linux/ethtool_netlink.h>`` uapi
+header). This family does not use a specific header, all information in
+requests and replies is passed using netlink attributes.
+
+The ethtool netlink interface uses extended ACK for error and warning
+reporting, userspace application developers are encouraged to make these
+messages available to user in a suitable way.
+
+Requests can be divided into three categories: "get" (retrieving information),
+"set" (setting parameters) and "action" (invoking an action).
+
+All "set" and "action" type requests require admin privileges
+(``CAP_NET_ADMIN`` in the namespace). Most "get" type requests are allowed for
+anyone but there are exceptions (where the response contains sensitive
+information). In some cases, the request as such is allowed for anyone but
+unprivileged users have attributes with sensitive information (e.g.
+wake-on-lan password) omitted.
+
+
+Conventions
+===========
+
+Attributes which represent a boolean value usually use NLA_U8 type so that we
+can distinguish three states: "on", "off" and "not present" (meaning the
+information is not available in "get" requests or value is not to be changed
+in "set" requests). For these attributes, the "true" value should be passed as
+number 1 but any non-zero value should be understood as "true" by recipient.
+In the tables below, "bool" denotes NLA_U8 attributes interpreted in this way.
+
+In the message structure descriptions below, if an attribute name is suffixed
+with "+", parent nest can contain multiple attributes of the same type. This
+implements an array of entries.
+
+
+Request header
+==============
+
+Each request or reply message contains a nested attribute with common header.
+Structure of this header is
+
+ ============================== ====== =============================
+ ``ETHTOOL_A_HEADER_DEV_INDEX`` u32 device ifindex
+ ``ETHTOOL_A_HEADER_DEV_NAME`` string device name
+ ``ETHTOOL_A_HEADER_FLAGS`` u32 flags common for all requests
+ ============================== ====== =============================
+
+``ETHTOOL_A_HEADER_DEV_INDEX`` and ``ETHTOOL_A_HEADER_DEV_NAME`` identify the
+device message relates to. One of them is sufficient in requests, if both are
+used, they must identify the same device. Some requests, e.g. global string
+sets, do not require device identification. Most ``GET`` requests also allow
+dump requests without device identification to query the same information for
+all devices providing it (each device in a separate message).
+
+``ETHTOOL_A_HEADER_FLAGS`` is a bitmap of request flags common for all request
+types. The interpretation of these flags is the same for all request types but
+the flags may not apply to requests. Recognized flags are:
+
+ ================================= ===================================
+ ``ETHTOOL_FLAG_COMPACT_BITSETS`` use compact format bitsets in reply
+ ``ETHTOOL_FLAG_OMIT_REPLY`` omit optional reply (_SET and _ACT)
+ ================================= ===================================
+
+New request flags should follow the general idea that if the flag is not set,
+the behaviour is backward compatible, i.e. requests from old clients not aware
+of the flag should be interpreted the way the client expects. A client must
+not set flags it does not understand.
+
+
+Bit sets
+========
+
+For short bitmaps of (reasonably) fixed length, standard ``NLA_BITFIELD32``
+type is used. For arbitrary length bitmaps, ethtool netlink uses a nested
+attribute with contents of one of two forms: compact (two binary bitmaps
+representing bit values and mask of affected bits) and bit-by-bit (list of
+bits identified by either index or name).
+
+Verbose (bit-by-bit) bitsets allow sending symbolic names for bits together
+with their values which saves a round trip (when the bitset is passed in a
+request) or at least a second request (when the bitset is in a reply). This is
+useful for one shot applications like traditional ethtool command. On the
+other hand, long running applications like ethtool monitor (displaying
+notifications) or network management daemons may prefer fetching the names
+only once and using compact form to save message size. Notifications from
+ethtool netlink interface always use compact form for bitsets.
+
+A bitset can represent either a value/mask pair (``ETHTOOL_A_BITSET_NOMASK``
+not set) or a single bitmap (``ETHTOOL_A_BITSET_NOMASK`` set). In requests
+modifying a bitmap, the former changes the bit set in mask to values set in
+value and preserves the rest; the latter sets the bits set in the bitmap and
+clears the rest.
+
+Compact form: nested (bitset) atrribute contents:
+
+ ============================ ====== ============================
+ ``ETHTOOL_A_BITSET_NOMASK`` flag no mask, only a list
+ ``ETHTOOL_A_BITSET_SIZE`` u32 number of significant bits
+ ``ETHTOOL_A_BITSET_VALUE`` binary bitmap of bit values
+ ``ETHTOOL_A_BITSET_MASK`` binary bitmap of valid bits
+ ============================ ====== ============================
+
+Value and mask must have length at least ``ETHTOOL_A_BITSET_SIZE`` bits
+rounded up to a multiple of 32 bits. They consist of 32-bit words in host byte
+order, words ordered from least significant to most significant (i.e. the same
+way as bitmaps are passed with ioctl interface).
+
+For compact form, ``ETHTOOL_A_BITSET_SIZE`` and ``ETHTOOL_A_BITSET_VALUE`` are
+mandatory. ``ETHTOOL_A_BITSET_MASK`` attribute is mandatory if
+``ETHTOOL_A_BITSET_NOMASK`` is not set (bitset represents a value/mask pair);
+if ``ETHTOOL_A_BITSET_NOMASK`` is not set, ``ETHTOOL_A_BITSET_MASK`` is not
+allowed (bitset represents a single bitmap.
+
+Kernel bit set length may differ from userspace length if older application is
+used on newer kernel or vice versa. If userspace bitmap is longer, an error is
+issued only if the request actually tries to set values of some bits not
+recognized by kernel.
+
+Bit-by-bit form: nested (bitset) attribute contents:
+
+ +------------------------------------+--------+-----------------------------+
+ | ``ETHTOOL_A_BITSET_NOMASK`` | flag | no mask, only a list |
+ +------------------------------------+--------+-----------------------------+
+ | ``ETHTOOL_A_BITSET_SIZE`` | u32 | number of significant bits |
+ +------------------------------------+--------+-----------------------------+
+ | ``ETHTOOL_A_BITSET_BITS`` | nested | array of bits |
+ +-+----------------------------------+--------+-----------------------------+
+ | | ``ETHTOOL_A_BITSET_BITS_BIT+`` | nested | one bit |
+ +-+-+--------------------------------+--------+-----------------------------+
+ | | | ``ETHTOOL_A_BITSET_BIT_INDEX`` | u32 | bit index (0 for LSB) |
+ +-+-+--------------------------------+--------+-----------------------------+
+ | | | ``ETHTOOL_A_BITSET_BIT_NAME`` | string | bit name |
+ +-+-+--------------------------------+--------+-----------------------------+
+ | | | ``ETHTOOL_A_BITSET_BIT_VALUE`` | flag | present if bit is set |
+ +-+-+--------------------------------+--------+-----------------------------+
+
+Bit size is optional for bit-by-bit form. ``ETHTOOL_A_BITSET_BITS`` nest can
+only contain ``ETHTOOL_A_BITSET_BITS_BIT`` attributes but there can be an
+arbitrary number of them. A bit may be identified by its index or by its
+name. When used in requests, listed bits are set to 0 or 1 according to
+``ETHTOOL_A_BITSET_BIT_VALUE``, the rest is preserved. A request fails if
+index exceeds kernel bit length or if name is not recognized.
+
+When ``ETHTOOL_A_BITSET_NOMASK`` flag is present, bitset is interpreted as
+a simple bitmap. ``ETHTOOL_A_BITSET_BIT_VALUE`` attributes are not used in
+such case. Such bitset represents a bitmap with listed bits set and the rest
+zero.
+
+In requests, application can use either form. Form used by kernel in reply is
+determined by ``ETHTOOL_FLAG_COMPACT_BITSETS`` flag in flags field of request
+header. Semantics of value and mask depends on the attribute.
+
+
+List of message types
+=====================
+
+All constants identifying message types use ``ETHTOOL_CMD_`` prefix and suffix
+according to message purpose:
+
+ ============== ======================================
+ ``_GET`` userspace request to retrieve data
+ ``_SET`` userspace request to set data
+ ``_ACT`` userspace request to perform an action
+ ``_GET_REPLY`` kernel reply to a ``GET`` request
+ ``_SET_REPLY`` kernel reply to a ``SET`` request
+ ``_ACT_REPLY`` kernel reply to an ``ACT`` request
+ ``_NTF`` kernel notification
+ ============== ======================================
+
+Userspace to kernel:
+
+ ===================================== ================================
+ ``ETHTOOL_MSG_STRSET_GET`` get string set
+ ``ETHTOOL_MSG_LINKINFO_GET`` get link settings
+ ``ETHTOOL_MSG_LINKINFO_SET`` set link settings
+ ``ETHTOOL_MSG_LINKMODES_GET`` get link modes info
+ ``ETHTOOL_MSG_LINKMODES_SET`` set link modes info
+ ``ETHTOOL_MSG_LINKSTATE_GET`` get link state
+ ===================================== ================================
+
+Kernel to userspace:
+
+ ===================================== ================================
+ ``ETHTOOL_MSG_STRSET_GET_REPLY`` string set contents
+ ``ETHTOOL_MSG_LINKINFO_GET_REPLY`` link settings
+ ``ETHTOOL_MSG_LINKINFO_NTF`` link settings notification
+ ``ETHTOOL_MSG_LINKMODES_GET_REPLY`` link modes info
+ ``ETHTOOL_MSG_LINKMODES_NTF`` link modes notification
+ ``ETHTOOL_MSG_LINKSTATE_GET_REPLY`` link state info
+ ===================================== ================================
+
+``GET`` requests are sent by userspace applications to retrieve device
+information. They usually do not contain any message specific attributes.
+Kernel replies with corresponding "GET_REPLY" message. For most types, ``GET``
+request with ``NLM_F_DUMP`` and no device identification can be used to query
+the information for all devices supporting the request.
+
+If the data can be also modified, corresponding ``SET`` message with the same
+layout as corresponding ``GET_REPLY`` is used to request changes. Only
+attributes where a change is requested are included in such request (also, not
+all attributes may be changed). Replies to most ``SET`` request consist only
+of error code and extack; if kernel provides additional data, it is sent in
+the form of corresponding ``SET_REPLY`` message which can be suppressed by
+setting ``ETHTOOL_FLAG_OMIT_REPLY`` flag in request header.
+
+Data modification also triggers sending a ``NTF`` message with a notification.
+These usually bear only a subset of attributes which was affected by the
+change. The same notification is issued if the data is modified using other
+means (mostly ioctl ethtool interface). Unlike notifications from ethtool
+netlink code which are only sent if something actually changed, notifications
+triggered by ioctl interface may be sent even if the request did not actually
+change any data.
+
+``ACT`` messages request kernel (driver) to perform a specific action. If some
+information is reported by kernel (which can be suppressed by setting
+``ETHTOOL_FLAG_OMIT_REPLY`` flag in request header), the reply takes form of
+an ``ACT_REPLY`` message. Performing an action also triggers a notification
+(``NTF`` message).
+
+Later sections describe the format and semantics of these messages.
+
+
+STRSET_GET
+==========
+
+Requests contents of a string set as provided by ioctl commands
+``ETHTOOL_GSSET_INFO`` and ``ETHTOOL_GSTRINGS.`` String sets are not user
+writeable so that the corresponding ``STRSET_SET`` message is only used in
+kernel replies. There are two types of string sets: global (independent of
+a device, e.g. device feature names) and device specific (e.g. device private
+flags).
+
+Request contents:
+
+ +---------------------------------------+--------+------------------------+
+ | ``ETHTOOL_A_STRSET_HEADER`` | nested | request header |
+ +---------------------------------------+--------+------------------------+
+ | ``ETHTOOL_A_STRSET_STRINGSETS`` | nested | string set to request |
+ +-+-------------------------------------+--------+------------------------+
+ | | ``ETHTOOL_A_STRINGSETS_STRINGSET+`` | nested | one string set |
+ +-+-+-----------------------------------+--------+------------------------+
+ | | | ``ETHTOOL_A_STRINGSET_ID`` | u32 | set id |
+ +-+-+-----------------------------------+--------+------------------------+
+
+Kernel response contents:
+
+ +---------------------------------------+--------+-----------------------+
+ | ``ETHTOOL_A_STRSET_HEADER`` | nested | reply header |
+ +---------------------------------------+--------+-----------------------+
+ | ``ETHTOOL_A_STRSET_STRINGSETS`` | nested | array of string sets |
+ +-+-------------------------------------+--------+-----------------------+
+ | | ``ETHTOOL_A_STRINGSETS_STRINGSET+`` | nested | one string set |
+ +-+-+-----------------------------------+--------+-----------------------+
+ | | | ``ETHTOOL_A_STRINGSET_ID`` | u32 | set id |
+ +-+-+-----------------------------------+--------+-----------------------+
+ | | | ``ETHTOOL_A_STRINGSET_COUNT`` | u32 | number of strings |
+ +-+-+-----------------------------------+--------+-----------------------+
+ | | | ``ETHTOOL_A_STRINGSET_STRINGS`` | nested | array of strings |
+ +-+-+-+---------------------------------+--------+-----------------------+
+ | | | | ``ETHTOOL_A_STRINGS_STRING+`` | nested | one string |
+ +-+-+-+-+-------------------------------+--------+-----------------------+
+ | | | | | ``ETHTOOL_A_STRING_INDEX`` | u32 | string index |
+ +-+-+-+-+-------------------------------+--------+-----------------------+
+ | | | | | ``ETHTOOL_A_STRING_VALUE`` | string | string value |
+ +-+-+-+-+-------------------------------+--------+-----------------------+
+ | ``ETHTOOL_A_STRSET_COUNTS_ONLY`` | flag | return only counts |
+ +---------------------------------------+--------+-----------------------+
+
+Device identification in request header is optional. Depending on its presence
+a and ``NLM_F_DUMP`` flag, there are three type of ``STRSET_GET`` requests:
+
+ - no ``NLM_F_DUMP,`` no device: get "global" stringsets
+ - no ``NLM_F_DUMP``, with device: get string sets related to the device
+ - ``NLM_F_DUMP``, no device: get device related string sets for all devices
+
+If there is no ``ETHTOOL_A_STRSET_STRINGSETS`` array, all string sets of
+requested type are returned, otherwise only those specified in the request.
+Flag ``ETHTOOL_A_STRSET_COUNTS_ONLY`` tells kernel to only return string
+counts of the sets, not the actual strings.
+
+
+LINKINFO_GET
+============
+
+Requests link settings as provided by ``ETHTOOL_GLINKSETTINGS`` except for
+link modes and autonegotiation related information. The request does not use
+any attributes.
+
+Request contents:
+
+ ==================================== ====== ==========================
+ ``ETHTOOL_A_LINKINFO_HEADER`` nested request header
+ ==================================== ====== ==========================
+
+Kernel response contents:
+
+ ==================================== ====== ==========================
+ ``ETHTOOL_A_LINKINFO_HEADER`` nested reply header
+ ``ETHTOOL_A_LINKINFO_PORT`` u8 physical port
+ ``ETHTOOL_A_LINKINFO_PHYADDR`` u8 phy MDIO address
+ ``ETHTOOL_A_LINKINFO_TP_MDIX`` u8 MDI(-X) status
+ ``ETHTOOL_A_LINKINFO_TP_MDIX_CTRL`` u8 MDI(-X) control
+ ``ETHTOOL_A_LINKINFO_TRANSCEIVER`` u8 transceiver
+ ==================================== ====== ==========================
+
+Attributes and their values have the same meaning as matching members of the
+corresponding ioctl structures.
+
+``LINKINFO_GET`` allows dump requests (kernel returns reply message for all
+devices supporting the request).
+
+
+LINKINFO_SET
+============
+
+``LINKINFO_SET`` request allows setting some of the attributes reported by
+``LINKINFO_GET``.
+
+Request contents:
+
+ ==================================== ====== ==========================
+ ``ETHTOOL_A_LINKINFO_HEADER`` nested request header
+ ``ETHTOOL_A_LINKINFO_PORT`` u8 physical port
+ ``ETHTOOL_A_LINKINFO_PHYADDR`` u8 phy MDIO address
+ ``ETHTOOL_A_LINKINFO_TP_MDIX_CTRL`` u8 MDI(-X) control
+ ==================================== ====== ==========================
+
+MDI(-X) status and transceiver cannot be set, request with the corresponding
+attributes is rejected.
+
+
+LINKMODES_GET
+=============
+
+Requests link modes (supported, advertised and peer advertised) and related
+information (autonegotiation status, link speed and duplex) as provided by
+``ETHTOOL_GLINKSETTINGS``. The request does not use any attributes.
+
+Request contents:
+
+ ==================================== ====== ==========================
+ ``ETHTOOL_A_LINKMODES_HEADER`` nested request header
+ ==================================== ====== ==========================
+
+Kernel response contents:
+
+ ==================================== ====== ==========================
+ ``ETHTOOL_A_LINKMODES_HEADER`` nested reply header
+ ``ETHTOOL_A_LINKMODES_AUTONEG`` u8 autonegotiation status
+ ``ETHTOOL_A_LINKMODES_OURS`` bitset advertised link modes
+ ``ETHTOOL_A_LINKMODES_PEER`` bitset partner link modes
+ ``ETHTOOL_A_LINKMODES_SPEED`` u32 link speed (Mb/s)
+ ``ETHTOOL_A_LINKMODES_DUPLEX`` u8 duplex mode
+ ==================================== ====== ==========================
+
+For ``ETHTOOL_A_LINKMODES_OURS``, value represents advertised modes and mask
+represents supported modes. ``ETHTOOL_A_LINKMODES_PEER`` in the reply is a bit
+list.
+
+``LINKMODES_GET`` allows dump requests (kernel returns reply messages for all
+devices supporting the request).
+
+
+LINKMODES_SET
+=============
+
+Request contents:
+
+ ==================================== ====== ==========================
+ ``ETHTOOL_A_LINKMODES_HEADER`` nested request header
+ ``ETHTOOL_A_LINKMODES_AUTONEG`` u8 autonegotiation status
+ ``ETHTOOL_A_LINKMODES_OURS`` bitset advertised link modes
+ ``ETHTOOL_A_LINKMODES_PEER`` bitset partner link modes
+ ``ETHTOOL_A_LINKMODES_SPEED`` u32 link speed (Mb/s)
+ ``ETHTOOL_A_LINKMODES_DUPLEX`` u8 duplex mode
+ ==================================== ====== ==========================
+
+``ETHTOOL_A_LINKMODES_OURS`` bit set allows setting advertised link modes. If
+autonegotiation is on (either set now or kept from before), advertised modes
+are not changed (no ``ETHTOOL_A_LINKMODES_OURS`` attribute) and at least one
+of speed and duplex is specified, kernel adjusts advertised modes to all
+supported modes matching speed, duplex or both (whatever is specified). This
+autoselection is done on ethtool side with ioctl interface, netlink interface
+is supposed to allow requesting changes without knowing what exactly kernel
+supports.
+
+
+LINKSTATE_GET
+=============
+
+Requests link state information. At the moment, only link up/down flag (as
+provided by ``ETHTOOL_GLINK`` ioctl command) is provided but some future
+extensions are planned (e.g. link down reason). This request does not have any
+attributes.
+
+Request contents:
+
+ ==================================== ====== ==========================
+ ``ETHTOOL_A_LINKSTATE_HEADER`` nested request header
+ ==================================== ====== ==========================
+
+Kernel response contents:
+
+ ==================================== ====== ==========================
+ ``ETHTOOL_A_LINKSTATE_HEADER`` nested reply header
+ ``ETHTOOL_A_LINKSTATE_LINK`` bool link state (up/down)
+ ==================================== ====== ==========================
+
+For most NIC drivers, the value of ``ETHTOOL_A_LINKSTATE_LINK`` returns
+carrier flag provided by ``netif_carrier_ok()`` but there are drivers which
+define their own handler.
+
+``LINKSTATE_GET`` allows dump requests (kernel returns reply messages for all
+devices supporting the request).
+
+
+Request translation
+===================
+
+The following table maps ioctl commands to netlink commands providing their
+functionality. Entries with "n/a" in right column are commands which do not
+have their netlink replacement yet.
+
+ =================================== =====================================
+ ioctl command netlink command
+ =================================== =====================================
+ ``ETHTOOL_GSET`` ``ETHTOOL_MSG_LINKINFO_GET``
+ ``ETHTOOL_MSG_LINKMODES_GET``
+ ``ETHTOOL_SSET`` ``ETHTOOL_MSG_LINKINFO_SET``
+ ``ETHTOOL_MSG_LINKMODES_SET``
+ ``ETHTOOL_GDRVINFO`` n/a
+ ``ETHTOOL_GREGS`` n/a
+ ``ETHTOOL_GWOL`` n/a
+ ``ETHTOOL_SWOL`` n/a
+ ``ETHTOOL_GMSGLVL`` n/a
+ ``ETHTOOL_SMSGLVL`` n/a
+ ``ETHTOOL_NWAY_RST`` n/a
+ ``ETHTOOL_GLINK`` ``ETHTOOL_MSG_LINKSTATE_GET``
+ ``ETHTOOL_GEEPROM`` n/a
+ ``ETHTOOL_SEEPROM`` n/a
+ ``ETHTOOL_GCOALESCE`` n/a
+ ``ETHTOOL_SCOALESCE`` n/a
+ ``ETHTOOL_GRINGPARAM`` n/a
+ ``ETHTOOL_SRINGPARAM`` n/a
+ ``ETHTOOL_GPAUSEPARAM`` n/a
+ ``ETHTOOL_SPAUSEPARAM`` n/a
+ ``ETHTOOL_GRXCSUM`` n/a
+ ``ETHTOOL_SRXCSUM`` n/a
+ ``ETHTOOL_GTXCSUM`` n/a
+ ``ETHTOOL_STXCSUM`` n/a
+ ``ETHTOOL_GSG`` n/a
+ ``ETHTOOL_SSG`` n/a
+ ``ETHTOOL_TEST`` n/a
+ ``ETHTOOL_GSTRINGS`` ``ETHTOOL_MSG_STRSET_GET``
+ ``ETHTOOL_PHYS_ID`` n/a
+ ``ETHTOOL_GSTATS`` n/a
+ ``ETHTOOL_GTSO`` n/a
+ ``ETHTOOL_STSO`` n/a
+ ``ETHTOOL_GPERMADDR`` rtnetlink ``RTM_GETLINK``
+ ``ETHTOOL_GUFO`` n/a
+ ``ETHTOOL_SUFO`` n/a
+ ``ETHTOOL_GGSO`` n/a
+ ``ETHTOOL_SGSO`` n/a
+ ``ETHTOOL_GFLAGS`` n/a
+ ``ETHTOOL_SFLAGS`` n/a
+ ``ETHTOOL_GPFLAGS`` n/a
+ ``ETHTOOL_SPFLAGS`` n/a
+ ``ETHTOOL_GRXFH`` n/a
+ ``ETHTOOL_SRXFH`` n/a
+ ``ETHTOOL_GGRO`` n/a
+ ``ETHTOOL_SGRO`` n/a
+ ``ETHTOOL_GRXRINGS`` n/a
+ ``ETHTOOL_GRXCLSRLCNT`` n/a
+ ``ETHTOOL_GRXCLSRULE`` n/a
+ ``ETHTOOL_GRXCLSRLALL`` n/a
+ ``ETHTOOL_SRXCLSRLDEL`` n/a
+ ``ETHTOOL_SRXCLSRLINS`` n/a
+ ``ETHTOOL_FLASHDEV`` n/a
+ ``ETHTOOL_RESET`` n/a
+ ``ETHTOOL_SRXNTUPLE`` n/a
+ ``ETHTOOL_GRXNTUPLE`` n/a
+ ``ETHTOOL_GSSET_INFO`` ``ETHTOOL_MSG_STRSET_GET``
+ ``ETHTOOL_GRXFHINDIR`` n/a
+ ``ETHTOOL_SRXFHINDIR`` n/a
+ ``ETHTOOL_GFEATURES`` n/a
+ ``ETHTOOL_SFEATURES`` n/a
+ ``ETHTOOL_GCHANNELS`` n/a
+ ``ETHTOOL_SCHANNELS`` n/a
+ ``ETHTOOL_SET_DUMP`` n/a
+ ``ETHTOOL_GET_DUMP_FLAG`` n/a
+ ``ETHTOOL_GET_DUMP_DATA`` n/a
+ ``ETHTOOL_GET_TS_INFO`` n/a
+ ``ETHTOOL_GMODULEINFO`` n/a
+ ``ETHTOOL_GMODULEEEPROM`` n/a
+ ``ETHTOOL_GEEE`` n/a
+ ``ETHTOOL_SEEE`` n/a
+ ``ETHTOOL_GRSSH`` n/a
+ ``ETHTOOL_SRSSH`` n/a
+ ``ETHTOOL_GTUNABLE`` n/a
+ ``ETHTOOL_STUNABLE`` n/a
+ ``ETHTOOL_GPHYSTATS`` n/a
+ ``ETHTOOL_PERQUEUE`` n/a
+ ``ETHTOOL_GLINKSETTINGS`` ``ETHTOOL_MSG_LINKINFO_GET``
+ ``ETHTOOL_MSG_LINKMODES_GET``
+ ``ETHTOOL_SLINKSETTINGS`` ``ETHTOOL_MSG_LINKINFO_SET``
+ ``ETHTOOL_MSG_LINKMODES_SET``
+ ``ETHTOOL_PHY_GTUNABLE`` n/a
+ ``ETHTOOL_PHY_STUNABLE`` n/a
+ ``ETHTOOL_GFECPARAM`` n/a
+ ``ETHTOOL_SFECPARAM`` n/a
+ =================================== =====================================
diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index 5acab1290e03..d07d9855dcd3 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -13,9 +13,8 @@ Contents:
can_ucan_protocol
device_drivers/index
dsa/index
- devlink-info-versions
- devlink-trap
- devlink-trap-netdevsim
+ devlink/index
+ ethtool-netlink
ieee802154
j1939
kapi
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index fd26788e8c96..5f53faff4e25 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -479,6 +479,10 @@ tcp_no_metrics_save - BOOLEAN
degradation. If set, TCP will not cache metrics on closing
connections.
+tcp_no_ssthresh_metrics_save - BOOLEAN
+ Controls whether TCP saves ssthresh metrics in the route cache.
+ Default is 1, which disables ssthresh metrics.
+
tcp_orphan_retries - INTEGER
This value influences the timeout of a locally closed TCP connection,
when RTO retransmissions remain unacknowledged.
@@ -603,7 +607,7 @@ tcp_synack_retries - INTEGER
with the current initial RTO of 1second. With this the final timeout
for a passive TCP connection will happen after 63seconds.
-tcp_syncookies - BOOLEAN
+tcp_syncookies - INTEGER
Only valid when the kernel was compiled with CONFIG_SYN_COOKIES
Send out syncookies when the syn backlog queue of a socket
overflows. This is to prevent against the common 'SYN flood attack'
diff --git a/Documentation/networking/j1939.rst b/Documentation/networking/j1939.rst
index dc60b13fcd09..f5be243d250a 100644
--- a/Documentation/networking/j1939.rst
+++ b/Documentation/networking/j1939.rst
@@ -339,7 +339,7 @@ To claim an address following code example can be used:
.pgn = J1939_PGN_ADDRESS_CLAIMED,
.pgn_mask = J1939_PGN_PDU1_MAX,
}, {
- .pgn = J1939_PGN_ADDRESS_REQUEST,
+ .pgn = J1939_PGN_REQUEST,
.pgn_mask = J1939_PGN_PDU1_MAX,
}, {
.pgn = J1939_PGN_ADDRESS_COMMANDED,
diff --git a/Documentation/networking/netdev-FAQ.rst b/Documentation/networking/netdev-FAQ.rst
index 642fa963be3c..d5c9320901c3 100644
--- a/Documentation/networking/netdev-FAQ.rst
+++ b/Documentation/networking/netdev-FAQ.rst
@@ -34,8 +34,8 @@ the names, the ``net`` tree is for fixes to existing code already in the
mainline tree from Linus, and ``net-next`` is where the new code goes
for the future release. You can find the trees here:
-- https://git.kernel.org/pub/scm/linux/kernel/git/davem/net.git
-- https://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next.git
+- https://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git
+- https://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git
Q: How often do changes from these trees make it to the mainline Linus tree?
----------------------------------------------------------------------------
diff --git a/Documentation/networking/phy.rst b/Documentation/networking/phy.rst
index e0a7c7af6525..1e4735cc0553 100644
--- a/Documentation/networking/phy.rst
+++ b/Documentation/networking/phy.rst
@@ -267,6 +267,24 @@ Some of the interface modes are described below:
duplex, pause or other settings. This is dependent on the MAC and/or
PHY behaviour.
+``PHY_INTERFACE_MODE_10GBASER``
+ This is the IEEE 802.3 Clause 49 defined 10GBASE-R protocol used with
+ various different mediums. Please refer to the IEEE standard for a
+ definition of this.
+
+ Note: 10GBASE-R is just one protocol that can be used with XFI and SFI.
+ XFI and SFI permit multiple protocols over a single SERDES lane, and
+ also defines the electrical characteristics of the signals with a host
+ compliance board plugged into the host XFP/SFP connector. Therefore,
+ XFI and SFI are not PHY interface types in their own right.
+
+``PHY_INTERFACE_MODE_10GKR``
+ This is the IEEE 802.3 Clause 49 defined 10GBASE-R with Clause 73
+ autonegotiation. Please refer to the IEEE standard for further
+ information.
+
+ Note: due to legacy usage, some 10GBASE-R usage incorrectly makes
+ use of this definition.
Pause frames / flow control
===========================
diff --git a/Documentation/networking/sfp-phylink.rst b/Documentation/networking/sfp-phylink.rst
index a5e00a159d21..d753a309f9d1 100644
--- a/Documentation/networking/sfp-phylink.rst
+++ b/Documentation/networking/sfp-phylink.rst
@@ -251,7 +251,8 @@ this documentation.
phylink_mac_change(priv->phylink, link_is_up);
where ``link_is_up`` is true if the link is currently up or false
- otherwise.
+ otherwise. If a MAC is unable to provide these interrupts, then
+ it should set ``priv->phylink_config.pcs_poll = true;`` in step 9.
11. Verify that the driver does not call::
diff --git a/Documentation/process/coding-style.rst b/Documentation/process/coding-style.rst
index ada573b7d703..edb296c52f61 100644
--- a/Documentation/process/coding-style.rst
+++ b/Documentation/process/coding-style.rst
@@ -988,7 +988,7 @@ Similarly, if you need to calculate the size of some structure member, use
.. code-block:: c
- #define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f))
+ #define sizeof_field(t, f) (sizeof(((t*)0)->f))
There are also min() and max() macros that do strict type checking if you
need them. Feel free to peruse that header file to see what else is already
diff --git a/Documentation/process/index.rst b/Documentation/process/index.rst
index 21aa7d5358e6..6399d92f0b21 100644
--- a/Documentation/process/index.rst
+++ b/Documentation/process/index.rst
@@ -60,6 +60,7 @@ lack of a better place.
volatile-considered-harmful
botching-up-ioctls
clang-format
+ ../riscv/patch-acceptance
.. only:: subproject and html
diff --git a/Documentation/riscv/index.rst b/Documentation/riscv/index.rst
index 215fd3c1f2d5..fa33bffd8992 100644
--- a/Documentation/riscv/index.rst
+++ b/Documentation/riscv/index.rst
@@ -7,6 +7,7 @@ RISC-V architecture
boot-image-header
pmu
+ patch-acceptance
.. only:: subproject and html
diff --git a/Documentation/riscv/patch-acceptance.rst b/Documentation/riscv/patch-acceptance.rst
new file mode 100644
index 000000000000..dfe0ac5624fb
--- /dev/null
+++ b/Documentation/riscv/patch-acceptance.rst
@@ -0,0 +1,35 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+arch/riscv maintenance guidelines for developers
+================================================
+
+Overview
+--------
+The RISC-V instruction set architecture is developed in the open:
+in-progress drafts are available for all to review and to experiment
+with implementations. New module or extension drafts can change
+during the development process - sometimes in ways that are
+incompatible with previous drafts. This flexibility can present a
+challenge for RISC-V Linux maintenance. Linux maintainers disapprove
+of churn, and the Linux development process prefers well-reviewed and
+tested code over experimental code. We wish to extend these same
+principles to the RISC-V-related code that will be accepted for
+inclusion in the kernel.
+
+Submit Checklist Addendum
+-------------------------
+We'll only accept patches for new modules or extensions if the
+specifications for those modules or extensions are listed as being
+"Frozen" or "Ratified" by the RISC-V Foundation. (Developers may, of
+course, maintain their own Linux kernel trees that contain code for
+any draft extensions that they wish.)
+
+Additionally, the RISC-V specification allows implementors to create
+their own custom extensions. These custom extensions aren't required
+to go through any review or ratification process by the RISC-V
+Foundation. To avoid the maintenance complexity and potential
+performance impact of adding kernel code for implementor-specific
+RISC-V extensions, we'll only to accept patches for extensions that
+have been officially frozen or ratified by the RISC-V Foundation.
+(Implementors, may, of course, maintain their own Linux kernel trees
+containing code for any custom extensions that they wish.)
diff --git a/Documentation/scsi/smartpqi.txt b/Documentation/scsi/smartpqi.txt
index 201f80c7c050..df129f55ace5 100644
--- a/Documentation/scsi/smartpqi.txt
+++ b/Documentation/scsi/smartpqi.txt
@@ -29,7 +29,7 @@ smartpqi specific entries in /sys
smartpqi host attributes:
-------------------------
/sys/class/scsi_host/host*/rescan
- /sys/class/scsi_host/host*/version
+ /sys/class/scsi_host/host*/driver_version
The host rescan attribute is a write only attribute. Writing to this
attribute will trigger the driver to scan for new, changed, or removed
diff --git a/Documentation/translations/it_IT/process/coding-style.rst b/Documentation/translations/it_IT/process/coding-style.rst
index 8995d2d19f20..8725f2b9e960 100644
--- a/Documentation/translations/it_IT/process/coding-style.rst
+++ b/Documentation/translations/it_IT/process/coding-style.rst
@@ -1005,7 +1005,7 @@ struttura, usate
.. code-block:: c
- #define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f))
+ #define sizeof_field(t, f) (sizeof(((t*)0)->f))
Ci sono anche le macro min() e max() che, se vi serve, effettuano un controllo
rigido sui tipi. Sentitevi liberi di leggere attentamente questo file
diff --git a/Documentation/translations/zh_CN/process/coding-style.rst b/Documentation/translations/zh_CN/process/coding-style.rst
index 4f6237392e65..eae10bc7f86f 100644
--- a/Documentation/translations/zh_CN/process/coding-style.rst
+++ b/Documentation/translations/zh_CN/process/coding-style.rst
@@ -826,7 +826,7 @@ inline gcc 也可以自动使其内联。而且其他用户可能会要求移除
.. code-block:: c
- #define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f))
+ #define sizeof_field(t, f) (sizeof(((t*)0)->f))
还有可以做严格的类型检查的 min() 和 max() 宏,如果你需要可以使用它们。你可以
自己看看那个头文件里还定义了什么你可以拿来用的东西,如果有定义的话,你就不应
diff --git a/MAINTAINERS b/MAINTAINERS
index d09519805cc0..2549f10eb0b1 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -771,6 +771,8 @@ F: drivers/thermal/thermal_mmio.c
AMAZON ETHERNET DRIVERS
M: Netanel Belgazal <[email protected]>
+M: Arthur Kiyanovski <[email protected]>
+R: Guy Tzalik <[email protected]>
R: Saeed Bishara <[email protected]>
R: Zorik Machulsky <[email protected]>
@@ -2272,6 +2274,7 @@ F: drivers/*/*s3c64xx*
F: drivers/*/*s5pv210*
F: drivers/memory/samsung/
F: drivers/soc/samsung/
+F: drivers/tty/serial/samsung*
F: include/linux/soc/samsung/
F: Documentation/arm/samsung/
F: Documentation/devicetree/bindings/arm/samsung/
@@ -4845,6 +4848,7 @@ S: Supported
F: net/core/devlink.c
F: include/net/devlink.h
F: include/uapi/linux/devlink.h
+F: Documentation/networking/devlink
DIALOG SEMICONDUCTOR DRIVERS
M: Support Opensource <[email protected]>
@@ -4970,6 +4974,7 @@ F: include/linux/dma-buf*
F: include/linux/reservation.h
F: include/linux/*fence.h
F: Documentation/driver-api/dma-buf.rst
+K: dma_(buf|fence|resv)
T: git git://anongit.freedesktop.org/drm/drm-misc
DMA GENERIC OFFLOAD ENGINE SUBSYSTEM
@@ -4999,7 +5004,7 @@ F: include/linux/dma-mapping.h
F: include/linux/dma-noncoherent.h
DMC FREQUENCY DRIVER FOR SAMSUNG EXYNOS5422
-M: Lukasz Luba <[email protected]>
+M: Lukasz Luba <[email protected]>
S: Maintained
@@ -6025,6 +6030,7 @@ M: Yash Shah <[email protected]>
S: Supported
F: drivers/edac/sifive_edac.c
+F: drivers/soc/sifive_l2_cache.c
EDAC-SKYLAKE
M: Tony Luck <[email protected]>
@@ -7031,6 +7037,7 @@ L: [email protected]
S: Maintained
F: Documentation/firmware-guide/acpi/gpio-properties.rst
F: drivers/gpio/gpiolib-acpi.c
+F: drivers/gpio/gpiolib-acpi.h
GPIO IR Transmitter
M: Sean Young <[email protected]>
@@ -9038,7 +9045,6 @@ F: include/linux/umh.h
KERNEL VIRTUAL MACHINE (KVM)
M: Paolo Bonzini <[email protected]>
-M: Radim Krčmář <[email protected]>
W: http://www.linux-kvm.org
T: git git://git.kernel.org/pub/scm/virt/kvm/kvm.git
@@ -9073,9 +9079,9 @@ F: virt/kvm/arm/
F: include/kvm/arm_*
KERNEL VIRTUAL MACHINE FOR MIPS (KVM/mips)
-M: James Hogan <[email protected]>
-S: Supported
+S: Orphan
F: arch/mips/include/uapi/asm/kvm*
F: arch/mips/include/asm/kvm*
F: arch/mips/kvm/
@@ -9110,7 +9116,6 @@ F: tools/testing/selftests/kvm/*/s390x/
KERNEL VIRTUAL MACHINE FOR X86 (KVM/x86)
M: Paolo Bonzini <[email protected]>
-M: Radim Krčmář <[email protected]>
R: Sean Christopherson <[email protected]>
R: Vitaly Kuznetsov <[email protected]>
R: Wanpeng Li <[email protected]>
@@ -9885,7 +9890,7 @@ S: Maintained
F: drivers/net/dsa/mv88e6xxx/
F: include/linux/platform_data/mv88e6xxx.h
F: Documentation/devicetree/bindings/net/dsa/marvell.txt
-F: Documentation/networking/devlink-params-mv88e6xxx.txt
+F: Documentation/networking/devlink/mv88e6xxx.rst
MARVELL ARMADA DRM SUPPORT
M: Russell King <[email protected]>
@@ -10108,6 +10113,7 @@ S: Maintained
F: drivers/media/radio/radio-maxiradio*
MCAN MMIO DEVICE DRIVER
+M: Dan Murphy <[email protected]>
M: Sriram Dash <[email protected]>
S: Maintained
@@ -11455,8 +11461,8 @@ M: "David S. Miller" <[email protected]>
W: http://www.linuxfoundation.org/en/Net
Q: http://patchwork.ozlabs.org/project/netdev/list/
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/davem/net.git
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next.git
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git
S: Odd Fixes
F: Documentation/devicetree/bindings/net/
F: drivers/net/
@@ -11497,8 +11503,8 @@ M: "David S. Miller" <[email protected]>
W: http://www.linuxfoundation.org/en/Net
Q: http://patchwork.ozlabs.org/project/netdev/list/
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/davem/net.git
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next.git
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git
S: Maintained
F: net/
@@ -11543,7 +11549,7 @@ M: "David S. Miller" <[email protected]>
M: Alexey Kuznetsov <[email protected]>
M: Hideaki YOSHIFUJI <[email protected]>
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/davem/net.git
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git
S: Maintained
F: net/ipv4/
F: net/ipv6/
@@ -11568,6 +11574,16 @@ F: net/ipv6/calipso.c
F: net/netfilter/xt_CONNSECMARK.c
F: net/netfilter/xt_SECMARK.c
+NETWORKING [MPTCP]
+M: Mat Martineau <[email protected]>
+M: Matthieu Baerts <[email protected]>
+W: https://github.com/multipath-tcp/mptcp_net-next/wiki
+B: https://github.com/multipath-tcp/mptcp_net-next/issues
+S: Maintained
+F: include/net/mptcp.h
+
NETWORKING [TCP]
M: Eric Dumazet <[email protected]>
@@ -12393,7 +12409,7 @@ L: [email protected]
T: git git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/vfs.git
S: Supported
F: fs/overlayfs/
-F: Documentation/filesystems/overlayfs.txt
+F: Documentation/filesystems/overlayfs.rst
P54 WIRELESS DRIVER
M: Christian Lamparter <[email protected]>
@@ -13681,7 +13697,6 @@ F: drivers/net/ethernet/qualcomm/emac/
QUALCOMM ETHQOS ETHERNET DRIVER
M: Vinod Koul <[email protected]>
-M: Niklas Cassel <[email protected]>
S: Maintained
F: drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
@@ -13715,6 +13730,15 @@ L: [email protected]
S: Maintained
F: drivers/iommu/qcom_iommu.c
+QUALCOMM RMNET DRIVER
+M: Subash Abhinov Kasiviswanathan <[email protected]>
+M: Sean Tranchetti <[email protected]>
+S: Maintained
+F: drivers/net/ethernet/qualcomm/rmnet/
+F: Documentation/networking/device_drivers/qualcomm/rmnet.txt
+F: include/linux/if_rmnet.h
+
QUALCOMM TSENS THERMAL DRIVER
M: Amit Kucheria <[email protected]>
@@ -14114,6 +14138,7 @@ M: Paul Walmsley <[email protected]>
M: Palmer Dabbelt <[email protected]>
M: Albert Ou <[email protected]>
+P: Documentation/riscv/patch-acceptance.rst
T: git git://git.kernel.org/pub/scm/linux/kernel/git/riscv/linux.git
S: Supported
F: arch/riscv/
@@ -14541,8 +14566,6 @@ F: include/linux/platform_data/spi-s3c64xx.h
SAMSUNG SXGBE DRIVERS
M: Byungho An <[email protected]>
-M: Girish K S <[email protected]>
-M: Vipul Pandya <[email protected]>
S: Supported
F: drivers/net/ethernet/samsung/sxgbe/
@@ -15763,6 +15786,7 @@ M: Jose Abreu <[email protected]>
W: http://www.stlinux.com
S: Supported
+F: Documentation/networking/device_drivers/stmicro/
F: drivers/net/ethernet/stmicro/stmmac/
SUN3/3X
@@ -16321,12 +16345,10 @@ F: drivers/media/radio/radio-raremono.c
THERMAL
M: Zhang Rui <[email protected]>
-M: Eduardo Valentin <[email protected]>
-R: Daniel Lezcano <[email protected]>
+M: Daniel Lezcano <[email protected]>
R: Amit Kucheria <[email protected]>
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/rzhang/linux.git
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/evalenti/linux-soc-thermal.git
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/thermal/linux.git
Q: https://patchwork.kernel.org/project/linux-pm/list/
S: Supported
F: drivers/thermal/
@@ -16540,6 +16562,13 @@ L: [email protected] (moderated for non-subscribers)
S: Odd Fixes
F: sound/soc/codecs/tas571x*
+TI TCAN4X5X DEVICE DRIVER
+M: Dan Murphy <[email protected]>
+S: Maintained
+F: Documentation/devicetree/bindings/net/can/tcan4x5x.txt
+F: drivers/net/can/m_can/tcan4x5x.c
+
TI TRF7970A NFC DRIVER
M: Mark Greer <[email protected]>
@@ -17487,6 +17516,7 @@ F: net/vmw_vsock/diag.c
F: net/vmw_vsock/af_vsock_tap.c
F: net/vmw_vsock/virtio_transport_common.c
F: net/vmw_vsock/virtio_transport.c
+F: net/vmw_vsock/vsock_loopback.c
F: drivers/net/vsockmon.c
F: drivers/vhost/vsock.c
F: tools/testing/vsock/
@@ -17857,6 +17887,14 @@ L: [email protected]
S: Maintained
F: drivers/gpio/gpio-ws16c48.c
+WIREGUARD SECURE NETWORK TUNNEL
+M: Jason A. Donenfeld <[email protected]>
+S: Maintained
+F: drivers/net/wireguard/
+F: tools/testing/selftests/wireguard/
+
WISTRON LAPTOP BUTTON DRIVER
M: Miloslav Trmac <[email protected]>
S: Maintained
diff --git a/Makefile b/Makefile
index 73e3c2802927..e4c2d0327d8c 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
VERSION = 5
PATCHLEVEL = 5
SUBLEVEL = 0
-EXTRAVERSION = -rc1
+EXTRAVERSION = -rc5
NAME = Kleptomaniac Octopus
# *DOCUMENTATION*
@@ -414,6 +414,7 @@ STRIP = $(CROSS_COMPILE)strip
OBJCOPY = $(CROSS_COMPILE)objcopy
OBJDUMP = $(CROSS_COMPILE)objdump
OBJSIZE = $(CROSS_COMPILE)size
+READELF = $(CROSS_COMPILE)readelf
PAHOLE = pahole
LEX = flex
YACC = bison
@@ -472,7 +473,7 @@ GCC_PLUGINS_CFLAGS :=
CLANG_FLAGS :=
export ARCH SRCARCH CONFIG_SHELL BASH HOSTCC KBUILD_HOSTCFLAGS CROSS_COMPILE AS LD CC
-export CPP AR NM STRIP OBJCOPY OBJDUMP OBJSIZE PAHOLE LEX YACC AWK INSTALLKERNEL
+export CPP AR NM STRIP OBJCOPY OBJDUMP OBJSIZE READELF PAHOLE LEX YACC AWK INSTALLKERNEL
export PERL PYTHON PYTHON2 PYTHON3 CHECK CHECKFLAGS MAKE UTS_MACHINE HOSTCXX
export KBUILD_HOSTCXXFLAGS KBUILD_HOSTLDFLAGS KBUILD_HOSTLDLIBS LDFLAGS_MODULE
diff --git a/arch/arc/include/asm/entry-arcv2.h b/arch/arc/include/asm/entry-arcv2.h
index 41b16f21beec..0b8b63d0bec1 100644
--- a/arch/arc/include/asm/entry-arcv2.h
+++ b/arch/arc/include/asm/entry-arcv2.h
@@ -162,7 +162,7 @@
#endif
#ifdef CONFIG_ARC_HAS_ACCL_REGS
- ST2 r58, r59, PT_sp + 12
+ ST2 r58, r59, PT_r58
#endif
.endm
@@ -172,8 +172,8 @@
LD2 gp, fp, PT_r26 ; gp (r26), fp (r27)
- ld r12, [sp, PT_sp + 4]
- ld r30, [sp, PT_sp + 8]
+ ld r12, [sp, PT_r12]
+ ld r30, [sp, PT_r30]
; Restore SP (into AUX_USER_SP) only if returning to U mode
; - for K mode, it will be implicitly restored as stack is unwound
@@ -190,7 +190,7 @@
#endif
#ifdef CONFIG_ARC_HAS_ACCL_REGS
- LD2 r58, r59, PT_sp + 12
+ LD2 r58, r59, PT_r58
#endif
.endm
diff --git a/arch/arc/include/asm/hugepage.h b/arch/arc/include/asm/hugepage.h
index 9a74ce71a767..30ac40fed2c5 100644
--- a/arch/arc/include/asm/hugepage.h
+++ b/arch/arc/include/asm/hugepage.h
@@ -8,7 +8,6 @@
#define _ASM_ARC_HUGEPAGE_H
#include <linux/types.h>
-#define __ARCH_USE_5LEVEL_HACK
#include <asm-generic/pgtable-nopmd.h>
static inline pte_t pmd_pte(pmd_t pmd)
diff --git a/arch/arc/kernel/asm-offsets.c b/arch/arc/kernel/asm-offsets.c
index 1f621e416521..c783bcd35eb8 100644
--- a/arch/arc/kernel/asm-offsets.c
+++ b/arch/arc/kernel/asm-offsets.c
@@ -66,7 +66,15 @@ int main(void)
DEFINE(SZ_CALLEE_REGS, sizeof(struct callee_regs));
DEFINE(SZ_PT_REGS, sizeof(struct pt_regs));
- DEFINE(PT_user_r25, offsetof(struct pt_regs, user_r25));
+
+#ifdef CONFIG_ISA_ARCV2
+ OFFSET(PT_r12, pt_regs, r12);
+ OFFSET(PT_r30, pt_regs, r30);
+#endif
+#ifdef CONFIG_ARC_HAS_ACCL_REGS
+ OFFSET(PT_r58, pt_regs, r58);
+ OFFSET(PT_r59, pt_regs, r59);
+#endif
return 0;
}
diff --git a/arch/arc/kernel/unwind.c b/arch/arc/kernel/unwind.c
index dc05a63516f5..27ea64b1fa33 100644
--- a/arch/arc/kernel/unwind.c
+++ b/arch/arc/kernel/unwind.c
@@ -42,10 +42,10 @@ do { \
#define EXTRA_INFO(f) { \
BUILD_BUG_ON_ZERO(offsetof(struct unwind_frame_info, f) \
- % FIELD_SIZEOF(struct unwind_frame_info, f)) \
+ % sizeof_field(struct unwind_frame_info, f)) \
+ offsetof(struct unwind_frame_info, f) \
- / FIELD_SIZEOF(struct unwind_frame_info, f), \
- FIELD_SIZEOF(struct unwind_frame_info, f) \
+ / sizeof_field(struct unwind_frame_info, f), \
+ sizeof_field(struct unwind_frame_info, f) \
}
#define PTREGS_INFO(f) EXTRA_INFO(regs.f)
diff --git a/arch/arc/plat-eznps/Kconfig b/arch/arc/plat-eznps/Kconfig
index a376a50d3fea..a931d0a256d0 100644
--- a/arch/arc/plat-eznps/Kconfig
+++ b/arch/arc/plat-eznps/Kconfig
@@ -7,7 +7,7 @@
menuconfig ARC_PLAT_EZNPS
bool "\"EZchip\" ARC dev platform"
select CPU_BIG_ENDIAN
- select CLKSRC_NPS
+ select CLKSRC_NPS if !PHYS_ADDR_T_64BIT
select EZNPS_GIC
select EZCHIP_NPS_MANAGEMENT_ENET if ETHERNET
help
diff --git a/arch/arm/boot/dts/am335x-sancloud-bbe.dts b/arch/arm/boot/dts/am335x-sancloud-bbe.dts
index 8678e6e35493..e5fdb7abb0d5 100644
--- a/arch/arm/boot/dts/am335x-sancloud-bbe.dts
+++ b/arch/arm/boot/dts/am335x-sancloud-bbe.dts
@@ -108,7 +108,7 @@
&cpsw_emac0 {
phy-handle = <&ethphy0>;
- phy-mode = "rgmii-txid";
+ phy-mode = "rgmii-id";
};
&i2c0 {
diff --git a/arch/arm/boot/dts/am437x-gp-evm.dts b/arch/arm/boot/dts/am437x-gp-evm.dts
index cae4500194fe..811c8cae315b 100644
--- a/arch/arm/boot/dts/am437x-gp-evm.dts
+++ b/arch/arm/boot/dts/am437x-gp-evm.dts
@@ -86,7 +86,7 @@
};
lcd0: display {
- compatible = "osddisplays,osd057T0559-34ts", "panel-dpi";
+ compatible = "osddisplays,osd070t1718-19ts", "panel-dpi";
label = "lcd";
backlight = <&lcd_bl>;
diff --git a/arch/arm/boot/dts/am43x-epos-evm.dts b/arch/arm/boot/dts/am43x-epos-evm.dts
index 95314121d111..078cb473fa7d 100644
--- a/arch/arm/boot/dts/am43x-epos-evm.dts
+++ b/arch/arm/boot/dts/am43x-epos-evm.dts
@@ -42,7 +42,7 @@
};
lcd0: display {
- compatible = "osddisplays,osd057T0559-34ts", "panel-dpi";
+ compatible = "osddisplays,osd070t1718-19ts", "panel-dpi";
label = "lcd";
backlight = <&lcd_bl>;
diff --git a/arch/arm/boot/dts/bcm-cygnus.dtsi b/arch/arm/boot/dts/bcm-cygnus.dtsi
index 2dac3efc7640..1bc45cfd5453 100644
--- a/arch/arm/boot/dts/bcm-cygnus.dtsi
+++ b/arch/arm/boot/dts/bcm-cygnus.dtsi
@@ -174,8 +174,8 @@
mdio: mdio@18002000 {
compatible = "brcm,iproc-mdio";
reg = <0x18002000 0x8>;
- #size-cells = <1>;
- #address-cells = <0>;
+ #size-cells = <0>;
+ #address-cells = <1>;
status = "disabled";
gphy0: ethernet-phy@0 {
diff --git a/arch/arm/boot/dts/bcm2711.dtsi b/arch/arm/boot/dts/bcm2711.dtsi
index 961bed832755..e2f6ffb00aa9 100644
--- a/arch/arm/boot/dts/bcm2711.dtsi
+++ b/arch/arm/boot/dts/bcm2711.dtsi
@@ -43,7 +43,7 @@
<0x7c000000 0x0 0xfc000000 0x02000000>,
<0x40000000 0x0 0xff800000 0x00800000>;
/* Emulate a contiguous 30-bit address range for DMA */
- dma-ranges = <0xc0000000 0x0 0x00000000 0x3c000000>;
+ dma-ranges = <0xc0000000 0x0 0x00000000 0x40000000>;
/*
* This node is the provider for the enable-method for
diff --git a/arch/arm/boot/dts/bcm283x.dtsi b/arch/arm/boot/dts/bcm283x.dtsi
index 3caaa57eb6c8..839491628e87 100644
--- a/arch/arm/boot/dts/bcm283x.dtsi
+++ b/arch/arm/boot/dts/bcm283x.dtsi
@@ -37,7 +37,7 @@
trips {
cpu-crit {
- temperature = <80000>;
+ temperature = <90000>;
hysteresis = <0>;
type = "critical";
};
diff --git a/arch/arm/boot/dts/bcm5301x.dtsi b/arch/arm/boot/dts/bcm5301x.dtsi
index 372dc1eb88a0..2d9b4dd05830 100644
--- a/arch/arm/boot/dts/bcm5301x.dtsi
+++ b/arch/arm/boot/dts/bcm5301x.dtsi
@@ -353,8 +353,8 @@
mdio: mdio@18003000 {
compatible = "brcm,iproc-mdio";
reg = <0x18003000 0x8>;
- #size-cells = <1>;
- #address-cells = <0>;
+ #size-cells = <0>;
+ #address-cells = <1>;
};
mdio-bus-mux@18003000 {
diff --git a/arch/arm/boot/dts/e60k02.dtsi b/arch/arm/boot/dts/e60k02.dtsi
index 6472b056a001..5a2c5320437d 100644
--- a/arch/arm/boot/dts/e60k02.dtsi
+++ b/arch/arm/boot/dts/e60k02.dtsi
@@ -265,11 +265,6 @@
regulator-name = "LDORTC1";
regulator-boot-on;
};
-
- ldortc2_reg: LDORTC2 {
- regulator-name = "LDORTC2";
- regulator-boot-on;
- };
};
};
};
diff --git a/arch/arm/boot/dts/imx6ul-14x14-evk.dtsi b/arch/arm/boot/dts/imx6ul-14x14-evk.dtsi
index 1506eb12b21e..212144511b66 100644
--- a/arch/arm/boot/dts/imx6ul-14x14-evk.dtsi
+++ b/arch/arm/boot/dts/imx6ul-14x14-evk.dtsi
@@ -30,14 +30,26 @@
enable-active-high;
};
- reg_sensors: regulator-sensors {
+ reg_peri_3v3: regulator-peri-3v3 {
compatible = "regulator-fixed";
pinctrl-names = "default";
- pinctrl-0 = <&pinctrl_sensors_reg>;
- regulator-name = "sensors-supply";
+ pinctrl-0 = <&pinctrl_peri_3v3>;
+ regulator-name = "VPERI_3V3";
regulator-min-microvolt = <3300000>;
regulator-max-microvolt = <3300000>;
gpio = <&gpio5 2 GPIO_ACTIVE_LOW>;
+ /*
+ * If you want to want to make this dynamic please
+ * check schematics and test all affected peripherals:
+ *
+ * - sensors
+ * - ethernet phy
+ * - can
+ * - bluetooth
+ * - wm8960 audio codec
+ * - ov5640 camera
+ */
+ regulator-always-on;
};
reg_can_3v3: regulator-can-3v3 {
@@ -140,6 +152,7 @@
pinctrl-0 = <&pinctrl_enet1>;
phy-mode = "rmii";
phy-handle = <&ethphy0>;
+ phy-supply = <&reg_peri_3v3>;
status = "okay";
};
@@ -148,6 +161,7 @@
pinctrl-0 = <&pinctrl_enet2>;
phy-mode = "rmii";
phy-handle = <&ethphy1>;
+ phy-supply = <&reg_peri_3v3>;
status = "okay";
mdio {
@@ -193,8 +207,8 @@
magnetometer@e {
compatible = "fsl,mag3110";
reg = <0x0e>;
- vdd-supply = <&reg_sensors>;
- vddio-supply = <&reg_sensors>;
+ vdd-supply = <&reg_peri_3v3>;
+ vddio-supply = <&reg_peri_3v3>;
};
};
@@ -227,7 +241,7 @@
flash0: n25q256a@0 {
#address-cells = <1>;
#size-cells = <1>;
- compatible = "micron,n25q256a";
+ compatible = "micron,n25q256a", "jedec,spi-nor";
spi-max-frequency = <29000000>;
spi-rx-bus-width = <4>;
spi-tx-bus-width = <4>;
@@ -462,7 +476,7 @@
>;
};
- pinctrl_sensors_reg: sensorsreggrp {
+ pinctrl_peri_3v3: peri3v3grp {
fsl,pins = <
MX6UL_PAD_SNVS_TAMPER2__GPIO5_IO02 0x1b0b0
>;
diff --git a/arch/arm/configs/exynos_defconfig b/arch/arm/configs/exynos_defconfig
index e7e4bb5ad8d5..fde84f123fbb 100644
--- a/arch/arm/configs/exynos_defconfig
+++ b/arch/arm/configs/exynos_defconfig
@@ -350,6 +350,7 @@ CONFIG_PRINTK_TIME=y
CONFIG_DYNAMIC_DEBUG=y
CONFIG_DEBUG_INFO=y
CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_FS=y
CONFIG_DEBUG_KERNEL=y
CONFIG_SOFTLOCKUP_DETECTOR=y
# CONFIG_DETECT_HUNG_TASK is not set
diff --git a/arch/arm/configs/imx_v6_v7_defconfig b/arch/arm/configs/imx_v6_v7_defconfig
index 26d6dee67aa6..3608e55eaecd 100644
--- a/arch/arm/configs/imx_v6_v7_defconfig
+++ b/arch/arm/configs/imx_v6_v7_defconfig
@@ -462,6 +462,7 @@ CONFIG_FONT_8x8=y
CONFIG_FONT_8x16=y
CONFIG_PRINTK_TIME=y
CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_FS=y
# CONFIG_SCHED_DEBUG is not set
CONFIG_PROVE_LOCKING=y
# CONFIG_DEBUG_BUGVERBOSE is not set
diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig
index 8c37cc8ab6f2..c32c338f7704 100644
--- a/arch/arm/configs/omap2plus_defconfig
+++ b/arch/arm/configs/omap2plus_defconfig
@@ -92,6 +92,7 @@ CONFIG_IP_PNP_BOOTP=y
CONFIG_IP_PNP_RARP=y
CONFIG_NETFILTER=y
CONFIG_PHONET=m
+CONFIG_NET_SWITCHDEV=y
CONFIG_CAN=m
CONFIG_CAN_C_CAN=m
CONFIG_CAN_C_CAN_PLATFORM=m
@@ -181,6 +182,7 @@ CONFIG_SMSC911X=y
# CONFIG_NET_VENDOR_STMICRO is not set
CONFIG_TI_DAVINCI_EMAC=y
CONFIG_TI_CPSW=y
+CONFIG_TI_CPSW_SWITCHDEV=y
CONFIG_TI_CPTS=y
# CONFIG_NET_VENDOR_VIA is not set
# CONFIG_NET_VENDOR_WIZNET is not set
@@ -554,6 +556,6 @@ CONFIG_DEBUG_INFO=y
CONFIG_DEBUG_INFO_SPLIT=y
CONFIG_DEBUG_INFO_DWARF4=y
CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_FS=y
CONFIG_SCHEDSTATS=y
# CONFIG_DEBUG_BUGVERBOSE is not set
-CONFIG_TI_CPSW_SWITCHDEV=y
diff --git a/arch/arm/configs/shmobile_defconfig b/arch/arm/configs/shmobile_defconfig
index bda57cafa2bc..de3830443613 100644
--- a/arch/arm/configs/shmobile_defconfig
+++ b/arch/arm/configs/shmobile_defconfig
@@ -212,4 +212,5 @@ CONFIG_DMA_CMA=y
CONFIG_CMA_SIZE_MBYTES=64
CONFIG_PRINTK_TIME=y
# CONFIG_ENABLE_MUST_CHECK is not set
+CONFIG_DEBUG_FS=y
CONFIG_DEBUG_KERNEL=y
diff --git a/arch/arm/crypto/curve25519-glue.c b/arch/arm/crypto/curve25519-glue.c
index f3f42cf3b893..776ae07e0469 100644
--- a/arch/arm/crypto/curve25519-glue.c
+++ b/arch/arm/crypto/curve25519-glue.c
@@ -38,6 +38,13 @@ void curve25519_arch(u8 out[CURVE25519_KEY_SIZE],
}
EXPORT_SYMBOL(curve25519_arch);
+void curve25519_base_arch(u8 pub[CURVE25519_KEY_SIZE],
+ const u8 secret[CURVE25519_KEY_SIZE])
+{
+ return curve25519_arch(pub, secret, curve25519_base_point);
+}
+EXPORT_SYMBOL(curve25519_base_arch);
+
static int curve25519_set_secret(struct crypto_kpp *tfm, const void *buf,
unsigned int len)
{
diff --git a/arch/arm/mach-bcm/bcm2711.c b/arch/arm/mach-bcm/bcm2711.c
index dbe296798647..fa0300d8c79d 100644
--- a/arch/arm/mach-bcm/bcm2711.c
+++ b/arch/arm/mach-bcm/bcm2711.c
@@ -13,6 +13,7 @@ static const char * const bcm2711_compat[] = {
#ifdef CONFIG_ARCH_MULTI_V7
"brcm,bcm2711",
#endif
+ NULL
};
DT_MACHINE_START(BCM2711, "BCM2711")
diff --git a/arch/arm/mach-imx/cpu.c b/arch/arm/mach-imx/cpu.c
index d8118031c51f..871f98342d50 100644
--- a/arch/arm/mach-imx/cpu.c
+++ b/arch/arm/mach-imx/cpu.c
@@ -84,7 +84,7 @@ struct device * __init imx_soc_device_init(void)
const char *ocotp_compat = NULL;
struct soc_device *soc_dev;
struct device_node *root;
- struct regmap *ocotp;
+ struct regmap *ocotp = NULL;
const char *soc_id;
u64 soc_uid = 0;
u32 val;
@@ -148,11 +148,11 @@ struct device * __init imx_soc_device_init(void)
soc_id = "i.MX6UL";
break;
case MXC_CPU_IMX6ULL:
- ocotp_compat = "fsl,imx6ul-ocotp";
+ ocotp_compat = "fsl,imx6ull-ocotp";
soc_id = "i.MX6ULL";
break;
case MXC_CPU_IMX6ULZ:
- ocotp_compat = "fsl,imx6ul-ocotp";
+ ocotp_compat = "fsl,imx6ull-ocotp";
soc_id = "i.MX6ULZ";
break;
case MXC_CPU_IMX6SLL:
@@ -175,7 +175,9 @@ struct device * __init imx_soc_device_init(void)
ocotp = syscon_regmap_lookup_by_compatible(ocotp_compat);
if (IS_ERR(ocotp))
pr_err("%s: failed to find %s regmap!\n", __func__, ocotp_compat);
+ }
+ if (!IS_ERR_OR_NULL(ocotp)) {
regmap_read(ocotp, OCOTP_UID_H, &val);
soc_uid = val;
regmap_read(ocotp, OCOTP_UID_L, &val);
diff --git a/arch/arm/mach-ixp4xx/fsg-setup.c b/arch/arm/mach-ixp4xx/fsg-setup.c
index 648932d8d7a8..507ee3878769 100644
--- a/arch/arm/mach-ixp4xx/fsg-setup.c
+++ b/arch/arm/mach-ixp4xx/fsg-setup.c
@@ -132,6 +132,22 @@ static struct platform_device fsg_leds = {
};
/* Built-in 10/100 Ethernet MAC interfaces */
+static struct resource fsg_eth_npeb_resources[] = {
+ {
+ .start = IXP4XX_EthB_BASE_PHYS,
+ .end = IXP4XX_EthB_BASE_PHYS + 0x0fff,
+ .flags = IORESOURCE_MEM,
+ },
+};
+
+static struct resource fsg_eth_npec_resources[] = {
+ {
+ .start = IXP4XX_EthC_BASE_PHYS,
+ .end = IXP4XX_EthC_BASE_PHYS + 0x0fff,
+ .flags = IORESOURCE_MEM,
+ },
+};
+
static struct eth_plat_info fsg_plat_eth[] = {
{
.phy = 5,
@@ -151,12 +167,16 @@ static struct platform_device fsg_eth[] = {
.dev = {
.platform_data = fsg_plat_eth,
},
+ .num_resources = ARRAY_SIZE(fsg_eth_npeb_resources),
+ .resource = fsg_eth_npeb_resources,
}, {
.name = "ixp4xx_eth",
.id = IXP4XX_ETH_NPEC,
.dev = {
.platform_data = fsg_plat_eth + 1,
},
+ .num_resources = ARRAY_SIZE(fsg_eth_npec_resources),
+ .resource = fsg_eth_npec_resources,
}
};
diff --git a/arch/arm/mach-ixp4xx/goramo_mlr.c b/arch/arm/mach-ixp4xx/goramo_mlr.c
index a0e0b6b7dc5c..07b50dfcc489 100644
--- a/arch/arm/mach-ixp4xx/goramo_mlr.c
+++ b/arch/arm/mach-ixp4xx/goramo_mlr.c
@@ -11,6 +11,7 @@
#include <linux/irq.h>
#include <linux/kernel.h>
#include <linux/pci.h>
+#include <linux/platform_data/wan_ixp4xx_hss.h>
#include <linux/serial_8250.h>
#include <asm/mach-types.h>
#include <asm/mach/arch.h>
@@ -272,6 +273,22 @@ static struct platform_device device_uarts = {
/* Built-in 10/100 Ethernet MAC interfaces */
+static struct resource eth_npeb_resources[] = {
+ {
+ .start = IXP4XX_EthB_BASE_PHYS,
+ .end = IXP4XX_EthB_BASE_PHYS + 0x0fff,
+ .flags = IORESOURCE_MEM,
+ },
+};
+
+static struct resource eth_npec_resources[] = {
+ {
+ .start = IXP4XX_EthC_BASE_PHYS,
+ .end = IXP4XX_EthC_BASE_PHYS + 0x0fff,
+ .flags = IORESOURCE_MEM,
+ },
+};
+
static struct eth_plat_info eth_plat[] = {
{
.phy = 0,
@@ -289,10 +306,14 @@ static struct platform_device device_eth_tab[] = {
.name = "ixp4xx_eth",
.id = IXP4XX_ETH_NPEB,
.dev.platform_data = eth_plat,
+ .num_resources = ARRAY_SIZE(eth_npeb_resources),
+ .resource = eth_npeb_resources,
}, {
.name = "ixp4xx_eth",
.id = IXP4XX_ETH_NPEC,
.dev.platform_data = eth_plat + 1,
+ .num_resources = ARRAY_SIZE(eth_npec_resources),
+ .resource = eth_npec_resources,
}
};
@@ -405,6 +426,9 @@ static void __init gmlr_init(void)
if (hw_bits & CFG_HW_HAS_HSS1)
device_tab[devices++] = &device_hss_tab[1]; /* max index 5 */
+ hss_plat[0].timer_freq = ixp4xx_timer_freq;
+ hss_plat[1].timer_freq = ixp4xx_timer_freq;
+
gpio_request(GPIO_SCL, "SCL/clock");
gpio_request(GPIO_SDA, "SDA/data");
gpio_request(GPIO_STR, "strobe");
diff --git a/arch/arm/mach-ixp4xx/include/mach/platform.h b/arch/arm/mach-ixp4xx/include/mach/platform.h
index 342acbe20f7c..6d403fe0bf52 100644
--- a/arch/arm/mach-ixp4xx/include/mach/platform.h
+++ b/arch/arm/mach-ixp4xx/include/mach/platform.h
@@ -15,6 +15,7 @@
#ifndef __ASSEMBLY__
#include <linux/reboot.h>
+#include <linux/platform_data/eth_ixp4xx.h>
#include <asm/types.h>
@@ -92,27 +93,6 @@ struct ixp4xx_pata_data {
void __iomem *cs1;
};
-#define IXP4XX_ETH_NPEA 0x00
-#define IXP4XX_ETH_NPEB 0x10
-#define IXP4XX_ETH_NPEC 0x20
-
-/* Information about built-in Ethernet MAC interfaces */
-struct eth_plat_info {
- u8 phy; /* MII PHY ID, 0 - 31 */
- u8 rxq; /* configurable, currently 0 - 31 only */
- u8 txreadyq;
- u8 hwaddr[6];
-};
-
-/* Information about built-in HSS (synchronous serial) interfaces */
-struct hss_plat_info {
- int (*set_clock)(int port, unsigned int clock_type);
- int (*open)(int port, void *pdev,
- void (*set_carrier_cb)(void *pdev, int carrier));
- void (*close)(int port, void *pdev);
- u8 txreadyq;
-};
-
/*
* Frequency of clock used for primary clocksource
*/
diff --git a/arch/arm/mach-ixp4xx/ixdp425-setup.c b/arch/arm/mach-ixp4xx/ixdp425-setup.c
index 6f0f7ed18ea8..45d5b720ded6 100644
--- a/arch/arm/mach-ixp4xx/ixdp425-setup.c
+++ b/arch/arm/mach-ixp4xx/ixdp425-setup.c
@@ -187,6 +187,22 @@ static struct platform_device ixdp425_uart = {
};
/* Built-in 10/100 Ethernet MAC interfaces */
+static struct resource ixp425_npeb_resources[] = {
+ {
+ .start = IXP4XX_EthB_BASE_PHYS,
+ .end = IXP4XX_EthB_BASE_PHYS + 0x0fff,
+ .flags = IORESOURCE_MEM,
+ },
+};
+
+static struct resource ixp425_npec_resources[] = {
+ {
+ .start = IXP4XX_EthC_BASE_PHYS,
+ .end = IXP4XX_EthC_BASE_PHYS + 0x0fff,
+ .flags = IORESOURCE_MEM,
+ },
+};
+
static struct eth_plat_info ixdp425_plat_eth[] = {
{
.phy = 0,
@@ -204,10 +220,14 @@ static struct platform_device ixdp425_eth[] = {
.name = "ixp4xx_eth",
.id = IXP4XX_ETH_NPEB,
.dev.platform_data = ixdp425_plat_eth,
+ .num_resources = ARRAY_SIZE(ixp425_npeb_resources),
+ .resource = ixp425_npeb_resources,
}, {
.name = "ixp4xx_eth",
.id = IXP4XX_ETH_NPEC,
.dev.platform_data = ixdp425_plat_eth + 1,
+ .num_resources = ARRAY_SIZE(ixp425_npec_resources),
+ .resource = ixp425_npec_resources,
}
};
diff --git a/arch/arm/mach-ixp4xx/nas100d-setup.c b/arch/arm/mach-ixp4xx/nas100d-setup.c
index c142cfa8c5d6..6959ad2e3aec 100644
--- a/arch/arm/mach-ixp4xx/nas100d-setup.c
+++ b/arch/arm/mach-ixp4xx/nas100d-setup.c
@@ -165,6 +165,14 @@ static struct platform_device nas100d_uart = {
};
/* Built-in 10/100 Ethernet MAC interfaces */
+static struct resource nas100d_eth_resources[] = {
+ {
+ .start = IXP4XX_EthB_BASE_PHYS,
+ .end = IXP4XX_EthB_BASE_PHYS + 0x0fff,
+ .flags = IORESOURCE_MEM,
+ },
+};
+
static struct eth_plat_info nas100d_plat_eth[] = {
{
.phy = 0,
@@ -178,6 +186,8 @@ static struct platform_device nas100d_eth[] = {
.name = "ixp4xx_eth",
.id = IXP4XX_ETH_NPEB,
.dev.platform_data = nas100d_plat_eth,
+ .num_resources = ARRAY_SIZE(nas100d_eth_resources),
+ .resource = nas100d_eth_resources,
}
};
diff --git a/arch/arm/mach-ixp4xx/nslu2-setup.c b/arch/arm/mach-ixp4xx/nslu2-setup.c
index ee1877fcfafe..a428bb918703 100644
--- a/arch/arm/mach-ixp4xx/nslu2-setup.c
+++ b/arch/arm/mach-ixp4xx/nslu2-setup.c
@@ -185,6 +185,14 @@ static struct platform_device nslu2_uart = {
};
/* Built-in 10/100 Ethernet MAC interfaces */
+static struct resource nslu2_eth_resources[] = {
+ {
+ .start = IXP4XX_EthB_BASE_PHYS,
+ .end = IXP4XX_EthB_BASE_PHYS + 0x0fff,
+ .flags = IORESOURCE_MEM,
+ },
+};
+
static struct eth_plat_info nslu2_plat_eth[] = {
{
.phy = 1,
@@ -198,6 +206,8 @@ static struct platform_device nslu2_eth[] = {
.name = "ixp4xx_eth",
.id = IXP4XX_ETH_NPEB,
.dev.platform_data = nslu2_plat_eth,
+ .num_resources = ARRAY_SIZE(nslu2_eth_resources),
+ .resource = nslu2_eth_resources,
}
};
diff --git a/arch/arm/mach-ixp4xx/omixp-setup.c b/arch/arm/mach-ixp4xx/omixp-setup.c
index 6ed5a9aed600..8f2b8c473d7a 100644
--- a/arch/arm/mach-ixp4xx/omixp-setup.c
+++ b/arch/arm/mach-ixp4xx/omixp-setup.c
@@ -170,6 +170,22 @@ static struct platform_device mic256_leds = {
};
/* Built-in 10/100 Ethernet MAC interfaces */
+static struct resource ixp425_npeb_resources[] = {
+ {
+ .start = IXP4XX_EthB_BASE_PHYS,
+ .end = IXP4XX_EthB_BASE_PHYS + 0x0fff,
+ .flags = IORESOURCE_MEM,
+ },
+};
+
+static struct resource ixp425_npec_resources[] = {
+ {
+ .start = IXP4XX_EthC_BASE_PHYS,
+ .end = IXP4XX_EthC_BASE_PHYS + 0x0fff,
+ .flags = IORESOURCE_MEM,
+ },
+};
+
static struct eth_plat_info ixdp425_plat_eth[] = {
{
.phy = 0,
@@ -187,10 +203,14 @@ static struct platform_device ixdp425_eth[] = {
.name = "ixp4xx_eth",
.id = IXP4XX_ETH_NPEB,
.dev.platform_data = ixdp425_plat_eth,
+ .num_resources = ARRAY_SIZE(ixp425_npeb_resources),
+ .resource = ixp425_npeb_resources,
}, {
.name = "ixp4xx_eth",
.id = IXP4XX_ETH_NPEC,
.dev.platform_data = ixdp425_plat_eth + 1,
+ .num_resources = ARRAY_SIZE(ixp425_npec_resources),
+ .resource = ixp425_npec_resources,
},
};
diff --git a/arch/arm/mach-ixp4xx/vulcan-setup.c b/arch/arm/mach-ixp4xx/vulcan-setup.c
index d2ebb7c675a8..e506d2af98ad 100644
--- a/arch/arm/mach-ixp4xx/vulcan-setup.c
+++ b/arch/arm/mach-ixp4xx/vulcan-setup.c
@@ -124,6 +124,22 @@ static struct platform_device vulcan_uart = {
.num_resources = ARRAY_SIZE(vulcan_uart_resources),
};
+static struct resource vulcan_npeb_resources[] = {
+ {
+ .start = IXP4XX_EthB_BASE_PHYS,
+ .end = IXP4XX_EthB_BASE_PHYS + 0x0fff,
+ .flags = IORESOURCE_MEM,
+ },
+};
+
+static struct resource vulcan_npec_resources[] = {
+ {
+ .start = IXP4XX_EthC_BASE_PHYS,
+ .end = IXP4XX_EthC_BASE_PHYS + 0x0fff,
+ .flags = IORESOURCE_MEM,
+ },
+};
+
static struct eth_plat_info vulcan_plat_eth[] = {
[0] = {
.phy = 0,
@@ -144,6 +160,8 @@ static struct platform_device vulcan_eth[] = {
.dev = {
.platform_data = &vulcan_plat_eth[0],
},
+ .num_resources = ARRAY_SIZE(vulcan_npeb_resources),
+ .resource = vulcan_npeb_resources,
},
[1] = {
.name = "ixp4xx_eth",
@@ -151,6 +169,8 @@ static struct platform_device vulcan_eth[] = {
.dev = {
.platform_data = &vulcan_plat_eth[1],
},
+ .num_resources = ARRAY_SIZE(vulcan_npec_resources),
+ .resource = vulcan_npec_resources,
},
};
diff --git a/arch/arm/mach-mmp/pxa168.h b/arch/arm/mach-mmp/pxa168.h
index 0331c58b07a2..dff651b9f252 100644
--- a/arch/arm/mach-mmp/pxa168.h
+++ b/arch/arm/mach-mmp/pxa168.h
@@ -17,9 +17,9 @@ extern void pxa168_clear_keypad_wakeup(void);
#include <linux/platform_data/keypad-pxa27x.h>
#include <linux/pxa168_eth.h>
#include <linux/platform_data/mv_usb.h>
+#include <linux/soc/mmp/cputype.h>
#include "devices.h"
-#include "cputype.h"
extern struct pxa_device_desc pxa168_device_uart1;
extern struct pxa_device_desc pxa168_device_uart2;
diff --git a/arch/arm/mach-vexpress/spc.c b/arch/arm/mach-vexpress/spc.c
index 354e0e7025ae..1da11bdb1dfb 100644
--- a/arch/arm/mach-vexpress/spc.c
+++ b/arch/arm/mach-vexpress/spc.c
@@ -551,8 +551,9 @@ static struct clk *ve_spc_clk_register(struct device *cpu_dev)
static int __init ve_spc_clk_init(void)
{
- int cpu;
+ int cpu, cluster;
struct clk *clk;
+ bool init_opp_table[MAX_CLUSTERS] = { false };
if (!info)
return 0; /* Continue only if SPC is initialised */
@@ -578,8 +579,17 @@ static int __init ve_spc_clk_init(void)
continue;
}
+ cluster = topology_physical_package_id(cpu_dev->id);
+ if (init_opp_table[cluster])
+ continue;
+
if (ve_init_opp_table(cpu_dev))
pr_warn("failed to initialise cpu%d opp table\n", cpu);
+ else if (dev_pm_opp_set_sharing_cpus(cpu_dev,
+ topology_core_cpumask(cpu_dev->id)))
+ pr_warn("failed to mark OPPs shared for cpu%d\n", cpu);
+ else
+ init_opp_table[cluster] = true;
}
platform_device_register_simple("vexpress-spc-cpufreq", -1, NULL, 0);
diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index 97dc386e3cb8..cc29869d12a3 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -1260,12 +1260,9 @@ static inline void emit_push_r64(const s8 src[], struct jit_ctx *ctx)
static void build_prologue(struct jit_ctx *ctx)
{
- const s8 r0 = bpf2a32[BPF_REG_0][1];
- const s8 r2 = bpf2a32[BPF_REG_1][1];
- const s8 r3 = bpf2a32[BPF_REG_1][0];
- const s8 r4 = bpf2a32[BPF_REG_6][1];
- const s8 fplo = bpf2a32[BPF_REG_FP][1];
- const s8 fphi = bpf2a32[BPF_REG_FP][0];
+ const s8 arm_r0 = bpf2a32[BPF_REG_0][1];
+ const s8 *bpf_r1 = bpf2a32[BPF_REG_1];
+ const s8 *bpf_fp = bpf2a32[BPF_REG_FP];
const s8 *tcc = bpf2a32[TCALL_CNT];
/* Save callee saved registers. */
@@ -1278,8 +1275,10 @@ static void build_prologue(struct jit_ctx *ctx)
emit(ARM_PUSH(CALLEE_PUSH_MASK), ctx);
emit(ARM_MOV_R(ARM_FP, ARM_SP), ctx);
#endif
- /* Save frame pointer for later */
- emit(ARM_SUB_I(ARM_IP, ARM_SP, SCRATCH_SIZE), ctx);
+ /* mov r3, #0 */
+ /* sub r2, sp, #SCRATCH_SIZE */
+ emit(ARM_MOV_I(bpf_r1[0], 0), ctx);
+ emit(ARM_SUB_I(bpf_r1[1], ARM_SP, SCRATCH_SIZE), ctx);
ctx->stack_size = imm8m(STACK_SIZE);
@@ -1287,18 +1286,15 @@ static void build_prologue(struct jit_ctx *ctx)
emit(ARM_SUB_I(ARM_SP, ARM_SP, ctx->stack_size), ctx);
/* Set up BPF prog stack base register */
- emit_a32_mov_r(fplo, ARM_IP, ctx);
- emit_a32_mov_i(fphi, 0, ctx);
+ emit_a32_mov_r64(true, bpf_fp, bpf_r1, ctx);
- /* mov r4, 0 */
- emit(ARM_MOV_I(r4, 0), ctx);
+ /* Initialize Tail Count */
+ emit(ARM_MOV_I(bpf_r1[1], 0), ctx);
+ emit_a32_mov_r64(true, tcc, bpf_r1, ctx);
/* Move BPF_CTX to BPF_R1 */
- emit(ARM_MOV_R(r3, r4), ctx);
- emit(ARM_MOV_R(r2, r0), ctx);
- /* Initialize Tail Count */
- emit(ARM_STR_I(r4, ARM_FP, EBPF_SCRATCH_TO_ARM_FP(tcc[0])), ctx);
- emit(ARM_STR_I(r4, ARM_FP, EBPF_SCRATCH_TO_ARM_FP(tcc[1])), ctx);
+ emit(ARM_MOV_R(bpf_r1[1], arm_r0), ctx);
+
/* end of prologue */
}
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index b1b4476ddb83..29d03459de20 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -69,6 +69,7 @@ config ARM64
select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 && (GCC_VERSION >= 50000 || CC_IS_CLANG)
select ARCH_SUPPORTS_NUMA_BALANCING
select ARCH_WANT_COMPAT_IPC_PARSE_VERSION if COMPAT
+ select ARCH_WANT_DEFAULT_BPF_JIT
select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
select ARCH_WANT_FRAME_POINTERS
select ARCH_WANT_HUGE_PMD_SHARE if ARM64_4K_PAGES || (ARM64_16K_PAGES && !ARM64_VA_BITS_36)
diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi
index 8e8a77eb596a..13a3cbe89b5a 100644
--- a/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi
+++ b/arch/arm64/boot/dts/freescale/fsl-ls1028a.dtsi
@@ -88,7 +88,7 @@
reboot {
compatible ="syscon-reboot";
- regmap = <&dcfg>;
+ regmap = <&rst>;
offset = <0xb0>;
mask = <0x02>;
};
@@ -178,6 +178,12 @@
big-endian;
};
+ rst: syscon@1e60000 {
+ compatible = "syscon";
+ reg = <0x0 0x1e60000 0x0 0x10000>;
+ little-endian;
+ };
+
scfg: syscon@1fc0000 {
compatible = "fsl,ls1028a-scfg", "syscon";
reg = <0x0 0x1fc0000 0x0 0x10000>;
@@ -584,7 +590,7 @@
0x00010004 0x0000003d
0x00010005 0x00000045
0x00010006 0x0000004d
- 0x00010007 0x00000045
+ 0x00010007 0x00000055
0x00010008 0x0000005e
0x00010009 0x00000066
0x0001000a 0x0000006e
diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h
index 8dc6c5cdabe6..baf52baaa2a5 100644
--- a/arch/arm64/include/asm/pgtable-prot.h
+++ b/arch/arm64/include/asm/pgtable-prot.h
@@ -85,13 +85,12 @@
#define PAGE_SHARED_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_WRITE)
#define PAGE_READONLY __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN | PTE_UXN)
#define PAGE_READONLY_EXEC __pgprot(_PAGE_DEFAULT | PTE_USER | PTE_RDONLY | PTE_NG | PTE_PXN)
-#define PAGE_EXECONLY __pgprot(_PAGE_DEFAULT | PTE_RDONLY | PTE_NG | PTE_PXN)
#define __P000 PAGE_NONE
#define __P001 PAGE_READONLY
#define __P010 PAGE_READONLY
#define __P011 PAGE_READONLY
-#define __P100 PAGE_EXECONLY
+#define __P100 PAGE_READONLY_EXEC
#define __P101 PAGE_READONLY_EXEC
#define __P110 PAGE_READONLY_EXEC
#define __P111 PAGE_READONLY_EXEC
@@ -100,7 +99,7 @@
#define __S001 PAGE_READONLY
#define __S010 PAGE_SHARED
#define __S011 PAGE_SHARED
-#define __S100 PAGE_EXECONLY
+#define __S100 PAGE_READONLY_EXEC
#define __S101 PAGE_READONLY_EXEC
#define __S110 PAGE_SHARED_EXEC
#define __S111 PAGE_SHARED_EXEC
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 5d15b4735a0e..cd5de0e40bfa 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -96,12 +96,8 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
#define pte_dirty(pte) (pte_sw_dirty(pte) || pte_hw_dirty(pte))
#define pte_valid(pte) (!!(pte_val(pte) & PTE_VALID))
-/*
- * Execute-only user mappings do not have the PTE_USER bit set. All valid
- * kernel mappings have the PTE_UXN bit set.
- */
#define pte_valid_not_user(pte) \
- ((pte_val(pte) & (PTE_VALID | PTE_USER | PTE_UXN)) == (PTE_VALID | PTE_UXN))
+ ((pte_val(pte) & (PTE_VALID | PTE_USER)) == PTE_VALID)
#define pte_valid_young(pte) \
((pte_val(pte) & (PTE_VALID | PTE_AF)) == (PTE_VALID | PTE_AF))
#define pte_valid_user(pte) \
@@ -117,8 +113,8 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
/*
* p??_access_permitted() is true for valid user mappings (subject to the
- * write permission check) other than user execute-only which do not have the
- * PTE_USER bit set. PROT_NONE mappings do not have the PTE_VALID bit set.
+ * write permission check). PROT_NONE mappings do not have the PTE_VALID bit
+ * set.
*/
#define pte_access_permitted(pte, write) \
(pte_valid_user(pte) && (!(write) || pte_write(pte)))
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 6a09ca7644ea..85f4bec22f6d 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -547,6 +547,7 @@ static const struct midr_range spectre_v2_safe_list[] = {
MIDR_ALL_VERSIONS(MIDR_CORTEX_A53),
MIDR_ALL_VERSIONS(MIDR_CORTEX_A55),
MIDR_ALL_VERSIONS(MIDR_BRAHMA_B53),
+ MIDR_ALL_VERSIONS(MIDR_HISI_TSV110),
{ /* sentinel */ }
};
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 46822afc57e0..9f2165937f7d 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -2098,9 +2098,9 @@ static void unhandled_cp_access(struct kvm_vcpu *vcpu,
WARN_ON(1);
}
- kvm_err("Unsupported guest CP%d access at: %08lx [%08lx]\n",
- cp, *vcpu_pc(vcpu), *vcpu_cpsr(vcpu));
- print_sys_reg_instr(params);
+ print_sys_reg_msg(params,
+ "Unsupported guest CP%d access at: %08lx [%08lx]\n",
+ cp, *vcpu_pc(vcpu), *vcpu_cpsr(vcpu));
kvm_inject_undefined(vcpu);
}
@@ -2233,6 +2233,12 @@ int kvm_handle_cp14_32(struct kvm_vcpu *vcpu, struct kvm_run *run)
NULL, 0);
}
+static bool is_imp_def_sys_reg(struct sys_reg_params *params)
+{
+ // See ARM DDI 0487E.a, section D12.3.2
+ return params->Op0 == 3 && (params->CRn & 0b1011) == 0b1011;
+}
+
static int emulate_sys_reg(struct kvm_vcpu *vcpu,
struct sys_reg_params *params)
{
@@ -2248,10 +2254,12 @@ static int emulate_sys_reg(struct kvm_vcpu *vcpu,
if (likely(r)) {
perform_access(vcpu, params, r);
+ } else if (is_imp_def_sys_reg(params)) {
+ kvm_inject_undefined(vcpu);
} else {
- kvm_err("Unsupported guest sys_reg access at: %lx [%08lx]\n",
- *vcpu_pc(vcpu), *vcpu_cpsr(vcpu));
- print_sys_reg_instr(params);
+ print_sys_reg_msg(params,
+ "Unsupported guest sys_reg access at: %lx [%08lx]\n",
+ *vcpu_pc(vcpu), *vcpu_cpsr(vcpu));
kvm_inject_undefined(vcpu);
}
return 1;
@@ -2360,8 +2368,11 @@ static const struct sys_reg_desc *index_to_sys_reg_desc(struct kvm_vcpu *vcpu,
if ((id & KVM_REG_ARM_COPROC_MASK) != KVM_REG_ARM64_SYSREG)
return NULL;
+ if (!index_to_params(id, &params))
+ return NULL;
+
table = get_target_table(vcpu->arch.target, true, &num);
- r = find_reg_by_id(id, &params, table, num);
+ r = find_reg(&params, table, num);
if (!r)
r = find_reg(&params, sys_reg_descs, ARRAY_SIZE(sys_reg_descs));
diff --git a/arch/arm64/kvm/sys_regs.h b/arch/arm64/kvm/sys_regs.h
index 9bca0312d798..5a6fc30f5989 100644
--- a/arch/arm64/kvm/sys_regs.h
+++ b/arch/arm64/kvm/sys_regs.h
@@ -62,11 +62,24 @@ struct sys_reg_desc {
#define REG_HIDDEN_USER (1 << 0) /* hidden from userspace ioctls */
#define REG_HIDDEN_GUEST (1 << 1) /* hidden from guest */
-static inline void print_sys_reg_instr(const struct sys_reg_params *p)
+static __printf(2, 3)
+inline void print_sys_reg_msg(const struct sys_reg_params *p,
+ char *fmt, ...)
{
+ va_list va;
+
+ va_start(va, fmt);
/* Look, we even formatted it for you to paste into the table! */
- kvm_pr_unimpl(" { Op0(%2u), Op1(%2u), CRn(%2u), CRm(%2u), Op2(%2u), func_%s },\n",
+ kvm_pr_unimpl("%pV { Op0(%2u), Op1(%2u), CRn(%2u), CRm(%2u), Op2(%2u), func_%s },\n",
+ &(struct va_format){ fmt, &va },
p->Op0, p->Op1, p->CRn, p->CRm, p->Op2, p->is_write ? "write" : "read");
+ va_end(va);
+}
+
+static inline void print_sys_reg_instr(const struct sys_reg_params *p)
+{
+ /* GCC warns on an empty format string */
+ print_sys_reg_msg(p, "%s", "");
}
static inline bool ignore_write(struct kvm_vcpu *vcpu,
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 077b02a2d4d3..85566d32958f 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -445,7 +445,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
const struct fault_info *inf;
struct mm_struct *mm = current->mm;
vm_fault_t fault, major = 0;
- unsigned long vm_flags = VM_READ | VM_WRITE;
+ unsigned long vm_flags = VM_READ | VM_WRITE | VM_EXEC;
unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
if (kprobe_page_fault(regs, esr))
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 5a3b15a14a7f..40797cbfba2d 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -1070,7 +1070,6 @@ void arch_remove_memory(int nid, u64 start, u64 size,
{
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
- struct zone *zone;
/*
* FIXME: Cleanup page tables (also in arch_add_memory() in case
@@ -1079,7 +1078,6 @@ void arch_remove_memory(int nid, u64 start, u64 size,
* unplug. ARCH_ENABLE_MEMORY_HOTREMOVE must not be
* unlocked yet.
*/
- zone = page_zone(pfn_to_page(start_pfn));
- __remove_pages(zone, start_pfn, nr_pages, altmap);
+ __remove_pages(start_pfn, nr_pages, altmap);
}
#endif
diff --git a/arch/hexagon/include/asm/atomic.h b/arch/hexagon/include/asm/atomic.h
index 12cd9231c4b8..0231d69c8bf2 100644
--- a/arch/hexagon/include/asm/atomic.h
+++ b/arch/hexagon/include/asm/atomic.h
@@ -91,7 +91,7 @@ static inline void atomic_##op(int i, atomic_t *v) \
"1: %0 = memw_locked(%1);\n" \
" %0 = "#op "(%0,%2);\n" \
" memw_locked(%1,P3)=%0;\n" \
- " if !P3 jump 1b;\n" \
+ " if (!P3) jump 1b;\n" \
: "=&r" (output) \
: "r" (&v->counter), "r" (i) \
: "memory", "p3" \
@@ -107,7 +107,7 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \
"1: %0 = memw_locked(%1);\n" \
" %0 = "#op "(%0,%2);\n" \
" memw_locked(%1,P3)=%0;\n" \
- " if !P3 jump 1b;\n" \
+ " if (!P3) jump 1b;\n" \
: "=&r" (output) \
: "r" (&v->counter), "r" (i) \
: "memory", "p3" \
@@ -124,7 +124,7 @@ static inline int atomic_fetch_##op(int i, atomic_t *v) \
"1: %0 = memw_locked(%2);\n" \
" %1 = "#op "(%0,%3);\n" \
" memw_locked(%2,P3)=%1;\n" \
- " if !P3 jump 1b;\n" \
+ " if (!P3) jump 1b;\n" \
: "=&r" (output), "=&r" (val) \
: "r" (&v->counter), "r" (i) \
: "memory", "p3" \
@@ -173,7 +173,7 @@ static inline int atomic_fetch_add_unless(atomic_t *v, int a, int u)
" }"
" memw_locked(%2, p3) = %1;"
" {"
- " if !p3 jump 1b;"
+ " if (!p3) jump 1b;"
" }"
"2:"
: "=&r" (__oldval), "=&r" (tmp)
diff --git a/arch/hexagon/include/asm/bitops.h b/arch/hexagon/include/asm/bitops.h
index 47384b094b94..71429f756af0 100644
--- a/arch/hexagon/include/asm/bitops.h
+++ b/arch/hexagon/include/asm/bitops.h
@@ -38,7 +38,7 @@ static inline int test_and_clear_bit(int nr, volatile void *addr)
"1: R12 = memw_locked(R10);\n"
" { P0 = tstbit(R12,R11); R12 = clrbit(R12,R11); }\n"
" memw_locked(R10,P1) = R12;\n"
- " {if !P1 jump 1b; %0 = mux(P0,#1,#0);}\n"
+ " {if (!P1) jump 1b; %0 = mux(P0,#1,#0);}\n"
: "=&r" (oldval)
: "r" (addr), "r" (nr)
: "r10", "r11", "r12", "p0", "p1", "memory"
@@ -62,7 +62,7 @@ static inline int test_and_set_bit(int nr, volatile void *addr)
"1: R12 = memw_locked(R10);\n"
" { P0 = tstbit(R12,R11); R12 = setbit(R12,R11); }\n"
" memw_locked(R10,P1) = R12;\n"
- " {if !P1 jump 1b; %0 = mux(P0,#1,#0);}\n"
+ " {if (!P1) jump 1b; %0 = mux(P0,#1,#0);}\n"
: "=&r" (oldval)
: "r" (addr), "r" (nr)
: "r10", "r11", "r12", "p0", "p1", "memory"
@@ -88,7 +88,7 @@ static inline int test_and_change_bit(int nr, volatile void *addr)
"1: R12 = memw_locked(R10);\n"
" { P0 = tstbit(R12,R11); R12 = togglebit(R12,R11); }\n"
" memw_locked(R10,P1) = R12;\n"
- " {if !P1 jump 1b; %0 = mux(P0,#1,#0);}\n"
+ " {if (!P1) jump 1b; %0 = mux(P0,#1,#0);}\n"
: "=&r" (oldval)
: "r" (addr), "r" (nr)
: "r10", "r11", "r12", "p0", "p1", "memory"
@@ -223,7 +223,7 @@ static inline int ffs(int x)
int r;
asm("{ P0 = cmp.eq(%1,#0); %0 = ct0(%1);}\n"
- "{ if P0 %0 = #0; if !P0 %0 = add(%0,#1);}\n"
+ "{ if (P0) %0 = #0; if (!P0) %0 = add(%0,#1);}\n"
: "=&r" (r)
: "r" (x)
: "p0");
diff --git a/arch/hexagon/include/asm/cmpxchg.h b/arch/hexagon/include/asm/cmpxchg.h
index 6091322c3af9..92b8a02e588a 100644
--- a/arch/hexagon/include/asm/cmpxchg.h
+++ b/arch/hexagon/include/asm/cmpxchg.h
@@ -30,7 +30,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr,
__asm__ __volatile__ (
"1: %0 = memw_locked(%1);\n" /* load into retval */
" memw_locked(%1,P0) = %2;\n" /* store into memory */
- " if !P0 jump 1b;\n"
+ " if (!P0) jump 1b;\n"
: "=&r" (retval)
: "r" (ptr), "r" (x)
: "memory", "p0"
diff --git a/arch/hexagon/include/asm/futex.h b/arch/hexagon/include/asm/futex.h
index cb635216a732..0191f7c7193e 100644
--- a/arch/hexagon/include/asm/futex.h
+++ b/arch/hexagon/include/asm/futex.h
@@ -16,7 +16,7 @@
/* For example: %1 = %4 */ \
insn \
"2: memw_locked(%3,p2) = %1;\n" \
- " if !p2 jump 1b;\n" \
+ " if (!p2) jump 1b;\n" \
" %1 = #0;\n" \
"3:\n" \
".section .fixup,\"ax\"\n" \
@@ -84,10 +84,10 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval,
"1: %1 = memw_locked(%3)\n"
" {\n"
" p2 = cmp.eq(%1,%4)\n"
- " if !p2.new jump:NT 3f\n"
+ " if (!p2.new) jump:NT 3f\n"
" }\n"
"2: memw_locked(%3,p2) = %5\n"
- " if !p2 jump 1b\n"
+ " if (!p2) jump 1b\n"
"3:\n"
".section .fixup,\"ax\"\n"
"4: %0 = #%6\n"
diff --git a/arch/hexagon/include/asm/io.h b/arch/hexagon/include/asm/io.h
index 539e3efcf39c..b0dbc3473172 100644
--- a/arch/hexagon/include/asm/io.h
+++ b/arch/hexagon/include/asm/io.h
@@ -173,6 +173,7 @@ static inline void writel(u32 data, volatile void __iomem *addr)
void __iomem *ioremap(unsigned long phys_addr, unsigned long size);
#define ioremap_nocache ioremap
+#define ioremap_uc(X, Y) ioremap((X), (Y))
#define __raw_writel writel
diff --git a/arch/hexagon/include/asm/spinlock.h b/arch/hexagon/include/asm/spinlock.h
index bfe07d842ff3..ef103b73bec8 100644
--- a/arch/hexagon/include/asm/spinlock.h
+++ b/arch/hexagon/include/asm/spinlock.h
@@ -30,9 +30,9 @@ static inline void arch_read_lock(arch_rwlock_t *lock)
__asm__ __volatile__(
"1: R6 = memw_locked(%0);\n"
" { P3 = cmp.ge(R6,#0); R6 = add(R6,#1);}\n"
- " { if !P3 jump 1b; }\n"
+ " { if (!P3) jump 1b; }\n"
" memw_locked(%0,P3) = R6;\n"
- " { if !P3 jump 1b; }\n"
+ " { if (!P3) jump 1b; }\n"
:
: "r" (&lock->lock)
: "memory", "r6", "p3"
@@ -46,7 +46,7 @@ static inline void arch_read_unlock(arch_rwlock_t *lock)
"1: R6 = memw_locked(%0);\n"
" R6 = add(R6,#-1);\n"
" memw_locked(%0,P3) = R6\n"
- " if !P3 jump 1b;\n"
+ " if (!P3) jump 1b;\n"
:
: "r" (&lock->lock)
: "memory", "r6", "p3"
@@ -61,7 +61,7 @@ static inline int arch_read_trylock(arch_rwlock_t *lock)
__asm__ __volatile__(
" R6 = memw_locked(%1);\n"
" { %0 = #0; P3 = cmp.ge(R6,#0); R6 = add(R6,#1);}\n"
- " { if !P3 jump 1f; }\n"
+ " { if (!P3) jump 1f; }\n"
" memw_locked(%1,P3) = R6;\n"
" { %0 = P3 }\n"
"1:\n"
@@ -78,9 +78,9 @@ static inline void arch_write_lock(arch_rwlock_t *lock)
__asm__ __volatile__(
"1: R6 = memw_locked(%0)\n"
" { P3 = cmp.eq(R6,#0); R6 = #-1;}\n"
- " { if !P3 jump 1b; }\n"
+ " { if (!P3) jump 1b; }\n"
" memw_locked(%0,P3) = R6;\n"
- " { if !P3 jump 1b; }\n"
+ " { if (!P3) jump 1b; }\n"
:
: "r" (&lock->lock)
: "memory", "r6", "p3"
@@ -94,7 +94,7 @@ static inline int arch_write_trylock(arch_rwlock_t *lock)
__asm__ __volatile__(
" R6 = memw_locked(%1)\n"
" { %0 = #0; P3 = cmp.eq(R6,#0); R6 = #-1;}\n"
- " { if !P3 jump 1f; }\n"
+ " { if (!P3) jump 1f; }\n"
" memw_locked(%1,P3) = R6;\n"
" %0 = P3;\n"
"1:\n"
@@ -117,9 +117,9 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
__asm__ __volatile__(
"1: R6 = memw_locked(%0);\n"
" P3 = cmp.eq(R6,#0);\n"
- " { if !P3 jump 1b; R6 = #1; }\n"
+ " { if (!P3) jump 1b; R6 = #1; }\n"
" memw_locked(%0,P3) = R6;\n"
- " { if !P3 jump 1b; }\n"
+ " { if (!P3) jump 1b; }\n"
:
: "r" (&lock->lock)
: "memory", "r6", "p3"
@@ -139,7 +139,7 @@ static inline unsigned int arch_spin_trylock(arch_spinlock_t *lock)
__asm__ __volatile__(
" R6 = memw_locked(%1);\n"
" P3 = cmp.eq(R6,#0);\n"
- " { if !P3 jump 1f; R6 = #1; %0 = #0; }\n"
+ " { if (!P3) jump 1f; R6 = #1; %0 = #0; }\n"
" memw_locked(%1,P3) = R6;\n"
" %0 = P3;\n"
"1:\n"
diff --git a/arch/hexagon/kernel/stacktrace.c b/arch/hexagon/kernel/stacktrace.c
index 35f29423fda8..5ed02f699479 100644
--- a/arch/hexagon/kernel/stacktrace.c
+++ b/arch/hexagon/kernel/stacktrace.c
@@ -11,8 +11,6 @@
#include <linux/thread_info.h>
#include <linux/module.h>
-register unsigned long current_frame_pointer asm("r30");
-
struct stackframe {
unsigned long fp;
unsigned long rets;
@@ -30,7 +28,7 @@ void save_stack_trace(struct stack_trace *trace)
low = (unsigned long)task_stack_page(current);
high = low + THREAD_SIZE;
- fp = current_frame_pointer;
+ fp = (unsigned long)__builtin_frame_address(0);
while (fp >= low && fp <= (high - sizeof(*frame))) {
frame = (struct stackframe *)fp;
diff --git a/arch/hexagon/kernel/vm_entry.S b/arch/hexagon/kernel/vm_entry.S
index 12242c27e2df..4023fdbea490 100644
--- a/arch/hexagon/kernel/vm_entry.S
+++ b/arch/hexagon/kernel/vm_entry.S
@@ -369,7 +369,7 @@ ret_from_fork:
R26.L = #LO(do_work_pending);
R0 = #VM_INT_DISABLE;
}
- if P0 jump check_work_pending
+ if (P0) jump check_work_pending
{
R0 = R25;
callr R24
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 58fd67068bac..b01d68a2d5d9 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -689,9 +689,7 @@ void arch_remove_memory(int nid, u64 start, u64 size,
{
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
- struct zone *zone;
- zone = page_zone(pfn_to_page(start_pfn));
- __remove_pages(zone, start_pfn, nr_pages, altmap);
+ __remove_pages(start_pfn, nr_pages, altmap);
}
#endif
diff --git a/arch/m68k/emu/nfeth.c b/arch/m68k/emu/nfeth.c
index a4ebd2445eda..d2875e32abfc 100644
--- a/arch/m68k/emu/nfeth.c
+++ b/arch/m68k/emu/nfeth.c
@@ -167,7 +167,7 @@ static int nfeth_xmit(struct sk_buff *skb, struct net_device *dev)
return 0;
}
-static void nfeth_tx_timeout(struct net_device *dev)
+static void nfeth_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
dev->stats.tx_errors++;
netif_wake_queue(dev);
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index add388236f4e..ed8e28b0fb3e 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -47,7 +47,7 @@ config MIPS
select HAVE_ARCH_TRACEHOOK
select HAVE_ARCH_TRANSPARENT_HUGEPAGE if CPU_SUPPORTS_HUGEPAGES
select HAVE_ASM_MODVERSIONS
- select HAVE_EBPF_JIT if (!CPU_MICROMIPS)
+ select HAVE_EBPF_JIT if 64BIT && !CPU_MICROMIPS && TARGET_ISA_REV >= 2
select HAVE_CONTEXT_TRACKING
select HAVE_COPY_THREAD_TLS
select HAVE_C_RECORDMCOUNT
diff --git a/arch/mips/boot/compressed/Makefile b/arch/mips/boot/compressed/Makefile
index 172801ed35b8..d859f079b771 100644
--- a/arch/mips/boot/compressed/Makefile
+++ b/arch/mips/boot/compressed/Makefile
@@ -29,6 +29,9 @@ KBUILD_AFLAGS := $(KBUILD_AFLAGS) -D__ASSEMBLY__ \
-DBOOT_HEAP_SIZE=$(BOOT_HEAP_SIZE) \
-DKERNEL_ENTRY=$(VMLINUX_ENTRY_ADDRESS)
+# Prevents link failures: __sanitizer_cov_trace_pc() is not linked in.
+KCOV_INSTRUMENT := n
+
# decompressor objects (linked with vmlinuz)
vmlinuzobjs-y := $(obj)/head.o $(obj)/decompress.o $(obj)/string.o
diff --git a/arch/mips/boot/dts/qca/ar9331.dtsi b/arch/mips/boot/dts/qca/ar9331.dtsi
index 5cfc9d347826..8f5aed760abb 100644
--- a/arch/mips/boot/dts/qca/ar9331.dtsi
+++ b/arch/mips/boot/dts/qca/ar9331.dtsi
@@ -126,6 +126,9 @@
clocks = <&pll ATH79_CLK_AHB>, <&pll ATH79_CLK_AHB>;
clock-names = "eth", "mdio";
+ phy-mode = "mii";
+ phy-handle = <&phy_port4>;
+
status = "disabled";
};
@@ -133,13 +136,127 @@
compatible = "qca,ar9330-eth";
reg = <0x1a000000 0x200>;
interrupts = <5>;
-
resets = <&rst 13>, <&rst 23>;
reset-names = "mac", "mdio";
clocks = <&pll ATH79_CLK_AHB>, <&pll ATH79_CLK_AHB>;
clock-names = "eth", "mdio";
+ phy-mode = "gmii";
+
status = "disabled";
+
+ fixed-link {
+ speed = <1000>;
+ full-duplex;
+ };
+
+ mdio {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ switch10: switch@10 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ compatible = "qca,ar9331-switch";
+ reg = <0x10>;
+ resets = <&rst 8>;
+ reset-names = "switch";
+
+ interrupt-parent = <&miscintc>;
+ interrupts = <12>;
+
+ interrupt-controller;
+ #interrupt-cells = <1>;
+
+ ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ switch_port0: port@0 {
+ reg = <0x0>;
+ label = "cpu";
+ ethernet = <&eth1>;
+
+ phy-mode = "gmii";
+
+ fixed-link {
+ speed = <1000>;
+ full-duplex;
+ };
+ };
+
+ switch_port1: port@1 {
+ reg = <0x1>;
+ phy-handle = <&phy_port0>;
+ phy-mode = "internal";
+
+ status = "disabled";
+ };
+
+ switch_port2: port@2 {
+ reg = <0x2>;
+ phy-handle = <&phy_port1>;
+ phy-mode = "internal";
+
+ status = "disabled";
+ };
+
+ switch_port3: port@3 {
+ reg = <0x3>;
+ phy-handle = <&phy_port2>;
+ phy-mode = "internal";
+
+ status = "disabled";
+ };
+
+ switch_port4: port@4 {
+ reg = <0x4>;
+ phy-handle = <&phy_port3>;
+ phy-mode = "internal";
+
+ status = "disabled";
+ };
+ };
+
+ mdio {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ interrupt-parent = <&switch10>;
+
+ phy_port0: phy@0 {
+ reg = <0x0>;
+ interrupts = <0>;
+ status = "disabled";
+ };
+
+ phy_port1: phy@1 {
+ reg = <0x1>;
+ interrupts = <0>;
+ status = "disabled";
+ };
+
+ phy_port2: phy@2 {
+ reg = <0x2>;
+ interrupts = <0>;
+ status = "disabled";
+ };
+
+ phy_port3: phy@3 {
+ reg = <0x3>;
+ interrupts = <0>;
+ status = "disabled";
+ };
+
+ phy_port4: phy@4 {
+ reg = <0x4>;
+ interrupts = <0>;
+ status = "disabled";
+ };
+ };
+ };
+ };
};
usb: usb@1b000100 {
diff --git a/arch/mips/boot/dts/qca/ar9331_dpt_module.dts b/arch/mips/boot/dts/qca/ar9331_dpt_module.dts
index 77bab823eb3b..0f2b20044834 100644
--- a/arch/mips/boot/dts/qca/ar9331_dpt_module.dts
+++ b/arch/mips/boot/dts/qca/ar9331_dpt_module.dts
@@ -84,3 +84,16 @@
&eth1 {
status = "okay";
};
+
+&switch_port1 {
+ label = "lan0";
+ status = "okay";
+};
+
+&phy_port0 {
+ status = "okay";
+};
+
+&phy_port4 {
+ status = "okay";
+};
diff --git a/arch/mips/cavium-octeon/executive/cvmx-bootmem.c b/arch/mips/cavium-octeon/executive/cvmx-bootmem.c
index ba8f82a29a81..e794b2d53adf 100644
--- a/arch/mips/cavium-octeon/executive/cvmx-bootmem.c
+++ b/arch/mips/cavium-octeon/executive/cvmx-bootmem.c
@@ -45,13 +45,6 @@ static struct cvmx_bootmem_desc *cvmx_bootmem_desc;
/* See header file for descriptions of functions */
/**
- * This macro returns the size of a member of a structure.
- * Logically it is the same as "sizeof(s::field)" in C++, but
- * C lacks the "::" operator.
- */
-#define SIZEOF_FIELD(s, field) sizeof(((s *)NULL)->field)
-
-/**
* This macro returns a member of the
* cvmx_bootmem_named_block_desc_t structure. These members can't
* be directly addressed as they might be in memory not directly
@@ -65,7 +58,7 @@ static struct cvmx_bootmem_desc *cvmx_bootmem_desc;
#define CVMX_BOOTMEM_NAMED_GET_FIELD(addr, field) \
__cvmx_bootmem_desc_get(addr, \
offsetof(struct cvmx_bootmem_named_block_desc, field), \
- SIZEOF_FIELD(struct cvmx_bootmem_named_block_desc, field))
+ sizeof_field(struct cvmx_bootmem_named_block_desc, field))
/**
* This function is the implementation of the get macros defined
diff --git a/arch/mips/include/asm/cpu-type.h b/arch/mips/include/asm/cpu-type.h
index c46c59b0f1b4..49f0061a6051 100644
--- a/arch/mips/include/asm/cpu-type.h
+++ b/arch/mips/include/asm/cpu-type.h
@@ -15,7 +15,8 @@
static inline int __pure __get_cpu_type(const int cpu_type)
{
switch (cpu_type) {
-#if defined(CONFIG_SYS_HAS_CPU_LOONGSON2EF)
+#if defined(CONFIG_SYS_HAS_CPU_LOONGSON2E) || \
+ defined(CONFIG_SYS_HAS_CPU_LOONGSON2F)
case CPU_LOONGSON2EF:
#endif
diff --git a/arch/mips/include/asm/thread_info.h b/arch/mips/include/asm/thread_info.h
index 4993db40482c..ee26f9a4575d 100644
--- a/arch/mips/include/asm/thread_info.h
+++ b/arch/mips/include/asm/thread_info.h
@@ -49,8 +49,26 @@ struct thread_info {
.addr_limit = KERNEL_DS, \
}
-/* How to get the thread information struct from C. */
+/*
+ * A pointer to the struct thread_info for the currently executing thread is
+ * held in register $28/$gp.
+ *
+ * We declare __current_thread_info as a global register variable rather than a
+ * local register variable within current_thread_info() because clang doesn't
+ * support explicit local register variables.
+ *
+ * When building the VDSO we take care not to declare the global register
+ * variable because this causes GCC to not preserve the value of $28/$gp in
+ * functions that change its value (which is common in the PIC VDSO when
+ * accessing the GOT). Since the VDSO shouldn't be accessing
+ * __current_thread_info anyway we declare it extern in order to cause a link
+ * failure if it's referenced.
+ */
+#ifdef __VDSO__
+extern struct thread_info *__current_thread_info;
+#else
register struct thread_info *__current_thread_info __asm__("$28");
+#endif
static inline struct thread_info *current_thread_info(void)
{
diff --git a/arch/mips/include/asm/vdso/gettimeofday.h b/arch/mips/include/asm/vdso/gettimeofday.h
index b08825531e9f..0ae9b4cbc153 100644
--- a/arch/mips/include/asm/vdso/gettimeofday.h
+++ b/arch/mips/include/asm/vdso/gettimeofday.h
@@ -26,8 +26,6 @@
#define __VDSO_USE_SYSCALL ULLONG_MAX
-#ifdef CONFIG_MIPS_CLOCK_VSYSCALL
-
static __always_inline long gettimeofday_fallback(
struct __kernel_old_timeval *_tv,
struct timezone *_tz)
@@ -48,17 +46,6 @@ static __always_inline long gettimeofday_fallback(
return error ? -ret : ret;
}
-#else
-
-static __always_inline long gettimeofday_fallback(
- struct __kernel_old_timeval *_tv,
- struct timezone *_tz)
-{
- return -1;
-}
-
-#endif
-
static __always_inline long clock_gettime_fallback(
clockid_t _clkid,
struct __kernel_timespec *_ts)
diff --git a/arch/mips/kernel/cacheinfo.c b/arch/mips/kernel/cacheinfo.c
index f777e44653d5..47312c529410 100644
--- a/arch/mips/kernel/cacheinfo.c
+++ b/arch/mips/kernel/cacheinfo.c
@@ -50,6 +50,25 @@ static int __init_cache_level(unsigned int cpu)
return 0;
}
+static void fill_cpumask_siblings(int cpu, cpumask_t *cpu_map)
+{
+ int cpu1;
+
+ for_each_possible_cpu(cpu1)
+ if (cpus_are_siblings(cpu, cpu1))
+ cpumask_set_cpu(cpu1, cpu_map);
+}
+
+static void fill_cpumask_cluster(int cpu, cpumask_t *cpu_map)
+{
+ int cpu1;
+ int cluster = cpu_cluster(&cpu_data[cpu]);
+
+ for_each_possible_cpu(cpu1)
+ if (cpu_cluster(&cpu_data[cpu1]) == cluster)
+ cpumask_set_cpu(cpu1, cpu_map);
+}
+
static int __populate_cache_leaves(unsigned int cpu)
{
struct cpuinfo_mips *c = &current_cpu_data;
@@ -57,14 +76,20 @@ static int __populate_cache_leaves(unsigned int cpu)
struct cacheinfo *this_leaf = this_cpu_ci->info_list;
if (c->icache.waysize) {
+ /* L1 caches are per core */
+ fill_cpumask_siblings(cpu, &this_leaf->shared_cpu_map);
populate_cache(dcache, this_leaf, 1, CACHE_TYPE_DATA);
+ fill_cpumask_siblings(cpu, &this_leaf->shared_cpu_map);
populate_cache(icache, this_leaf, 1, CACHE_TYPE_INST);
} else {
populate_cache(dcache, this_leaf, 1, CACHE_TYPE_UNIFIED);
}
- if (c->scache.waysize)
+ if (c->scache.waysize) {
+ /* L2 cache is per cluster */
+ fill_cpumask_cluster(cpu, &this_leaf->shared_cpu_map);
populate_cache(scache, this_leaf, 2, CACHE_TYPE_UNIFIED);
+ }
if (c->tcache.waysize)
populate_cache(tcache, this_leaf, 3, CACHE_TYPE_UNIFIED);
diff --git a/arch/mips/net/ebpf_jit.c b/arch/mips/net/ebpf_jit.c
index 46b76751f3a5..561154cbcc40 100644
--- a/arch/mips/net/ebpf_jit.c
+++ b/arch/mips/net/ebpf_jit.c
@@ -604,6 +604,7 @@ static void emit_const_to_reg(struct jit_ctx *ctx, int dst, u64 value)
static int emit_bpf_tail_call(struct jit_ctx *ctx, int this_idx)
{
int off, b_off;
+ int tcc_reg;
ctx->flags |= EBPF_SEEN_TC;
/*
@@ -616,14 +617,14 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx, int this_idx)
b_off = b_imm(this_idx + 1, ctx);
emit_instr(ctx, bne, MIPS_R_AT, MIPS_R_ZERO, b_off);
/*
- * if (--TCC < 0)
+ * if (TCC-- < 0)
* goto out;
*/
/* Delay slot */
- emit_instr(ctx, daddiu, MIPS_R_T5,
- (ctx->flags & EBPF_TCC_IN_V1) ? MIPS_R_V1 : MIPS_R_S4, -1);
+ tcc_reg = (ctx->flags & EBPF_TCC_IN_V1) ? MIPS_R_V1 : MIPS_R_S4;
+ emit_instr(ctx, daddiu, MIPS_R_T5, tcc_reg, -1);
b_off = b_imm(this_idx + 1, ctx);
- emit_instr(ctx, bltz, MIPS_R_T5, b_off);
+ emit_instr(ctx, bltz, tcc_reg, b_off);
/*
* prog = array->ptrs[index];
* if (prog == NULL)
@@ -1803,7 +1804,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
unsigned int image_size;
u8 *image_ptr;
- if (!prog->jit_requested || MIPS_ISA_REV < 2)
+ if (!prog->jit_requested)
return prog;
tmp = bpf_jit_blind_constants(prog);
diff --git a/arch/mips/vdso/vgettimeofday.c b/arch/mips/vdso/vgettimeofday.c
index 6ebdc37c89fc..6b83b6376a4b 100644
--- a/arch/mips/vdso/vgettimeofday.c
+++ b/arch/mips/vdso/vgettimeofday.c
@@ -17,12 +17,22 @@ int __vdso_clock_gettime(clockid_t clock,
return __cvdso_clock_gettime32(clock, ts);
}
+#ifdef CONFIG_MIPS_CLOCK_VSYSCALL
+
+/*
+ * This is behind the ifdef so that we don't provide the symbol when there's no
+ * possibility of there being a usable clocksource, because there's nothing we
+ * can do without it. When libc fails the symbol lookup it should fall back on
+ * the standard syscall path.
+ */
int __vdso_gettimeofday(struct __kernel_old_timeval *tv,
struct timezone *tz)
{
return __cvdso_gettimeofday(tv, tz);
}
+#endif /* CONFIG_MIPS_CLOCK_VSYSCALL */
+
int __vdso_clock_getres(clockid_t clock_id,
struct old_timespec32 *res)
{
@@ -43,12 +53,22 @@ int __vdso_clock_gettime(clockid_t clock,
return __cvdso_clock_gettime(clock, ts);
}
+#ifdef CONFIG_MIPS_CLOCK_VSYSCALL
+
+/*
+ * This is behind the ifdef so that we don't provide the symbol when there's no
+ * possibility of there being a usable clocksource, because there's nothing we
+ * can do without it. When libc fails the symbol lookup it should fall back on
+ * the standard syscall path.
+ */
int __vdso_gettimeofday(struct __kernel_old_timeval *tv,
struct timezone *tz)
{
return __cvdso_gettimeofday(tv, tz);
}
+#endif /* CONFIG_MIPS_CLOCK_VSYSCALL */
+
int __vdso_clock_getres(clockid_t clock_id,
struct __kernel_timespec *res)
{
diff --git a/arch/nios2/mm/ioremap.c b/arch/nios2/mm/ioremap.c
index b56af759dcdf..819bdfcc2e71 100644
--- a/arch/nios2/mm/ioremap.c
+++ b/arch/nios2/mm/ioremap.c
@@ -138,6 +138,14 @@ void __iomem *ioremap(unsigned long phys_addr, unsigned long size)
return NULL;
}
+ /*
+ * Map uncached objects in the low part of address space to
+ * CONFIG_NIOS2_IO_REGION_BASE
+ */
+ if (IS_MAPPABLE_UNCACHEABLE(phys_addr) &&
+ IS_MAPPABLE_UNCACHEABLE(last_addr))
+ return (void __iomem *)(CONFIG_NIOS2_IO_REGION_BASE + phys_addr);
+
/* Mappings have to be page-aligned */
offset = phys_addr & ~PAGE_MASK;
phys_addr &= PAGE_MASK;
diff --git a/arch/parisc/include/asm/cmpxchg.h b/arch/parisc/include/asm/cmpxchg.h
index f627c37dad9c..ab5c215cf46c 100644
--- a/arch/parisc/include/asm/cmpxchg.h
+++ b/arch/parisc/include/asm/cmpxchg.h
@@ -44,8 +44,14 @@ __xchg(unsigned long x, __volatile__ void *ptr, int size)
** if (((unsigned long)p & 0xf) == 0)
** return __ldcw(p);
*/
-#define xchg(ptr, x) \
- ((__typeof__(*(ptr)))__xchg((unsigned long)(x), (ptr), sizeof(*(ptr))))
+#define xchg(ptr, x) \
+({ \
+ __typeof__(*(ptr)) __ret; \
+ __typeof__(*(ptr)) _x_ = (x); \
+ __ret = (__typeof__(*(ptr))) \
+ __xchg((unsigned long)_x_, (ptr), sizeof(*(ptr))); \
+ __ret; \
+})
/* bug catcher for when unsupported size is used - won't link */
extern void __cmpxchg_called_with_bad_pointer(void);
diff --git a/arch/parisc/include/asm/kexec.h b/arch/parisc/include/asm/kexec.h
index a99ea747d7ed..87e174006995 100644
--- a/arch/parisc/include/asm/kexec.h
+++ b/arch/parisc/include/asm/kexec.h
@@ -2,8 +2,6 @@
#ifndef _ASM_PARISC_KEXEC_H
#define _ASM_PARISC_KEXEC_H
-#ifdef CONFIG_KEXEC
-
/* Maximum physical address we can use pages from */
#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL)
/* Maximum address we can reach in physical address mode */
@@ -32,6 +30,4 @@ static inline void crash_setup_regs(struct pt_regs *newregs,
#endif /* __ASSEMBLY__ */
-#endif /* CONFIG_KEXEC */
-
#endif /* _ASM_PARISC_KEXEC_H */
diff --git a/arch/parisc/kernel/Makefile b/arch/parisc/kernel/Makefile
index 2663c8f8be11..068d90950d93 100644
--- a/arch/parisc/kernel/Makefile
+++ b/arch/parisc/kernel/Makefile
@@ -37,5 +37,5 @@ obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
obj-$(CONFIG_JUMP_LABEL) += jump_label.o
obj-$(CONFIG_KGDB) += kgdb.o
obj-$(CONFIG_KPROBES) += kprobes.o
-obj-$(CONFIG_KEXEC) += kexec.o relocate_kernel.o
+obj-$(CONFIG_KEXEC_CORE) += kexec.o relocate_kernel.o
obj-$(CONFIG_KEXEC_FILE) += kexec_file.o
diff --git a/arch/parisc/kernel/drivers.c b/arch/parisc/kernel/drivers.c
index 3b330e58a4f0..a6c9f49c6612 100644
--- a/arch/parisc/kernel/drivers.c
+++ b/arch/parisc/kernel/drivers.c
@@ -810,7 +810,7 @@ EXPORT_SYMBOL(device_to_hwpath);
static void walk_native_bus(unsigned long io_io_low, unsigned long io_io_high,
struct device *parent);
-static void walk_lower_bus(struct parisc_device *dev)
+static void __init walk_lower_bus(struct parisc_device *dev)
{
unsigned long io_io_low, io_io_high;
diff --git a/arch/parisc/kernel/pdt.c b/arch/parisc/kernel/pdt.c
index 36434d4da381..749c4579db0d 100644
--- a/arch/parisc/kernel/pdt.c
+++ b/arch/parisc/kernel/pdt.c
@@ -327,8 +327,7 @@ static int pdt_mainloop(void *unused)
((pde & PDT_ADDR_SINGLE_ERR) == 0))
memory_failure(pde >> PAGE_SHIFT, 0);
else
- soft_offline_page(
- pfn_to_page(pde >> PAGE_SHIFT), 0);
+ soft_offline_page(pde >> PAGE_SHIFT, 0);
#else
pr_crit("PDT: memory error at 0x%lx ignored.\n"
"Rebuild kernel with CONFIG_MEMORY_FAILURE=y "
diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h
index e9a960e28f3c..860228e917dc 100644
--- a/arch/powerpc/include/asm/spinlock.h
+++ b/arch/powerpc/include/asm/spinlock.h
@@ -15,6 +15,7 @@
*
* (the type definitions are in asm/spinlock_types.h)
*/
+#include <linux/jump_label.h>
#include <linux/irqflags.h>
#ifdef CONFIG_PPC64
#include <asm/paca.h>
@@ -36,10 +37,12 @@
#endif
#ifdef CONFIG_PPC_PSERIES
+DECLARE_STATIC_KEY_FALSE(shared_processor);
+
#define vcpu_is_preempted vcpu_is_preempted
static inline bool vcpu_is_preempted(int cpu)
{
- if (!firmware_has_feature(FW_FEATURE_SPLPAR))
+ if (!static_branch_unlikely(&shared_processor))
return false;
return !!(be32_to_cpu(lppaca_of(cpu).yield_count) & 1);
}
@@ -110,13 +113,8 @@ static inline void splpar_rw_yield(arch_rwlock_t *lock) {};
static inline bool is_shared_processor(void)
{
-/*
- * LPPACA is only available on Pseries so guard anything LPPACA related to
- * allow other platforms (which include this common header) to compile.
- */
-#ifdef CONFIG_PPC_PSERIES
- return (IS_ENABLED(CONFIG_PPC_SPLPAR) &&
- lppaca_shared_proc(local_paca->lppaca_ptr));
+#ifdef CONFIG_PPC_SPLPAR
+ return static_branch_unlikely(&shared_processor);
#else
return false;
#endif
diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index 15002b51ff18..c92fe7fe9692 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -401,7 +401,7 @@ copy_to_user_mcsafe(void __user *to, const void *from, unsigned long n)
return n;
}
-extern unsigned long __clear_user(void __user *addr, unsigned long size);
+unsigned long __arch_clear_user(void __user *addr, unsigned long size);
static inline unsigned long clear_user(void __user *addr, unsigned long size)
{
@@ -409,12 +409,17 @@ static inline unsigned long clear_user(void __user *addr, unsigned long size)
might_fault();
if (likely(access_ok(addr, size))) {
allow_write_to_user(addr, size);
- ret = __clear_user(addr, size);
+ ret = __arch_clear_user(addr, size);
prevent_write_to_user(addr, size);
}
return ret;
}
+static inline unsigned long __clear_user(void __user *addr, unsigned long size)
+{
+ return clear_user(addr, size);
+}
+
extern long strncpy_from_user(char *dst, const char __user *src, long count);
extern __must_check long strnlen_user(const char __user *str, long n);
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 5645bc9cbc09..add67498c126 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -619,8 +619,6 @@ void __do_irq(struct pt_regs *regs)
trace_irq_entry(regs);
- check_stack_overflow();
-
/*
* Query the platform PIC for the interrupt & ack it.
*
@@ -652,6 +650,8 @@ void do_IRQ(struct pt_regs *regs)
irqsp = hardirq_ctx[raw_smp_processor_id()];
sirqsp = softirq_ctx[raw_smp_processor_id()];
+ check_stack_overflow();
+
/* Already there ? */
if (unlikely(cursp == irqsp || cursp == sirqsp)) {
__do_irq(regs);
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index dc53578193ee..6ff3f896d908 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -4983,7 +4983,8 @@ static void kvmppc_core_destroy_vm_hv(struct kvm *kvm)
if (nesting_enabled(kvm))
kvmhv_release_all_nested(kvm);
kvm->arch.process_table = 0;
- uv_svm_terminate(kvm->arch.lpid);
+ if (kvm->arch.secure_guest)
+ uv_svm_terminate(kvm->arch.lpid);
kvmhv_set_ptbl_entry(kvm->arch.lpid, 0, 0);
}
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 0496e66aaa56..c6fbbd29bd87 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -1117,7 +1117,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
ld r7, VCPU_GPR(R7)(r4)
bne ret_to_ultra
- lwz r0, VCPU_CR(r4)
+ ld r0, VCPU_CR(r4)
mtcr r0
ld r0, VCPU_GPR(R0)(r4)
@@ -1137,7 +1137,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
* R3 = UV_RETURN
*/
ret_to_ultra:
- lwz r0, VCPU_CR(r4)
+ ld r0, VCPU_CR(r4)
mtcr r0
ld r0, VCPU_GPR(R3)(r4)
diff --git a/arch/powerpc/lib/string_32.S b/arch/powerpc/lib/string_32.S
index f69a6aab7bfb..1ddb26394e8a 100644
--- a/arch/powerpc/lib/string_32.S
+++ b/arch/powerpc/lib/string_32.S
@@ -17,7 +17,7 @@ CACHELINE_BYTES = L1_CACHE_BYTES
LG_CACHELINE_BYTES = L1_CACHE_SHIFT
CACHELINE_MASK = (L1_CACHE_BYTES-1)
-_GLOBAL(__clear_user)
+_GLOBAL(__arch_clear_user)
/*
* Use dcbz on the complete cache lines in the destination
* to set them to zero. This requires that the destination
@@ -87,4 +87,4 @@ _GLOBAL(__clear_user)
EX_TABLE(8b, 91b)
EX_TABLE(9b, 91b)
-EXPORT_SYMBOL(__clear_user)
+EXPORT_SYMBOL(__arch_clear_user)
diff --git a/arch/powerpc/lib/string_64.S b/arch/powerpc/lib/string_64.S
index 507b18b1660e..169872bc0892 100644
--- a/arch/powerpc/lib/string_64.S
+++ b/arch/powerpc/lib/string_64.S
@@ -17,7 +17,7 @@ PPC64_CACHES:
.section ".text"
/**
- * __clear_user: - Zero a block of memory in user space, with less checking.
+ * __arch_clear_user: - Zero a block of memory in user space, with less checking.
* @to: Destination address, in user space.
* @n: Number of bytes to zero.
*
@@ -58,7 +58,7 @@ err3; stb r0,0(r3)
mr r3,r4
blr
-_GLOBAL_TOC(__clear_user)
+_GLOBAL_TOC(__arch_clear_user)
cmpdi r4,32
neg r6,r3
li r0,0
@@ -181,4 +181,4 @@ err1; dcbz 0,r3
cmpdi r4,32
blt .Lshort_clear
b .Lmedium_clear
-EXPORT_SYMBOL(__clear_user)
+EXPORT_SYMBOL(__arch_clear_user)
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 9488b63dfc87..f5535eae637f 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -151,10 +151,9 @@ void __ref arch_remove_memory(int nid, u64 start, u64 size,
{
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
- struct page *page = pfn_to_page(start_pfn) + vmem_altmap_offset(altmap);
int ret;
- __remove_pages(page_zone(page), start_pfn, nr_pages, altmap);
+ __remove_pages(start_pfn, nr_pages, altmap);
/* Remove htab bolted mappings for this section of memory */
start = (unsigned long)__va(start);
@@ -289,6 +288,14 @@ void __init mem_init(void)
BUILD_BUG_ON(MMU_PAGE_COUNT > 16);
#ifdef CONFIG_SWIOTLB
+ /*
+ * Some platforms (e.g. 85xx) limit DMA-able memory way below
+ * 4G. We force memblock to bottom-up mode to ensure that the
+ * memory allocated in swiotlb_init() is DMA-able.
+ * As it's the last memblock allocation, no need to reset it
+ * back to to-down.
+ */
+ memblock_set_bottom_up(true);
swiotlb_init(0);
#endif
diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c
index 090af2d2d3e4..96eb8e43f39b 100644
--- a/arch/powerpc/mm/nohash/8xx.c
+++ b/arch/powerpc/mm/nohash/8xx.c
@@ -103,7 +103,7 @@ static void mmu_patch_addis(s32 *site, long simm)
patch_instruction_site(site, instr);
}
-void __init mmu_mapin_ram_chunk(unsigned long offset, unsigned long top, pgprot_t prot)
+static void mmu_mapin_ram_chunk(unsigned long offset, unsigned long top, pgprot_t prot)
{
unsigned long s = offset;
unsigned long v = PAGE_OFFSET + s;
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index 42bbcd47cc85..dffe1a45b6ed 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -50,7 +50,7 @@ static void slice_print_mask(const char *label, const struct slice_mask *mask) {
#endif
-static inline bool slice_addr_is_low(unsigned long addr)
+static inline notrace bool slice_addr_is_low(unsigned long addr)
{
u64 tmp = (u64)addr;
@@ -659,7 +659,7 @@ unsigned long arch_get_unmapped_area_topdown(struct file *filp,
mm_ctx_user_psize(&current->mm->context), 1);
}
-unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr)
+unsigned int notrace get_slice_psize(struct mm_struct *mm, unsigned long addr)
{
unsigned char *psizes;
int index, mask_index;
diff --git a/arch/powerpc/net/bpf_jit32.h b/arch/powerpc/net/bpf_jit32.h
index 6e5a2a4faeab..4ec2a9f14f84 100644
--- a/arch/powerpc/net/bpf_jit32.h
+++ b/arch/powerpc/net/bpf_jit32.h
@@ -97,12 +97,12 @@ DECLARE_LOAD_FUNC(sk_load_byte_msh);
#ifdef CONFIG_SMP
#ifdef CONFIG_PPC64
#define PPC_BPF_LOAD_CPU(r) \
- do { BUILD_BUG_ON(FIELD_SIZEOF(struct paca_struct, paca_index) != 2); \
+ do { BUILD_BUG_ON(sizeof_field(struct paca_struct, paca_index) != 2); \
PPC_LHZ_OFFS(r, 13, offsetof(struct paca_struct, paca_index)); \
} while (0)
#else
#define PPC_BPF_LOAD_CPU(r) \
- do { BUILD_BUG_ON(FIELD_SIZEOF(struct task_struct, cpu) != 4); \
+ do { BUILD_BUG_ON(sizeof_field(struct task_struct, cpu) != 4); \
PPC_LHZ_OFFS(r, 2, offsetof(struct task_struct, cpu)); \
} while(0)
#endif
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index d57b46e0dd60..0acc9d5fb19e 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -321,7 +321,7 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
ctx->seen |= SEEN_XREG | SEEN_MEM | (1<<(K & 0xf));
break;
case BPF_LD | BPF_W | BPF_LEN: /* A = skb->len; */
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4);
+ BUILD_BUG_ON(sizeof_field(struct sk_buff, len) != 4);
PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, len));
break;
case BPF_LDX | BPF_W | BPF_ABS: /* A = *((u32 *)(seccomp_data + K)); */
@@ -333,16 +333,16 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
/*** Ancillary info loads ***/
case BPF_ANC | SKF_AD_PROTOCOL: /* A = ntohs(skb->protocol); */
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
+ BUILD_BUG_ON(sizeof_field(struct sk_buff,
protocol) != 2);
PPC_NTOHS_OFFS(r_A, r_skb, offsetof(struct sk_buff,
protocol));
break;
case BPF_ANC | SKF_AD_IFINDEX:
case BPF_ANC | SKF_AD_HATYPE:
- BUILD_BUG_ON(FIELD_SIZEOF(struct net_device,
+ BUILD_BUG_ON(sizeof_field(struct net_device,
ifindex) != 4);
- BUILD_BUG_ON(FIELD_SIZEOF(struct net_device,
+ BUILD_BUG_ON(sizeof_field(struct net_device,
type) != 2);
PPC_LL_OFFS(r_scratch1, r_skb, offsetof(struct sk_buff,
dev));
@@ -365,17 +365,17 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
break;
case BPF_ANC | SKF_AD_MARK:
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
+ BUILD_BUG_ON(sizeof_field(struct sk_buff, mark) != 4);
PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff,
mark));
break;
case BPF_ANC | SKF_AD_RXHASH:
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);
+ BUILD_BUG_ON(sizeof_field(struct sk_buff, hash) != 4);
PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff,
hash));
break;
case BPF_ANC | SKF_AD_VLAN_TAG:
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2);
+ BUILD_BUG_ON(sizeof_field(struct sk_buff, vlan_tci) != 2);
PPC_LHZ_OFFS(r_A, r_skb, offsetof(struct sk_buff,
vlan_tci));
@@ -388,7 +388,7 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
PPC_ANDI(r_A, r_A, 1);
break;
case BPF_ANC | SKF_AD_QUEUE:
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
+ BUILD_BUG_ON(sizeof_field(struct sk_buff,
queue_mapping) != 2);
PPC_LHZ_OFFS(r_A, r_skb, offsetof(struct sk_buff,
queue_mapping));
diff --git a/arch/powerpc/platforms/pseries/cmm.c b/arch/powerpc/platforms/pseries/cmm.c
index 91571841df8a..9dba7e880885 100644
--- a/arch/powerpc/platforms/pseries/cmm.c
+++ b/arch/powerpc/platforms/pseries/cmm.c
@@ -539,6 +539,16 @@ static int cmm_migratepage(struct balloon_dev_info *b_dev_info,
/* balloon page list reference */
get_page(newpage);
+ /*
+ * When we migrate a page to a different zone, we have to fixup the
+ * count of both involved zones as we adjusted the managed page count
+ * when inflating.
+ */
+ if (page_zone(page) != page_zone(newpage)) {
+ adjust_managed_page_count(page, 1);
+ adjust_managed_page_count(newpage, -1);
+ }
+
spin_lock_irqsave(&b_dev_info->pages_lock, flags);
balloon_page_insert(b_dev_info, newpage);
balloon_page_delete(page);
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 0a40201f315f..0c8421dd01ab 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -74,6 +74,9 @@
#include "pseries.h"
#include "../../../../drivers/pci/pci.h"
+DEFINE_STATIC_KEY_FALSE(shared_processor);
+EXPORT_SYMBOL_GPL(shared_processor);
+
int CMO_PrPSP = -1;
int CMO_SecPSP = -1;
unsigned long CMO_PageSize = (ASM_CONST(1) << IOMMU_PAGE_SHIFT_4K);
@@ -758,6 +761,10 @@ static void __init pSeries_setup_arch(void)
if (firmware_has_feature(FW_FEATURE_LPAR)) {
vpa_init(boot_cpuid);
+
+ if (lppaca_shared_proc(get_lppaca()))
+ static_branch_enable(&shared_processor);
+
ppc_md.power_save = pseries_lpar_idle;
ppc_md.enable_pmcs = pseries_lpar_enable_pmcs;
#ifdef CONFIG_PCI_IOV
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 759ffb00267c..a31169b02ec0 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -64,6 +64,7 @@ config RISCV
select SPARSEMEM_STATIC if 32BIT
select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
select HAVE_ARCH_MMAP_RND_BITS if MMU
+ select ARCH_HAS_GCOV_PROFILE_ALL
config ARCH_MMAP_RND_BITS_MIN
default 18 if 64BIT
@@ -154,7 +155,7 @@ config GENERIC_HWEIGHT
def_bool y
config FIX_EARLYCON_MEM
- def_bool CONFIG_MMU
+ def_bool MMU
config PGTABLE_LEVELS
int
diff --git a/arch/riscv/Kconfig.socs b/arch/riscv/Kconfig.socs
index 634759ac8c71..d325b67d00df 100644
--- a/arch/riscv/Kconfig.socs
+++ b/arch/riscv/Kconfig.socs
@@ -2,8 +2,8 @@ menu "SoC selection"
config SOC_SIFIVE
bool "SiFive SoCs"
- select SERIAL_SIFIVE
- select SERIAL_SIFIVE_CONSOLE
+ select SERIAL_SIFIVE if TTY
+ select SERIAL_SIFIVE_CONSOLE if TTY
select CLK_SIFIVE
select CLK_SIFIVE_FU540_PRCI
select SIFIVE_PLIC
diff --git a/arch/riscv/boot/Makefile b/arch/riscv/boot/Makefile
index a474f98ce4fa..36db8145f9f4 100644
--- a/arch/riscv/boot/Makefile
+++ b/arch/riscv/boot/Makefile
@@ -24,7 +24,7 @@ $(obj)/Image: vmlinux FORCE
$(obj)/Image.gz: $(obj)/Image FORCE
$(call if_changed,gzip)
-loader.o: $(src)/loader.S $(obj)/Image
+$(obj)/loader.o: $(src)/loader.S $(obj)/Image
$(obj)/loader: $(obj)/loader.o $(obj)/Image $(obj)/loader.lds FORCE
$(Q)$(LD) -T $(obj)/loader.lds -o $@ $(obj)/loader.o
diff --git a/arch/riscv/boot/dts/sifive/fu540-c000.dtsi b/arch/riscv/boot/dts/sifive/fu540-c000.dtsi
index 70a1891e7cd0..a2e3d54e830c 100644
--- a/arch/riscv/boot/dts/sifive/fu540-c000.dtsi
+++ b/arch/riscv/boot/dts/sifive/fu540-c000.dtsi
@@ -54,6 +54,7 @@
reg = <1>;
riscv,isa = "rv64imafdc";
tlb-split;
+ next-level-cache = <&l2cache>;
cpu1_intc: interrupt-controller {
#interrupt-cells = <1>;
compatible = "riscv,cpu-intc";
@@ -77,6 +78,7 @@
reg = <2>;
riscv,isa = "rv64imafdc";
tlb-split;
+ next-level-cache = <&l2cache>;
cpu2_intc: interrupt-controller {
#interrupt-cells = <1>;
compatible = "riscv,cpu-intc";
@@ -100,6 +102,7 @@
reg = <3>;
riscv,isa = "rv64imafdc";
tlb-split;
+ next-level-cache = <&l2cache>;
cpu3_intc: interrupt-controller {
#interrupt-cells = <1>;
compatible = "riscv,cpu-intc";
@@ -123,6 +126,7 @@
reg = <4>;
riscv,isa = "rv64imafdc";
tlb-split;
+ next-level-cache = <&l2cache>;
cpu4_intc: interrupt-controller {
#interrupt-cells = <1>;
compatible = "riscv,cpu-intc";
@@ -253,6 +257,17 @@
#pwm-cells = <3>;
status = "disabled";
};
+ l2cache: cache-controller@2010000 {
+ compatible = "sifive,fu540-c000-ccache", "cache";
+ cache-block-size = <64>;
+ cache-level = <2>;
+ cache-sets = <1024>;
+ cache-size = <2097152>;
+ cache-unified;
+ interrupt-parent = <&plic0>;
+ interrupts = <1 2 3>;
+ reg = <0x0 0x2010000 0x0 0x1000>;
+ };
};
};
diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h
index 0a62d2d68455..435b65532e29 100644
--- a/arch/riscv/include/asm/csr.h
+++ b/arch/riscv/include/asm/csr.h
@@ -116,9 +116,9 @@
# define SR_PIE SR_MPIE
# define SR_PP SR_MPP
-# define IRQ_SOFT IRQ_M_SOFT
-# define IRQ_TIMER IRQ_M_TIMER
-# define IRQ_EXT IRQ_M_EXT
+# define RV_IRQ_SOFT IRQ_M_SOFT
+# define RV_IRQ_TIMER IRQ_M_TIMER
+# define RV_IRQ_EXT IRQ_M_EXT
#else /* CONFIG_RISCV_M_MODE */
# define CSR_STATUS CSR_SSTATUS
# define CSR_IE CSR_SIE
@@ -133,15 +133,15 @@
# define SR_PIE SR_SPIE
# define SR_PP SR_SPP
-# define IRQ_SOFT IRQ_S_SOFT
-# define IRQ_TIMER IRQ_S_TIMER
-# define IRQ_EXT IRQ_S_EXT
+# define RV_IRQ_SOFT IRQ_S_SOFT
+# define RV_IRQ_TIMER IRQ_S_TIMER
+# define RV_IRQ_EXT IRQ_S_EXT
#endif /* CONFIG_RISCV_M_MODE */
/* IE/IP (Supervisor/Machine Interrupt Enable/Pending) flags */
-#define IE_SIE (_AC(0x1, UL) << IRQ_SOFT)
-#define IE_TIE (_AC(0x1, UL) << IRQ_TIMER)
-#define IE_EIE (_AC(0x1, UL) << IRQ_EXT)
+#define IE_SIE (_AC(0x1, UL) << RV_IRQ_SOFT)
+#define IE_TIE (_AC(0x1, UL) << RV_IRQ_TIMER)
+#define IE_EIE (_AC(0x1, UL) << RV_IRQ_EXT)
#ifndef __ASSEMBLY__
diff --git a/arch/riscv/include/asm/perf_event.h b/arch/riscv/include/asm/perf_event.h
index aefbfaa6a781..0234048b12bc 100644
--- a/arch/riscv/include/asm/perf_event.h
+++ b/arch/riscv/include/asm/perf_event.h
@@ -82,4 +82,8 @@ struct riscv_pmu {
int irq;
};
+#ifdef CONFIG_PERF_EVENTS
+#define perf_arch_bpf_user_pt_regs(regs) (struct user_regs_struct *)regs
+#endif
+
#endif /* _ASM_RISCV_PERF_EVENT_H */
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 7ff0ed4f292e..f66b87314fa2 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -90,6 +90,31 @@ extern pgd_t swapper_pg_dir[];
#define __S110 PAGE_SHARED_EXEC
#define __S111 PAGE_SHARED_EXEC
+#define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1)
+#define VMALLOC_END (PAGE_OFFSET - 1)
+#define VMALLOC_START (PAGE_OFFSET - VMALLOC_SIZE)
+
+#define BPF_JIT_REGION_SIZE (SZ_128M)
+#define BPF_JIT_REGION_START (PAGE_OFFSET - BPF_JIT_REGION_SIZE)
+#define BPF_JIT_REGION_END (VMALLOC_END)
+
+/*
+ * Roughly size the vmemmap space to be large enough to fit enough
+ * struct pages to map half the virtual address space. Then
+ * position vmemmap directly below the VMALLOC region.
+ */
+#define VMEMMAP_SHIFT \
+ (CONFIG_VA_BITS - PAGE_SHIFT - 1 + STRUCT_PAGE_MAX_SHIFT)
+#define VMEMMAP_SIZE BIT(VMEMMAP_SHIFT)
+#define VMEMMAP_END (VMALLOC_START - 1)
+#define VMEMMAP_START (VMALLOC_START - VMEMMAP_SIZE)
+
+/*
+ * Define vmemmap for pfn_to_page & page_to_pfn calls. Needed if kernel
+ * is configured with CONFIG_SPARSEMEM_VMEMMAP enabled.
+ */
+#define vmemmap ((struct page *)VMEMMAP_START)
+
static inline int pmd_present(pmd_t pmd)
{
return (pmd_val(pmd) & (_PAGE_PRESENT | _PAGE_PROT_NONE));
@@ -400,23 +425,6 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
-#define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1)
-#define VMALLOC_END (PAGE_OFFSET - 1)
-#define VMALLOC_START (PAGE_OFFSET - VMALLOC_SIZE)
-
-/*
- * Roughly size the vmemmap space to be large enough to fit enough
- * struct pages to map half the virtual address space. Then
- * position vmemmap directly below the VMALLOC region.
- */
-#define VMEMMAP_SHIFT \
- (CONFIG_VA_BITS - PAGE_SHIFT - 1 + STRUCT_PAGE_MAX_SHIFT)
-#define VMEMMAP_SIZE BIT(VMEMMAP_SHIFT)
-#define VMEMMAP_END (VMALLOC_START - 1)
-#define VMEMMAP_START (VMALLOC_START - VMEMMAP_SIZE)
-
-#define vmemmap ((struct page *)VMEMMAP_START)
-
#define PCI_IO_SIZE SZ_16M
#define PCI_IO_END VMEMMAP_START
#define PCI_IO_START (PCI_IO_END - PCI_IO_SIZE)
diff --git a/arch/riscv/include/uapi/asm/bpf_perf_event.h b/arch/riscv/include/uapi/asm/bpf_perf_event.h
new file mode 100644
index 000000000000..6cb1c2823288
--- /dev/null
+++ b/arch/riscv/include/uapi/asm/bpf_perf_event.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI__ASM_BPF_PERF_EVENT_H__
+#define _UAPI__ASM_BPF_PERF_EVENT_H__
+
+#include <asm/ptrace.h>
+
+typedef struct user_regs_struct bpf_user_pt_regs_t;
+
+#endif /* _UAPI__ASM_BPF_PERF_EVENT_H__ */
diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
index a1349ca64669..e163b7b64c86 100644
--- a/arch/riscv/kernel/entry.S
+++ b/arch/riscv/kernel/entry.S
@@ -246,6 +246,7 @@ check_syscall_nr:
*/
li t1, -1
beq a7, t1, ret_from_syscall_rejected
+ blt a7, t1, 1f
/* Call syscall */
la s0, sys_call_table
slli t0, a7, RISCV_LGPTR
diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c
index b94d8db5ddcc..c40fdcdeb950 100644
--- a/arch/riscv/kernel/ftrace.c
+++ b/arch/riscv/kernel/ftrace.c
@@ -142,7 +142,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
*/
old = *parent;
- if (function_graph_enter(old, self_addr, frame_pointer, parent))
+ if (!function_graph_enter(old, self_addr, frame_pointer, parent))
*parent = return_hooker;
}
diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S
index 84a6f0a4b120..797802c73dee 100644
--- a/arch/riscv/kernel/head.S
+++ b/arch/riscv/kernel/head.S
@@ -246,7 +246,7 @@ ENTRY(reset_regs)
li t4, 0
li t5, 0
li t6, 0
- csrw sscratch, 0
+ csrw CSR_SCRATCH, 0
#ifdef CONFIG_FPU
csrr t0, CSR_MISA
diff --git a/arch/riscv/kernel/irq.c b/arch/riscv/kernel/irq.c
index 3f07a91d5afb..345c4f2eba13 100644
--- a/arch/riscv/kernel/irq.c
+++ b/arch/riscv/kernel/irq.c
@@ -23,11 +23,11 @@ asmlinkage __visible void __irq_entry do_IRQ(struct pt_regs *regs)
irq_enter();
switch (regs->cause & ~CAUSE_IRQ_FLAG) {
- case IRQ_TIMER:
+ case RV_IRQ_TIMER:
riscv_timer_interrupt();
break;
#ifdef CONFIG_SMP
- case IRQ_SOFT:
+ case RV_IRQ_SOFT:
/*
* We only use software interrupts to pass IPIs, so if a non-SMP
* system gets one, then we don't know what to do.
@@ -35,7 +35,7 @@ asmlinkage __visible void __irq_entry do_IRQ(struct pt_regs *regs)
riscv_software_interrupt();
break;
#endif
- case IRQ_EXT:
+ case RV_IRQ_EXT:
handle_arch_irq(regs);
break;
default:
diff --git a/arch/riscv/kernel/riscv_ksyms.c b/arch/riscv/kernel/riscv_ksyms.c
index 4800cf703186..2a02b7eebee0 100644
--- a/arch/riscv/kernel/riscv_ksyms.c
+++ b/arch/riscv/kernel/riscv_ksyms.c
@@ -9,8 +9,5 @@
/*
* Assembly functions that may be used (directly or indirectly) by modules
*/
-EXPORT_SYMBOL(__clear_user);
-EXPORT_SYMBOL(__asm_copy_to_user);
-EXPORT_SYMBOL(__asm_copy_from_user);
EXPORT_SYMBOL(memset);
EXPORT_SYMBOL(memcpy);
diff --git a/arch/riscv/lib/uaccess.S b/arch/riscv/lib/uaccess.S
index fecd65657a6f..f29d2ba2c0a6 100644
--- a/arch/riscv/lib/uaccess.S
+++ b/arch/riscv/lib/uaccess.S
@@ -1,4 +1,5 @@
#include <linux/linkage.h>
+#include <asm-generic/export.h>
#include <asm/asm.h>
#include <asm/csr.h>
@@ -66,6 +67,8 @@ ENTRY(__asm_copy_from_user)
j 3b
ENDPROC(__asm_copy_to_user)
ENDPROC(__asm_copy_from_user)
+EXPORT_SYMBOL(__asm_copy_to_user)
+EXPORT_SYMBOL(__asm_copy_from_user)
ENTRY(__clear_user)
@@ -108,6 +111,7 @@ ENTRY(__clear_user)
bltu a0, a3, 5b
j 3b
ENDPROC(__clear_user)
+EXPORT_SYMBOL(__clear_user)
.section .fixup,"ax"
.balign 4
diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile
index 3c8b33258457..a1bd95c8047a 100644
--- a/arch/riscv/mm/Makefile
+++ b/arch/riscv/mm/Makefile
@@ -10,7 +10,6 @@ obj-y += extable.o
obj-$(CONFIG_MMU) += fault.o
obj-y += cacheflush.o
obj-y += context.o
-obj-y += sifive_l2_cache.o
ifeq ($(CONFIG_MMU),y)
obj-$(CONFIG_SMP) += tlbflush.o
diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c
index 8f1900686640..8930ab7278e6 100644
--- a/arch/riscv/mm/cacheflush.c
+++ b/arch/riscv/mm/cacheflush.c
@@ -22,6 +22,7 @@ void flush_icache_all(void)
else
on_each_cpu(ipi_remote_fence_i, NULL, 1);
}
+EXPORT_SYMBOL(flush_icache_all);
/*
* Performs an icache flush for the given MM context. RISC-V has no direct
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index 69f6678db7f3..965a8cf4829c 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -99,13 +99,13 @@ static void __init setup_initrd(void)
pr_info("initrd not found or empty");
goto disable;
}
- if (__pa(initrd_end) > PFN_PHYS(max_low_pfn)) {
+ if (__pa_symbol(initrd_end) > PFN_PHYS(max_low_pfn)) {
pr_err("initrd extends beyond end of memory");
goto disable;
}
size = initrd_end - initrd_start;
- memblock_reserve(__pa(initrd_start), size);
+ memblock_reserve(__pa_symbol(initrd_start), size);
initrd_below_start_ok = 1;
pr_info("Initial ramdisk at: 0x%p (%lu bytes)\n",
@@ -124,8 +124,8 @@ void __init setup_bootmem(void)
{
struct memblock_region *reg;
phys_addr_t mem_size = 0;
- phys_addr_t vmlinux_end = __pa(&_end);
- phys_addr_t vmlinux_start = __pa(&_start);
+ phys_addr_t vmlinux_end = __pa_symbol(&_end);
+ phys_addr_t vmlinux_start = __pa_symbol(&_start);
/* Find the memory region containing the kernel */
for_each_memblock(memory, reg) {
@@ -445,7 +445,7 @@ static void __init setup_vm_final(void)
/* Setup swapper PGD for fixmap */
create_pgd_mapping(swapper_pg_dir, FIXADDR_START,
- __pa(fixmap_pgd_next),
+ __pa_symbol(fixmap_pgd_next),
PGDIR_SIZE, PAGE_TABLE);
/* Map all memory banks */
@@ -474,7 +474,7 @@ static void __init setup_vm_final(void)
clear_fixmap(FIX_PMD);
/* Move to swapper page table */
- csr_write(CSR_SATP, PFN_DOWN(__pa(swapper_pg_dir)) | SATP_MODE);
+ csr_write(CSR_SATP, PFN_DOWN(__pa_symbol(swapper_pg_dir)) | SATP_MODE);
local_flush_tlb_all();
}
#else
diff --git a/arch/riscv/net/bpf_jit_comp.c b/arch/riscv/net/bpf_jit_comp.c
index 5451ef3845f2..483f4ad7f4dc 100644
--- a/arch/riscv/net/bpf_jit_comp.c
+++ b/arch/riscv/net/bpf_jit_comp.c
@@ -120,6 +120,11 @@ static bool seen_reg(int reg, struct rv_jit_context *ctx)
return false;
}
+static void mark_fp(struct rv_jit_context *ctx)
+{
+ __set_bit(RV_CTX_F_SEEN_S5, &ctx->flags);
+}
+
static void mark_call(struct rv_jit_context *ctx)
{
__set_bit(RV_CTX_F_SEEN_CALL, &ctx->flags);
@@ -456,6 +461,11 @@ static u32 rv_amoadd_d(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl)
return rv_amo_insn(0, aq, rl, rs2, rs1, 3, rd, 0x2f);
}
+static u32 rv_auipc(u8 rd, u32 imm31_12)
+{
+ return rv_u_insn(imm31_12, rd, 0x17);
+}
+
static bool is_12b_int(s64 val)
{
return -(1 << 11) <= val && val < (1 << 11);
@@ -479,27 +489,7 @@ static bool is_32b_int(s64 val)
static int is_12b_check(int off, int insn)
{
if (!is_12b_int(off)) {
- pr_err("bpf-jit: insn=%d offset=%d not supported yet!\n",
- insn, (int)off);
- return -1;
- }
- return 0;
-}
-
-static int is_13b_check(int off, int insn)
-{
- if (!is_13b_int(off)) {
- pr_err("bpf-jit: insn=%d offset=%d not supported yet!\n",
- insn, (int)off);
- return -1;
- }
- return 0;
-}
-
-static int is_21b_check(int off, int insn)
-{
- if (!is_21b_int(off)) {
- pr_err("bpf-jit: insn=%d offset=%d not supported yet!\n",
+ pr_err("bpf-jit: insn=%d 12b < offset=%d not supported yet!\n",
insn, (int)off);
return -1;
}
@@ -545,10 +535,13 @@ static void emit_imm(u8 rd, s64 val, struct rv_jit_context *ctx)
emit(rv_addi(rd, rd, lower), ctx);
}
-static int rv_offset(int bpf_to, int bpf_from, struct rv_jit_context *ctx)
+static int rv_offset(int insn, int off, struct rv_jit_context *ctx)
{
- int from = ctx->offset[bpf_from] - 1, to = ctx->offset[bpf_to];
+ int from, to;
+ off++; /* BPF branch is from PC+1, RV is from PC */
+ from = (insn > 0) ? ctx->offset[insn - 1] : 0;
+ to = (insn + off > 0) ? ctx->offset[insn + off - 1] : 0;
return (to - from) << 2;
}
@@ -559,7 +552,7 @@ static int epilogue_offset(struct rv_jit_context *ctx)
return (to - from) << 2;
}
-static void __build_epilogue(u8 reg, struct rv_jit_context *ctx)
+static void __build_epilogue(bool is_tail_call, struct rv_jit_context *ctx)
{
int stack_adjust = ctx->stack_size, store_offset = stack_adjust - 8;
@@ -596,8 +589,114 @@ static void __build_epilogue(u8 reg, struct rv_jit_context *ctx)
emit(rv_addi(RV_REG_SP, RV_REG_SP, stack_adjust), ctx);
/* Set return value. */
- emit(rv_addi(RV_REG_A0, RV_REG_A5, 0), ctx);
- emit(rv_jalr(RV_REG_ZERO, reg, 0), ctx);
+ if (!is_tail_call)
+ emit(rv_addi(RV_REG_A0, RV_REG_A5, 0), ctx);
+ emit(rv_jalr(RV_REG_ZERO, is_tail_call ? RV_REG_T3 : RV_REG_RA,
+ is_tail_call ? 4 : 0), /* skip TCC init */
+ ctx);
+}
+
+/* return -1 or inverted cond */
+static int invert_bpf_cond(u8 cond)
+{
+ switch (cond) {
+ case BPF_JEQ:
+ return BPF_JNE;
+ case BPF_JGT:
+ return BPF_JLE;
+ case BPF_JLT:
+ return BPF_JGE;
+ case BPF_JGE:
+ return BPF_JLT;
+ case BPF_JLE:
+ return BPF_JGT;
+ case BPF_JNE:
+ return BPF_JEQ;
+ case BPF_JSGT:
+ return BPF_JSLE;
+ case BPF_JSLT:
+ return BPF_JSGE;
+ case BPF_JSGE:
+ return BPF_JSLT;
+ case BPF_JSLE:
+ return BPF_JSGT;
+ }
+ return -1;
+}
+
+static void emit_bcc(u8 cond, u8 rd, u8 rs, int rvoff,
+ struct rv_jit_context *ctx)
+{
+ switch (cond) {
+ case BPF_JEQ:
+ emit(rv_beq(rd, rs, rvoff >> 1), ctx);
+ return;
+ case BPF_JGT:
+ emit(rv_bltu(rs, rd, rvoff >> 1), ctx);
+ return;
+ case BPF_JLT:
+ emit(rv_bltu(rd, rs, rvoff >> 1), ctx);
+ return;
+ case BPF_JGE:
+ emit(rv_bgeu(rd, rs, rvoff >> 1), ctx);
+ return;
+ case BPF_JLE:
+ emit(rv_bgeu(rs, rd, rvoff >> 1), ctx);
+ return;
+ case BPF_JNE:
+ emit(rv_bne(rd, rs, rvoff >> 1), ctx);
+ return;
+ case BPF_JSGT:
+ emit(rv_blt(rs, rd, rvoff >> 1), ctx);
+ return;
+ case BPF_JSLT:
+ emit(rv_blt(rd, rs, rvoff >> 1), ctx);
+ return;
+ case BPF_JSGE:
+ emit(rv_bge(rd, rs, rvoff >> 1), ctx);
+ return;
+ case BPF_JSLE:
+ emit(rv_bge(rs, rd, rvoff >> 1), ctx);
+ }
+}
+
+static void emit_branch(u8 cond, u8 rd, u8 rs, int rvoff,
+ struct rv_jit_context *ctx)
+{
+ s64 upper, lower;
+
+ if (is_13b_int(rvoff)) {
+ emit_bcc(cond, rd, rs, rvoff, ctx);
+ return;
+ }
+
+ /* Adjust for jal */
+ rvoff -= 4;
+
+ /* Transform, e.g.:
+ * bne rd,rs,foo
+ * to
+ * beq rd,rs,<.L1>
+ * (auipc foo)
+ * jal(r) foo
+ * .L1
+ */
+ cond = invert_bpf_cond(cond);
+ if (is_21b_int(rvoff)) {
+ emit_bcc(cond, rd, rs, 8, ctx);
+ emit(rv_jal(RV_REG_ZERO, rvoff >> 1), ctx);
+ return;
+ }
+
+ /* 32b No need for an additional rvoff adjustment, since we
+ * get that from the auipc at PC', where PC = PC' + 4.
+ */
+ upper = (rvoff + (1 << 11)) >> 12;
+ lower = rvoff & 0xfff;
+
+ emit_bcc(cond, rd, rs, 12, ctx);
+ emit(rv_auipc(RV_REG_T1, upper), ctx);
+ emit(rv_jalr(RV_REG_ZERO, RV_REG_T1, lower), ctx);
}
static void emit_zext_32(u8 reg, struct rv_jit_context *ctx)
@@ -627,18 +726,14 @@ static int emit_bpf_tail_call(int insn, struct rv_jit_context *ctx)
return -1;
emit(rv_lwu(RV_REG_T1, off, RV_REG_A1), ctx);
off = (tc_ninsn - (ctx->ninsns - start_insn)) << 2;
- if (is_13b_check(off, insn))
- return -1;
- emit(rv_bgeu(RV_REG_A2, RV_REG_T1, off >> 1), ctx);
+ emit_branch(BPF_JGE, RV_REG_A2, RV_REG_T1, off, ctx);
- /* if (--TCC < 0)
+ /* if (TCC-- < 0)
* goto out;
*/
emit(rv_addi(RV_REG_T1, tcc, -1), ctx);
off = (tc_ninsn - (ctx->ninsns - start_insn)) << 2;
- if (is_13b_check(off, insn))
- return -1;
- emit(rv_blt(RV_REG_T1, RV_REG_ZERO, off >> 1), ctx);
+ emit_branch(BPF_JSLT, tcc, RV_REG_ZERO, off, ctx);
/* prog = array->ptrs[index];
* if (!prog)
@@ -651,18 +746,15 @@ static int emit_bpf_tail_call(int insn, struct rv_jit_context *ctx)
return -1;
emit(rv_ld(RV_REG_T2, off, RV_REG_T2), ctx);
off = (tc_ninsn - (ctx->ninsns - start_insn)) << 2;
- if (is_13b_check(off, insn))
- return -1;
- emit(rv_beq(RV_REG_T2, RV_REG_ZERO, off >> 1), ctx);
+ emit_branch(BPF_JEQ, RV_REG_T2, RV_REG_ZERO, off, ctx);
/* goto *(prog->bpf_func + 4); */
off = offsetof(struct bpf_prog, bpf_func);
if (is_12b_check(off, insn))
return -1;
emit(rv_ld(RV_REG_T3, off, RV_REG_T2), ctx);
- emit(rv_addi(RV_REG_T3, RV_REG_T3, 4), ctx);
emit(rv_addi(RV_REG_TCC, RV_REG_T1, 0), ctx);
- __build_epilogue(RV_REG_T3, ctx);
+ __build_epilogue(true, ctx);
return 0;
}
@@ -687,13 +779,6 @@ static void init_regs(u8 *rd, u8 *rs, const struct bpf_insn *insn,
*rs = bpf_to_rv_reg(insn->src_reg, ctx);
}
-static int rv_offset_check(int *rvoff, s16 off, int insn,
- struct rv_jit_context *ctx)
-{
- *rvoff = rv_offset(insn + off, insn, ctx);
- return is_13b_check(*rvoff, insn);
-}
-
static void emit_zext_32_rd_rs(u8 *rd, u8 *rs, struct rv_jit_context *ctx)
{
emit(rv_addi(RV_REG_T2, *rd, 0), ctx);
@@ -726,13 +811,57 @@ static void emit_sext_32_rd(u8 *rd, struct rv_jit_context *ctx)
*rd = RV_REG_T2;
}
+static void emit_jump_and_link(u8 rd, s64 rvoff, bool force_jalr,
+ struct rv_jit_context *ctx)
+{
+ s64 upper, lower;
+
+ if (rvoff && is_21b_int(rvoff) && !force_jalr) {
+ emit(rv_jal(rd, rvoff >> 1), ctx);
+ return;
+ }
+
+ upper = (rvoff + (1 << 11)) >> 12;
+ lower = rvoff & 0xfff;
+ emit(rv_auipc(RV_REG_T1, upper), ctx);
+ emit(rv_jalr(rd, RV_REG_T1, lower), ctx);
+}
+
+static bool is_signed_bpf_cond(u8 cond)
+{
+ return cond == BPF_JSGT || cond == BPF_JSLT ||
+ cond == BPF_JSGE || cond == BPF_JSLE;
+}
+
+static int emit_call(bool fixed, u64 addr, struct rv_jit_context *ctx)
+{
+ s64 off = 0;
+ u64 ip;
+ u8 rd;
+
+ if (addr && ctx->insns) {
+ ip = (u64)(long)(ctx->insns + ctx->ninsns);
+ off = addr - ip;
+ if (!is_32b_int(off)) {
+ pr_err("bpf-jit: target call addr %pK is out of range\n",
+ (void *)addr);
+ return -ERANGE;
+ }
+ }
+
+ emit_jump_and_link(RV_REG_RA, off, !fixed, ctx);
+ rd = bpf_to_rv_reg(BPF_REG_0, ctx);
+ emit(rv_addi(rd, RV_REG_A0, 0), ctx);
+ return 0;
+}
+
static int emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
bool extra_pass)
{
bool is64 = BPF_CLASS(insn->code) == BPF_ALU64 ||
BPF_CLASS(insn->code) == BPF_JMP;
+ int s, e, rvoff, i = insn - ctx->prog->insnsi;
struct bpf_prog_aux *aux = ctx->prog->aux;
- int rvoff, i = insn - ctx->prog->insnsi;
u8 rd = -1, rs = -1, code = insn->code;
s16 off = insn->off;
s32 imm = insn->imm;
@@ -1000,214 +1129,110 @@ out_be:
/* JUMP off */
case BPF_JMP | BPF_JA:
- rvoff = rv_offset(i + off, i, ctx);
- if (!is_21b_int(rvoff)) {
- pr_err("bpf-jit: insn=%d offset=%d not supported yet!\n",
- i, rvoff);
- return -1;
- }
-
- emit(rv_jal(RV_REG_ZERO, rvoff >> 1), ctx);
+ rvoff = rv_offset(i, off, ctx);
+ emit_jump_and_link(RV_REG_ZERO, rvoff, false, ctx);
break;
/* IF (dst COND src) JUMP off */
case BPF_JMP | BPF_JEQ | BPF_X:
case BPF_JMP32 | BPF_JEQ | BPF_X:
- if (rv_offset_check(&rvoff, off, i, ctx))
- return -1;
- if (!is64)
- emit_zext_32_rd_rs(&rd, &rs, ctx);
- emit(rv_beq(rd, rs, rvoff >> 1), ctx);
- break;
case BPF_JMP | BPF_JGT | BPF_X:
case BPF_JMP32 | BPF_JGT | BPF_X:
- if (rv_offset_check(&rvoff, off, i, ctx))
- return -1;
- if (!is64)
- emit_zext_32_rd_rs(&rd, &rs, ctx);
- emit(rv_bltu(rs, rd, rvoff >> 1), ctx);
- break;
case BPF_JMP | BPF_JLT | BPF_X:
case BPF_JMP32 | BPF_JLT | BPF_X:
- if (rv_offset_check(&rvoff, off, i, ctx))
- return -1;
- if (!is64)
- emit_zext_32_rd_rs(&rd, &rs, ctx);
- emit(rv_bltu(rd, rs, rvoff >> 1), ctx);
- break;
case BPF_JMP | BPF_JGE | BPF_X:
case BPF_JMP32 | BPF_JGE | BPF_X:
- if (rv_offset_check(&rvoff, off, i, ctx))
- return -1;
- if (!is64)
- emit_zext_32_rd_rs(&rd, &rs, ctx);
- emit(rv_bgeu(rd, rs, rvoff >> 1), ctx);
- break;
case BPF_JMP | BPF_JLE | BPF_X:
case BPF_JMP32 | BPF_JLE | BPF_X:
- if (rv_offset_check(&rvoff, off, i, ctx))
- return -1;
- if (!is64)
- emit_zext_32_rd_rs(&rd, &rs, ctx);
- emit(rv_bgeu(rs, rd, rvoff >> 1), ctx);
- break;
case BPF_JMP | BPF_JNE | BPF_X:
case BPF_JMP32 | BPF_JNE | BPF_X:
- if (rv_offset_check(&rvoff, off, i, ctx))
- return -1;
- if (!is64)
- emit_zext_32_rd_rs(&rd, &rs, ctx);
- emit(rv_bne(rd, rs, rvoff >> 1), ctx);
- break;
case BPF_JMP | BPF_JSGT | BPF_X:
case BPF_JMP32 | BPF_JSGT | BPF_X:
- if (rv_offset_check(&rvoff, off, i, ctx))
- return -1;
- if (!is64)
- emit_sext_32_rd_rs(&rd, &rs, ctx);
- emit(rv_blt(rs, rd, rvoff >> 1), ctx);
- break;
case BPF_JMP | BPF_JSLT | BPF_X:
case BPF_JMP32 | BPF_JSLT | BPF_X:
- if (rv_offset_check(&rvoff, off, i, ctx))
- return -1;
- if (!is64)
- emit_sext_32_rd_rs(&rd, &rs, ctx);
- emit(rv_blt(rd, rs, rvoff >> 1), ctx);
- break;
case BPF_JMP | BPF_JSGE | BPF_X:
case BPF_JMP32 | BPF_JSGE | BPF_X:
- if (rv_offset_check(&rvoff, off, i, ctx))
- return -1;
- if (!is64)
- emit_sext_32_rd_rs(&rd, &rs, ctx);
- emit(rv_bge(rd, rs, rvoff >> 1), ctx);
- break;
case BPF_JMP | BPF_JSLE | BPF_X:
case BPF_JMP32 | BPF_JSLE | BPF_X:
- if (rv_offset_check(&rvoff, off, i, ctx))
- return -1;
- if (!is64)
- emit_sext_32_rd_rs(&rd, &rs, ctx);
- emit(rv_bge(rs, rd, rvoff >> 1), ctx);
- break;
case BPF_JMP | BPF_JSET | BPF_X:
case BPF_JMP32 | BPF_JSET | BPF_X:
- if (rv_offset_check(&rvoff, off, i, ctx))
- return -1;
- if (!is64)
- emit_zext_32_rd_rs(&rd, &rs, ctx);
- emit(rv_and(RV_REG_T1, rd, rs), ctx);
- emit(rv_bne(RV_REG_T1, RV_REG_ZERO, rvoff >> 1), ctx);
+ rvoff = rv_offset(i, off, ctx);
+ if (!is64) {
+ s = ctx->ninsns;
+ if (is_signed_bpf_cond(BPF_OP(code)))
+ emit_sext_32_rd_rs(&rd, &rs, ctx);
+ else
+ emit_zext_32_rd_rs(&rd, &rs, ctx);
+ e = ctx->ninsns;
+
+ /* Adjust for extra insns */
+ rvoff -= (e - s) << 2;
+ }
+
+ if (BPF_OP(code) == BPF_JSET) {
+ /* Adjust for and */
+ rvoff -= 4;
+ emit(rv_and(RV_REG_T1, rd, rs), ctx);
+ emit_branch(BPF_JNE, RV_REG_T1, RV_REG_ZERO, rvoff,
+ ctx);
+ } else {
+ emit_branch(BPF_OP(code), rd, rs, rvoff, ctx);
+ }
break;
/* IF (dst COND imm) JUMP off */
case BPF_JMP | BPF_JEQ | BPF_K:
case BPF_JMP32 | BPF_JEQ | BPF_K:
- if (rv_offset_check(&rvoff, off, i, ctx))
- return -1;
- emit_imm(RV_REG_T1, imm, ctx);
- if (!is64)
- emit_zext_32_rd_t1(&rd, ctx);
- emit(rv_beq(rd, RV_REG_T1, rvoff >> 1), ctx);
- break;
case BPF_JMP | BPF_JGT | BPF_K:
case BPF_JMP32 | BPF_JGT | BPF_K:
- if (rv_offset_check(&rvoff, off, i, ctx))
- return -1;
- emit_imm(RV_REG_T1, imm, ctx);
- if (!is64)
- emit_zext_32_rd_t1(&rd, ctx);
- emit(rv_bltu(RV_REG_T1, rd, rvoff >> 1), ctx);
- break;
case BPF_JMP | BPF_JLT | BPF_K:
case BPF_JMP32 | BPF_JLT | BPF_K:
- if (rv_offset_check(&rvoff, off, i, ctx))
- return -1;
- emit_imm(RV_REG_T1, imm, ctx);
- if (!is64)
- emit_zext_32_rd_t1(&rd, ctx);
- emit(rv_bltu(rd, RV_REG_T1, rvoff >> 1), ctx);
- break;
case BPF_JMP | BPF_JGE | BPF_K:
case BPF_JMP32 | BPF_JGE | BPF_K:
- if (rv_offset_check(&rvoff, off, i, ctx))
- return -1;
- emit_imm(RV_REG_T1, imm, ctx);
- if (!is64)
- emit_zext_32_rd_t1(&rd, ctx);
- emit(rv_bgeu(rd, RV_REG_T1, rvoff >> 1), ctx);
- break;
case BPF_JMP | BPF_JLE | BPF_K:
case BPF_JMP32 | BPF_JLE | BPF_K:
- if (rv_offset_check(&rvoff, off, i, ctx))
- return -1;
- emit_imm(RV_REG_T1, imm, ctx);
- if (!is64)
- emit_zext_32_rd_t1(&rd, ctx);
- emit(rv_bgeu(RV_REG_T1, rd, rvoff >> 1), ctx);
- break;
case BPF_JMP | BPF_JNE | BPF_K:
case BPF_JMP32 | BPF_JNE | BPF_K:
- if (rv_offset_check(&rvoff, off, i, ctx))
- return -1;
- emit_imm(RV_REG_T1, imm, ctx);
- if (!is64)
- emit_zext_32_rd_t1(&rd, ctx);
- emit(rv_bne(rd, RV_REG_T1, rvoff >> 1), ctx);
- break;
case BPF_JMP | BPF_JSGT | BPF_K:
case BPF_JMP32 | BPF_JSGT | BPF_K:
- if (rv_offset_check(&rvoff, off, i, ctx))
- return -1;
- emit_imm(RV_REG_T1, imm, ctx);
- if (!is64)
- emit_sext_32_rd(&rd, ctx);
- emit(rv_blt(RV_REG_T1, rd, rvoff >> 1), ctx);
- break;
case BPF_JMP | BPF_JSLT | BPF_K:
case BPF_JMP32 | BPF_JSLT | BPF_K:
- if (rv_offset_check(&rvoff, off, i, ctx))
- return -1;
- emit_imm(RV_REG_T1, imm, ctx);
- if (!is64)
- emit_sext_32_rd(&rd, ctx);
- emit(rv_blt(rd, RV_REG_T1, rvoff >> 1), ctx);
- break;
case BPF_JMP | BPF_JSGE | BPF_K:
case BPF_JMP32 | BPF_JSGE | BPF_K:
- if (rv_offset_check(&rvoff, off, i, ctx))
- return -1;
- emit_imm(RV_REG_T1, imm, ctx);
- if (!is64)
- emit_sext_32_rd(&rd, ctx);
- emit(rv_bge(rd, RV_REG_T1, rvoff >> 1), ctx);
- break;
case BPF_JMP | BPF_JSLE | BPF_K:
case BPF_JMP32 | BPF_JSLE | BPF_K:
- if (rv_offset_check(&rvoff, off, i, ctx))
- return -1;
- emit_imm(RV_REG_T1, imm, ctx);
- if (!is64)
- emit_sext_32_rd(&rd, ctx);
- emit(rv_bge(RV_REG_T1, rd, rvoff >> 1), ctx);
- break;
case BPF_JMP | BPF_JSET | BPF_K:
case BPF_JMP32 | BPF_JSET | BPF_K:
- if (rv_offset_check(&rvoff, off, i, ctx))
- return -1;
+ rvoff = rv_offset(i, off, ctx);
+ s = ctx->ninsns;
emit_imm(RV_REG_T1, imm, ctx);
- if (!is64)
- emit_zext_32_rd_t1(&rd, ctx);
- emit(rv_and(RV_REG_T1, rd, RV_REG_T1), ctx);
- emit(rv_bne(RV_REG_T1, RV_REG_ZERO, rvoff >> 1), ctx);
+ if (!is64) {
+ if (is_signed_bpf_cond(BPF_OP(code)))
+ emit_sext_32_rd(&rd, ctx);
+ else
+ emit_zext_32_rd_t1(&rd, ctx);
+ }
+ e = ctx->ninsns;
+
+ /* Adjust for extra insns */
+ rvoff -= (e - s) << 2;
+
+ if (BPF_OP(code) == BPF_JSET) {
+ /* Adjust for and */
+ rvoff -= 4;
+ emit(rv_and(RV_REG_T1, rd, RV_REG_T1), ctx);
+ emit_branch(BPF_JNE, RV_REG_T1, RV_REG_ZERO, rvoff,
+ ctx);
+ } else {
+ emit_branch(BPF_OP(code), rd, RV_REG_T1, rvoff, ctx);
+ }
break;
/* function call */
case BPF_JMP | BPF_CALL:
{
bool fixed;
- int i, ret;
+ int ret;
u64 addr;
mark_call(ctx);
@@ -1215,20 +1240,9 @@ out_be:
&fixed);
if (ret < 0)
return ret;
- if (fixed) {
- emit_imm(RV_REG_T1, addr, ctx);
- } else {
- i = ctx->ninsns;
- emit_imm(RV_REG_T1, addr, ctx);
- for (i = ctx->ninsns - i; i < 8; i++) {
- /* nop */
- emit(rv_addi(RV_REG_ZERO, RV_REG_ZERO, 0),
- ctx);
- }
- }
- emit(rv_jalr(RV_REG_RA, RV_REG_T1, 0), ctx);
- rd = bpf_to_rv_reg(BPF_REG_0, ctx);
- emit(rv_addi(rd, RV_REG_A0, 0), ctx);
+ ret = emit_call(fixed, addr, ctx);
+ if (ret)
+ return ret;
break;
}
/* tail call */
@@ -1243,9 +1257,7 @@ out_be:
break;
rvoff = epilogue_offset(ctx);
- if (is_21b_check(rvoff, i))
- return -1;
- emit(rv_jal(RV_REG_ZERO, rvoff >> 1), ctx);
+ emit_jump_and_link(RV_REG_ZERO, rvoff, false, ctx);
break;
/* dst = imm64 */
@@ -1426,6 +1438,10 @@ static void build_prologue(struct rv_jit_context *ctx)
{
int stack_adjust = 0, store_offset, bpf_stack_adjust;
+ bpf_stack_adjust = round_up(ctx->prog->aux->stack_depth, 16);
+ if (bpf_stack_adjust)
+ mark_fp(ctx);
+
if (seen_reg(RV_REG_RA, ctx))
stack_adjust += 8;
stack_adjust += 8; /* RV_REG_FP */
@@ -1443,7 +1459,6 @@ static void build_prologue(struct rv_jit_context *ctx)
stack_adjust += 8;
stack_adjust = round_up(stack_adjust, 16);
- bpf_stack_adjust = round_up(ctx->prog->aux->stack_depth, 16);
stack_adjust += bpf_stack_adjust;
store_offset = stack_adjust - 8;
@@ -1502,10 +1517,10 @@ static void build_prologue(struct rv_jit_context *ctx)
static void build_epilogue(struct rv_jit_context *ctx)
{
- __build_epilogue(RV_REG_RA, ctx);
+ __build_epilogue(false, ctx);
}
-static int build_body(struct rv_jit_context *ctx, bool extra_pass)
+static int build_body(struct rv_jit_context *ctx, bool extra_pass, int *offset)
{
const struct bpf_prog *prog = ctx->prog;
int i;
@@ -1517,12 +1532,12 @@ static int build_body(struct rv_jit_context *ctx, bool extra_pass)
ret = emit_insn(insn, ctx, extra_pass);
if (ret > 0) {
i++;
- if (ctx->insns == NULL)
- ctx->offset[i] = ctx->ninsns;
+ if (offset)
+ offset[i] = ctx->ninsns;
continue;
}
- if (ctx->insns == NULL)
- ctx->offset[i] = ctx->ninsns;
+ if (offset)
+ offset[i] = ctx->ninsns;
if (ret)
return ret;
}
@@ -1548,9 +1563,10 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
{
bool tmp_blinded = false, extra_pass = false;
struct bpf_prog *tmp, *orig_prog = prog;
+ int pass = 0, prev_ninsns = 0, i;
struct rv_jit_data *jit_data;
+ unsigned int image_size = 0;
struct rv_jit_context *ctx;
- unsigned int image_size;
if (!prog->jit_requested)
return orig_prog;
@@ -1587,33 +1603,59 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
prog = orig_prog;
goto out_offset;
}
+ for (i = 0; i < prog->len; i++) {
+ prev_ninsns += 32;
+ ctx->offset[i] = prev_ninsns;
+ }
- /* First pass generates the ctx->offset, but does not emit an image. */
- if (build_body(ctx, extra_pass)) {
- prog = orig_prog;
- goto out_offset;
+ for (i = 0; i < 16; i++) {
+ pass++;
+ ctx->ninsns = 0;
+ if (build_body(ctx, extra_pass, ctx->offset)) {
+ prog = orig_prog;
+ goto out_offset;
+ }
+ build_prologue(ctx);
+ ctx->epilogue_offset = ctx->ninsns;
+ build_epilogue(ctx);
+
+ if (ctx->ninsns == prev_ninsns) {
+ if (jit_data->header)
+ break;
+
+ image_size = sizeof(u32) * ctx->ninsns;
+ jit_data->header =
+ bpf_jit_binary_alloc(image_size,
+ &jit_data->image,
+ sizeof(u32),
+ bpf_fill_ill_insns);
+ if (!jit_data->header) {
+ prog = orig_prog;
+ goto out_offset;
+ }
+
+ ctx->insns = (u32 *)jit_data->image;
+ /* Now, when the image is allocated, the image
+ * can potentially shrink more (auipc/jalr ->
+ * jal).
+ */
+ }
+ prev_ninsns = ctx->ninsns;
}
- build_prologue(ctx);
- ctx->epilogue_offset = ctx->ninsns;
- build_epilogue(ctx);
- /* Allocate image, now that we know the size. */
- image_size = sizeof(u32) * ctx->ninsns;
- jit_data->header = bpf_jit_binary_alloc(image_size, &jit_data->image,
- sizeof(u32),
- bpf_fill_ill_insns);
- if (!jit_data->header) {
+ if (i == 16) {
+ pr_err("bpf-jit: image did not converge in <%d passes!\n", i);
+ bpf_jit_binary_free(jit_data->header);
prog = orig_prog;
goto out_offset;
}
- /* Second, real pass, that acutally emits the image. */
- ctx->insns = (u32 *)jit_data->image;
skip_init_ctx:
+ pass++;
ctx->ninsns = 0;
build_prologue(ctx);
- if (build_body(ctx, extra_pass)) {
+ if (build_body(ctx, extra_pass, NULL)) {
bpf_jit_binary_free(jit_data->header);
prog = orig_prog;
goto out_offset;
@@ -1621,7 +1663,7 @@ skip_init_ctx:
build_epilogue(ctx);
if (bpf_jit_enable > 1)
- bpf_jit_dump(prog->len, image_size, 2, ctx->insns);
+ bpf_jit_dump(prog->len, image_size, pass, ctx->insns);
prog->bpf_func = (void *)ctx->insns;
prog->jited = 1;
@@ -1641,3 +1683,16 @@ out:
tmp : orig_prog);
return prog;
}
+
+void *bpf_jit_alloc_exec(unsigned long size)
+{
+ return __vmalloc_node_range(size, PAGE_SIZE, BPF_JIT_REGION_START,
+ BPF_JIT_REGION_END, GFP_KERNEL,
+ PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
+ __builtin_return_address(0));
+}
+
+void bpf_jit_free_exec(void *addr)
+{
+ return vfree(addr);
+}
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index d4051e88e625..bc88841d335d 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -124,6 +124,7 @@ config S390
select HAVE_ARCH_JUMP_LABEL
select HAVE_ARCH_JUMP_LABEL_RELATIVE
select HAVE_ARCH_KASAN
+ select HAVE_ARCH_KASAN_VMALLOC
select CPU_NO_EFFICIENT_FFS if !HAVE_MARCH_Z9_109_FEATURES
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_SOFT_DIRTY
diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h
index 6dc6c4fbc8e2..69289e99cabd 100644
--- a/arch/s390/include/asm/setup.h
+++ b/arch/s390/include/asm/setup.h
@@ -27,7 +27,6 @@
#define MACHINE_FLAG_DIAG9C BIT(3)
#define MACHINE_FLAG_ESOP BIT(4)
#define MACHINE_FLAG_IDTE BIT(5)
-#define MACHINE_FLAG_DIAG44 BIT(6)
#define MACHINE_FLAG_EDAT1 BIT(7)
#define MACHINE_FLAG_EDAT2 BIT(8)
#define MACHINE_FLAG_TOPOLOGY BIT(10)
@@ -94,7 +93,6 @@ extern unsigned long __swsusp_reset_dma;
#define MACHINE_HAS_DIAG9C (S390_lowcore.machine_flags & MACHINE_FLAG_DIAG9C)
#define MACHINE_HAS_ESOP (S390_lowcore.machine_flags & MACHINE_FLAG_ESOP)
#define MACHINE_HAS_IDTE (S390_lowcore.machine_flags & MACHINE_FLAG_IDTE)
-#define MACHINE_HAS_DIAG44 (S390_lowcore.machine_flags & MACHINE_FLAG_DIAG44)
#define MACHINE_HAS_EDAT1 (S390_lowcore.machine_flags & MACHINE_FLAG_EDAT1)
#define MACHINE_HAS_EDAT2 (S390_lowcore.machine_flags & MACHINE_FLAG_EDAT2)
#define MACHINE_HAS_TOPOLOGY (S390_lowcore.machine_flags & MACHINE_FLAG_TOPOLOGY)
diff --git a/arch/s390/include/asm/timex.h b/arch/s390/include/asm/timex.h
index 6da8885251d6..670f14a228e5 100644
--- a/arch/s390/include/asm/timex.h
+++ b/arch/s390/include/asm/timex.h
@@ -194,9 +194,9 @@ static inline unsigned long long get_tod_clock_monotonic(void)
{
unsigned long long tod;
- preempt_disable();
+ preempt_disable_notrace();
tod = get_tod_clock() - *(unsigned long long *) &tod_clock_base[1];
- preempt_enable();
+ preempt_enable_notrace();
return tod;
}
diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h
index ef3c00b049ab..4093a2856929 100644
--- a/arch/s390/include/asm/uv.h
+++ b/arch/s390/include/asm/uv.h
@@ -86,7 +86,7 @@ static inline int share(unsigned long addr, u16 cmd)
};
if (!is_prot_virt_guest())
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
/*
* Sharing is page wise, if we encounter addresses that are
* not page aligned, we assume something went wrong. If
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index db32a55daaec..cd241ee66eff 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -204,21 +204,6 @@ static __init void detect_diag9c(void)
S390_lowcore.machine_flags |= MACHINE_FLAG_DIAG9C;
}
-static __init void detect_diag44(void)
-{
- int rc;
-
- diag_stat_inc(DIAG_STAT_X044);
- asm volatile(
- " diag 0,0,0x44\n"
- "0: la %0,0\n"
- "1:\n"
- EX_TABLE(0b,1b)
- : "=d" (rc) : "0" (-EOPNOTSUPP) : "cc");
- if (!rc)
- S390_lowcore.machine_flags |= MACHINE_FLAG_DIAG44;
-}
-
static __init void detect_machine_facilities(void)
{
if (test_facility(8)) {
@@ -331,7 +316,6 @@ void __init startup_init(void)
setup_arch_string();
setup_boot_command_line();
detect_diag9c();
- detect_diag44();
detect_machine_facilities();
save_vector_registers();
setup_topology();
diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S
index 9e1660a6b9db..c3597d2e2ae0 100644
--- a/arch/s390/kernel/mcount.S
+++ b/arch/s390/kernel/mcount.S
@@ -35,6 +35,7 @@ EXPORT_SYMBOL(_mcount)
ENTRY(ftrace_caller)
.globl ftrace_regs_caller
.set ftrace_regs_caller,ftrace_caller
+ stg %r14,(__SF_GPRS+8*8)(%r15) # save traced function caller
lgr %r1,%r15
#if !(defined(CC_USING_HOTPATCH) || defined(CC_USING_NOP_MCOUNT))
aghi %r0,MCOUNT_RETURN_FIXUP
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index c07fdcd73726..77d93c534284 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -1303,18 +1303,28 @@ static void hw_perf_event_update(struct perf_event *event, int flush_all)
*/
if (flush_all && done)
break;
-
- /* If an event overflow happened, discard samples by
- * processing any remaining sample-data-blocks.
- */
- if (event_overflow)
- flush_all = 1;
}
/* Account sample overflows in the event hardware structure */
if (sampl_overflow)
OVERFLOW_REG(hwc) = DIV_ROUND_UP(OVERFLOW_REG(hwc) +
sampl_overflow, 1 + num_sdb);
+
+ /* Perf_event_overflow() and perf_event_account_interrupt() limit
+ * the interrupt rate to an upper limit. Roughly 1000 samples per
+ * task tick.
+ * Hitting this limit results in a large number
+ * of throttled REF_REPORT_THROTTLE entries and the samples
+ * are dropped.
+ * Slightly increase the interval to avoid hitting this limit.
+ */
+ if (event_overflow) {
+ SAMPL_RATE(hwc) += DIV_ROUND_UP(SAMPL_RATE(hwc), 10);
+ debug_sprintf_event(sfdbg, 1, "%s: rate adjustment %ld\n",
+ __func__,
+ DIV_ROUND_UP(SAMPL_RATE(hwc), 10));
+ }
+
if (sampl_overflow || event_overflow)
debug_sprintf_event(sfdbg, 4, "%s: "
"overflows: sample %llu event %llu"
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 2794cad9312e..a08bd2522dd9 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -413,14 +413,11 @@ EXPORT_SYMBOL(arch_vcpu_is_preempted);
void smp_yield_cpu(int cpu)
{
- if (MACHINE_HAS_DIAG9C) {
- diag_stat_inc_norecursion(DIAG_STAT_X09C);
- asm volatile("diag %0,0,0x9c"
- : : "d" (pcpu_devices[cpu].address));
- } else if (MACHINE_HAS_DIAG44 && !smp_cpu_mtid) {
- diag_stat_inc_norecursion(DIAG_STAT_X044);
- asm volatile("diag 0,0,0x44");
- }
+ if (!MACHINE_HAS_DIAG9C)
+ return;
+ diag_stat_inc_norecursion(DIAG_STAT_X09C);
+ asm volatile("diag %0,0,0x9c"
+ : : "d" (pcpu_devices[cpu].address));
}
/*
diff --git a/arch/s390/kernel/unwind_bc.c b/arch/s390/kernel/unwind_bc.c
index da2d4d4c5b0e..707fd99f6734 100644
--- a/arch/s390/kernel/unwind_bc.c
+++ b/arch/s390/kernel/unwind_bc.c
@@ -36,10 +36,17 @@ static bool update_stack_info(struct unwind_state *state, unsigned long sp)
return true;
}
-static inline bool is_task_pt_regs(struct unwind_state *state,
- struct pt_regs *regs)
+static inline bool is_final_pt_regs(struct unwind_state *state,
+ struct pt_regs *regs)
{
- return task_pt_regs(state->task) == regs;
+ /* user mode or kernel thread pt_regs at the bottom of task stack */
+ if (task_pt_regs(state->task) == regs)
+ return true;
+
+ /* user mode pt_regs at the bottom of irq stack */
+ return state->stack_info.type == STACK_TYPE_IRQ &&
+ state->stack_info.end - sizeof(struct pt_regs) == (unsigned long)regs &&
+ READ_ONCE_NOCHECK(regs->psw.mask) & PSW_MASK_PSTATE;
}
bool unwind_next_frame(struct unwind_state *state)
@@ -80,7 +87,7 @@ bool unwind_next_frame(struct unwind_state *state)
if (!on_stack(info, sp, sizeof(struct pt_regs)))
goto out_err;
regs = (struct pt_regs *) sp;
- if (is_task_pt_regs(state, regs))
+ if (is_final_pt_regs(state, regs))
goto out_stop;
ip = READ_ONCE_NOCHECK(regs->psw.addr);
sp = READ_ONCE_NOCHECK(regs->gprs[15]);
diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c
index ce1e4bbe53aa..9b2dab5a69f9 100644
--- a/arch/s390/lib/spinlock.c
+++ b/arch/s390/lib/spinlock.c
@@ -242,7 +242,6 @@ static inline void arch_spin_lock_classic(arch_spinlock_t *lp)
void arch_spin_lock_wait(arch_spinlock_t *lp)
{
- /* Use classic spinlocks + niai if the steal time is >= 10% */
if (test_cpu_flag(CIF_DEDICATED_CPU))
arch_spin_lock_queued(lp);
else
diff --git a/arch/s390/lib/test_unwind.c b/arch/s390/lib/test_unwind.c
index bda7ac0ddd29..32b7a30b2485 100644
--- a/arch/s390/lib/test_unwind.c
+++ b/arch/s390/lib/test_unwind.c
@@ -238,7 +238,7 @@ static int test_unwind_irq(struct unwindme *u)
{
preempt_disable();
if (register_external_irq(EXT_IRQ_CLK_COMP, unwindme_irq_handler)) {
- pr_info("Couldn't reqister external interrupt handler");
+ pr_info("Couldn't register external interrupt handler");
return -1;
}
u->task = current;
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index f0ce22220565..ac44bd76db4b 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -292,10 +292,8 @@ void arch_remove_memory(int nid, u64 start, u64 size,
{
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
- struct zone *zone;
- zone = page_zone(pfn_to_page(start_pfn));
- __remove_pages(zone, start_pfn, nr_pages, altmap);
+ __remove_pages(start_pfn, nr_pages, altmap);
vmem_remove_mapping(start, size);
}
#endif /* CONFIG_MEMORY_HOTPLUG */
diff --git a/arch/s390/mm/kasan_init.c b/arch/s390/mm/kasan_init.c
index 460f25572940..06345616a646 100644
--- a/arch/s390/mm/kasan_init.c
+++ b/arch/s390/mm/kasan_init.c
@@ -82,7 +82,8 @@ static pte_t * __init kasan_early_pte_alloc(void)
enum populate_mode {
POPULATE_ONE2ONE,
POPULATE_MAP,
- POPULATE_ZERO_SHADOW
+ POPULATE_ZERO_SHADOW,
+ POPULATE_SHALLOW
};
static void __init kasan_early_vmemmap_populate(unsigned long address,
unsigned long end,
@@ -116,6 +117,12 @@ static void __init kasan_early_vmemmap_populate(unsigned long address,
pgd_populate(&init_mm, pg_dir, p4_dir);
}
+ if (IS_ENABLED(CONFIG_KASAN_S390_4_LEVEL_PAGING) &&
+ mode == POPULATE_SHALLOW) {
+ address = (address + P4D_SIZE) & P4D_MASK;
+ continue;
+ }
+
p4_dir = p4d_offset(pg_dir, address);
if (p4d_none(*p4_dir)) {
if (mode == POPULATE_ZERO_SHADOW &&
@@ -130,6 +137,12 @@ static void __init kasan_early_vmemmap_populate(unsigned long address,
p4d_populate(&init_mm, p4_dir, pu_dir);
}
+ if (!IS_ENABLED(CONFIG_KASAN_S390_4_LEVEL_PAGING) &&
+ mode == POPULATE_SHALLOW) {
+ address = (address + PUD_SIZE) & PUD_MASK;
+ continue;
+ }
+
pu_dir = pud_offset(p4_dir, address);
if (pud_none(*pu_dir)) {
if (mode == POPULATE_ZERO_SHADOW &&
@@ -195,6 +208,9 @@ static void __init kasan_early_vmemmap_populate(unsigned long address,
page = kasan_early_shadow_page;
pte_val(*pt_dir) = __pa(page) | pgt_prot_zero;
break;
+ case POPULATE_SHALLOW:
+ /* should never happen */
+ break;
}
}
address += PAGE_SIZE;
@@ -313,22 +329,50 @@ void __init kasan_early_init(void)
init_mm.pgd = early_pg_dir;
/*
* Current memory layout:
- * +- 0 -------------+ +- shadow start -+
- * | 1:1 ram mapping | /| 1/8 ram |
- * +- end of ram ----+ / +----------------+
- * | ... gap ... |/ | kasan |
- * +- shadow start --+ | zero |
- * | 1/8 addr space | | page |
- * +- shadow end -+ | mapping |
- * | ... gap ... |\ | (untracked) |
- * +- modules vaddr -+ \ +----------------+
- * | 2Gb | \| unmapped | allocated per module
- * +-----------------+ +- shadow end ---+
+ * +- 0 -------------+ +- shadow start -+
+ * | 1:1 ram mapping | /| 1/8 ram |
+ * | | / | |
+ * +- end of ram ----+ / +----------------+
+ * | ... gap ... | / | |
+ * | |/ | kasan |
+ * +- shadow start --+ | zero |
+ * | 1/8 addr space | | page |
+ * +- shadow end -+ | mapping |
+ * | ... gap ... |\ | (untracked) |
+ * +- vmalloc area -+ \ | |
+ * | vmalloc_size | \ | |
+ * +- modules vaddr -+ \ +----------------+
+ * | 2Gb | \| unmapped | allocated per module
+ * +-----------------+ +- shadow end ---+
+ *
+ * Current memory layout (KASAN_VMALLOC):
+ * +- 0 -------------+ +- shadow start -+
+ * | 1:1 ram mapping | /| 1/8 ram |
+ * | | / | |
+ * +- end of ram ----+ / +----------------+
+ * | ... gap ... | / | kasan |
+ * | |/ | zero |
+ * +- shadow start --+ | page |
+ * | 1/8 addr space | | mapping |
+ * +- shadow end -+ | (untracked) |
+ * | ... gap ... |\ | |
+ * +- vmalloc area -+ \ +- vmalloc area -+
+ * | vmalloc_size | \ |shallow populate|
+ * +- modules vaddr -+ \ +- modules area -+
+ * | 2Gb | \|shallow populate|
+ * +-----------------+ +- shadow end ---+
*/
/* populate kasan shadow (for identity mapping and zero page mapping) */
kasan_early_vmemmap_populate(__sha(0), __sha(memsize), POPULATE_MAP);
if (IS_ENABLED(CONFIG_MODULES))
untracked_mem_end = vmax - MODULES_LEN;
+ if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) {
+ untracked_mem_end = vmax - vmalloc_size - MODULES_LEN;
+ /* shallowly populate kasan shadow for vmalloc and modules */
+ kasan_early_vmemmap_populate(__sha(untracked_mem_end),
+ __sha(vmax), POPULATE_SHALLOW);
+ }
+ /* populate kasan shadow for untracked memory */
kasan_early_vmemmap_populate(__sha(max_physmem_end),
__sha(untracked_mem_end),
POPULATE_ZERO_SHADOW);
diff --git a/arch/s390/purgatory/.gitignore b/arch/s390/purgatory/.gitignore
index 04a03433c720..c82157f46b18 100644
--- a/arch/s390/purgatory/.gitignore
+++ b/arch/s390/purgatory/.gitignore
@@ -1,3 +1,4 @@
purgatory
+purgatory.chk
purgatory.lds
purgatory.ro
diff --git a/arch/s390/purgatory/Makefile b/arch/s390/purgatory/Makefile
index bc0d7a0d0394..c57f8c40e992 100644
--- a/arch/s390/purgatory/Makefile
+++ b/arch/s390/purgatory/Makefile
@@ -4,7 +4,7 @@ OBJECT_FILES_NON_STANDARD := y
purgatory-y := head.o purgatory.o string.o sha256.o mem.o
-targets += $(purgatory-y) purgatory.lds purgatory purgatory.ro
+targets += $(purgatory-y) purgatory.lds purgatory purgatory.chk purgatory.ro
PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y))
$(obj)/sha256.o: $(srctree)/lib/crypto/sha256.c FORCE
@@ -15,8 +15,10 @@ CFLAGS_sha256.o := -D__DISABLE_EXPORTS
$(obj)/mem.o: $(srctree)/arch/s390/lib/mem.S FORCE
$(call if_changed_rule,as_o_S)
-$(obj)/string.o: $(srctree)/arch/s390/lib/string.c FORCE
- $(call if_changed_rule,cc_o_c)
+KCOV_INSTRUMENT := n
+GCOV_PROFILE := n
+UBSAN_SANITIZE := n
+KASAN_SANITIZE := n
KBUILD_CFLAGS := -fno-strict-aliasing -Wall -Wstrict-prototypes
KBUILD_CFLAGS += -Wno-pointer-sign -Wno-sign-compare
@@ -26,15 +28,22 @@ KBUILD_CFLAGS += $(CLANG_FLAGS)
KBUILD_CFLAGS += $(call cc-option,-fno-PIE)
KBUILD_AFLAGS := $(filter-out -DCC_USING_EXPOLINE,$(KBUILD_AFLAGS))
-LDFLAGS_purgatory := -r --no-undefined -nostdlib -z nodefaultlib -T
+# Since we link purgatory with -r unresolved symbols are not checked, so we
+# also link a purgatory.chk binary without -r to check for unresolved symbols.
+PURGATORY_LDFLAGS := -nostdlib -z nodefaultlib
+LDFLAGS_purgatory := -r $(PURGATORY_LDFLAGS) -T
+LDFLAGS_purgatory.chk := -e purgatory_start $(PURGATORY_LDFLAGS)
$(obj)/purgatory: $(obj)/purgatory.lds $(PURGATORY_OBJS) FORCE
$(call if_changed,ld)
+$(obj)/purgatory.chk: $(obj)/purgatory FORCE
+ $(call if_changed,ld)
+
OBJCOPYFLAGS_purgatory.ro := -O elf64-s390
OBJCOPYFLAGS_purgatory.ro += --remove-section='*debug*'
OBJCOPYFLAGS_purgatory.ro += --remove-section='.comment'
OBJCOPYFLAGS_purgatory.ro += --remove-section='.note.*'
-$(obj)/purgatory.ro: $(obj)/purgatory FORCE
+$(obj)/purgatory.ro: $(obj)/purgatory $(obj)/purgatory.chk FORCE
$(call if_changed,objcopy)
$(obj)/kexec-purgatory.o: $(obj)/kexec-purgatory.S $(obj)/purgatory.ro FORCE
diff --git a/arch/s390/purgatory/string.c b/arch/s390/purgatory/string.c
new file mode 100644
index 000000000000..c98c22a72db7
--- /dev/null
+++ b/arch/s390/purgatory/string.c
@@ -0,0 +1,3 @@
+// SPDX-License-Identifier: GPL-2.0
+#define __HAVE_ARCH_MEMCMP /* arch function */
+#include "../lib/string.c"
diff --git a/arch/sh/drivers/platform_early.c b/arch/sh/drivers/platform_early.c
index f6d148451dfc..f3dc3f25b3ff 100644
--- a/arch/sh/drivers/platform_early.c
+++ b/arch/sh/drivers/platform_early.c
@@ -325,9 +325,9 @@ int __init sh_early_platform_driver_probe(char *class_str,
}
/**
- * sh_early_platform_cleanup - clean up early platform code
+ * early_platform_cleanup - clean up early platform code
*/
-static int __init sh_early_platform_cleanup(void)
+void __init early_platform_cleanup(void)
{
struct platform_device *pd, *pd2;
@@ -337,11 +337,4 @@ static int __init sh_early_platform_cleanup(void)
list_del(&pd->dev.devres_head);
memset(&pd->dev.devres_head, 0, sizeof(pd->dev.devres_head));
}
-
- return 0;
}
-/*
- * This must happen once after all early devices are probed but before probing
- * real platform devices.
- */
-subsys_initcall(sh_early_platform_cleanup);
diff --git a/arch/sh/kernel/kgdb.c b/arch/sh/kernel/kgdb.c
index 6d61f8cf4c13..0d5f3c9d52f3 100644
--- a/arch/sh/kernel/kgdb.c
+++ b/arch/sh/kernel/kgdb.c
@@ -266,6 +266,7 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code,
ptr = &remcomInBuffer[1];
if (kgdb_hex2long(&ptr, &addr))
linux_regs->pc = addr;
+ /* fallthrough */
case 'D':
case 'k':
atomic_set(&kgdb_cpu_doing_single_step, -1);
diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
index dfdbaa50946e..d1b1ff2be17a 100644
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -434,9 +434,7 @@ void arch_remove_memory(int nid, u64 start, u64 size,
{
unsigned long start_pfn = PFN_DOWN(start);
unsigned long nr_pages = size >> PAGE_SHIFT;
- struct zone *zone;
- zone = page_zone(pfn_to_page(start_pfn));
- __remove_pages(zone, start_pfn, nr_pages, altmap);
+ __remove_pages(start_pfn, nr_pages, altmap);
}
#endif /* CONFIG_MEMORY_HOTPLUG */
diff --git a/arch/sparc/net/bpf_jit_comp_32.c b/arch/sparc/net/bpf_jit_comp_32.c
index 84cc8f7f83e9..c8eabb973b86 100644
--- a/arch/sparc/net/bpf_jit_comp_32.c
+++ b/arch/sparc/net/bpf_jit_comp_32.c
@@ -180,19 +180,19 @@ do { \
#define emit_loadptr(BASE, STRUCT, FIELD, DEST) \
do { unsigned int _off = offsetof(STRUCT, FIELD); \
- BUILD_BUG_ON(FIELD_SIZEOF(STRUCT, FIELD) != sizeof(void *)); \
+ BUILD_BUG_ON(sizeof_field(STRUCT, FIELD) != sizeof(void *)); \
*prog++ = LDPTRI | RS1(BASE) | S13(_off) | RD(DEST); \
} while (0)
#define emit_load32(BASE, STRUCT, FIELD, DEST) \
do { unsigned int _off = offsetof(STRUCT, FIELD); \
- BUILD_BUG_ON(FIELD_SIZEOF(STRUCT, FIELD) != sizeof(u32)); \
+ BUILD_BUG_ON(sizeof_field(STRUCT, FIELD) != sizeof(u32)); \
*prog++ = LD32I | RS1(BASE) | S13(_off) | RD(DEST); \
} while (0)
#define emit_load16(BASE, STRUCT, FIELD, DEST) \
do { unsigned int _off = offsetof(STRUCT, FIELD); \
- BUILD_BUG_ON(FIELD_SIZEOF(STRUCT, FIELD) != sizeof(u16)); \
+ BUILD_BUG_ON(sizeof_field(STRUCT, FIELD) != sizeof(u16)); \
*prog++ = LD16I | RS1(BASE) | S13(_off) | RD(DEST); \
} while (0)
@@ -202,7 +202,7 @@ do { unsigned int _off = offsetof(STRUCT, FIELD); \
} while (0)
#define emit_load8(BASE, STRUCT, FIELD, DEST) \
-do { BUILD_BUG_ON(FIELD_SIZEOF(STRUCT, FIELD) != sizeof(u8)); \
+do { BUILD_BUG_ON(sizeof_field(STRUCT, FIELD) != sizeof(u8)); \
__emit_load8(BASE, STRUCT, FIELD, DEST); \
} while (0)
diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c
index 327b728f7244..35ebeebfc1a8 100644
--- a/arch/um/drivers/net_kern.c
+++ b/arch/um/drivers/net_kern.c
@@ -247,7 +247,7 @@ static void uml_net_set_multicast_list(struct net_device *dev)
return;
}
-static void uml_net_tx_timeout(struct net_device *dev)
+static void uml_net_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
netif_trans_update(dev);
netif_wake_queue(dev);
diff --git a/arch/um/drivers/vector_kern.c b/arch/um/drivers/vector_kern.c
index 92617e16829e..0ff86391f77d 100644
--- a/arch/um/drivers/vector_kern.c
+++ b/arch/um/drivers/vector_kern.c
@@ -1332,7 +1332,7 @@ static void vector_net_set_multicast_list(struct net_device *dev)
return;
}
-static void vector_net_tx_timeout(struct net_device *dev)
+static void vector_net_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct vector_private *vp = netdev_priv(dev);
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 5e8949953660..1f6a0388a65f 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -93,6 +93,7 @@ config X86
select ARCH_USE_QUEUED_RWLOCKS
select ARCH_USE_QUEUED_SPINLOCKS
select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
+ select ARCH_WANT_DEFAULT_BPF_JIT if X86_64
select ARCH_WANTS_DYNAMIC_TASK_STRUCT
select ARCH_WANT_HUGE_PMD_SHARE
select ARCH_WANTS_THP_SWAP if X86_64
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index aa976adb7094..1dac210f7d44 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -103,7 +103,7 @@ vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_thunk_$(BITS).o
quiet_cmd_check_data_rel = DATAREL $@
define cmd_check_data_rel
for obj in $(filter %.o,$^); do \
- ${CROSS_COMPILE}readelf -S $$obj | grep -qF .rel.local && { \
+ $(READELF) -S $$obj | grep -qF .rel.local && { \
echo "error: $$obj has data relocations!" >&2; \
exit 1; \
} || true; \
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 9a89d98c55bd..f118af9f0718 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -376,7 +376,7 @@ int x86_add_exclusive(unsigned int what)
* LBR and BTS are still mutually exclusive.
*/
if (x86_pmu.lbr_pt_coexist && what == x86_lbr_exclusive_pt)
- return 0;
+ goto out;
if (!atomic_inc_not_zero(&x86_pmu.lbr_exclusive[what])) {
mutex_lock(&pmc_reserve_mutex);
@@ -388,6 +388,7 @@ int x86_add_exclusive(unsigned int what)
mutex_unlock(&pmc_reserve_mutex);
}
+out:
atomic_inc(&active_events);
return 0;
@@ -398,11 +399,15 @@ fail_unlock:
void x86_del_exclusive(unsigned int what)
{
+ atomic_dec(&active_events);
+
+ /*
+ * See the comment in x86_add_exclusive().
+ */
if (x86_pmu.lbr_pt_coexist && what == x86_lbr_exclusive_pt)
return;
atomic_dec(&x86_pmu.lbr_exclusive[what]);
- atomic_dec(&active_events);
}
int x86_setup_perfctr(struct perf_event *event)
@@ -1642,9 +1647,12 @@ static struct attribute_group x86_pmu_format_group __ro_after_init = {
ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr, char *page)
{
- struct perf_pmu_events_attr *pmu_attr = \
+ struct perf_pmu_events_attr *pmu_attr =
container_of(attr, struct perf_pmu_events_attr, attr);
- u64 config = x86_pmu.event_map(pmu_attr->id);
+ u64 config = 0;
+
+ if (pmu_attr->id < x86_pmu.max_events)
+ config = x86_pmu.event_map(pmu_attr->id);
/* string trumps id */
if (pmu_attr->event_str)
@@ -1713,6 +1721,9 @@ is_visible(struct kobject *kobj, struct attribute *attr, int idx)
{
struct perf_pmu_events_attr *pmu_attr;
+ if (idx >= x86_pmu.max_events)
+ return 0;
+
pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr.attr);
/* str trumps id */
return pmu_attr->event_str || x86_pmu.event_map(idx) ? attr->mode : 0;
diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c
index 38de4a7f6752..6a3b599ee0fe 100644
--- a/arch/x86/events/intel/bts.c
+++ b/arch/x86/events/intel/bts.c
@@ -63,9 +63,17 @@ struct bts_buffer {
static struct pmu bts_pmu;
+static int buf_nr_pages(struct page *page)
+{
+ if (!PagePrivate(page))
+ return 1;
+
+ return 1 << page_private(page);
+}
+
static size_t buf_size(struct page *page)
{
- return 1 << (PAGE_SHIFT + page_private(page));
+ return buf_nr_pages(page) * PAGE_SIZE;
}
static void *
@@ -83,9 +91,7 @@ bts_buffer_setup_aux(struct perf_event *event, void **pages,
/* count all the high order buffers */
for (pg = 0, nbuf = 0; pg < nr_pages;) {
page = virt_to_page(pages[pg]);
- if (WARN_ON_ONCE(!PagePrivate(page) && nr_pages > 1))
- return NULL;
- pg += 1 << page_private(page);
+ pg += buf_nr_pages(page);
nbuf++;
}
@@ -109,7 +115,7 @@ bts_buffer_setup_aux(struct perf_event *event, void **pages,
unsigned int __nr_pages;
page = virt_to_page(pages[pg]);
- __nr_pages = PagePrivate(page) ? 1 << page_private(page) : 1;
+ __nr_pages = buf_nr_pages(page);
buf->buf[nbuf].page = page;
buf->buf[nbuf].offset = offset;
buf->buf[nbuf].displacement = (pad ? BTS_RECORD_SIZE - pad : 0);
diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index 5167bd2bb6b1..d6cf5c18a7e0 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -266,10 +266,10 @@ static void smca_configure(unsigned int bank, unsigned int cpu)
smca_set_misc_banks_map(bank, cpu);
/* Return early if this bank was already initialized. */
- if (smca_banks[bank].hwid)
+ if (smca_banks[bank].hwid && smca_banks[bank].hwid->hwid_mcatype != 0)
return;
- if (rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_IPID(bank), &low, &high)) {
+ if (rdmsr_safe(MSR_AMD64_SMCA_MCx_IPID(bank), &low, &high)) {
pr_warn("Failed to read MCA_IPID for bank %d\n", bank);
return;
}
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 5f42f25bac8f..2e2a421c8528 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -819,8 +819,8 @@ static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
if (quirk_no_way_out)
quirk_no_way_out(i, m, regs);
+ m->bank = i;
if (mce_severity(m, mca_cfg.tolerant, &tmp, true) >= MCE_PANIC_SEVERITY) {
- m->bank = i;
mce_read_aux(m, i);
*msg = tmp;
return 1;
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index 4cba91ec8049..2f9ec14be3b1 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -710,8 +710,12 @@ static struct chipset early_qrk[] __initdata = {
*/
{ PCI_VENDOR_ID_INTEL, 0x0f00,
PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet},
+ { PCI_VENDOR_ID_INTEL, 0x3e20,
+ PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet},
{ PCI_VENDOR_ID_INTEL, 0x3ec4,
PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet},
+ { PCI_VENDOR_ID_INTEL, 0x8a12,
+ PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet},
{ PCI_VENDOR_ID_BROADCOM, 0x4331,
PCI_CLASS_NETWORK_OTHER, PCI_ANY_ID, 0, apple_airport_reset},
{}
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 319be936c348..fa31470bbf24 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -259,7 +259,7 @@ static void __init setup_xstate_features(void)
xmm_space);
xstate_offsets[XFEATURE_SSE] = xstate_sizes[XFEATURE_FP];
- xstate_sizes[XFEATURE_SSE] = FIELD_SIZEOF(struct fxregs_state,
+ xstate_sizes[XFEATURE_SSE] = sizeof_field(struct fxregs_state,
xmm_space);
for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 060a361d9d11..024c3053dbba 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -1043,20 +1043,6 @@ void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent,
return;
/*
- * If the return location is actually pointing directly to
- * the start of a direct trampoline (if we trace the trampoline
- * it will still be offset by MCOUNT_INSN_SIZE), then the
- * return address is actually off by one word, and we
- * need to adjust for that.
- */
- if (ftrace_direct_func_count) {
- if (ftrace_find_direct_func(self_addr + MCOUNT_INSN_SIZE)) {
- self_addr = *parent;
- parent++;
- }
- }
-
- /*
* Protect against fault, even if it shouldn't
* happen. This tool is too much intrusive to
* ignore such a protection.
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index cfafa320a8cf..cf55629ff0ff 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -402,7 +402,8 @@ static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 *entry, int index)
entry->edx |= F(SPEC_CTRL);
if (boot_cpu_has(X86_FEATURE_STIBP))
entry->edx |= F(INTEL_STIBP);
- if (boot_cpu_has(X86_FEATURE_SSBD))
+ if (boot_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) ||
+ boot_cpu_has(X86_FEATURE_AMD_SSBD))
entry->edx |= F(SPEC_CTRL_SSBD);
/*
* We emulate ARCH_CAPABILITIES in software even
@@ -759,7 +760,8 @@ static inline int __do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 function,
entry->ebx |= F(AMD_IBRS);
if (boot_cpu_has(X86_FEATURE_STIBP))
entry->ebx |= F(AMD_STIBP);
- if (boot_cpu_has(X86_FEATURE_SSBD))
+ if (boot_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) ||
+ boot_cpu_has(X86_FEATURE_AMD_SSBD))
entry->ebx |= F(AMD_SSBD);
if (!boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS))
entry->ebx |= F(AMD_SSB_NO);
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 930edeb41ec3..0a74407ef92e 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -865,10 +865,8 @@ void arch_remove_memory(int nid, u64 start, u64 size,
{
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
- struct zone *zone;
- zone = page_zone(pfn_to_page(start_pfn));
- __remove_pages(zone, start_pfn, nr_pages, altmap);
+ __remove_pages(start_pfn, nr_pages, altmap);
}
#endif
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index dcb9bc961b39..bcfede46fe02 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1212,10 +1212,8 @@ void __ref arch_remove_memory(int nid, u64 start, u64 size,
{
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
- struct page *page = pfn_to_page(start_pfn) + vmem_altmap_offset(altmap);
- struct zone *zone = page_zone(page);
- __remove_pages(zone, start_pfn, nr_pages, altmap);
+ __remove_pages(start_pfn, nr_pages, altmap);
kernel_physical_mapping_remove(start, start + size);
}
#endif /* CONFIG_MEMORY_HOTPLUG */
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index b8be18427277..4c8a2d1f8470 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -10,10 +10,12 @@
#include <linux/if_vlan.h>
#include <linux/bpf.h>
#include <linux/memory.h>
+#include <linux/sort.h>
#include <asm/extable.h>
#include <asm/set_memory.h>
#include <asm/nospec-branch.h>
#include <asm/text-patching.h>
+#include <asm/asm-prototypes.h>
static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
{
@@ -1530,6 +1532,154 @@ int arch_prepare_bpf_trampoline(void *image, struct btf_func_model *m, u32 flags
return 0;
}
+static int emit_cond_near_jump(u8 **pprog, void *func, void *ip, u8 jmp_cond)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+ s64 offset;
+
+ offset = func - (ip + 2 + 4);
+ if (!is_simm32(offset)) {
+ pr_err("Target %p is out of range\n", func);
+ return -EINVAL;
+ }
+ EMIT2_off32(0x0F, jmp_cond + 0x10, offset);
+ *pprog = prog;
+ return 0;
+}
+
+static void emit_nops(u8 **pprog, unsigned int len)
+{
+ unsigned int i, noplen;
+ u8 *prog = *pprog;
+ int cnt = 0;
+
+ while (len > 0) {
+ noplen = len;
+
+ if (noplen > ASM_NOP_MAX)
+ noplen = ASM_NOP_MAX;
+
+ for (i = 0; i < noplen; i++)
+ EMIT1(ideal_nops[noplen][i]);
+ len -= noplen;
+ }
+
+ *pprog = prog;
+}
+
+static int emit_fallback_jump(u8 **pprog)
+{
+ u8 *prog = *pprog;
+ int err = 0;
+
+#ifdef CONFIG_RETPOLINE
+ /* Note that this assumes the the compiler uses external
+ * thunks for indirect calls. Both clang and GCC use the same
+ * naming convention for external thunks.
+ */
+ err = emit_jump(&prog, __x86_indirect_thunk_rdx, prog);
+#else
+ int cnt = 0;
+
+ EMIT2(0xFF, 0xE2); /* jmp rdx */
+#endif
+ *pprog = prog;
+ return err;
+}
+
+static int emit_bpf_dispatcher(u8 **pprog, int a, int b, s64 *progs)
+{
+ u8 *jg_reloc, *jg_target, *prog = *pprog;
+ int pivot, err, jg_bytes = 1, cnt = 0;
+ s64 jg_offset;
+
+ if (a == b) {
+ /* Leaf node of recursion, i.e. not a range of indices
+ * anymore.
+ */
+ EMIT1(add_1mod(0x48, BPF_REG_3)); /* cmp rdx,func */
+ if (!is_simm32(progs[a]))
+ return -1;
+ EMIT2_off32(0x81, add_1reg(0xF8, BPF_REG_3),
+ progs[a]);
+ err = emit_cond_near_jump(&prog, /* je func */
+ (void *)progs[a], prog,
+ X86_JE);
+ if (err)
+ return err;
+
+ err = emit_fallback_jump(&prog); /* jmp thunk/indirect */
+ if (err)
+ return err;
+
+ *pprog = prog;
+ return 0;
+ }
+
+ /* Not a leaf node, so we pivot, and recursively descend into
+ * the lower and upper ranges.
+ */
+ pivot = (b - a) / 2;
+ EMIT1(add_1mod(0x48, BPF_REG_3)); /* cmp rdx,func */
+ if (!is_simm32(progs[a + pivot]))
+ return -1;
+ EMIT2_off32(0x81, add_1reg(0xF8, BPF_REG_3), progs[a + pivot]);
+
+ if (pivot > 2) { /* jg upper_part */
+ /* Require near jump. */
+ jg_bytes = 4;
+ EMIT2_off32(0x0F, X86_JG + 0x10, 0);
+ } else {
+ EMIT2(X86_JG, 0);
+ }
+ jg_reloc = prog;
+
+ err = emit_bpf_dispatcher(&prog, a, a + pivot, /* emit lower_part */
+ progs);
+ if (err)
+ return err;
+
+ /* From Intel 64 and IA-32 Architectures Optimization
+ * Reference Manual, 3.4.1.4 Code Alignment, Assembly/Compiler
+ * Coding Rule 11: All branch targets should be 16-byte
+ * aligned.
+ */
+ jg_target = PTR_ALIGN(prog, 16);
+ if (jg_target != prog)
+ emit_nops(&prog, jg_target - prog);
+ jg_offset = prog - jg_reloc;
+ emit_code(jg_reloc - jg_bytes, jg_offset, jg_bytes);
+
+ err = emit_bpf_dispatcher(&prog, a + pivot + 1, /* emit upper_part */
+ b, progs);
+ if (err)
+ return err;
+
+ *pprog = prog;
+ return 0;
+}
+
+static int cmp_ips(const void *a, const void *b)
+{
+ const s64 *ipa = a;
+ const s64 *ipb = b;
+
+ if (*ipa > *ipb)
+ return 1;
+ if (*ipa < *ipb)
+ return -1;
+ return 0;
+}
+
+int arch_prepare_bpf_dispatcher(void *image, s64 *funcs, int num_funcs)
+{
+ u8 *prog = image;
+
+ sort(funcs, num_funcs, sizeof(funcs[0]), cmp_ips, NULL);
+ return emit_bpf_dispatcher(&prog, 0, num_funcs - 1, funcs);
+}
+
struct x64_jit_data {
struct bpf_binary_header *header;
int *addrs;
diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c
index 7675cf754d90..f8f0220b6a66 100644
--- a/arch/x86/platform/efi/quirks.c
+++ b/arch/x86/platform/efi/quirks.c
@@ -260,10 +260,6 @@ void __init efi_arch_mem_reserve(phys_addr_t addr, u64 size)
return;
}
- /* No need to reserve regions that will never be freed. */
- if (md.attribute & EFI_MEMORY_RUNTIME)
- return;
-
size += addr % EFI_PAGE_SIZE;
size = round_up(size, EFI_PAGE_SIZE);
addr = round_down(addr, EFI_PAGE_SIZE);
@@ -293,6 +289,8 @@ void __init efi_arch_mem_reserve(phys_addr_t addr, u64 size)
early_memunmap(new, new_size);
efi_memmap_install(new_phys, num_entries);
+ e820__range_update(addr, size, E820_TYPE_RAM, E820_TYPE_RESERVED);
+ e820__update_table(e820_table);
}
/*
diff --git a/arch/xtensa/platforms/iss/network.c b/arch/xtensa/platforms/iss/network.c
index fa9f3893b002..4986226a5ab2 100644
--- a/arch/xtensa/platforms/iss/network.c
+++ b/arch/xtensa/platforms/iss/network.c
@@ -455,7 +455,7 @@ static void iss_net_set_multicast_list(struct net_device *dev)
{
}
-static void iss_net_tx_timeout(struct net_device *dev)
+static void iss_net_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
}
diff --git a/block/bio.c b/block/bio.c
index 9d54aa37ce6c..006bcc52a77e 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -538,6 +538,45 @@ void zero_fill_bio_iter(struct bio *bio, struct bvec_iter start)
}
EXPORT_SYMBOL(zero_fill_bio_iter);
+void bio_truncate(struct bio *bio, unsigned new_size)
+{
+ struct bio_vec bv;
+ struct bvec_iter iter;
+ unsigned int done = 0;
+ bool truncated = false;
+
+ if (new_size >= bio->bi_iter.bi_size)
+ return;
+
+ if (bio_data_dir(bio) != READ)
+ goto exit;
+
+ bio_for_each_segment(bv, bio, iter) {
+ if (done + bv.bv_len > new_size) {
+ unsigned offset;
+
+ if (!truncated)
+ offset = new_size - done;
+ else
+ offset = 0;
+ zero_user(bv.bv_page, offset, bv.bv_len - offset);
+ truncated = true;
+ }
+ done += bv.bv_len;
+ }
+
+ exit:
+ /*
+ * Don't touch bvec table here and make it really immutable, since
+ * fs bio user has to retrieve all pages via bio_for_each_segment_all
+ * in its .end_bio() callback.
+ *
+ * It is enough to truncate bio by updating .bi_size since we can make
+ * correct bvec with the updated .bi_size for drivers.
+ */
+ bio->bi_iter.bi_size = new_size;
+}
+
/**
* bio_put - release a reference to a bio
* @bio: bio to release reference to
@@ -754,10 +793,12 @@ bool __bio_try_merge_page(struct bio *bio, struct page *page,
if (WARN_ON_ONCE(bio_flagged(bio, BIO_CLONED)))
return false;
- if (bio->bi_vcnt > 0 && !bio_full(bio, len)) {
+ if (bio->bi_vcnt > 0) {
struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
if (page_is_mergeable(bv, page, len, off, same_page)) {
+ if (bio->bi_iter.bi_size > UINT_MAX - len)
+ return false;
bv->bv_len += len;
bio->bi_iter.bi_size += len;
return true;
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 708dea92dac8..a229b94d5390 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -1062,26 +1062,6 @@ err_unlock:
}
/**
- * blkcg_drain_queue - drain blkcg part of request_queue
- * @q: request_queue to drain
- *
- * Called from blk_drain_queue(). Responsible for draining blkcg part.
- */
-void blkcg_drain_queue(struct request_queue *q)
-{
- lockdep_assert_held(&q->queue_lock);
-
- /*
- * @q could be exiting and already have destroyed all blkgs as
- * indicated by NULL root_blkg. If so, don't confuse policies.
- */
- if (!q->root_blkg)
- return;
-
- blk_throtl_drain(q);
-}
-
-/**
* blkcg_exit_queue - exit and release blkcg part of request_queue
* @q: request_queue being released
*
diff --git a/block/blk-core.c b/block/blk-core.c
index a1e228752083..089e890ab208 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -885,11 +885,14 @@ generic_make_request_checks(struct bio *bio)
}
/*
- * For a REQ_NOWAIT based request, return -EOPNOTSUPP
- * if queue is not a request based queue.
+ * Non-mq queues do not honor REQ_NOWAIT, so complete a bio
+ * with BLK_STS_AGAIN status in order to catch -EAGAIN and
+ * to give a chance to the caller to repeat request gracefully.
*/
- if ((bio->bi_opf & REQ_NOWAIT) && !queue_is_mq(q))
- goto not_supported;
+ if ((bio->bi_opf & REQ_NOWAIT) && !queue_is_mq(q)) {
+ status = BLK_STS_AGAIN;
+ goto end_io;
+ }
if (should_fail_bio(bio))
goto end_io;
@@ -1310,7 +1313,7 @@ EXPORT_SYMBOL_GPL(blk_rq_err_bytes);
void blk_account_io_completion(struct request *req, unsigned int bytes)
{
- if (blk_do_io_stat(req)) {
+ if (req->part && blk_do_io_stat(req)) {
const int sgrp = op_stat_group(req_op(req));
struct hd_struct *part;
@@ -1328,7 +1331,8 @@ void blk_account_io_done(struct request *req, u64 now)
* normal IO on queueing nor completion. Accounting the
* containing request is enough.
*/
- if (blk_do_io_stat(req) && !(req->rq_flags & RQF_FLUSH_SEQ)) {
+ if (req->part && blk_do_io_stat(req) &&
+ !(req->rq_flags & RQF_FLUSH_SEQ)) {
const int sgrp = op_stat_group(req_op(req));
struct hd_struct *part;
@@ -1792,9 +1796,9 @@ int __init blk_dev_init(void)
{
BUILD_BUG_ON(REQ_OP_LAST >= (1 << REQ_OP_BITS));
BUILD_BUG_ON(REQ_OP_BITS + REQ_FLAG_BITS > 8 *
- FIELD_SIZEOF(struct request, cmd_flags));
+ sizeof_field(struct request, cmd_flags));
BUILD_BUG_ON(REQ_OP_BITS + REQ_FLAG_BITS > 8 *
- FIELD_SIZEOF(struct bio, bi_opf));
+ sizeof_field(struct bio, bi_opf));
/* used for unplugging and affects IO latency/throughput - HIGHPRI */
kblockd_workqueue = alloc_workqueue("kblockd",
diff --git a/block/blk-flush.c b/block/blk-flush.c
index 1777346baf06..3f977c517960 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -69,6 +69,7 @@
#include <linux/blkdev.h>
#include <linux/gfp.h>
#include <linux/blk-mq.h>
+#include <linux/lockdep.h>
#include "blk.h"
#include "blk-mq.h"
@@ -505,6 +506,9 @@ struct blk_flush_queue *blk_alloc_flush_queue(struct request_queue *q,
INIT_LIST_HEAD(&fq->flush_queue[1]);
INIT_LIST_HEAD(&fq->flush_data_in_flight);
+ lockdep_register_key(&fq->key);
+ lockdep_set_class(&fq->mq_flush_lock, &fq->key);
+
return fq;
fail_rq:
@@ -519,6 +523,7 @@ void blk_free_flush_queue(struct blk_flush_queue *fq)
if (!fq)
return;
+ lockdep_unregister_key(&fq->key);
kfree(fq->flush_rq);
kfree(fq);
}
diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index e01267f99183..27ca68621137 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -1212,7 +1212,7 @@ static enum hrtimer_restart iocg_waitq_timer_fn(struct hrtimer *timer)
return HRTIMER_NORESTART;
}
-static void iocg_kick_delay(struct ioc_gq *iocg, struct ioc_now *now, u64 cost)
+static bool iocg_kick_delay(struct ioc_gq *iocg, struct ioc_now *now, u64 cost)
{
struct ioc *ioc = iocg->ioc;
struct blkcg_gq *blkg = iocg_to_blkg(iocg);
@@ -1229,11 +1229,11 @@ static void iocg_kick_delay(struct ioc_gq *iocg, struct ioc_now *now, u64 cost)
/* clear or maintain depending on the overage */
if (time_before_eq64(vtime, now->vnow)) {
blkcg_clear_delay(blkg);
- return;
+ return false;
}
if (!atomic_read(&blkg->use_delay) &&
time_before_eq64(vtime, now->vnow + vmargin))
- return;
+ return false;
/* use delay */
if (cost) {
@@ -1250,10 +1250,11 @@ static void iocg_kick_delay(struct ioc_gq *iocg, struct ioc_now *now, u64 cost)
oexpires = ktime_to_ns(hrtimer_get_softexpires(&iocg->delay_timer));
if (hrtimer_is_queued(&iocg->delay_timer) &&
abs(oexpires - expires) <= margin_ns / 4)
- return;
+ return true;
hrtimer_start_range_ns(&iocg->delay_timer, ns_to_ktime(expires),
margin_ns / 4, HRTIMER_MODE_ABS);
+ return true;
}
static enum hrtimer_restart iocg_delay_timer_fn(struct hrtimer *timer)
@@ -1739,7 +1740,9 @@ static void ioc_rqos_throttle(struct rq_qos *rqos, struct bio *bio)
*/
if (bio_issue_as_root_blkg(bio) || fatal_signal_pending(current)) {
atomic64_add(abs_cost, &iocg->abs_vdebt);
- iocg_kick_delay(iocg, &now, cost);
+ if (iocg_kick_delay(iocg, &now, cost))
+ blkcg_schedule_throttle(rqos->q,
+ (bio->bi_opf & REQ_SWAP) == REQ_SWAP);
return;
}
diff --git a/block/blk-map.c b/block/blk-map.c
index 3a62e471d81b..b0790268ed9d 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -151,7 +151,7 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
return 0;
unmap_rq:
- __blk_rq_unmap_user(bio);
+ blk_rq_unmap_user(bio);
fail:
rq->bio = NULL;
return ret;
diff --git a/block/blk-merge.c b/block/blk-merge.c
index d783bdc4559b..347782a24a35 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -157,16 +157,14 @@ static inline unsigned get_max_io_size(struct request_queue *q,
return sectors & (lbs - 1);
}
-static unsigned get_max_segment_size(const struct request_queue *q,
- unsigned offset)
+static inline unsigned get_max_segment_size(const struct request_queue *q,
+ struct page *start_page,
+ unsigned long offset)
{
unsigned long mask = queue_segment_boundary(q);
- /* default segment boundary mask means no boundary limit */
- if (mask == BLK_SEG_BOUNDARY_MASK)
- return queue_max_segment_size(q);
-
- return min_t(unsigned long, mask - (mask & offset) + 1,
+ offset = mask & (page_to_phys(start_page) + offset);
+ return min_t(unsigned long, mask - offset + 1,
queue_max_segment_size(q));
}
@@ -201,7 +199,8 @@ static bool bvec_split_segs(const struct request_queue *q,
unsigned seg_size = 0;
while (len && *nsegs < max_segs) {
- seg_size = get_max_segment_size(q, bv->bv_offset + total_len);
+ seg_size = get_max_segment_size(q, bv->bv_page,
+ bv->bv_offset + total_len);
seg_size = min(seg_size, len);
(*nsegs)++;
@@ -419,7 +418,8 @@ static unsigned blk_bvec_map_sg(struct request_queue *q,
while (nbytes > 0) {
unsigned offset = bvec->bv_offset + total;
- unsigned len = min(get_max_segment_size(q, offset), nbytes);
+ unsigned len = min(get_max_segment_size(q, bvec->bv_page,
+ offset), nbytes);
struct page *page = bvec->bv_page;
/*
diff --git a/block/blk.h b/block/blk.h
index 6842f28c033e..0b8884353f6b 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -30,6 +30,7 @@ struct blk_flush_queue {
* at the same time
*/
struct request *orig_rq;
+ struct lock_class_key key;
spinlock_t mq_flush_lock;
};
diff --git a/block/bsg-lib.c b/block/bsg-lib.c
index 347dda16c2f4..6cbb7926534c 100644
--- a/block/bsg-lib.c
+++ b/block/bsg-lib.c
@@ -266,7 +266,7 @@ static blk_status_t bsg_queue_rq(struct blk_mq_hw_ctx *hctx,
struct request *req = bd->rq;
struct bsg_set *bset =
container_of(q->tag_set, struct bsg_set, tag_set);
- int sts = BLK_STS_IOERR;
+ blk_status_t sts = BLK_STS_IOERR;
int ret;
blk_mq_start_request(req);
diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c
index 6ca015f92766..3ed7a0f144a9 100644
--- a/block/compat_ioctl.c
+++ b/block/compat_ioctl.c
@@ -6,6 +6,7 @@
#include <linux/compat.h>
#include <linux/elevator.h>
#include <linux/hdreg.h>
+#include <linux/pr.h>
#include <linux/slab.h>
#include <linux/syscalls.h>
#include <linux/types.h>
@@ -354,6 +355,13 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg)
* but we call blkdev_ioctl, which gets the lock for us
*/
case BLKRRPART:
+ case BLKREPORTZONE:
+ case BLKRESETZONE:
+ case BLKOPENZONE:
+ case BLKCLOSEZONE:
+ case BLKFINISHZONE:
+ case BLKGETZONESZ:
+ case BLKGETNRZONES:
return blkdev_ioctl(bdev, mode, cmd,
(unsigned long)compat_ptr(arg));
case BLKBSZSET_32:
@@ -401,6 +409,14 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg)
case BLKTRACETEARDOWN: /* compatible */
ret = blk_trace_ioctl(bdev, cmd, compat_ptr(arg));
return ret;
+ case IOC_PR_REGISTER:
+ case IOC_PR_RESERVE:
+ case IOC_PR_RELEASE:
+ case IOC_PR_PREEMPT:
+ case IOC_PR_PREEMPT_ABORT:
+ case IOC_PR_CLEAR:
+ return blkdev_ioctl(bdev, mode, cmd,
+ (unsigned long)compat_ptr(arg));
default:
if (disk->fops->compat_ioctl)
ret = disk->fops->compat_ioctl(bdev, mode, cmd, arg);
diff --git a/crypto/adiantum.c b/crypto/adiantum.c
index aded26092268..9dc53cf9b1f1 100644
--- a/crypto/adiantum.c
+++ b/crypto/adiantum.c
@@ -436,10 +436,10 @@ static int adiantum_init_tfm(struct crypto_skcipher *tfm)
BUILD_BUG_ON(offsetofend(struct adiantum_request_ctx, u) !=
sizeof(struct adiantum_request_ctx));
- subreq_size = max(FIELD_SIZEOF(struct adiantum_request_ctx,
+ subreq_size = max(sizeof_field(struct adiantum_request_ctx,
u.hash_desc) +
crypto_shash_descsize(hash),
- FIELD_SIZEOF(struct adiantum_request_ctx,
+ sizeof_field(struct adiantum_request_ctx,
u.streamcipher_req) +
crypto_skcipher_reqsize(streamcipher));
diff --git a/crypto/asymmetric_keys/asym_tpm.c b/crypto/asymmetric_keys/asym_tpm.c
index d16d893bd195..378b18b9bc34 100644
--- a/crypto/asymmetric_keys/asym_tpm.c
+++ b/crypto/asymmetric_keys/asym_tpm.c
@@ -470,6 +470,7 @@ static int tpm_key_encrypt(struct tpm_key *tk,
if (ret < 0)
goto error_free_tfm;
+ ret = -ENOMEM;
req = akcipher_request_alloc(tfm, GFP_KERNEL);
if (!req)
goto error_free_tfm;
diff --git a/crypto/asymmetric_keys/public_key.c b/crypto/asymmetric_keys/public_key.c
index 364b9df9d631..d7f43d4ea925 100644
--- a/crypto/asymmetric_keys/public_key.c
+++ b/crypto/asymmetric_keys/public_key.c
@@ -184,6 +184,7 @@ static int software_key_eds_op(struct kernel_pkey_params *params,
if (IS_ERR(tfm))
return PTR_ERR(tfm);
+ ret = -ENOMEM;
req = akcipher_request_alloc(tfm, GFP_KERNEL);
if (!req)
goto error_free_tfm;
diff --git a/crypto/essiv.c b/crypto/essiv.c
index 808f2b362106..495a2d1e1460 100644
--- a/crypto/essiv.c
+++ b/crypto/essiv.c
@@ -347,7 +347,7 @@ static int essiv_aead_init_tfm(struct crypto_aead *tfm)
if (IS_ERR(aead))
return PTR_ERR(aead);
- subreq_size = FIELD_SIZEOF(struct essiv_aead_request_ctx, aead_req) +
+ subreq_size = sizeof_field(struct essiv_aead_request_ctx, aead_req) +
crypto_aead_reqsize(aead);
tctx->ivoffset = offsetof(struct essiv_aead_request_ctx, aead_req) +
diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c
index 08bb9f2f2d23..5e4a8860a9c0 100644
--- a/drivers/acpi/device_pm.c
+++ b/drivers/acpi/device_pm.c
@@ -1314,9 +1314,19 @@ static void acpi_dev_pm_detach(struct device *dev, bool power_off)
*/
int acpi_dev_pm_attach(struct device *dev, bool power_on)
{
+ /*
+ * Skip devices whose ACPI companions match the device IDs below,
+ * because they require special power management handling incompatible
+ * with the generic ACPI PM domain.
+ */
+ static const struct acpi_device_id special_pm_ids[] = {
+ {"PNP0C0B", }, /* Generic ACPI fan */
+ {"INT3404", }, /* Fan */
+ {}
+ };
struct acpi_device *adev = ACPI_COMPANION(dev);
- if (!adev)
+ if (!adev || !acpi_match_device_ids(adev, special_pm_ids))
return 0;
/*
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index e9bc9fcc7ea5..b2dad43dbf82 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -3310,7 +3310,7 @@ static void binder_transaction(struct binder_proc *proc,
binder_size_t parent_offset;
struct binder_fd_array_object *fda =
to_binder_fd_array_object(hdr);
- size_t num_valid = (buffer_offset - off_start_offset) *
+ size_t num_valid = (buffer_offset - off_start_offset) /
sizeof(binder_size_t);
struct binder_buffer_object *parent =
binder_validate_ptr(target_proc, t->buffer,
@@ -3384,7 +3384,7 @@ static void binder_transaction(struct binder_proc *proc,
t->buffer->user_data + sg_buf_offset;
sg_buf_offset += ALIGN(bp->length, sizeof(u64));
- num_valid = (buffer_offset - off_start_offset) *
+ num_valid = (buffer_offset - off_start_offset) /
sizeof(binder_size_t);
ret = binder_fixup_parent(t, thread, bp,
off_start_offset,
diff --git a/drivers/ata/ahci_brcm.c b/drivers/ata/ahci_brcm.c
index f41744b9b38a..66a570d0da83 100644
--- a/drivers/ata/ahci_brcm.c
+++ b/drivers/ata/ahci_brcm.c
@@ -76,8 +76,7 @@ enum brcm_ahci_version {
};
enum brcm_ahci_quirks {
- BRCM_AHCI_QUIRK_NO_NCQ = BIT(0),
- BRCM_AHCI_QUIRK_SKIP_PHY_ENABLE = BIT(1),
+ BRCM_AHCI_QUIRK_SKIP_PHY_ENABLE = BIT(0),
};
struct brcm_ahci_priv {
@@ -213,19 +212,12 @@ static void brcm_sata_phys_disable(struct brcm_ahci_priv *priv)
brcm_sata_phy_disable(priv, i);
}
-static u32 brcm_ahci_get_portmask(struct platform_device *pdev,
+static u32 brcm_ahci_get_portmask(struct ahci_host_priv *hpriv,
struct brcm_ahci_priv *priv)
{
- void __iomem *ahci;
- struct resource *res;
u32 impl;
- res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "ahci");
- ahci = devm_ioremap_resource(&pdev->dev, res);
- if (IS_ERR(ahci))
- return 0;
-
- impl = readl(ahci + HOST_PORTS_IMPL);
+ impl = readl(hpriv->mmio + HOST_PORTS_IMPL);
if (fls(impl) > SATA_TOP_MAX_PHYS)
dev_warn(priv->dev, "warning: more ports than PHYs (%#x)\n",
@@ -233,9 +225,6 @@ static u32 brcm_ahci_get_portmask(struct platform_device *pdev,
else if (!impl)
dev_info(priv->dev, "no ports found\n");
- devm_iounmap(&pdev->dev, ahci);
- devm_release_mem_region(&pdev->dev, res->start, resource_size(res));
-
return impl;
}
@@ -285,6 +274,13 @@ static unsigned int brcm_ahci_read_id(struct ata_device *dev,
/* Perform the SATA PHY reset sequence */
brcm_sata_phy_disable(priv, ap->port_no);
+ /* Reset the SATA clock */
+ ahci_platform_disable_clks(hpriv);
+ msleep(10);
+
+ ahci_platform_enable_clks(hpriv);
+ msleep(10);
+
/* Bring the PHY back on */
brcm_sata_phy_enable(priv, ap->port_no);
@@ -347,11 +343,10 @@ static int brcm_ahci_suspend(struct device *dev)
struct ata_host *host = dev_get_drvdata(dev);
struct ahci_host_priv *hpriv = host->private_data;
struct brcm_ahci_priv *priv = hpriv->plat_data;
- int ret;
- ret = ahci_platform_suspend(dev);
brcm_sata_phys_disable(priv);
- return ret;
+
+ return ahci_platform_suspend(dev);
}
static int brcm_ahci_resume(struct device *dev)
@@ -359,11 +354,44 @@ static int brcm_ahci_resume(struct device *dev)
struct ata_host *host = dev_get_drvdata(dev);
struct ahci_host_priv *hpriv = host->private_data;
struct brcm_ahci_priv *priv = hpriv->plat_data;
+ int ret;
+
+ /* Make sure clocks are turned on before re-configuration */
+ ret = ahci_platform_enable_clks(hpriv);
+ if (ret)
+ return ret;
brcm_sata_init(priv);
brcm_sata_phys_enable(priv);
brcm_sata_alpm_init(hpriv);
- return ahci_platform_resume(dev);
+
+ /* Since we had to enable clocks earlier on, we cannot use
+ * ahci_platform_resume() as-is since a second call to
+ * ahci_platform_enable_resources() would bump up the resources
+ * (regulators, clocks, PHYs) count artificially so we copy the part
+ * after ahci_platform_enable_resources().
+ */
+ ret = ahci_platform_enable_phys(hpriv);
+ if (ret)
+ goto out_disable_phys;
+
+ ret = ahci_platform_resume_host(dev);
+ if (ret)
+ goto out_disable_platform_phys;
+
+ /* We resumed so update PM runtime state */
+ pm_runtime_disable(dev);
+ pm_runtime_set_active(dev);
+ pm_runtime_enable(dev);
+
+ return 0;
+
+out_disable_platform_phys:
+ ahci_platform_disable_phys(hpriv);
+out_disable_phys:
+ brcm_sata_phys_disable(priv);
+ ahci_platform_disable_clks(hpriv);
+ return ret;
}
#endif
@@ -410,44 +438,71 @@ static int brcm_ahci_probe(struct platform_device *pdev)
if (!IS_ERR_OR_NULL(priv->rcdev))
reset_control_deassert(priv->rcdev);
- if ((priv->version == BRCM_SATA_BCM7425) ||
- (priv->version == BRCM_SATA_NSP)) {
- priv->quirks |= BRCM_AHCI_QUIRK_NO_NCQ;
+ hpriv = ahci_platform_get_resources(pdev, 0);
+ if (IS_ERR(hpriv)) {
+ ret = PTR_ERR(hpriv);
+ goto out_reset;
+ }
+
+ hpriv->plat_data = priv;
+ hpriv->flags = AHCI_HFLAG_WAKE_BEFORE_STOP | AHCI_HFLAG_NO_WRITE_TO_RO;
+
+ switch (priv->version) {
+ case BRCM_SATA_BCM7425:
+ hpriv->flags |= AHCI_HFLAG_DELAY_ENGINE;
+ /* fall through */
+ case BRCM_SATA_NSP:
+ hpriv->flags |= AHCI_HFLAG_NO_NCQ;
priv->quirks |= BRCM_AHCI_QUIRK_SKIP_PHY_ENABLE;
+ break;
+ default:
+ break;
}
+ ret = ahci_platform_enable_clks(hpriv);
+ if (ret)
+ goto out_reset;
+
+ /* Must be first so as to configure endianness including that
+ * of the standard AHCI register space.
+ */
brcm_sata_init(priv);
- priv->port_mask = brcm_ahci_get_portmask(pdev, priv);
- if (!priv->port_mask)
- return -ENODEV;
+ /* Initializes priv->port_mask which is used below */
+ priv->port_mask = brcm_ahci_get_portmask(hpriv, priv);
+ if (!priv->port_mask) {
+ ret = -ENODEV;
+ goto out_disable_clks;
+ }
+ /* Must be done before ahci_platform_enable_phys() */
brcm_sata_phys_enable(priv);
- hpriv = ahci_platform_get_resources(pdev, 0);
- if (IS_ERR(hpriv))
- return PTR_ERR(hpriv);
- hpriv->plat_data = priv;
- hpriv->flags = AHCI_HFLAG_WAKE_BEFORE_STOP;
-
brcm_sata_alpm_init(hpriv);
- ret = ahci_platform_enable_resources(hpriv);
+ ret = ahci_platform_enable_phys(hpriv);
if (ret)
- return ret;
-
- if (priv->quirks & BRCM_AHCI_QUIRK_NO_NCQ)
- hpriv->flags |= AHCI_HFLAG_NO_NCQ;
- hpriv->flags |= AHCI_HFLAG_NO_WRITE_TO_RO;
+ goto out_disable_phys;
ret = ahci_platform_init_host(pdev, hpriv, &ahci_brcm_port_info,
&ahci_platform_sht);
if (ret)
- return ret;
+ goto out_disable_platform_phys;
dev_info(dev, "Broadcom AHCI SATA3 registered\n");
return 0;
+
+out_disable_platform_phys:
+ ahci_platform_disable_phys(hpriv);
+out_disable_phys:
+ brcm_sata_phys_disable(priv);
+out_disable_clks:
+ ahci_platform_disable_clks(hpriv);
+out_reset:
+ if (!IS_ERR_OR_NULL(priv->rcdev))
+ reset_control_assert(priv->rcdev);
+ return ret;
}
static int brcm_ahci_remove(struct platform_device *pdev)
@@ -457,12 +512,12 @@ static int brcm_ahci_remove(struct platform_device *pdev)
struct brcm_ahci_priv *priv = hpriv->plat_data;
int ret;
+ brcm_sata_phys_disable(priv);
+
ret = ata_platform_remove_one(pdev);
if (ret)
return ret;
- brcm_sata_phys_disable(priv);
-
return 0;
}
diff --git a/drivers/ata/libahci_platform.c b/drivers/ata/libahci_platform.c
index 8befce036af8..129556fcf6be 100644
--- a/drivers/ata/libahci_platform.c
+++ b/drivers/ata/libahci_platform.c
@@ -43,7 +43,7 @@ EXPORT_SYMBOL_GPL(ahci_platform_ops);
* RETURNS:
* 0 on success otherwise a negative error code
*/
-static int ahci_platform_enable_phys(struct ahci_host_priv *hpriv)
+int ahci_platform_enable_phys(struct ahci_host_priv *hpriv)
{
int rc, i;
@@ -74,6 +74,7 @@ disable_phys:
}
return rc;
}
+EXPORT_SYMBOL_GPL(ahci_platform_enable_phys);
/**
* ahci_platform_disable_phys - Disable PHYs
@@ -81,7 +82,7 @@ disable_phys:
*
* This function disables all PHYs found in hpriv->phys.
*/
-static void ahci_platform_disable_phys(struct ahci_host_priv *hpriv)
+void ahci_platform_disable_phys(struct ahci_host_priv *hpriv)
{
int i;
@@ -90,6 +91,7 @@ static void ahci_platform_disable_phys(struct ahci_host_priv *hpriv)
phy_exit(hpriv->phys[i]);
}
}
+EXPORT_SYMBOL_GPL(ahci_platform_disable_phys);
/**
* ahci_platform_enable_clks - Enable platform clocks
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index e9017c570bc5..6f4ab5c5b52d 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -5329,6 +5329,30 @@ void ata_qc_complete(struct ata_queued_cmd *qc)
}
/**
+ * ata_qc_get_active - get bitmask of active qcs
+ * @ap: port in question
+ *
+ * LOCKING:
+ * spin_lock_irqsave(host lock)
+ *
+ * RETURNS:
+ * Bitmask of active qcs
+ */
+u64 ata_qc_get_active(struct ata_port *ap)
+{
+ u64 qc_active = ap->qc_active;
+
+ /* ATA_TAG_INTERNAL is sent to hw as tag 0 */
+ if (qc_active & (1ULL << ATA_TAG_INTERNAL)) {
+ qc_active |= (1 << 0);
+ qc_active &= ~(1ULL << ATA_TAG_INTERNAL);
+ }
+
+ return qc_active;
+}
+EXPORT_SYMBOL_GPL(ata_qc_get_active);
+
+/**
* ata_qc_complete_multiple - Complete multiple qcs successfully
* @ap: port in question
* @qc_active: new qc_active mask
diff --git a/drivers/ata/sata_fsl.c b/drivers/ata/sata_fsl.c
index 9239615d8a04..d55ee244d693 100644
--- a/drivers/ata/sata_fsl.c
+++ b/drivers/ata/sata_fsl.c
@@ -1280,7 +1280,7 @@ static void sata_fsl_host_intr(struct ata_port *ap)
i, ioread32(hcr_base + CC),
ioread32(hcr_base + CA));
}
- ata_qc_complete_multiple(ap, ap->qc_active ^ done_mask);
+ ata_qc_complete_multiple(ap, ata_qc_get_active(ap) ^ done_mask);
return;
} else if ((ap->qc_active & (1ULL << ATA_TAG_INTERNAL))) {
diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c
index 277f11909fc1..d7228f8e9297 100644
--- a/drivers/ata/sata_mv.c
+++ b/drivers/ata/sata_mv.c
@@ -2829,7 +2829,7 @@ static void mv_process_crpb_entries(struct ata_port *ap, struct mv_port_priv *pp
}
if (work_done) {
- ata_qc_complete_multiple(ap, ap->qc_active ^ done_mask);
+ ata_qc_complete_multiple(ap, ata_qc_get_active(ap) ^ done_mask);
/* Update the software queue position index in hardware */
writelfl((pp->crpb_dma & EDMA_RSP_Q_BASE_LO_MASK) |
diff --git a/drivers/ata/sata_nv.c b/drivers/ata/sata_nv.c
index f3e62f5528bd..eb9dc14e5147 100644
--- a/drivers/ata/sata_nv.c
+++ b/drivers/ata/sata_nv.c
@@ -984,7 +984,7 @@ static irqreturn_t nv_adma_interrupt(int irq, void *dev_instance)
check_commands = 0;
check_commands &= ~(1 << pos);
}
- ata_qc_complete_multiple(ap, ap->qc_active ^ done_mask);
+ ata_qc_complete_multiple(ap, ata_qc_get_active(ap) ^ done_mask);
}
}
diff --git a/drivers/atm/eni.c b/drivers/atm/eni.c
index b23d1e4bad33..9d0d65efcd94 100644
--- a/drivers/atm/eni.c
+++ b/drivers/atm/eni.c
@@ -374,7 +374,7 @@ static int do_rx_dma(struct atm_vcc *vcc,struct sk_buff *skb,
here = (eni_vcc->descr+skip) & (eni_vcc->words-1);
dma[j++] = (here << MID_DMA_COUNT_SHIFT) | (vcc->vci
<< MID_DMA_VCI_SHIFT) | MID_DT_JK;
- j++;
+ dma[j++] = 0;
}
here = (eni_vcc->descr+size+skip) & (eni_vcc->words-1);
if (!eff) size += skip;
@@ -447,7 +447,7 @@ static int do_rx_dma(struct atm_vcc *vcc,struct sk_buff *skb,
if (size != eff) {
dma[j++] = (here << MID_DMA_COUNT_SHIFT) |
(vcc->vci << MID_DMA_VCI_SHIFT) | MID_DT_JK;
- j++;
+ dma[j++] = 0;
}
if (!j || j > 2*RX_DMA_BUF) {
printk(KERN_CRIT DEV_LABEL "!j or j too big!!!\n");
diff --git a/drivers/atm/fore200e.c b/drivers/atm/fore200e.c
index f1a500205313..8fbd36eb8941 100644
--- a/drivers/atm/fore200e.c
+++ b/drivers/atm/fore200e.c
@@ -1414,12 +1414,14 @@ fore200e_open(struct atm_vcc *vcc)
static void
fore200e_close(struct atm_vcc* vcc)
{
- struct fore200e* fore200e = FORE200E_DEV(vcc->dev);
struct fore200e_vcc* fore200e_vcc;
+ struct fore200e* fore200e;
struct fore200e_vc_map* vc_map;
unsigned long flags;
ASSERT(vcc);
+ fore200e = FORE200E_DEV(vcc->dev);
+
ASSERT((vcc->vpi >= 0) && (vcc->vpi < 1<<FORE200E_VPI_BITS));
ASSERT((vcc->vci >= 0) && (vcc->vci < 1<<FORE200E_VCI_BITS));
@@ -1464,10 +1466,10 @@ fore200e_close(struct atm_vcc* vcc)
static int
fore200e_send(struct atm_vcc *vcc, struct sk_buff *skb)
{
- struct fore200e* fore200e = FORE200E_DEV(vcc->dev);
- struct fore200e_vcc* fore200e_vcc = FORE200E_VCC(vcc);
+ struct fore200e* fore200e;
+ struct fore200e_vcc* fore200e_vcc;
struct fore200e_vc_map* vc_map;
- struct host_txq* txq = &fore200e->host_txq;
+ struct host_txq* txq;
struct host_txq_entry* entry;
struct tpd* tpd;
struct tpd_haddr tpd_haddr;
@@ -1480,9 +1482,18 @@ fore200e_send(struct atm_vcc *vcc, struct sk_buff *skb)
unsigned char* data;
unsigned long flags;
- ASSERT(vcc);
- ASSERT(fore200e);
- ASSERT(fore200e_vcc);
+ if (!vcc)
+ return -EINVAL;
+
+ fore200e = FORE200E_DEV(vcc->dev);
+ fore200e_vcc = FORE200E_VCC(vcc);
+
+ if (!fore200e)
+ return -EINVAL;
+
+ txq = &fore200e->host_txq;
+ if (!fore200e_vcc)
+ return -EINVAL;
if (!test_bit(ATM_VF_READY, &vcc->flags)) {
DPRINTK(1, "VC %d.%d.%d not ready for tx\n", vcc->itf, vcc->vpi, vcc->vpi);
diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c
index 30d0523014e0..6cdbf1531238 100644
--- a/drivers/base/devtmpfs.c
+++ b/drivers/base/devtmpfs.c
@@ -359,7 +359,7 @@ static int handle_remove(const char *nodename, struct device *dev)
* If configured, or requested by the commandline, devtmpfs will be
* auto-mounted after the kernel mounted the root filesystem.
*/
-int devtmpfs_mount(const char *mntdir)
+int devtmpfs_mount(void)
{
int err;
@@ -369,7 +369,7 @@ int devtmpfs_mount(const char *mntdir)
if (!thread)
return 0;
- err = ksys_mount("devtmpfs", mntdir, "devtmpfs", MS_SILENT, NULL);
+ err = do_mount("devtmpfs", "dev", "devtmpfs", MS_SILENT, NULL);
if (err)
printk(KERN_INFO "devtmpfs: error mounting %i\n", err);
else
@@ -394,7 +394,7 @@ static int devtmpfsd(void *p)
*err = ksys_unshare(CLONE_NEWNS);
if (*err)
goto out;
- *err = ksys_mount("devtmpfs", "/", "devtmpfs", MS_SILENT, NULL);
+ *err = do_mount("devtmpfs", "/", "devtmpfs", MS_SILENT, NULL);
if (*err)
goto out;
ksys_chdir("/.."); /* will traverse into overmounted root */
diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index 7c532548b0a6..cf6b6b722e5c 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -1325,10 +1325,14 @@ struct device *platform_find_device_by_driver(struct device *start,
}
EXPORT_SYMBOL_GPL(platform_find_device_by_driver);
+void __weak __init early_platform_cleanup(void) { }
+
int __init platform_bus_init(void)
{
int error;
+ early_platform_cleanup();
+
error = device_register(&platform_bus);
if (error) {
put_device(&platform_bus);
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 57532465fb83..b4607dd96185 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -1296,10 +1296,10 @@ static int nbd_start_device_ioctl(struct nbd_device *nbd, struct block_device *b
mutex_unlock(&nbd->config_lock);
ret = wait_event_interruptible(config->recv_wq,
atomic_read(&config->recv_threads) == 0);
- if (ret) {
+ if (ret)
sock_shutdown(nbd);
- flush_workqueue(nbd->recv_workq);
- }
+ flush_workqueue(nbd->recv_workq);
+
mutex_lock(&nbd->config_lock);
nbd_bdev_reset(bdev);
/* user requested, ignore socket errors */
diff --git a/drivers/block/null_blk_zoned.c b/drivers/block/null_blk_zoned.c
index d4d88b581822..5cf49d9db95e 100644
--- a/drivers/block/null_blk_zoned.c
+++ b/drivers/block/null_blk_zoned.c
@@ -186,7 +186,10 @@ static blk_status_t null_zone_mgmt(struct nullb_cmd *cmd, enum req_opf op,
if (zone->cond == BLK_ZONE_COND_FULL)
return BLK_STS_IOERR;
- zone->cond = BLK_ZONE_COND_CLOSED;
+ if (zone->wp == zone->start)
+ zone->cond = BLK_ZONE_COND_EMPTY;
+ else
+ zone->cond = BLK_ZONE_COND_CLOSED;
break;
case REQ_OP_ZONE_FINISH:
if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index ee67bf929fac..861fc65a1b75 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -2707,7 +2707,7 @@ static const struct block_device_operations pktcdvd_ops = {
.release = pkt_close,
.ioctl = pkt_ioctl,
#ifdef CONFIG_COMPAT
- .ioctl = pkt_compat_ioctl,
+ .compat_ioctl = pkt_compat_ioctl,
#endif
.check_events = pkt_check_events,
};
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index e8c5c54e1d26..4c5d99f87813 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -171,6 +171,15 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
blkif->domid = domid;
atomic_set(&blkif->refcnt, 1);
init_completion(&blkif->drain_complete);
+
+ /*
+ * Because freeing back to the cache may be deferred, it is not
+ * safe to unload the module (and hence destroy the cache) until
+ * this has completed. To prevent premature unloading, take an
+ * extra module reference here and release only when the object
+ * has been freed back to the cache.
+ */
+ __module_get(THIS_MODULE);
INIT_WORK(&blkif->free_work, xen_blkif_deferred_free);
return blkif;
@@ -181,6 +190,9 @@ static int xen_blkif_map(struct xen_blkif_ring *ring, grant_ref_t *gref,
{
int err;
struct xen_blkif *blkif = ring->blkif;
+ const struct blkif_common_sring *sring_common;
+ RING_IDX rsp_prod, req_prod;
+ unsigned int size;
/* Already connected through? */
if (ring->irq)
@@ -191,46 +203,62 @@ static int xen_blkif_map(struct xen_blkif_ring *ring, grant_ref_t *gref,
if (err < 0)
return err;
+ sring_common = (struct blkif_common_sring *)ring->blk_ring;
+ rsp_prod = READ_ONCE(sring_common->rsp_prod);
+ req_prod = READ_ONCE(sring_common->req_prod);
+
switch (blkif->blk_protocol) {
case BLKIF_PROTOCOL_NATIVE:
{
- struct blkif_sring *sring;
- sring = (struct blkif_sring *)ring->blk_ring;
- BACK_RING_INIT(&ring->blk_rings.native, sring,
- XEN_PAGE_SIZE * nr_grefs);
+ struct blkif_sring *sring_native =
+ (struct blkif_sring *)ring->blk_ring;
+
+ BACK_RING_ATTACH(&ring->blk_rings.native, sring_native,
+ rsp_prod, XEN_PAGE_SIZE * nr_grefs);
+ size = __RING_SIZE(sring_native, XEN_PAGE_SIZE * nr_grefs);
break;
}
case BLKIF_PROTOCOL_X86_32:
{
- struct blkif_x86_32_sring *sring_x86_32;
- sring_x86_32 = (struct blkif_x86_32_sring *)ring->blk_ring;
- BACK_RING_INIT(&ring->blk_rings.x86_32, sring_x86_32,
- XEN_PAGE_SIZE * nr_grefs);
+ struct blkif_x86_32_sring *sring_x86_32 =
+ (struct blkif_x86_32_sring *)ring->blk_ring;
+
+ BACK_RING_ATTACH(&ring->blk_rings.x86_32, sring_x86_32,
+ rsp_prod, XEN_PAGE_SIZE * nr_grefs);
+ size = __RING_SIZE(sring_x86_32, XEN_PAGE_SIZE * nr_grefs);
break;
}
case BLKIF_PROTOCOL_X86_64:
{
- struct blkif_x86_64_sring *sring_x86_64;
- sring_x86_64 = (struct blkif_x86_64_sring *)ring->blk_ring;
- BACK_RING_INIT(&ring->blk_rings.x86_64, sring_x86_64,
- XEN_PAGE_SIZE * nr_grefs);
+ struct blkif_x86_64_sring *sring_x86_64 =
+ (struct blkif_x86_64_sring *)ring->blk_ring;
+
+ BACK_RING_ATTACH(&ring->blk_rings.x86_64, sring_x86_64,
+ rsp_prod, XEN_PAGE_SIZE * nr_grefs);
+ size = __RING_SIZE(sring_x86_64, XEN_PAGE_SIZE * nr_grefs);
break;
}
default:
BUG();
}
+ err = -EIO;
+ if (req_prod - rsp_prod > size)
+ goto fail;
+
err = bind_interdomain_evtchn_to_irqhandler(blkif->domid, evtchn,
xen_blkif_be_int, 0,
"blkif-backend", ring);
- if (err < 0) {
- xenbus_unmap_ring_vfree(blkif->be->dev, ring->blk_ring);
- ring->blk_rings.common.sring = NULL;
- return err;
- }
+ if (err < 0)
+ goto fail;
ring->irq = err;
return 0;
+
+fail:
+ xenbus_unmap_ring_vfree(blkif->be->dev, ring->blk_ring);
+ ring->blk_rings.common.sring = NULL;
+ return err;
}
static int xen_blkif_disconnect(struct xen_blkif *blkif)
@@ -320,6 +348,7 @@ static void xen_blkif_free(struct xen_blkif *blkif)
/* Make sure everything is drained before shutting down */
kmem_cache_free(xen_blkif_cachep, blkif);
+ module_put(THIS_MODULE);
}
int __init xen_blkif_interface_init(void)
@@ -1121,7 +1150,8 @@ static struct xenbus_driver xen_blkbk_driver = {
.ids = xen_blkbk_ids,
.probe = xen_blkbk_probe,
.remove = xen_blkbk_remove,
- .otherend_changed = frontend_changed
+ .otherend_changed = frontend_changed,
+ .allow_rebind = true,
};
int xen_blkif_xenbus_init(void)
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index a74d03913822..c02be06c5299 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -1113,8 +1113,8 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
if (!VDEV_IS_EXTENDED(info->vdevice)) {
err = xen_translate_vdev(info->vdevice, &minor, &offset);
if (err)
- return err;
- nr_parts = PARTS_PER_DISK;
+ return err;
+ nr_parts = PARTS_PER_DISK;
} else {
minor = BLKIF_MINOR_EXT(info->vdevice);
nr_parts = PARTS_PER_EXT_DISK;
diff --git a/drivers/bluetooth/btbcm.c b/drivers/bluetooth/btbcm.c
index 8e05706fe5d9..0795a49edfae 100644
--- a/drivers/bluetooth/btbcm.c
+++ b/drivers/bluetooth/btbcm.c
@@ -107,6 +107,52 @@ int btbcm_set_bdaddr(struct hci_dev *hdev, const bdaddr_t *bdaddr)
}
EXPORT_SYMBOL_GPL(btbcm_set_bdaddr);
+int btbcm_read_pcm_int_params(struct hci_dev *hdev,
+ struct bcm_set_pcm_int_params *params)
+{
+ struct sk_buff *skb;
+ int err = 0;
+
+ skb = __hci_cmd_sync(hdev, 0xfc1d, 0, NULL, HCI_INIT_TIMEOUT);
+ if (IS_ERR(skb)) {
+ err = PTR_ERR(skb);
+ bt_dev_err(hdev, "BCM: Read PCM int params failed (%d)", err);
+ return err;
+ }
+
+ if (skb->len != 6 || skb->data[0]) {
+ bt_dev_err(hdev, "BCM: Read PCM int params length mismatch");
+ kfree_skb(skb);
+ return -EIO;
+ }
+
+ if (params)
+ memcpy(params, skb->data + 1, 5);
+
+ kfree_skb(skb);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(btbcm_read_pcm_int_params);
+
+int btbcm_write_pcm_int_params(struct hci_dev *hdev,
+ const struct bcm_set_pcm_int_params *params)
+{
+ struct sk_buff *skb;
+ int err;
+
+ skb = __hci_cmd_sync(hdev, 0xfc1c, 5, params, HCI_INIT_TIMEOUT);
+ if (IS_ERR(skb)) {
+ err = PTR_ERR(skb);
+ bt_dev_err(hdev, "BCM: Write PCM int params failed (%d)", err);
+ return err;
+ }
+ kfree_skb(skb);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(btbcm_write_pcm_int_params);
+
int btbcm_patchram(struct hci_dev *hdev, const struct firmware *fw)
{
const struct hci_command_hdr *cmd;
diff --git a/drivers/bluetooth/btbcm.h b/drivers/bluetooth/btbcm.h
index d204be8a84bf..3c7dd0765837 100644
--- a/drivers/bluetooth/btbcm.h
+++ b/drivers/bluetooth/btbcm.h
@@ -54,6 +54,10 @@ struct bcm_set_pcm_format_params {
int btbcm_check_bdaddr(struct hci_dev *hdev);
int btbcm_set_bdaddr(struct hci_dev *hdev, const bdaddr_t *bdaddr);
int btbcm_patchram(struct hci_dev *hdev, const struct firmware *fw);
+int btbcm_read_pcm_int_params(struct hci_dev *hdev,
+ struct bcm_set_pcm_int_params *params);
+int btbcm_write_pcm_int_params(struct hci_dev *hdev,
+ const struct bcm_set_pcm_int_params *params);
int btbcm_setup_patchram(struct hci_dev *hdev);
int btbcm_setup_apple(struct hci_dev *hdev);
@@ -74,6 +78,18 @@ static inline int btbcm_set_bdaddr(struct hci_dev *hdev, const bdaddr_t *bdaddr)
return -EOPNOTSUPP;
}
+int btbcm_read_pcm_int_params(struct hci_dev *hdev,
+ struct bcm_set_pcm_int_params *params)
+{
+ return -EOPNOTSUPP;
+}
+
+int btbcm_write_pcm_int_params(struct hci_dev *hdev,
+ const struct bcm_set_pcm_int_params *params)
+{
+ return -EOPNOTSUPP;
+}
+
static inline int btbcm_patchram(struct hci_dev *hdev, const struct firmware *fw)
{
return -EOPNOTSUPP;
diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index 70e385987d41..0eaeca0a64fb 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -552,9 +552,9 @@ static void btusb_rtl_cmd_timeout(struct hci_dev *hdev)
}
bt_dev_err(hdev, "Reset Realtek device via gpio");
- gpiod_set_value_cansleep(reset_gpio, 0);
- msleep(200);
gpiod_set_value_cansleep(reset_gpio, 1);
+ msleep(200);
+ gpiod_set_value_cansleep(reset_gpio, 0);
}
static inline void btusb_free_frags(struct btusb_data *data)
@@ -2602,7 +2602,7 @@ static void btusb_mtk_wmt_recv(struct urb *urb)
* and being processed the events from there then.
*/
if (test_bit(BTUSB_TX_WAIT_VND_EVT, &data->flags)) {
- data->evt_skb = skb_clone(skb, GFP_KERNEL);
+ data->evt_skb = skb_clone(skb, GFP_ATOMIC);
if (!data->evt_skb)
goto err_out;
}
@@ -2867,7 +2867,7 @@ static int btusb_mtk_setup_firmware(struct hci_dev *hdev, const char *fwname)
err = btusb_mtk_hci_wmt_sync(hdev, &wmt_params);
if (err < 0) {
bt_dev_err(hdev, "Failed to send wmt rst (%d)", err);
- return err;
+ goto err_release_fw;
}
/* Wait a few moments for firmware activation done */
@@ -3832,6 +3832,10 @@ static int btusb_probe(struct usb_interface *intf,
* (DEVICE_REMOTE_WAKEUP)
*/
set_bit(BTUSB_WAKEUP_DISABLE, &data->flags);
+
+ err = usb_autopm_get_interface(intf);
+ if (err < 0)
+ goto out_free_dev;
}
if (id->driver_info & BTUSB_AMP) {
diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c
index d2a6a4afdbbb..f8f5c593a05c 100644
--- a/drivers/bluetooth/hci_bcm.c
+++ b/drivers/bluetooth/hci_bcm.c
@@ -48,6 +48,14 @@
#define BCM_NUM_SUPPLIES 2
/**
+ * struct bcm_device_data - device specific data
+ * @no_early_set_baudrate: Disallow set baudrate before driver setup()
+ */
+struct bcm_device_data {
+ bool no_early_set_baudrate;
+};
+
+/**
* struct bcm_device - device driver resources
* @serdev_hu: HCI UART controller struct
* @list: bcm_device_list node
@@ -79,6 +87,7 @@
* @hu: pointer to HCI UART controller struct,
* used to disable flow control during runtime suspend and system sleep
* @is_suspended: whether flow control is currently disabled
+ * @no_early_set_baudrate: don't set_baudrate before setup()
*/
struct bcm_device {
/* Must be the first member, hci_serdev.c expects this. */
@@ -112,6 +121,8 @@ struct bcm_device {
struct hci_uart *hu;
bool is_suspended;
#endif
+ bool no_early_set_baudrate;
+ u8 pcm_int_params[5];
};
/* generic bcm uart resources */
@@ -447,7 +458,13 @@ out:
if (bcm->dev) {
hci_uart_set_flow_control(hu, true);
hu->init_speed = bcm->dev->init_speed;
- hu->oper_speed = bcm->dev->oper_speed;
+
+ /* If oper_speed is set, ldisc/serdev will set the baudrate
+ * before calling setup()
+ */
+ if (!bcm->dev->no_early_set_baudrate)
+ hu->oper_speed = bcm->dev->oper_speed;
+
err = bcm_gpio_set_power(bcm->dev, true);
hci_uart_set_flow_control(hu, false);
if (err)
@@ -565,6 +582,8 @@ static int bcm_setup(struct hci_uart *hu)
/* Operational speed if any */
if (hu->oper_speed)
speed = hu->oper_speed;
+ else if (bcm->dev && bcm->dev->oper_speed)
+ speed = bcm->dev->oper_speed;
else if (hu->proto->oper_speed)
speed = hu->proto->oper_speed;
else
@@ -576,6 +595,16 @@ static int bcm_setup(struct hci_uart *hu)
host_set_baudrate(hu, speed);
}
+ /* PCM parameters if provided */
+ if (bcm->dev && bcm->dev->pcm_int_params[0] != 0xff) {
+ struct bcm_set_pcm_int_params params;
+
+ btbcm_read_pcm_int_params(hu->hdev, &params);
+
+ memcpy(&params, bcm->dev->pcm_int_params, 5);
+ btbcm_write_pcm_int_params(hu->hdev, &params);
+ }
+
finalize:
release_firmware(fw);
@@ -1113,6 +1142,8 @@ static int bcm_acpi_probe(struct bcm_device *dev)
static int bcm_of_probe(struct bcm_device *bdev)
{
device_property_read_u32(bdev->dev, "max-speed", &bdev->oper_speed);
+ device_property_read_u8_array(bdev->dev, "brcm,bt-pcm-int-params",
+ bdev->pcm_int_params, 5);
return 0;
}
@@ -1128,6 +1159,9 @@ static int bcm_probe(struct platform_device *pdev)
dev->dev = &pdev->dev;
dev->irq = platform_get_irq(pdev, 0);
+ /* Initialize routing field to an unused value */
+ dev->pcm_int_params[0] = 0xff;
+
if (has_acpi_companion(&pdev->dev)) {
ret = bcm_acpi_probe(dev);
if (ret)
@@ -1374,6 +1408,7 @@ static struct platform_driver bcm_driver = {
static int bcm_serdev_probe(struct serdev_device *serdev)
{
struct bcm_device *bcmdev;
+ const struct bcm_device_data *data;
int err;
bcmdev = devm_kzalloc(&serdev->dev, sizeof(*bcmdev), GFP_KERNEL);
@@ -1387,6 +1422,9 @@ static int bcm_serdev_probe(struct serdev_device *serdev)
bcmdev->serdev_hu.serdev = serdev;
serdev_device_set_drvdata(serdev, bcmdev);
+ /* Initialize routing field to an unused value */
+ bcmdev->pcm_int_params[0] = 0xff;
+
if (has_acpi_companion(&serdev->dev))
err = bcm_acpi_probe(bcmdev);
else
@@ -1408,6 +1446,10 @@ static int bcm_serdev_probe(struct serdev_device *serdev)
if (err)
dev_err(&serdev->dev, "Failed to power down\n");
+ data = device_get_match_data(bcmdev->dev);
+ if (data)
+ bcmdev->no_early_set_baudrate = data->no_early_set_baudrate;
+
return hci_uart_register_device(&bcmdev->serdev_hu, &bcm_proto);
}
@@ -1419,12 +1461,16 @@ static void bcm_serdev_remove(struct serdev_device *serdev)
}
#ifdef CONFIG_OF
+static struct bcm_device_data bcm4354_device_data = {
+ .no_early_set_baudrate = true,
+};
+
static const struct of_device_id bcm_bluetooth_of_match[] = {
{ .compatible = "brcm,bcm20702a1" },
{ .compatible = "brcm,bcm4345c5" },
{ .compatible = "brcm,bcm4330-bt" },
{ .compatible = "brcm,bcm43438-bt" },
- { .compatible = "brcm,bcm43540-bt" },
+ { .compatible = "brcm,bcm43540-bt", .data = &bcm4354_device_data },
{ .compatible = "brcm,bcm4335a0" },
{ },
};
diff --git a/drivers/bus/ti-sysc.c b/drivers/bus/ti-sysc.c
index 56887c6877a7..f4d1597df0a2 100644
--- a/drivers/bus/ti-sysc.c
+++ b/drivers/bus/ti-sysc.c
@@ -981,7 +981,8 @@ static int sysc_disable_module(struct device *dev)
return ret;
}
- if (ddata->cfg.quirks & SYSC_QUIRK_SWSUP_MSTANDBY)
+ if (ddata->cfg.quirks & (SYSC_QUIRK_SWSUP_MSTANDBY) ||
+ ddata->cfg.quirks & (SYSC_QUIRK_FORCE_MSTANDBY))
best_mode = SYSC_IDLE_FORCE;
reg &= ~(SYSC_IDLE_MASK << regbits->midle_shift);
@@ -1583,6 +1584,10 @@ static int sysc_reset(struct sysc *ddata)
sysc_val |= sysc_mask;
sysc_write(ddata, sysc_offset, sysc_val);
+ if (ddata->cfg.srst_udelay)
+ usleep_range(ddata->cfg.srst_udelay,
+ ddata->cfg.srst_udelay * 2);
+
if (ddata->clk_enable_quirk)
ddata->clk_enable_quirk(ddata);
diff --git a/drivers/char/agp/isoch.c b/drivers/char/agp/isoch.c
index 31c374b1b91b..7ecf20a6d19c 100644
--- a/drivers/char/agp/isoch.c
+++ b/drivers/char/agp/isoch.c
@@ -84,7 +84,6 @@ static int agp_3_5_isochronous_node_enable(struct agp_bridge_data *bridge,
unsigned int cdev = 0;
u32 mnistat, tnistat, tstatus, mcmd;
u16 tnicmd, mnicmd;
- u8 mcapndx;
u32 tot_bw = 0, tot_n = 0, tot_rq = 0, y_max, rq_isoch, rq_async;
u32 step, rem, rem_isoch, rem_async;
int ret = 0;
@@ -138,8 +137,6 @@ static int agp_3_5_isochronous_node_enable(struct agp_bridge_data *bridge,
cur = list_entry(pos, struct agp_3_5_dev, list);
dev = cur->dev;
- mcapndx = cur->capndx;
-
pci_read_config_dword(dev, cur->capndx+AGPNISTAT, &mnistat);
master[cdev].maxbw = (mnistat >> 16) & 0xff;
@@ -251,8 +248,6 @@ static int agp_3_5_isochronous_node_enable(struct agp_bridge_data *bridge,
cur = master[cdev].dev;
dev = cur->dev;
- mcapndx = cur->capndx;
-
master[cdev].rq += (cdev == ndevs - 1)
? (rem_async + rem_isoch) : step;
@@ -319,7 +314,7 @@ int agp_3_5_enable(struct agp_bridge_data *bridge)
{
struct pci_dev *td = bridge->dev, *dev = NULL;
u8 mcapndx;
- u32 isoch, arqsz;
+ u32 isoch;
u32 tstatus, mstatus, ncapid;
u32 mmajor;
u16 mpstat;
@@ -334,8 +329,6 @@ int agp_3_5_enable(struct agp_bridge_data *bridge)
if (isoch == 0) /* isoch xfers not available, bail out. */
return -ENODEV;
- arqsz = (tstatus >> 13) & 0x7;
-
/*
* Allocate a head for our AGP 3.5 device list
* (multiple AGP v3 devices are allowed behind a single bridge).
diff --git a/drivers/char/pcmcia/synclink_cs.c b/drivers/char/pcmcia/synclink_cs.c
index 82f9a6a814ae..e342daa73d1b 100644
--- a/drivers/char/pcmcia/synclink_cs.c
+++ b/drivers/char/pcmcia/synclink_cs.c
@@ -4169,7 +4169,7 @@ static int hdlcdev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
*
* dev pointer to network device structure
*/
-static void hdlcdev_tx_timeout(struct net_device *dev)
+static void hdlcdev_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
MGSLPC_INFO *info = dev_to_port(dev);
unsigned long flags;
diff --git a/drivers/char/random.c b/drivers/char/random.c
index 909e0c3d82ea..cda12933a17d 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -2175,6 +2175,7 @@ const struct file_operations urandom_fops = {
.read = urandom_read,
.write = random_write,
.unlocked_ioctl = random_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
.fasync = random_fasync,
.llseek = noop_llseek,
};
diff --git a/drivers/char/tpm/tpm-dev-common.c b/drivers/char/tpm/tpm-dev-common.c
index 2ec47a69a2a6..87f449340202 100644
--- a/drivers/char/tpm/tpm-dev-common.c
+++ b/drivers/char/tpm/tpm-dev-common.c
@@ -61,6 +61,12 @@ static void tpm_dev_async_work(struct work_struct *work)
mutex_lock(&priv->buffer_mutex);
priv->command_enqueued = false;
+ ret = tpm_try_get_ops(priv->chip);
+ if (ret) {
+ priv->response_length = ret;
+ goto out;
+ }
+
ret = tpm_dev_transmit(priv->chip, priv->space, priv->data_buffer,
sizeof(priv->data_buffer));
tpm_put_ops(priv->chip);
@@ -68,6 +74,7 @@ static void tpm_dev_async_work(struct work_struct *work)
priv->response_length = ret;
mod_timer(&priv->user_read_timer, jiffies + (120 * HZ));
}
+out:
mutex_unlock(&priv->buffer_mutex);
wake_up_interruptible(&priv->async_wait);
}
@@ -123,7 +130,7 @@ ssize_t tpm_common_read(struct file *file, char __user *buf,
priv->response_read = true;
ret_size = min_t(ssize_t, size, priv->response_length);
- if (!ret_size) {
+ if (ret_size <= 0) {
priv->response_length = 0;
goto out;
}
@@ -204,6 +211,7 @@ ssize_t tpm_common_write(struct file *file, const char __user *buf,
if (file->f_flags & O_NONBLOCK) {
priv->command_enqueued = true;
queue_work(tpm_dev_wq, &priv->async_work);
+ tpm_put_ops(priv->chip);
mutex_unlock(&priv->buffer_mutex);
return size;
}
diff --git a/drivers/char/tpm/tpm-dev.h b/drivers/char/tpm/tpm-dev.h
index 1089fc0bb290..f3742bcc73e3 100644
--- a/drivers/char/tpm/tpm-dev.h
+++ b/drivers/char/tpm/tpm-dev.h
@@ -14,7 +14,7 @@ struct file_priv {
struct work_struct timeout_work;
struct work_struct async_work;
wait_queue_head_t async_wait;
- size_t response_length;
+ ssize_t response_length;
bool response_read;
bool command_enqueued;
diff --git a/drivers/char/tpm/tpm.h b/drivers/char/tpm/tpm.h
index b9e1547be6b5..5620747da0cf 100644
--- a/drivers/char/tpm/tpm.h
+++ b/drivers/char/tpm/tpm.h
@@ -218,7 +218,6 @@ int tpm2_pcr_read(struct tpm_chip *chip, u32 pcr_idx,
int tpm2_pcr_extend(struct tpm_chip *chip, u32 pcr_idx,
struct tpm_digest *digests);
int tpm2_get_random(struct tpm_chip *chip, u8 *dest, size_t max);
-void tpm2_flush_context(struct tpm_chip *chip, u32 handle);
ssize_t tpm2_get_tpm_pt(struct tpm_chip *chip, u32 property_id,
u32 *value, const char *desc);
diff --git a/drivers/char/tpm/tpm2-cmd.c b/drivers/char/tpm/tpm2-cmd.c
index fdb457704aa7..13696deceae8 100644
--- a/drivers/char/tpm/tpm2-cmd.c
+++ b/drivers/char/tpm/tpm2-cmd.c
@@ -362,6 +362,7 @@ void tpm2_flush_context(struct tpm_chip *chip, u32 handle)
tpm_transmit_cmd(chip, &buf, 0, "flushing context");
tpm_buf_destroy(&buf);
}
+EXPORT_SYMBOL_GPL(tpm2_flush_context);
struct tpm2_get_cap_out {
u8 more_data;
diff --git a/drivers/char/tpm/tpm_ftpm_tee.c b/drivers/char/tpm/tpm_ftpm_tee.c
index 6640a14dbe48..22bf553ccf9d 100644
--- a/drivers/char/tpm/tpm_ftpm_tee.c
+++ b/drivers/char/tpm/tpm_ftpm_tee.c
@@ -32,7 +32,7 @@ static const uuid_t ftpm_ta_uuid =
0x82, 0xCB, 0x34, 0x3F, 0xB7, 0xF3, 0x78, 0x96);
/**
- * ftpm_tee_tpm_op_recv - retrieve fTPM response.
+ * ftpm_tee_tpm_op_recv() - retrieve fTPM response.
* @chip: the tpm_chip description as specified in driver/char/tpm/tpm.h.
* @buf: the buffer to store data.
* @count: the number of bytes to read.
@@ -61,7 +61,7 @@ static int ftpm_tee_tpm_op_recv(struct tpm_chip *chip, u8 *buf, size_t count)
}
/**
- * ftpm_tee_tpm_op_send - send TPM commands through the TEE shared memory.
+ * ftpm_tee_tpm_op_send() - send TPM commands through the TEE shared memory.
* @chip: the tpm_chip description as specified in driver/char/tpm/tpm.h
* @buf: the buffer to send.
* @len: the number of bytes to send.
@@ -208,7 +208,7 @@ static int ftpm_tee_match(struct tee_ioctl_version_data *ver, const void *data)
}
/**
- * ftpm_tee_probe - initialize the fTPM
+ * ftpm_tee_probe() - initialize the fTPM
* @pdev: the platform_device description.
*
* Return:
@@ -298,7 +298,7 @@ out_tee_session:
}
/**
- * ftpm_tee_remove - remove the TPM device
+ * ftpm_tee_remove() - remove the TPM device
* @pdev: the platform_device description.
*
* Return:
@@ -328,6 +328,19 @@ static int ftpm_tee_remove(struct platform_device *pdev)
return 0;
}
+/**
+ * ftpm_tee_shutdown() - shutdown the TPM device
+ * @pdev: the platform_device description.
+ */
+static void ftpm_tee_shutdown(struct platform_device *pdev)
+{
+ struct ftpm_tee_private *pvt_data = dev_get_drvdata(&pdev->dev);
+
+ tee_shm_free(pvt_data->shm);
+ tee_client_close_session(pvt_data->ctx, pvt_data->session);
+ tee_client_close_context(pvt_data->ctx);
+}
+
static const struct of_device_id of_ftpm_tee_ids[] = {
{ .compatible = "microsoft,ftpm" },
{ }
@@ -341,6 +354,7 @@ static struct platform_driver ftpm_tee_driver = {
},
.probe = ftpm_tee_probe,
.remove = ftpm_tee_remove,
+ .shutdown = ftpm_tee_shutdown,
};
module_platform_driver(ftpm_tee_driver);
diff --git a/drivers/char/tpm/tpm_tis_core.c b/drivers/char/tpm/tpm_tis_core.c
index 8af2cee1a762..27c6ca031e23 100644
--- a/drivers/char/tpm/tpm_tis_core.c
+++ b/drivers/char/tpm/tpm_tis_core.c
@@ -1059,8 +1059,6 @@ int tpm_tis_core_init(struct device *dev, struct tpm_tis_data *priv, int irq,
goto out_err;
}
- tpm_chip_start(chip);
- chip->flags |= TPM_CHIP_FLAG_IRQ;
if (irq) {
tpm_tis_probe_irq_single(chip, intmask, IRQF_SHARED,
irq);
@@ -1070,7 +1068,6 @@ int tpm_tis_core_init(struct device *dev, struct tpm_tis_data *priv, int irq,
} else {
tpm_tis_probe_irq(chip, intmask);
}
- tpm_chip_stop(chip);
}
rc = tpm_chip_register(chip);
diff --git a/drivers/clk/at91/at91sam9260.c b/drivers/clk/at91/at91sam9260.c
index 0aabe49aed09..a9d4234758d7 100644
--- a/drivers/clk/at91/at91sam9260.c
+++ b/drivers/clk/at91/at91sam9260.c
@@ -348,7 +348,7 @@ static void __init at91sam926x_pmc_setup(struct device_node *np,
return;
mainxtal_name = of_clk_get_parent_name(np, i);
- regmap = syscon_node_to_regmap(np);
+ regmap = device_node_to_regmap(np);
if (IS_ERR(regmap))
return;
diff --git a/drivers/clk/at91/at91sam9rl.c b/drivers/clk/at91/at91sam9rl.c
index 0ac34cdaa106..77fe83a73bf4 100644
--- a/drivers/clk/at91/at91sam9rl.c
+++ b/drivers/clk/at91/at91sam9rl.c
@@ -83,7 +83,7 @@ static void __init at91sam9rl_pmc_setup(struct device_node *np)
return;
mainxtal_name = of_clk_get_parent_name(np, i);
- regmap = syscon_node_to_regmap(np);
+ regmap = device_node_to_regmap(np);
if (IS_ERR(regmap))
return;
diff --git a/drivers/clk/at91/at91sam9x5.c b/drivers/clk/at91/at91sam9x5.c
index 0855f3a80cc7..086cf0b4955c 100644
--- a/drivers/clk/at91/at91sam9x5.c
+++ b/drivers/clk/at91/at91sam9x5.c
@@ -146,7 +146,7 @@ static void __init at91sam9x5_pmc_setup(struct device_node *np,
return;
mainxtal_name = of_clk_get_parent_name(np, i);
- regmap = syscon_node_to_regmap(np);
+ regmap = device_node_to_regmap(np);
if (IS_ERR(regmap))
return;
diff --git a/drivers/clk/at91/pmc.c b/drivers/clk/at91/pmc.c
index 0b03cfae3a9d..b71515acdec1 100644
--- a/drivers/clk/at91/pmc.c
+++ b/drivers/clk/at91/pmc.c
@@ -275,7 +275,7 @@ static int __init pmc_register_ops(void)
np = of_find_matching_node(NULL, sama5d2_pmc_dt_ids);
- pmcreg = syscon_node_to_regmap(np);
+ pmcreg = device_node_to_regmap(np);
if (IS_ERR(pmcreg))
return PTR_ERR(pmcreg);
diff --git a/drivers/clk/at91/sama5d2.c b/drivers/clk/at91/sama5d2.c
index 0de1108737db..ff7e3f727082 100644
--- a/drivers/clk/at91/sama5d2.c
+++ b/drivers/clk/at91/sama5d2.c
@@ -162,7 +162,7 @@ static void __init sama5d2_pmc_setup(struct device_node *np)
return;
mainxtal_name = of_clk_get_parent_name(np, i);
- regmap = syscon_node_to_regmap(np);
+ regmap = device_node_to_regmap(np);
if (IS_ERR(regmap))
return;
diff --git a/drivers/clk/at91/sama5d4.c b/drivers/clk/at91/sama5d4.c
index 25b156d4e645..a6dee4a3b6e4 100644
--- a/drivers/clk/at91/sama5d4.c
+++ b/drivers/clk/at91/sama5d4.c
@@ -136,7 +136,7 @@ static void __init sama5d4_pmc_setup(struct device_node *np)
return;
mainxtal_name = of_clk_get_parent_name(np, i);
- regmap = syscon_node_to_regmap(np);
+ regmap = device_node_to_regmap(np);
if (IS_ERR(regmap))
return;
diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c
index b68e200829f2..6a11239ccde3 100644
--- a/drivers/clk/clk.c
+++ b/drivers/clk/clk.c
@@ -3249,6 +3249,34 @@ static inline void clk_debug_unregister(struct clk_core *core)
}
#endif
+static void clk_core_reparent_orphans_nolock(void)
+{
+ struct clk_core *orphan;
+ struct hlist_node *tmp2;
+
+ /*
+ * walk the list of orphan clocks and reparent any that newly finds a
+ * parent.
+ */
+ hlist_for_each_entry_safe(orphan, tmp2, &clk_orphan_list, child_node) {
+ struct clk_core *parent = __clk_init_parent(orphan);
+
+ /*
+ * We need to use __clk_set_parent_before() and _after() to
+ * to properly migrate any prepare/enable count of the orphan
+ * clock. This is important for CLK_IS_CRITICAL clocks, which
+ * are enabled during init but might not have a parent yet.
+ */
+ if (parent) {
+ /* update the clk tree topology */
+ __clk_set_parent_before(orphan, parent);
+ __clk_set_parent_after(orphan, parent, NULL);
+ __clk_recalc_accuracies(orphan);
+ __clk_recalc_rates(orphan, 0);
+ }
+ }
+}
+
/**
* __clk_core_init - initialize the data structures in a struct clk_core
* @core: clk_core being initialized
@@ -3259,8 +3287,6 @@ static inline void clk_debug_unregister(struct clk_core *core)
static int __clk_core_init(struct clk_core *core)
{
int ret;
- struct clk_core *orphan;
- struct hlist_node *tmp2;
unsigned long rate;
if (!core)
@@ -3407,27 +3433,8 @@ static int __clk_core_init(struct clk_core *core)
clk_enable_unlock(flags);
}
- /*
- * walk the list of orphan clocks and reparent any that newly finds a
- * parent.
- */
- hlist_for_each_entry_safe(orphan, tmp2, &clk_orphan_list, child_node) {
- struct clk_core *parent = __clk_init_parent(orphan);
+ clk_core_reparent_orphans_nolock();
- /*
- * We need to use __clk_set_parent_before() and _after() to
- * to properly migrate any prepare/enable count of the orphan
- * clock. This is important for CLK_IS_CRITICAL clocks, which
- * are enabled during init but might not have a parent yet.
- */
- if (parent) {
- /* update the clk tree topology */
- __clk_set_parent_before(orphan, parent);
- __clk_set_parent_after(orphan, parent, NULL);
- __clk_recalc_accuracies(orphan);
- __clk_recalc_rates(orphan, 0);
- }
- }
kref_init(&core->ref);
out:
@@ -4179,6 +4186,13 @@ int clk_notifier_unregister(struct clk *clk, struct notifier_block *nb)
EXPORT_SYMBOL_GPL(clk_notifier_unregister);
#ifdef CONFIG_OF
+static void clk_core_reparent_orphans(void)
+{
+ clk_prepare_lock();
+ clk_core_reparent_orphans_nolock();
+ clk_prepare_unlock();
+}
+
/**
* struct of_clk_provider - Clock provider registration structure
* @link: Entry in global list of clock providers
@@ -4274,6 +4288,8 @@ int of_clk_add_provider(struct device_node *np,
mutex_unlock(&of_clk_mutex);
pr_debug("Added clock from %pOF\n", np);
+ clk_core_reparent_orphans();
+
ret = of_clk_set_defaults(np, true);
if (ret < 0)
of_clk_del_provider(np);
@@ -4309,6 +4325,8 @@ int of_clk_add_hw_provider(struct device_node *np,
mutex_unlock(&of_clk_mutex);
pr_debug("Added clk_hw provider from %pOF\n", np);
+ clk_core_reparent_orphans();
+
ret = of_clk_set_defaults(np, true);
if (ret < 0)
of_clk_del_provider(np);
diff --git a/drivers/clk/imx/clk-composite-8m.c b/drivers/clk/imx/clk-composite-8m.c
index 388bdb94f841..d3486ee79ab5 100644
--- a/drivers/clk/imx/clk-composite-8m.c
+++ b/drivers/clk/imx/clk-composite-8m.c
@@ -142,6 +142,7 @@ struct clk *imx8m_clk_composite_flags(const char *name,
mux->reg = reg;
mux->shift = PCG_PCS_SHIFT;
mux->mask = PCG_PCS_MASK;
+ mux->lock = &imx_ccm_lock;
div = kzalloc(sizeof(*div), GFP_KERNEL);
if (!div)
@@ -161,6 +162,7 @@ struct clk *imx8m_clk_composite_flags(const char *name,
gate_hw = &gate->hw;
gate->reg = reg;
gate->bit_idx = PCG_CGC_SHIFT;
+ gate->lock = &imx_ccm_lock;
hw = clk_hw_register_composite(NULL, name, parent_names, num_parents,
mux_hw, &clk_mux_ops, div_hw,
diff --git a/drivers/clk/imx/clk-imx7ulp.c b/drivers/clk/imx/clk-imx7ulp.c
index 3fdf3d494f0a..281191b55b3a 100644
--- a/drivers/clk/imx/clk-imx7ulp.c
+++ b/drivers/clk/imx/clk-imx7ulp.c
@@ -40,6 +40,7 @@ static const struct clk_div_table ulp_div_table[] = {
{ .val = 5, .div = 16, },
{ .val = 6, .div = 32, },
{ .val = 7, .div = 64, },
+ { /* sentinel */ },
};
static const int pcc2_uart_clk_ids[] __initconst = {
diff --git a/drivers/clk/imx/clk-pll14xx.c b/drivers/clk/imx/clk-pll14xx.c
index 5c458199060a..3636c8035c7d 100644
--- a/drivers/clk/imx/clk-pll14xx.c
+++ b/drivers/clk/imx/clk-pll14xx.c
@@ -159,7 +159,7 @@ static int clk_pll14xx_wait_lock(struct clk_pll14xx *pll)
{
u32 val;
- return readl_poll_timeout(pll->base, val, val & LOCK_TIMEOUT_US, 0,
+ return readl_poll_timeout(pll->base, val, val & LOCK_STATUS, 0,
LOCK_TIMEOUT_US);
}
diff --git a/drivers/clk/qcom/gcc-sc7180.c b/drivers/clk/qcom/gcc-sc7180.c
index 38424e63bcae..7f59fb8da033 100644
--- a/drivers/clk/qcom/gcc-sc7180.c
+++ b/drivers/clk/qcom/gcc-sc7180.c
@@ -2186,7 +2186,8 @@ static struct gdsc hlos1_vote_mmnoc_mmu_tbu_hf0_gdsc = {
.pd = {
.name = "hlos1_vote_mmnoc_mmu_tbu_hf0_gdsc",
},
- .pwrsts = PWRSTS_OFF_ON | VOTABLE,
+ .pwrsts = PWRSTS_OFF_ON,
+ .flags = VOTABLE,
};
static struct gdsc hlos1_vote_mmnoc_mmu_tbu_sf_gdsc = {
@@ -2194,7 +2195,8 @@ static struct gdsc hlos1_vote_mmnoc_mmu_tbu_sf_gdsc = {
.pd = {
.name = "hlos1_vote_mmnoc_mmu_tbu_sf_gdsc",
},
- .pwrsts = PWRSTS_OFF_ON | VOTABLE,
+ .pwrsts = PWRSTS_OFF_ON,
+ .flags = VOTABLE,
};
static struct gdsc *gcc_sc7180_gdscs[] = {
diff --git a/drivers/clk/qcom/gpucc-msm8998.c b/drivers/clk/qcom/gpucc-msm8998.c
index e5e2492b20c5..9b3923af02a1 100644
--- a/drivers/clk/qcom/gpucc-msm8998.c
+++ b/drivers/clk/qcom/gpucc-msm8998.c
@@ -242,10 +242,12 @@ static struct clk_branch gfx3d_isense_clk = {
static struct gdsc gpu_cx_gdsc = {
.gdscr = 0x1004,
+ .gds_hw_ctrl = 0x1008,
.pd = {
.name = "gpu_cx",
},
.pwrsts = PWRSTS_OFF_ON,
+ .flags = VOTABLE,
};
static struct gdsc gpu_gx_gdsc = {
diff --git a/drivers/clocksource/timer-riscv.c b/drivers/clocksource/timer-riscv.c
index 4e54856ce2a5..c4f15c4068c0 100644
--- a/drivers/clocksource/timer-riscv.c
+++ b/drivers/clocksource/timer-riscv.c
@@ -56,7 +56,7 @@ static unsigned long long riscv_clocksource_rdtime(struct clocksource *cs)
return get_cycles64();
}
-static u64 riscv_sched_clock(void)
+static u64 notrace riscv_sched_clock(void)
{
return get_cycles64();
}
diff --git a/drivers/cpufreq/vexpress-spc-cpufreq.c b/drivers/cpufreq/vexpress-spc-cpufreq.c
index 506e3f2bf53a..83c85d3d67e3 100644
--- a/drivers/cpufreq/vexpress-spc-cpufreq.c
+++ b/drivers/cpufreq/vexpress-spc-cpufreq.c
@@ -434,7 +434,7 @@ static int ve_spc_cpufreq_init(struct cpufreq_policy *policy)
if (cur_cluster < MAX_CLUSTERS) {
int cpu;
- cpumask_copy(policy->cpus, topology_core_cpumask(policy->cpu));
+ dev_pm_opp_get_sharing_cpus(cpu_dev, policy->cpus);
for_each_cpu(cpu, policy->cpus)
per_cpu(physical_cluster, cpu) = cur_cluster;
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index 0005be5ea2b4..33d19c8eb027 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -381,7 +381,8 @@ u64 cpuidle_poll_time(struct cpuidle_driver *drv,
if (dev->states_usage[i].disable)
continue;
- limit_ns = (u64)drv->states[i].target_residency_ns;
+ limit_ns = drv->states[i].target_residency_ns;
+ break;
}
dev->poll_limit_ns = limit_ns;
diff --git a/drivers/cpuidle/driver.c b/drivers/cpuidle/driver.c
index c76423aaef4d..ce6a5f80fb83 100644
--- a/drivers/cpuidle/driver.c
+++ b/drivers/cpuidle/driver.c
@@ -403,6 +403,13 @@ void cpuidle_driver_state_disabled(struct cpuidle_driver *drv, int idx,
mutex_lock(&cpuidle_lock);
+ spin_lock(&cpuidle_driver_lock);
+
+ if (!drv->cpumask) {
+ drv->states[idx].flags |= CPUIDLE_FLAG_UNUSABLE;
+ goto unlock;
+ }
+
for_each_cpu(cpu, drv->cpumask) {
struct cpuidle_device *dev = per_cpu(cpuidle_devices, cpu);
@@ -415,5 +422,8 @@ void cpuidle_driver_state_disabled(struct cpuidle_driver *drv, int idx,
dev->states_usage[idx].disable &= ~CPUIDLE_STATE_DISABLED_BY_DRIVER;
}
+unlock:
+ spin_unlock(&cpuidle_driver_lock);
+
mutex_unlock(&cpuidle_lock);
}
diff --git a/drivers/crypto/chelsio/chtls/chtls_cm.c b/drivers/crypto/chelsio/chtls/chtls_cm.c
index aca75237bbcf..72e5b0f65a91 100644
--- a/drivers/crypto/chelsio/chtls/chtls_cm.c
+++ b/drivers/crypto/chelsio/chtls/chtls_cm.c
@@ -1273,7 +1273,7 @@ static int chtls_pass_accept_req(struct chtls_dev *cdev, struct sk_buff *skb)
ctx = (struct listen_ctx *)data;
lsk = ctx->lsk;
- if (unlikely(tid >= cdev->tids->ntids)) {
+ if (unlikely(tid_out_of_range(cdev->tids, tid))) {
pr_info("passive open TID %u too large\n", tid);
return 1;
}
diff --git a/drivers/devfreq/Kconfig b/drivers/devfreq/Kconfig
index defe1d438710..35535833b6f7 100644
--- a/drivers/devfreq/Kconfig
+++ b/drivers/devfreq/Kconfig
@@ -83,7 +83,6 @@ config ARM_EXYNOS_BUS_DEVFREQ
select DEVFREQ_GOV_PASSIVE
select DEVFREQ_EVENT_EXYNOS_PPMU
select PM_DEVFREQ_EVENT
- select PM_OPP
help
This adds the common DEVFREQ driver for Exynos Memory bus. Exynos
Memory bus has one more group of memory bus (e.g, MIF and INT block).
@@ -98,7 +97,7 @@ config ARM_TEGRA_DEVFREQ
ARCH_TEGRA_132_SOC || ARCH_TEGRA_124_SOC || \
ARCH_TEGRA_210_SOC || \
COMPILE_TEST
- select PM_OPP
+ depends on COMMON_CLK
help
This adds the DEVFREQ driver for the Tegra family of SoCs.
It reads ACTMON counters of memory controllers and adjusts the
@@ -109,7 +108,6 @@ config ARM_TEGRA20_DEVFREQ
depends on (TEGRA_MC && TEGRA20_EMC) || COMPILE_TEST
depends on COMMON_CLK
select DEVFREQ_GOV_SIMPLE_ONDEMAND
- select PM_OPP
help
This adds the DEVFREQ driver for the Tegra20 family of SoCs.
It reads Memory Controller counters and adjusts the operating
@@ -121,7 +119,6 @@ config ARM_RK3399_DMC_DEVFREQ
select DEVFREQ_EVENT_ROCKCHIP_DFI
select DEVFREQ_GOV_SIMPLE_ONDEMAND
select PM_DEVFREQ_EVENT
- select PM_OPP
help
This adds the DEVFREQ driver for the RK3399 DMC(Dynamic Memory Controller).
It sets the frequency for the memory controller and reads the usage counts
diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
index 425149e8bab0..57f6944d65a6 100644
--- a/drivers/devfreq/devfreq.c
+++ b/drivers/devfreq/devfreq.c
@@ -24,11 +24,14 @@
#include <linux/printk.h>
#include <linux/hrtimer.h>
#include <linux/of.h>
+#include <linux/pm_qos.h>
#include "governor.h"
#define CREATE_TRACE_POINTS
#include <trace/events/devfreq.h>
+#define HZ_PER_KHZ 1000
+
static struct class *devfreq_class;
/*
@@ -99,6 +102,54 @@ static unsigned long find_available_max_freq(struct devfreq *devfreq)
}
/**
+ * get_freq_range() - Get the current freq range
+ * @devfreq: the devfreq instance
+ * @min_freq: the min frequency
+ * @max_freq: the max frequency
+ *
+ * This takes into consideration all constraints.
+ */
+static void get_freq_range(struct devfreq *devfreq,
+ unsigned long *min_freq,
+ unsigned long *max_freq)
+{
+ unsigned long *freq_table = devfreq->profile->freq_table;
+ s32 qos_min_freq, qos_max_freq;
+
+ lockdep_assert_held(&devfreq->lock);
+
+ /*
+ * Initialize minimum/maximum frequency from freq table.
+ * The devfreq drivers can initialize this in either ascending or
+ * descending order and devfreq core supports both.
+ */
+ if (freq_table[0] < freq_table[devfreq->profile->max_state - 1]) {
+ *min_freq = freq_table[0];
+ *max_freq = freq_table[devfreq->profile->max_state - 1];
+ } else {
+ *min_freq = freq_table[devfreq->profile->max_state - 1];
+ *max_freq = freq_table[0];
+ }
+
+ /* Apply constraints from PM QoS */
+ qos_min_freq = dev_pm_qos_read_value(devfreq->dev.parent,
+ DEV_PM_QOS_MIN_FREQUENCY);
+ qos_max_freq = dev_pm_qos_read_value(devfreq->dev.parent,
+ DEV_PM_QOS_MAX_FREQUENCY);
+ *min_freq = max(*min_freq, (unsigned long)HZ_PER_KHZ * qos_min_freq);
+ if (qos_max_freq != PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE)
+ *max_freq = min(*max_freq,
+ (unsigned long)HZ_PER_KHZ * qos_max_freq);
+
+ /* Apply constraints from OPP interface */
+ *min_freq = max(*min_freq, devfreq->scaling_min_freq);
+ *max_freq = min(*max_freq, devfreq->scaling_max_freq);
+
+ if (*min_freq > *max_freq)
+ *min_freq = *max_freq;
+}
+
+/**
* devfreq_get_freq_level() - Lookup freq_table for the frequency
* @devfreq: the devfreq instance
* @freq: the target frequency
@@ -351,16 +402,7 @@ int update_devfreq(struct devfreq *devfreq)
err = devfreq->governor->get_target_freq(devfreq, &freq);
if (err)
return err;
-
- /*
- * Adjust the frequency with user freq, QoS and available freq.
- *
- * List from the highest priority
- * max_freq
- * min_freq
- */
- max_freq = min(devfreq->scaling_max_freq, devfreq->max_freq);
- min_freq = max(devfreq->scaling_min_freq, devfreq->min_freq);
+ get_freq_range(devfreq, &min_freq, &max_freq);
if (freq < min_freq) {
freq = min_freq;
@@ -568,26 +610,69 @@ static int devfreq_notifier_call(struct notifier_block *nb, unsigned long type,
void *devp)
{
struct devfreq *devfreq = container_of(nb, struct devfreq, nb);
- int ret;
+ int err = -EINVAL;
mutex_lock(&devfreq->lock);
devfreq->scaling_min_freq = find_available_min_freq(devfreq);
- if (!devfreq->scaling_min_freq) {
- mutex_unlock(&devfreq->lock);
- return -EINVAL;
- }
+ if (!devfreq->scaling_min_freq)
+ goto out;
devfreq->scaling_max_freq = find_available_max_freq(devfreq);
if (!devfreq->scaling_max_freq) {
- mutex_unlock(&devfreq->lock);
- return -EINVAL;
+ devfreq->scaling_max_freq = ULONG_MAX;
+ goto out;
}
- ret = update_devfreq(devfreq);
+ err = update_devfreq(devfreq);
+
+out:
mutex_unlock(&devfreq->lock);
+ if (err)
+ dev_err(devfreq->dev.parent,
+ "failed to update frequency from OPP notifier (%d)\n",
+ err);
- return ret;
+ return NOTIFY_OK;
+}
+
+/**
+ * qos_notifier_call() - Common handler for QoS constraints.
+ * @devfreq: the devfreq instance.
+ */
+static int qos_notifier_call(struct devfreq *devfreq)
+{
+ int err;
+
+ mutex_lock(&devfreq->lock);
+ err = update_devfreq(devfreq);
+ mutex_unlock(&devfreq->lock);
+ if (err)
+ dev_err(devfreq->dev.parent,
+ "failed to update frequency from PM QoS (%d)\n",
+ err);
+
+ return NOTIFY_OK;
+}
+
+/**
+ * qos_min_notifier_call() - Callback for QoS min_freq changes.
+ * @nb: Should be devfreq->nb_min
+ */
+static int qos_min_notifier_call(struct notifier_block *nb,
+ unsigned long val, void *ptr)
+{
+ return qos_notifier_call(container_of(nb, struct devfreq, nb_min));
+}
+
+/**
+ * qos_max_notifier_call() - Callback for QoS max_freq changes.
+ * @nb: Should be devfreq->nb_max
+ */
+static int qos_max_notifier_call(struct notifier_block *nb,
+ unsigned long val, void *ptr)
+{
+ return qos_notifier_call(container_of(nb, struct devfreq, nb_max));
}
/**
@@ -599,16 +684,36 @@ static int devfreq_notifier_call(struct notifier_block *nb, unsigned long type,
static void devfreq_dev_release(struct device *dev)
{
struct devfreq *devfreq = to_devfreq(dev);
+ int err;
mutex_lock(&devfreq_list_lock);
- if (IS_ERR(find_device_devfreq(devfreq->dev.parent))) {
- mutex_unlock(&devfreq_list_lock);
- dev_warn(&devfreq->dev, "releasing devfreq which doesn't exist\n");
- return;
- }
list_del(&devfreq->node);
mutex_unlock(&devfreq_list_lock);
+ err = dev_pm_qos_remove_notifier(devfreq->dev.parent, &devfreq->nb_max,
+ DEV_PM_QOS_MAX_FREQUENCY);
+ if (err && err != -ENOENT)
+ dev_warn(dev->parent,
+ "Failed to remove max_freq notifier: %d\n", err);
+ err = dev_pm_qos_remove_notifier(devfreq->dev.parent, &devfreq->nb_min,
+ DEV_PM_QOS_MIN_FREQUENCY);
+ if (err && err != -ENOENT)
+ dev_warn(dev->parent,
+ "Failed to remove min_freq notifier: %d\n", err);
+
+ if (dev_pm_qos_request_active(&devfreq->user_max_freq_req)) {
+ err = dev_pm_qos_remove_request(&devfreq->user_max_freq_req);
+ if (err)
+ dev_warn(dev->parent,
+ "Failed to remove max_freq request: %d\n", err);
+ }
+ if (dev_pm_qos_request_active(&devfreq->user_min_freq_req)) {
+ err = dev_pm_qos_remove_request(&devfreq->user_min_freq_req);
+ if (err)
+ dev_warn(dev->parent,
+ "Failed to remove min_freq request: %d\n", err);
+ }
+
if (devfreq->profile->exit)
devfreq->profile->exit(devfreq->dev.parent);
@@ -660,6 +765,7 @@ struct devfreq *devfreq_add_device(struct device *dev,
devfreq->dev.parent = dev;
devfreq->dev.class = devfreq_class;
devfreq->dev.release = devfreq_dev_release;
+ INIT_LIST_HEAD(&devfreq->node);
devfreq->profile = profile;
strncpy(devfreq->governor_name, governor_name, DEVFREQ_NAME_LEN);
devfreq->previous_freq = profile->initial_freq;
@@ -681,7 +787,6 @@ struct devfreq *devfreq_add_device(struct device *dev,
err = -EINVAL;
goto err_dev;
}
- devfreq->min_freq = devfreq->scaling_min_freq;
devfreq->scaling_max_freq = find_available_max_freq(devfreq);
if (!devfreq->scaling_max_freq) {
@@ -689,7 +794,6 @@ struct devfreq *devfreq_add_device(struct device *dev,
err = -EINVAL;
goto err_dev;
}
- devfreq->max_freq = devfreq->scaling_max_freq;
devfreq->suspend_freq = dev_pm_opp_get_suspend_opp_freq(dev);
atomic_set(&devfreq->suspend_count, 0);
@@ -730,6 +834,28 @@ struct devfreq *devfreq_add_device(struct device *dev,
mutex_unlock(&devfreq->lock);
+ err = dev_pm_qos_add_request(dev, &devfreq->user_min_freq_req,
+ DEV_PM_QOS_MIN_FREQUENCY, 0);
+ if (err < 0)
+ goto err_devfreq;
+ err = dev_pm_qos_add_request(dev, &devfreq->user_max_freq_req,
+ DEV_PM_QOS_MAX_FREQUENCY,
+ PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE);
+ if (err < 0)
+ goto err_devfreq;
+
+ devfreq->nb_min.notifier_call = qos_min_notifier_call;
+ err = dev_pm_qos_add_notifier(devfreq->dev.parent, &devfreq->nb_min,
+ DEV_PM_QOS_MIN_FREQUENCY);
+ if (err)
+ goto err_devfreq;
+
+ devfreq->nb_max.notifier_call = qos_max_notifier_call;
+ err = dev_pm_qos_add_notifier(devfreq->dev.parent, &devfreq->nb_max,
+ DEV_PM_QOS_MAX_FREQUENCY);
+ if (err)
+ goto err_devfreq;
+
mutex_lock(&devfreq_list_lock);
governor = try_then_request_governor(devfreq->governor_name);
@@ -1303,41 +1429,37 @@ static ssize_t min_freq_store(struct device *dev, struct device_attribute *attr,
unsigned long value;
int ret;
+ /*
+ * Protect against theoretical sysfs writes between
+ * device_add and dev_pm_qos_add_request
+ */
+ if (!dev_pm_qos_request_active(&df->user_min_freq_req))
+ return -EAGAIN;
+
ret = sscanf(buf, "%lu", &value);
if (ret != 1)
return -EINVAL;
- mutex_lock(&df->lock);
-
- if (value) {
- if (value > df->max_freq) {
- ret = -EINVAL;
- goto unlock;
- }
- } else {
- unsigned long *freq_table = df->profile->freq_table;
-
- /* Get minimum frequency according to sorting order */
- if (freq_table[0] < freq_table[df->profile->max_state - 1])
- value = freq_table[0];
- else
- value = freq_table[df->profile->max_state - 1];
- }
+ /* Round down to kHz for PM QoS */
+ ret = dev_pm_qos_update_request(&df->user_min_freq_req,
+ value / HZ_PER_KHZ);
+ if (ret < 0)
+ return ret;
- df->min_freq = value;
- update_devfreq(df);
- ret = count;
-unlock:
- mutex_unlock(&df->lock);
- return ret;
+ return count;
}
static ssize_t min_freq_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
struct devfreq *df = to_devfreq(dev);
+ unsigned long min_freq, max_freq;
- return sprintf(buf, "%lu\n", max(df->scaling_min_freq, df->min_freq));
+ mutex_lock(&df->lock);
+ get_freq_range(df, &min_freq, &max_freq);
+ mutex_unlock(&df->lock);
+
+ return sprintf(buf, "%lu\n", min_freq);
}
static ssize_t max_freq_store(struct device *dev, struct device_attribute *attr,
@@ -1347,33 +1469,37 @@ static ssize_t max_freq_store(struct device *dev, struct device_attribute *attr,
unsigned long value;
int ret;
+ /*
+ * Protect against theoretical sysfs writes between
+ * device_add and dev_pm_qos_add_request
+ */
+ if (!dev_pm_qos_request_active(&df->user_max_freq_req))
+ return -EINVAL;
+
ret = sscanf(buf, "%lu", &value);
if (ret != 1)
return -EINVAL;
- mutex_lock(&df->lock);
-
- if (value) {
- if (value < df->min_freq) {
- ret = -EINVAL;
- goto unlock;
- }
- } else {
- unsigned long *freq_table = df->profile->freq_table;
+ /*
+ * PM QoS frequencies are in kHz so we need to convert. Convert by
+ * rounding upwards so that the acceptable interval never shrinks.
+ *
+ * For example if the user writes "666666666" to sysfs this value will
+ * be converted to 666667 kHz and back to 666667000 Hz before an OPP
+ * lookup, this ensures that an OPP of 666666666Hz is still accepted.
+ *
+ * A value of zero means "no limit".
+ */
+ if (value)
+ value = DIV_ROUND_UP(value, HZ_PER_KHZ);
+ else
+ value = PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE;
- /* Get maximum frequency according to sorting order */
- if (freq_table[0] < freq_table[df->profile->max_state - 1])
- value = freq_table[df->profile->max_state - 1];
- else
- value = freq_table[0];
- }
+ ret = dev_pm_qos_update_request(&df->user_max_freq_req, value);
+ if (ret < 0)
+ return ret;
- df->max_freq = value;
- update_devfreq(df);
- ret = count;
-unlock:
- mutex_unlock(&df->lock);
- return ret;
+ return count;
}
static DEVICE_ATTR_RW(min_freq);
@@ -1381,8 +1507,13 @@ static ssize_t max_freq_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
struct devfreq *df = to_devfreq(dev);
+ unsigned long min_freq, max_freq;
+
+ mutex_lock(&df->lock);
+ get_freq_range(df, &min_freq, &max_freq);
+ mutex_unlock(&df->lock);
- return sprintf(buf, "%lu\n", min(df->scaling_max_freq, df->max_freq));
+ return sprintf(buf, "%lu\n", max_freq);
}
static DEVICE_ATTR_RW(max_freq);
diff --git a/drivers/dma-buf/sync_file.c b/drivers/dma-buf/sync_file.c
index 76fb072c22dc..5a5a1da01a00 100644
--- a/drivers/dma-buf/sync_file.c
+++ b/drivers/dma-buf/sync_file.c
@@ -221,7 +221,7 @@ static struct sync_file *sync_file_merge(const char *name, struct sync_file *a,
a_fences = get_fences(a, &a_num_fences);
b_fences = get_fences(b, &b_num_fences);
if (a_num_fences > INT_MAX - b_num_fences)
- return NULL;
+ goto err;
num_fences = a_num_fences + b_num_fences;
diff --git a/drivers/dma/dma-jz4780.c b/drivers/dma/dma-jz4780.c
index fa626acdc9b9..44af435628f8 100644
--- a/drivers/dma/dma-jz4780.c
+++ b/drivers/dma/dma-jz4780.c
@@ -999,7 +999,8 @@ static const struct jz4780_dma_soc_data jz4740_dma_soc_data = {
static const struct jz4780_dma_soc_data jz4725b_dma_soc_data = {
.nb_channels = 6,
.transfer_ord_max = 5,
- .flags = JZ_SOC_DATA_PER_CHAN_PM | JZ_SOC_DATA_NO_DCKES_DCKEC,
+ .flags = JZ_SOC_DATA_PER_CHAN_PM | JZ_SOC_DATA_NO_DCKES_DCKEC |
+ JZ_SOC_DATA_BREAK_LINKS,
};
static const struct jz4780_dma_soc_data jz4770_dma_soc_data = {
diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c
index 1a422a8b43cf..18c011e57592 100644
--- a/drivers/dma/ioat/dma.c
+++ b/drivers/dma/ioat/dma.c
@@ -377,10 +377,11 @@ ioat_alloc_ring(struct dma_chan *c, int order, gfp_t flags)
descs->virt = dma_alloc_coherent(to_dev(ioat_chan),
SZ_2M, &descs->hw, flags);
- if (!descs->virt && (i > 0)) {
+ if (!descs->virt) {
int idx;
for (idx = 0; idx < i; idx++) {
+ descs = &ioat_chan->descs[idx];
dma_free_coherent(to_dev(ioat_chan), SZ_2M,
descs->virt, descs->hw);
descs->virt = NULL;
diff --git a/drivers/dma/k3dma.c b/drivers/dma/k3dma.c
index adecea51814f..c5c1aa0dcaed 100644
--- a/drivers/dma/k3dma.c
+++ b/drivers/dma/k3dma.c
@@ -229,9 +229,11 @@ static irqreturn_t k3_dma_int_handler(int irq, void *dev_id)
c = p->vchan;
if (c && (tc1 & BIT(i))) {
spin_lock_irqsave(&c->vc.lock, flags);
- vchan_cookie_complete(&p->ds_run->vd);
- p->ds_done = p->ds_run;
- p->ds_run = NULL;
+ if (p->ds_run != NULL) {
+ vchan_cookie_complete(&p->ds_run->vd);
+ p->ds_done = p->ds_run;
+ p->ds_run = NULL;
+ }
spin_unlock_irqrestore(&c->vc.lock, flags);
}
if (c && (tc2 & BIT(i))) {
@@ -271,6 +273,10 @@ static int k3_dma_start_txd(struct k3_dma_chan *c)
if (BIT(c->phy->idx) & k3_dma_get_chan_stat(d))
return -EAGAIN;
+ /* Avoid losing track of ds_run if a transaction is in flight */
+ if (c->phy->ds_run)
+ return -EAGAIN;
+
if (vd) {
struct k3_dma_desc_sw *ds =
container_of(vd, struct k3_dma_desc_sw, vd);
diff --git a/drivers/dma/virt-dma.c b/drivers/dma/virt-dma.c
index ec4adf4260a0..256fc662c500 100644
--- a/drivers/dma/virt-dma.c
+++ b/drivers/dma/virt-dma.c
@@ -104,9 +104,8 @@ static void vchan_complete(unsigned long arg)
dmaengine_desc_get_callback(&vd->tx, &cb);
list_del(&vd->node);
- vchan_vdesc_fini(vd);
-
dmaengine_desc_callback_invoke(&cb, &vd->tx_result);
+ vchan_vdesc_fini(vd);
}
}
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index 417dad635526..5c8272329a65 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -462,7 +462,7 @@ config EDAC_ALTERA_SDMMC
config EDAC_SIFIVE
bool "Sifive platform EDAC driver"
- depends on EDAC=y && RISCV
+ depends on EDAC=y && SIFIVE_L2
help
Support for error detection and correction on the SiFive SoCs.
diff --git a/drivers/firmware/broadcom/tee_bnxt_fw.c b/drivers/firmware/broadcom/tee_bnxt_fw.c
index 5b7ef89eb701..ed10da5313e8 100644
--- a/drivers/firmware/broadcom/tee_bnxt_fw.c
+++ b/drivers/firmware/broadcom/tee_bnxt_fw.c
@@ -215,7 +215,6 @@ static int tee_bnxt_fw_probe(struct device *dev)
fw_shm_pool = tee_shm_alloc(pvt_data.ctx, MAX_SHM_MEM_SZ,
TEE_SHM_MAPPED | TEE_SHM_DMA_BUF);
if (IS_ERR(fw_shm_pool)) {
- tee_client_close_context(pvt_data.ctx);
dev_err(pvt_data.dev, "tee_shm_alloc failed\n");
err = PTR_ERR(fw_shm_pool);
goto out_sess;
diff --git a/drivers/firmware/efi/earlycon.c b/drivers/firmware/efi/earlycon.c
index c9a0efca17b0..d4077db6dc97 100644
--- a/drivers/firmware/efi/earlycon.c
+++ b/drivers/firmware/efi/earlycon.c
@@ -13,18 +13,57 @@
#include <asm/early_ioremap.h>
+static const struct console *earlycon_console __initdata;
static const struct font_desc *font;
static u32 efi_x, efi_y;
static u64 fb_base;
static pgprot_t fb_prot;
+static void *efi_fb;
+
+/*
+ * EFI earlycon needs to use early_memremap() to map the framebuffer.
+ * But early_memremap() is not usable for 'earlycon=efifb keep_bootcon',
+ * memremap() should be used instead. memremap() will be available after
+ * paging_init() which is earlier than initcall callbacks. Thus adding this
+ * early initcall function early_efi_map_fb() to map the whole EFI framebuffer.
+ */
+static int __init efi_earlycon_remap_fb(void)
+{
+ /* bail if there is no bootconsole or it has been disabled already */
+ if (!earlycon_console || !(earlycon_console->flags & CON_ENABLED))
+ return 0;
+
+ if (pgprot_val(fb_prot) == pgprot_val(PAGE_KERNEL))
+ efi_fb = memremap(fb_base, screen_info.lfb_size, MEMREMAP_WB);
+ else
+ efi_fb = memremap(fb_base, screen_info.lfb_size, MEMREMAP_WC);
+
+ return efi_fb ? 0 : -ENOMEM;
+}
+early_initcall(efi_earlycon_remap_fb);
+
+static int __init efi_earlycon_unmap_fb(void)
+{
+ /* unmap the bootconsole fb unless keep_bootcon has left it enabled */
+ if (efi_fb && !(earlycon_console->flags & CON_ENABLED))
+ memunmap(efi_fb);
+ return 0;
+}
+late_initcall(efi_earlycon_unmap_fb);
static __ref void *efi_earlycon_map(unsigned long start, unsigned long len)
{
+ if (efi_fb)
+ return efi_fb + start;
+
return early_memremap_prot(fb_base + start, len, pgprot_val(fb_prot));
}
static __ref void efi_earlycon_unmap(void *addr, unsigned long len)
{
+ if (efi_fb)
+ return;
+
early_memunmap(addr, len);
}
@@ -201,6 +240,7 @@ static int __init efi_earlycon_setup(struct earlycon_device *device,
efi_earlycon_scroll_up();
device->con->write = efi_earlycon_write;
+ earlycon_console = device->con;
return 0;
}
EARLYCON_DECLARE(efifb, efi_earlycon_setup);
diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index d101f072c8f8..2b02cb165f16 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -681,7 +681,7 @@ device_initcall(efi_load_efivars);
{ name }, \
{ prop }, \
offsetof(struct efi_fdt_params, field), \
- FIELD_SIZEOF(struct efi_fdt_params, field) \
+ sizeof_field(struct efi_fdt_params, field) \
}
struct params {
@@ -979,6 +979,24 @@ static int __init efi_memreserve_map_root(void)
return 0;
}
+static int efi_mem_reserve_iomem(phys_addr_t addr, u64 size)
+{
+ struct resource *res, *parent;
+
+ res = kzalloc(sizeof(struct resource), GFP_ATOMIC);
+ if (!res)
+ return -ENOMEM;
+
+ res->name = "reserved";
+ res->flags = IORESOURCE_MEM;
+ res->start = addr;
+ res->end = addr + size - 1;
+
+ /* we expect a conflict with a 'System RAM' region */
+ parent = request_resource_conflict(&iomem_resource, res);
+ return parent ? request_resource(parent, res) : 0;
+}
+
int __ref efi_mem_reserve_persistent(phys_addr_t addr, u64 size)
{
struct linux_efi_memreserve *rsv;
@@ -1003,7 +1021,7 @@ int __ref efi_mem_reserve_persistent(phys_addr_t addr, u64 size)
rsv->entry[index].size = size;
memunmap(rsv);
- return 0;
+ return efi_mem_reserve_iomem(addr, size);
}
memunmap(rsv);
}
@@ -1013,6 +1031,12 @@ int __ref efi_mem_reserve_persistent(phys_addr_t addr, u64 size)
if (!rsv)
return -ENOMEM;
+ rc = efi_mem_reserve_iomem(__pa(rsv), SZ_4K);
+ if (rc) {
+ free_page((unsigned long)rsv);
+ return rc;
+ }
+
/*
* The memremap() call above assumes that a linux_efi_memreserve entry
* never crosses a page boundary, so let's ensure that this remains true
@@ -1029,7 +1053,7 @@ int __ref efi_mem_reserve_persistent(phys_addr_t addr, u64 size)
efi_memreserve_root->next = __pa(rsv);
spin_unlock(&efi_mem_reserve_persistent_lock);
- return 0;
+ return efi_mem_reserve_iomem(addr, size);
}
static int __init efi_memreserve_root_init(void)
diff --git a/drivers/firmware/efi/libstub/gop.c b/drivers/firmware/efi/libstub/gop.c
index 0101ca4c13b1..b7bf1e993b8b 100644
--- a/drivers/firmware/efi/libstub/gop.c
+++ b/drivers/firmware/efi/libstub/gop.c
@@ -84,30 +84,6 @@ setup_pixel_info(struct screen_info *si, u32 pixels_per_scan_line,
}
static efi_status_t
-__gop_query32(efi_system_table_t *sys_table_arg,
- struct efi_graphics_output_protocol_32 *gop32,
- struct efi_graphics_output_mode_info **info,
- unsigned long *size, u64 *fb_base)
-{
- struct efi_graphics_output_protocol_mode_32 *mode;
- efi_graphics_output_protocol_query_mode query_mode;
- efi_status_t status;
- unsigned long m;
-
- m = gop32->mode;
- mode = (struct efi_graphics_output_protocol_mode_32 *)m;
- query_mode = (void *)(unsigned long)gop32->query_mode;
-
- status = __efi_call_early(query_mode, (void *)gop32, mode->mode, size,
- info);
- if (status != EFI_SUCCESS)
- return status;
-
- *fb_base = mode->frame_buffer_base;
- return status;
-}
-
-static efi_status_t
setup_gop32(efi_system_table_t *sys_table_arg, struct screen_info *si,
efi_guid_t *proto, unsigned long size, void **gop_handle)
{
@@ -119,7 +95,7 @@ setup_gop32(efi_system_table_t *sys_table_arg, struct screen_info *si,
u64 fb_base;
struct efi_pixel_bitmask pixel_info;
int pixel_format;
- efi_status_t status = EFI_NOT_FOUND;
+ efi_status_t status;
u32 *handles = (u32 *)(unsigned long)gop_handle;
int i;
@@ -128,6 +104,7 @@ setup_gop32(efi_system_table_t *sys_table_arg, struct screen_info *si,
nr_gops = size / sizeof(u32);
for (i = 0; i < nr_gops; i++) {
+ struct efi_graphics_output_protocol_mode_32 *mode;
struct efi_graphics_output_mode_info *info = NULL;
efi_guid_t conout_proto = EFI_CONSOLE_OUT_DEVICE_GUID;
bool conout_found = false;
@@ -145,9 +122,11 @@ setup_gop32(efi_system_table_t *sys_table_arg, struct screen_info *si,
if (status == EFI_SUCCESS)
conout_found = true;
- status = __gop_query32(sys_table_arg, gop32, &info, &size,
- &current_fb_base);
- if (status == EFI_SUCCESS && (!first_gop || conout_found) &&
+ mode = (void *)(unsigned long)gop32->mode;
+ info = (void *)(unsigned long)mode->info;
+ current_fb_base = mode->frame_buffer_base;
+
+ if ((!first_gop || conout_found) &&
info->pixel_format != PIXEL_BLT_ONLY) {
/*
* Systems that use the UEFI Console Splitter may
@@ -175,7 +154,7 @@ setup_gop32(efi_system_table_t *sys_table_arg, struct screen_info *si,
/* Did we find any GOPs? */
if (!first_gop)
- goto out;
+ return EFI_NOT_FOUND;
/* EFI framebuffer */
si->orig_video_isVGA = VIDEO_TYPE_EFI;
@@ -197,32 +176,8 @@ setup_gop32(efi_system_table_t *sys_table_arg, struct screen_info *si,
si->lfb_size = si->lfb_linelength * si->lfb_height;
si->capabilities |= VIDEO_CAPABILITY_SKIP_QUIRKS;
-out:
- return status;
-}
-
-static efi_status_t
-__gop_query64(efi_system_table_t *sys_table_arg,
- struct efi_graphics_output_protocol_64 *gop64,
- struct efi_graphics_output_mode_info **info,
- unsigned long *size, u64 *fb_base)
-{
- struct efi_graphics_output_protocol_mode_64 *mode;
- efi_graphics_output_protocol_query_mode query_mode;
- efi_status_t status;
- unsigned long m;
-
- m = gop64->mode;
- mode = (struct efi_graphics_output_protocol_mode_64 *)m;
- query_mode = (void *)(unsigned long)gop64->query_mode;
-
- status = __efi_call_early(query_mode, (void *)gop64, mode->mode, size,
- info);
- if (status != EFI_SUCCESS)
- return status;
- *fb_base = mode->frame_buffer_base;
- return status;
+ return EFI_SUCCESS;
}
static efi_status_t
@@ -237,7 +192,7 @@ setup_gop64(efi_system_table_t *sys_table_arg, struct screen_info *si,
u64 fb_base;
struct efi_pixel_bitmask pixel_info;
int pixel_format;
- efi_status_t status = EFI_NOT_FOUND;
+ efi_status_t status;
u64 *handles = (u64 *)(unsigned long)gop_handle;
int i;
@@ -246,6 +201,7 @@ setup_gop64(efi_system_table_t *sys_table_arg, struct screen_info *si,
nr_gops = size / sizeof(u64);
for (i = 0; i < nr_gops; i++) {
+ struct efi_graphics_output_protocol_mode_64 *mode;
struct efi_graphics_output_mode_info *info = NULL;
efi_guid_t conout_proto = EFI_CONSOLE_OUT_DEVICE_GUID;
bool conout_found = false;
@@ -263,9 +219,11 @@ setup_gop64(efi_system_table_t *sys_table_arg, struct screen_info *si,
if (status == EFI_SUCCESS)
conout_found = true;
- status = __gop_query64(sys_table_arg, gop64, &info, &size,
- &current_fb_base);
- if (status == EFI_SUCCESS && (!first_gop || conout_found) &&
+ mode = (void *)(unsigned long)gop64->mode;
+ info = (void *)(unsigned long)mode->info;
+ current_fb_base = mode->frame_buffer_base;
+
+ if ((!first_gop || conout_found) &&
info->pixel_format != PIXEL_BLT_ONLY) {
/*
* Systems that use the UEFI Console Splitter may
@@ -293,7 +251,7 @@ setup_gop64(efi_system_table_t *sys_table_arg, struct screen_info *si,
/* Did we find any GOPs? */
if (!first_gop)
- goto out;
+ return EFI_NOT_FOUND;
/* EFI framebuffer */
si->orig_video_isVGA = VIDEO_TYPE_EFI;
@@ -315,8 +273,8 @@ setup_gop64(efi_system_table_t *sys_table_arg, struct screen_info *si,
si->lfb_size = si->lfb_linelength * si->lfb_height;
si->capabilities |= VIDEO_CAPABILITY_SKIP_QUIRKS;
-out:
- return status;
+
+ return EFI_SUCCESS;
}
/*
diff --git a/drivers/firmware/efi/rci2-table.c b/drivers/firmware/efi/rci2-table.c
index 76b0c354a027..de1a9a1f9f14 100644
--- a/drivers/firmware/efi/rci2-table.c
+++ b/drivers/firmware/efi/rci2-table.c
@@ -81,6 +81,9 @@ static int __init efi_rci2_sysfs_init(void)
struct kobject *tables_kobj;
int ret = -ENOMEM;
+ if (rci2_table_phys == EFI_INVALID_TABLE_ADDR)
+ return 0;
+
rci2_base = memremap(rci2_table_phys,
sizeof(struct rci2_table_global_hdr),
MEMREMAP_WB);
diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index 8adffd42f8cb..6ab25fe1c423 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -553,8 +553,8 @@ config GPIO_TEGRA
config GPIO_TEGRA186
tristate "NVIDIA Tegra186 GPIO support"
- default ARCH_TEGRA_186_SOC
- depends on ARCH_TEGRA_186_SOC || COMPILE_TEST
+ default ARCH_TEGRA_186_SOC || ARCH_TEGRA_194_SOC
+ depends on ARCH_TEGRA_186_SOC || ARCH_TEGRA_194_SOC || COMPILE_TEST
depends on OF_GPIO
select GPIOLIB_IRQCHIP
select IRQ_DOMAIN_HIERARCHY
diff --git a/drivers/gpio/gpio-aspeed-sgpio.c b/drivers/gpio/gpio-aspeed-sgpio.c
index 7e99860ca447..8319812593e3 100644
--- a/drivers/gpio/gpio-aspeed-sgpio.c
+++ b/drivers/gpio/gpio-aspeed-sgpio.c
@@ -107,7 +107,7 @@ static void __iomem *bank_reg(struct aspeed_sgpio *gpio,
return gpio->base + bank->irq_regs + GPIO_IRQ_STATUS;
default:
/* acturally if code runs to here, it's an error case */
- BUG_ON(1);
+ BUG();
}
}
diff --git a/drivers/gpio/gpio-mockup.c b/drivers/gpio/gpio-mockup.c
index 56d647a30e3e..c4fdc192ea4e 100644
--- a/drivers/gpio/gpio-mockup.c
+++ b/drivers/gpio/gpio-mockup.c
@@ -226,7 +226,7 @@ static int gpio_mockup_get_direction(struct gpio_chip *gc, unsigned int offset)
int direction;
mutex_lock(&chip->lock);
- direction = !chip->lines[offset].dir;
+ direction = chip->lines[offset].dir;
mutex_unlock(&chip->lock);
return direction;
@@ -395,7 +395,7 @@ static int gpio_mockup_probe(struct platform_device *pdev)
struct gpio_chip *gc;
struct device *dev;
const char *name;
- int rv, base;
+ int rv, base, i;
u16 ngpio;
dev = &pdev->dev;
@@ -447,6 +447,9 @@ static int gpio_mockup_probe(struct platform_device *pdev)
if (!chip->lines)
return -ENOMEM;
+ for (i = 0; i < gc->ngpio; i++)
+ chip->lines[i].dir = GPIO_LINE_DIRECTION_IN;
+
if (device_property_read_bool(dev, "named-gpio-lines")) {
rv = gpio_mockup_name_lines(dev, chip);
if (rv)
diff --git a/drivers/gpio/gpio-mpc8xxx.c b/drivers/gpio/gpio-mpc8xxx.c
index f1e164cecff8..5ae30de3490a 100644
--- a/drivers/gpio/gpio-mpc8xxx.c
+++ b/drivers/gpio/gpio-mpc8xxx.c
@@ -346,6 +346,7 @@ static int mpc8xxx_probe(struct platform_device *pdev)
return -ENOMEM;
gc = &mpc8xxx_gc->gc;
+ gc->parent = &pdev->dev;
if (of_property_read_bool(np, "little-endian")) {
ret = bgpio_init(gc, &pdev->dev, 4,
diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c
index 6652bee01966..9853547e7276 100644
--- a/drivers/gpio/gpio-pca953x.c
+++ b/drivers/gpio/gpio-pca953x.c
@@ -568,16 +568,18 @@ static void pca953x_irq_mask(struct irq_data *d)
{
struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
struct pca953x_chip *chip = gpiochip_get_data(gc);
+ irq_hw_number_t hwirq = irqd_to_hwirq(d);
- chip->irq_mask[d->hwirq / BANK_SZ] &= ~BIT(d->hwirq % BANK_SZ);
+ clear_bit(hwirq, chip->irq_mask);
}
static void pca953x_irq_unmask(struct irq_data *d)
{
struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
struct pca953x_chip *chip = gpiochip_get_data(gc);
+ irq_hw_number_t hwirq = irqd_to_hwirq(d);
- chip->irq_mask[d->hwirq / BANK_SZ] |= BIT(d->hwirq % BANK_SZ);
+ set_bit(hwirq, chip->irq_mask);
}
static int pca953x_irq_set_wake(struct irq_data *d, unsigned int on)
@@ -635,8 +637,7 @@ static int pca953x_irq_set_type(struct irq_data *d, unsigned int type)
{
struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
struct pca953x_chip *chip = gpiochip_get_data(gc);
- int bank_nb = d->hwirq / BANK_SZ;
- u8 mask = BIT(d->hwirq % BANK_SZ);
+ irq_hw_number_t hwirq = irqd_to_hwirq(d);
if (!(type & IRQ_TYPE_EDGE_BOTH)) {
dev_err(&chip->client->dev, "irq %d: unsupported type %d\n",
@@ -644,15 +645,8 @@ static int pca953x_irq_set_type(struct irq_data *d, unsigned int type)
return -EINVAL;
}
- if (type & IRQ_TYPE_EDGE_FALLING)
- chip->irq_trig_fall[bank_nb] |= mask;
- else
- chip->irq_trig_fall[bank_nb] &= ~mask;
-
- if (type & IRQ_TYPE_EDGE_RISING)
- chip->irq_trig_raise[bank_nb] |= mask;
- else
- chip->irq_trig_raise[bank_nb] &= ~mask;
+ assign_bit(hwirq, chip->irq_trig_fall, type & IRQ_TYPE_EDGE_FALLING);
+ assign_bit(hwirq, chip->irq_trig_raise, type & IRQ_TYPE_EDGE_RISING);
return 0;
}
@@ -661,10 +655,10 @@ static void pca953x_irq_shutdown(struct irq_data *d)
{
struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
struct pca953x_chip *chip = gpiochip_get_data(gc);
- u8 mask = BIT(d->hwirq % BANK_SZ);
+ irq_hw_number_t hwirq = irqd_to_hwirq(d);
- chip->irq_trig_raise[d->hwirq / BANK_SZ] &= ~mask;
- chip->irq_trig_fall[d->hwirq / BANK_SZ] &= ~mask;
+ clear_bit(hwirq, chip->irq_trig_raise);
+ clear_bit(hwirq, chip->irq_trig_fall);
}
static bool pca953x_irq_pending(struct pca953x_chip *chip, unsigned long *pending)
diff --git a/drivers/gpio/gpio-xgs-iproc.c b/drivers/gpio/gpio-xgs-iproc.c
index 773e5c24309e..b21c2e436b61 100644
--- a/drivers/gpio/gpio-xgs-iproc.c
+++ b/drivers/gpio/gpio-xgs-iproc.c
@@ -280,7 +280,7 @@ static int iproc_gpio_probe(struct platform_device *pdev)
return 0;
}
-static int __exit iproc_gpio_remove(struct platform_device *pdev)
+static int iproc_gpio_remove(struct platform_device *pdev)
{
struct iproc_gpio_chip *chip;
diff --git a/drivers/gpio/gpio-xtensa.c b/drivers/gpio/gpio-xtensa.c
index 08d7c3b32038..c8af34a6368f 100644
--- a/drivers/gpio/gpio-xtensa.c
+++ b/drivers/gpio/gpio-xtensa.c
@@ -44,15 +44,14 @@ static inline unsigned long enable_cp(unsigned long *cpenable)
unsigned long flags;
local_irq_save(flags);
- RSR_CPENABLE(*cpenable);
- WSR_CPENABLE(*cpenable | BIT(XCHAL_CP_ID_XTIOP));
-
+ *cpenable = xtensa_get_sr(cpenable);
+ xtensa_set_sr(*cpenable | BIT(XCHAL_CP_ID_XTIOP), cpenable);
return flags;
}
static inline void disable_cp(unsigned long flags, unsigned long cpenable)
{
- WSR_CPENABLE(cpenable);
+ xtensa_set_sr(cpenable, cpenable);
local_irq_restore(flags);
}
diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c
index dc27b1a88e93..b696e4598a24 100644
--- a/drivers/gpio/gpiolib-of.c
+++ b/drivers/gpio/gpiolib-of.c
@@ -23,6 +23,29 @@
#include "gpiolib.h"
#include "gpiolib-of.h"
+/**
+ * of_gpio_spi_cs_get_count() - special GPIO counting for SPI
+ * Some elder GPIO controllers need special quirks. Currently we handle
+ * the Freescale GPIO controller with bindings that doesn't use the
+ * established "cs-gpios" for chip selects but instead rely on
+ * "gpios" for the chip select lines. If we detect this, we redirect
+ * the counting of "cs-gpios" to count "gpios" transparent to the
+ * driver.
+ */
+static int of_gpio_spi_cs_get_count(struct device *dev, const char *con_id)
+{
+ struct device_node *np = dev->of_node;
+
+ if (!IS_ENABLED(CONFIG_SPI_MASTER))
+ return 0;
+ if (!con_id || strcmp(con_id, "cs"))
+ return 0;
+ if (!of_device_is_compatible(np, "fsl,spi") &&
+ !of_device_is_compatible(np, "aeroflexgaisler,spictrl"))
+ return 0;
+ return of_gpio_named_count(np, "gpios");
+}
+
/*
* This is used by external users of of_gpio_count() from <linux/of_gpio.h>
*
@@ -35,6 +58,10 @@ int of_gpio_get_count(struct device *dev, const char *con_id)
char propname[32];
unsigned int i;
+ ret = of_gpio_spi_cs_get_count(dev, con_id);
+ if (ret > 0)
+ return ret;
+
for (i = 0; i < ARRAY_SIZE(gpio_suffixes); i++) {
if (con_id)
snprintf(propname, sizeof(propname), "%s-%s",
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 9913886ede90..78a16e42f222 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -220,6 +220,14 @@ int gpiod_get_direction(struct gpio_desc *desc)
chip = gpiod_to_chip(desc);
offset = gpio_chip_hwgpio(desc);
+ /*
+ * Open drain emulation using input mode may incorrectly report
+ * input here, fix that up.
+ */
+ if (test_bit(FLAG_OPEN_DRAIN, &desc->flags) &&
+ test_bit(FLAG_IS_OUT, &desc->flags))
+ return 0;
+
if (!chip->get_direction)
return -ENOTSUPP;
@@ -4472,8 +4480,9 @@ static struct gpio_desc *gpiod_find(struct device *dev, const char *con_id,
if (chip->ngpio <= p->chip_hwnum) {
dev_err(dev,
- "requested GPIO %d is out of range [0..%d] for chip %s\n",
- idx, chip->ngpio, chip->label);
+ "requested GPIO %u (%u) is out of range [0..%u] for chip %s\n",
+ idx, p->chip_hwnum, chip->ngpio - 1,
+ chip->label);
return ERR_PTR(-EINVAL);
}
diff --git a/drivers/gpu/drm/amd/acp/Kconfig b/drivers/gpu/drm/amd/acp/Kconfig
index d968c2471412..0d12ebf66174 100644
--- a/drivers/gpu/drm/amd/acp/Kconfig
+++ b/drivers/gpu/drm/amd/acp/Kconfig
@@ -1,4 +1,4 @@
-# SPDX-License-Identifier: GPL-2.0-only
+# SPDX-License-Identifier: MIT
menu "ACP (Audio CoProcessor) Configuration"
config DRM_AMD_ACP
diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig
index 2e98c016cb47..9375e7f12420 100644
--- a/drivers/gpu/drm/amd/amdgpu/Kconfig
+++ b/drivers/gpu/drm/amd/amdgpu/Kconfig
@@ -1,4 +1,4 @@
-# SPDX-License-Identifier: GPL-2.0-only
+# SPDX-License-Identifier: MIT
config DRM_AMDGPU_SI
bool "Enable amdgpu support for SI parts"
depends on DRM_AMDGPU
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
index a97fb759e2f4..3e35a8f2c5e5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
@@ -613,7 +613,17 @@ static bool amdgpu_atpx_detect(void)
bool d3_supported = false;
struct pci_dev *parent_pdev;
- while ((pdev = pci_get_class(PCI_BASE_CLASS_DISPLAY << 16, pdev)) != NULL) {
+ while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, pdev)) != NULL) {
+ vga_count++;
+
+ has_atpx |= (amdgpu_atpx_pci_probe_handle(pdev) == true);
+
+ parent_pdev = pci_upstream_bridge(pdev);
+ d3_supported |= parent_pdev && parent_pdev->bridge_d3;
+ amdgpu_atpx_get_quirks(pdev);
+ }
+
+ while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_OTHER << 8, pdev)) != NULL) {
vga_count++;
has_atpx |= (amdgpu_atpx_pci_probe_handle(pdev) == true);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 2cdaf3b2a721..6614d8a6f4c8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -604,11 +604,8 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
continue;
}
- for (i = 0; i < num_entities; i++) {
- mutex_lock(&ctx->adev->lock_reset);
+ for (i = 0; i < num_entities; i++)
drm_sched_entity_fini(&ctx->entities[0][i].entity);
- mutex_unlock(&ctx->adev->lock_reset);
- }
}
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 44be3a45b25e..e1b8d8daeafc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -1488,7 +1488,7 @@ out:
/* Start rlc autoload after psp recieved all the gfx firmware */
if (psp->autoload_supported && ucode->ucode_id == (amdgpu_sriov_vf(adev) ?
- AMDGPU_UCODE_ID_CP_MEC2 : AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM)) {
+ AMDGPU_UCODE_ID_CP_MEC2 : AMDGPU_UCODE_ID_RLC_G)) {
ret = psp_rlc_autoload(psp);
if (ret) {
DRM_ERROR("Failed to start rlc autoload\n");
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
index 410587b950f3..914acecda5cf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
@@ -292,10 +292,10 @@ enum AMDGPU_UCODE_ID {
AMDGPU_UCODE_ID_CP_MEC2_JT,
AMDGPU_UCODE_ID_CP_MES,
AMDGPU_UCODE_ID_CP_MES_DATA,
- AMDGPU_UCODE_ID_RLC_G,
AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL,
AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM,
AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM,
+ AMDGPU_UCODE_ID_RLC_G,
AMDGPU_UCODE_ID_STORAGE,
AMDGPU_UCODE_ID_SMC,
AMDGPU_UCODE_ID_UVD,
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
index 16fbd2bc8ad1..4043ebcea5de 100644
--- a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
+++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
@@ -268,23 +268,29 @@ static void df_v3_6_update_medium_grain_clock_gating(struct amdgpu_device *adev,
{
u32 tmp;
- /* Put DF on broadcast mode */
- adev->df_funcs->enable_broadcast_mode(adev, true);
-
- if (enable && (adev->cg_flags & AMD_CG_SUPPORT_DF_MGCG)) {
- tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater);
- tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK;
- tmp |= DF_V3_6_MGCG_ENABLE_15_CYCLE_DELAY;
- WREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater, tmp);
- } else {
- tmp = RREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater);
- tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK;
- tmp |= DF_V3_6_MGCG_DISABLE;
- WREG32_SOC15(DF, 0, mmDF_PIE_AON0_DfGlobalClkGater, tmp);
- }
+ if (adev->cg_flags & AMD_CG_SUPPORT_DF_MGCG) {
+ /* Put DF on broadcast mode */
+ adev->df_funcs->enable_broadcast_mode(adev, true);
+
+ if (enable) {
+ tmp = RREG32_SOC15(DF, 0,
+ mmDF_PIE_AON0_DfGlobalClkGater);
+ tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK;
+ tmp |= DF_V3_6_MGCG_ENABLE_15_CYCLE_DELAY;
+ WREG32_SOC15(DF, 0,
+ mmDF_PIE_AON0_DfGlobalClkGater, tmp);
+ } else {
+ tmp = RREG32_SOC15(DF, 0,
+ mmDF_PIE_AON0_DfGlobalClkGater);
+ tmp &= ~DF_PIE_AON0_DfGlobalClkGater__MGCGMode_MASK;
+ tmp |= DF_V3_6_MGCG_DISABLE;
+ WREG32_SOC15(DF, 0,
+ mmDF_PIE_AON0_DfGlobalClkGater, tmp);
+ }
- /* Exit broadcast mode */
- adev->df_funcs->enable_broadcast_mode(adev, false);
+ /* Exit broadcast mode */
+ adev->df_funcs->enable_broadcast_mode(adev, false);
+ }
}
static void df_v3_6_get_clockgating_state(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index f2c1b026397b..ba9e53a1abc3 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -117,10 +117,13 @@ static const struct soc15_reg_golden golden_settings_gc_10_1[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CGTT_SCLK_CTRL, 0x10000000, 0x10000100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL2, 0xffffffff, 0x1402002f),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xffff9fff, 0x00001188),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_TIMEOUT_COUNTER, 0xffffffff, 0x00000800),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x08000009),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00400000, 0x04440000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0x00000800, 0x00000820),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_SPARE, 0xffffffff, 0xffff3101),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL, 0x001f0000, 0x00070104),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ALU_CLK_CTRL, 0xffffffff, 0xffffffff),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ARB_CONFIG, 0x00000100, 0x00000130),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_LDS_CLK_CTRL, 0xffffffff, 0xffffffff),
@@ -162,10 +165,13 @@ static const struct soc15_reg_golden golden_settings_gc_10_1_1[] =
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CGTT_SCLK_CTRL, 0xffff0fff, 0x10000100),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL2, 0xffffffff, 0x1402002f),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmGL2C_CTRL3, 0xffffbfff, 0x00000188),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_TIMEOUT_COUNTER, 0xffffffff, 0x00000800),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x08000009),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00400000, 0x04440000),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_2, 0x00000800, 0x00000820),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_SPARE, 0xffffffff, 0xffff3101),
+ SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL, 0x001f0000, 0x00070105),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ALU_CLK_CTRL, 0xffffffff, 0xffffffff),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_ARB_CONFIG, 0x00000133, 0x00000130),
SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_LDS_CLK_CTRL, 0xffffffff, 0xffffffff),
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 983db77999e7..52a647d7022d 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -6146,7 +6146,23 @@ static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
- /* EVENT_WRITE_EOP - flush caches, send int */
+ /* Workaround for cache flush problems. First send a dummy EOP
+ * event down the pipe with seq one below.
+ */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
+ amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
+ EOP_TC_ACTION_EN |
+ EOP_TC_WB_ACTION_EN |
+ EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
+ EVENT_INDEX(5)));
+ amdgpu_ring_write(ring, addr & 0xfffffffc);
+ amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
+ DATA_SEL(1) | INT_SEL(0));
+ amdgpu_ring_write(ring, lower_32_bits(seq - 1));
+ amdgpu_ring_write(ring, upper_32_bits(seq - 1));
+
+ /* Then send the real EOP event down the pipe:
+ * EVENT_WRITE_EOP - flush caches, send int */
amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
EOP_TC_ACTION_EN |
@@ -6888,7 +6904,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
5 + /* COND_EXEC */
7 + /* PIPELINE_SYNC */
VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */
- 8 + /* FENCE for VM_FLUSH */
+ 12 + /* FENCE for VM_FLUSH */
20 + /* GDS switch */
4 + /* double SWITCH_BUFFER,
the first COND_EXEC jump to the place just
@@ -6900,7 +6916,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
31 + /* DE_META */
3 + /* CNTX_CTRL */
5 + /* HDP_INVL */
- 8 + 8 + /* FENCE x2 */
+ 12 + 12 + /* FENCE x2 */
2, /* SWITCH_BUFFER */
.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 66328ffa395a..97105a5bb246 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -1052,17 +1052,10 @@ static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
case CHIP_VEGA20:
break;
case CHIP_RAVEN:
- /* Disable GFXOFF on original raven. There are combinations
- * of sbios and platforms that are not stable.
- */
- if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8))
- adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
- else if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8)
- &&((adev->gfx.rlc_fw_version != 106 &&
- adev->gfx.rlc_fw_version < 531) ||
- (adev->gfx.rlc_fw_version == 53815) ||
- (adev->gfx.rlc_feature_version < 1) ||
- !adev->gfx.rlc.is_rlc_v2_1))
+ if (!(adev->rev_id >= 0x8 ||
+ adev->pdev->device == 0x15d8) &&
+ (adev->pm.fw_version < 0x41e2b || /* not raven1 fresh */
+ !adev->gfx.rlc.is_rlc_v2_1)) /* without rlc save restore ucodes */
adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
if (adev->pm.pp_feature & PP_GFXOFF_MASK)
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 232469507446..f5725336a5f2 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -219,6 +219,21 @@ static uint32_t gmc_v10_0_get_invalidate_req(unsigned int vmid,
return req;
}
+/**
+ * gmc_v10_0_use_invalidate_semaphore - judge whether to use semaphore
+ *
+ * @adev: amdgpu_device pointer
+ * @vmhub: vmhub type
+ *
+ */
+static bool gmc_v10_0_use_invalidate_semaphore(struct amdgpu_device *adev,
+ uint32_t vmhub)
+{
+ return ((vmhub == AMDGPU_MMHUB_0 ||
+ vmhub == AMDGPU_MMHUB_1) &&
+ (!amdgpu_sriov_vf(adev)));
+}
+
/*
* GART
* VMID 0 is the physical GPU addresses as used by the kernel.
@@ -229,6 +244,7 @@ static uint32_t gmc_v10_0_get_invalidate_req(unsigned int vmid,
static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
unsigned int vmhub, uint32_t flush_type)
{
+ bool use_semaphore = gmc_v10_0_use_invalidate_semaphore(adev, vmhub);
struct amdgpu_vmhub *hub = &adev->vmhub[vmhub];
u32 tmp = gmc_v10_0_get_invalidate_req(vmid, flush_type);
/* Use register 17 for GART */
@@ -244,8 +260,7 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
*/
/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
- if (vmhub == AMDGPU_MMHUB_0 ||
- vmhub == AMDGPU_MMHUB_1) {
+ if (use_semaphore) {
for (i = 0; i < adev->usec_timeout; i++) {
/* a read return value of 1 means semaphore acuqire */
tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng);
@@ -278,8 +293,7 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid,
}
/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
- if (vmhub == AMDGPU_MMHUB_0 ||
- vmhub == AMDGPU_MMHUB_1)
+ if (use_semaphore)
/*
* add semaphore release after invalidation,
* write with 0 means semaphore release
@@ -369,6 +383,7 @@ error_alloc:
static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
unsigned vmid, uint64_t pd_addr)
{
+ bool use_semaphore = gmc_v10_0_use_invalidate_semaphore(ring->adev, ring->funcs->vmhub);
struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub];
uint32_t req = gmc_v10_0_get_invalidate_req(vmid, 0);
unsigned eng = ring->vm_inv_eng;
@@ -381,8 +396,7 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
*/
/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
- if (ring->funcs->vmhub == AMDGPU_MMHUB_0 ||
- ring->funcs->vmhub == AMDGPU_MMHUB_1)
+ if (use_semaphore)
/* a read return value of 1 means semaphore acuqire */
amdgpu_ring_emit_reg_wait(ring,
hub->vm_inv_eng0_sem + eng, 0x1, 0x1);
@@ -398,8 +412,7 @@ static uint64_t gmc_v10_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
req, 1 << vmid);
/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
- if (ring->funcs->vmhub == AMDGPU_MMHUB_0 ||
- ring->funcs->vmhub == AMDGPU_MMHUB_1)
+ if (use_semaphore)
/*
* add semaphore release after invalidation,
* write with 0 means semaphore release
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 3c355fb5d2b4..a5b68b5e452f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -416,6 +416,24 @@ static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid,
return req;
}
+/**
+ * gmc_v9_0_use_invalidate_semaphore - judge whether to use semaphore
+ *
+ * @adev: amdgpu_device pointer
+ * @vmhub: vmhub type
+ *
+ */
+static bool gmc_v9_0_use_invalidate_semaphore(struct amdgpu_device *adev,
+ uint32_t vmhub)
+{
+ return ((vmhub == AMDGPU_MMHUB_0 ||
+ vmhub == AMDGPU_MMHUB_1) &&
+ (!amdgpu_sriov_vf(adev)) &&
+ (!(adev->asic_type == CHIP_RAVEN &&
+ adev->rev_id < 0x8 &&
+ adev->pdev->device == 0x15d8)));
+}
+
/*
* GART
* VMID 0 is the physical GPU addresses as used by the kernel.
@@ -435,6 +453,7 @@ static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid,
static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
uint32_t vmhub, uint32_t flush_type)
{
+ bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(adev, vmhub);
const unsigned eng = 17;
u32 j, tmp;
struct amdgpu_vmhub *hub;
@@ -468,8 +487,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
*/
/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
- if (vmhub == AMDGPU_MMHUB_0 ||
- vmhub == AMDGPU_MMHUB_1) {
+ if (use_semaphore) {
for (j = 0; j < adev->usec_timeout; j++) {
/* a read return value of 1 means semaphore acuqire */
tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_sem + eng);
@@ -499,8 +517,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
}
/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
- if (vmhub == AMDGPU_MMHUB_0 ||
- vmhub == AMDGPU_MMHUB_1)
+ if (use_semaphore)
/*
* add semaphore release after invalidation,
* write with 0 means semaphore release
@@ -518,6 +535,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
unsigned vmid, uint64_t pd_addr)
{
+ bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(ring->adev, ring->funcs->vmhub);
struct amdgpu_device *adev = ring->adev;
struct amdgpu_vmhub *hub = &adev->vmhub[ring->funcs->vmhub];
uint32_t req = gmc_v9_0_get_invalidate_req(vmid, 0);
@@ -531,8 +549,7 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
*/
/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
- if (ring->funcs->vmhub == AMDGPU_MMHUB_0 ||
- ring->funcs->vmhub == AMDGPU_MMHUB_1)
+ if (use_semaphore)
/* a read return value of 1 means semaphore acuqire */
amdgpu_ring_emit_reg_wait(ring,
hub->vm_inv_eng0_sem + eng, 0x1, 0x1);
@@ -548,8 +565,7 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,
req, 1 << vmid);
/* TODO: It needs to continue working on debugging with semaphore for GFXHUB as well. */
- if (ring->funcs->vmhub == AMDGPU_MMHUB_0 ||
- ring->funcs->vmhub == AMDGPU_MMHUB_1)
+ if (use_semaphore)
/*
* add semaphore release after invalidation,
* write with 0 means semaphore release
diff --git a/drivers/gpu/drm/amd/amdkfd/Kconfig b/drivers/gpu/drm/amd/amdkfd/Kconfig
index ba0e68057a89..b3672d10ea54 100644
--- a/drivers/gpu/drm/amd/amdkfd/Kconfig
+++ b/drivers/gpu/drm/amd/amdkfd/Kconfig
@@ -1,4 +1,4 @@
-# SPDX-License-Identifier: GPL-2.0-only
+# SPDX-License-Identifier: MIT
#
# Heterogenous system architecture configuration
#
diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig
index 313183b80032..ae161fe86ebb 100644
--- a/drivers/gpu/drm/amd/display/Kconfig
+++ b/drivers/gpu/drm/amd/display/Kconfig
@@ -1,4 +1,4 @@
-# SPDX-License-Identifier: GPL-2.0-only
+# SPDX-License-Identifier: MIT
menu "Display Engine Configuration"
depends on DRM && DRM_AMDGPU
diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
index 7873abea4112..5c3fcaa47410 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
@@ -1625,6 +1625,7 @@ static enum bp_result construct_integrated_info(
/* Don't need to check major revision as they are all 1 */
switch (revision.minor) {
case 11:
+ case 12:
result = get_integrated_info_v11(bp, info);
break;
default:
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c
index 790a2d211bd6..35c55e54eac0 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c
@@ -471,12 +471,28 @@ static void rn_notify_wm_ranges(struct clk_mgr *clk_mgr_base)
}
+static bool rn_are_clock_states_equal(struct dc_clocks *a,
+ struct dc_clocks *b)
+{
+ if (a->dispclk_khz != b->dispclk_khz)
+ return false;
+ else if (a->dppclk_khz != b->dppclk_khz)
+ return false;
+ else if (a->dcfclk_khz != b->dcfclk_khz)
+ return false;
+ else if (a->dcfclk_deep_sleep_khz != b->dcfclk_deep_sleep_khz)
+ return false;
+
+ return true;
+}
+
+
static struct clk_mgr_funcs dcn21_funcs = {
.get_dp_ref_clk_frequency = dce12_get_dp_ref_freq_khz,
.update_clocks = rn_update_clocks,
.init_clocks = rn_init_clocks,
.enable_pme_wa = rn_enable_pme_wa,
- /* .dump_clk_registers = rn_dump_clk_registers, */
+ .are_clock_states_equal = rn_are_clock_states_equal,
.notify_wm_ranges = rn_notify_wm_ranges
};
@@ -518,36 +534,83 @@ struct clk_bw_params rn_bw_params = {
.num_entries = 4,
},
- .wm_table = {
- .entries = {
- {
- .wm_inst = WM_A,
- .wm_type = WM_TYPE_PSTATE_CHG,
- .pstate_latency_us = 23.84,
- .valid = true,
- },
- {
- .wm_inst = WM_B,
- .wm_type = WM_TYPE_PSTATE_CHG,
- .pstate_latency_us = 23.84,
- .valid = true,
- },
- {
- .wm_inst = WM_C,
- .wm_type = WM_TYPE_PSTATE_CHG,
- .pstate_latency_us = 23.84,
- .valid = true,
- },
- {
- .wm_inst = WM_D,
- .wm_type = WM_TYPE_PSTATE_CHG,
- .pstate_latency_us = 23.84,
- .valid = true,
- },
+};
+
+struct wm_table ddr4_wm_table = {
+ .entries = {
+ {
+ .wm_inst = WM_A,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.72,
+ .sr_exit_time_us = 6.09,
+ .sr_enter_plus_exit_time_us = 7.14,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_B,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.72,
+ .sr_exit_time_us = 10.12,
+ .sr_enter_plus_exit_time_us = 11.48,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_C,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.72,
+ .sr_exit_time_us = 10.12,
+ .sr_enter_plus_exit_time_us = 11.48,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_D,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 11.72,
+ .sr_exit_time_us = 10.12,
+ .sr_enter_plus_exit_time_us = 11.48,
+ .valid = true,
},
}
};
+struct wm_table lpddr4_wm_table = {
+ .entries = {
+ {
+ .wm_inst = WM_A,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 23.84,
+ .sr_exit_time_us = 12.5,
+ .sr_enter_plus_exit_time_us = 17.0,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_B,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 23.84,
+ .sr_exit_time_us = 12.5,
+ .sr_enter_plus_exit_time_us = 17.0,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_C,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 23.84,
+ .sr_exit_time_us = 12.5,
+ .sr_enter_plus_exit_time_us = 17.0,
+ .valid = true,
+ },
+ {
+ .wm_inst = WM_D,
+ .wm_type = WM_TYPE_PSTATE_CHG,
+ .pstate_latency_us = 23.84,
+ .sr_exit_time_us = 12.5,
+ .sr_enter_plus_exit_time_us = 17.0,
+ .valid = true,
+ },
+ }
+};
+
+
static unsigned int find_dcfclk_for_voltage(struct dpm_clocks *clock_table, unsigned int voltage)
{
int i;
@@ -561,7 +624,7 @@ static unsigned int find_dcfclk_for_voltage(struct dpm_clocks *clock_table, unsi
return 0;
}
-static void rn_clk_mgr_helper_populate_bw_params(struct clk_bw_params *bw_params, struct dpm_clocks *clock_table, struct hw_asic_id *asic_id)
+static void rn_clk_mgr_helper_populate_bw_params(struct clk_bw_params *bw_params, struct dpm_clocks *clock_table, struct integrated_info *bios_info)
{
int i, j = 0;
@@ -593,8 +656,8 @@ static void rn_clk_mgr_helper_populate_bw_params(struct clk_bw_params *bw_params
bw_params->clk_table.entries[i].dcfclk_mhz = find_dcfclk_for_voltage(clock_table, clock_table->FClocks[j].Vol);
}
- bw_params->vram_type = asic_id->vram_type;
- bw_params->num_channels = asic_id->vram_width / DDR4_DRAM_WIDTH;
+ bw_params->vram_type = bios_info->memory_type;
+ bw_params->num_channels = bios_info->ma_channel_number;
for (i = 0; i < WM_SET_COUNT; i++) {
bw_params->wm_table.entries[i].wm_inst = i;
@@ -669,15 +732,24 @@ void rn_clk_mgr_construct(
ASSERT(clk_mgr->base.dprefclk_khz == 600000);
clk_mgr->base.dprefclk_khz = 600000;
}
+
+ if (ctx->dc_bios->integrated_info->memory_type == LpDdr4MemType) {
+ rn_bw_params.wm_table = lpddr4_wm_table;
+ } else {
+ rn_bw_params.wm_table = ddr4_wm_table;
+ }
}
dce_clock_read_ss_info(clk_mgr);
+
clk_mgr->base.bw_params = &rn_bw_params;
if (pp_smu && pp_smu->rn_funcs.get_dpm_clock_table) {
pp_smu->rn_funcs.get_dpm_clock_table(&pp_smu->rn_funcs.pp_smu, &clock_table);
- rn_clk_mgr_helper_populate_bw_params(clk_mgr->base.bw_params, &clock_table, &ctx->asic_id);
+ if (ctx->dc_bios && ctx->dc_bios->integrated_info) {
+ rn_clk_mgr_helper_populate_bw_params (clk_mgr->base.bw_params, &clock_table, ctx->dc_bios->integrated_info);
+ }
}
if (!IS_FPGA_MAXIMUS_DC(ctx->dce_environment) && clk_mgr->smu_ver >= 0x00371500) {
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
index 12ba6fdf89b7..62d8289abb4e 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
@@ -372,7 +372,7 @@ bool dc_link_is_dp_sink_present(struct dc_link *link)
if (GPIO_RESULT_OK != dal_ddc_open(
ddc, GPIO_MODE_INPUT, GPIO_DDC_CONFIG_TYPE_MODE_I2C)) {
- dal_gpio_destroy_ddc(&ddc);
+ dal_ddc_close(ddc);
return present;
}
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c
index 7f904d55c1bc..81789191d4ec 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c
@@ -586,7 +586,7 @@ bool dal_ddc_service_query_ddc_data(
bool dal_ddc_submit_aux_command(struct ddc_service *ddc,
struct aux_payload *payload)
{
- uint8_t retrieved = 0;
+ uint32_t retrieved = 0;
bool ret = 0;
if (!ddc)
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
index 0f59b68aa4c2..504055fc70e8 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
@@ -3522,7 +3522,14 @@ void dp_set_fec_enable(struct dc_link *link, bool enable)
if (link_enc->funcs->fec_set_enable &&
link->dpcd_caps.fec_cap.bits.FEC_CAPABLE) {
if (link->fec_state == dc_link_fec_ready && enable) {
- msleep(1);
+ /* Accord to DP spec, FEC enable sequence can first
+ * be transmitted anytime after 1000 LL codes have
+ * been transmitted on the link after link training
+ * completion. Using 1 lane RBR should have the maximum
+ * time for transmitting 1000 LL codes which is 6.173 us.
+ * So use 7 microseconds delay instead.
+ */
+ udelay(7);
link_enc->funcs->fec_set_enable(link_enc, true);
link->fec_state = dc_link_fec_enabled;
} else if (link->fec_state == dc_link_fec_enabled && !enable) {
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c
index e472608faf33..793c0cec407f 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_aux.c
@@ -583,6 +583,8 @@ bool dce_aux_transfer_with_retries(struct ddc_service *ddc,
uint8_t reply;
bool payload_reply = true;
enum aux_channel_operation_result operation_result;
+ bool retry_on_defer = false;
+
int aux_ack_retries = 0,
aux_defer_retries = 0,
aux_i2c_defer_retries = 0,
@@ -613,8 +615,10 @@ bool dce_aux_transfer_with_retries(struct ddc_service *ddc,
break;
case AUX_TRANSACTION_REPLY_AUX_DEFER:
- case AUX_TRANSACTION_REPLY_I2C_OVER_AUX_NACK:
case AUX_TRANSACTION_REPLY_I2C_OVER_AUX_DEFER:
+ retry_on_defer = true;
+ /* fall through */
+ case AUX_TRANSACTION_REPLY_I2C_OVER_AUX_NACK:
if (++aux_defer_retries >= AUX_MAX_DEFER_RETRIES) {
goto fail;
} else {
@@ -647,15 +651,24 @@ bool dce_aux_transfer_with_retries(struct ddc_service *ddc,
break;
case AUX_CHANNEL_OPERATION_FAILED_TIMEOUT:
- if (++aux_timeout_retries >= AUX_MAX_TIMEOUT_RETRIES)
- goto fail;
- else {
- /*
- * DP 1.4, 2.8.2: AUX Transaction Response/Reply Timeouts
- * According to the DP spec there should be 3 retries total
- * with a 400us wait inbetween each. Hardware already waits
- * for 550us therefore no wait is required here.
- */
+ // Check whether a DEFER had occurred before the timeout.
+ // If so, treat timeout as a DEFER.
+ if (retry_on_defer) {
+ if (++aux_defer_retries >= AUX_MAX_DEFER_RETRIES)
+ goto fail;
+ else if (payload->defer_delay > 0)
+ msleep(payload->defer_delay);
+ } else {
+ if (++aux_timeout_retries >= AUX_MAX_TIMEOUT_RETRIES)
+ goto fail;
+ else {
+ /*
+ * DP 1.4, 2.8.2: AUX Transaction Response/Reply Timeouts
+ * According to the DP spec there should be 3 retries total
+ * with a 400us wait inbetween each. Hardware already waits
+ * for 550us therefore no wait is required here.
+ */
+ }
}
break;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile
index 63f3bddba7da..10b47986526b 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: MIT
#
# Makefile for DCN.
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
index 09793336d84f..23ff2f1c75b5 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
@@ -923,7 +923,9 @@ static const struct resource_caps res_cap_nv14 = {
.num_dwb = 1,
.num_ddc = 5,
.num_vmid = 16,
+#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
.num_dsc = 5,
+#endif
};
static const struct dc_debug_options debug_defaults_drv = {
@@ -1536,13 +1538,20 @@ enum dc_status dcn20_build_mapped_resource(const struct dc *dc, struct dc_state
static void acquire_dsc(struct resource_context *res_ctx,
const struct resource_pool *pool,
- struct display_stream_compressor **dsc)
+ struct display_stream_compressor **dsc,
+ int pipe_idx)
{
int i;
ASSERT(*dsc == NULL);
*dsc = NULL;
+ if (pool->res_cap->num_dsc == pool->res_cap->num_opp) {
+ *dsc = pool->dscs[pipe_idx];
+ res_ctx->is_dsc_acquired[pipe_idx] = true;
+ return;
+ }
+
/* Find first free DSC */
for (i = 0; i < pool->res_cap->num_dsc; i++)
if (!res_ctx->is_dsc_acquired[i]) {
@@ -1585,7 +1594,7 @@ static enum dc_status add_dsc_to_stream_resource(struct dc *dc,
if (pipe_ctx->stream != dc_stream)
continue;
- acquire_dsc(&dc_ctx->res_ctx, pool, &pipe_ctx->stream_res.dsc);
+ acquire_dsc(&dc_ctx->res_ctx, pool, &pipe_ctx->stream_res.dsc, i);
/* The number of DSCs can be less than the number of pipes */
if (!pipe_ctx->stream_res.dsc) {
@@ -1785,7 +1794,7 @@ bool dcn20_split_stream_for_odm(
next_odm_pipe->stream_res.opp = pool->opps[next_odm_pipe->pipe_idx];
#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
if (next_odm_pipe->stream->timing.flags.DSC == 1) {
- acquire_dsc(res_ctx, pool, &next_odm_pipe->stream_res.dsc);
+ acquire_dsc(res_ctx, pool, &next_odm_pipe->stream_res.dsc, next_odm_pipe->pipe_idx);
ASSERT(next_odm_pipe->stream_res.dsc);
if (next_odm_pipe->stream_res.dsc == NULL)
return false;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_stream_encoder.c
index 4b3401616434..fcb3877b4fcb 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_stream_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_stream_encoder.c
@@ -492,15 +492,23 @@ void enc2_stream_encoder_dp_unblank(
DP_VID_N_MUL, n_multiply);
}
- /* set DIG_START to 0x1 to reset FIFO */
+ /* make sure stream is disabled before resetting steer fifo */
+ REG_UPDATE(DP_VID_STREAM_CNTL, DP_VID_STREAM_ENABLE, false);
+ REG_WAIT(DP_VID_STREAM_CNTL, DP_VID_STREAM_STATUS, 0, 10, 5000);
+ /* set DIG_START to 0x1 to reset FIFO */
REG_UPDATE(DIG_FE_CNTL, DIG_START, 1);
+ udelay(1);
/* write 0 to take the FIFO out of reset */
REG_UPDATE(DIG_FE_CNTL, DIG_START, 0);
- /* switch DP encoder to CRTC data */
+ /* switch DP encoder to CRTC data, but reset it the fifo first. It may happen
+ * that it overflows during mode transition, and sometimes doesn't recover.
+ */
+ REG_UPDATE(DP_STEER_FIFO, DP_STEER_FIFO_RESET, 1);
+ udelay(10);
REG_UPDATE(DP_STEER_FIFO, DP_STEER_FIFO_RESET, 0);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/Makefile b/drivers/gpu/drm/amd/display/dc/dcn21/Makefile
index 14113ccf498d..5b8c17564bc1 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn21/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn21/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: MIT
#
# Makefile for DCN21.
diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c
index 459bd9a5caed..b29b2c99a564 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c
@@ -23,6 +23,8 @@
*
*/
+#include <linux/slab.h>
+
#include "dm_services.h"
#include "dc.h"
@@ -257,7 +259,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn2_1_soc = {
.vmm_page_size_bytes = 4096,
.dram_clock_change_latency_us = 23.84,
.return_bus_width_bytes = 64,
- .dispclk_dppclk_vco_speed_mhz = 3550,
+ .dispclk_dppclk_vco_speed_mhz = 3600,
.xfc_bus_transport_time_us = 4,
.xfc_xbuf_latency_tolerance_us = 4,
.use_urgent_burst_bw = 1,
@@ -1000,6 +1002,8 @@ static void calculate_wm_set_for_vlevel(
pipes[0].clks_cfg.socclk_mhz = dml->soc.clock_limits[vlevel].socclk_mhz;
dml->soc.dram_clock_change_latency_us = table_entry->pstate_latency_us;
+ dml->soc.sr_exit_time_us = table_entry->sr_exit_time_us;
+ dml->soc.sr_enter_plus_exit_time_us = table_entry->sr_enter_plus_exit_time_us;
wm_set->urgent_ns = get_wm_urgent(dml, pipes, pipe_cnt) * 1000;
wm_set->cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(dml, pipes, pipe_cnt) * 1000;
@@ -1017,14 +1021,21 @@ static void calculate_wm_set_for_vlevel(
static void patch_bounding_box(struct dc *dc, struct _vcs_dpi_soc_bounding_box_st *bb)
{
+ int i;
+
kernel_fpu_begin();
if (dc->bb_overrides.sr_exit_time_ns) {
- bb->sr_exit_time_us = dc->bb_overrides.sr_exit_time_ns / 1000.0;
+ for (i = 0; i < WM_SET_COUNT; i++) {
+ dc->clk_mgr->bw_params->wm_table.entries[i].sr_exit_time_us =
+ dc->bb_overrides.sr_exit_time_ns / 1000.0;
+ }
}
if (dc->bb_overrides.sr_enter_plus_exit_time_ns) {
- bb->sr_enter_plus_exit_time_us =
- dc->bb_overrides.sr_enter_plus_exit_time_ns / 1000.0;
+ for (i = 0; i < WM_SET_COUNT; i++) {
+ dc->clk_mgr->bw_params->wm_table.entries[i].sr_enter_plus_exit_time_us =
+ dc->bb_overrides.sr_enter_plus_exit_time_ns / 1000.0;
+ }
}
if (dc->bb_overrides.urgent_latency_ns) {
@@ -1032,9 +1043,12 @@ static void patch_bounding_box(struct dc *dc, struct _vcs_dpi_soc_bounding_box_s
}
if (dc->bb_overrides.dram_clock_change_latency_ns) {
- bb->dram_clock_change_latency_us =
+ for (i = 0; i < WM_SET_COUNT; i++) {
+ dc->clk_mgr->bw_params->wm_table.entries[i].pstate_latency_us =
dc->bb_overrides.dram_clock_change_latency_ns / 1000.0;
+ }
}
+
kernel_fpu_end();
}
diff --git a/drivers/gpu/drm/amd/display/dc/dsc/Makefile b/drivers/gpu/drm/amd/display/dc/dsc/Makefile
index 970737217e53..641ffb7cfaed 100644
--- a/drivers/gpu/drm/amd/display/dc/dsc/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dsc/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: MIT
#
# Makefile for the 'dsc' sub-component of DAL.
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h
index 4e18e77dcf42..026e6a2a2c44 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr.h
@@ -69,6 +69,8 @@ struct wm_range_table_entry {
unsigned int wm_inst;
unsigned int wm_type;
double pstate_latency_us;
+ double sr_exit_time_us;
+ double sr_enter_plus_exit_time_us;
bool valid;
};
diff --git a/drivers/gpu/drm/amd/display/include/i2caux_interface.h b/drivers/gpu/drm/amd/display/include/i2caux_interface.h
index bb012cb1a9f5..c7fbb9c3ad6b 100644
--- a/drivers/gpu/drm/amd/display/include/i2caux_interface.h
+++ b/drivers/gpu/drm/amd/display/include/i2caux_interface.h
@@ -42,7 +42,7 @@ struct aux_payload {
bool write;
bool mot;
uint32_t address;
- uint8_t length;
+ uint32_t length;
uint8_t *data;
/*
* used to return the reply type of the transaction
diff --git a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c
index 16e69bbc69aa..5437b50e9f90 100644
--- a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c
+++ b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c
@@ -37,8 +37,8 @@
#define STATIC_SCREEN_RAMP_DELTA_REFRESH_RATE_PER_FRAME ((1000 / 60) * 65)
/* Number of elements in the render times cache array */
#define RENDER_TIMES_MAX_COUNT 10
-/* Threshold to exit/exit BTR (to avoid frequent enter-exits at the lower limit) */
-#define BTR_MAX_MARGIN 2500
+/* Threshold to exit BTR (to avoid frequent enter-exits at the lower limit) */
+#define BTR_EXIT_MARGIN 2000
/* Threshold to change BTR multiplier (to avoid frequent changes) */
#define BTR_DRIFT_MARGIN 2000
/*Threshold to exit fixed refresh rate*/
@@ -254,22 +254,24 @@ static void apply_below_the_range(struct core_freesync *core_freesync,
unsigned int delta_from_mid_point_in_us_1 = 0xFFFFFFFF;
unsigned int delta_from_mid_point_in_us_2 = 0xFFFFFFFF;
unsigned int frames_to_insert = 0;
+ unsigned int min_frame_duration_in_ns = 0;
+ unsigned int max_render_time_in_us = in_out_vrr->max_duration_in_us;
unsigned int delta_from_mid_point_delta_in_us;
- unsigned int max_render_time_in_us =
- in_out_vrr->max_duration_in_us - in_out_vrr->btr.margin_in_us;
+
+ min_frame_duration_in_ns = ((unsigned int) (div64_u64(
+ (1000000000ULL * 1000000),
+ in_out_vrr->max_refresh_in_uhz)));
/* Program BTR */
- if ((last_render_time_in_us + in_out_vrr->btr.margin_in_us / 2) < max_render_time_in_us) {
+ if (last_render_time_in_us + BTR_EXIT_MARGIN < max_render_time_in_us) {
/* Exit Below the Range */
if (in_out_vrr->btr.btr_active) {
in_out_vrr->btr.frame_counter = 0;
in_out_vrr->btr.btr_active = false;
}
- } else if (last_render_time_in_us > (max_render_time_in_us + in_out_vrr->btr.margin_in_us / 2)) {
+ } else if (last_render_time_in_us > max_render_time_in_us) {
/* Enter Below the Range */
- if (!in_out_vrr->btr.btr_active) {
- in_out_vrr->btr.btr_active = true;
- }
+ in_out_vrr->btr.btr_active = true;
}
/* BTR set to "not active" so disengage */
@@ -325,9 +327,7 @@ static void apply_below_the_range(struct core_freesync *core_freesync,
/* Choose number of frames to insert based on how close it
* can get to the mid point of the variable range.
*/
- if ((frame_time_in_us / mid_point_frames_ceil) > in_out_vrr->min_duration_in_us &&
- (delta_from_mid_point_in_us_1 < delta_from_mid_point_in_us_2 ||
- mid_point_frames_floor < 2)) {
+ if (delta_from_mid_point_in_us_1 < delta_from_mid_point_in_us_2) {
frames_to_insert = mid_point_frames_ceil;
delta_from_mid_point_delta_in_us = delta_from_mid_point_in_us_2 -
delta_from_mid_point_in_us_1;
@@ -343,7 +343,7 @@ static void apply_below_the_range(struct core_freesync *core_freesync,
if (in_out_vrr->btr.frames_to_insert != 0 &&
delta_from_mid_point_delta_in_us < BTR_DRIFT_MARGIN) {
if (((last_render_time_in_us / in_out_vrr->btr.frames_to_insert) <
- max_render_time_in_us) &&
+ in_out_vrr->max_duration_in_us) &&
((last_render_time_in_us / in_out_vrr->btr.frames_to_insert) >
in_out_vrr->min_duration_in_us))
frames_to_insert = in_out_vrr->btr.frames_to_insert;
@@ -796,11 +796,6 @@ void mod_freesync_build_vrr_params(struct mod_freesync *mod_freesync,
refresh_range = in_out_vrr->max_refresh_in_uhz -
in_out_vrr->min_refresh_in_uhz;
- in_out_vrr->btr.margin_in_us = in_out_vrr->max_duration_in_us -
- 2 * in_out_vrr->min_duration_in_us;
- if (in_out_vrr->btr.margin_in_us > BTR_MAX_MARGIN)
- in_out_vrr->btr.margin_in_us = BTR_MAX_MARGIN;
-
in_out_vrr->supported = true;
}
@@ -816,7 +811,6 @@ void mod_freesync_build_vrr_params(struct mod_freesync *mod_freesync,
in_out_vrr->btr.inserted_duration_in_us = 0;
in_out_vrr->btr.frames_to_insert = 0;
in_out_vrr->btr.frame_counter = 0;
-
in_out_vrr->btr.mid_point_in_us =
(in_out_vrr->min_duration_in_us +
in_out_vrr->max_duration_in_us) / 2;
diff --git a/drivers/gpu/drm/amd/display/modules/inc/mod_freesync.h b/drivers/gpu/drm/amd/display/modules/inc/mod_freesync.h
index dbe7835aabcf..dc187844d10b 100644
--- a/drivers/gpu/drm/amd/display/modules/inc/mod_freesync.h
+++ b/drivers/gpu/drm/amd/display/modules/inc/mod_freesync.h
@@ -92,7 +92,6 @@ struct mod_vrr_params_btr {
uint32_t inserted_duration_in_us;
uint32_t frames_to_insert;
uint32_t frame_counter;
- uint32_t margin_in_us;
};
struct mod_vrr_params_fixed_refresh {
diff --git a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
index 5ff7ccedfbed..a23729d3174b 100644
--- a/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/powerplay/amdgpu_smu.c
@@ -866,6 +866,7 @@ static int smu_sw_init(void *handle)
smu->smu_baco.platform_support = false;
mutex_init(&smu->sensor_lock);
+ mutex_init(&smu->metrics_lock);
smu->watermarks_bitmap = 0;
smu->power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT;
diff --git a/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c b/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c
index ce3566ca3e24..472e9fed411a 100644
--- a/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c
+++ b/drivers/gpu/drm/amd/powerplay/arcturus_ppt.c
@@ -862,18 +862,21 @@ static int arcturus_get_metrics_table(struct smu_context *smu,
struct smu_table_context *smu_table= &smu->smu_table;
int ret = 0;
+ mutex_lock(&smu->metrics_lock);
if (!smu_table->metrics_time ||
time_after(jiffies, smu_table->metrics_time + HZ / 1000)) {
ret = smu_update_table(smu, SMU_TABLE_SMU_METRICS, 0,
(void *)smu_table->metrics_table, false);
if (ret) {
pr_info("Failed to export SMU metrics table!\n");
+ mutex_unlock(&smu->metrics_lock);
return ret;
}
smu_table->metrics_time = jiffies;
}
memcpy(metrics_table, smu_table->metrics_table, sizeof(SmuMetrics_t));
+ mutex_unlock(&smu->metrics_lock);
return ret;
}
@@ -1313,12 +1316,17 @@ static int arcturus_get_power_profile_mode(struct smu_context *smu,
"VR",
"COMPUTE",
"CUSTOM"};
+ static const char *title[] = {
+ "PROFILE_INDEX(NAME)"};
uint32_t i, size = 0;
int16_t workload_type = 0;
if (!smu->pm_enabled || !buf)
return -EINVAL;
+ size += sprintf(buf + size, "%16s\n",
+ title[0]);
+
for (i = 0; i <= PP_SMC_POWER_PROFILE_CUSTOM; i++) {
/*
* Conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT
diff --git a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
index ac9758305ab3..41fce75b263f 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/amdgpu_smu.h
@@ -349,6 +349,7 @@ struct smu_context
const struct pptable_funcs *ppt_funcs;
struct mutex mutex;
struct mutex sensor_lock;
+ struct mutex metrics_lock;
uint64_t pool_size;
struct smu_table_context smu_table;
diff --git a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c
index 4a14fd1f9fd5..ca62e92e5a4f 100644
--- a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c
+++ b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c
@@ -562,17 +562,20 @@ static int navi10_get_metrics_table(struct smu_context *smu,
struct smu_table_context *smu_table= &smu->smu_table;
int ret = 0;
+ mutex_lock(&smu->metrics_lock);
if (!smu_table->metrics_time || time_after(jiffies, smu_table->metrics_time + msecs_to_jiffies(100))) {
ret = smu_update_table(smu, SMU_TABLE_SMU_METRICS, 0,
(void *)smu_table->metrics_table, false);
if (ret) {
pr_info("Failed to export SMU metrics table!\n");
+ mutex_unlock(&smu->metrics_lock);
return ret;
}
smu_table->metrics_time = jiffies;
}
memcpy(metrics_table, smu_table->metrics_table, sizeof(SmuMetrics_t));
+ mutex_unlock(&smu->metrics_lock);
return ret;
}
diff --git a/drivers/gpu/drm/amd/powerplay/vega20_ppt.c b/drivers/gpu/drm/amd/powerplay/vega20_ppt.c
index 60b9ff097142..0d3a3b0a934e 100644
--- a/drivers/gpu/drm/amd/powerplay/vega20_ppt.c
+++ b/drivers/gpu/drm/amd/powerplay/vega20_ppt.c
@@ -1678,17 +1678,20 @@ static int vega20_get_metrics_table(struct smu_context *smu,
struct smu_table_context *smu_table= &smu->smu_table;
int ret = 0;
+ mutex_lock(&smu->metrics_lock);
if (!smu_table->metrics_time || time_after(jiffies, smu_table->metrics_time + HZ / 1000)) {
ret = smu_update_table(smu, SMU_TABLE_SMU_METRICS, 0,
(void *)smu_table->metrics_table, false);
if (ret) {
pr_info("Failed to export SMU metrics table!\n");
+ mutex_unlock(&smu->metrics_lock);
return ret;
}
smu_table->metrics_time = jiffies;
}
memcpy(metrics_table, smu_table->metrics_table, sizeof(SmuMetrics_t));
+ mutex_unlock(&smu->metrics_lock);
return ret;
}
diff --git a/drivers/gpu/drm/arm/malidp_mw.c b/drivers/gpu/drm/arm/malidp_mw.c
index 875a3a9eabfa..7d0e7b031e44 100644
--- a/drivers/gpu/drm/arm/malidp_mw.c
+++ b/drivers/gpu/drm/arm/malidp_mw.c
@@ -56,7 +56,7 @@ malidp_mw_connector_mode_valid(struct drm_connector *connector,
return MODE_OK;
}
-const struct drm_connector_helper_funcs malidp_mw_connector_helper_funcs = {
+static const struct drm_connector_helper_funcs malidp_mw_connector_helper_funcs = {
.get_modes = malidp_mw_connector_get_modes,
.mode_valid = malidp_mw_connector_mode_valid,
};
diff --git a/drivers/gpu/drm/exynos/exynos_drm_gsc.c b/drivers/gpu/drm/exynos/exynos_drm_gsc.c
index 7ae087b0504d..88b6fcaa20be 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_gsc.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_gsc.c
@@ -1313,6 +1313,7 @@ static int gsc_remove(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
+ component_del(dev, &gsc_component_ops);
pm_runtime_dont_use_autosuspend(dev);
pm_runtime_disable(dev);
diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c
index c7c2b349858d..2a27fb5d7dc6 100644
--- a/drivers/gpu/drm/i915/display/intel_ddi.c
+++ b/drivers/gpu/drm/i915/display/intel_ddi.c
@@ -3986,6 +3986,7 @@ static void intel_enable_ddi(struct intel_encoder *encoder,
if (conn_state->content_protection ==
DRM_MODE_CONTENT_PROTECTION_DESIRED)
intel_hdcp_enable(to_intel_connector(conn_state->connector),
+ crtc_state->cpu_transcoder,
(u8)conn_state->hdcp_content_type);
}
@@ -4089,7 +4090,9 @@ static void intel_ddi_update_pipe(struct intel_encoder *encoder,
if (conn_state->content_protection ==
DRM_MODE_CONTENT_PROTECTION_DESIRED ||
content_protection_type_changed)
- intel_hdcp_enable(connector, (u8)conn_state->hdcp_content_type);
+ intel_hdcp_enable(connector,
+ crtc_state->cpu_transcoder,
+ (u8)conn_state->hdcp_content_type);
}
static void
diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index 6f5e3bd13ad1..effc4250b230 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -15112,7 +15112,7 @@ intel_prepare_plane_fb(struct drm_plane *plane,
return ret;
fb_obj_bump_render_priority(obj);
- intel_frontbuffer_flush(obj->frontbuffer, ORIGIN_DIRTYFB);
+ i915_gem_object_flush_frontbuffer(obj, ORIGIN_DIRTYFB);
if (!new_plane_state->base.fence) { /* implicit fencing */
struct dma_fence *fence;
diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c
index ce1b64f4dd44..12ba74788cce 100644
--- a/drivers/gpu/drm/i915/display/intel_display_power.c
+++ b/drivers/gpu/drm/i915/display/intel_display_power.c
@@ -3688,6 +3688,151 @@ static const struct i915_power_well_desc icl_power_wells[] = {
},
};
+static const struct i915_power_well_desc ehl_power_wells[] = {
+ {
+ .name = "always-on",
+ .always_on = true,
+ .domains = POWER_DOMAIN_MASK,
+ .ops = &i9xx_always_on_power_well_ops,
+ .id = DISP_PW_ID_NONE,
+ },
+ {
+ .name = "power well 1",
+ /* Handled by the DMC firmware */
+ .always_on = true,
+ .domains = 0,
+ .ops = &hsw_power_well_ops,
+ .id = SKL_DISP_PW_1,
+ {
+ .hsw.regs = &hsw_power_well_regs,
+ .hsw.idx = ICL_PW_CTL_IDX_PW_1,
+ .hsw.has_fuses = true,
+ },
+ },
+ {
+ .name = "DC off",
+ .domains = ICL_DISPLAY_DC_OFF_POWER_DOMAINS,
+ .ops = &gen9_dc_off_power_well_ops,
+ .id = SKL_DISP_DC_OFF,
+ },
+ {
+ .name = "power well 2",
+ .domains = ICL_PW_2_POWER_DOMAINS,
+ .ops = &hsw_power_well_ops,
+ .id = SKL_DISP_PW_2,
+ {
+ .hsw.regs = &hsw_power_well_regs,
+ .hsw.idx = ICL_PW_CTL_IDX_PW_2,
+ .hsw.has_fuses = true,
+ },
+ },
+ {
+ .name = "power well 3",
+ .domains = ICL_PW_3_POWER_DOMAINS,
+ .ops = &hsw_power_well_ops,
+ .id = DISP_PW_ID_NONE,
+ {
+ .hsw.regs = &hsw_power_well_regs,
+ .hsw.idx = ICL_PW_CTL_IDX_PW_3,
+ .hsw.irq_pipe_mask = BIT(PIPE_B),
+ .hsw.has_vga = true,
+ .hsw.has_fuses = true,
+ },
+ },
+ {
+ .name = "DDI A IO",
+ .domains = ICL_DDI_IO_A_POWER_DOMAINS,
+ .ops = &hsw_power_well_ops,
+ .id = DISP_PW_ID_NONE,
+ {
+ .hsw.regs = &icl_ddi_power_well_regs,
+ .hsw.idx = ICL_PW_CTL_IDX_DDI_A,
+ },
+ },
+ {
+ .name = "DDI B IO",
+ .domains = ICL_DDI_IO_B_POWER_DOMAINS,
+ .ops = &hsw_power_well_ops,
+ .id = DISP_PW_ID_NONE,
+ {
+ .hsw.regs = &icl_ddi_power_well_regs,
+ .hsw.idx = ICL_PW_CTL_IDX_DDI_B,
+ },
+ },
+ {
+ .name = "DDI C IO",
+ .domains = ICL_DDI_IO_C_POWER_DOMAINS,
+ .ops = &hsw_power_well_ops,
+ .id = DISP_PW_ID_NONE,
+ {
+ .hsw.regs = &icl_ddi_power_well_regs,
+ .hsw.idx = ICL_PW_CTL_IDX_DDI_C,
+ },
+ },
+ {
+ .name = "DDI D IO",
+ .domains = ICL_DDI_IO_D_POWER_DOMAINS,
+ .ops = &hsw_power_well_ops,
+ .id = DISP_PW_ID_NONE,
+ {
+ .hsw.regs = &icl_ddi_power_well_regs,
+ .hsw.idx = ICL_PW_CTL_IDX_DDI_D,
+ },
+ },
+ {
+ .name = "AUX A",
+ .domains = ICL_AUX_A_IO_POWER_DOMAINS,
+ .ops = &hsw_power_well_ops,
+ .id = DISP_PW_ID_NONE,
+ {
+ .hsw.regs = &icl_aux_power_well_regs,
+ .hsw.idx = ICL_PW_CTL_IDX_AUX_A,
+ },
+ },
+ {
+ .name = "AUX B",
+ .domains = ICL_AUX_B_IO_POWER_DOMAINS,
+ .ops = &hsw_power_well_ops,
+ .id = DISP_PW_ID_NONE,
+ {
+ .hsw.regs = &icl_aux_power_well_regs,
+ .hsw.idx = ICL_PW_CTL_IDX_AUX_B,
+ },
+ },
+ {
+ .name = "AUX C",
+ .domains = ICL_AUX_C_TC1_IO_POWER_DOMAINS,
+ .ops = &hsw_power_well_ops,
+ .id = DISP_PW_ID_NONE,
+ {
+ .hsw.regs = &icl_aux_power_well_regs,
+ .hsw.idx = ICL_PW_CTL_IDX_AUX_C,
+ },
+ },
+ {
+ .name = "AUX D",
+ .domains = ICL_AUX_D_TC2_IO_POWER_DOMAINS,
+ .ops = &hsw_power_well_ops,
+ .id = DISP_PW_ID_NONE,
+ {
+ .hsw.regs = &icl_aux_power_well_regs,
+ .hsw.idx = ICL_PW_CTL_IDX_AUX_D,
+ },
+ },
+ {
+ .name = "power well 4",
+ .domains = ICL_PW_4_POWER_DOMAINS,
+ .ops = &hsw_power_well_ops,
+ .id = DISP_PW_ID_NONE,
+ {
+ .hsw.regs = &hsw_power_well_regs,
+ .hsw.idx = ICL_PW_CTL_IDX_PW_4,
+ .hsw.has_fuses = true,
+ .hsw.irq_pipe_mask = BIT(PIPE_C),
+ },
+ },
+};
+
static const struct i915_power_well_desc tgl_power_wells[] = {
{
.name = "always-on",
@@ -3832,7 +3977,7 @@ static const struct i915_power_well_desc tgl_power_wells[] = {
{
.name = "AUX A",
.domains = TGL_AUX_A_IO_POWER_DOMAINS,
- .ops = &icl_combo_phy_aux_power_well_ops,
+ .ops = &hsw_power_well_ops,
.id = DISP_PW_ID_NONE,
{
.hsw.regs = &icl_aux_power_well_regs,
@@ -3842,7 +3987,7 @@ static const struct i915_power_well_desc tgl_power_wells[] = {
{
.name = "AUX B",
.domains = TGL_AUX_B_IO_POWER_DOMAINS,
- .ops = &icl_combo_phy_aux_power_well_ops,
+ .ops = &hsw_power_well_ops,
.id = DISP_PW_ID_NONE,
{
.hsw.regs = &icl_aux_power_well_regs,
@@ -3852,7 +3997,7 @@ static const struct i915_power_well_desc tgl_power_wells[] = {
{
.name = "AUX C",
.domains = TGL_AUX_C_IO_POWER_DOMAINS,
- .ops = &icl_combo_phy_aux_power_well_ops,
+ .ops = &hsw_power_well_ops,
.id = DISP_PW_ID_NONE,
{
.hsw.regs = &icl_aux_power_well_regs,
@@ -4162,6 +4307,8 @@ int intel_power_domains_init(struct drm_i915_private *dev_priv)
*/
if (IS_GEN(dev_priv, 12)) {
err = set_power_wells(power_domains, tgl_power_wells);
+ } else if (IS_ELKHARTLAKE(dev_priv)) {
+ err = set_power_wells(power_domains, ehl_power_wells);
} else if (IS_GEN(dev_priv, 11)) {
err = set_power_wells(power_domains, icl_power_wells);
} else if (IS_CANNONLAKE(dev_priv)) {
diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
index 050655a1a3d8..b05b2191b919 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.c
+++ b/drivers/gpu/drm/i915/display/intel_dp.c
@@ -2414,9 +2414,6 @@ intel_dp_compute_config(struct intel_encoder *encoder,
intel_psr_compute_config(intel_dp, pipe_config);
- intel_hdcp_transcoder_config(intel_connector,
- pipe_config->cpu_transcoder);
-
return 0;
}
diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c
index 3111ecaeabd0..20616639b8ab 100644
--- a/drivers/gpu/drm/i915/display/intel_fbc.c
+++ b/drivers/gpu/drm/i915/display/intel_fbc.c
@@ -1284,7 +1284,7 @@ static int intel_sanitize_fbc_option(struct drm_i915_private *dev_priv)
return 0;
/* https://bugs.freedesktop.org/show_bug.cgi?id=108085 */
- if (IS_GEMINILAKE(dev_priv))
+ if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv))
return 0;
if (IS_BROADWELL(dev_priv) || INTEL_GEN(dev_priv) >= 9)
diff --git a/drivers/gpu/drm/i915/display/intel_frontbuffer.c b/drivers/gpu/drm/i915/display/intel_frontbuffer.c
index 84b164f31895..6cb02c912acc 100644
--- a/drivers/gpu/drm/i915/display/intel_frontbuffer.c
+++ b/drivers/gpu/drm/i915/display/intel_frontbuffer.c
@@ -229,11 +229,11 @@ static void frontbuffer_release(struct kref *ref)
vma->display_alignment = I915_GTT_MIN_ALIGNMENT;
spin_unlock(&obj->vma.lock);
- obj->frontbuffer = NULL;
+ RCU_INIT_POINTER(obj->frontbuffer, NULL);
spin_unlock(&to_i915(obj->base.dev)->fb_tracking.lock);
i915_gem_object_put(obj);
- kfree(front);
+ kfree_rcu(front, rcu);
}
struct intel_frontbuffer *
@@ -242,11 +242,7 @@ intel_frontbuffer_get(struct drm_i915_gem_object *obj)
struct drm_i915_private *i915 = to_i915(obj->base.dev);
struct intel_frontbuffer *front;
- spin_lock(&i915->fb_tracking.lock);
- front = obj->frontbuffer;
- if (front)
- kref_get(&front->ref);
- spin_unlock(&i915->fb_tracking.lock);
+ front = __intel_frontbuffer_get(obj);
if (front)
return front;
@@ -262,13 +258,13 @@ intel_frontbuffer_get(struct drm_i915_gem_object *obj)
i915_active_may_sleep(frontbuffer_retire));
spin_lock(&i915->fb_tracking.lock);
- if (obj->frontbuffer) {
+ if (rcu_access_pointer(obj->frontbuffer)) {
kfree(front);
- front = obj->frontbuffer;
+ front = rcu_dereference_protected(obj->frontbuffer, true);
kref_get(&front->ref);
} else {
i915_gem_object_get(obj);
- obj->frontbuffer = front;
+ rcu_assign_pointer(obj->frontbuffer, front);
}
spin_unlock(&i915->fb_tracking.lock);
diff --git a/drivers/gpu/drm/i915/display/intel_frontbuffer.h b/drivers/gpu/drm/i915/display/intel_frontbuffer.h
index adc64d61a4a5..6d41f5394425 100644
--- a/drivers/gpu/drm/i915/display/intel_frontbuffer.h
+++ b/drivers/gpu/drm/i915/display/intel_frontbuffer.h
@@ -27,10 +27,10 @@
#include <linux/atomic.h>
#include <linux/kref.h>
+#include "gem/i915_gem_object_types.h"
#include "i915_active.h"
struct drm_i915_private;
-struct drm_i915_gem_object;
enum fb_op_origin {
ORIGIN_GTT,
@@ -45,6 +45,7 @@ struct intel_frontbuffer {
atomic_t bits;
struct i915_active write;
struct drm_i915_gem_object *obj;
+ struct rcu_head rcu;
};
void intel_frontbuffer_flip_prepare(struct drm_i915_private *i915,
@@ -54,6 +55,35 @@ void intel_frontbuffer_flip_complete(struct drm_i915_private *i915,
void intel_frontbuffer_flip(struct drm_i915_private *i915,
unsigned frontbuffer_bits);
+void intel_frontbuffer_put(struct intel_frontbuffer *front);
+
+static inline struct intel_frontbuffer *
+__intel_frontbuffer_get(const struct drm_i915_gem_object *obj)
+{
+ struct intel_frontbuffer *front;
+
+ if (likely(!rcu_access_pointer(obj->frontbuffer)))
+ return NULL;
+
+ rcu_read_lock();
+ do {
+ front = rcu_dereference(obj->frontbuffer);
+ if (!front)
+ break;
+
+ if (unlikely(!kref_get_unless_zero(&front->ref)))
+ continue;
+
+ if (likely(front == rcu_access_pointer(obj->frontbuffer)))
+ break;
+
+ intel_frontbuffer_put(front);
+ } while (1);
+ rcu_read_unlock();
+
+ return front;
+}
+
struct intel_frontbuffer *
intel_frontbuffer_get(struct drm_i915_gem_object *obj);
@@ -119,6 +149,4 @@ void intel_frontbuffer_track(struct intel_frontbuffer *old,
struct intel_frontbuffer *new,
unsigned int frontbuffer_bits);
-void intel_frontbuffer_put(struct intel_frontbuffer *front);
-
#endif /* __INTEL_FRONTBUFFER_H__ */
diff --git a/drivers/gpu/drm/i915/display/intel_hdcp.c b/drivers/gpu/drm/i915/display/intel_hdcp.c
index f1f41ca8402b..a448815d8fc2 100644
--- a/drivers/gpu/drm/i915/display/intel_hdcp.c
+++ b/drivers/gpu/drm/i915/display/intel_hdcp.c
@@ -1821,23 +1821,6 @@ enum mei_fw_tc intel_get_mei_fw_tc(enum transcoder cpu_transcoder)
}
}
-void intel_hdcp_transcoder_config(struct intel_connector *connector,
- enum transcoder cpu_transcoder)
-{
- struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
- struct intel_hdcp *hdcp = &connector->hdcp;
-
- if (!hdcp->shim)
- return;
-
- if (INTEL_GEN(dev_priv) >= 12) {
- mutex_lock(&hdcp->mutex);
- hdcp->cpu_transcoder = cpu_transcoder;
- hdcp->port_data.fw_tc = intel_get_mei_fw_tc(cpu_transcoder);
- mutex_unlock(&hdcp->mutex);
- }
-}
-
static inline int initialize_hdcp_port_data(struct intel_connector *connector,
const struct intel_hdcp_shim *shim)
{
@@ -1959,8 +1942,10 @@ int intel_hdcp_init(struct intel_connector *connector,
return 0;
}
-int intel_hdcp_enable(struct intel_connector *connector, u8 content_type)
+int intel_hdcp_enable(struct intel_connector *connector,
+ enum transcoder cpu_transcoder, u8 content_type)
{
+ struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
struct intel_hdcp *hdcp = &connector->hdcp;
unsigned long check_link_interval = DRM_HDCP_CHECK_PERIOD_MS;
int ret = -EINVAL;
@@ -1972,6 +1957,11 @@ int intel_hdcp_enable(struct intel_connector *connector, u8 content_type)
WARN_ON(hdcp->value == DRM_MODE_CONTENT_PROTECTION_ENABLED);
hdcp->content_type = content_type;
+ if (INTEL_GEN(dev_priv) >= 12) {
+ hdcp->cpu_transcoder = cpu_transcoder;
+ hdcp->port_data.fw_tc = intel_get_mei_fw_tc(cpu_transcoder);
+ }
+
/*
* Considering that HDCP2.2 is more secure than HDCP1.4, If the setup
* is capable of HDCP2.2, it is preferred to use HDCP2.2.
diff --git a/drivers/gpu/drm/i915/display/intel_hdcp.h b/drivers/gpu/drm/i915/display/intel_hdcp.h
index 41c1053d9e38..f3c3272e712a 100644
--- a/drivers/gpu/drm/i915/display/intel_hdcp.h
+++ b/drivers/gpu/drm/i915/display/intel_hdcp.h
@@ -21,11 +21,10 @@ enum transcoder;
void intel_hdcp_atomic_check(struct drm_connector *connector,
struct drm_connector_state *old_state,
struct drm_connector_state *new_state);
-void intel_hdcp_transcoder_config(struct intel_connector *connector,
- enum transcoder cpu_transcoder);
int intel_hdcp_init(struct intel_connector *connector,
const struct intel_hdcp_shim *hdcp_shim);
-int intel_hdcp_enable(struct intel_connector *connector, u8 content_type);
+int intel_hdcp_enable(struct intel_connector *connector,
+ enum transcoder cpu_transcoder, u8 content_type);
int intel_hdcp_disable(struct intel_connector *connector);
bool is_hdcp_supported(struct drm_i915_private *dev_priv, enum port port);
bool intel_hdcp_capable(struct intel_connector *connector);
diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.c b/drivers/gpu/drm/i915/display/intel_hdmi.c
index f6f5312205c4..f56fffc474fa 100644
--- a/drivers/gpu/drm/i915/display/intel_hdmi.c
+++ b/drivers/gpu/drm/i915/display/intel_hdmi.c
@@ -2489,9 +2489,6 @@ int intel_hdmi_compute_config(struct intel_encoder *encoder,
return -EINVAL;
}
- intel_hdcp_transcoder_config(intel_hdmi->attached_connector,
- pipe_config->cpu_transcoder);
-
return 0;
}
diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c
index 848ce07a8ec2..8a98a1aa7adc 100644
--- a/drivers/gpu/drm/i915/display/intel_overlay.c
+++ b/drivers/gpu/drm/i915/display/intel_overlay.c
@@ -279,12 +279,21 @@ static void intel_overlay_flip_prepare(struct intel_overlay *overlay,
struct i915_vma *vma)
{
enum pipe pipe = overlay->crtc->pipe;
+ struct intel_frontbuffer *from = NULL, *to = NULL;
WARN_ON(overlay->old_vma);
- intel_frontbuffer_track(overlay->vma ? overlay->vma->obj->frontbuffer : NULL,
- vma ? vma->obj->frontbuffer : NULL,
- INTEL_FRONTBUFFER_OVERLAY(pipe));
+ if (overlay->vma)
+ from = intel_frontbuffer_get(overlay->vma->obj);
+ if (vma)
+ to = intel_frontbuffer_get(vma->obj);
+
+ intel_frontbuffer_track(from, to, INTEL_FRONTBUFFER_OVERLAY(pipe));
+
+ if (to)
+ intel_frontbuffer_put(to);
+ if (from)
+ intel_frontbuffer_put(from);
intel_frontbuffer_flip_prepare(overlay->i915,
INTEL_FRONTBUFFER_OVERLAY(pipe));
@@ -766,7 +775,7 @@ static int intel_overlay_do_put_image(struct intel_overlay *overlay,
ret = PTR_ERR(vma);
goto out_pin_section;
}
- intel_frontbuffer_flush(new_bo->frontbuffer, ORIGIN_DIRTYFB);
+ i915_gem_object_flush_frontbuffer(new_bo, ORIGIN_DIRTYFB);
if (!overlay->active) {
u32 oconfig;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
index b9f504ba3b32..18ee708585a9 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
@@ -20,7 +20,8 @@ static void __do_clflush(struct drm_i915_gem_object *obj)
{
GEM_BUG_ON(!i915_gem_object_has_pages(obj));
drm_clflush_sg(obj->mm.pages);
- intel_frontbuffer_flush(obj->frontbuffer, ORIGIN_CPU);
+
+ i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU);
}
static int clflush_work(struct dma_fence_work *base)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 337ba17b1e0e..42385277c684 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -2167,8 +2167,7 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
ext_data.fpriv = file->driver_priv;
if (client_is_banned(ext_data.fpriv)) {
DRM_DEBUG("client %s[%d] banned from creating ctx\n",
- current->comm,
- pid_nr(get_task_pid(current, PIDTYPE_PID)));
+ current->comm, task_pid_nr(current));
return -EIO;
}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
index 9937b4c341f1..f86400a191b0 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
@@ -664,7 +664,7 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
i915_gem_object_unlock(obj);
if (write_domain)
- intel_frontbuffer_invalidate(obj->frontbuffer, ORIGIN_CPU);
+ i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);
out_unpin:
i915_gem_object_unpin_pages(obj);
@@ -784,7 +784,7 @@ int i915_gem_object_prepare_write(struct drm_i915_gem_object *obj,
}
out:
- intel_frontbuffer_invalidate(obj->frontbuffer, ORIGIN_CPU);
+ i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);
obj->mm.dirty = true;
/* return with the pages pinned */
return 0;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index f0998f1225af..bc3a67226163 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -2694,6 +2694,7 @@ i915_gem_do_execbuffer(struct drm_device *dev,
err = eb_submit(&eb);
err_request:
add_to_client(eb.request, file);
+ i915_request_get(eb.request);
i915_request_add(eb.request);
if (fences)
@@ -2709,6 +2710,7 @@ err_request:
fput(out_fence->file);
}
}
+ i915_request_put(eb.request);
err_batch_unpin:
if (eb.batch_flags & I915_DISPATCH_SECURE)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index a50296cce0d8..a596548c07bf 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -280,7 +280,7 @@ i915_gem_object_flush_write_domain(struct drm_i915_gem_object *obj,
for_each_ggtt_vma(vma, obj)
intel_gt_flush_ggtt_writes(vma->vm->gt);
- intel_frontbuffer_flush(obj->frontbuffer, ORIGIN_CPU);
+ i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU);
for_each_ggtt_vma(vma, obj) {
if (vma->iomap)
@@ -308,6 +308,30 @@ i915_gem_object_flush_write_domain(struct drm_i915_gem_object *obj,
obj->write_domain = 0;
}
+void __i915_gem_object_flush_frontbuffer(struct drm_i915_gem_object *obj,
+ enum fb_op_origin origin)
+{
+ struct intel_frontbuffer *front;
+
+ front = __intel_frontbuffer_get(obj);
+ if (front) {
+ intel_frontbuffer_flush(front, origin);
+ intel_frontbuffer_put(front);
+ }
+}
+
+void __i915_gem_object_invalidate_frontbuffer(struct drm_i915_gem_object *obj,
+ enum fb_op_origin origin)
+{
+ struct intel_frontbuffer *front;
+
+ front = __intel_frontbuffer_get(obj);
+ if (front) {
+ intel_frontbuffer_invalidate(front, origin);
+ intel_frontbuffer_put(front);
+ }
+}
+
void i915_gem_init__objects(struct drm_i915_private *i915)
{
INIT_WORK(&i915->mm.free_work, __i915_gem_free_work);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index 458cd51331f1..4b93591fd5c7 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -13,8 +13,8 @@
#include <drm/i915_drm.h>
+#include "display/intel_frontbuffer.h"
#include "i915_gem_object_types.h"
-
#include "i915_gem_gtt.h"
void i915_gem_init__objects(struct drm_i915_private *i915);
@@ -463,4 +463,25 @@ int i915_gem_object_wait_priority(struct drm_i915_gem_object *obj,
unsigned int flags,
const struct i915_sched_attr *attr);
+void __i915_gem_object_flush_frontbuffer(struct drm_i915_gem_object *obj,
+ enum fb_op_origin origin);
+void __i915_gem_object_invalidate_frontbuffer(struct drm_i915_gem_object *obj,
+ enum fb_op_origin origin);
+
+static inline void
+i915_gem_object_flush_frontbuffer(struct drm_i915_gem_object *obj,
+ enum fb_op_origin origin)
+{
+ if (unlikely(rcu_access_pointer(obj->frontbuffer)))
+ __i915_gem_object_flush_frontbuffer(obj, origin);
+}
+
+static inline void
+i915_gem_object_invalidate_frontbuffer(struct drm_i915_gem_object *obj,
+ enum fb_op_origin origin)
+{
+ if (unlikely(rcu_access_pointer(obj->frontbuffer)))
+ __i915_gem_object_invalidate_frontbuffer(obj, origin);
+}
+
#endif
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
index 96008374a412..e3f3944fbd90 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object_types.h
@@ -150,7 +150,7 @@ struct drm_i915_gem_object {
*/
u16 write_domain;
- struct intel_frontbuffer *frontbuffer;
+ struct intel_frontbuffer __rcu *frontbuffer;
/** Current tiling stride for the object, if it's tiled. */
unsigned int tiling_and_stride;
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
index a459a42ad5c2..7e64b7d7d330 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
@@ -94,8 +94,9 @@ static int __gt_park(struct intel_wakeref *wf)
intel_uncore_forcewake_put(&i915->uncore, FORCEWAKE_ALL);
}
+ /* Defer dropping the display power well for 100ms, it's slow! */
GEM_BUG_ON(!wakeref);
- intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ, wakeref);
+ intel_display_power_put_async(i915, POWER_DOMAIN_GT_IRQ, wakeref);
i915_globals_park();
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index 9fdefbdc3546..75dd0e0367b7 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -845,12 +845,6 @@ static const u8 *reg_offsets(const struct intel_engine_cs *engine)
}
}
-static void unwind_wa_tail(struct i915_request *rq)
-{
- rq->tail = intel_ring_wrap(rq->ring, rq->wa_tail - WA_TAIL_BYTES);
- assert_ring_tail_valid(rq->ring, rq->tail);
-}
-
static struct i915_request *
__unwind_incomplete_requests(struct intel_engine_cs *engine)
{
@@ -863,12 +857,10 @@ __unwind_incomplete_requests(struct intel_engine_cs *engine)
list_for_each_entry_safe_reverse(rq, rn,
&engine->active.requests,
sched.link) {
-
if (i915_request_completed(rq))
continue; /* XXX */
__i915_request_unsubmit(rq);
- unwind_wa_tail(rq);
/*
* Push the request back into the queue for later resubmission.
@@ -1161,13 +1153,29 @@ execlists_schedule_out(struct i915_request *rq)
i915_request_put(rq);
}
-static u64 execlists_update_context(const struct i915_request *rq)
+static u64 execlists_update_context(struct i915_request *rq)
{
struct intel_context *ce = rq->hw_context;
- u64 desc;
+ u64 desc = ce->lrc_desc;
+ u32 tail;
- ce->lrc_reg_state[CTX_RING_TAIL] =
- intel_ring_set_tail(rq->ring, rq->tail);
+ /*
+ * WaIdleLiteRestore:bdw,skl
+ *
+ * We should never submit the context with the same RING_TAIL twice
+ * just in case we submit an empty ring, which confuses the HW.
+ *
+ * We append a couple of NOOPs (gen8_emit_wa_tail) after the end of
+ * the normal request to be able to always advance the RING_TAIL on
+ * subsequent resubmissions (for lite restore). Should that fail us,
+ * and we try and submit the same tail again, force the context
+ * reload.
+ */
+ tail = intel_ring_set_tail(rq->ring, rq->tail);
+ if (unlikely(ce->lrc_reg_state[CTX_RING_TAIL] == tail))
+ desc |= CTX_DESC_FORCE_RESTORE;
+ ce->lrc_reg_state[CTX_RING_TAIL] = tail;
+ rq->tail = rq->wa_tail;
/*
* Make sure the context image is complete before we submit it to HW.
@@ -1186,13 +1194,11 @@ static u64 execlists_update_context(const struct i915_request *rq)
*/
mb();
- desc = ce->lrc_desc;
- ce->lrc_desc &= ~CTX_DESC_FORCE_RESTORE;
-
/* Wa_1607138340:tgl */
if (IS_TGL_REVID(rq->i915, TGL_REVID_A0, TGL_REVID_A0))
desc |= CTX_DESC_FORCE_RESTORE;
+ ce->lrc_desc &= ~CTX_DESC_FORCE_RESTORE;
return desc;
}
@@ -1703,16 +1709,6 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
return;
}
-
- /*
- * WaIdleLiteRestore:bdw,skl
- * Apply the wa NOOPs to prevent
- * ring:HEAD == rq:TAIL as we resubmit the
- * request. See gen8_emit_fini_breadcrumb() for
- * where we prepare the padding after the
- * end of the request.
- */
- last->tail = last->wa_tail;
}
}
@@ -4120,17 +4116,18 @@ static void virtual_context_destroy(struct kref *kref)
for (n = 0; n < ve->num_siblings; n++) {
struct intel_engine_cs *sibling = ve->siblings[n];
struct rb_node *node = &ve->nodes[sibling->id].rb;
+ unsigned long flags;
if (RB_EMPTY_NODE(node))
continue;
- spin_lock_irq(&sibling->active.lock);
+ spin_lock_irqsave(&sibling->active.lock, flags);
/* Detachment is lazily performed in the execlists tasklet */
if (!RB_EMPTY_NODE(node))
rb_erase_cached(node, &sibling->execlists.virtual);
- spin_unlock_irq(&sibling->active.lock);
+ spin_unlock_irqrestore(&sibling->active.lock, flags);
}
GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet));
diff --git a/drivers/gpu/drm/i915/gvt/dmabuf.c b/drivers/gpu/drm/i915/gvt/dmabuf.c
index e451298d11c3..2477a1e5a166 100644
--- a/drivers/gpu/drm/i915/gvt/dmabuf.c
+++ b/drivers/gpu/drm/i915/gvt/dmabuf.c
@@ -36,13 +36,32 @@
#define GEN8_DECODE_PTE(pte) (pte & GENMASK_ULL(63, 12))
+static int vgpu_pin_dma_address(struct intel_vgpu *vgpu,
+ unsigned long size,
+ dma_addr_t dma_addr)
+{
+ int ret = 0;
+
+ if (intel_gvt_hypervisor_dma_pin_guest_page(vgpu, dma_addr))
+ ret = -EINVAL;
+
+ return ret;
+}
+
+static void vgpu_unpin_dma_address(struct intel_vgpu *vgpu,
+ dma_addr_t dma_addr)
+{
+ intel_gvt_hypervisor_dma_unmap_guest_page(vgpu, dma_addr);
+}
+
static int vgpu_gem_get_pages(
struct drm_i915_gem_object *obj)
{
struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
+ struct intel_vgpu *vgpu;
struct sg_table *st;
struct scatterlist *sg;
- int i, ret;
+ int i, j, ret;
gen8_pte_t __iomem *gtt_entries;
struct intel_vgpu_fb_info *fb_info;
u32 page_num;
@@ -51,6 +70,10 @@ static int vgpu_gem_get_pages(
if (WARN_ON(!fb_info))
return -ENODEV;
+ vgpu = fb_info->obj->vgpu;
+ if (WARN_ON(!vgpu))
+ return -ENODEV;
+
st = kmalloc(sizeof(*st), GFP_KERNEL);
if (unlikely(!st))
return -ENOMEM;
@@ -64,21 +87,53 @@ static int vgpu_gem_get_pages(
gtt_entries = (gen8_pte_t __iomem *)dev_priv->ggtt.gsm +
(fb_info->start >> PAGE_SHIFT);
for_each_sg(st->sgl, sg, page_num, i) {
+ dma_addr_t dma_addr =
+ GEN8_DECODE_PTE(readq(&gtt_entries[i]));
+ if (vgpu_pin_dma_address(vgpu, PAGE_SIZE, dma_addr)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
sg->offset = 0;
sg->length = PAGE_SIZE;
- sg_dma_address(sg) =
- GEN8_DECODE_PTE(readq(&gtt_entries[i]));
sg_dma_len(sg) = PAGE_SIZE;
+ sg_dma_address(sg) = dma_addr;
}
__i915_gem_object_set_pages(obj, st, PAGE_SIZE);
+out:
+ if (ret) {
+ dma_addr_t dma_addr;
+
+ for_each_sg(st->sgl, sg, i, j) {
+ dma_addr = sg_dma_address(sg);
+ if (dma_addr)
+ vgpu_unpin_dma_address(vgpu, dma_addr);
+ }
+ sg_free_table(st);
+ kfree(st);
+ }
+
+ return ret;
- return 0;
}
static void vgpu_gem_put_pages(struct drm_i915_gem_object *obj,
struct sg_table *pages)
{
+ struct scatterlist *sg;
+
+ if (obj->base.dma_buf) {
+ struct intel_vgpu_fb_info *fb_info = obj->gvt_info;
+ struct intel_vgpu_dmabuf_obj *obj = fb_info->obj;
+ struct intel_vgpu *vgpu = obj->vgpu;
+ int i;
+
+ for_each_sg(pages->sgl, sg, fb_info->size, i)
+ vgpu_unpin_dma_address(vgpu,
+ sg_dma_address(sg));
+ }
+
sg_free_table(pages);
kfree(pages);
}
@@ -163,6 +218,7 @@ static struct drm_i915_gem_object *vgpu_create_gem(struct drm_device *dev,
drm_gem_private_object_init(dev, &obj->base,
roundup(info->size, PAGE_SIZE));
i915_gem_object_init(obj, &intel_vgpu_gem_ops, &lock_class);
+ i915_gem_object_set_readonly(obj);
obj->read_domains = I915_GEM_DOMAIN_GTT;
obj->write_domain = 0;
diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c
index bb9fe6bf5275..1043e6d564df 100644
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@@ -341,6 +341,10 @@ static int gdrst_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
gvt_dbg_mmio("vgpu%d: request VCS2 Reset\n", vgpu->id);
engine_mask |= BIT(VCS1);
}
+ if (data & GEN9_GRDOM_GUC) {
+ gvt_dbg_mmio("vgpu%d: request GUC Reset\n", vgpu->id);
+ vgpu_vreg_t(vgpu, GUC_STATUS) |= GS_MIA_IN_RESET;
+ }
engine_mask &= INTEL_INFO(vgpu->gvt->dev_priv)->engine_mask;
}
@@ -1636,6 +1640,16 @@ static int edp_psr_imr_iir_write(struct intel_vgpu *vgpu,
return 0;
}
+static int guc_status_read(struct intel_vgpu *vgpu,
+ unsigned int offset, void *p_data,
+ unsigned int bytes)
+{
+ /* keep MIA_IN_RESET before clearing */
+ read_vreg(vgpu, offset, p_data, bytes);
+ vgpu_vreg(vgpu, offset) &= ~GS_MIA_IN_RESET;
+ return 0;
+}
+
static int mmio_read_from_hw(struct intel_vgpu *vgpu,
unsigned int offset, void *p_data, unsigned int bytes)
{
@@ -2672,6 +2686,8 @@ static int init_generic_mmio_info(struct intel_gvt *gvt)
MMIO_DH(EDP_PSR_IMR, D_BDW_PLUS, NULL, edp_psr_imr_iir_write);
MMIO_DH(EDP_PSR_IIR, D_BDW_PLUS, NULL, edp_psr_imr_iir_write);
+ MMIO_DH(GUC_STATUS, D_ALL, guc_status_read, NULL);
+
return 0;
}
diff --git a/drivers/gpu/drm/i915/gvt/hypercall.h b/drivers/gpu/drm/i915/gvt/hypercall.h
index 4862fb12778e..b19a3b1ea4c1 100644
--- a/drivers/gpu/drm/i915/gvt/hypercall.h
+++ b/drivers/gpu/drm/i915/gvt/hypercall.h
@@ -62,6 +62,8 @@ struct intel_gvt_mpt {
unsigned long size, dma_addr_t *dma_addr);
void (*dma_unmap_guest_page)(unsigned long handle, dma_addr_t dma_addr);
+ int (*dma_pin_guest_page)(unsigned long handle, dma_addr_t dma_addr);
+
int (*map_gfn_to_mfn)(unsigned long handle, unsigned long gfn,
unsigned long mfn, unsigned int nr, bool map);
int (*set_trap_area)(unsigned long handle, u64 start, u64 end,
diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
index 04a5a0d90823..3259a1fa69e1 100644
--- a/drivers/gpu/drm/i915/gvt/kvmgt.c
+++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
@@ -1916,6 +1916,28 @@ err_unlock:
return ret;
}
+static int kvmgt_dma_pin_guest_page(unsigned long handle, dma_addr_t dma_addr)
+{
+ struct kvmgt_guest_info *info;
+ struct gvt_dma *entry;
+ int ret = 0;
+
+ if (!handle_valid(handle))
+ return -ENODEV;
+
+ info = (struct kvmgt_guest_info *)handle;
+
+ mutex_lock(&info->vgpu->vdev.cache_lock);
+ entry = __gvt_cache_find_dma_addr(info->vgpu, dma_addr);
+ if (entry)
+ kref_get(&entry->ref);
+ else
+ ret = -ENOMEM;
+ mutex_unlock(&info->vgpu->vdev.cache_lock);
+
+ return ret;
+}
+
static void __gvt_dma_release(struct kref *ref)
{
struct gvt_dma *entry = container_of(ref, typeof(*entry), ref);
@@ -2027,6 +2049,7 @@ static struct intel_gvt_mpt kvmgt_mpt = {
.gfn_to_mfn = kvmgt_gfn_to_pfn,
.dma_map_guest_page = kvmgt_dma_map_guest_page,
.dma_unmap_guest_page = kvmgt_dma_unmap_guest_page,
+ .dma_pin_guest_page = kvmgt_dma_pin_guest_page,
.set_opregion = kvmgt_set_opregion,
.set_edid = kvmgt_set_edid,
.get_vfio_device = kvmgt_get_vfio_device,
diff --git a/drivers/gpu/drm/i915/gvt/mpt.h b/drivers/gpu/drm/i915/gvt/mpt.h
index 0f9440128123..9ad224df9c68 100644
--- a/drivers/gpu/drm/i915/gvt/mpt.h
+++ b/drivers/gpu/drm/i915/gvt/mpt.h
@@ -255,6 +255,21 @@ static inline void intel_gvt_hypervisor_dma_unmap_guest_page(
}
/**
+ * intel_gvt_hypervisor_dma_pin_guest_page - pin guest dma buf
+ * @vgpu: a vGPU
+ * @dma_addr: guest dma addr
+ *
+ * Returns:
+ * 0 on success, negative error code if failed.
+ */
+static inline int
+intel_gvt_hypervisor_dma_pin_guest_page(struct intel_vgpu *vgpu,
+ dma_addr_t dma_addr)
+{
+ return intel_gvt_host.mpt->dma_pin_guest_page(vgpu->handle, dma_addr);
+}
+
+/**
* intel_gvt_hypervisor_map_gfn_to_mfn - map a GFN region to MFN
* @vgpu: a vGPU
* @gfn: guest PFN
diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c
index d5a6e4e3d0fd..85bd9bf4f6ee 100644
--- a/drivers/gpu/drm/i915/gvt/vgpu.c
+++ b/drivers/gpu/drm/i915/gvt/vgpu.c
@@ -212,9 +212,9 @@ static void intel_gvt_update_vgpu_types(struct intel_gvt *gvt)
*/
void intel_gvt_activate_vgpu(struct intel_vgpu *vgpu)
{
- mutex_lock(&vgpu->gvt->lock);
+ mutex_lock(&vgpu->vgpu_lock);
vgpu->active = true;
- mutex_unlock(&vgpu->gvt->lock);
+ mutex_unlock(&vgpu->vgpu_lock);
}
/**
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index b9eb6b3149b7..905890e3ac24 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -45,6 +45,7 @@
#include "gem/i915_gem_context.h"
#include "gem/i915_gem_ioctls.h"
#include "gem/i915_gem_pm.h"
+#include "gt/intel_context.h"
#include "gt/intel_engine_user.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_pm.h"
@@ -160,7 +161,7 @@ i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
* We manually control the domain here and pretend that it
* remains coherent i.e. in the GTT domain, like shmem_pwrite.
*/
- intel_frontbuffer_invalidate(obj->frontbuffer, ORIGIN_CPU);
+ i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);
if (copy_from_user(vaddr, user_data, args->size))
return -EFAULT;
@@ -168,7 +169,7 @@ i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
drm_clflush_virt_range(vaddr, args->size);
intel_gt_chipset_flush(&to_i915(obj->base.dev)->gt);
- intel_frontbuffer_flush(obj->frontbuffer, ORIGIN_CPU);
+ i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU);
return 0;
}
@@ -588,7 +589,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
goto out_unpin;
}
- intel_frontbuffer_invalidate(obj->frontbuffer, ORIGIN_CPU);
+ i915_gem_object_invalidate_frontbuffer(obj, ORIGIN_CPU);
user_data = u64_to_user_ptr(args->data_ptr);
offset = args->offset;
@@ -630,7 +631,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj,
user_data += page_length;
offset += page_length;
}
- intel_frontbuffer_flush(obj->frontbuffer, ORIGIN_CPU);
+ i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU);
i915_gem_object_unlock_fence(obj, fence);
out_unpin:
@@ -720,7 +721,7 @@ i915_gem_shmem_pwrite(struct drm_i915_gem_object *obj,
offset = 0;
}
- intel_frontbuffer_flush(obj->frontbuffer, ORIGIN_CPU);
+ i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU);
i915_gem_object_unlock_fence(obj, fence);
return ret;
@@ -1053,6 +1054,18 @@ out:
return err;
}
+static int __intel_context_flush_retire(struct intel_context *ce)
+{
+ struct intel_timeline *tl;
+
+ tl = intel_context_timeline_lock(ce);
+ if (IS_ERR(tl))
+ return PTR_ERR(tl);
+
+ intel_context_timeline_unlock(tl);
+ return 0;
+}
+
static int __intel_engines_record_defaults(struct intel_gt *gt)
{
struct i915_request *requests[I915_NUM_ENGINES] = {};
@@ -1121,13 +1134,20 @@ err_rq:
if (!rq)
continue;
- /* We want to be able to unbind the state from the GGTT */
- GEM_BUG_ON(intel_context_is_pinned(rq->hw_context));
-
+ GEM_BUG_ON(!test_bit(CONTEXT_ALLOC_BIT,
+ &rq->hw_context->flags));
state = rq->hw_context->state;
if (!state)
continue;
+ /* Serialise with retirement on another CPU */
+ err = __intel_context_flush_retire(rq->hw_context);
+ if (err)
+ goto out;
+
+ /* We want to be able to unbind the state from the GGTT */
+ GEM_BUG_ON(intel_context_is_pinned(rq->hw_context));
+
/*
* As we will hold a reference to the logical state, it will
* not be torn down with the context, and importantly the
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 65d7c2e599de..2ae14bc14931 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -2078,20 +2078,12 @@ gen8_update_reg_state_unlocked(const struct intel_context *ce,
u32 *reg_state = ce->lrc_reg_state;
int i;
- if (IS_GEN(stream->perf->i915, 12)) {
- u32 format = stream->oa_buffer.format;
+ reg_state[ctx_oactxctrl + 1] =
+ (stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
+ (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) |
+ GEN8_OA_COUNTER_RESUME;
- reg_state[ctx_oactxctrl + 1] =
- (format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT) |
- (stream->oa_config ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0);
- } else {
- reg_state[ctx_oactxctrl + 1] =
- (stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
- (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) |
- GEN8_OA_COUNTER_RESUME;
- }
-
- for (i = 0; !!ctx_flexeu0 && i < ARRAY_SIZE(flex_regs); i++)
+ for (i = 0; i < ARRAY_SIZE(flex_regs); i++)
reg_state[ctx_flexeu0 + i * 2 + 1] =
oa_config_flex_reg(stream->oa_config, flex_regs[i]);
@@ -2224,34 +2216,51 @@ static int gen8_configure_context(struct i915_gem_context *ctx,
return err;
}
-static int gen12_emit_oar_config(struct intel_context *ce, bool enable)
+static int gen12_configure_oar_context(struct i915_perf_stream *stream, bool enable)
{
- struct i915_request *rq;
- u32 *cs;
- int err = 0;
-
- rq = i915_request_create(ce);
- if (IS_ERR(rq))
- return PTR_ERR(rq);
-
- cs = intel_ring_begin(rq, 4);
- if (IS_ERR(cs)) {
- err = PTR_ERR(cs);
- goto out;
- }
-
- *cs++ = MI_LOAD_REGISTER_IMM(1);
- *cs++ = i915_mmio_reg_offset(RING_CONTEXT_CONTROL(ce->engine->mmio_base));
- *cs++ = _MASKED_FIELD(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE,
- enable ? GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE : 0);
- *cs++ = MI_NOOP;
+ int err;
+ struct intel_context *ce = stream->pinned_ctx;
+ u32 format = stream->oa_buffer.format;
+ struct flex regs_context[] = {
+ {
+ GEN8_OACTXCONTROL,
+ stream->perf->ctx_oactxctrl_offset + 1,
+ enable ? GEN8_OA_COUNTER_RESUME : 0,
+ },
+ };
+ /* Offsets in regs_lri are not used since this configuration is only
+ * applied using LRI. Initialize the correct offsets for posterity.
+ */
+#define GEN12_OAR_OACONTROL_OFFSET 0x5B0
+ struct flex regs_lri[] = {
+ {
+ GEN12_OAR_OACONTROL,
+ GEN12_OAR_OACONTROL_OFFSET + 1,
+ (format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT) |
+ (enable ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0)
+ },
+ {
+ RING_CONTEXT_CONTROL(ce->engine->mmio_base),
+ CTX_CONTEXT_CONTROL,
+ _MASKED_FIELD(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE,
+ enable ?
+ GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE :
+ 0)
+ },
+ };
- intel_ring_advance(rq, cs);
+ /* Modify the context image of pinned context with regs_context*/
+ err = intel_context_lock_pinned(ce);
+ if (err)
+ return err;
-out:
- i915_request_add(rq);
+ err = gen8_modify_context(ce, regs_context, ARRAY_SIZE(regs_context));
+ intel_context_unlock_pinned(ce);
+ if (err)
+ return err;
- return err;
+ /* Apply regs_lri using LRI with pinned context */
+ return gen8_modify_self(ce, regs_lri, ARRAY_SIZE(regs_lri));
}
/*
@@ -2277,53 +2286,16 @@ out:
* per-context OA state.
*
* Note: it's only the RCS/Render context that has any OA state.
+ * Note: the first flex register passed must always be R_PWR_CLK_STATE
*/
-static int lrc_configure_all_contexts(struct i915_perf_stream *stream,
- const struct i915_oa_config *oa_config)
+static int oa_configure_all_contexts(struct i915_perf_stream *stream,
+ struct flex *regs,
+ size_t num_regs)
{
struct drm_i915_private *i915 = stream->perf->i915;
- /* The MMIO offsets for Flex EU registers aren't contiguous */
- const u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset;
-#define ctx_flexeuN(N) (ctx_flexeu0 + 2 * (N) + 1)
- struct flex regs[] = {
- {
- GEN8_R_PWR_CLK_STATE,
- CTX_R_PWR_CLK_STATE,
- },
- {
- IS_GEN(i915, 12) ?
- GEN12_OAR_OACONTROL : GEN8_OACTXCONTROL,
- stream->perf->ctx_oactxctrl_offset + 1,
- },
- { EU_PERF_CNTL0, ctx_flexeuN(0) },
- { EU_PERF_CNTL1, ctx_flexeuN(1) },
- { EU_PERF_CNTL2, ctx_flexeuN(2) },
- { EU_PERF_CNTL3, ctx_flexeuN(3) },
- { EU_PERF_CNTL4, ctx_flexeuN(4) },
- { EU_PERF_CNTL5, ctx_flexeuN(5) },
- { EU_PERF_CNTL6, ctx_flexeuN(6) },
- };
-#undef ctx_flexeuN
struct intel_engine_cs *engine;
struct i915_gem_context *ctx, *cn;
- size_t array_size = IS_GEN(i915, 12) ? 2 : ARRAY_SIZE(regs);
- int i, err;
-
- if (IS_GEN(i915, 12)) {
- u32 format = stream->oa_buffer.format;
-
- regs[1].value =
- (format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT) |
- (oa_config ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0);
- } else {
- regs[1].value =
- (stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
- (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) |
- GEN8_OA_COUNTER_RESUME;
- }
-
- for (i = 2; !!ctx_flexeu0 && i < array_size; i++)
- regs[i].value = oa_config_flex_reg(oa_config, regs[i].reg);
+ int err;
lockdep_assert_held(&stream->perf->lock);
@@ -2353,7 +2325,7 @@ static int lrc_configure_all_contexts(struct i915_perf_stream *stream,
spin_unlock(&i915->gem.contexts.lock);
- err = gen8_configure_context(ctx, regs, array_size);
+ err = gen8_configure_context(ctx, regs, num_regs);
if (err) {
i915_gem_context_put(ctx);
return err;
@@ -2378,7 +2350,7 @@ static int lrc_configure_all_contexts(struct i915_perf_stream *stream,
regs[0].value = intel_sseu_make_rpcs(i915, &ce->sseu);
- err = gen8_modify_self(ce, regs, array_size);
+ err = gen8_modify_self(ce, regs, num_regs);
if (err)
return err;
}
@@ -2386,6 +2358,56 @@ static int lrc_configure_all_contexts(struct i915_perf_stream *stream,
return 0;
}
+static int gen12_configure_all_contexts(struct i915_perf_stream *stream,
+ const struct i915_oa_config *oa_config)
+{
+ struct flex regs[] = {
+ {
+ GEN8_R_PWR_CLK_STATE,
+ CTX_R_PWR_CLK_STATE,
+ },
+ };
+
+ return oa_configure_all_contexts(stream, regs, ARRAY_SIZE(regs));
+}
+
+static int lrc_configure_all_contexts(struct i915_perf_stream *stream,
+ const struct i915_oa_config *oa_config)
+{
+ /* The MMIO offsets for Flex EU registers aren't contiguous */
+ const u32 ctx_flexeu0 = stream->perf->ctx_flexeu0_offset;
+#define ctx_flexeuN(N) (ctx_flexeu0 + 2 * (N) + 1)
+ struct flex regs[] = {
+ {
+ GEN8_R_PWR_CLK_STATE,
+ CTX_R_PWR_CLK_STATE,
+ },
+ {
+ GEN8_OACTXCONTROL,
+ stream->perf->ctx_oactxctrl_offset + 1,
+ },
+ { EU_PERF_CNTL0, ctx_flexeuN(0) },
+ { EU_PERF_CNTL1, ctx_flexeuN(1) },
+ { EU_PERF_CNTL2, ctx_flexeuN(2) },
+ { EU_PERF_CNTL3, ctx_flexeuN(3) },
+ { EU_PERF_CNTL4, ctx_flexeuN(4) },
+ { EU_PERF_CNTL5, ctx_flexeuN(5) },
+ { EU_PERF_CNTL6, ctx_flexeuN(6) },
+ };
+#undef ctx_flexeuN
+ int i;
+
+ regs[1].value =
+ (stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) |
+ (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) |
+ GEN8_OA_COUNTER_RESUME;
+
+ for (i = 2; i < ARRAY_SIZE(regs); i++)
+ regs[i].value = oa_config_flex_reg(oa_config, regs[i].reg);
+
+ return oa_configure_all_contexts(stream, regs, ARRAY_SIZE(regs));
+}
+
static int gen8_enable_metric_set(struct i915_perf_stream *stream)
{
struct intel_uncore *uncore = stream->uncore;
@@ -2464,7 +2486,7 @@ static int gen12_enable_metric_set(struct i915_perf_stream *stream)
* to make sure all slices/subslices are ON before writing to NOA
* registers.
*/
- ret = lrc_configure_all_contexts(stream, oa_config);
+ ret = gen12_configure_all_contexts(stream, oa_config);
if (ret)
return ret;
@@ -2474,8 +2496,7 @@ static int gen12_enable_metric_set(struct i915_perf_stream *stream)
* requested this.
*/
if (stream->ctx) {
- ret = gen12_emit_oar_config(stream->pinned_ctx,
- oa_config != NULL);
+ ret = gen12_configure_oar_context(stream, true);
if (ret)
return ret;
}
@@ -2509,11 +2530,11 @@ static void gen12_disable_metric_set(struct i915_perf_stream *stream)
struct intel_uncore *uncore = stream->uncore;
/* Reset all contexts' slices/subslices configurations. */
- lrc_configure_all_contexts(stream, NULL);
+ gen12_configure_all_contexts(stream, NULL);
/* disable the context save/restore or OAR counters */
if (stream->ctx)
- gen12_emit_oar_config(stream->pinned_ctx, false);
+ gen12_configure_oar_context(stream, false);
/* Make sure we disable noa to save power. */
intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0);
@@ -2713,7 +2734,8 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
return -EINVAL;
}
- if (!(props->sample_flags & SAMPLE_OA_REPORT)) {
+ if (!(props->sample_flags & SAMPLE_OA_REPORT) &&
+ (INTEL_GEN(perf->i915) < 12 || !stream->ctx)) {
DRM_DEBUG("Only OA report sampling supported\n");
return -EINVAL;
}
@@ -2745,7 +2767,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
format_size = perf->oa_formats[props->oa_format].size;
- stream->sample_flags |= SAMPLE_OA_REPORT;
+ stream->sample_flags = props->sample_flags;
stream->sample_size += format_size;
stream->oa_buffer.format_size = format_size;
@@ -2854,7 +2876,11 @@ void i915_oa_init_reg_state(const struct intel_context *ce,
return;
stream = engine->i915->perf.exclusive_stream;
- if (stream)
+ /*
+ * For gen12, only CTX_R_PWR_CLK_STATE needs update, but the caller
+ * is already doing that, so nothing to be done for gen12 here.
+ */
+ if (stream && INTEL_GEN(stream->perf->i915) < 12)
gen8_update_reg_state_unlocked(ce, stream);
}
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index 2814218c5ba1..6f09aa0be80a 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -144,61 +144,40 @@ static inline s64 ktime_since(const ktime_t kt)
return ktime_to_ns(ktime_sub(ktime_get(), kt));
}
-static u64 __pmu_estimate_rc6(struct i915_pmu *pmu)
-{
- u64 val;
-
- /*
- * We think we are runtime suspended.
- *
- * Report the delta from when the device was suspended to now,
- * on top of the last known real value, as the approximated RC6
- * counter value.
- */
- val = ktime_since(pmu->sleep_last);
- val += pmu->sample[__I915_SAMPLE_RC6].cur;
-
- pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = val;
-
- return val;
-}
-
-static u64 __pmu_update_rc6(struct i915_pmu *pmu, u64 val)
-{
- /*
- * If we are coming back from being runtime suspended we must
- * be careful not to report a larger value than returned
- * previously.
- */
- if (val >= pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur) {
- pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur = 0;
- pmu->sample[__I915_SAMPLE_RC6].cur = val;
- } else {
- val = pmu->sample[__I915_SAMPLE_RC6_ESTIMATED].cur;
- }
-
- return val;
-}
-
static u64 get_rc6(struct intel_gt *gt)
{
struct drm_i915_private *i915 = gt->i915;
struct i915_pmu *pmu = &i915->pmu;
unsigned long flags;
+ bool awake = false;
u64 val;
- val = 0;
if (intel_gt_pm_get_if_awake(gt)) {
val = __get_rc6(gt);
intel_gt_pm_put_async(gt);
+ awake = true;
}
spin_lock_irqsave(&pmu->lock, flags);
- if (val)
- val = __pmu_update_rc6(pmu, val);
+ if (awake) {
+ pmu->sample[__I915_SAMPLE_RC6].cur = val;
+ } else {
+ /*
+ * We think we are runtime suspended.
+ *
+ * Report the delta from when the device was suspended to now,
+ * on top of the last known real value, as the approximated RC6
+ * counter value.
+ */
+ val = ktime_since(pmu->sleep_last);
+ val += pmu->sample[__I915_SAMPLE_RC6].cur;
+ }
+
+ if (val < pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur)
+ val = pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur;
else
- val = __pmu_estimate_rc6(pmu);
+ pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur = val;
spin_unlock_irqrestore(&pmu->lock, flags);
@@ -210,20 +189,11 @@ static void park_rc6(struct drm_i915_private *i915)
struct i915_pmu *pmu = &i915->pmu;
if (pmu->enable & config_enabled_mask(I915_PMU_RC6_RESIDENCY))
- __pmu_update_rc6(pmu, __get_rc6(&i915->gt));
+ pmu->sample[__I915_SAMPLE_RC6].cur = __get_rc6(&i915->gt);
pmu->sleep_last = ktime_get();
}
-static void unpark_rc6(struct drm_i915_private *i915)
-{
- struct i915_pmu *pmu = &i915->pmu;
-
- /* Estimate how long we slept and accumulate that into rc6 counters */
- if (pmu->enable & config_enabled_mask(I915_PMU_RC6_RESIDENCY))
- __pmu_estimate_rc6(pmu);
-}
-
#else
static u64 get_rc6(struct intel_gt *gt)
@@ -232,7 +202,6 @@ static u64 get_rc6(struct intel_gt *gt)
}
static void park_rc6(struct drm_i915_private *i915) {}
-static void unpark_rc6(struct drm_i915_private *i915) {}
#endif
@@ -281,8 +250,6 @@ void i915_pmu_gt_unparked(struct drm_i915_private *i915)
*/
__i915_pmu_maybe_start_timer(pmu);
- unpark_rc6(i915);
-
spin_unlock_irq(&pmu->lock);
}
diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h
index bf52e3983631..6c1647c5daf2 100644
--- a/drivers/gpu/drm/i915/i915_pmu.h
+++ b/drivers/gpu/drm/i915/i915_pmu.h
@@ -18,7 +18,7 @@ enum {
__I915_SAMPLE_FREQ_ACT = 0,
__I915_SAMPLE_FREQ_REQ,
__I915_SAMPLE_RC6,
- __I915_SAMPLE_RC6_ESTIMATED,
+ __I915_SAMPLE_RC6_LAST_REPORTED,
__I915_NUM_PMU_SAMPLERS
};
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 73079b503724..4fd3d76db346 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -9405,11 +9405,9 @@ enum skl_power_gate {
#define _ICL_AUX_REG_IDX(pw_idx) ((pw_idx) - ICL_PW_CTL_IDX_AUX_A)
#define _ICL_AUX_ANAOVRD1_A 0x162398
#define _ICL_AUX_ANAOVRD1_B 0x6C398
-#define _TGL_AUX_ANAOVRD1_C 0x160398
#define ICL_AUX_ANAOVRD1(pw_idx) _MMIO(_PICK(_ICL_AUX_REG_IDX(pw_idx), \
_ICL_AUX_ANAOVRD1_A, \
- _ICL_AUX_ANAOVRD1_B, \
- _TGL_AUX_ANAOVRD1_C))
+ _ICL_AUX_ANAOVRD1_B))
#define ICL_AUX_ANAOVRD1_LDO_BYPASS (1 << 7)
#define ICL_AUX_ANAOVRD1_ENABLE (1 << 0)
@@ -11994,7 +11992,7 @@ enum skl_power_gate {
/* This register controls the Display State Buffer (DSB) engines. */
#define _DSBSL_INSTANCE_BASE 0x70B00
#define DSBSL_INSTANCE(pipe, id) (_DSBSL_INSTANCE_BASE + \
- (pipe) * 0x1000 + (id) * 100)
+ (pipe) * 0x1000 + (id) * 0x100)
#define DSB_HEAD(pipe, id) _MMIO(DSBSL_INSTANCE(pipe, id) + 0x0)
#define DSB_TAIL(pipe, id) _MMIO(DSBSL_INSTANCE(pipe, id) + 0x4)
#define DSB_CTRL(pipe, id) _MMIO(DSBSL_INSTANCE(pipe, id) + 0x8)
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index bbd71af00a91..765bec89fc0d 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -300,11 +300,11 @@ void i915_request_retire_upto(struct i915_request *rq)
}
static int
-__i915_request_await_execution(struct i915_request *rq,
- struct i915_request *signal,
- void (*hook)(struct i915_request *rq,
- struct dma_fence *signal),
- gfp_t gfp)
+__await_execution(struct i915_request *rq,
+ struct i915_request *signal,
+ void (*hook)(struct i915_request *rq,
+ struct dma_fence *signal),
+ gfp_t gfp)
{
struct execute_cb *cb;
@@ -341,6 +341,8 @@ __i915_request_await_execution(struct i915_request *rq,
}
spin_unlock_irq(&signal->lock);
+ /* Copy across semaphore status as we need the same behaviour */
+ rq->sched.flags |= signal->sched.flags;
return 0;
}
@@ -811,31 +813,21 @@ already_busywaiting(struct i915_request *rq)
}
static int
-emit_semaphore_wait(struct i915_request *to,
- struct i915_request *from,
- gfp_t gfp)
+__emit_semaphore_wait(struct i915_request *to,
+ struct i915_request *from,
+ u32 seqno)
{
const int has_token = INTEL_GEN(to->i915) >= 12;
u32 hwsp_offset;
- int len;
+ int len, err;
u32 *cs;
GEM_BUG_ON(INTEL_GEN(to->i915) < 8);
- /* Just emit the first semaphore we see as request space is limited. */
- if (already_busywaiting(to) & from->engine->mask)
- goto await_fence;
-
- if (i915_request_await_start(to, from) < 0)
- goto await_fence;
-
- /* Only submit our spinner after the signaler is running! */
- if (__i915_request_await_execution(to, from, NULL, gfp))
- goto await_fence;
-
/* We need to pin the signaler's HWSP until we are finished reading. */
- if (intel_timeline_read_hwsp(from, to, &hwsp_offset))
- goto await_fence;
+ err = intel_timeline_read_hwsp(from, to, &hwsp_offset);
+ if (err)
+ return err;
len = 4;
if (has_token)
@@ -858,7 +850,7 @@ emit_semaphore_wait(struct i915_request *to,
MI_SEMAPHORE_POLL |
MI_SEMAPHORE_SAD_GTE_SDD) +
has_token;
- *cs++ = from->fence.seqno;
+ *cs++ = seqno;
*cs++ = hwsp_offset;
*cs++ = 0;
if (has_token) {
@@ -867,6 +859,28 @@ emit_semaphore_wait(struct i915_request *to,
}
intel_ring_advance(to, cs);
+ return 0;
+}
+
+static int
+emit_semaphore_wait(struct i915_request *to,
+ struct i915_request *from,
+ gfp_t gfp)
+{
+ /* Just emit the first semaphore we see as request space is limited. */
+ if (already_busywaiting(to) & from->engine->mask)
+ goto await_fence;
+
+ if (i915_request_await_start(to, from) < 0)
+ goto await_fence;
+
+ /* Only submit our spinner after the signaler is running! */
+ if (__await_execution(to, from, NULL, gfp))
+ goto await_fence;
+
+ if (__emit_semaphore_wait(to, from, from->fence.seqno))
+ goto await_fence;
+
to->sched.semaphores |= from->engine->mask;
to->sched.flags |= I915_SCHED_HAS_SEMAPHORE_CHAIN;
return 0;
@@ -980,6 +994,57 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence)
return 0;
}
+static bool intel_timeline_sync_has_start(struct intel_timeline *tl,
+ struct dma_fence *fence)
+{
+ return __intel_timeline_sync_is_later(tl,
+ fence->context,
+ fence->seqno - 1);
+}
+
+static int intel_timeline_sync_set_start(struct intel_timeline *tl,
+ const struct dma_fence *fence)
+{
+ return __intel_timeline_sync_set(tl, fence->context, fence->seqno - 1);
+}
+
+static int
+__i915_request_await_execution(struct i915_request *to,
+ struct i915_request *from,
+ void (*hook)(struct i915_request *rq,
+ struct dma_fence *signal))
+{
+ int err;
+
+ /* Submit both requests at the same time */
+ err = __await_execution(to, from, hook, I915_FENCE_GFP);
+ if (err)
+ return err;
+
+ /* Squash repeated depenendices to the same timelines */
+ if (intel_timeline_sync_has_start(i915_request_timeline(to),
+ &from->fence))
+ return 0;
+
+ /* Ensure both start together [after all semaphores in signal] */
+ if (intel_engine_has_semaphores(to->engine))
+ err = __emit_semaphore_wait(to, from, from->fence.seqno - 1);
+ else
+ err = i915_request_await_start(to, from);
+ if (err < 0)
+ return err;
+
+ /* Couple the dependency tree for PI on this exposed to->fence */
+ if (to->engine->schedule) {
+ err = i915_sched_node_add_dependency(&to->sched, &from->sched);
+ if (err < 0)
+ return err;
+ }
+
+ return intel_timeline_sync_set_start(i915_request_timeline(to),
+ &from->fence);
+}
+
int
i915_request_await_execution(struct i915_request *rq,
struct dma_fence *fence,
@@ -1013,8 +1078,7 @@ i915_request_await_execution(struct i915_request *rq,
if (dma_fence_is_i915(fence))
ret = __i915_request_await_execution(rq,
to_request(fence),
- hook,
- I915_FENCE_GFP);
+ hook);
else
ret = i915_sw_fence_await_dma_fence(&rq->submit, fence,
I915_FENCE_TIMEOUT,
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 010d67f48ad9..247a9671bca5 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -474,7 +474,6 @@ void i915_sched_node_fini(struct i915_sched_node *node)
* so we may be called out-of-order.
*/
list_for_each_entry_safe(dep, tmp, &node->signalers_list, signal_link) {
- GEM_BUG_ON(!node_signaled(dep->signaler));
GEM_BUG_ON(!list_empty(&dep->dfs_link));
list_del(&dep->wait_link);
diff --git a/drivers/gpu/drm/i915/i915_sw_fence_work.c b/drivers/gpu/drm/i915/i915_sw_fence_work.c
index 07552cd544f2..8538ee7a521d 100644
--- a/drivers/gpu/drm/i915/i915_sw_fence_work.c
+++ b/drivers/gpu/drm/i915/i915_sw_fence_work.c
@@ -78,12 +78,11 @@ static const struct dma_fence_ops fence_ops = {
void dma_fence_work_init(struct dma_fence_work *f,
const struct dma_fence_work_ops *ops)
{
+ f->ops = ops;
spin_lock_init(&f->lock);
dma_fence_init(&f->dma, &fence_ops, &f->lock, 0, 0);
i915_sw_fence_init(&f->chain, fence_notify);
INIT_WORK(&f->work, fence_work);
-
- f->ops = ops;
}
int dma_fence_work_chain(struct dma_fence_work *f, struct dma_fence *signal)
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index e5512f26e20a..01c822256b39 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -1104,8 +1104,14 @@ int i915_vma_move_to_active(struct i915_vma *vma,
return err;
if (flags & EXEC_OBJECT_WRITE) {
- if (intel_frontbuffer_invalidate(obj->frontbuffer, ORIGIN_CS))
- i915_active_add_request(&obj->frontbuffer->write, rq);
+ struct intel_frontbuffer *front;
+
+ front = __intel_frontbuffer_get(obj);
+ if (unlikely(front)) {
+ if (intel_frontbuffer_invalidate(front, ORIGIN_CS))
+ i915_active_add_request(&front->write, rq);
+ intel_frontbuffer_put(front);
+ }
dma_resv_add_excl_fence(vma->resv, &rq->fence);
obj->write_domain = I915_GEM_DOMAIN_RENDER;
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 809bff955b5a..75ae6f495161 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -4291,8 +4291,8 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *crtc_state,
&crtc_state->wm.skl.optimal.planes[plane_id];
if (plane_id == PLANE_CURSOR) {
- if (WARN_ON(wm->wm[level].min_ddb_alloc >
- total[PLANE_CURSOR])) {
+ if (wm->wm[level].min_ddb_alloc > total[PLANE_CURSOR]) {
+ WARN_ON(wm->wm[level].min_ddb_alloc != U16_MAX);
blocks = U32_MAX;
break;
}
diff --git a/drivers/gpu/drm/mcde/mcde_dsi.c b/drivers/gpu/drm/mcde/mcde_dsi.c
index d6214d3c8b33..ef4c630afe3f 100644
--- a/drivers/gpu/drm/mcde/mcde_dsi.c
+++ b/drivers/gpu/drm/mcde/mcde_dsi.c
@@ -935,11 +935,13 @@ static int mcde_dsi_bind(struct device *dev, struct device *master,
for_each_available_child_of_node(dev->of_node, child) {
panel = of_drm_find_panel(child);
if (IS_ERR(panel)) {
- dev_err(dev, "failed to find panel try bridge (%lu)\n",
+ dev_err(dev, "failed to find panel try bridge (%ld)\n",
PTR_ERR(panel));
+ panel = NULL;
+
bridge = of_drm_find_bridge(child);
if (IS_ERR(bridge)) {
- dev_err(dev, "failed to find bridge (%lu)\n",
+ dev_err(dev, "failed to find bridge (%ld)\n",
PTR_ERR(bridge));
return PTR_ERR(bridge);
}
diff --git a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c
index f80a8ba75977..3305a94fc930 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c
+++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c
@@ -215,11 +215,12 @@ struct mtk_ddp_comp *mtk_drm_ddp_comp_for_plane(struct drm_crtc *crtc,
struct mtk_drm_crtc *mtk_crtc = to_mtk_crtc(crtc);
struct mtk_ddp_comp *comp;
int i, count = 0;
+ unsigned int local_index = plane - mtk_crtc->planes;
for (i = 0; i < mtk_crtc->ddp_comp_nr; i++) {
comp = mtk_crtc->ddp_comp[i];
- if (plane->index < (count + mtk_ddp_comp_layer_nr(comp))) {
- *local_layer = plane->index - count;
+ if (local_index < (count + mtk_ddp_comp_layer_nr(comp))) {
+ *local_layer = local_index - count;
return comp;
}
count += mtk_ddp_comp_layer_nr(comp);
@@ -310,7 +311,9 @@ static int mtk_crtc_ddp_hw_init(struct mtk_drm_crtc *mtk_crtc)
plane_state = to_mtk_plane_state(plane->state);
comp = mtk_drm_ddp_comp_for_plane(crtc, plane, &local_layer);
- mtk_ddp_comp_layer_config(comp, local_layer, plane_state);
+ if (comp)
+ mtk_ddp_comp_layer_config(comp, local_layer,
+ plane_state);
}
return 0;
@@ -386,8 +389,9 @@ static void mtk_crtc_ddp_config(struct drm_crtc *crtc)
comp = mtk_drm_ddp_comp_for_plane(crtc, plane,
&local_layer);
- mtk_ddp_comp_layer_config(comp, local_layer,
- plane_state);
+ if (comp)
+ mtk_ddp_comp_layer_config(comp, local_layer,
+ plane_state);
plane_state->pending.config = false;
}
mtk_crtc->pending_planes = false;
@@ -401,7 +405,9 @@ int mtk_drm_crtc_plane_check(struct drm_crtc *crtc, struct drm_plane *plane,
struct mtk_ddp_comp *comp;
comp = mtk_drm_ddp_comp_for_plane(crtc, plane, &local_layer);
- return mtk_ddp_comp_layer_check(comp, local_layer, state);
+ if (comp)
+ return mtk_ddp_comp_layer_check(comp, local_layer, state);
+ return 0;
}
static void mtk_drm_crtc_atomic_enable(struct drm_crtc *crtc,
diff --git a/drivers/gpu/drm/mediatek/mtk_dsi.c b/drivers/gpu/drm/mediatek/mtk_dsi.c
index e9931bbbe846..d77c9f484ce3 100644
--- a/drivers/gpu/drm/mediatek/mtk_dsi.c
+++ b/drivers/gpu/drm/mediatek/mtk_dsi.c
@@ -230,28 +230,25 @@ static void mtk_dsi_mask(struct mtk_dsi *dsi, u32 offset, u32 mask, u32 data)
static void mtk_dsi_phy_timconfig(struct mtk_dsi *dsi)
{
u32 timcon0, timcon1, timcon2, timcon3;
- u32 ui, cycle_time;
+ u32 data_rate_mhz = DIV_ROUND_UP(dsi->data_rate, 1000000);
struct mtk_phy_timing *timing = &dsi->phy_timing;
- ui = DIV_ROUND_UP(1000000000, dsi->data_rate);
- cycle_time = div_u64(8000000000ULL, dsi->data_rate);
+ timing->lpx = (60 * data_rate_mhz / (8 * 1000)) + 1;
+ timing->da_hs_prepare = (80 * data_rate_mhz + 4 * 1000) / 8000;
+ timing->da_hs_zero = (170 * data_rate_mhz + 10 * 1000) / 8000 + 1 -
+ timing->da_hs_prepare;
+ timing->da_hs_trail = timing->da_hs_prepare + 1;
- timing->lpx = NS_TO_CYCLE(60, cycle_time);
- timing->da_hs_prepare = NS_TO_CYCLE(50 + 5 * ui, cycle_time);
- timing->da_hs_zero = NS_TO_CYCLE(110 + 6 * ui, cycle_time);
- timing->da_hs_trail = NS_TO_CYCLE(77 + 4 * ui, cycle_time);
+ timing->ta_go = 4 * timing->lpx - 2;
+ timing->ta_sure = timing->lpx + 2;
+ timing->ta_get = 4 * timing->lpx;
+ timing->da_hs_exit = 2 * timing->lpx + 1;
- timing->ta_go = 4 * timing->lpx;
- timing->ta_sure = 3 * timing->lpx / 2;
- timing->ta_get = 5 * timing->lpx;
- timing->da_hs_exit = 2 * timing->lpx;
-
- timing->clk_hs_zero = NS_TO_CYCLE(336, cycle_time);
- timing->clk_hs_trail = NS_TO_CYCLE(100, cycle_time) + 10;
-
- timing->clk_hs_prepare = NS_TO_CYCLE(64, cycle_time);
- timing->clk_hs_post = NS_TO_CYCLE(80 + 52 * ui, cycle_time);
- timing->clk_hs_exit = 2 * timing->lpx;
+ timing->clk_hs_prepare = 70 * data_rate_mhz / (8 * 1000);
+ timing->clk_hs_post = timing->clk_hs_prepare + 8;
+ timing->clk_hs_trail = timing->clk_hs_prepare;
+ timing->clk_hs_zero = timing->clk_hs_trail * 4;
+ timing->clk_hs_exit = 2 * timing->clk_hs_trail;
timcon0 = timing->lpx | timing->da_hs_prepare << 8 |
timing->da_hs_zero << 16 | timing->da_hs_trail << 24;
@@ -482,27 +479,39 @@ static void mtk_dsi_config_vdo_timing(struct mtk_dsi *dsi)
dsi_tmp_buf_bpp - 10);
data_phy_cycles = timing->lpx + timing->da_hs_prepare +
- timing->da_hs_zero + timing->da_hs_exit + 2;
+ timing->da_hs_zero + timing->da_hs_exit + 3;
if (dsi->mode_flags & MIPI_DSI_MODE_VIDEO_BURST) {
- if (vm->hfront_porch * dsi_tmp_buf_bpp >
+ if ((vm->hfront_porch + vm->hback_porch) * dsi_tmp_buf_bpp >
data_phy_cycles * dsi->lanes + 18) {
- horizontal_frontporch_byte = vm->hfront_porch *
- dsi_tmp_buf_bpp -
- data_phy_cycles *
- dsi->lanes - 18;
+ horizontal_frontporch_byte =
+ vm->hfront_porch * dsi_tmp_buf_bpp -
+ (data_phy_cycles * dsi->lanes + 18) *
+ vm->hfront_porch /
+ (vm->hfront_porch + vm->hback_porch);
+
+ horizontal_backporch_byte =
+ horizontal_backporch_byte -
+ (data_phy_cycles * dsi->lanes + 18) *
+ vm->hback_porch /
+ (vm->hfront_porch + vm->hback_porch);
} else {
DRM_WARN("HFP less than d-phy, FPS will under 60Hz\n");
horizontal_frontporch_byte = vm->hfront_porch *
dsi_tmp_buf_bpp;
}
} else {
- if (vm->hfront_porch * dsi_tmp_buf_bpp >
+ if ((vm->hfront_porch + vm->hback_porch) * dsi_tmp_buf_bpp >
data_phy_cycles * dsi->lanes + 12) {
- horizontal_frontporch_byte = vm->hfront_porch *
- dsi_tmp_buf_bpp -
- data_phy_cycles *
- dsi->lanes - 12;
+ horizontal_frontporch_byte =
+ vm->hfront_porch * dsi_tmp_buf_bpp -
+ (data_phy_cycles * dsi->lanes + 12) *
+ vm->hfront_porch /
+ (vm->hfront_porch + vm->hback_porch);
+ horizontal_backporch_byte = horizontal_backporch_byte -
+ (data_phy_cycles * dsi->lanes + 12) *
+ vm->hback_porch /
+ (vm->hfront_porch + vm->hback_porch);
} else {
DRM_WARN("HFP less than d-phy, FPS will under 60Hz\n");
horizontal_frontporch_byte = vm->hfront_porch *
diff --git a/drivers/gpu/drm/meson/meson_venc_cvbs.c b/drivers/gpu/drm/meson/meson_venc_cvbs.c
index 9ab27aecfcf3..1bd6b6d15ffb 100644
--- a/drivers/gpu/drm/meson/meson_venc_cvbs.c
+++ b/drivers/gpu/drm/meson/meson_venc_cvbs.c
@@ -64,6 +64,25 @@ struct meson_cvbs_mode meson_cvbs_modes[MESON_CVBS_MODES_COUNT] = {
},
};
+static const struct meson_cvbs_mode *
+meson_cvbs_get_mode(const struct drm_display_mode *req_mode)
+{
+ int i;
+
+ for (i = 0; i < MESON_CVBS_MODES_COUNT; ++i) {
+ struct meson_cvbs_mode *meson_mode = &meson_cvbs_modes[i];
+
+ if (drm_mode_match(req_mode, &meson_mode->mode,
+ DRM_MODE_MATCH_TIMINGS |
+ DRM_MODE_MATCH_CLOCK |
+ DRM_MODE_MATCH_FLAGS |
+ DRM_MODE_MATCH_3D_FLAGS))
+ return meson_mode;
+ }
+
+ return NULL;
+}
+
/* Connector */
static void meson_cvbs_connector_destroy(struct drm_connector *connector)
@@ -136,14 +155,8 @@ static int meson_venc_cvbs_encoder_atomic_check(struct drm_encoder *encoder,
struct drm_crtc_state *crtc_state,
struct drm_connector_state *conn_state)
{
- int i;
-
- for (i = 0; i < MESON_CVBS_MODES_COUNT; ++i) {
- struct meson_cvbs_mode *meson_mode = &meson_cvbs_modes[i];
-
- if (drm_mode_equal(&crtc_state->mode, &meson_mode->mode))
- return 0;
- }
+ if (meson_cvbs_get_mode(&crtc_state->mode))
+ return 0;
return -EINVAL;
}
@@ -191,24 +204,17 @@ static void meson_venc_cvbs_encoder_mode_set(struct drm_encoder *encoder,
struct drm_display_mode *mode,
struct drm_display_mode *adjusted_mode)
{
+ const struct meson_cvbs_mode *meson_mode = meson_cvbs_get_mode(mode);
struct meson_venc_cvbs *meson_venc_cvbs =
encoder_to_meson_venc_cvbs(encoder);
struct meson_drm *priv = meson_venc_cvbs->priv;
- int i;
- for (i = 0; i < MESON_CVBS_MODES_COUNT; ++i) {
- struct meson_cvbs_mode *meson_mode = &meson_cvbs_modes[i];
+ if (meson_mode) {
+ meson_venci_cvbs_mode_set(priv, meson_mode->enci);
- if (drm_mode_equal(mode, &meson_mode->mode)) {
- meson_venci_cvbs_mode_set(priv,
- meson_mode->enci);
-
- /* Setup 27MHz vclk2 for ENCI and VDAC */
- meson_vclk_setup(priv, MESON_VCLK_TARGET_CVBS,
- MESON_VCLK_CVBS, MESON_VCLK_CVBS,
- MESON_VCLK_CVBS, true);
- break;
- }
+ /* Setup 27MHz vclk2 for ENCI and VDAC */
+ meson_vclk_setup(priv, MESON_VCLK_TARGET_CVBS, MESON_VCLK_CVBS,
+ MESON_VCLK_CVBS, MESON_VCLK_CVBS, true);
}
}
diff --git a/drivers/gpu/drm/mgag200/mgag200_drv.c b/drivers/gpu/drm/mgag200/mgag200_drv.c
index d43951caeea0..b113876c2428 100644
--- a/drivers/gpu/drm/mgag200/mgag200_drv.c
+++ b/drivers/gpu/drm/mgag200/mgag200_drv.c
@@ -30,9 +30,8 @@ module_param_named(modeset, mgag200_modeset, int, 0400);
static struct drm_driver driver;
static const struct pci_device_id pciidlist[] = {
- { PCI_VENDOR_ID_MATROX, 0x522, PCI_VENDOR_ID_SUN, 0x4852, 0, 0,
+ { PCI_VENDOR_ID_MATROX, 0x522, PCI_ANY_ID, PCI_ANY_ID, 0, 0,
G200_SE_A | MGAG200_FLAG_HW_BUG_NO_STARTADD},
- { PCI_VENDOR_ID_MATROX, 0x522, PCI_ANY_ID, PCI_ANY_ID, 0, 0, G200_SE_A },
{ PCI_VENDOR_ID_MATROX, 0x524, PCI_ANY_ID, PCI_ANY_ID, 0, 0, G200_SE_B },
{ PCI_VENDOR_ID_MATROX, 0x530, PCI_ANY_ID, PCI_ANY_ID, 0, 0, G200_EV },
{ PCI_VENDOR_ID_MATROX, 0x532, PCI_ANY_ID, PCI_ANY_ID, 0, 0, G200_WB },
diff --git a/drivers/gpu/drm/nouveau/dispnv50/atom.h b/drivers/gpu/drm/nouveau/dispnv50/atom.h
index 43df86c38f58..24f7700768da 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/atom.h
+++ b/drivers/gpu/drm/nouveau/dispnv50/atom.h
@@ -114,6 +114,7 @@ struct nv50_head_atom {
u8 nhsync:1;
u8 nvsync:1;
u8 depth:4;
+ u8 bpc;
} or;
/* Currently only used for MST */
diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c
index 549486f1d937..63425e246018 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/disp.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c
@@ -326,9 +326,9 @@ nv50_outp_atomic_check_view(struct drm_encoder *encoder,
* same size as the native one (e.g. different
* refresh rate)
*/
- if (adjusted_mode->hdisplay == native_mode->hdisplay &&
- adjusted_mode->vdisplay == native_mode->vdisplay &&
- adjusted_mode->type & DRM_MODE_TYPE_DRIVER)
+ if (mode->hdisplay == native_mode->hdisplay &&
+ mode->vdisplay == native_mode->vdisplay &&
+ mode->type & DRM_MODE_TYPE_DRIVER)
break;
mode = native_mode;
asyc->scaler.full = true;
@@ -353,10 +353,20 @@ nv50_outp_atomic_check(struct drm_encoder *encoder,
struct drm_crtc_state *crtc_state,
struct drm_connector_state *conn_state)
{
- struct nouveau_connector *nv_connector =
- nouveau_connector(conn_state->connector);
- return nv50_outp_atomic_check_view(encoder, crtc_state, conn_state,
- nv_connector->native_mode);
+ struct drm_connector *connector = conn_state->connector;
+ struct nouveau_connector *nv_connector = nouveau_connector(connector);
+ struct nv50_head_atom *asyh = nv50_head_atom(crtc_state);
+ int ret;
+
+ ret = nv50_outp_atomic_check_view(encoder, crtc_state, conn_state,
+ nv_connector->native_mode);
+ if (ret)
+ return ret;
+
+ if (crtc_state->mode_changed || crtc_state->connectors_changed)
+ asyh->or.bpc = connector->display_info.bpc;
+
+ return 0;
}
/******************************************************************************
@@ -770,32 +780,54 @@ nv50_msto_atomic_check(struct drm_encoder *encoder,
struct nv50_mstm *mstm = mstc->mstm;
struct nv50_head_atom *asyh = nv50_head_atom(crtc_state);
int slots;
+ int ret;
+
+ ret = nv50_outp_atomic_check_view(encoder, crtc_state, conn_state,
+ mstc->native);
+ if (ret)
+ return ret;
+
+ if (!crtc_state->mode_changed && !crtc_state->connectors_changed)
+ return 0;
+
+ /*
+ * When restoring duplicated states, we need to make sure that the bw
+ * remains the same and avoid recalculating it, as the connector's bpc
+ * may have changed after the state was duplicated
+ */
+ if (!state->duplicated) {
+ const int clock = crtc_state->adjusted_mode.clock;
- if (crtc_state->mode_changed || crtc_state->connectors_changed) {
/*
- * When restoring duplicated states, we need to make sure that
- * the bw remains the same and avoid recalculating it, as the
- * connector's bpc may have changed after the state was
- * duplicated
+ * XXX: Since we don't use HDR in userspace quite yet, limit
+ * the bpc to 8 to save bandwidth on the topology. In the
+ * future, we'll want to properly fix this by dynamically
+ * selecting the highest possible bpc that would fit in the
+ * topology
*/
- if (!state->duplicated) {
- const int bpp = connector->display_info.bpc * 3;
- const int clock = crtc_state->adjusted_mode.clock;
+ asyh->or.bpc = min(connector->display_info.bpc, 8U);
+ asyh->dp.pbn = drm_dp_calc_pbn_mode(clock, asyh->or.bpc * 3);
+ }
- asyh->dp.pbn = drm_dp_calc_pbn_mode(clock, bpp);
- }
+ slots = drm_dp_atomic_find_vcpi_slots(state, &mstm->mgr, mstc->port,
+ asyh->dp.pbn);
+ if (slots < 0)
+ return slots;
- slots = drm_dp_atomic_find_vcpi_slots(state, &mstm->mgr,
- mstc->port,
- asyh->dp.pbn);
- if (slots < 0)
- return slots;
+ asyh->dp.tu = slots;
- asyh->dp.tu = slots;
- }
+ return 0;
+}
- return nv50_outp_atomic_check_view(encoder, crtc_state, conn_state,
- mstc->native);
+static u8
+nv50_dp_bpc_to_depth(unsigned int bpc)
+{
+ switch (bpc) {
+ case 6: return 0x2;
+ case 8: return 0x5;
+ case 10: /* fall-through */
+ default: return 0x6;
+ }
}
static void
@@ -808,7 +840,7 @@ nv50_msto_enable(struct drm_encoder *encoder)
struct nv50_mstm *mstm = NULL;
struct drm_connector *connector;
struct drm_connector_list_iter conn_iter;
- u8 proto, depth;
+ u8 proto;
bool r;
drm_connector_list_iter_begin(encoder->dev, &conn_iter);
@@ -837,14 +869,8 @@ nv50_msto_enable(struct drm_encoder *encoder)
else
proto = 0x9;
- switch (mstc->connector.display_info.bpc) {
- case 6: depth = 0x2; break;
- case 8: depth = 0x5; break;
- case 10:
- default: depth = 0x6; break;
- }
-
- mstm->outp->update(mstm->outp, head->base.index, armh, proto, depth);
+ mstm->outp->update(mstm->outp, head->base.index, armh, proto,
+ nv50_dp_bpc_to_depth(armh->or.bpc));
msto->head = head;
msto->mstc = mstc;
@@ -1498,20 +1524,14 @@ nv50_sor_enable(struct drm_encoder *encoder)
lvds.lvds.script |= 0x0200;
}
- if (nv_connector->base.display_info.bpc == 8)
+ if (asyh->or.bpc == 8)
lvds.lvds.script |= 0x0200;
}
nvif_mthd(&disp->disp->object, 0, &lvds, sizeof(lvds));
break;
case DCB_OUTPUT_DP:
- if (nv_connector->base.display_info.bpc == 6)
- depth = 0x2;
- else
- if (nv_connector->base.display_info.bpc == 8)
- depth = 0x5;
- else
- depth = 0x6;
+ depth = nv50_dp_bpc_to_depth(asyh->or.bpc);
if (nv_encoder->link & 1)
proto = 0x8;
@@ -1662,7 +1682,7 @@ nv50_pior_enable(struct drm_encoder *encoder)
nv50_outp_acquire(nv_encoder);
nv_connector = nouveau_encoder_connector_get(nv_encoder);
- switch (nv_connector->base.display_info.bpc) {
+ switch (asyh->or.bpc) {
case 10: asyh->or.depth = 0x6; break;
case 8: asyh->or.depth = 0x5; break;
case 6: asyh->or.depth = 0x2; break;
diff --git a/drivers/gpu/drm/nouveau/dispnv50/head.c b/drivers/gpu/drm/nouveau/dispnv50/head.c
index 71c23bf1fe25..c9692df2b76c 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/head.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/head.c
@@ -81,18 +81,17 @@ nv50_head_atomic_check_dither(struct nv50_head_atom *armh,
struct nv50_head_atom *asyh,
struct nouveau_conn_atom *asyc)
{
- struct drm_connector *connector = asyc->state.connector;
u32 mode = 0x00;
if (asyc->dither.mode == DITHERING_MODE_AUTO) {
- if (asyh->base.depth > connector->display_info.bpc * 3)
+ if (asyh->base.depth > asyh->or.bpc * 3)
mode = DITHERING_MODE_DYNAMIC2X2;
} else {
mode = asyc->dither.mode;
}
if (asyc->dither.depth == DITHERING_DEPTH_AUTO) {
- if (connector->display_info.bpc >= 8)
+ if (asyh->or.bpc >= 8)
mode |= DITHERING_DEPTH_8BPC;
} else {
mode |= asyc->dither.depth;
diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c
index 5b413588b823..9a9a7f5003d3 100644
--- a/drivers/gpu/drm/nouveau/nouveau_connector.c
+++ b/drivers/gpu/drm/nouveau/nouveau_connector.c
@@ -245,14 +245,22 @@ nouveau_conn_atomic_duplicate_state(struct drm_connector *connector)
void
nouveau_conn_reset(struct drm_connector *connector)
{
+ struct nouveau_connector *nv_connector = nouveau_connector(connector);
struct nouveau_conn_atom *asyc;
- if (WARN_ON(!(asyc = kzalloc(sizeof(*asyc), GFP_KERNEL))))
- return;
+ if (drm_drv_uses_atomic_modeset(connector->dev)) {
+ if (WARN_ON(!(asyc = kzalloc(sizeof(*asyc), GFP_KERNEL))))
+ return;
+
+ if (connector->state)
+ nouveau_conn_atomic_destroy_state(connector,
+ connector->state);
+
+ __drm_atomic_helper_connector_reset(connector, &asyc->state);
+ } else {
+ asyc = &nv_connector->properties_state;
+ }
- if (connector->state)
- nouveau_conn_atomic_destroy_state(connector, connector->state);
- __drm_atomic_helper_connector_reset(connector, &asyc->state);
asyc->dither.mode = DITHERING_MODE_AUTO;
asyc->dither.depth = DITHERING_DEPTH_AUTO;
asyc->scaler.mode = DRM_MODE_SCALE_NONE;
@@ -276,8 +284,14 @@ void
nouveau_conn_attach_properties(struct drm_connector *connector)
{
struct drm_device *dev = connector->dev;
- struct nouveau_conn_atom *armc = nouveau_conn_atom(connector->state);
struct nouveau_display *disp = nouveau_display(dev);
+ struct nouveau_connector *nv_connector = nouveau_connector(connector);
+ struct nouveau_conn_atom *armc;
+
+ if (drm_drv_uses_atomic_modeset(connector->dev))
+ armc = nouveau_conn_atom(connector->state);
+ else
+ armc = &nv_connector->properties_state;
/* Init DVI-I specific properties. */
if (connector->connector_type == DRM_MODE_CONNECTOR_DVII)
@@ -748,9 +762,9 @@ static int
nouveau_connector_set_property(struct drm_connector *connector,
struct drm_property *property, uint64_t value)
{
- struct nouveau_conn_atom *asyc = nouveau_conn_atom(connector->state);
struct nouveau_connector *nv_connector = nouveau_connector(connector);
struct nouveau_encoder *nv_encoder = nv_connector->detected_encoder;
+ struct nouveau_conn_atom *asyc = &nv_connector->properties_state;
struct drm_encoder *encoder = to_drm_encoder(nv_encoder);
int ret;
diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.h b/drivers/gpu/drm/nouveau/nouveau_connector.h
index f43a8d63aef8..de84fb4708c7 100644
--- a/drivers/gpu/drm/nouveau/nouveau_connector.h
+++ b/drivers/gpu/drm/nouveau/nouveau_connector.h
@@ -29,6 +29,7 @@
#include <nvif/notify.h>
+#include <drm/drm_crtc.h>
#include <drm/drm_edid.h>
#include <drm/drm_encoder.h>
#include <drm/drm_dp_helper.h>
@@ -44,6 +45,60 @@ struct dcb_output;
struct nouveau_backlight;
#endif
+#define nouveau_conn_atom(p) \
+ container_of((p), struct nouveau_conn_atom, state)
+
+struct nouveau_conn_atom {
+ struct drm_connector_state state;
+
+ struct {
+ /* The enum values specifically defined here match nv50/gf119
+ * hw values, and the code relies on this.
+ */
+ enum {
+ DITHERING_MODE_OFF = 0x00,
+ DITHERING_MODE_ON = 0x01,
+ DITHERING_MODE_DYNAMIC2X2 = 0x10 | DITHERING_MODE_ON,
+ DITHERING_MODE_STATIC2X2 = 0x18 | DITHERING_MODE_ON,
+ DITHERING_MODE_TEMPORAL = 0x20 | DITHERING_MODE_ON,
+ DITHERING_MODE_AUTO
+ } mode;
+ enum {
+ DITHERING_DEPTH_6BPC = 0x00,
+ DITHERING_DEPTH_8BPC = 0x02,
+ DITHERING_DEPTH_AUTO
+ } depth;
+ } dither;
+
+ struct {
+ int mode; /* DRM_MODE_SCALE_* */
+ struct {
+ enum {
+ UNDERSCAN_OFF,
+ UNDERSCAN_ON,
+ UNDERSCAN_AUTO,
+ } mode;
+ u32 hborder;
+ u32 vborder;
+ } underscan;
+ bool full;
+ } scaler;
+
+ struct {
+ int color_vibrance;
+ int vibrant_hue;
+ } procamp;
+
+ union {
+ struct {
+ bool dither:1;
+ bool scaler:1;
+ bool procamp:1;
+ };
+ u8 mask;
+ } set;
+};
+
struct nouveau_connector {
struct drm_connector base;
enum dcb_connector_type type;
@@ -63,6 +118,12 @@ struct nouveau_connector {
#ifdef CONFIG_DRM_NOUVEAU_BACKLIGHT
struct nouveau_backlight *backlight;
#endif
+ /*
+ * Our connector property code expects a nouveau_conn_atom struct
+ * even on pre-nv50 where we do not support atomic. This embedded
+ * version gets used in the non atomic modeset case.
+ */
+ struct nouveau_conn_atom properties_state;
};
static inline struct nouveau_connector *nouveau_connector(
@@ -121,61 +182,6 @@ extern int nouveau_ignorelid;
extern int nouveau_duallink;
extern int nouveau_hdmimhz;
-#include <drm/drm_crtc.h>
-#define nouveau_conn_atom(p) \
- container_of((p), struct nouveau_conn_atom, state)
-
-struct nouveau_conn_atom {
- struct drm_connector_state state;
-
- struct {
- /* The enum values specifically defined here match nv50/gf119
- * hw values, and the code relies on this.
- */
- enum {
- DITHERING_MODE_OFF = 0x00,
- DITHERING_MODE_ON = 0x01,
- DITHERING_MODE_DYNAMIC2X2 = 0x10 | DITHERING_MODE_ON,
- DITHERING_MODE_STATIC2X2 = 0x18 | DITHERING_MODE_ON,
- DITHERING_MODE_TEMPORAL = 0x20 | DITHERING_MODE_ON,
- DITHERING_MODE_AUTO
- } mode;
- enum {
- DITHERING_DEPTH_6BPC = 0x00,
- DITHERING_DEPTH_8BPC = 0x02,
- DITHERING_DEPTH_AUTO
- } depth;
- } dither;
-
- struct {
- int mode; /* DRM_MODE_SCALE_* */
- struct {
- enum {
- UNDERSCAN_OFF,
- UNDERSCAN_ON,
- UNDERSCAN_AUTO,
- } mode;
- u32 hborder;
- u32 vborder;
- } underscan;
- bool full;
- } scaler;
-
- struct {
- int color_vibrance;
- int vibrant_hue;
- } procamp;
-
- union {
- struct {
- bool dither:1;
- bool scaler:1;
- bool procamp:1;
- };
- u8 mask;
- } set;
-};
-
void nouveau_conn_attach_properties(struct drm_connector *);
void nouveau_conn_reset(struct drm_connector *);
struct drm_connector_state *
diff --git a/drivers/gpu/drm/panfrost/panfrost_devfreq.c b/drivers/gpu/drm/panfrost/panfrost_devfreq.c
index 4c4e8a30a1ac..536ba93b0f46 100644
--- a/drivers/gpu/drm/panfrost/panfrost_devfreq.c
+++ b/drivers/gpu/drm/panfrost/panfrost_devfreq.c
@@ -18,15 +18,18 @@ static void panfrost_devfreq_update_utilization(struct panfrost_device *pfdev);
static int panfrost_devfreq_target(struct device *dev, unsigned long *freq,
u32 flags)
{
- struct panfrost_device *pfdev = dev_get_drvdata(dev);
+ struct dev_pm_opp *opp;
int err;
+ opp = devfreq_recommended_opp(dev, freq, flags);
+ if (IS_ERR(opp))
+ return PTR_ERR(opp);
+ dev_pm_opp_put(opp);
+
err = dev_pm_opp_set_rate(dev, *freq);
if (err)
return err;
- *freq = clk_get_rate(pfdev->clock);
-
return 0;
}
@@ -60,20 +63,10 @@ static int panfrost_devfreq_get_dev_status(struct device *dev,
return 0;
}
-static int panfrost_devfreq_get_cur_freq(struct device *dev, unsigned long *freq)
-{
- struct panfrost_device *pfdev = platform_get_drvdata(to_platform_device(dev));
-
- *freq = clk_get_rate(pfdev->clock);
-
- return 0;
-}
-
static struct devfreq_dev_profile panfrost_devfreq_profile = {
.polling_ms = 50, /* ~3 frames */
.target = panfrost_devfreq_target,
.get_dev_status = panfrost_devfreq_get_dev_status,
- .get_cur_freq = panfrost_devfreq_get_cur_freq,
};
int panfrost_devfreq_init(struct panfrost_device *pfdev)
diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c
index 9458dc6c750c..f61364f7c471 100644
--- a/drivers/gpu/drm/panfrost/panfrost_drv.c
+++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
@@ -303,14 +303,17 @@ static int panfrost_ioctl_mmap_bo(struct drm_device *dev, void *data,
}
/* Don't allow mmapping of heap objects as pages are not pinned. */
- if (to_panfrost_bo(gem_obj)->is_heap)
- return -EINVAL;
+ if (to_panfrost_bo(gem_obj)->is_heap) {
+ ret = -EINVAL;
+ goto out;
+ }
ret = drm_gem_create_mmap_offset(gem_obj);
if (ret == 0)
args->offset = drm_vma_node_offset_addr(&gem_obj->vma_node);
- drm_gem_object_put_unlocked(gem_obj);
+out:
+ drm_gem_object_put_unlocked(gem_obj);
return ret;
}
@@ -347,20 +350,19 @@ static int panfrost_ioctl_madvise(struct drm_device *dev, void *data,
return -ENOENT;
}
+ mutex_lock(&pfdev->shrinker_lock);
args->retained = drm_gem_shmem_madvise(gem_obj, args->madv);
if (args->retained) {
struct panfrost_gem_object *bo = to_panfrost_bo(gem_obj);
- mutex_lock(&pfdev->shrinker_lock);
-
if (args->madv == PANFROST_MADV_DONTNEED)
- list_add_tail(&bo->base.madv_list, &pfdev->shrinker_list);
+ list_add_tail(&bo->base.madv_list,
+ &pfdev->shrinker_list);
else if (args->madv == PANFROST_MADV_WILLNEED)
list_del_init(&bo->base.madv_list);
-
- mutex_unlock(&pfdev->shrinker_lock);
}
+ mutex_unlock(&pfdev->shrinker_lock);
drm_gem_object_put_unlocked(gem_obj);
return 0;
@@ -443,7 +445,7 @@ panfrost_postclose(struct drm_device *dev, struct drm_file *file)
{
struct panfrost_file_priv *panfrost_priv = file->driver_priv;
- panfrost_perfcnt_close(panfrost_priv);
+ panfrost_perfcnt_close(file);
panfrost_job_close(panfrost_priv);
panfrost_mmu_pgtable_free(panfrost_priv);
diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.c b/drivers/gpu/drm/panfrost/panfrost_gem.c
index deca0c30bbd4..fd766b1395fb 100644
--- a/drivers/gpu/drm/panfrost/panfrost_gem.c
+++ b/drivers/gpu/drm/panfrost/panfrost_gem.c
@@ -19,6 +19,16 @@ static void panfrost_gem_free_object(struct drm_gem_object *obj)
struct panfrost_gem_object *bo = to_panfrost_bo(obj);
struct panfrost_device *pfdev = obj->dev->dev_private;
+ /*
+ * Make sure the BO is no longer inserted in the shrinker list before
+ * taking care of the destruction itself. If we don't do that we have a
+ * race condition between this function and what's done in
+ * panfrost_gem_shrinker_scan().
+ */
+ mutex_lock(&pfdev->shrinker_lock);
+ list_del_init(&bo->base.madv_list);
+ mutex_unlock(&pfdev->shrinker_lock);
+
if (bo->sgts) {
int i;
int n_sgt = bo->base.base.size / SZ_2M;
@@ -33,15 +43,10 @@ static void panfrost_gem_free_object(struct drm_gem_object *obj)
kfree(bo->sgts);
}
- mutex_lock(&pfdev->shrinker_lock);
- if (!list_empty(&bo->base.madv_list))
- list_del(&bo->base.madv_list);
- mutex_unlock(&pfdev->shrinker_lock);
-
drm_gem_shmem_free_object(obj);
}
-static int panfrost_gem_open(struct drm_gem_object *obj, struct drm_file *file_priv)
+int panfrost_gem_open(struct drm_gem_object *obj, struct drm_file *file_priv)
{
int ret;
size_t size = obj->size;
@@ -80,7 +85,7 @@ static int panfrost_gem_open(struct drm_gem_object *obj, struct drm_file *file_p
return ret;
}
-static void panfrost_gem_close(struct drm_gem_object *obj, struct drm_file *file_priv)
+void panfrost_gem_close(struct drm_gem_object *obj, struct drm_file *file_priv)
{
struct panfrost_gem_object *bo = to_panfrost_bo(obj);
struct panfrost_file_priv *priv = file_priv->driver_priv;
diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.h b/drivers/gpu/drm/panfrost/panfrost_gem.h
index 50920819cc16..4b17e7308764 100644
--- a/drivers/gpu/drm/panfrost/panfrost_gem.h
+++ b/drivers/gpu/drm/panfrost/panfrost_gem.h
@@ -45,6 +45,10 @@ panfrost_gem_create_with_handle(struct drm_file *file_priv,
u32 flags,
uint32_t *handle);
+int panfrost_gem_open(struct drm_gem_object *obj, struct drm_file *file_priv);
+void panfrost_gem_close(struct drm_gem_object *obj,
+ struct drm_file *file_priv);
+
void panfrost_gem_shrinker_init(struct drm_device *dev);
void panfrost_gem_shrinker_cleanup(struct drm_device *dev);
diff --git a/drivers/gpu/drm/panfrost/panfrost_perfcnt.c b/drivers/gpu/drm/panfrost/panfrost_perfcnt.c
index 2dba192bf198..2c04e858c50a 100644
--- a/drivers/gpu/drm/panfrost/panfrost_perfcnt.c
+++ b/drivers/gpu/drm/panfrost/panfrost_perfcnt.c
@@ -67,9 +67,10 @@ static int panfrost_perfcnt_dump_locked(struct panfrost_device *pfdev)
}
static int panfrost_perfcnt_enable_locked(struct panfrost_device *pfdev,
- struct panfrost_file_priv *user,
+ struct drm_file *file_priv,
unsigned int counterset)
{
+ struct panfrost_file_priv *user = file_priv->driver_priv;
struct panfrost_perfcnt *perfcnt = pfdev->perfcnt;
struct drm_gem_shmem_object *bo;
u32 cfg;
@@ -91,14 +92,14 @@ static int panfrost_perfcnt_enable_locked(struct panfrost_device *pfdev,
perfcnt->bo = to_panfrost_bo(&bo->base);
/* Map the perfcnt buf in the address space attached to file_priv. */
- ret = panfrost_mmu_map(perfcnt->bo);
+ ret = panfrost_gem_open(&perfcnt->bo->base.base, file_priv);
if (ret)
goto err_put_bo;
perfcnt->buf = drm_gem_shmem_vmap(&bo->base);
if (IS_ERR(perfcnt->buf)) {
ret = PTR_ERR(perfcnt->buf);
- goto err_put_bo;
+ goto err_close_bo;
}
/*
@@ -157,14 +158,17 @@ static int panfrost_perfcnt_enable_locked(struct panfrost_device *pfdev,
err_vunmap:
drm_gem_shmem_vunmap(&perfcnt->bo->base.base, perfcnt->buf);
+err_close_bo:
+ panfrost_gem_close(&perfcnt->bo->base.base, file_priv);
err_put_bo:
drm_gem_object_put_unlocked(&bo->base);
return ret;
}
static int panfrost_perfcnt_disable_locked(struct panfrost_device *pfdev,
- struct panfrost_file_priv *user)
+ struct drm_file *file_priv)
{
+ struct panfrost_file_priv *user = file_priv->driver_priv;
struct panfrost_perfcnt *perfcnt = pfdev->perfcnt;
if (user != perfcnt->user)
@@ -180,6 +184,7 @@ static int panfrost_perfcnt_disable_locked(struct panfrost_device *pfdev,
perfcnt->user = NULL;
drm_gem_shmem_vunmap(&perfcnt->bo->base.base, perfcnt->buf);
perfcnt->buf = NULL;
+ panfrost_gem_close(&perfcnt->bo->base.base, file_priv);
drm_gem_object_put_unlocked(&perfcnt->bo->base.base);
perfcnt->bo = NULL;
pm_runtime_mark_last_busy(pfdev->dev);
@@ -191,7 +196,6 @@ static int panfrost_perfcnt_disable_locked(struct panfrost_device *pfdev,
int panfrost_ioctl_perfcnt_enable(struct drm_device *dev, void *data,
struct drm_file *file_priv)
{
- struct panfrost_file_priv *pfile = file_priv->driver_priv;
struct panfrost_device *pfdev = dev->dev_private;
struct panfrost_perfcnt *perfcnt = pfdev->perfcnt;
struct drm_panfrost_perfcnt_enable *req = data;
@@ -207,10 +211,10 @@ int panfrost_ioctl_perfcnt_enable(struct drm_device *dev, void *data,
mutex_lock(&perfcnt->lock);
if (req->enable)
- ret = panfrost_perfcnt_enable_locked(pfdev, pfile,
+ ret = panfrost_perfcnt_enable_locked(pfdev, file_priv,
req->counterset);
else
- ret = panfrost_perfcnt_disable_locked(pfdev, pfile);
+ ret = panfrost_perfcnt_disable_locked(pfdev, file_priv);
mutex_unlock(&perfcnt->lock);
return ret;
@@ -248,15 +252,16 @@ out:
return ret;
}
-void panfrost_perfcnt_close(struct panfrost_file_priv *pfile)
+void panfrost_perfcnt_close(struct drm_file *file_priv)
{
+ struct panfrost_file_priv *pfile = file_priv->driver_priv;
struct panfrost_device *pfdev = pfile->pfdev;
struct panfrost_perfcnt *perfcnt = pfdev->perfcnt;
pm_runtime_get_sync(pfdev->dev);
mutex_lock(&perfcnt->lock);
if (perfcnt->user == pfile)
- panfrost_perfcnt_disable_locked(pfdev, pfile);
+ panfrost_perfcnt_disable_locked(pfdev, file_priv);
mutex_unlock(&perfcnt->lock);
pm_runtime_mark_last_busy(pfdev->dev);
pm_runtime_put_autosuspend(pfdev->dev);
diff --git a/drivers/gpu/drm/panfrost/panfrost_perfcnt.h b/drivers/gpu/drm/panfrost/panfrost_perfcnt.h
index 13b8fdaa1b43..8bbcf5f5fb33 100644
--- a/drivers/gpu/drm/panfrost/panfrost_perfcnt.h
+++ b/drivers/gpu/drm/panfrost/panfrost_perfcnt.h
@@ -9,7 +9,7 @@ void panfrost_perfcnt_sample_done(struct panfrost_device *pfdev);
void panfrost_perfcnt_clean_cache_done(struct panfrost_device *pfdev);
int panfrost_perfcnt_init(struct panfrost_device *pfdev);
void panfrost_perfcnt_fini(struct panfrost_device *pfdev);
-void panfrost_perfcnt_close(struct panfrost_file_priv *pfile);
+void panfrost_perfcnt_close(struct drm_file *file_priv);
int panfrost_ioctl_perfcnt_enable(struct drm_device *dev, void *data,
struct drm_file *file_priv);
int panfrost_ioctl_perfcnt_dump(struct drm_device *dev, void *data,
diff --git a/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c b/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c
index a7c4654445c7..68d4644ac2dc 100644
--- a/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c
+++ b/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c
@@ -685,8 +685,6 @@ static void sun4i_hdmi_unbind(struct device *dev, struct device *master,
struct sun4i_hdmi *hdmi = dev_get_drvdata(dev);
cec_unregister_adapter(hdmi->cec_adap);
- drm_connector_cleanup(&hdmi->connector);
- drm_encoder_cleanup(&hdmi->encoder);
i2c_del_adapter(hdmi->i2c);
i2c_put_adapter(hdmi->ddc_i2c);
clk_disable_unprepare(hdmi->mod_clk);
diff --git a/drivers/hid/hid-asus.c b/drivers/hid/hid-asus.c
index 8063b1d567b1..e6e4c841fb06 100644
--- a/drivers/hid/hid-asus.c
+++ b/drivers/hid/hid-asus.c
@@ -261,7 +261,8 @@ static int asus_event(struct hid_device *hdev, struct hid_field *field,
struct hid_usage *usage, __s32 value)
{
if ((usage->hid & HID_USAGE_PAGE) == 0xff310000 &&
- (usage->hid & HID_USAGE) != 0x00 && !usage->type) {
+ (usage->hid & HID_USAGE) != 0x00 &&
+ (usage->hid & HID_USAGE) != 0xff && !usage->type) {
hid_warn(hdev, "Unmapped Asus vendor usagepage code 0x%02x\n",
usage->hid & HID_USAGE);
}
diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index e0b241bd3070..851fe54ea59e 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -288,6 +288,12 @@ static int hid_add_field(struct hid_parser *parser, unsigned report_type, unsign
offset = report->size;
report->size += parser->global.report_size * parser->global.report_count;
+ /* Total size check: Allow for possible report index byte */
+ if (report->size > (HID_MAX_BUFFER_SIZE - 1) << 3) {
+ hid_err(parser->device, "report is too long\n");
+ return -1;
+ }
+
if (!parser->local.usage_index) /* Ignore padding fields */
return 0;
diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
index 7e1689ef35f5..3a400ce603c4 100644
--- a/drivers/hid/hid-ids.h
+++ b/drivers/hid/hid-ids.h
@@ -631,6 +631,7 @@
#define USB_VENDOR_ID_ITE 0x048d
#define USB_DEVICE_ID_ITE_LENOVO_YOGA 0x8386
#define USB_DEVICE_ID_ITE_LENOVO_YOGA2 0x8350
+#define I2C_DEVICE_ID_ITE_LENOVO_LEGION_Y720 0x837a
#define USB_DEVICE_ID_ITE_LENOVO_YOGA900 0x8396
#define USB_DEVICE_ID_ITE8595 0x8595
@@ -730,6 +731,7 @@
#define USB_DEVICE_ID_LG_MULTITOUCH 0x0064
#define USB_DEVICE_ID_LG_MELFAS_MT 0x6007
#define I2C_DEVICE_ID_LG_8001 0x8001
+#define I2C_DEVICE_ID_LG_7010 0x7010
#define USB_VENDOR_ID_LOGITECH 0x046d
#define USB_DEVICE_ID_LOGITECH_AUDIOHUB 0x0a0e
@@ -1102,6 +1104,7 @@
#define USB_DEVICE_ID_SYNAPTICS_LTS2 0x1d10
#define USB_DEVICE_ID_SYNAPTICS_HD 0x0ac3
#define USB_DEVICE_ID_SYNAPTICS_QUAD_HD 0x1ac3
+#define USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5_012 0x2968
#define USB_DEVICE_ID_SYNAPTICS_TP_V103 0x5710
#define USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5 0x81a7
diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c
index 63855f275a38..dea9cc65bf80 100644
--- a/drivers/hid/hid-input.c
+++ b/drivers/hid/hid-input.c
@@ -1132,9 +1132,15 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel
}
mapped:
- if (device->driver->input_mapped && device->driver->input_mapped(device,
- hidinput, field, usage, &bit, &max) < 0)
- goto ignore;
+ if (device->driver->input_mapped &&
+ device->driver->input_mapped(device, hidinput, field, usage,
+ &bit, &max) < 0) {
+ /*
+ * The driver indicated that no further generic handling
+ * of the usage is desired.
+ */
+ return;
+ }
set_bit(usage->type, input->evbit);
@@ -1215,9 +1221,11 @@ mapped:
set_bit(MSC_SCAN, input->mscbit);
}
-ignore:
return;
+ignore:
+ usage->type = 0;
+ usage->code = 0;
}
static void hidinput_handle_scroll(struct hid_usage *usage,
diff --git a/drivers/hid/hid-ite.c b/drivers/hid/hid-ite.c
index a45f2352618d..c436e12feb23 100644
--- a/drivers/hid/hid-ite.c
+++ b/drivers/hid/hid-ite.c
@@ -40,6 +40,9 @@ static int ite_event(struct hid_device *hdev, struct hid_field *field,
static const struct hid_device_id ite_devices[] = {
{ HID_USB_DEVICE(USB_VENDOR_ID_ITE, USB_DEVICE_ID_ITE8595) },
{ HID_USB_DEVICE(USB_VENDOR_ID_258A, USB_DEVICE_ID_258A_6A88) },
+ /* ITE8595 USB kbd ctlr, with Synaptics touchpad connected to it. */
+ { HID_USB_DEVICE(USB_VENDOR_ID_SYNAPTICS,
+ USB_DEVICE_ID_SYNAPTICS_ACER_SWITCH5_012) },
{ }
};
MODULE_DEVICE_TABLE(hid, ite_devices);
diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c
index 3cfeb1629f79..362805ddf377 100644
--- a/drivers/hid/hid-multitouch.c
+++ b/drivers/hid/hid-multitouch.c
@@ -1019,7 +1019,7 @@ static int mt_process_slot(struct mt_device *td, struct input_dev *input,
tool = MT_TOOL_DIAL;
else if (unlikely(!confidence_state)) {
tool = MT_TOOL_PALM;
- if (!active &&
+ if (!active && mt &&
input_mt_is_active(&mt->slots[slotnum])) {
/*
* The non-confidence was reported for
@@ -1985,6 +1985,9 @@ static const struct hid_device_id mt_devices[] = {
{ .driver_data = MT_CLS_LG,
HID_USB_DEVICE(USB_VENDOR_ID_LG,
USB_DEVICE_ID_LG_MELFAS_MT) },
+ { .driver_data = MT_CLS_LG,
+ HID_DEVICE(BUS_I2C, HID_GROUP_GENERIC,
+ USB_VENDOR_ID_LG, I2C_DEVICE_ID_LG_7010) },
/* MosArt panels */
{ .driver_data = MT_CLS_CONFIDENCE_MINUS_ONE,
diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c
index d1b39c29e353..0e7b2d998395 100644
--- a/drivers/hid/hid-quirks.c
+++ b/drivers/hid/hid-quirks.c
@@ -174,6 +174,7 @@ static const struct hid_device_id hid_quirks[] = {
{ HID_USB_DEVICE(USB_VENDOR_ID_WALTOP, USB_DEVICE_ID_WALTOP_SIRIUS_BATTERY_FREE_TABLET), HID_QUIRK_MULTI_INPUT },
{ HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP_LTD2, USB_DEVICE_ID_SMARTJOY_DUAL_PLUS), HID_QUIRK_NOGET | HID_QUIRK_MULTI_INPUT },
{ HID_USB_DEVICE(USB_VENDOR_ID_WISEGROUP, USB_DEVICE_ID_QUAD_USB_JOYPAD), HID_QUIRK_NOGET | HID_QUIRK_MULTI_INPUT },
+ { HID_USB_DEVICE(USB_VENDOR_ID_XIN_MO, USB_DEVICE_ID_XIN_MO_DUAL_ARCADE), HID_QUIRK_MULTI_INPUT },
{ 0 }
};
diff --git a/drivers/hid/hid-steam.c b/drivers/hid/hid-steam.c
index 8dae0f9b819e..6286204d4c56 100644
--- a/drivers/hid/hid-steam.c
+++ b/drivers/hid/hid-steam.c
@@ -768,8 +768,12 @@ static int steam_probe(struct hid_device *hdev,
if (steam->quirks & STEAM_QUIRK_WIRELESS) {
hid_info(hdev, "Steam wireless receiver connected");
+ /* If using a wireless adaptor ask for connection status */
+ steam->connected = false;
steam_request_conn_status(steam);
} else {
+ /* A wired connection is always present */
+ steam->connected = true;
ret = steam_register(steam);
if (ret) {
hid_err(hdev,
diff --git a/drivers/hid/hidraw.c b/drivers/hid/hidraw.c
index c3fc0ceb8096..f61f2123a755 100644
--- a/drivers/hid/hidraw.c
+++ b/drivers/hid/hidraw.c
@@ -252,10 +252,10 @@ static __poll_t hidraw_poll(struct file *file, poll_table *wait)
poll_wait(file, &list->hidraw->wait, wait);
if (list->head != list->tail)
- return EPOLLIN | EPOLLRDNORM | EPOLLOUT;
+ return EPOLLIN | EPOLLRDNORM;
if (!list->hidraw->exist)
return EPOLLERR | EPOLLHUP;
- return 0;
+ return EPOLLOUT | EPOLLWRNORM;
}
static int hidraw_open(struct inode *inode, struct file *file)
diff --git a/drivers/hid/i2c-hid/i2c-hid-core.c b/drivers/hid/i2c-hid/i2c-hid-core.c
index a358e61fbc82..009000c5d55c 100644
--- a/drivers/hid/i2c-hid/i2c-hid-core.c
+++ b/drivers/hid/i2c-hid/i2c-hid-core.c
@@ -49,6 +49,8 @@
#define I2C_HID_QUIRK_NO_IRQ_AFTER_RESET BIT(1)
#define I2C_HID_QUIRK_BOGUS_IRQ BIT(4)
#define I2C_HID_QUIRK_RESET_ON_RESUME BIT(5)
+#define I2C_HID_QUIRK_BAD_INPUT_SIZE BIT(6)
+
/* flags */
#define I2C_HID_STARTED 0
@@ -175,6 +177,8 @@ static const struct i2c_hid_quirks {
I2C_HID_QUIRK_BOGUS_IRQ },
{ USB_VENDOR_ID_ALPS_JP, HID_ANY_ID,
I2C_HID_QUIRK_RESET_ON_RESUME },
+ { USB_VENDOR_ID_ITE, I2C_DEVICE_ID_ITE_LENOVO_LEGION_Y720,
+ I2C_HID_QUIRK_BAD_INPUT_SIZE },
{ 0, 0 }
};
@@ -496,9 +500,15 @@ static void i2c_hid_get_input(struct i2c_hid *ihid)
}
if ((ret_size > size) || (ret_size < 2)) {
- dev_err(&ihid->client->dev, "%s: incomplete report (%d/%d)\n",
- __func__, size, ret_size);
- return;
+ if (ihid->quirks & I2C_HID_QUIRK_BAD_INPUT_SIZE) {
+ ihid->inbuf[0] = size & 0xff;
+ ihid->inbuf[1] = size >> 8;
+ ret_size = size;
+ } else {
+ dev_err(&ihid->client->dev, "%s: incomplete report (%d/%d)\n",
+ __func__, size, ret_size);
+ return;
+ }
}
i2c_hid_dbg(ihid, "input: %*ph\n", ret_size, ihid->inbuf);
diff --git a/drivers/hid/intel-ish-hid/ipc/hw-ish.h b/drivers/hid/intel-ish-hid/ipc/hw-ish.h
index 6c1e6110867f..1fb294ca463e 100644
--- a/drivers/hid/intel-ish-hid/ipc/hw-ish.h
+++ b/drivers/hid/intel-ish-hid/ipc/hw-ish.h
@@ -24,7 +24,9 @@
#define ICL_MOBILE_DEVICE_ID 0x34FC
#define SPT_H_DEVICE_ID 0xA135
#define CML_LP_DEVICE_ID 0x02FC
+#define CMP_H_DEVICE_ID 0x06FC
#define EHL_Ax_DEVICE_ID 0x4BB3
+#define TGL_LP_DEVICE_ID 0xA0FC
#define REVISION_ID_CHT_A0 0x6
#define REVISION_ID_CHT_Ax_SI 0x0
diff --git a/drivers/hid/intel-ish-hid/ipc/pci-ish.c b/drivers/hid/intel-ish-hid/ipc/pci-ish.c
index 784dcc8c7022..f491d8b4e24c 100644
--- a/drivers/hid/intel-ish-hid/ipc/pci-ish.c
+++ b/drivers/hid/intel-ish-hid/ipc/pci-ish.c
@@ -34,7 +34,9 @@ static const struct pci_device_id ish_pci_tbl[] = {
{PCI_DEVICE(PCI_VENDOR_ID_INTEL, ICL_MOBILE_DEVICE_ID)},
{PCI_DEVICE(PCI_VENDOR_ID_INTEL, SPT_H_DEVICE_ID)},
{PCI_DEVICE(PCI_VENDOR_ID_INTEL, CML_LP_DEVICE_ID)},
+ {PCI_DEVICE(PCI_VENDOR_ID_INTEL, CMP_H_DEVICE_ID)},
{PCI_DEVICE(PCI_VENDOR_ID_INTEL, EHL_Ax_DEVICE_ID)},
+ {PCI_DEVICE(PCI_VENDOR_ID_INTEL, TGL_LP_DEVICE_ID)},
{0, }
};
MODULE_DEVICE_TABLE(pci, ish_pci_tbl);
diff --git a/drivers/hid/uhid.c b/drivers/hid/uhid.c
index fa0cc0899827..935c3d0a3b63 100644
--- a/drivers/hid/uhid.c
+++ b/drivers/hid/uhid.c
@@ -772,7 +772,7 @@ static __poll_t uhid_char_poll(struct file *file, poll_table *wait)
if (uhid->head != uhid->tail)
return EPOLLIN | EPOLLRDNORM;
- return 0;
+ return EPOLLOUT | EPOLLWRNORM;
}
static const struct file_operations uhid_fops = {
diff --git a/drivers/hid/usbhid/hiddev.c b/drivers/hid/usbhid/hiddev.c
index e421cdf2d1a4..a970b809d778 100644
--- a/drivers/hid/usbhid/hiddev.c
+++ b/drivers/hid/usbhid/hiddev.c
@@ -241,12 +241,51 @@ static int hiddev_release(struct inode * inode, struct file * file)
return 0;
}
+static int __hiddev_open(struct hiddev *hiddev, struct file *file)
+{
+ struct hiddev_list *list;
+ int error;
+
+ lockdep_assert_held(&hiddev->existancelock);
+
+ list = vzalloc(sizeof(*list));
+ if (!list)
+ return -ENOMEM;
+
+ mutex_init(&list->thread_lock);
+ list->hiddev = hiddev;
+
+ if (!hiddev->open++) {
+ error = hid_hw_power(hiddev->hid, PM_HINT_FULLON);
+ if (error < 0)
+ goto err_drop_count;
+
+ error = hid_hw_open(hiddev->hid);
+ if (error < 0)
+ goto err_normal_power;
+ }
+
+ spin_lock_irq(&hiddev->list_lock);
+ list_add_tail(&list->node, &hiddev->list);
+ spin_unlock_irq(&hiddev->list_lock);
+
+ file->private_data = list;
+
+ return 0;
+
+err_normal_power:
+ hid_hw_power(hiddev->hid, PM_HINT_NORMAL);
+err_drop_count:
+ hiddev->open--;
+ vfree(list);
+ return error;
+}
+
/*
* open file op
*/
static int hiddev_open(struct inode *inode, struct file *file)
{
- struct hiddev_list *list;
struct usb_interface *intf;
struct hid_device *hid;
struct hiddev *hiddev;
@@ -255,66 +294,14 @@ static int hiddev_open(struct inode *inode, struct file *file)
intf = usbhid_find_interface(iminor(inode));
if (!intf)
return -ENODEV;
+
hid = usb_get_intfdata(intf);
hiddev = hid->hiddev;
- if (!(list = vzalloc(sizeof(struct hiddev_list))))
- return -ENOMEM;
- mutex_init(&list->thread_lock);
- list->hiddev = hiddev;
- file->private_data = list;
-
- /*
- * no need for locking because the USB major number
- * is shared which usbcore guards against disconnect
- */
- if (list->hiddev->exist) {
- if (!list->hiddev->open++) {
- res = hid_hw_open(hiddev->hid);
- if (res < 0)
- goto bail;
- }
- } else {
- res = -ENODEV;
- goto bail;
- }
-
- spin_lock_irq(&list->hiddev->list_lock);
- list_add_tail(&list->node, &hiddev->list);
- spin_unlock_irq(&list->hiddev->list_lock);
-
mutex_lock(&hiddev->existancelock);
- /*
- * recheck exist with existance lock held to
- * avoid opening a disconnected device
- */
- if (!list->hiddev->exist) {
- res = -ENODEV;
- goto bail_unlock;
- }
- if (!list->hiddev->open++)
- if (list->hiddev->exist) {
- struct hid_device *hid = hiddev->hid;
- res = hid_hw_power(hid, PM_HINT_FULLON);
- if (res < 0)
- goto bail_unlock;
- res = hid_hw_open(hid);
- if (res < 0)
- goto bail_normal_power;
- }
- mutex_unlock(&hiddev->existancelock);
- return 0;
-bail_normal_power:
- hid_hw_power(hid, PM_HINT_NORMAL);
-bail_unlock:
+ res = hiddev->exist ? __hiddev_open(hiddev, file) : -ENODEV;
mutex_unlock(&hiddev->existancelock);
- spin_lock_irq(&list->hiddev->list_lock);
- list_del(&list->node);
- spin_unlock_irq(&list->hiddev->list_lock);
-bail:
- file->private_data = NULL;
- vfree(list);
return res;
}
diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c
index ccb74529bc78..d99a9d407671 100644
--- a/drivers/hid/wacom_wac.c
+++ b/drivers/hid/wacom_wac.c
@@ -2096,14 +2096,16 @@ static void wacom_wac_pad_event(struct hid_device *hdev, struct hid_field *field
(hdev->product == 0x34d || hdev->product == 0x34e || /* MobileStudio Pro */
hdev->product == 0x357 || hdev->product == 0x358 || /* Intuos Pro 2 */
hdev->product == 0x392 || /* Intuos Pro 2 */
- hdev->product == 0x398 || hdev->product == 0x399)) { /* MobileStudio Pro */
+ hdev->product == 0x398 || hdev->product == 0x399 || /* MobileStudio Pro */
+ hdev->product == 0x3AA)) { /* MobileStudio Pro */
value = (field->logical_maximum - value);
if (hdev->product == 0x357 || hdev->product == 0x358 ||
hdev->product == 0x392)
value = wacom_offset_rotation(input, usage, value, 3, 16);
else if (hdev->product == 0x34d || hdev->product == 0x34e ||
- hdev->product == 0x398 || hdev->product == 0x399)
+ hdev->product == 0x398 || hdev->product == 0x399 ||
+ hdev->product == 0x3AA)
value = wacom_offset_rotation(input, usage, value, 1, 2);
}
else {
diff --git a/drivers/hwtracing/intel_th/core.c b/drivers/hwtracing/intel_th/core.c
index 0dfd97bbde9e..ca232ec565e8 100644
--- a/drivers/hwtracing/intel_th/core.c
+++ b/drivers/hwtracing/intel_th/core.c
@@ -834,9 +834,6 @@ static irqreturn_t intel_th_irq(int irq, void *data)
ret |= d->irq(th->thdev[i]);
}
- if (ret == IRQ_NONE)
- pr_warn_ratelimited("nobody cared for irq\n");
-
return ret;
}
@@ -887,6 +884,7 @@ intel_th_alloc(struct device *dev, struct intel_th_drvdata *drvdata,
if (th->irq == -1)
th->irq = devres[r].start;
+ th->num_irqs++;
break;
default:
dev_warn(dev, "Unknown resource type %lx\n",
@@ -940,6 +938,9 @@ void intel_th_free(struct intel_th *th)
th->num_thdevs = 0;
+ for (i = 0; i < th->num_irqs; i++)
+ devm_free_irq(th->dev, th->irq + i, th);
+
pm_runtime_get_sync(th->dev);
pm_runtime_forbid(th->dev);
diff --git a/drivers/hwtracing/intel_th/intel_th.h b/drivers/hwtracing/intel_th/intel_th.h
index 0df480072b6c..6f4f5486fe6d 100644
--- a/drivers/hwtracing/intel_th/intel_th.h
+++ b/drivers/hwtracing/intel_th/intel_th.h
@@ -261,6 +261,7 @@ enum th_mmio_idx {
* @num_thdevs: number of devices in the @thdev array
* @num_resources: number of resources in the @resource array
* @irq: irq number
+ * @num_irqs: number of IRQs is use
* @id: this Intel TH controller's device ID in the system
* @major: device node major for output devices
*/
@@ -277,6 +278,7 @@ struct intel_th {
unsigned int num_thdevs;
unsigned int num_resources;
int irq;
+ int num_irqs;
int id;
int major;
diff --git a/drivers/hwtracing/intel_th/msu.c b/drivers/hwtracing/intel_th/msu.c
index 6d240dfae9d9..8e48c7458aa3 100644
--- a/drivers/hwtracing/intel_th/msu.c
+++ b/drivers/hwtracing/intel_th/msu.c
@@ -1676,10 +1676,13 @@ static int intel_th_msc_init(struct msc *msc)
return 0;
}
-static void msc_win_switch(struct msc *msc)
+static int msc_win_switch(struct msc *msc)
{
struct msc_window *first;
+ if (list_empty(&msc->win_list))
+ return -EINVAL;
+
first = list_first_entry(&msc->win_list, struct msc_window, entry);
if (msc_is_last_win(msc->cur_win))
@@ -1691,6 +1694,8 @@ static void msc_win_switch(struct msc *msc)
msc->base_addr = msc_win_base_dma(msc->cur_win);
intel_th_trace_switch(msc->thdev);
+
+ return 0;
}
/**
@@ -2025,16 +2030,15 @@ win_switch_store(struct device *dev, struct device_attribute *attr,
if (val != 1)
return -EINVAL;
+ ret = -EINVAL;
mutex_lock(&msc->buf_mutex);
/*
* Window switch can only happen in the "multi" mode.
* If a external buffer is engaged, they have the full
* control over window switching.
*/
- if (msc->mode != MSC_MODE_MULTI || msc->mbuf)
- ret = -ENOTSUPP;
- else
- msc_win_switch(msc);
+ if (msc->mode == MSC_MODE_MULTI && !msc->mbuf)
+ ret = msc_win_switch(msc);
mutex_unlock(&msc->buf_mutex);
return ret ? ret : size;
diff --git a/drivers/hwtracing/intel_th/pci.c b/drivers/hwtracing/intel_th/pci.c
index ebf3e30e989a..e9d90b53bbc4 100644
--- a/drivers/hwtracing/intel_th/pci.c
+++ b/drivers/hwtracing/intel_th/pci.c
@@ -205,6 +205,11 @@ static const struct pci_device_id intel_th_pci_id_table[] = {
.driver_data = (kernel_ulong_t)&intel_th_2x,
},
{
+ /* Comet Lake PCH-V */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xa3a6),
+ .driver_data = (kernel_ulong_t)&intel_th_2x,
+ },
+ {
/* Ice Lake NNPI */
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x45c5),
.driver_data = (kernel_ulong_t)&intel_th_2x,
@@ -229,6 +234,11 @@ static const struct pci_device_id intel_th_pci_id_table[] = {
PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x4da6),
.driver_data = (kernel_ulong_t)&intel_th_2x,
},
+ {
+ /* Elkhart Lake */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x4b26),
+ .driver_data = (kernel_ulong_t)&intel_th_2x,
+ },
{ 0 },
};
diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c
index 9333c865d4a9..9f8dcd3f8385 100644
--- a/drivers/i2c/i2c-core-base.c
+++ b/drivers/i2c/i2c-core-base.c
@@ -896,29 +896,6 @@ struct i2c_client *i2c_new_dummy_device(struct i2c_adapter *adapter, u16 address
}
EXPORT_SYMBOL_GPL(i2c_new_dummy_device);
-/**
- * i2c_new_dummy - return a new i2c device bound to a dummy driver
- * @adapter: the adapter managing the device
- * @address: seven bit address to be used
- * Context: can sleep
- *
- * This deprecated function has the same functionality as @i2c_new_dummy_device,
- * it just returns NULL instead of an ERR_PTR in case of an error for
- * compatibility with current I2C API. It will be removed once all users are
- * converted.
- *
- * This returns the new i2c client, which should be saved for later use with
- * i2c_unregister_device(); or NULL to indicate an error.
- */
-struct i2c_client *i2c_new_dummy(struct i2c_adapter *adapter, u16 address)
-{
- struct i2c_client *ret;
-
- ret = i2c_new_dummy_device(adapter, address);
- return IS_ERR(ret) ? NULL : ret;
-}
-EXPORT_SYMBOL_GPL(i2c_new_dummy);
-
struct i2c_dummy_devres {
struct i2c_client *client;
};
diff --git a/drivers/iio/accel/st_accel_core.c b/drivers/iio/accel/st_accel_core.c
index 7b837641f166..7320275c7e56 100644
--- a/drivers/iio/accel/st_accel_core.c
+++ b/drivers/iio/accel/st_accel_core.c
@@ -992,6 +992,7 @@ static const struct iio_trigger_ops st_accel_trigger_ops = {
#define ST_ACCEL_TRIGGER_OPS NULL
#endif
+#ifdef CONFIG_ACPI
static const struct iio_mount_matrix *
get_mount_matrix(const struct iio_dev *indio_dev,
const struct iio_chan_spec *chan)
@@ -1012,7 +1013,6 @@ static const struct iio_chan_spec_ext_info mount_matrix_ext_info[] = {
static int apply_acpi_orientation(struct iio_dev *indio_dev,
struct iio_chan_spec *channels)
{
-#ifdef CONFIG_ACPI
struct st_sensor_data *adata = iio_priv(indio_dev);
struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL};
struct acpi_device *adev;
@@ -1140,10 +1140,14 @@ static int apply_acpi_orientation(struct iio_dev *indio_dev,
out:
kfree(buffer.pointer);
return ret;
+}
#else /* !CONFIG_ACPI */
+static int apply_acpi_orientation(struct iio_dev *indio_dev,
+ struct iio_chan_spec *channels)
+{
return 0;
-#endif
}
+#endif
/*
* st_accel_get_settings() - get sensor settings from device name
diff --git a/drivers/iio/adc/ad7124.c b/drivers/iio/adc/ad7124.c
index edc6f1cc90b2..3f03abf100b5 100644
--- a/drivers/iio/adc/ad7124.c
+++ b/drivers/iio/adc/ad7124.c
@@ -39,6 +39,8 @@
#define AD7124_STATUS_POR_FLAG_MSK BIT(4)
/* AD7124_ADC_CONTROL */
+#define AD7124_ADC_CTRL_REF_EN_MSK BIT(8)
+#define AD7124_ADC_CTRL_REF_EN(x) FIELD_PREP(AD7124_ADC_CTRL_REF_EN_MSK, x)
#define AD7124_ADC_CTRL_PWR_MSK GENMASK(7, 6)
#define AD7124_ADC_CTRL_PWR(x) FIELD_PREP(AD7124_ADC_CTRL_PWR_MSK, x)
#define AD7124_ADC_CTRL_MODE_MSK GENMASK(5, 2)
@@ -424,7 +426,10 @@ static int ad7124_init_channel_vref(struct ad7124_state *st,
break;
case AD7124_INT_REF:
st->channel_config[channel_number].vref_mv = 2500;
- break;
+ st->adc_control &= ~AD7124_ADC_CTRL_REF_EN_MSK;
+ st->adc_control |= AD7124_ADC_CTRL_REF_EN(1);
+ return ad_sd_write_reg(&st->sd, AD7124_ADC_CONTROL,
+ 2, st->adc_control);
default:
dev_err(&st->sd.spi->dev, "Invalid reference %d\n", refsel);
return -EINVAL;
diff --git a/drivers/iio/adc/ad7606.c b/drivers/iio/adc/ad7606.c
index f5ba94c03a8d..e4683a68522a 100644
--- a/drivers/iio/adc/ad7606.c
+++ b/drivers/iio/adc/ad7606.c
@@ -85,7 +85,7 @@ err_unlock:
static int ad7606_read_samples(struct ad7606_state *st)
{
- unsigned int num = st->chip_info->num_channels;
+ unsigned int num = st->chip_info->num_channels - 1;
u16 *data = st->data;
int ret;
diff --git a/drivers/iio/adc/ad7949.c b/drivers/iio/adc/ad7949.c
index 5c2b3446fa4a..2c6f60edb7ce 100644
--- a/drivers/iio/adc/ad7949.c
+++ b/drivers/iio/adc/ad7949.c
@@ -89,6 +89,7 @@ static int ad7949_spi_read_channel(struct ad7949_adc_chip *ad7949_adc, int *val,
unsigned int channel)
{
int ret;
+ int i;
int bits_per_word = ad7949_adc->resolution;
int mask = GENMASK(ad7949_adc->resolution, 0);
struct spi_message msg;
@@ -100,12 +101,23 @@ static int ad7949_spi_read_channel(struct ad7949_adc_chip *ad7949_adc, int *val,
},
};
- ret = ad7949_spi_write_cfg(ad7949_adc,
- channel << AD7949_OFFSET_CHANNEL_SEL,
- AD7949_MASK_CHANNEL_SEL);
- if (ret)
- return ret;
+ /*
+ * 1: write CFG for sample N and read old data (sample N-2)
+ * 2: if CFG was not changed since sample N-1 then we'll get good data
+ * at the next xfer, so we bail out now, otherwise we write something
+ * and we read garbage (sample N-1 configuration).
+ */
+ for (i = 0; i < 2; i++) {
+ ret = ad7949_spi_write_cfg(ad7949_adc,
+ channel << AD7949_OFFSET_CHANNEL_SEL,
+ AD7949_MASK_CHANNEL_SEL);
+ if (ret)
+ return ret;
+ if (channel == ad7949_adc->current_channel)
+ break;
+ }
+ /* 3: write something and read actual data */
ad7949_adc->buffer = 0;
spi_message_init_with_transfers(&msg, tx, 1);
ret = spi_sync(ad7949_adc->spi, &msg);
diff --git a/drivers/iio/adc/intel_mrfld_adc.c b/drivers/iio/adc/intel_mrfld_adc.c
index 67d096f8180d..c35a1beb817c 100644
--- a/drivers/iio/adc/intel_mrfld_adc.c
+++ b/drivers/iio/adc/intel_mrfld_adc.c
@@ -185,7 +185,7 @@ static int mrfld_adc_probe(struct platform_device *pdev)
int irq;
int ret;
- indio_dev = devm_iio_device_alloc(dev, sizeof(*indio_dev));
+ indio_dev = devm_iio_device_alloc(dev, sizeof(struct mrfld_adc));
if (!indio_dev)
return -ENOMEM;
diff --git a/drivers/iio/adc/max1027.c b/drivers/iio/adc/max1027.c
index e171db20c04a..02834ca3e1ce 100644
--- a/drivers/iio/adc/max1027.c
+++ b/drivers/iio/adc/max1027.c
@@ -478,7 +478,13 @@ static int max1027_probe(struct spi_device *spi)
st->trig->ops = &max1027_trigger_ops;
st->trig->dev.parent = &spi->dev;
iio_trigger_set_drvdata(st->trig, indio_dev);
- iio_trigger_register(st->trig);
+ ret = devm_iio_trigger_register(&indio_dev->dev,
+ st->trig);
+ if (ret < 0) {
+ dev_err(&indio_dev->dev,
+ "Failed to register iio trigger\n");
+ return ret;
+ }
ret = devm_request_threaded_irq(&spi->dev, spi->irq,
iio_trigger_generic_data_rdy_poll,
diff --git a/drivers/iio/adc/max9611.c b/drivers/iio/adc/max9611.c
index da073d72f649..e480529b3f04 100644
--- a/drivers/iio/adc/max9611.c
+++ b/drivers/iio/adc/max9611.c
@@ -89,6 +89,12 @@
#define MAX9611_TEMP_SCALE_NUM 1000000
#define MAX9611_TEMP_SCALE_DIV 2083
+/*
+ * Conversion time is 2 ms (typically) at Ta=25 degreeC
+ * No maximum value is known, so play it safe.
+ */
+#define MAX9611_CONV_TIME_US_RANGE 3000, 3300
+
struct max9611_dev {
struct device *dev;
struct i2c_client *i2c_client;
@@ -236,11 +242,9 @@ static int max9611_read_single(struct max9611_dev *max9611,
return ret;
}
- /*
- * need a delay here to make register configuration
- * stabilize. 1 msec at least, from empirical testing.
- */
- usleep_range(1000, 2000);
+ /* need a delay here to make register configuration stabilize. */
+
+ usleep_range(MAX9611_CONV_TIME_US_RANGE);
ret = i2c_smbus_read_word_swapped(max9611->i2c_client, reg_addr);
if (ret < 0) {
@@ -507,7 +511,7 @@ static int max9611_init(struct max9611_dev *max9611)
MAX9611_REG_CTRL2, 0);
return ret;
}
- usleep_range(1000, 2000);
+ usleep_range(MAX9611_CONV_TIME_US_RANGE);
return 0;
}
diff --git a/drivers/iio/humidity/hdc100x.c b/drivers/iio/humidity/hdc100x.c
index 963ff043eecf..7ecd2ffa3132 100644
--- a/drivers/iio/humidity/hdc100x.c
+++ b/drivers/iio/humidity/hdc100x.c
@@ -229,7 +229,7 @@ static int hdc100x_read_raw(struct iio_dev *indio_dev,
*val2 = 65536;
return IIO_VAL_FRACTIONAL;
} else {
- *val = 100;
+ *val = 100000;
*val2 = 65536;
return IIO_VAL_FRACTIONAL;
}
diff --git a/drivers/iio/imu/inv_mpu6050/inv_mpu_core.c b/drivers/iio/imu/inv_mpu6050/inv_mpu_core.c
index 45e77b308238..0686e41bb8a1 100644
--- a/drivers/iio/imu/inv_mpu6050/inv_mpu_core.c
+++ b/drivers/iio/imu/inv_mpu6050/inv_mpu_core.c
@@ -117,6 +117,7 @@ static const struct inv_mpu6050_hw hw_info[] = {
.reg = &reg_set_6050,
.config = &chip_config_6050,
.fifo_size = 1024,
+ .temp = {INV_MPU6050_TEMP_OFFSET, INV_MPU6050_TEMP_SCALE},
},
{
.whoami = INV_MPU6500_WHOAMI_VALUE,
@@ -124,6 +125,7 @@ static const struct inv_mpu6050_hw hw_info[] = {
.reg = &reg_set_6500,
.config = &chip_config_6050,
.fifo_size = 512,
+ .temp = {INV_MPU6500_TEMP_OFFSET, INV_MPU6500_TEMP_SCALE},
},
{
.whoami = INV_MPU6515_WHOAMI_VALUE,
@@ -131,6 +133,7 @@ static const struct inv_mpu6050_hw hw_info[] = {
.reg = &reg_set_6500,
.config = &chip_config_6050,
.fifo_size = 512,
+ .temp = {INV_MPU6500_TEMP_OFFSET, INV_MPU6500_TEMP_SCALE},
},
{
.whoami = INV_MPU6000_WHOAMI_VALUE,
@@ -138,6 +141,7 @@ static const struct inv_mpu6050_hw hw_info[] = {
.reg = &reg_set_6050,
.config = &chip_config_6050,
.fifo_size = 1024,
+ .temp = {INV_MPU6050_TEMP_OFFSET, INV_MPU6050_TEMP_SCALE},
},
{
.whoami = INV_MPU9150_WHOAMI_VALUE,
@@ -145,6 +149,7 @@ static const struct inv_mpu6050_hw hw_info[] = {
.reg = &reg_set_6050,
.config = &chip_config_6050,
.fifo_size = 1024,
+ .temp = {INV_MPU6050_TEMP_OFFSET, INV_MPU6050_TEMP_SCALE},
},
{
.whoami = INV_MPU9250_WHOAMI_VALUE,
@@ -152,6 +157,7 @@ static const struct inv_mpu6050_hw hw_info[] = {
.reg = &reg_set_6500,
.config = &chip_config_6050,
.fifo_size = 512,
+ .temp = {INV_MPU6500_TEMP_OFFSET, INV_MPU6500_TEMP_SCALE},
},
{
.whoami = INV_MPU9255_WHOAMI_VALUE,
@@ -159,6 +165,7 @@ static const struct inv_mpu6050_hw hw_info[] = {
.reg = &reg_set_6500,
.config = &chip_config_6050,
.fifo_size = 512,
+ .temp = {INV_MPU6500_TEMP_OFFSET, INV_MPU6500_TEMP_SCALE},
},
{
.whoami = INV_ICM20608_WHOAMI_VALUE,
@@ -166,6 +173,7 @@ static const struct inv_mpu6050_hw hw_info[] = {
.reg = &reg_set_6500,
.config = &chip_config_6050,
.fifo_size = 512,
+ .temp = {INV_ICM20608_TEMP_OFFSET, INV_ICM20608_TEMP_SCALE},
},
{
.whoami = INV_ICM20602_WHOAMI_VALUE,
@@ -173,6 +181,7 @@ static const struct inv_mpu6050_hw hw_info[] = {
.reg = &reg_set_icm20602,
.config = &chip_config_6050,
.fifo_size = 1008,
+ .temp = {INV_ICM20608_TEMP_OFFSET, INV_ICM20608_TEMP_SCALE},
},
};
@@ -481,12 +490,8 @@ inv_mpu6050_read_raw(struct iio_dev *indio_dev,
return IIO_VAL_INT_PLUS_MICRO;
case IIO_TEMP:
- *val = 0;
- if (st->chip_type == INV_ICM20602)
- *val2 = INV_ICM20602_TEMP_SCALE;
- else
- *val2 = INV_MPU6050_TEMP_SCALE;
-
+ *val = st->hw->temp.scale / 1000000;
+ *val2 = st->hw->temp.scale % 1000000;
return IIO_VAL_INT_PLUS_MICRO;
case IIO_MAGN:
return inv_mpu_magn_get_scale(st, chan, val, val2);
@@ -496,11 +501,7 @@ inv_mpu6050_read_raw(struct iio_dev *indio_dev,
case IIO_CHAN_INFO_OFFSET:
switch (chan->type) {
case IIO_TEMP:
- if (st->chip_type == INV_ICM20602)
- *val = INV_ICM20602_TEMP_OFFSET;
- else
- *val = INV_MPU6050_TEMP_OFFSET;
-
+ *val = st->hw->temp.offset;
return IIO_VAL_INT;
default:
return -EINVAL;
diff --git a/drivers/iio/imu/inv_mpu6050/inv_mpu_iio.h b/drivers/iio/imu/inv_mpu6050/inv_mpu_iio.h
index f1fb7b6bdab1..b096e010d4ee 100644
--- a/drivers/iio/imu/inv_mpu6050/inv_mpu_iio.h
+++ b/drivers/iio/imu/inv_mpu6050/inv_mpu_iio.h
@@ -107,6 +107,7 @@ struct inv_mpu6050_chip_config {
* @reg: register map of the chip.
* @config: configuration of the chip.
* @fifo_size: size of the FIFO in bytes.
+ * @temp: offset and scale to apply to raw temperature.
*/
struct inv_mpu6050_hw {
u8 whoami;
@@ -114,6 +115,10 @@ struct inv_mpu6050_hw {
const struct inv_mpu6050_reg_map *reg;
const struct inv_mpu6050_chip_config *config;
size_t fifo_size;
+ struct {
+ int offset;
+ int scale;
+ } temp;
};
/*
@@ -279,16 +284,19 @@ struct inv_mpu6050_state {
#define INV_MPU6050_REG_UP_TIME_MIN 5000
#define INV_MPU6050_REG_UP_TIME_MAX 10000
-#define INV_MPU6050_TEMP_OFFSET 12421
-#define INV_MPU6050_TEMP_SCALE 2941
+#define INV_MPU6050_TEMP_OFFSET 12420
+#define INV_MPU6050_TEMP_SCALE 2941176
#define INV_MPU6050_MAX_GYRO_FS_PARAM 3
#define INV_MPU6050_MAX_ACCL_FS_PARAM 3
#define INV_MPU6050_THREE_AXIS 3
#define INV_MPU6050_GYRO_CONFIG_FSR_SHIFT 3
#define INV_MPU6050_ACCL_CONFIG_FSR_SHIFT 3
-#define INV_ICM20602_TEMP_OFFSET 8170
-#define INV_ICM20602_TEMP_SCALE 3060
+#define INV_MPU6500_TEMP_OFFSET 7011
+#define INV_MPU6500_TEMP_SCALE 2995178
+
+#define INV_ICM20608_TEMP_OFFSET 8170
+#define INV_ICM20608_TEMP_SCALE 3059976
/* 6 + 6 + 7 (for MPU9x50) = 19 round up to 24 and plus 8 */
#define INV_MPU6050_OUTPUT_DATA_SIZE 32
diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h
index c605b153be41..dc55d7dff3eb 100644
--- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h
+++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h
@@ -320,7 +320,6 @@ enum st_lsm6dsx_fifo_mode {
* @odr: Output data rate of the sensor [Hz].
* @watermark: Sensor watermark level.
* @sip: Number of samples in a given pattern.
- * @decimator: FIFO decimation factor.
* @ts_ref: Sensor timestamp reference for hw one.
* @ext_info: Sensor settings if it is connected to i2c controller
*/
@@ -334,7 +333,6 @@ struct st_lsm6dsx_sensor {
u16 watermark;
u8 sip;
- u8 decimator;
s64 ts_ref;
struct {
@@ -351,9 +349,9 @@ struct st_lsm6dsx_sensor {
* @fifo_lock: Mutex to prevent concurrent access to the hw FIFO.
* @conf_lock: Mutex to prevent concurrent FIFO configuration update.
* @page_lock: Mutex to prevent concurrent memory page configuration.
- * @fifo_mode: FIFO operating mode supported by the device.
* @suspend_mask: Suspended sensor bitmask.
* @enable_mask: Enabled sensor bitmask.
+ * @fifo_mask: Enabled hw FIFO bitmask.
* @ts_gain: Hw timestamp rate after internal calibration.
* @ts_sip: Total number of timestamp samples in a given pattern.
* @sip: Total number of samples (acc/gyro/ts) in a given pattern.
@@ -373,9 +371,9 @@ struct st_lsm6dsx_hw {
struct mutex conf_lock;
struct mutex page_lock;
- enum st_lsm6dsx_fifo_mode fifo_mode;
u8 suspend_mask;
u8 enable_mask;
+ u8 fifo_mask;
s64 ts_gain;
u8 ts_sip;
u8 sip;
diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c
index d416990ae309..cb536b81a1c2 100644
--- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c
+++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c
@@ -78,14 +78,20 @@ struct st_lsm6dsx_decimator_entry st_lsm6dsx_decimator_table[] = {
{ 32, 0x7 },
};
-static int st_lsm6dsx_get_decimator_val(u8 val)
+static int
+st_lsm6dsx_get_decimator_val(struct st_lsm6dsx_sensor *sensor, u32 max_odr)
{
const int max_size = ARRAY_SIZE(st_lsm6dsx_decimator_table);
+ u32 decimator = max_odr / sensor->odr;
int i;
- for (i = 0; i < max_size; i++)
- if (st_lsm6dsx_decimator_table[i].decimator == val)
+ if (decimator > 1)
+ decimator = round_down(decimator, 2);
+
+ for (i = 0; i < max_size; i++) {
+ if (st_lsm6dsx_decimator_table[i].decimator == decimator)
break;
+ }
return i == max_size ? 0 : st_lsm6dsx_decimator_table[i].val;
}
@@ -111,6 +117,13 @@ static void st_lsm6dsx_get_max_min_odr(struct st_lsm6dsx_hw *hw,
}
}
+static u8 st_lsm6dsx_get_sip(struct st_lsm6dsx_sensor *sensor, u32 min_odr)
+{
+ u8 sip = sensor->odr / min_odr;
+
+ return sip > 1 ? round_down(sip, 2) : sip;
+}
+
static int st_lsm6dsx_update_decimators(struct st_lsm6dsx_hw *hw)
{
const struct st_lsm6dsx_reg *ts_dec_reg;
@@ -131,12 +144,10 @@ static int st_lsm6dsx_update_decimators(struct st_lsm6dsx_hw *hw)
sensor = iio_priv(hw->iio_devs[i]);
/* update fifo decimators and sample in pattern */
if (hw->enable_mask & BIT(sensor->id)) {
- sensor->sip = sensor->odr / min_odr;
- sensor->decimator = max_odr / sensor->odr;
- data = st_lsm6dsx_get_decimator_val(sensor->decimator);
+ sensor->sip = st_lsm6dsx_get_sip(sensor, min_odr);
+ data = st_lsm6dsx_get_decimator_val(sensor, max_odr);
} else {
sensor->sip = 0;
- sensor->decimator = 0;
data = 0;
}
ts_sip = max_t(u16, ts_sip, sensor->sip);
@@ -176,17 +187,10 @@ int st_lsm6dsx_set_fifo_mode(struct st_lsm6dsx_hw *hw,
enum st_lsm6dsx_fifo_mode fifo_mode)
{
unsigned int data;
- int err;
data = FIELD_PREP(ST_LSM6DSX_FIFO_MODE_MASK, fifo_mode);
- err = st_lsm6dsx_update_bits_locked(hw, ST_LSM6DSX_REG_FIFO_MODE_ADDR,
- ST_LSM6DSX_FIFO_MODE_MASK, data);
- if (err < 0)
- return err;
-
- hw->fifo_mode = fifo_mode;
-
- return 0;
+ return st_lsm6dsx_update_bits_locked(hw, ST_LSM6DSX_REG_FIFO_MODE_ADDR,
+ ST_LSM6DSX_FIFO_MODE_MASK, data);
}
static int st_lsm6dsx_set_fifo_odr(struct st_lsm6dsx_sensor *sensor,
@@ -608,11 +612,17 @@ int st_lsm6dsx_flush_fifo(struct st_lsm6dsx_hw *hw)
int st_lsm6dsx_update_fifo(struct st_lsm6dsx_sensor *sensor, bool enable)
{
struct st_lsm6dsx_hw *hw = sensor->hw;
+ u8 fifo_mask;
int err;
mutex_lock(&hw->conf_lock);
- if (hw->fifo_mode != ST_LSM6DSX_FIFO_BYPASS) {
+ if (enable)
+ fifo_mask = hw->fifo_mask | BIT(sensor->id);
+ else
+ fifo_mask = hw->fifo_mask & ~BIT(sensor->id);
+
+ if (hw->fifo_mask) {
err = st_lsm6dsx_flush_fifo(hw);
if (err < 0)
goto out;
@@ -642,15 +652,19 @@ int st_lsm6dsx_update_fifo(struct st_lsm6dsx_sensor *sensor, bool enable)
if (err < 0)
goto out;
- if (hw->enable_mask) {
+ if (fifo_mask) {
/* reset hw ts counter */
err = st_lsm6dsx_reset_hw_ts(hw);
if (err < 0)
goto out;
err = st_lsm6dsx_set_fifo_mode(hw, ST_LSM6DSX_FIFO_CONT);
+ if (err < 0)
+ goto out;
}
+ hw->fifo_mask = fifo_mask;
+
out:
mutex_unlock(&hw->conf_lock);
diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c
index 11b2c7bc8041..a7d40c02ce6b 100644
--- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c
+++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c
@@ -1447,8 +1447,9 @@ st_lsm6dsx_set_odr(struct st_lsm6dsx_sensor *sensor, u32 req_odr)
return st_lsm6dsx_update_bits_locked(hw, reg->addr, reg->mask, data);
}
-int st_lsm6dsx_sensor_set_enable(struct st_lsm6dsx_sensor *sensor,
- bool enable)
+static int
+__st_lsm6dsx_sensor_set_enable(struct st_lsm6dsx_sensor *sensor,
+ bool enable)
{
struct st_lsm6dsx_hw *hw = sensor->hw;
u32 odr = enable ? sensor->odr : 0;
@@ -1466,6 +1467,26 @@ int st_lsm6dsx_sensor_set_enable(struct st_lsm6dsx_sensor *sensor,
return 0;
}
+static int
+st_lsm6dsx_check_events(struct st_lsm6dsx_sensor *sensor, bool enable)
+{
+ struct st_lsm6dsx_hw *hw = sensor->hw;
+
+ if (sensor->id == ST_LSM6DSX_ID_GYRO || enable)
+ return 0;
+
+ return hw->enable_event;
+}
+
+int st_lsm6dsx_sensor_set_enable(struct st_lsm6dsx_sensor *sensor,
+ bool enable)
+{
+ if (st_lsm6dsx_check_events(sensor, enable))
+ return 0;
+
+ return __st_lsm6dsx_sensor_set_enable(sensor, enable);
+}
+
static int st_lsm6dsx_read_oneshot(struct st_lsm6dsx_sensor *sensor,
u8 addr, int *val)
{
@@ -1661,7 +1682,7 @@ st_lsm6dsx_write_event_config(struct iio_dev *iio_dev,
struct st_lsm6dsx_sensor *sensor = iio_priv(iio_dev);
struct st_lsm6dsx_hw *hw = sensor->hw;
u8 enable_event;
- int err = 0;
+ int err;
if (type != IIO_EV_TYPE_THRESH)
return -EINVAL;
@@ -1689,7 +1710,8 @@ st_lsm6dsx_write_event_config(struct iio_dev *iio_dev,
return err;
mutex_lock(&hw->conf_lock);
- err = st_lsm6dsx_sensor_set_enable(sensor, state);
+ if (enable_event || !(hw->fifo_mask & BIT(sensor->id)))
+ err = __st_lsm6dsx_sensor_set_enable(sensor, state);
mutex_unlock(&hw->conf_lock);
if (err < 0)
return err;
@@ -2300,7 +2322,7 @@ static int __maybe_unused st_lsm6dsx_suspend(struct device *dev)
hw->suspend_mask |= BIT(sensor->id);
}
- if (hw->fifo_mode != ST_LSM6DSX_FIFO_BYPASS)
+ if (hw->fifo_mask)
err = st_lsm6dsx_flush_fifo(hw);
return err;
@@ -2336,7 +2358,7 @@ static int __maybe_unused st_lsm6dsx_resume(struct device *dev)
hw->suspend_mask &= ~BIT(sensor->id);
}
- if (hw->enable_mask)
+ if (hw->fifo_mask)
err = st_lsm6dsx_set_fifo_mode(hw, ST_LSM6DSX_FIFO_CONT);
return err;
diff --git a/drivers/iio/temperature/ltc2983.c b/drivers/iio/temperature/ltc2983.c
index ddf47023364b..d39c0d6b77f1 100644
--- a/drivers/iio/temperature/ltc2983.c
+++ b/drivers/iio/temperature/ltc2983.c
@@ -444,8 +444,10 @@ static struct ltc2983_custom_sensor *__ltc2983_custom_sensor_new(
else
temp = __convert_to_raw(temp, resolution);
} else {
- of_property_read_u32_index(np, propname, index,
- (u32 *)&temp);
+ u32 t32;
+
+ of_property_read_u32_index(np, propname, index, &t32);
+ temp = t32;
}
for (j = 0; j < n_size; j++)
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 25f2b70fd8ef..43a6f07e0afe 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -4763,6 +4763,7 @@ err_ib:
err:
unregister_netdevice_notifier(&cma_nb);
ib_sa_unregister_client(&sa_client);
+ unregister_pernet_subsys(&cma_pernet_operations);
err_wq:
destroy_workqueue(cma_wq);
return ret;
diff --git a/drivers/infiniband/core/counters.c b/drivers/infiniband/core/counters.c
index 8434ec082c3a..2257d7f7810f 100644
--- a/drivers/infiniband/core/counters.c
+++ b/drivers/infiniband/core/counters.c
@@ -286,6 +286,9 @@ int rdma_counter_bind_qp_auto(struct ib_qp *qp, u8 port)
struct rdma_counter *counter;
int ret;
+ if (!qp->res.valid)
+ return 0;
+
if (!rdma_is_port_valid(dev, port))
return -EINVAL;
diff --git a/drivers/infiniband/core/ib_core_uverbs.c b/drivers/infiniband/core/ib_core_uverbs.c
index f509c478b469..b7cb59844ece 100644
--- a/drivers/infiniband/core/ib_core_uverbs.c
+++ b/drivers/infiniband/core/ib_core_uverbs.c
@@ -238,28 +238,32 @@ void rdma_user_mmap_entry_remove(struct rdma_user_mmap_entry *entry)
EXPORT_SYMBOL(rdma_user_mmap_entry_remove);
/**
- * rdma_user_mmap_entry_insert() - Insert an entry to the mmap_xa
+ * rdma_user_mmap_entry_insert_range() - Insert an entry to the mmap_xa
+ * in a given range.
*
* @ucontext: associated user context.
* @entry: the entry to insert into the mmap_xa
* @length: length of the address that will be mmapped
+ * @min_pgoff: minimum pgoff to be returned
+ * @max_pgoff: maximum pgoff to be returned
*
* This function should be called by drivers that use the rdma_user_mmap
* interface for implementing their mmap syscall A database of mmap offsets is
* handled in the core and helper functions are provided to insert entries
* into the database and extract entries when the user calls mmap with the
- * given offset. The function allocates a unique page offset that should be
- * provided to user, the user will use the offset to retrieve information such
- * as address to be mapped and how.
+ * given offset. The function allocates a unique page offset in a given range
+ * that should be provided to user, the user will use the offset to retrieve
+ * information such as address to be mapped and how.
*
* Return: 0 on success and -ENOMEM on failure
*/
-int rdma_user_mmap_entry_insert(struct ib_ucontext *ucontext,
- struct rdma_user_mmap_entry *entry,
- size_t length)
+int rdma_user_mmap_entry_insert_range(struct ib_ucontext *ucontext,
+ struct rdma_user_mmap_entry *entry,
+ size_t length, u32 min_pgoff,
+ u32 max_pgoff)
{
struct ib_uverbs_file *ufile = ucontext->ufile;
- XA_STATE(xas, &ucontext->mmap_xa, 0);
+ XA_STATE(xas, &ucontext->mmap_xa, min_pgoff);
u32 xa_first, xa_last, npages;
int err;
u32 i;
@@ -285,7 +289,7 @@ int rdma_user_mmap_entry_insert(struct ib_ucontext *ucontext,
entry->npages = npages;
while (true) {
/* First find an empty index */
- xas_find_marked(&xas, U32_MAX, XA_FREE_MARK);
+ xas_find_marked(&xas, max_pgoff, XA_FREE_MARK);
if (xas.xa_node == XAS_RESTART)
goto err_unlock;
@@ -332,4 +336,30 @@ err_unlock:
mutex_unlock(&ufile->umap_lock);
return -ENOMEM;
}
+EXPORT_SYMBOL(rdma_user_mmap_entry_insert_range);
+
+/**
+ * rdma_user_mmap_entry_insert() - Insert an entry to the mmap_xa.
+ *
+ * @ucontext: associated user context.
+ * @entry: the entry to insert into the mmap_xa
+ * @length: length of the address that will be mmapped
+ *
+ * This function should be called by drivers that use the rdma_user_mmap
+ * interface for handling user mmapped addresses. The database is handled in
+ * the core and helper functions are provided to insert entries into the
+ * database and extract entries when the user calls mmap with the given offset.
+ * The function allocates a unique page offset that should be provided to user,
+ * the user will use the offset to retrieve information such as address to
+ * be mapped and how.
+ *
+ * Return: 0 on success and -ENOMEM on failure
+ */
+int rdma_user_mmap_entry_insert(struct ib_ucontext *ucontext,
+ struct rdma_user_mmap_entry *entry,
+ size_t length)
+{
+ return rdma_user_mmap_entry_insert_range(ucontext, entry, length, 0,
+ U32_MAX);
+}
EXPORT_SYMBOL(rdma_user_mmap_entry_insert);
diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c
index c9d294caa27a..50c22575aed6 100644
--- a/drivers/infiniband/hw/efa/efa_verbs.c
+++ b/drivers/infiniband/hw/efa/efa_verbs.c
@@ -145,7 +145,7 @@ static inline bool is_rdma_read_cap(struct efa_dev *dev)
}
#define field_avail(x, fld, sz) (offsetof(typeof(x), fld) + \
- FIELD_SIZEOF(typeof(x), fld) <= (sz))
+ sizeof_field(typeof(x), fld) <= (sz))
#define is_reserved_cleared(reserved) \
!memchr_inv(reserved, 0, sizeof(reserved))
diff --git a/drivers/infiniband/hw/hfi1/sdma.c b/drivers/infiniband/hw/hfi1/sdma.c
index 5774dfc22e18..a51525647ac8 100644
--- a/drivers/infiniband/hw/hfi1/sdma.c
+++ b/drivers/infiniband/hw/hfi1/sdma.c
@@ -848,7 +848,7 @@ static const struct rhashtable_params sdma_rht_params = {
.nelem_hint = NR_CPUS_HINT,
.head_offset = offsetof(struct sdma_rht_node, node),
.key_offset = offsetof(struct sdma_rht_node, cpu_id),
- .key_len = FIELD_SIZEOF(struct sdma_rht_node, cpu_id),
+ .key_len = sizeof_field(struct sdma_rht_node, cpu_id),
.max_size = NR_CPUS,
.min_size = 8,
.automatic_shrinking = true,
diff --git a/drivers/infiniband/hw/hfi1/verbs.h b/drivers/infiniband/hw/hfi1/verbs.h
index b0e9bf7cd150..d36e3e14896d 100644
--- a/drivers/infiniband/hw/hfi1/verbs.h
+++ b/drivers/infiniband/hw/hfi1/verbs.h
@@ -107,9 +107,9 @@ enum {
HFI1_HAS_GRH = (1 << 0),
};
-#define LRH_16B_BYTES (FIELD_SIZEOF(struct hfi1_16b_header, lrh))
+#define LRH_16B_BYTES (sizeof_field(struct hfi1_16b_header, lrh))
#define LRH_16B_DWORDS (LRH_16B_BYTES / sizeof(u32))
-#define LRH_9B_BYTES (FIELD_SIZEOF(struct ib_header, lrh))
+#define LRH_9B_BYTES (sizeof_field(struct ib_header, lrh))
#define LRH_9B_DWORDS (LRH_9B_BYTES / sizeof(u32))
/* 24Bits for qpn, upper 8Bits reserved */
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 0b5dc1d5928f..34055cbab38c 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -3018,16 +3018,17 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)
ibdev->ib_active = false;
flush_workqueue(wq);
- mlx4_ib_close_sriov(ibdev);
- mlx4_ib_mad_cleanup(ibdev);
- ib_unregister_device(&ibdev->ib_dev);
- mlx4_ib_diag_cleanup(ibdev);
if (ibdev->iboe.nb.notifier_call) {
if (unregister_netdevice_notifier(&ibdev->iboe.nb))
pr_warn("failure unregistering notifier\n");
ibdev->iboe.nb.notifier_call = NULL;
}
+ mlx4_ib_close_sriov(ibdev);
+ mlx4_ib_mad_cleanup(ibdev);
+ ib_unregister_device(&ibdev->ib_dev);
+ mlx4_ib_diag_cleanup(ibdev);
+
mlx4_qp_release_range(dev, ibdev->steer_qpn_base,
ibdev->steer_qpn_count);
kfree(ibdev->ib_uc_qpns_bitmap);
diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c
index 4937947400cd..4c26492ab8a3 100644
--- a/drivers/infiniband/hw/mlx5/cmd.c
+++ b/drivers/infiniband/hw/mlx5/cmd.c
@@ -157,7 +157,7 @@ int mlx5_cmd_alloc_memic(struct mlx5_dm *dm, phys_addr_t *addr,
return -ENOMEM;
}
-int mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length)
+void mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length)
{
struct mlx5_core_dev *dev = dm->dev;
u64 hw_start_addr = MLX5_CAP64_DEV_MEM(dev, memic_bar_start_addr);
@@ -175,15 +175,13 @@ int mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length)
MLX5_SET(dealloc_memic_in, in, memic_size, length);
err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+ if (err)
+ return;
- if (!err) {
- spin_lock(&dm->lock);
- bitmap_clear(dm->memic_alloc_pages,
- start_page_idx, num_pages);
- spin_unlock(&dm->lock);
- }
-
- return err;
+ spin_lock(&dm->lock);
+ bitmap_clear(dm->memic_alloc_pages,
+ start_page_idx, num_pages);
+ spin_unlock(&dm->lock);
}
int mlx5_cmd_query_ext_ppcnt_counters(struct mlx5_core_dev *dev, void *out)
diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h
index 169cab4915e3..945ebce73613 100644
--- a/drivers/infiniband/hw/mlx5/cmd.h
+++ b/drivers/infiniband/hw/mlx5/cmd.h
@@ -46,7 +46,7 @@ int mlx5_cmd_modify_cong_params(struct mlx5_core_dev *mdev,
void *in, int in_size);
int mlx5_cmd_alloc_memic(struct mlx5_dm *dm, phys_addr_t *addr,
u64 length, u32 alignment);
-int mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length);
+void mlx5_cmd_dealloc_memic(struct mlx5_dm *dm, phys_addr_t addr, u64 length);
void mlx5_cmd_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn, u16 uid);
void mlx5_cmd_destroy_tir(struct mlx5_core_dev *dev, u32 tirn, u16 uid);
void mlx5_cmd_destroy_tis(struct mlx5_core_dev *dev, u32 tisn, u16 uid);
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 51100350b688..997cbfe4b90c 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -2074,6 +2074,24 @@ static int mlx5_ib_mmap_clock_info_page(struct mlx5_ib_dev *dev,
virt_to_page(dev->mdev->clock_info));
}
+static void mlx5_ib_mmap_free(struct rdma_user_mmap_entry *entry)
+{
+ struct mlx5_user_mmap_entry *mentry = to_mmmap(entry);
+ struct mlx5_ib_dev *dev = to_mdev(entry->ucontext->device);
+ struct mlx5_ib_dm *mdm;
+
+ switch (mentry->mmap_flag) {
+ case MLX5_IB_MMAP_TYPE_MEMIC:
+ mdm = container_of(mentry, struct mlx5_ib_dm, mentry);
+ mlx5_cmd_dealloc_memic(&dev->dm, mdm->dev_addr,
+ mdm->size);
+ kfree(mdm);
+ break;
+ default:
+ WARN_ON(true);
+ }
+}
+
static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd,
struct vm_area_struct *vma,
struct mlx5_ib_ucontext *context)
@@ -2186,26 +2204,55 @@ free_bfreg:
return err;
}
-static int dm_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
+static int add_dm_mmap_entry(struct ib_ucontext *context,
+ struct mlx5_ib_dm *mdm,
+ u64 address)
+{
+ mdm->mentry.mmap_flag = MLX5_IB_MMAP_TYPE_MEMIC;
+ mdm->mentry.address = address;
+ return rdma_user_mmap_entry_insert_range(
+ context, &mdm->mentry.rdma_entry,
+ mdm->size,
+ MLX5_IB_MMAP_DEVICE_MEM << 16,
+ (MLX5_IB_MMAP_DEVICE_MEM << 16) + (1UL << 16) - 1);
+}
+
+static unsigned long mlx5_vma_to_pgoff(struct vm_area_struct *vma)
+{
+ unsigned long idx;
+ u8 command;
+
+ command = get_command(vma->vm_pgoff);
+ idx = get_extended_index(vma->vm_pgoff);
+
+ return (command << 16 | idx);
+}
+
+static int mlx5_ib_mmap_offset(struct mlx5_ib_dev *dev,
+ struct vm_area_struct *vma,
+ struct ib_ucontext *ucontext)
{
- struct mlx5_ib_ucontext *mctx = to_mucontext(context);
- struct mlx5_ib_dev *dev = to_mdev(context->device);
- u16 page_idx = get_extended_index(vma->vm_pgoff);
- size_t map_size = vma->vm_end - vma->vm_start;
- u32 npages = map_size >> PAGE_SHIFT;
+ struct mlx5_user_mmap_entry *mentry;
+ struct rdma_user_mmap_entry *entry;
+ unsigned long pgoff;
+ pgprot_t prot;
phys_addr_t pfn;
+ int ret;
- if (find_next_zero_bit(mctx->dm_pages, page_idx + npages, page_idx) !=
- page_idx + npages)
+ pgoff = mlx5_vma_to_pgoff(vma);
+ entry = rdma_user_mmap_entry_get_pgoff(ucontext, pgoff);
+ if (!entry)
return -EINVAL;
- pfn = ((dev->mdev->bar_addr +
- MLX5_CAP64_DEV_MEM(dev->mdev, memic_bar_start_addr)) >>
- PAGE_SHIFT) +
- page_idx;
- return rdma_user_mmap_io(context, vma, pfn, map_size,
- pgprot_writecombine(vma->vm_page_prot),
- NULL);
+ mentry = to_mmmap(entry);
+ pfn = (mentry->address >> PAGE_SHIFT);
+ prot = pgprot_writecombine(vma->vm_page_prot);
+ ret = rdma_user_mmap_io(ucontext, vma, pfn,
+ entry->npages * PAGE_SIZE,
+ prot,
+ entry);
+ rdma_user_mmap_entry_put(&mentry->rdma_entry);
+ return ret;
}
static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
@@ -2248,11 +2295,8 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm
case MLX5_IB_MMAP_CLOCK_INFO:
return mlx5_ib_mmap_clock_info_page(dev, vma, context);
- case MLX5_IB_MMAP_DEVICE_MEM:
- return dm_mmap(ibcontext, vma);
-
default:
- return -EINVAL;
+ return mlx5_ib_mmap_offset(dev, vma, ibcontext);
}
return 0;
@@ -2288,8 +2332,9 @@ static int handle_alloc_dm_memic(struct ib_ucontext *ctx,
{
struct mlx5_dm *dm_db = &to_mdev(ctx->device)->dm;
u64 start_offset;
- u32 page_idx;
+ u16 page_idx;
int err;
+ u64 address;
dm->size = roundup(attr->length, MLX5_MEMIC_BASE_SIZE);
@@ -2298,28 +2343,30 @@ static int handle_alloc_dm_memic(struct ib_ucontext *ctx,
if (err)
return err;
- page_idx = (dm->dev_addr - pci_resource_start(dm_db->dev->pdev, 0) -
- MLX5_CAP64_DEV_MEM(dm_db->dev, memic_bar_start_addr)) >>
- PAGE_SHIFT;
+ address = dm->dev_addr & PAGE_MASK;
+ err = add_dm_mmap_entry(ctx, dm, address);
+ if (err)
+ goto err_dealloc;
+ page_idx = dm->mentry.rdma_entry.start_pgoff & 0xFFFF;
err = uverbs_copy_to(attrs,
MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX,
- &page_idx, sizeof(page_idx));
+ &page_idx,
+ sizeof(page_idx));
if (err)
- goto err_dealloc;
+ goto err_copy;
start_offset = dm->dev_addr & ~PAGE_MASK;
err = uverbs_copy_to(attrs,
MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET,
&start_offset, sizeof(start_offset));
if (err)
- goto err_dealloc;
-
- bitmap_set(to_mucontext(ctx)->dm_pages, page_idx,
- DIV_ROUND_UP(dm->size, PAGE_SIZE));
+ goto err_copy;
return 0;
+err_copy:
+ rdma_user_mmap_entry_remove(&dm->mentry.rdma_entry);
err_dealloc:
mlx5_cmd_dealloc_memic(dm_db, dm->dev_addr, dm->size);
@@ -2423,23 +2470,13 @@ int mlx5_ib_dealloc_dm(struct ib_dm *ibdm, struct uverbs_attr_bundle *attrs)
struct mlx5_ib_ucontext *ctx = rdma_udata_to_drv_context(
&attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
struct mlx5_core_dev *dev = to_mdev(ibdm->device)->mdev;
- struct mlx5_dm *dm_db = &to_mdev(ibdm->device)->dm;
struct mlx5_ib_dm *dm = to_mdm(ibdm);
- u32 page_idx;
int ret;
switch (dm->type) {
case MLX5_IB_UAPI_DM_TYPE_MEMIC:
- ret = mlx5_cmd_dealloc_memic(dm_db, dm->dev_addr, dm->size);
- if (ret)
- return ret;
-
- page_idx = (dm->dev_addr - pci_resource_start(dev->pdev, 0) -
- MLX5_CAP64_DEV_MEM(dev, memic_bar_start_addr)) >>
- PAGE_SHIFT;
- bitmap_clear(ctx->dm_pages, page_idx,
- DIV_ROUND_UP(dm->size, PAGE_SIZE));
- break;
+ rdma_user_mmap_entry_remove(&dm->mentry.rdma_entry);
+ return 0;
case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
ret = mlx5_dm_sw_icm_dealloc(dev, MLX5_SW_ICM_TYPE_STEERING,
dm->size, ctx->devx_uid, dm->dev_addr,
@@ -3544,10 +3581,6 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
}
INIT_LIST_HEAD(&handler->list);
- if (dst) {
- memcpy(&dest_arr[0], dst, sizeof(*dst));
- dest_num++;
- }
for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
err = parse_flow_attr(dev->mdev, spec,
@@ -3560,6 +3593,11 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
ib_flow += ((union ib_flow_spec *)ib_flow)->size;
}
+ if (dst && !(flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP)) {
+ memcpy(&dest_arr[0], dst, sizeof(*dst));
+ dest_num++;
+ }
+
if (!flow_is_multicast_only(flow_attr))
set_underlay_qp(dev, spec, underlay_qpn);
@@ -3600,10 +3638,8 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
}
if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP) {
- if (!(flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT)) {
+ if (!dest_num)
rule_dst = NULL;
- dest_num = 0;
- }
} else {
if (is_egress)
flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW;
@@ -6236,6 +6272,7 @@ static const struct ib_device_ops mlx5_ib_dev_ops = {
.map_mr_sg = mlx5_ib_map_mr_sg,
.map_mr_sg_pi = mlx5_ib_map_mr_sg_pi,
.mmap = mlx5_ib_mmap,
+ .mmap_free = mlx5_ib_mmap_free,
.modify_cq = mlx5_ib_modify_cq,
.modify_device = mlx5_ib_modify_device,
.modify_port = mlx5_ib_modify_port,
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 5986953ec2fa..b06f32ff5748 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -118,6 +118,10 @@ enum {
MLX5_MEMIC_BASE_SIZE = 1 << MLX5_MEMIC_BASE_ALIGN,
};
+enum mlx5_ib_mmap_type {
+ MLX5_IB_MMAP_TYPE_MEMIC = 1,
+};
+
#define MLX5_LOG_SW_ICM_BLOCK_SIZE(dev) \
(MLX5_CAP_DEV_MEM(dev, log_sw_icm_alloc_granularity))
#define MLX5_SW_ICM_BLOCK_SIZE(dev) (1 << MLX5_LOG_SW_ICM_BLOCK_SIZE(dev))
@@ -135,7 +139,6 @@ struct mlx5_ib_ucontext {
u32 tdn;
u64 lib_caps;
- DECLARE_BITMAP(dm_pages, MLX5_MAX_MEMIC_PAGES);
u16 devx_uid;
/* For RoCE LAG TX affinity */
atomic_t tx_port_affinity;
@@ -556,6 +559,12 @@ enum mlx5_ib_mtt_access_flags {
MLX5_IB_MTT_WRITE = (1 << 1),
};
+struct mlx5_user_mmap_entry {
+ struct rdma_user_mmap_entry rdma_entry;
+ u8 mmap_flag;
+ u64 address;
+};
+
struct mlx5_ib_dm {
struct ib_dm ibdm;
phys_addr_t dev_addr;
@@ -567,6 +576,7 @@ struct mlx5_ib_dm {
} icm_dm;
/* other dm types specific params should be added here */
};
+ struct mlx5_user_mmap_entry mentry;
};
#define MLX5_IB_MTT_PRESENT (MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE)
@@ -1101,6 +1111,13 @@ to_mflow_act(struct ib_flow_action *ibact)
return container_of(ibact, struct mlx5_ib_flow_action, ib_action);
}
+static inline struct mlx5_user_mmap_entry *
+to_mmmap(struct rdma_user_mmap_entry *rdma_entry)
+{
+ return container_of(rdma_entry,
+ struct mlx5_user_mmap_entry, rdma_entry);
+}
+
int mlx5_ib_db_map_user(struct mlx5_ib_ucontext *context,
struct ib_udata *udata, unsigned long virt,
struct mlx5_db *db);
diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c
index f9a492ed900b..831ad578a7b2 100644
--- a/drivers/infiniband/sw/rxe/rxe_recv.c
+++ b/drivers/infiniband/sw/rxe/rxe_recv.c
@@ -389,7 +389,7 @@ void rxe_rcv(struct sk_buff *skb)
calc_icrc = rxe_icrc_hdr(pkt, skb);
calc_icrc = rxe_crc32(rxe, calc_icrc, (u8 *)payload_addr(pkt),
- payload_size(pkt));
+ payload_size(pkt) + bth_pad(pkt));
calc_icrc = (__force u32)cpu_to_be32(~calc_icrc);
if (unlikely(calc_icrc != pack_icrc)) {
if (skb->protocol == htons(ETH_P_IPV6))
diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index c5d9b558fa90..e5031172c019 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -500,6 +500,12 @@ static int fill_packet(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
if (err)
return err;
}
+ if (bth_pad(pkt)) {
+ u8 *pad = payload_addr(pkt) + paylen;
+
+ memset(pad, 0, bth_pad(pkt));
+ crc = rxe_crc32(rxe, crc, pad, bth_pad(pkt));
+ }
}
p = payload_addr(pkt) + paylen + bth_pad(pkt);
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index 1cbfbd98eb22..c4a8195bf670 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -732,6 +732,13 @@ static enum resp_states read_reply(struct rxe_qp *qp,
if (err)
pr_err("Failed copying memory\n");
+ if (bth_pad(&ack_pkt)) {
+ struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
+ u8 *pad = payload_addr(&ack_pkt) + payload;
+
+ memset(pad, 0, bth_pad(&ack_pkt));
+ icrc = rxe_crc32(rxe, icrc, pad, bth_pad(&ack_pkt));
+ }
p = payload_addr(&ack_pkt) + payload + bth_pad(&ack_pkt);
*p = ~icrc;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index e5f438ab716c..4a0d3a9e72e1 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -1182,7 +1182,7 @@ unref:
return NETDEV_TX_OK;
}
-static void ipoib_timeout(struct net_device *dev)
+static void ipoib_timeout(struct net_device *dev, unsigned int txqueue)
{
struct ipoib_dev_priv *priv = ipoib_priv(dev);
diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c
index 62390e9e0023..8ad7da989a0e 100644
--- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c
+++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c
@@ -63,7 +63,7 @@ struct vnic_stats {
};
};
-#define VNIC_STAT(m) { FIELD_SIZEOF(struct opa_vnic_stats, m), \
+#define VNIC_STAT(m) { sizeof_field(struct opa_vnic_stats, m), \
offsetof(struct opa_vnic_stats, m) }
static struct vnic_stats vnic_gstrings_stats[] = {
diff --git a/drivers/interconnect/qcom/Kconfig b/drivers/interconnect/qcom/Kconfig
index c49afbea3458..2f9304d1db49 100644
--- a/drivers/interconnect/qcom/Kconfig
+++ b/drivers/interconnect/qcom/Kconfig
@@ -6,13 +6,13 @@ config INTERCONNECT_QCOM
Support for Qualcomm's Network-on-Chip interconnect hardware.
config INTERCONNECT_QCOM_MSM8974
- tristate "Qualcomm MSM8974 interconnect driver"
- depends on INTERCONNECT_QCOM
- depends on QCOM_SMD_RPM
- select INTERCONNECT_QCOM_SMD_RPM
- help
- This is a driver for the Qualcomm Network-on-Chip on msm8974-based
- platforms.
+ tristate "Qualcomm MSM8974 interconnect driver"
+ depends on INTERCONNECT_QCOM
+ depends on QCOM_SMD_RPM
+ select INTERCONNECT_QCOM_SMD_RPM
+ help
+ This is a driver for the Qualcomm Network-on-Chip on msm8974-based
+ platforms.
config INTERCONNECT_QCOM_QCS404
tristate "Qualcomm QCS404 interconnect driver"
diff --git a/drivers/interconnect/qcom/msm8974.c b/drivers/interconnect/qcom/msm8974.c
index ce599a0c83d9..bf8bd1aee358 100644
--- a/drivers/interconnect/qcom/msm8974.c
+++ b/drivers/interconnect/qcom/msm8974.c
@@ -652,7 +652,7 @@ static int msm8974_icc_probe(struct platform_device *pdev)
struct device *dev = &pdev->dev;
struct icc_onecell_data *data;
struct icc_provider *provider;
- struct icc_node *node;
+ struct icc_node *node, *tmp;
size_t num_nodes, i;
int ret;
@@ -732,7 +732,7 @@ static int msm8974_icc_probe(struct platform_device *pdev)
return 0;
err_del_icc:
- list_for_each_entry(node, &provider->nodes, node_list) {
+ list_for_each_entry_safe(node, tmp, &provider->nodes, node_list) {
icc_node_del(node);
icc_node_destroy(node->id);
}
@@ -748,9 +748,9 @@ static int msm8974_icc_remove(struct platform_device *pdev)
{
struct msm8974_icc_provider *qp = platform_get_drvdata(pdev);
struct icc_provider *provider = &qp->provider;
- struct icc_node *n;
+ struct icc_node *n, *tmp;
- list_for_each_entry(n, &provider->nodes, node_list) {
+ list_for_each_entry_safe(n, tmp, &provider->nodes, node_list) {
icc_node_del(n);
icc_node_destroy(n->id);
}
diff --git a/drivers/interconnect/qcom/qcs404.c b/drivers/interconnect/qcom/qcs404.c
index b4966d8f3348..8e0735a87040 100644
--- a/drivers/interconnect/qcom/qcs404.c
+++ b/drivers/interconnect/qcom/qcs404.c
@@ -414,7 +414,7 @@ static int qnoc_probe(struct platform_device *pdev)
struct icc_provider *provider;
struct qcom_icc_node **qnodes;
struct qcom_icc_provider *qp;
- struct icc_node *node;
+ struct icc_node *node, *tmp;
size_t num_nodes, i;
int ret;
@@ -494,7 +494,7 @@ static int qnoc_probe(struct platform_device *pdev)
return 0;
err:
- list_for_each_entry(node, &provider->nodes, node_list) {
+ list_for_each_entry_safe(node, tmp, &provider->nodes, node_list) {
icc_node_del(node);
icc_node_destroy(node->id);
}
@@ -508,9 +508,9 @@ static int qnoc_remove(struct platform_device *pdev)
{
struct qcom_icc_provider *qp = platform_get_drvdata(pdev);
struct icc_provider *provider = &qp->provider;
- struct icc_node *n;
+ struct icc_node *n, *tmp;
- list_for_each_entry(n, &provider->nodes, node_list) {
+ list_for_each_entry_safe(n, tmp, &provider->nodes, node_list) {
icc_node_del(n);
icc_node_destroy(n->id);
}
diff --git a/drivers/interconnect/qcom/sdm845.c b/drivers/interconnect/qcom/sdm845.c
index 502a6c22b41e..387267ee9648 100644
--- a/drivers/interconnect/qcom/sdm845.c
+++ b/drivers/interconnect/qcom/sdm845.c
@@ -868,9 +868,9 @@ static int qnoc_remove(struct platform_device *pdev)
{
struct qcom_icc_provider *qp = platform_get_drvdata(pdev);
struct icc_provider *provider = &qp->provider;
- struct icc_node *n;
+ struct icc_node *n, *tmp;
- list_for_each_entry(n, &provider->nodes, node_list) {
+ list_for_each_entry_safe(n, tmp, &provider->nodes, node_list) {
icc_node_del(n);
icc_node_destroy(n->id);
}
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 0cc702a70a96..c363294b3bb9 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -19,6 +19,7 @@
#include <linux/iova.h>
#include <linux/irq.h>
#include <linux/mm.h>
+#include <linux/mutex.h>
#include <linux/pci.h>
#include <linux/scatterlist.h>
#include <linux/vmalloc.h>
@@ -44,7 +45,6 @@ struct iommu_dma_cookie {
dma_addr_t msi_iova;
};
struct list_head msi_page_list;
- spinlock_t msi_lock;
/* Domain for flush queue callback; NULL if flush queue not in use */
struct iommu_domain *fq_domain;
@@ -63,7 +63,6 @@ static struct iommu_dma_cookie *cookie_alloc(enum iommu_dma_cookie_type type)
cookie = kzalloc(sizeof(*cookie), GFP_KERNEL);
if (cookie) {
- spin_lock_init(&cookie->msi_lock);
INIT_LIST_HEAD(&cookie->msi_page_list);
cookie->type = type;
}
@@ -399,7 +398,7 @@ static int dma_info_to_prot(enum dma_data_direction dir, bool coherent,
}
static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain,
- size_t size, dma_addr_t dma_limit, struct device *dev)
+ size_t size, u64 dma_limit, struct device *dev)
{
struct iommu_dma_cookie *cookie = domain->iova_cookie;
struct iova_domain *iovad = &cookie->iovad;
@@ -424,7 +423,7 @@ static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain,
dma_limit = min_not_zero(dma_limit, dev->bus_dma_limit);
if (domain->geometry.force_aperture)
- dma_limit = min(dma_limit, domain->geometry.aperture_end);
+ dma_limit = min(dma_limit, (u64)domain->geometry.aperture_end);
/* Try to get PCI devices a SAC address */
if (dma_limit > DMA_BIT_MASK(32) && dev_is_pci(dev))
@@ -477,7 +476,7 @@ static void __iommu_dma_unmap(struct device *dev, dma_addr_t dma_addr,
}
static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
- size_t size, int prot, dma_addr_t dma_mask)
+ size_t size, int prot, u64 dma_mask)
{
struct iommu_domain *domain = iommu_get_dma_domain(dev);
struct iommu_dma_cookie *cookie = domain->iova_cookie;
@@ -1176,7 +1175,7 @@ static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev,
if (msi_page->phys == msi_addr)
return msi_page;
- msi_page = kzalloc(sizeof(*msi_page), GFP_ATOMIC);
+ msi_page = kzalloc(sizeof(*msi_page), GFP_KERNEL);
if (!msi_page)
return NULL;
@@ -1206,7 +1205,7 @@ int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr)
struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
struct iommu_dma_cookie *cookie;
struct iommu_dma_msi_page *msi_page;
- unsigned long flags;
+ static DEFINE_MUTEX(msi_prepare_lock); /* see below */
if (!domain || !domain->iova_cookie) {
desc->iommu_cookie = NULL;
@@ -1216,13 +1215,13 @@ int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr)
cookie = domain->iova_cookie;
/*
- * We disable IRQs to rule out a possible inversion against
- * irq_desc_lock if, say, someone tries to retarget the affinity
- * of an MSI from within an IPI handler.
+ * In fact the whole prepare operation should already be serialised by
+ * irq_domain_mutex further up the callchain, but that's pretty subtle
+ * on its own, so consider this locking as failsafe documentation...
*/
- spin_lock_irqsave(&cookie->msi_lock, flags);
+ mutex_lock(&msi_prepare_lock);
msi_page = iommu_dma_get_msi_page(dev, msi_addr, domain);
- spin_unlock_irqrestore(&cookie->msi_lock, flags);
+ mutex_unlock(&msi_prepare_lock);
msi_desc_set_iommu_cookie(desc, msi_page);
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index 0c8d81f56a30..42966611a192 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -5478,9 +5478,6 @@ static int intel_iommu_map(struct iommu_domain *domain,
int prot = 0;
int ret;
- if (dmar_domain->flags & DOMAIN_FLAG_LOSE_CHILDREN)
- return -EINVAL;
-
if (iommu_prot & IOMMU_READ)
prot |= DMA_PTE_READ;
if (iommu_prot & IOMMU_WRITE)
@@ -5523,8 +5520,6 @@ static size_t intel_iommu_unmap(struct iommu_domain *domain,
/* Cope with horrid API which requires us to unmap more than the
size argument if it happens to be a large-page mapping. */
BUG_ON(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level));
- if (dmar_domain->flags & DOMAIN_FLAG_LOSE_CHILDREN)
- return 0;
if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
size = VTD_PAGE_SIZE << level_to_offset_bits(level);
@@ -5556,9 +5551,6 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
int level = 0;
u64 phys = 0;
- if (dmar_domain->flags & DOMAIN_FLAG_LOSE_CHILDREN)
- return 0;
-
pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level);
if (pte)
phys = dma_pte_addr(pte);
@@ -5736,8 +5728,8 @@ static void intel_iommu_get_resv_regions(struct device *device,
struct pci_dev *pdev = to_pci_dev(device);
if ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA) {
- reg = iommu_alloc_resv_region(0, 1UL << 24, 0,
- IOMMU_RESV_DIRECT);
+ reg = iommu_alloc_resv_region(0, 1UL << 24, prot,
+ IOMMU_RESV_DIRECT_RELAXABLE);
if (reg)
list_add_tail(&reg->list, head);
}
diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c
index 9b159132405d..dca88f9fdf29 100644
--- a/drivers/iommu/intel-svm.c
+++ b/drivers/iommu/intel-svm.c
@@ -104,11 +104,7 @@ static void intel_flush_svm_range_dev (struct intel_svm *svm, struct intel_svm_d
{
struct qi_desc desc;
- /*
- * Do PASID granu IOTLB invalidation if page selective capability is
- * not available.
- */
- if (pages == -1 || !cap_pgsel_inv(svm->iommu->cap)) {
+ if (pages == -1) {
desc.qw0 = QI_EIOTLB_PASID(svm->pasid) |
QI_EIOTLB_DID(sdev->did) |
QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) |
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index db7bfd4f2d20..fdd40756dbc1 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -312,8 +312,8 @@ int iommu_insert_resv_region(struct iommu_resv_region *new,
list_for_each_entry_safe(iter, tmp, regions, list) {
phys_addr_t top_end, iter_end = iter->start + iter->length - 1;
- /* no merge needed on elements of different types than @nr */
- if (iter->type != nr->type) {
+ /* no merge needed on elements of different types than @new */
+ if (iter->type != new->type) {
list_move_tail(&iter->list, &stack);
continue;
}
@@ -2282,13 +2282,13 @@ request_default_domain_for_dev(struct device *dev, unsigned long type)
goto out;
}
- iommu_group_create_direct_mappings(group, dev);
-
/* Make the domain the default for this group */
if (group->default_domain)
iommu_domain_free(group->default_domain);
group->default_domain = domain;
+ iommu_group_create_direct_mappings(group, dev);
+
dev_info(dev, "Using iommu %s mapping\n",
type == IOMMU_DOMAIN_DMA ? "dma" : "direct");
diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c
index 41c605b0058f..c7a914b9bbbc 100644
--- a/drivers/iommu/iova.c
+++ b/drivers/iommu/iova.c
@@ -233,7 +233,7 @@ static DEFINE_MUTEX(iova_cache_mutex);
struct iova *alloc_iova_mem(void)
{
- return kmem_cache_alloc(iova_cache, GFP_ATOMIC);
+ return kmem_cache_zalloc(iova_cache, GFP_ATOMIC);
}
EXPORT_SYMBOL(alloc_iova_mem);
diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c
index 8df547d2d935..0aca5807a119 100644
--- a/drivers/irqchip/irq-sifive-plic.c
+++ b/drivers/irqchip/irq-sifive-plic.c
@@ -256,7 +256,7 @@ static int __init plic_init(struct device_node *node,
* Skip contexts other than external interrupts for our
* privilege level.
*/
- if (parent.args[0] != IRQ_EXT)
+ if (parent.args[0] != RV_IRQ_EXT)
continue;
hartid = plic_find_hart_id(parent.np);
diff --git a/drivers/md/dm-clone-metadata.c b/drivers/md/dm-clone-metadata.c
index 08c552e5e41b..c05b12110456 100644
--- a/drivers/md/dm-clone-metadata.c
+++ b/drivers/md/dm-clone-metadata.c
@@ -67,23 +67,34 @@ struct superblock_disk {
* To save constantly doing look ups on disk we keep an in core copy of the
* on-disk bitmap, the region_map.
*
- * To further reduce metadata I/O overhead we use a second bitmap, the dmap
- * (dirty bitmap), which tracks the dirty words, i.e. longs, of the region_map.
+ * In order to track which regions are hydrated during a metadata transaction,
+ * we use a second set of bitmaps, the dmap (dirty bitmap), which includes two
+ * bitmaps, namely dirty_regions and dirty_words. The dirty_regions bitmap
+ * tracks the regions that got hydrated during the current metadata
+ * transaction. The dirty_words bitmap tracks the dirty words, i.e. longs, of
+ * the dirty_regions bitmap.
+ *
+ * This allows us to precisely track the regions that were hydrated during the
+ * current metadata transaction and update the metadata accordingly, when we
+ * commit the current transaction. This is important because dm-clone should
+ * only commit the metadata of regions that were properly flushed to the
+ * destination device beforehand. Otherwise, in case of a crash, we could end
+ * up with a corrupted dm-clone device.
*
* When a region finishes hydrating dm-clone calls
* dm_clone_set_region_hydrated(), or for discard requests
* dm_clone_cond_set_range(), which sets the corresponding bits in region_map
* and dmap.
*
- * During a metadata commit we scan the dmap for dirty region_map words (longs)
- * and update accordingly the on-disk metadata. Thus, we don't have to flush to
- * disk the whole region_map. We can just flush the dirty region_map words.
+ * During a metadata commit we scan dmap->dirty_words and dmap->dirty_regions
+ * and update the on-disk metadata accordingly. Thus, we don't have to flush to
+ * disk the whole region_map. We can just flush the dirty region_map bits.
*
- * We use a dirty bitmap, which is smaller than the original region_map, to
- * reduce the amount of memory accesses during a metadata commit. As dm-bitset
- * accesses the on-disk bitmap in 64-bit word granularity, there is no
- * significant benefit in tracking the dirty region_map bits with a smaller
- * granularity.
+ * We use the helper dmap->dirty_words bitmap, which is smaller than the
+ * original region_map, to reduce the amount of memory accesses during a
+ * metadata commit. Moreover, as dm-bitset also accesses the on-disk bitmap in
+ * 64-bit word granularity, the dirty_words bitmap helps us avoid useless disk
+ * accesses.
*
* We could update directly the on-disk bitmap, when dm-clone calls either
* dm_clone_set_region_hydrated() or dm_clone_cond_set_range(), buts this
@@ -92,12 +103,13 @@ struct superblock_disk {
* e.g., in a hooked overwrite bio's completion routine, and further reduce the
* I/O completion latency.
*
- * We maintain two dirty bitmaps. During a metadata commit we atomically swap
- * the currently used dmap with the unused one. This allows the metadata update
- * functions to run concurrently with an ongoing commit.
+ * We maintain two dirty bitmap sets. During a metadata commit we atomically
+ * swap the currently used dmap with the unused one. This allows the metadata
+ * update functions to run concurrently with an ongoing commit.
*/
struct dirty_map {
unsigned long *dirty_words;
+ unsigned long *dirty_regions;
unsigned int changed;
};
@@ -115,6 +127,9 @@ struct dm_clone_metadata {
struct dirty_map dmap[2];
struct dirty_map *current_dmap;
+ /* Protected by lock */
+ struct dirty_map *committing_dmap;
+
/*
* In core copy of the on-disk bitmap to save constantly doing look ups
* on disk.
@@ -461,34 +476,53 @@ static size_t bitmap_size(unsigned long nr_bits)
return BITS_TO_LONGS(nr_bits) * sizeof(long);
}
-static int dirty_map_init(struct dm_clone_metadata *cmd)
+static int __dirty_map_init(struct dirty_map *dmap, unsigned long nr_words,
+ unsigned long nr_regions)
{
- cmd->dmap[0].changed = 0;
- cmd->dmap[0].dirty_words = kvzalloc(bitmap_size(cmd->nr_words), GFP_KERNEL);
+ dmap->changed = 0;
- if (!cmd->dmap[0].dirty_words) {
- DMERR("Failed to allocate dirty bitmap");
+ dmap->dirty_words = kvzalloc(bitmap_size(nr_words), GFP_KERNEL);
+ if (!dmap->dirty_words)
+ return -ENOMEM;
+
+ dmap->dirty_regions = kvzalloc(bitmap_size(nr_regions), GFP_KERNEL);
+ if (!dmap->dirty_regions) {
+ kvfree(dmap->dirty_words);
return -ENOMEM;
}
- cmd->dmap[1].changed = 0;
- cmd->dmap[1].dirty_words = kvzalloc(bitmap_size(cmd->nr_words), GFP_KERNEL);
+ return 0;
+}
+
+static void __dirty_map_exit(struct dirty_map *dmap)
+{
+ kvfree(dmap->dirty_words);
+ kvfree(dmap->dirty_regions);
+}
+
+static int dirty_map_init(struct dm_clone_metadata *cmd)
+{
+ if (__dirty_map_init(&cmd->dmap[0], cmd->nr_words, cmd->nr_regions)) {
+ DMERR("Failed to allocate dirty bitmap");
+ return -ENOMEM;
+ }
- if (!cmd->dmap[1].dirty_words) {
+ if (__dirty_map_init(&cmd->dmap[1], cmd->nr_words, cmd->nr_regions)) {
DMERR("Failed to allocate dirty bitmap");
- kvfree(cmd->dmap[0].dirty_words);
+ __dirty_map_exit(&cmd->dmap[0]);
return -ENOMEM;
}
cmd->current_dmap = &cmd->dmap[0];
+ cmd->committing_dmap = NULL;
return 0;
}
static void dirty_map_exit(struct dm_clone_metadata *cmd)
{
- kvfree(cmd->dmap[0].dirty_words);
- kvfree(cmd->dmap[1].dirty_words);
+ __dirty_map_exit(&cmd->dmap[0]);
+ __dirty_map_exit(&cmd->dmap[1]);
}
static int __load_bitset_in_core(struct dm_clone_metadata *cmd)
@@ -633,21 +667,23 @@ unsigned long dm_clone_find_next_unhydrated_region(struct dm_clone_metadata *cmd
return find_next_zero_bit(cmd->region_map, cmd->nr_regions, start);
}
-static int __update_metadata_word(struct dm_clone_metadata *cmd, unsigned long word)
+static int __update_metadata_word(struct dm_clone_metadata *cmd,
+ unsigned long *dirty_regions,
+ unsigned long word)
{
int r;
unsigned long index = word * BITS_PER_LONG;
unsigned long max_index = min(cmd->nr_regions, (word + 1) * BITS_PER_LONG);
while (index < max_index) {
- if (test_bit(index, cmd->region_map)) {
+ if (test_bit(index, dirty_regions)) {
r = dm_bitset_set_bit(&cmd->bitset_info, cmd->bitset_root,
index, &cmd->bitset_root);
-
if (r) {
DMERR("dm_bitset_set_bit failed");
return r;
}
+ __clear_bit(index, dirty_regions);
}
index++;
}
@@ -721,7 +757,7 @@ static int __flush_dmap(struct dm_clone_metadata *cmd, struct dirty_map *dmap)
if (word == cmd->nr_words)
break;
- r = __update_metadata_word(cmd, word);
+ r = __update_metadata_word(cmd, dmap->dirty_regions, word);
if (r)
return r;
@@ -743,15 +779,17 @@ static int __flush_dmap(struct dm_clone_metadata *cmd, struct dirty_map *dmap)
return 0;
}
-int dm_clone_metadata_commit(struct dm_clone_metadata *cmd)
+int dm_clone_metadata_pre_commit(struct dm_clone_metadata *cmd)
{
- int r = -EPERM;
+ int r = 0;
struct dirty_map *dmap, *next_dmap;
down_write(&cmd->lock);
- if (cmd->fail_io || dm_bm_is_read_only(cmd->bm))
+ if (cmd->fail_io || dm_bm_is_read_only(cmd->bm)) {
+ r = -EPERM;
goto out;
+ }
/* Get current dirty bitmap */
dmap = cmd->current_dmap;
@@ -763,7 +801,7 @@ int dm_clone_metadata_commit(struct dm_clone_metadata *cmd)
* The last commit failed, so we don't have a clean dirty-bitmap to
* use.
*/
- if (WARN_ON(next_dmap->changed)) {
+ if (WARN_ON(next_dmap->changed || cmd->committing_dmap)) {
r = -EINVAL;
goto out;
}
@@ -773,11 +811,33 @@ int dm_clone_metadata_commit(struct dm_clone_metadata *cmd)
cmd->current_dmap = next_dmap;
spin_unlock_irq(&cmd->bitmap_lock);
- /*
- * No one is accessing the old dirty bitmap anymore, so we can flush
- * it.
- */
- r = __flush_dmap(cmd, dmap);
+ /* Set old dirty bitmap as currently committing */
+ cmd->committing_dmap = dmap;
+out:
+ up_write(&cmd->lock);
+
+ return r;
+}
+
+int dm_clone_metadata_commit(struct dm_clone_metadata *cmd)
+{
+ int r = -EPERM;
+
+ down_write(&cmd->lock);
+
+ if (cmd->fail_io || dm_bm_is_read_only(cmd->bm))
+ goto out;
+
+ if (WARN_ON(!cmd->committing_dmap)) {
+ r = -EINVAL;
+ goto out;
+ }
+
+ r = __flush_dmap(cmd, cmd->committing_dmap);
+ if (!r) {
+ /* Clear committing dmap */
+ cmd->committing_dmap = NULL;
+ }
out:
up_write(&cmd->lock);
@@ -802,6 +862,7 @@ int dm_clone_set_region_hydrated(struct dm_clone_metadata *cmd, unsigned long re
dmap = cmd->current_dmap;
__set_bit(word, dmap->dirty_words);
+ __set_bit(region_nr, dmap->dirty_regions);
__set_bit(region_nr, cmd->region_map);
dmap->changed = 1;
@@ -830,6 +891,7 @@ int dm_clone_cond_set_range(struct dm_clone_metadata *cmd, unsigned long start,
if (!test_bit(region_nr, cmd->region_map)) {
word = region_nr / BITS_PER_LONG;
__set_bit(word, dmap->dirty_words);
+ __set_bit(region_nr, dmap->dirty_regions);
__set_bit(region_nr, cmd->region_map);
dmap->changed = 1;
}
diff --git a/drivers/md/dm-clone-metadata.h b/drivers/md/dm-clone-metadata.h
index 3fe50a781c11..14af1ebd853f 100644
--- a/drivers/md/dm-clone-metadata.h
+++ b/drivers/md/dm-clone-metadata.h
@@ -75,7 +75,23 @@ void dm_clone_metadata_close(struct dm_clone_metadata *cmd);
/*
* Commit dm-clone metadata to disk.
+ *
+ * We use a two phase commit:
+ *
+ * 1. dm_clone_metadata_pre_commit(): Prepare the current transaction for
+ * committing. After this is called, all subsequent metadata updates, done
+ * through either dm_clone_set_region_hydrated() or
+ * dm_clone_cond_set_range(), will be part of the **next** transaction.
+ *
+ * 2. dm_clone_metadata_commit(): Actually commit the current transaction to
+ * disk and start a new transaction.
+ *
+ * This allows dm-clone to flush the destination device after step (1) to
+ * ensure that all freshly hydrated regions, for which we are updating the
+ * metadata, are properly written to non-volatile storage and won't be lost in
+ * case of a crash.
*/
+int dm_clone_metadata_pre_commit(struct dm_clone_metadata *cmd);
int dm_clone_metadata_commit(struct dm_clone_metadata *cmd);
/*
@@ -112,6 +128,7 @@ int dm_clone_metadata_abort(struct dm_clone_metadata *cmd);
* Switches metadata to a read only mode. Once read-only mode has been entered
* the following functions will return -EPERM:
*
+ * dm_clone_metadata_pre_commit()
* dm_clone_metadata_commit()
* dm_clone_set_region_hydrated()
* dm_clone_cond_set_range()
diff --git a/drivers/md/dm-clone-target.c b/drivers/md/dm-clone-target.c
index b3d89072d21c..d1e1b5b56b1b 100644
--- a/drivers/md/dm-clone-target.c
+++ b/drivers/md/dm-clone-target.c
@@ -86,6 +86,12 @@ struct clone {
struct dm_clone_metadata *cmd;
+ /*
+ * bio used to flush the destination device, before committing the
+ * metadata.
+ */
+ struct bio flush_bio;
+
/* Region hydration hash table */
struct hash_table_bucket *ht;
@@ -1108,10 +1114,13 @@ static bool need_commit_due_to_time(struct clone *clone)
/*
* A non-zero return indicates read-only or fail mode.
*/
-static int commit_metadata(struct clone *clone)
+static int commit_metadata(struct clone *clone, bool *dest_dev_flushed)
{
int r = 0;
+ if (dest_dev_flushed)
+ *dest_dev_flushed = false;
+
mutex_lock(&clone->commit_lock);
if (!dm_clone_changed_this_transaction(clone->cmd))
@@ -1122,8 +1131,26 @@ static int commit_metadata(struct clone *clone)
goto out;
}
- r = dm_clone_metadata_commit(clone->cmd);
+ r = dm_clone_metadata_pre_commit(clone->cmd);
+ if (unlikely(r)) {
+ __metadata_operation_failed(clone, "dm_clone_metadata_pre_commit", r);
+ goto out;
+ }
+ bio_reset(&clone->flush_bio);
+ bio_set_dev(&clone->flush_bio, clone->dest_dev->bdev);
+ clone->flush_bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
+
+ r = submit_bio_wait(&clone->flush_bio);
+ if (unlikely(r)) {
+ __metadata_operation_failed(clone, "flush destination device", r);
+ goto out;
+ }
+
+ if (dest_dev_flushed)
+ *dest_dev_flushed = true;
+
+ r = dm_clone_metadata_commit(clone->cmd);
if (unlikely(r)) {
__metadata_operation_failed(clone, "dm_clone_metadata_commit", r);
goto out;
@@ -1194,6 +1221,7 @@ static void process_deferred_bios(struct clone *clone)
static void process_deferred_flush_bios(struct clone *clone)
{
struct bio *bio;
+ bool dest_dev_flushed;
struct bio_list bios = BIO_EMPTY_LIST;
struct bio_list bio_completions = BIO_EMPTY_LIST;
@@ -1213,7 +1241,7 @@ static void process_deferred_flush_bios(struct clone *clone)
!(dm_clone_changed_this_transaction(clone->cmd) && need_commit_due_to_time(clone)))
return;
- if (commit_metadata(clone)) {
+ if (commit_metadata(clone, &dest_dev_flushed)) {
bio_list_merge(&bios, &bio_completions);
while ((bio = bio_list_pop(&bios)))
@@ -1227,8 +1255,17 @@ static void process_deferred_flush_bios(struct clone *clone)
while ((bio = bio_list_pop(&bio_completions)))
bio_endio(bio);
- while ((bio = bio_list_pop(&bios)))
- generic_make_request(bio);
+ while ((bio = bio_list_pop(&bios))) {
+ if ((bio->bi_opf & REQ_PREFLUSH) && dest_dev_flushed) {
+ /* We just flushed the destination device as part of
+ * the metadata commit, so there is no reason to send
+ * another flush.
+ */
+ bio_endio(bio);
+ } else {
+ generic_make_request(bio);
+ }
+ }
}
static void do_worker(struct work_struct *work)
@@ -1400,7 +1437,7 @@ static void clone_status(struct dm_target *ti, status_type_t type,
/* Commit to ensure statistics aren't out-of-date */
if (!(status_flags & DM_STATUS_NOFLUSH_FLAG) && !dm_suspended(ti))
- (void) commit_metadata(clone);
+ (void) commit_metadata(clone, NULL);
r = dm_clone_get_free_metadata_block_count(clone->cmd, &nr_free_metadata_blocks);
@@ -1834,6 +1871,7 @@ static int clone_ctr(struct dm_target *ti, unsigned int argc, char **argv)
bio_list_init(&clone->deferred_flush_completions);
clone->hydration_offset = 0;
atomic_set(&clone->hydrations_in_flight, 0);
+ bio_init(&clone->flush_bio, NULL, 0);
clone->wq = alloc_workqueue("dm-" DM_MSG_PREFIX, WQ_MEM_RECLAIM, 0);
if (!clone->wq) {
@@ -1907,6 +1945,7 @@ static void clone_dtr(struct dm_target *ti)
struct clone *clone = ti->private;
mutex_destroy(&clone->commit_lock);
+ bio_uninit(&clone->flush_bio);
for (i = 0; i < clone->nr_ctr_args; i++)
kfree(clone->ctr_args[i]);
@@ -1961,7 +2000,7 @@ static void clone_postsuspend(struct dm_target *ti)
wait_event(clone->hydration_stopped, !atomic_read(&clone->hydrations_in_flight));
flush_workqueue(clone->wq);
- (void) commit_metadata(clone);
+ (void) commit_metadata(clone, NULL);
}
static void clone_resume(struct dm_target *ti)
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index dbcc1e41cd57..e0c32793c248 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -599,45 +599,10 @@ static struct pgpath *__map_bio(struct multipath *m, struct bio *bio)
return pgpath;
}
-static struct pgpath *__map_bio_fast(struct multipath *m, struct bio *bio)
-{
- struct pgpath *pgpath;
- unsigned long flags;
-
- /* Do we need to select a new pgpath? */
- /*
- * FIXME: currently only switching path if no path (due to failure, etc)
- * - which negates the point of using a path selector
- */
- pgpath = READ_ONCE(m->current_pgpath);
- if (!pgpath)
- pgpath = choose_pgpath(m, bio->bi_iter.bi_size);
-
- if (!pgpath) {
- if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) {
- /* Queue for the daemon to resubmit */
- spin_lock_irqsave(&m->lock, flags);
- bio_list_add(&m->queued_bios, bio);
- spin_unlock_irqrestore(&m->lock, flags);
- queue_work(kmultipathd, &m->process_queued_bios);
-
- return ERR_PTR(-EAGAIN);
- }
- return NULL;
- }
-
- return pgpath;
-}
-
static int __multipath_map_bio(struct multipath *m, struct bio *bio,
struct dm_mpath_io *mpio)
{
- struct pgpath *pgpath;
-
- if (!m->hw_handler_name)
- pgpath = __map_bio_fast(m, bio);
- else
- pgpath = __map_bio(m, bio);
+ struct pgpath *pgpath = __map_bio(m, bio);
if (IS_ERR(pgpath))
return DM_MAPIO_SUBMITTED;
diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c
index 4c68a7b93d5e..b88d6d701f5b 100644
--- a/drivers/md/dm-thin-metadata.c
+++ b/drivers/md/dm-thin-metadata.c
@@ -189,6 +189,15 @@ struct dm_pool_metadata {
sector_t data_block_size;
/*
+ * Pre-commit callback.
+ *
+ * This allows the thin provisioning target to run a callback before
+ * the metadata are committed.
+ */
+ dm_pool_pre_commit_fn pre_commit_fn;
+ void *pre_commit_context;
+
+ /*
* We reserve a section of the metadata for commit overhead.
* All reported space does *not* include this.
*/
@@ -826,6 +835,14 @@ static int __commit_transaction(struct dm_pool_metadata *pmd)
if (unlikely(!pmd->in_service))
return 0;
+ if (pmd->pre_commit_fn) {
+ r = pmd->pre_commit_fn(pmd->pre_commit_context);
+ if (r < 0) {
+ DMERR("pre-commit callback failed");
+ return r;
+ }
+ }
+
r = __write_changed_details(pmd);
if (r < 0)
return r;
@@ -892,6 +909,8 @@ struct dm_pool_metadata *dm_pool_metadata_open(struct block_device *bdev,
pmd->in_service = false;
pmd->bdev = bdev;
pmd->data_block_size = data_block_size;
+ pmd->pre_commit_fn = NULL;
+ pmd->pre_commit_context = NULL;
r = __create_persistent_data_objects(pmd, format_device);
if (r) {
@@ -2044,6 +2063,16 @@ int dm_pool_register_metadata_threshold(struct dm_pool_metadata *pmd,
return r;
}
+void dm_pool_register_pre_commit_callback(struct dm_pool_metadata *pmd,
+ dm_pool_pre_commit_fn fn,
+ void *context)
+{
+ pmd_write_lock_in_core(pmd);
+ pmd->pre_commit_fn = fn;
+ pmd->pre_commit_context = context;
+ pmd_write_unlock(pmd);
+}
+
int dm_pool_metadata_set_needs_check(struct dm_pool_metadata *pmd)
{
int r = -EINVAL;
diff --git a/drivers/md/dm-thin-metadata.h b/drivers/md/dm-thin-metadata.h
index f6be0d733c20..7ef56bd2a7e3 100644
--- a/drivers/md/dm-thin-metadata.h
+++ b/drivers/md/dm-thin-metadata.h
@@ -230,6 +230,13 @@ bool dm_pool_metadata_needs_check(struct dm_pool_metadata *pmd);
*/
void dm_pool_issue_prefetches(struct dm_pool_metadata *pmd);
+/* Pre-commit callback */
+typedef int (*dm_pool_pre_commit_fn)(void *context);
+
+void dm_pool_register_pre_commit_callback(struct dm_pool_metadata *pmd,
+ dm_pool_pre_commit_fn fn,
+ void *context);
+
/*----------------------------------------------------------------*/
#endif
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 5a2c494cb552..57626c27a54b 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -328,6 +328,7 @@ struct pool_c {
dm_block_t low_water_blocks;
struct pool_features requested_pf; /* Features requested during table load */
struct pool_features adjusted_pf; /* Features used after adjusting for constituent devices */
+ struct bio flush_bio;
};
/*
@@ -2383,8 +2384,16 @@ static void process_deferred_bios(struct pool *pool)
while ((bio = bio_list_pop(&bio_completions)))
bio_endio(bio);
- while ((bio = bio_list_pop(&bios)))
- generic_make_request(bio);
+ while ((bio = bio_list_pop(&bios))) {
+ /*
+ * The data device was flushed as part of metadata commit,
+ * so complete redundant flushes immediately.
+ */
+ if (bio->bi_opf & REQ_PREFLUSH)
+ bio_endio(bio);
+ else
+ generic_make_request(bio);
+ }
}
static void do_worker(struct work_struct *ws)
@@ -3115,6 +3124,7 @@ static void pool_dtr(struct dm_target *ti)
__pool_dec(pt->pool);
dm_put_device(ti, pt->metadata_dev);
dm_put_device(ti, pt->data_dev);
+ bio_uninit(&pt->flush_bio);
kfree(pt);
mutex_unlock(&dm_thin_pool_table.mutex);
@@ -3180,6 +3190,29 @@ static void metadata_low_callback(void *context)
dm_table_event(pool->ti->table);
}
+/*
+ * We need to flush the data device **before** committing the metadata.
+ *
+ * This ensures that the data blocks of any newly inserted mappings are
+ * properly written to non-volatile storage and won't be lost in case of a
+ * crash.
+ *
+ * Failure to do so can result in data corruption in the case of internal or
+ * external snapshots and in the case of newly provisioned blocks, when block
+ * zeroing is enabled.
+ */
+static int metadata_pre_commit_callback(void *context)
+{
+ struct pool_c *pt = context;
+ struct bio *flush_bio = &pt->flush_bio;
+
+ bio_reset(flush_bio);
+ bio_set_dev(flush_bio, pt->data_dev->bdev);
+ flush_bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
+
+ return submit_bio_wait(flush_bio);
+}
+
static sector_t get_dev_size(struct block_device *bdev)
{
return i_size_read(bdev->bd_inode) >> SECTOR_SHIFT;
@@ -3348,6 +3381,7 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
pt->data_dev = data_dev;
pt->low_water_blocks = low_water_blocks;
pt->adjusted_pf = pt->requested_pf = pf;
+ bio_init(&pt->flush_bio, NULL, 0);
ti->num_flush_bios = 1;
/*
@@ -3374,6 +3408,10 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
if (r)
goto out_flags_changed;
+ dm_pool_register_pre_commit_callback(pt->pool->pmd,
+ metadata_pre_commit_callback,
+ pt);
+
pt->callbacks.congested_fn = pool_is_congested;
dm_table_add_target_callbacks(ti->table, &pt->callbacks);
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 805b33e27496..4e7c9f398bc6 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -1159,6 +1159,7 @@ static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor
/* not spare disk, or LEVEL_MULTIPATH */
if (sb->level == LEVEL_MULTIPATH ||
(rdev->desc_nr >= 0 &&
+ rdev->desc_nr < MD_SB_DISKS &&
sb->disks[rdev->desc_nr].state &
((1<<MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE))))
spare_disk = false;
diff --git a/drivers/md/persistent-data/dm-btree-remove.c b/drivers/md/persistent-data/dm-btree-remove.c
index 21ea537bd55e..eff04fa23dfa 100644
--- a/drivers/md/persistent-data/dm-btree-remove.c
+++ b/drivers/md/persistent-data/dm-btree-remove.c
@@ -203,7 +203,13 @@ static void __rebalance2(struct dm_btree_info *info, struct btree_node *parent,
struct btree_node *right = r->n;
uint32_t nr_left = le32_to_cpu(left->header.nr_entries);
uint32_t nr_right = le32_to_cpu(right->header.nr_entries);
- unsigned threshold = 2 * merge_threshold(left) + 1;
+ /*
+ * Ensure the number of entries in each child will be greater
+ * than or equal to (max_entries / 3 + 1), so no matter which
+ * child is used for removal, the number will still be not
+ * less than (max_entries / 3).
+ */
+ unsigned int threshold = 2 * (merge_threshold(left) + 1);
if (nr_left + nr_right < threshold) {
/*
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index a409ab6f30bc..201fd8aec59a 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -2782,7 +2782,7 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
write_targets++;
}
}
- if (bio->bi_end_io) {
+ if (rdev && bio->bi_end_io) {
atomic_inc(&rdev->nr_pending);
bio->bi_iter.bi_sector = sector_nr + rdev->data_offset;
bio_set_dev(bio, rdev->bdev);
diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c
index cab5b1352892..d50238d0a85d 100644
--- a/drivers/md/raid5-ppl.c
+++ b/drivers/md/raid5-ppl.c
@@ -1360,7 +1360,7 @@ int ppl_init_log(struct r5conf *conf)
return -EINVAL;
}
- max_disks = FIELD_SIZEOF(struct ppl_log, disk_flush_bitmap) *
+ max_disks = sizeof_field(struct ppl_log, disk_flush_bitmap) *
BITS_PER_BYTE;
if (conf->raid_disks > max_disks) {
pr_warn("md/raid:%s PPL doesn't support over %d disks in the array\n",
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index f0fc538bfe59..d4d3b67ffbba 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -5726,7 +5726,7 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
do_flush = false;
}
- if (!sh->batch_head)
+ if (!sh->batch_head || sh == sh->batch_head)
set_bit(STRIPE_HANDLE, &sh->state);
clear_bit(STRIPE_DELAYED, &sh->state);
if ((!sh->batch_head || sh == sh->batch_head) &&
diff --git a/drivers/media/cec/cec-adap.c b/drivers/media/cec/cec-adap.c
index 9340435a94a0..6c95dc471d4c 100644
--- a/drivers/media/cec/cec-adap.c
+++ b/drivers/media/cec/cec-adap.c
@@ -380,7 +380,8 @@ static void cec_data_cancel(struct cec_data *data, u8 tx_status)
} else {
list_del_init(&data->list);
if (!(data->msg.tx_status & CEC_TX_STATUS_OK))
- data->adap->transmit_queue_sz--;
+ if (!WARN_ON(!data->adap->transmit_queue_sz))
+ data->adap->transmit_queue_sz--;
}
if (data->msg.tx_status & CEC_TX_STATUS_OK) {
@@ -432,6 +433,14 @@ static void cec_flush(struct cec_adapter *adap)
* need to do anything special in that case.
*/
}
+ /*
+ * If something went wrong and this counter isn't what it should
+ * be, then this will reset it back to 0. Warn if it is not 0,
+ * since it indicates a bug, either in this framework or in a
+ * CEC driver.
+ */
+ if (WARN_ON(adap->transmit_queue_sz))
+ adap->transmit_queue_sz = 0;
}
/*
@@ -456,7 +465,7 @@ int cec_thread_func(void *_adap)
bool timeout = false;
u8 attempts;
- if (adap->transmitting) {
+ if (adap->transmit_in_progress) {
int err;
/*
@@ -491,7 +500,7 @@ int cec_thread_func(void *_adap)
goto unlock;
}
- if (adap->transmitting && timeout) {
+ if (adap->transmit_in_progress && timeout) {
/*
* If we timeout, then log that. Normally this does
* not happen and it is an indication of a faulty CEC
@@ -500,14 +509,18 @@ int cec_thread_func(void *_adap)
* so much traffic on the bus that the adapter was
* unable to transmit for CEC_XFER_TIMEOUT_MS (2.1s).
*/
- pr_warn("cec-%s: message %*ph timed out\n", adap->name,
- adap->transmitting->msg.len,
- adap->transmitting->msg.msg);
+ if (adap->transmitting) {
+ pr_warn("cec-%s: message %*ph timed out\n", adap->name,
+ adap->transmitting->msg.len,
+ adap->transmitting->msg.msg);
+ /* Just give up on this. */
+ cec_data_cancel(adap->transmitting,
+ CEC_TX_STATUS_TIMEOUT);
+ } else {
+ pr_warn("cec-%s: transmit timed out\n", adap->name);
+ }
adap->transmit_in_progress = false;
adap->tx_timeouts++;
- /* Just give up on this. */
- cec_data_cancel(adap->transmitting,
- CEC_TX_STATUS_TIMEOUT);
goto unlock;
}
@@ -522,7 +535,8 @@ int cec_thread_func(void *_adap)
data = list_first_entry(&adap->transmit_queue,
struct cec_data, list);
list_del_init(&data->list);
- adap->transmit_queue_sz--;
+ if (!WARN_ON(!data->adap->transmit_queue_sz))
+ adap->transmit_queue_sz--;
/* Make this the current transmitting message */
adap->transmitting = data;
@@ -1085,11 +1099,11 @@ void cec_received_msg_ts(struct cec_adapter *adap,
valid_la = false;
else if (!cec_msg_is_broadcast(msg) && !(dir_fl & DIRECTED))
valid_la = false;
- else if (cec_msg_is_broadcast(msg) && !(dir_fl & BCAST1_4))
+ else if (cec_msg_is_broadcast(msg) && !(dir_fl & BCAST))
valid_la = false;
else if (cec_msg_is_broadcast(msg) &&
- adap->log_addrs.cec_version >= CEC_OP_CEC_VERSION_2_0 &&
- !(dir_fl & BCAST2_0))
+ adap->log_addrs.cec_version < CEC_OP_CEC_VERSION_2_0 &&
+ !(dir_fl & BCAST1_4))
valid_la = false;
}
if (valid_la && min_len) {
diff --git a/drivers/media/platform/omap3isp/isppreview.c b/drivers/media/platform/omap3isp/isppreview.c
index 97d660606d98..4dbdf3180d10 100644
--- a/drivers/media/platform/omap3isp/isppreview.c
+++ b/drivers/media/platform/omap3isp/isppreview.c
@@ -753,7 +753,7 @@ static const struct preview_update update_attrs[] = {
preview_config_luma_enhancement,
preview_enable_luma_enhancement,
offsetof(struct prev_params, luma),
- FIELD_SIZEOF(struct prev_params, luma),
+ sizeof_field(struct prev_params, luma),
offsetof(struct omap3isp_prev_update_config, luma),
}, /* OMAP3ISP_PREV_INVALAW */ {
NULL,
@@ -762,55 +762,55 @@ static const struct preview_update update_attrs[] = {
preview_config_hmed,
preview_enable_hmed,
offsetof(struct prev_params, hmed),
- FIELD_SIZEOF(struct prev_params, hmed),
+ sizeof_field(struct prev_params, hmed),
offsetof(struct omap3isp_prev_update_config, hmed),
}, /* OMAP3ISP_PREV_CFA */ {
preview_config_cfa,
NULL,
offsetof(struct prev_params, cfa),
- FIELD_SIZEOF(struct prev_params, cfa),
+ sizeof_field(struct prev_params, cfa),
offsetof(struct omap3isp_prev_update_config, cfa),
}, /* OMAP3ISP_PREV_CHROMA_SUPP */ {
preview_config_chroma_suppression,
preview_enable_chroma_suppression,
offsetof(struct prev_params, csup),
- FIELD_SIZEOF(struct prev_params, csup),
+ sizeof_field(struct prev_params, csup),
offsetof(struct omap3isp_prev_update_config, csup),
}, /* OMAP3ISP_PREV_WB */ {
preview_config_whitebalance,
NULL,
offsetof(struct prev_params, wbal),
- FIELD_SIZEOF(struct prev_params, wbal),
+ sizeof_field(struct prev_params, wbal),
offsetof(struct omap3isp_prev_update_config, wbal),
}, /* OMAP3ISP_PREV_BLKADJ */ {
preview_config_blkadj,
NULL,
offsetof(struct prev_params, blkadj),
- FIELD_SIZEOF(struct prev_params, blkadj),
+ sizeof_field(struct prev_params, blkadj),
offsetof(struct omap3isp_prev_update_config, blkadj),
}, /* OMAP3ISP_PREV_RGB2RGB */ {
preview_config_rgb_blending,
NULL,
offsetof(struct prev_params, rgb2rgb),
- FIELD_SIZEOF(struct prev_params, rgb2rgb),
+ sizeof_field(struct prev_params, rgb2rgb),
offsetof(struct omap3isp_prev_update_config, rgb2rgb),
}, /* OMAP3ISP_PREV_COLOR_CONV */ {
preview_config_csc,
NULL,
offsetof(struct prev_params, csc),
- FIELD_SIZEOF(struct prev_params, csc),
+ sizeof_field(struct prev_params, csc),
offsetof(struct omap3isp_prev_update_config, csc),
}, /* OMAP3ISP_PREV_YC_LIMIT */ {
preview_config_yc_range,
NULL,
offsetof(struct prev_params, yclimit),
- FIELD_SIZEOF(struct prev_params, yclimit),
+ sizeof_field(struct prev_params, yclimit),
offsetof(struct omap3isp_prev_update_config, yclimit),
}, /* OMAP3ISP_PREV_DEFECT_COR */ {
preview_config_dcor,
preview_enable_dcor,
offsetof(struct prev_params, dcor),
- FIELD_SIZEOF(struct prev_params, dcor),
+ sizeof_field(struct prev_params, dcor),
offsetof(struct omap3isp_prev_update_config, dcor),
}, /* Previously OMAP3ISP_PREV_GAMMABYPASS, not used anymore */ {
NULL,
@@ -828,13 +828,13 @@ static const struct preview_update update_attrs[] = {
preview_config_noisefilter,
preview_enable_noisefilter,
offsetof(struct prev_params, nf),
- FIELD_SIZEOF(struct prev_params, nf),
+ sizeof_field(struct prev_params, nf),
offsetof(struct omap3isp_prev_update_config, nf),
}, /* OMAP3ISP_PREV_GAMMA */ {
preview_config_gammacorrn,
preview_enable_gammacorrn,
offsetof(struct prev_params, gamma),
- FIELD_SIZEOF(struct prev_params, gamma),
+ sizeof_field(struct prev_params, gamma),
offsetof(struct omap3isp_prev_update_config, gamma),
}, /* OMAP3ISP_PREV_CONTRAST */ {
preview_config_contrast,
diff --git a/drivers/media/usb/pulse8-cec/pulse8-cec.c b/drivers/media/usb/pulse8-cec/pulse8-cec.c
index ac88ade94cda..59609556d969 100644
--- a/drivers/media/usb/pulse8-cec/pulse8-cec.c
+++ b/drivers/media/usb/pulse8-cec/pulse8-cec.c
@@ -116,6 +116,7 @@ struct pulse8 {
unsigned int vers;
struct completion cmd_done;
struct work_struct work;
+ u8 work_result;
struct delayed_work ping_eeprom_work;
struct cec_msg rx_msg;
u8 data[DATA_SIZE];
@@ -137,8 +138,10 @@ static void pulse8_irq_work_handler(struct work_struct *work)
{
struct pulse8 *pulse8 =
container_of(work, struct pulse8, work);
+ u8 result = pulse8->work_result;
- switch (pulse8->data[0] & 0x3f) {
+ pulse8->work_result = 0;
+ switch (result & 0x3f) {
case MSGCODE_FRAME_DATA:
cec_received_msg(pulse8->adap, &pulse8->rx_msg);
break;
@@ -172,12 +175,12 @@ static irqreturn_t pulse8_interrupt(struct serio *serio, unsigned char data,
pulse8->escape = false;
} else if (data == MSGEND) {
struct cec_msg *msg = &pulse8->rx_msg;
+ u8 msgcode = pulse8->buf[0];
if (debug)
dev_info(pulse8->dev, "received: %*ph\n",
pulse8->idx, pulse8->buf);
- pulse8->data[0] = pulse8->buf[0];
- switch (pulse8->buf[0] & 0x3f) {
+ switch (msgcode & 0x3f) {
case MSGCODE_FRAME_START:
msg->len = 1;
msg->msg[0] = pulse8->buf[1];
@@ -186,14 +189,20 @@ static irqreturn_t pulse8_interrupt(struct serio *serio, unsigned char data,
if (msg->len == CEC_MAX_MSG_SIZE)
break;
msg->msg[msg->len++] = pulse8->buf[1];
- if (pulse8->buf[0] & MSGCODE_FRAME_EOM)
+ if (msgcode & MSGCODE_FRAME_EOM) {
+ WARN_ON(pulse8->work_result);
+ pulse8->work_result = msgcode;
schedule_work(&pulse8->work);
+ break;
+ }
break;
case MSGCODE_TRANSMIT_SUCCEEDED:
case MSGCODE_TRANSMIT_FAILED_LINE:
case MSGCODE_TRANSMIT_FAILED_ACK:
case MSGCODE_TRANSMIT_FAILED_TIMEOUT_DATA:
case MSGCODE_TRANSMIT_FAILED_TIMEOUT_LINE:
+ WARN_ON(pulse8->work_result);
+ pulse8->work_result = msgcode;
schedule_work(&pulse8->work);
break;
case MSGCODE_HIGH_ERROR:
diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c b/drivers/media/v4l2-core/v4l2-ioctl.c
index 4e700583659b..003b7422aeef 100644
--- a/drivers/media/v4l2-core/v4l2-ioctl.c
+++ b/drivers/media/v4l2-core/v4l2-ioctl.c
@@ -2652,7 +2652,7 @@ struct v4l2_ioctl_info {
/* Zero struct from after the field to the end */
#define INFO_FL_CLEAR(v4l2_struct, field) \
((offsetof(struct v4l2_struct, field) + \
- FIELD_SIZEOF(struct v4l2_struct, field)) << 16)
+ sizeof_field(struct v4l2_struct, field)) << 16)
#define INFO_FL_CLEAR_MASK (_IOC_SIZEMASK << 16)
#define DEFINE_V4L_STUB_FUNC(_vidioc) \
diff --git a/drivers/message/fusion/mptlan.c b/drivers/message/fusion/mptlan.c
index ebc00d47abf5..7d3784aa20e5 100644
--- a/drivers/message/fusion/mptlan.c
+++ b/drivers/message/fusion/mptlan.c
@@ -552,7 +552,7 @@ mpt_lan_close(struct net_device *dev)
/*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
/* Tx timeout handler. */
static void
-mpt_lan_tx_timeout(struct net_device *dev)
+mpt_lan_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct mpt_lan_priv *priv = netdev_priv(dev);
MPT_ADAPTER *mpt_dev = priv->mpt_dev;
diff --git a/drivers/misc/habanalabs/command_submission.c b/drivers/misc/habanalabs/command_submission.c
index 8850f475a413..0bf08678431b 100644
--- a/drivers/misc/habanalabs/command_submission.c
+++ b/drivers/misc/habanalabs/command_submission.c
@@ -824,8 +824,9 @@ int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
memset(args, 0, sizeof(*args));
if (rc < 0) {
- dev_err(hdev->dev, "Error %ld on waiting for CS handle %llu\n",
- rc, seq);
+ dev_err_ratelimited(hdev->dev,
+ "Error %ld on waiting for CS handle %llu\n",
+ rc, seq);
if (rc == -ERESTARTSYS) {
args->out.status = HL_WAIT_CS_STATUS_INTERRUPTED;
rc = -EINTR;
diff --git a/drivers/misc/habanalabs/context.c b/drivers/misc/habanalabs/context.c
index 17db7b3dfb4c..2df6fb87e7ff 100644
--- a/drivers/misc/habanalabs/context.c
+++ b/drivers/misc/habanalabs/context.c
@@ -176,7 +176,7 @@ struct dma_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq)
spin_lock(&ctx->cs_lock);
if (seq >= ctx->cs_sequence) {
- dev_notice(hdev->dev,
+ dev_notice_ratelimited(hdev->dev,
"Can't wait on seq %llu because current CS is at seq %llu\n",
seq, ctx->cs_sequence);
spin_unlock(&ctx->cs_lock);
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index c8d16aa4382c..7344e8a222ae 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -2192,7 +2192,7 @@ static int goya_push_linux_to_device(struct hl_device *hdev)
static int goya_pldm_init_cpu(struct hl_device *hdev)
{
- u32 val, unit_rst_val;
+ u32 unit_rst_val;
int rc;
/* Must initialize SRAM scrambler before pushing u-boot to SRAM */
@@ -2200,14 +2200,14 @@ static int goya_pldm_init_cpu(struct hl_device *hdev)
/* Put ARM cores into reset */
WREG32(mmCPU_CA53_CFG_ARM_RST_CONTROL, CPU_RESET_ASSERT);
- val = RREG32(mmCPU_CA53_CFG_ARM_RST_CONTROL);
+ RREG32(mmCPU_CA53_CFG_ARM_RST_CONTROL);
/* Reset the CA53 MACRO */
unit_rst_val = RREG32(mmPSOC_GLOBAL_CONF_UNIT_RST_N);
WREG32(mmPSOC_GLOBAL_CONF_UNIT_RST_N, CA53_RESET);
- val = RREG32(mmPSOC_GLOBAL_CONF_UNIT_RST_N);
+ RREG32(mmPSOC_GLOBAL_CONF_UNIT_RST_N);
WREG32(mmPSOC_GLOBAL_CONF_UNIT_RST_N, unit_rst_val);
- val = RREG32(mmPSOC_GLOBAL_CONF_UNIT_RST_N);
+ RREG32(mmPSOC_GLOBAL_CONF_UNIT_RST_N);
rc = goya_push_uboot_to_device(hdev);
if (rc)
@@ -2228,7 +2228,7 @@ static int goya_pldm_init_cpu(struct hl_device *hdev)
/* Release ARM core 0 from reset */
WREG32(mmCPU_CA53_CFG_ARM_RST_CONTROL,
CPU_RESET_CORE0_DEASSERT);
- val = RREG32(mmCPU_CA53_CFG_ARM_RST_CONTROL);
+ RREG32(mmCPU_CA53_CFG_ARM_RST_CONTROL);
return 0;
}
@@ -2502,13 +2502,12 @@ err:
static int goya_hw_init(struct hl_device *hdev)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
- u32 val;
int rc;
dev_info(hdev->dev, "Starting initialization of H/W\n");
/* Perform read from the device to make sure device is up */
- val = RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
+ RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
/*
* Let's mark in the H/W that we have reached this point. We check
@@ -2560,7 +2559,7 @@ static int goya_hw_init(struct hl_device *hdev)
goto disable_queues;
/* Perform read from the device to flush all MSI-X configuration */
- val = RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
+ RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
return 0;
diff --git a/drivers/misc/ocxl/context.c b/drivers/misc/ocxl/context.c
index 994563a078eb..de8a66b9d76b 100644
--- a/drivers/misc/ocxl/context.c
+++ b/drivers/misc/ocxl/context.c
@@ -10,18 +10,17 @@ int ocxl_context_alloc(struct ocxl_context **context, struct ocxl_afu *afu,
int pasid;
struct ocxl_context *ctx;
- *context = kzalloc(sizeof(struct ocxl_context), GFP_KERNEL);
- if (!*context)
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ if (!ctx)
return -ENOMEM;
- ctx = *context;
-
ctx->afu = afu;
mutex_lock(&afu->contexts_lock);
pasid = idr_alloc(&afu->contexts_idr, ctx, afu->pasid_base,
afu->pasid_base + afu->pasid_max, GFP_KERNEL);
if (pasid < 0) {
mutex_unlock(&afu->contexts_lock);
+ kfree(ctx);
return pasid;
}
afu->pasid_count++;
@@ -43,6 +42,7 @@ int ocxl_context_alloc(struct ocxl_context **context, struct ocxl_afu *afu,
* duration of the life of the context
*/
ocxl_afu_get(afu);
+ *context = ctx;
return 0;
}
EXPORT_SYMBOL_GPL(ocxl_context_alloc);
diff --git a/drivers/misc/ocxl/file.c b/drivers/misc/ocxl/file.c
index 2870c25da166..4d1b44de1492 100644
--- a/drivers/misc/ocxl/file.c
+++ b/drivers/misc/ocxl/file.c
@@ -18,18 +18,15 @@ static struct class *ocxl_class;
static struct mutex minors_idr_lock;
static struct idr minors_idr;
-static struct ocxl_file_info *find_file_info(dev_t devno)
+static struct ocxl_file_info *find_and_get_file_info(dev_t devno)
{
struct ocxl_file_info *info;
- /*
- * We don't declare an RCU critical section here, as our AFU
- * is protected by a reference counter on the device. By the time the
- * info reference is removed from the idr, the ref count of
- * the device is already at 0, so no user API will access that AFU and
- * this function can't return it.
- */
+ mutex_lock(&minors_idr_lock);
info = idr_find(&minors_idr, MINOR(devno));
+ if (info)
+ get_device(&info->dev);
+ mutex_unlock(&minors_idr_lock);
return info;
}
@@ -58,14 +55,16 @@ static int afu_open(struct inode *inode, struct file *file)
pr_debug("%s for device %x\n", __func__, inode->i_rdev);
- info = find_file_info(inode->i_rdev);
+ info = find_and_get_file_info(inode->i_rdev);
if (!info)
return -ENODEV;
rc = ocxl_context_alloc(&ctx, info->afu, inode->i_mapping);
- if (rc)
+ if (rc) {
+ put_device(&info->dev);
return rc;
-
+ }
+ put_device(&info->dev);
file->private_data = ctx;
return 0;
}
@@ -487,7 +486,6 @@ static void info_release(struct device *dev)
{
struct ocxl_file_info *info = container_of(dev, struct ocxl_file_info, dev);
- free_minor(info);
ocxl_afu_put(info->afu);
kfree(info);
}
@@ -577,6 +575,7 @@ void ocxl_file_unregister_afu(struct ocxl_afu *afu)
ocxl_file_make_invisible(info);
ocxl_sysfs_unregister_afu(info);
+ free_minor(info);
device_unregister(&info->dev);
}
diff --git a/drivers/misc/sgi-xp/xpnet.c b/drivers/misc/sgi-xp/xpnet.c
index f7d610a22347..ada94e6a3c91 100644
--- a/drivers/misc/sgi-xp/xpnet.c
+++ b/drivers/misc/sgi-xp/xpnet.c
@@ -496,7 +496,7 @@ xpnet_dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
* Deal with transmit timeouts coming from the network layer.
*/
static void
-xpnet_dev_tx_timeout(struct net_device *dev)
+xpnet_dev_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
dev->stats.tx_errors++;
}
diff --git a/drivers/mmc/host/mtk-sd.c b/drivers/mmc/host/mtk-sd.c
index 189e42674d85..010fe29a4888 100644
--- a/drivers/mmc/host/mtk-sd.c
+++ b/drivers/mmc/host/mtk-sd.c
@@ -228,6 +228,7 @@
#define MSDC_PATCH_BIT_SPCPUSH (0x1 << 29) /* RW */
#define MSDC_PATCH_BIT_DECRCTMO (0x1 << 30) /* RW */
+#define MSDC_PATCH_BIT1_CMDTA (0x7 << 3) /* RW */
#define MSDC_PATCH_BIT1_STOP_DLY (0xf << 8) /* RW */
#define MSDC_PATCH_BIT2_CFGRESP (0x1 << 15) /* RW */
@@ -1881,6 +1882,7 @@ static int hs400_tune_response(struct mmc_host *mmc, u32 opcode)
/* select EMMC50 PAD CMD tune */
sdr_set_bits(host->base + PAD_CMD_TUNE, BIT(0));
+ sdr_set_field(host->base + MSDC_PATCH_BIT1, MSDC_PATCH_BIT1_CMDTA, 2);
if (mmc->ios.timing == MMC_TIMING_MMC_HS200 ||
mmc->ios.timing == MMC_TIMING_UHS_SDR104)
diff --git a/drivers/mmc/host/sdhci-msm.c b/drivers/mmc/host/sdhci-msm.c
index b75c82d8d6c1..3d0bb5e2e09b 100644
--- a/drivers/mmc/host/sdhci-msm.c
+++ b/drivers/mmc/host/sdhci-msm.c
@@ -99,7 +99,7 @@
#define CORE_PWRSAVE_DLL BIT(3)
-#define DDR_CONFIG_POR_VAL 0x80040853
+#define DDR_CONFIG_POR_VAL 0x80040873
#define INVALID_TUNING_PHASE -1
@@ -148,8 +148,9 @@ struct sdhci_msm_offset {
u32 core_ddr_200_cfg;
u32 core_vendor_spec3;
u32 core_dll_config_2;
+ u32 core_dll_config_3;
+ u32 core_ddr_config_old; /* Applicable to sdcc minor ver < 0x49 */
u32 core_ddr_config;
- u32 core_ddr_config_2;
};
static const struct sdhci_msm_offset sdhci_msm_v5_offset = {
@@ -177,8 +178,8 @@ static const struct sdhci_msm_offset sdhci_msm_v5_offset = {
.core_ddr_200_cfg = 0x224,
.core_vendor_spec3 = 0x250,
.core_dll_config_2 = 0x254,
- .core_ddr_config = 0x258,
- .core_ddr_config_2 = 0x25c,
+ .core_dll_config_3 = 0x258,
+ .core_ddr_config = 0x25c,
};
static const struct sdhci_msm_offset sdhci_msm_mci_offset = {
@@ -207,8 +208,8 @@ static const struct sdhci_msm_offset sdhci_msm_mci_offset = {
.core_ddr_200_cfg = 0x184,
.core_vendor_spec3 = 0x1b0,
.core_dll_config_2 = 0x1b4,
- .core_ddr_config = 0x1b8,
- .core_ddr_config_2 = 0x1bc,
+ .core_ddr_config_old = 0x1b8,
+ .core_ddr_config = 0x1bc,
};
struct sdhci_msm_variant_ops {
@@ -253,6 +254,7 @@ struct sdhci_msm_host {
const struct sdhci_msm_offset *offset;
bool use_cdr;
u32 transfer_mode;
+ bool updated_ddr_cfg;
};
static const struct sdhci_msm_offset *sdhci_priv_msm_offset(struct sdhci_host *host)
@@ -924,8 +926,10 @@ out:
static int sdhci_msm_cm_dll_sdc4_calibration(struct sdhci_host *host)
{
struct mmc_host *mmc = host->mmc;
- u32 dll_status, config;
+ u32 dll_status, config, ddr_cfg_offset;
int ret;
+ struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+ struct sdhci_msm_host *msm_host = sdhci_pltfm_priv(pltfm_host);
const struct sdhci_msm_offset *msm_offset =
sdhci_priv_msm_offset(host);
@@ -938,8 +942,11 @@ static int sdhci_msm_cm_dll_sdc4_calibration(struct sdhci_host *host)
* bootloaders. In the future, if this changes, then the desired
* values will need to be programmed appropriately.
*/
- writel_relaxed(DDR_CONFIG_POR_VAL, host->ioaddr +
- msm_offset->core_ddr_config);
+ if (msm_host->updated_ddr_cfg)
+ ddr_cfg_offset = msm_offset->core_ddr_config;
+ else
+ ddr_cfg_offset = msm_offset->core_ddr_config_old;
+ writel_relaxed(DDR_CONFIG_POR_VAL, host->ioaddr + ddr_cfg_offset);
if (mmc->ios.enhanced_strobe) {
config = readl_relaxed(host->ioaddr +
@@ -1899,6 +1906,9 @@ static int sdhci_msm_probe(struct platform_device *pdev)
msm_offset->core_vendor_spec_capabilities0);
}
+ if (core_major == 1 && core_minor >= 0x49)
+ msm_host->updated_ddr_cfg = true;
+
/*
* Power on reset state may trigger power irq if previous status of
* PWRCTL was either BUS_ON or IO_HIGH_V. So before enabling pwr irq
diff --git a/drivers/mmc/host/sdhci-of-esdhc.c b/drivers/mmc/host/sdhci-of-esdhc.c
index 5cca3fa4610b..500f70a6ee42 100644
--- a/drivers/mmc/host/sdhci-of-esdhc.c
+++ b/drivers/mmc/host/sdhci-of-esdhc.c
@@ -80,6 +80,7 @@ struct sdhci_esdhc {
bool quirk_tuning_erratum_type1;
bool quirk_tuning_erratum_type2;
bool quirk_ignore_data_inhibit;
+ bool quirk_delay_before_data_reset;
bool in_sw_tuning;
unsigned int peripheral_clock;
const struct esdhc_clk_fixup *clk_fixup;
@@ -759,14 +760,16 @@ static void esdhc_reset(struct sdhci_host *host, u8 mask)
struct sdhci_esdhc *esdhc = sdhci_pltfm_priv(pltfm_host);
u32 val;
+ if (esdhc->quirk_delay_before_data_reset &&
+ (mask & SDHCI_RESET_DATA) &&
+ (host->flags & SDHCI_REQ_USE_DMA))
+ mdelay(5);
+
sdhci_reset(host, mask);
sdhci_writel(host, host->ier, SDHCI_INT_ENABLE);
sdhci_writel(host, host->ier, SDHCI_SIGNAL_ENABLE);
- if (of_find_compatible_node(NULL, NULL, "fsl,p2020-esdhc"))
- mdelay(5);
-
if (mask & SDHCI_RESET_ALL) {
val = sdhci_readl(host, ESDHC_TBCTL);
val &= ~ESDHC_TB_EN;
@@ -1221,6 +1224,10 @@ static void esdhc_init(struct platform_device *pdev, struct sdhci_host *host)
if (match)
esdhc->clk_fixup = match->data;
np = pdev->dev.of_node;
+
+ if (of_device_is_compatible(np, "fsl,p2020-esdhc"))
+ esdhc->quirk_delay_before_data_reset = true;
+
clk = of_clk_get(np, 0);
if (!IS_ERR(clk)) {
/*
@@ -1303,8 +1310,8 @@ static int sdhci_esdhc_probe(struct platform_device *pdev)
host->quirks &= ~SDHCI_QUIRK_NO_BUSY_IRQ;
if (of_find_compatible_node(NULL, NULL, "fsl,p2020-esdhc")) {
- host->quirks2 |= SDHCI_QUIRK_RESET_AFTER_REQUEST;
- host->quirks2 |= SDHCI_QUIRK_BROKEN_TIMEOUT_VAL;
+ host->quirks |= SDHCI_QUIRK_RESET_AFTER_REQUEST;
+ host->quirks |= SDHCI_QUIRK_BROKEN_TIMEOUT_VAL;
}
if (of_device_is_compatible(np, "fsl,p5040-esdhc") ||
diff --git a/drivers/mmc/host/sdhci-pci-core.c b/drivers/mmc/host/sdhci-pci-core.c
index acefb76b4e15..5091e2c1c0e5 100644
--- a/drivers/mmc/host/sdhci-pci-core.c
+++ b/drivers/mmc/host/sdhci-pci-core.c
@@ -27,6 +27,7 @@
#include <linux/mmc/slot-gpio.h>
#include <linux/mmc/sdhci-pci-data.h>
#include <linux/acpi.h>
+#include <linux/dmi.h>
#ifdef CONFIG_X86
#include <asm/iosf_mbi.h>
@@ -783,11 +784,18 @@ static int byt_emmc_probe_slot(struct sdhci_pci_slot *slot)
return 0;
}
+static bool glk_broken_cqhci(struct sdhci_pci_slot *slot)
+{
+ return slot->chip->pdev->device == PCI_DEVICE_ID_INTEL_GLK_EMMC &&
+ dmi_match(DMI_BIOS_VENDOR, "LENOVO");
+}
+
static int glk_emmc_probe_slot(struct sdhci_pci_slot *slot)
{
int ret = byt_emmc_probe_slot(slot);
- slot->host->mmc->caps2 |= MMC_CAP2_CQE;
+ if (!glk_broken_cqhci(slot))
+ slot->host->mmc->caps2 |= MMC_CAP2_CQE;
if (slot->chip->pdev->device != PCI_DEVICE_ID_INTEL_GLK_EMMC) {
slot->host->mmc->caps2 |= MMC_CAP2_HS400_ES,
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 3140fe2e5dba..1b1c26da3fe0 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -1882,9 +1882,7 @@ void sdhci_set_uhs_signaling(struct sdhci_host *host, unsigned timing)
ctrl_2 |= SDHCI_CTRL_UHS_SDR104;
else if (timing == MMC_TIMING_UHS_SDR12)
ctrl_2 |= SDHCI_CTRL_UHS_SDR12;
- else if (timing == MMC_TIMING_SD_HS ||
- timing == MMC_TIMING_MMC_HS ||
- timing == MMC_TIMING_UHS_SDR25)
+ else if (timing == MMC_TIMING_UHS_SDR25)
ctrl_2 |= SDHCI_CTRL_UHS_SDR25;
else if (timing == MMC_TIMING_UHS_SDR50)
ctrl_2 |= SDHCI_CTRL_UHS_SDR50;
@@ -2419,8 +2417,8 @@ static int __sdhci_execute_tuning(struct sdhci_host *host, u32 opcode)
sdhci_send_tuning(host, opcode);
if (!host->tuning_done) {
- pr_info("%s: Tuning timeout, falling back to fixed sampling clock\n",
- mmc_hostname(host->mmc));
+ pr_debug("%s: Tuning timeout, falling back to fixed sampling clock\n",
+ mmc_hostname(host->mmc));
sdhci_abort_tuning(host, opcode);
return -ETIMEDOUT;
}
@@ -3769,6 +3767,9 @@ int sdhci_setup_host(struct sdhci_host *host)
mmc_hostname(mmc), host->version);
}
+ if (host->quirks & SDHCI_QUIRK_BROKEN_CQE)
+ mmc->caps2 &= ~MMC_CAP2_CQE;
+
if (host->quirks & SDHCI_QUIRK_FORCE_DMA)
host->flags |= SDHCI_USE_SDMA;
else if (!(host->caps & SDHCI_CAN_DO_SDMA))
diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h
index 0ed3e0eaef5f..fe83ece6965b 100644
--- a/drivers/mmc/host/sdhci.h
+++ b/drivers/mmc/host/sdhci.h
@@ -409,6 +409,8 @@ struct sdhci_host {
#define SDHCI_QUIRK_BROKEN_CARD_DETECTION (1<<15)
/* Controller reports inverted write-protect state */
#define SDHCI_QUIRK_INVERTED_WRITE_PROTECT (1<<16)
+/* Controller has unusable command queue engine */
+#define SDHCI_QUIRK_BROKEN_CQE (1<<17)
/* Controller does not like fast PIO transfers */
#define SDHCI_QUIRK_PIO_NEEDS_DELAY (1<<18)
/* Controller does not have a LED */
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index d02f12a5254e..01e2657e4c26 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -71,6 +71,49 @@ config DUMMY
To compile this driver as a module, choose M here: the module
will be called dummy.
+config WIREGUARD
+ tristate "WireGuard secure network tunnel"
+ depends on NET && INET
+ depends on IPV6 || !IPV6
+ select NET_UDP_TUNNEL
+ select DST_CACHE
+ select CRYPTO
+ select CRYPTO_LIB_CURVE25519
+ select CRYPTO_LIB_CHACHA20POLY1305
+ select CRYPTO_LIB_BLAKE2S
+ select CRYPTO_CHACHA20_X86_64 if X86 && 64BIT
+ select CRYPTO_POLY1305_X86_64 if X86 && 64BIT
+ select CRYPTO_BLAKE2S_X86 if X86 && 64BIT
+ select CRYPTO_CURVE25519_X86 if X86 && 64BIT
+ select ARM_CRYPTO if ARM
+ select ARM64_CRYPTO if ARM64
+ select CRYPTO_CHACHA20_NEON if (ARM || ARM64) && KERNEL_MODE_NEON
+ select CRYPTO_POLY1305_NEON if ARM64 && KERNEL_MODE_NEON
+ select CRYPTO_POLY1305_ARM if ARM
+ select CRYPTO_CURVE25519_NEON if ARM && KERNEL_MODE_NEON
+ select CRYPTO_CHACHA_MIPS if CPU_MIPS32_R2
+ select CRYPTO_POLY1305_MIPS if CPU_MIPS32 || (CPU_MIPS64 && 64BIT)
+ help
+ WireGuard is a secure, fast, and easy to use replacement for IPSec
+ that uses modern cryptography and clever networking tricks. It's
+ designed to be fairly general purpose and abstract enough to fit most
+ use cases, while at the same time remaining extremely simple to
+ configure. See www.wireguard.com for more info.
+
+ It's safe to say Y or M here, as the driver is very lightweight and
+ is only in use when an administrator chooses to add an interface.
+
+config WIREGUARD_DEBUG
+ bool "Debugging checks and verbose messages"
+ depends on WIREGUARD
+ help
+ This will write log messages for handshake and other events
+ that occur for a WireGuard interface. It will also perform some
+ extra validation checks and unit tests at various points. This is
+ only useful for debugging.
+
+ Say N here unless you know what you're doing.
+
config EQUALIZER
tristate "EQL (serial line load balancing) support"
---help---
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index 0d3ba056cda3..953b7c12f0b0 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -10,6 +10,7 @@ obj-$(CONFIG_BONDING) += bonding/
obj-$(CONFIG_IPVLAN) += ipvlan/
obj-$(CONFIG_IPVTAP) += ipvlan/
obj-$(CONFIG_DUMMY) += dummy.o
+obj-$(CONFIG_WIREGUARD) += wireguard/
obj-$(CONFIG_EQUALIZER) += eql.o
obj-$(CONFIG_IFB) += ifb.o
obj-$(CONFIG_MACSEC) += macsec.o
diff --git a/drivers/net/appletalk/cops.c b/drivers/net/appletalk/cops.c
index b3c63d2f16aa..18428e104445 100644
--- a/drivers/net/appletalk/cops.c
+++ b/drivers/net/appletalk/cops.c
@@ -189,7 +189,7 @@ static int cops_nodeid (struct net_device *dev, int nodeid);
static irqreturn_t cops_interrupt (int irq, void *dev_id);
static void cops_poll(struct timer_list *t);
-static void cops_timeout(struct net_device *dev);
+static void cops_timeout(struct net_device *dev, unsigned int txqueue);
static void cops_rx (struct net_device *dev);
static netdev_tx_t cops_send_packet (struct sk_buff *skb,
struct net_device *dev);
@@ -844,7 +844,7 @@ static void cops_rx(struct net_device *dev)
netif_rx(skb);
}
-static void cops_timeout(struct net_device *dev)
+static void cops_timeout(struct net_device *dev, unsigned int txqueue)
{
struct cops_local *lp = netdev_priv(dev);
int ioaddr = dev->base_addr;
diff --git a/drivers/net/arcnet/arcdevice.h b/drivers/net/arcnet/arcdevice.h
index b0f5bc07aef5..22a49c6d7ae6 100644
--- a/drivers/net/arcnet/arcdevice.h
+++ b/drivers/net/arcnet/arcdevice.h
@@ -356,7 +356,7 @@ int arcnet_open(struct net_device *dev);
int arcnet_close(struct net_device *dev);
netdev_tx_t arcnet_send_packet(struct sk_buff *skb,
struct net_device *dev);
-void arcnet_timeout(struct net_device *dev);
+void arcnet_timeout(struct net_device *dev, unsigned int txqueue);
/* I/O equivalents */
diff --git a/drivers/net/arcnet/arcnet.c b/drivers/net/arcnet/arcnet.c
index 553776cc1d29..e04efc0a5c97 100644
--- a/drivers/net/arcnet/arcnet.c
+++ b/drivers/net/arcnet/arcnet.c
@@ -763,7 +763,7 @@ static int go_tx(struct net_device *dev)
}
/* Called by the kernel when transmit times out */
-void arcnet_timeout(struct net_device *dev)
+void arcnet_timeout(struct net_device *dev, unsigned int txqueue)
{
unsigned long flags;
struct arcnet_local *lp = netdev_priv(dev);
diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index e3b25f310936..31e43a2197a3 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -31,16 +31,6 @@
#define AD_CHURN_DETECTION_TIME 60
#define AD_AGGREGATE_WAIT_TIME 2
-/* Port state definitions (43.4.2.2 in the 802.3ad standard) */
-#define AD_STATE_LACP_ACTIVITY 0x1
-#define AD_STATE_LACP_TIMEOUT 0x2
-#define AD_STATE_AGGREGATION 0x4
-#define AD_STATE_SYNCHRONIZATION 0x8
-#define AD_STATE_COLLECTING 0x10
-#define AD_STATE_DISTRIBUTING 0x20
-#define AD_STATE_DEFAULTED 0x40
-#define AD_STATE_EXPIRED 0x80
-
/* Port Variables definitions used by the State Machines (43.4.7 in the
* 802.3ad standard)
*/
@@ -457,8 +447,8 @@ static void __choose_matched(struct lacpdu *lacpdu, struct port *port)
MAC_ADDRESS_EQUAL(&(lacpdu->partner_system), &(port->actor_system)) &&
(ntohs(lacpdu->partner_system_priority) == port->actor_system_priority) &&
(ntohs(lacpdu->partner_key) == port->actor_oper_port_key) &&
- ((lacpdu->partner_state & AD_STATE_AGGREGATION) == (port->actor_oper_port_state & AD_STATE_AGGREGATION))) ||
- ((lacpdu->actor_state & AD_STATE_AGGREGATION) == 0)
+ ((lacpdu->partner_state & LACP_STATE_AGGREGATION) == (port->actor_oper_port_state & LACP_STATE_AGGREGATION))) ||
+ ((lacpdu->actor_state & LACP_STATE_AGGREGATION) == 0)
) {
port->sm_vars |= AD_PORT_MATCHED;
} else {
@@ -492,18 +482,18 @@ static void __record_pdu(struct lacpdu *lacpdu, struct port *port)
partner->port_state = lacpdu->actor_state;
/* set actor_oper_port_state.defaulted to FALSE */
- port->actor_oper_port_state &= ~AD_STATE_DEFAULTED;
+ port->actor_oper_port_state &= ~LACP_STATE_DEFAULTED;
/* set the partner sync. to on if the partner is sync,
* and the port is matched
*/
if ((port->sm_vars & AD_PORT_MATCHED) &&
- (lacpdu->actor_state & AD_STATE_SYNCHRONIZATION)) {
- partner->port_state |= AD_STATE_SYNCHRONIZATION;
+ (lacpdu->actor_state & LACP_STATE_SYNCHRONIZATION)) {
+ partner->port_state |= LACP_STATE_SYNCHRONIZATION;
slave_dbg(port->slave->bond->dev, port->slave->dev,
"partner sync=1\n");
} else {
- partner->port_state &= ~AD_STATE_SYNCHRONIZATION;
+ partner->port_state &= ~LACP_STATE_SYNCHRONIZATION;
slave_dbg(port->slave->bond->dev, port->slave->dev,
"partner sync=0\n");
}
@@ -526,7 +516,7 @@ static void __record_default(struct port *port)
sizeof(struct port_params));
/* set actor_oper_port_state.defaulted to true */
- port->actor_oper_port_state |= AD_STATE_DEFAULTED;
+ port->actor_oper_port_state |= LACP_STATE_DEFAULTED;
}
}
@@ -556,7 +546,7 @@ static void __update_selected(struct lacpdu *lacpdu, struct port *port)
!MAC_ADDRESS_EQUAL(&lacpdu->actor_system, &partner->system) ||
ntohs(lacpdu->actor_system_priority) != partner->system_priority ||
ntohs(lacpdu->actor_key) != partner->key ||
- (lacpdu->actor_state & AD_STATE_AGGREGATION) != (partner->port_state & AD_STATE_AGGREGATION)) {
+ (lacpdu->actor_state & LACP_STATE_AGGREGATION) != (partner->port_state & LACP_STATE_AGGREGATION)) {
port->sm_vars &= ~AD_PORT_SELECTED;
}
}
@@ -588,8 +578,8 @@ static void __update_default_selected(struct port *port)
!MAC_ADDRESS_EQUAL(&admin->system, &oper->system) ||
admin->system_priority != oper->system_priority ||
admin->key != oper->key ||
- (admin->port_state & AD_STATE_AGGREGATION)
- != (oper->port_state & AD_STATE_AGGREGATION)) {
+ (admin->port_state & LACP_STATE_AGGREGATION)
+ != (oper->port_state & LACP_STATE_AGGREGATION)) {
port->sm_vars &= ~AD_PORT_SELECTED;
}
}
@@ -619,10 +609,10 @@ static void __update_ntt(struct lacpdu *lacpdu, struct port *port)
!MAC_ADDRESS_EQUAL(&(lacpdu->partner_system), &(port->actor_system)) ||
(ntohs(lacpdu->partner_system_priority) != port->actor_system_priority) ||
(ntohs(lacpdu->partner_key) != port->actor_oper_port_key) ||
- ((lacpdu->partner_state & AD_STATE_LACP_ACTIVITY) != (port->actor_oper_port_state & AD_STATE_LACP_ACTIVITY)) ||
- ((lacpdu->partner_state & AD_STATE_LACP_TIMEOUT) != (port->actor_oper_port_state & AD_STATE_LACP_TIMEOUT)) ||
- ((lacpdu->partner_state & AD_STATE_SYNCHRONIZATION) != (port->actor_oper_port_state & AD_STATE_SYNCHRONIZATION)) ||
- ((lacpdu->partner_state & AD_STATE_AGGREGATION) != (port->actor_oper_port_state & AD_STATE_AGGREGATION))
+ ((lacpdu->partner_state & LACP_STATE_LACP_ACTIVITY) != (port->actor_oper_port_state & LACP_STATE_LACP_ACTIVITY)) ||
+ ((lacpdu->partner_state & LACP_STATE_LACP_TIMEOUT) != (port->actor_oper_port_state & LACP_STATE_LACP_TIMEOUT)) ||
+ ((lacpdu->partner_state & LACP_STATE_SYNCHRONIZATION) != (port->actor_oper_port_state & LACP_STATE_SYNCHRONIZATION)) ||
+ ((lacpdu->partner_state & LACP_STATE_AGGREGATION) != (port->actor_oper_port_state & LACP_STATE_AGGREGATION))
) {
port->ntt = true;
}
@@ -978,7 +968,7 @@ static void ad_mux_machine(struct port *port, bool *update_slave_arr)
* edable port will take place only after this timer)
*/
if ((port->sm_vars & AD_PORT_SELECTED) &&
- (port->partner_oper.port_state & AD_STATE_SYNCHRONIZATION) &&
+ (port->partner_oper.port_state & LACP_STATE_SYNCHRONIZATION) &&
!__check_agg_selection_timer(port)) {
if (port->aggregator->is_active)
port->sm_mux_state =
@@ -996,14 +986,14 @@ static void ad_mux_machine(struct port *port, bool *update_slave_arr)
port->sm_mux_state = AD_MUX_DETACHED;
} else if (port->aggregator->is_active) {
port->actor_oper_port_state |=
- AD_STATE_SYNCHRONIZATION;
+ LACP_STATE_SYNCHRONIZATION;
}
break;
case AD_MUX_COLLECTING_DISTRIBUTING:
if (!(port->sm_vars & AD_PORT_SELECTED) ||
(port->sm_vars & AD_PORT_STANDBY) ||
- !(port->partner_oper.port_state & AD_STATE_SYNCHRONIZATION) ||
- !(port->actor_oper_port_state & AD_STATE_SYNCHRONIZATION)) {
+ !(port->partner_oper.port_state & LACP_STATE_SYNCHRONIZATION) ||
+ !(port->actor_oper_port_state & LACP_STATE_SYNCHRONIZATION)) {
port->sm_mux_state = AD_MUX_ATTACHED;
} else {
/* if port state hasn't changed make
@@ -1032,11 +1022,11 @@ static void ad_mux_machine(struct port *port, bool *update_slave_arr)
port->sm_mux_state);
switch (port->sm_mux_state) {
case AD_MUX_DETACHED:
- port->actor_oper_port_state &= ~AD_STATE_SYNCHRONIZATION;
+ port->actor_oper_port_state &= ~LACP_STATE_SYNCHRONIZATION;
ad_disable_collecting_distributing(port,
update_slave_arr);
- port->actor_oper_port_state &= ~AD_STATE_COLLECTING;
- port->actor_oper_port_state &= ~AD_STATE_DISTRIBUTING;
+ port->actor_oper_port_state &= ~LACP_STATE_COLLECTING;
+ port->actor_oper_port_state &= ~LACP_STATE_DISTRIBUTING;
port->ntt = true;
break;
case AD_MUX_WAITING:
@@ -1045,20 +1035,20 @@ static void ad_mux_machine(struct port *port, bool *update_slave_arr)
case AD_MUX_ATTACHED:
if (port->aggregator->is_active)
port->actor_oper_port_state |=
- AD_STATE_SYNCHRONIZATION;
+ LACP_STATE_SYNCHRONIZATION;
else
port->actor_oper_port_state &=
- ~AD_STATE_SYNCHRONIZATION;
- port->actor_oper_port_state &= ~AD_STATE_COLLECTING;
- port->actor_oper_port_state &= ~AD_STATE_DISTRIBUTING;
+ ~LACP_STATE_SYNCHRONIZATION;
+ port->actor_oper_port_state &= ~LACP_STATE_COLLECTING;
+ port->actor_oper_port_state &= ~LACP_STATE_DISTRIBUTING;
ad_disable_collecting_distributing(port,
update_slave_arr);
port->ntt = true;
break;
case AD_MUX_COLLECTING_DISTRIBUTING:
- port->actor_oper_port_state |= AD_STATE_COLLECTING;
- port->actor_oper_port_state |= AD_STATE_DISTRIBUTING;
- port->actor_oper_port_state |= AD_STATE_SYNCHRONIZATION;
+ port->actor_oper_port_state |= LACP_STATE_COLLECTING;
+ port->actor_oper_port_state |= LACP_STATE_DISTRIBUTING;
+ port->actor_oper_port_state |= LACP_STATE_SYNCHRONIZATION;
ad_enable_collecting_distributing(port,
update_slave_arr);
port->ntt = true;
@@ -1156,7 +1146,7 @@ static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port)
port->sm_vars |= AD_PORT_LACP_ENABLED;
port->sm_vars &= ~AD_PORT_SELECTED;
__record_default(port);
- port->actor_oper_port_state &= ~AD_STATE_EXPIRED;
+ port->actor_oper_port_state &= ~LACP_STATE_EXPIRED;
port->sm_rx_state = AD_RX_PORT_DISABLED;
/* Fall Through */
@@ -1166,9 +1156,9 @@ static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port)
case AD_RX_LACP_DISABLED:
port->sm_vars &= ~AD_PORT_SELECTED;
__record_default(port);
- port->partner_oper.port_state &= ~AD_STATE_AGGREGATION;
+ port->partner_oper.port_state &= ~LACP_STATE_AGGREGATION;
port->sm_vars |= AD_PORT_MATCHED;
- port->actor_oper_port_state &= ~AD_STATE_EXPIRED;
+ port->actor_oper_port_state &= ~LACP_STATE_EXPIRED;
break;
case AD_RX_EXPIRED:
/* Reset of the Synchronization flag (Standard 43.4.12)
@@ -1177,19 +1167,19 @@ static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port)
* case of EXPIRED even if LINK_DOWN didn't arrive for
* the port.
*/
- port->partner_oper.port_state &= ~AD_STATE_SYNCHRONIZATION;
+ port->partner_oper.port_state &= ~LACP_STATE_SYNCHRONIZATION;
port->sm_vars &= ~AD_PORT_MATCHED;
- port->partner_oper.port_state |= AD_STATE_LACP_TIMEOUT;
- port->partner_oper.port_state |= AD_STATE_LACP_ACTIVITY;
+ port->partner_oper.port_state |= LACP_STATE_LACP_TIMEOUT;
+ port->partner_oper.port_state |= LACP_STATE_LACP_ACTIVITY;
port->sm_rx_timer_counter = __ad_timer_to_ticks(AD_CURRENT_WHILE_TIMER, (u16)(AD_SHORT_TIMEOUT));
- port->actor_oper_port_state |= AD_STATE_EXPIRED;
+ port->actor_oper_port_state |= LACP_STATE_EXPIRED;
port->sm_vars |= AD_PORT_CHURNED;
break;
case AD_RX_DEFAULTED:
__update_default_selected(port);
__record_default(port);
port->sm_vars |= AD_PORT_MATCHED;
- port->actor_oper_port_state &= ~AD_STATE_EXPIRED;
+ port->actor_oper_port_state &= ~LACP_STATE_EXPIRED;
break;
case AD_RX_CURRENT:
/* detect loopback situation */
@@ -1202,8 +1192,8 @@ static void ad_rx_machine(struct lacpdu *lacpdu, struct port *port)
__update_selected(lacpdu, port);
__update_ntt(lacpdu, port);
__record_pdu(lacpdu, port);
- port->sm_rx_timer_counter = __ad_timer_to_ticks(AD_CURRENT_WHILE_TIMER, (u16)(port->actor_oper_port_state & AD_STATE_LACP_TIMEOUT));
- port->actor_oper_port_state &= ~AD_STATE_EXPIRED;
+ port->sm_rx_timer_counter = __ad_timer_to_ticks(AD_CURRENT_WHILE_TIMER, (u16)(port->actor_oper_port_state & LACP_STATE_LACP_TIMEOUT));
+ port->actor_oper_port_state &= ~LACP_STATE_EXPIRED;
break;
default:
break;
@@ -1231,7 +1221,7 @@ static void ad_churn_machine(struct port *port)
if (port->sm_churn_actor_timer_counter &&
!(--port->sm_churn_actor_timer_counter) &&
port->sm_churn_actor_state == AD_CHURN_MONITOR) {
- if (port->actor_oper_port_state & AD_STATE_SYNCHRONIZATION) {
+ if (port->actor_oper_port_state & LACP_STATE_SYNCHRONIZATION) {
port->sm_churn_actor_state = AD_NO_CHURN;
} else {
port->churn_actor_count++;
@@ -1241,7 +1231,7 @@ static void ad_churn_machine(struct port *port)
if (port->sm_churn_partner_timer_counter &&
!(--port->sm_churn_partner_timer_counter) &&
port->sm_churn_partner_state == AD_CHURN_MONITOR) {
- if (port->partner_oper.port_state & AD_STATE_SYNCHRONIZATION) {
+ if (port->partner_oper.port_state & LACP_STATE_SYNCHRONIZATION) {
port->sm_churn_partner_state = AD_NO_CHURN;
} else {
port->churn_partner_count++;
@@ -1298,7 +1288,7 @@ static void ad_periodic_machine(struct port *port)
/* check if port was reinitialized */
if (((port->sm_vars & AD_PORT_BEGIN) || !(port->sm_vars & AD_PORT_LACP_ENABLED) || !port->is_enabled) ||
- (!(port->actor_oper_port_state & AD_STATE_LACP_ACTIVITY) && !(port->partner_oper.port_state & AD_STATE_LACP_ACTIVITY))
+ (!(port->actor_oper_port_state & LACP_STATE_LACP_ACTIVITY) && !(port->partner_oper.port_state & LACP_STATE_LACP_ACTIVITY))
) {
port->sm_periodic_state = AD_NO_PERIODIC;
}
@@ -1315,11 +1305,11 @@ static void ad_periodic_machine(struct port *port)
switch (port->sm_periodic_state) {
case AD_FAST_PERIODIC:
if (!(port->partner_oper.port_state
- & AD_STATE_LACP_TIMEOUT))
+ & LACP_STATE_LACP_TIMEOUT))
port->sm_periodic_state = AD_SLOW_PERIODIC;
break;
case AD_SLOW_PERIODIC:
- if ((port->partner_oper.port_state & AD_STATE_LACP_TIMEOUT)) {
+ if ((port->partner_oper.port_state & LACP_STATE_LACP_TIMEOUT)) {
port->sm_periodic_timer_counter = 0;
port->sm_periodic_state = AD_PERIODIC_TX;
}
@@ -1335,7 +1325,7 @@ static void ad_periodic_machine(struct port *port)
break;
case AD_PERIODIC_TX:
if (!(port->partner_oper.port_state &
- AD_STATE_LACP_TIMEOUT))
+ LACP_STATE_LACP_TIMEOUT))
port->sm_periodic_state = AD_SLOW_PERIODIC;
else
port->sm_periodic_state = AD_FAST_PERIODIC;
@@ -1542,7 +1532,7 @@ static void ad_port_selection_logic(struct port *port, bool *update_slave_arr)
ad_agg_selection_logic(aggregator, update_slave_arr);
if (!port->aggregator->is_active)
- port->actor_oper_port_state &= ~AD_STATE_SYNCHRONIZATION;
+ port->actor_oper_port_state &= ~LACP_STATE_SYNCHRONIZATION;
}
/* Decide if "agg" is a better choice for the new active aggregator that
@@ -1848,13 +1838,13 @@ static void ad_initialize_port(struct port *port, int lacp_fast)
port->actor_port_priority = 0xff;
port->actor_port_aggregator_identifier = 0;
port->ntt = false;
- port->actor_admin_port_state = AD_STATE_AGGREGATION |
- AD_STATE_LACP_ACTIVITY;
- port->actor_oper_port_state = AD_STATE_AGGREGATION |
- AD_STATE_LACP_ACTIVITY;
+ port->actor_admin_port_state = LACP_STATE_AGGREGATION |
+ LACP_STATE_LACP_ACTIVITY;
+ port->actor_oper_port_state = LACP_STATE_AGGREGATION |
+ LACP_STATE_LACP_ACTIVITY;
if (lacp_fast)
- port->actor_oper_port_state |= AD_STATE_LACP_TIMEOUT;
+ port->actor_oper_port_state |= LACP_STATE_LACP_TIMEOUT;
memcpy(&port->partner_admin, &tmpl, sizeof(tmpl));
memcpy(&port->partner_oper, &tmpl, sizeof(tmpl));
@@ -2105,10 +2095,10 @@ void bond_3ad_unbind_slave(struct slave *slave)
aggregator->aggregator_identifier);
/* Tell the partner that this port is not suitable for aggregation */
- port->actor_oper_port_state &= ~AD_STATE_SYNCHRONIZATION;
- port->actor_oper_port_state &= ~AD_STATE_COLLECTING;
- port->actor_oper_port_state &= ~AD_STATE_DISTRIBUTING;
- port->actor_oper_port_state &= ~AD_STATE_AGGREGATION;
+ port->actor_oper_port_state &= ~LACP_STATE_SYNCHRONIZATION;
+ port->actor_oper_port_state &= ~LACP_STATE_COLLECTING;
+ port->actor_oper_port_state &= ~LACP_STATE_DISTRIBUTING;
+ port->actor_oper_port_state &= ~LACP_STATE_AGGREGATION;
__update_lacpdu_from_port(port);
ad_lacpdu_send(port);
@@ -2695,9 +2685,9 @@ void bond_3ad_update_lacp_rate(struct bonding *bond)
bond_for_each_slave(bond, slave, iter) {
port = &(SLAVE_AD_INFO(slave)->port);
if (lacp_fast)
- port->actor_oper_port_state |= AD_STATE_LACP_TIMEOUT;
+ port->actor_oper_port_state |= LACP_STATE_LACP_TIMEOUT;
else
- port->actor_oper_port_state &= ~AD_STATE_LACP_TIMEOUT;
+ port->actor_oper_port_state &= ~LACP_STATE_LACP_TIMEOUT;
}
spin_unlock_bh(&bond->mode_lock);
}
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index fcb7c2f7f001..48d5ec770b94 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -2272,9 +2272,6 @@ static void bond_miimon_commit(struct bonding *bond)
} else if (BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) {
/* make it immediately active */
bond_set_active_slave(slave);
- } else if (slave != primary) {
- /* prevent it from being the active one */
- bond_set_backup_slave(slave);
}
slave_info(bond->dev, slave->dev, "link status definitely up, %u Mbps %s duplex\n",
@@ -3702,32 +3699,35 @@ static int bond_neigh_init(struct neighbour *n)
const struct net_device_ops *slave_ops;
struct neigh_parms parms;
struct slave *slave;
- int ret;
+ int ret = 0;
- slave = bond_first_slave(bond);
+ rcu_read_lock();
+ slave = bond_first_slave_rcu(bond);
if (!slave)
- return 0;
+ goto out;
slave_ops = slave->dev->netdev_ops;
if (!slave_ops->ndo_neigh_setup)
- return 0;
-
- parms.neigh_setup = NULL;
- parms.neigh_cleanup = NULL;
- ret = slave_ops->ndo_neigh_setup(slave->dev, &parms);
- if (ret)
- return ret;
+ goto out;
- /* Assign slave's neigh_cleanup to neighbour in case cleanup is called
- * after the last slave has been detached. Assumes that all slaves
- * utilize the same neigh_cleanup (true at this writing as only user
- * is ipoib).
+ /* TODO: find another way [1] to implement this.
+ * Passing a zeroed structure is fragile,
+ * but at least we do not pass garbage.
+ *
+ * [1] One way would be that ndo_neigh_setup() never touch
+ * struct neigh_parms, but propagate the new neigh_setup()
+ * back to ___neigh_create() / neigh_parms_alloc()
*/
- n->parms->neigh_cleanup = parms.neigh_cleanup;
+ memset(&parms, 0, sizeof(parms));
+ ret = slave_ops->ndo_neigh_setup(slave->dev, &parms);
- if (!parms.neigh_setup)
- return 0;
+ if (ret)
+ goto out;
- return parms.neigh_setup(n);
+ if (parms.neigh_setup)
+ ret = parms.neigh_setup(n);
+out:
+ rcu_read_unlock();
+ return ret;
}
/* The bonding ndo_neigh_setup is called at init time beofre any
diff --git a/drivers/net/caif/caif_serial.c b/drivers/net/caif/caif_serial.c
index bd40b114d6cd..d737ceb61203 100644
--- a/drivers/net/caif/caif_serial.c
+++ b/drivers/net/caif/caif_serial.c
@@ -270,7 +270,9 @@ static int caif_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct ser_device *ser;
- BUG_ON(dev == NULL);
+ if (WARN_ON(!dev))
+ return -EINVAL;
+
ser = netdev_priv(dev);
/* Send flow off once, on high water mark */
diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c
index a929cdda9ab2..94d10ec954a0 100644
--- a/drivers/net/can/flexcan.c
+++ b/drivers/net/can/flexcan.c
@@ -389,6 +389,34 @@ static struct flexcan_mb __iomem *flexcan_get_mb(const struct flexcan_priv *priv
(&priv->regs->mb[bank][priv->mb_size * mb_index]);
}
+static int flexcan_low_power_enter_ack(struct flexcan_priv *priv)
+{
+ struct flexcan_regs __iomem *regs = priv->regs;
+ unsigned int timeout = FLEXCAN_TIMEOUT_US / 10;
+
+ while (timeout-- && !(priv->read(&regs->mcr) & FLEXCAN_MCR_LPM_ACK))
+ udelay(10);
+
+ if (!(priv->read(&regs->mcr) & FLEXCAN_MCR_LPM_ACK))
+ return -ETIMEDOUT;
+
+ return 0;
+}
+
+static int flexcan_low_power_exit_ack(struct flexcan_priv *priv)
+{
+ struct flexcan_regs __iomem *regs = priv->regs;
+ unsigned int timeout = FLEXCAN_TIMEOUT_US / 10;
+
+ while (timeout-- && (priv->read(&regs->mcr) & FLEXCAN_MCR_LPM_ACK))
+ udelay(10);
+
+ if (priv->read(&regs->mcr) & FLEXCAN_MCR_LPM_ACK)
+ return -ETIMEDOUT;
+
+ return 0;
+}
+
static void flexcan_enable_wakeup_irq(struct flexcan_priv *priv, bool enable)
{
struct flexcan_regs __iomem *regs = priv->regs;
@@ -407,7 +435,6 @@ static void flexcan_enable_wakeup_irq(struct flexcan_priv *priv, bool enable)
static inline int flexcan_enter_stop_mode(struct flexcan_priv *priv)
{
struct flexcan_regs __iomem *regs = priv->regs;
- unsigned int ackval;
u32 reg_mcr;
reg_mcr = priv->read(&regs->mcr);
@@ -418,36 +445,24 @@ static inline int flexcan_enter_stop_mode(struct flexcan_priv *priv)
regmap_update_bits(priv->stm.gpr, priv->stm.req_gpr,
1 << priv->stm.req_bit, 1 << priv->stm.req_bit);
- /* get stop acknowledgment */
- if (regmap_read_poll_timeout(priv->stm.gpr, priv->stm.ack_gpr,
- ackval, ackval & (1 << priv->stm.ack_bit),
- 0, FLEXCAN_TIMEOUT_US))
- return -ETIMEDOUT;
-
- return 0;
+ return flexcan_low_power_enter_ack(priv);
}
static inline int flexcan_exit_stop_mode(struct flexcan_priv *priv)
{
struct flexcan_regs __iomem *regs = priv->regs;
- unsigned int ackval;
u32 reg_mcr;
/* remove stop request */
regmap_update_bits(priv->stm.gpr, priv->stm.req_gpr,
1 << priv->stm.req_bit, 0);
- /* get stop acknowledgment */
- if (regmap_read_poll_timeout(priv->stm.gpr, priv->stm.ack_gpr,
- ackval, !(ackval & (1 << priv->stm.ack_bit)),
- 0, FLEXCAN_TIMEOUT_US))
- return -ETIMEDOUT;
reg_mcr = priv->read(&regs->mcr);
reg_mcr &= ~FLEXCAN_MCR_SLF_WAK;
priv->write(reg_mcr, &regs->mcr);
- return 0;
+ return flexcan_low_power_exit_ack(priv);
}
static inline void flexcan_error_irq_enable(const struct flexcan_priv *priv)
@@ -506,39 +521,25 @@ static inline int flexcan_transceiver_disable(const struct flexcan_priv *priv)
static int flexcan_chip_enable(struct flexcan_priv *priv)
{
struct flexcan_regs __iomem *regs = priv->regs;
- unsigned int timeout = FLEXCAN_TIMEOUT_US / 10;
u32 reg;
reg = priv->read(&regs->mcr);
reg &= ~FLEXCAN_MCR_MDIS;
priv->write(reg, &regs->mcr);
- while (timeout-- && (priv->read(&regs->mcr) & FLEXCAN_MCR_LPM_ACK))
- udelay(10);
-
- if (priv->read(&regs->mcr) & FLEXCAN_MCR_LPM_ACK)
- return -ETIMEDOUT;
-
- return 0;
+ return flexcan_low_power_exit_ack(priv);
}
static int flexcan_chip_disable(struct flexcan_priv *priv)
{
struct flexcan_regs __iomem *regs = priv->regs;
- unsigned int timeout = FLEXCAN_TIMEOUT_US / 10;
u32 reg;
reg = priv->read(&regs->mcr);
reg |= FLEXCAN_MCR_MDIS;
priv->write(reg, &regs->mcr);
- while (timeout-- && !(priv->read(&regs->mcr) & FLEXCAN_MCR_LPM_ACK))
- udelay(10);
-
- if (!(priv->read(&regs->mcr) & FLEXCAN_MCR_LPM_ACK))
- return -ETIMEDOUT;
-
- return 0;
+ return flexcan_low_power_enter_ack(priv);
}
static int flexcan_chip_freeze(struct flexcan_priv *priv)
@@ -1722,6 +1723,9 @@ static int __maybe_unused flexcan_resume(struct device *device)
netif_start_queue(dev);
if (device_may_wakeup(device)) {
disable_irq_wake(dev->irq);
+ err = flexcan_exit_stop_mode(priv);
+ if (err)
+ return err;
} else {
err = pm_runtime_force_resume(device);
if (err)
@@ -1767,14 +1771,9 @@ static int __maybe_unused flexcan_noirq_resume(struct device *device)
{
struct net_device *dev = dev_get_drvdata(device);
struct flexcan_priv *priv = netdev_priv(dev);
- int err;
- if (netif_running(dev) && device_may_wakeup(device)) {
+ if (netif_running(dev) && device_may_wakeup(device))
flexcan_enable_wakeup_irq(priv, false);
- err = flexcan_exit_stop_mode(priv);
- if (err)
- return err;
- }
return 0;
}
diff --git a/drivers/net/can/m_can/tcan4x5x.c b/drivers/net/can/m_can/tcan4x5x.c
index 3db619209fe1..eacd428e07e9 100644
--- a/drivers/net/can/m_can/tcan4x5x.c
+++ b/drivers/net/can/m_can/tcan4x5x.c
@@ -101,6 +101,9 @@
#define TCAN4X5X_MODE_STANDBY BIT(6)
#define TCAN4X5X_MODE_NORMAL BIT(7)
+#define TCAN4X5X_DISABLE_WAKE_MSK (BIT(31) | BIT(30))
+#define TCAN4X5X_DISABLE_INH_MSK BIT(9)
+
#define TCAN4X5X_SW_RESET BIT(2)
#define TCAN4X5X_MCAN_CONFIGURED BIT(5)
@@ -164,6 +167,28 @@ static void tcan4x5x_check_wake(struct tcan4x5x_priv *priv)
}
}
+static int tcan4x5x_reset(struct tcan4x5x_priv *priv)
+{
+ int ret = 0;
+
+ if (priv->reset_gpio) {
+ gpiod_set_value(priv->reset_gpio, 1);
+
+ /* tpulse_width minimum 30us */
+ usleep_range(30, 100);
+ gpiod_set_value(priv->reset_gpio, 0);
+ } else {
+ ret = regmap_write(priv->regmap, TCAN4X5X_CONFIG,
+ TCAN4X5X_SW_RESET);
+ if (ret)
+ return ret;
+ }
+
+ usleep_range(700, 1000);
+
+ return ret;
+}
+
static int regmap_spi_gather_write(void *context, const void *reg,
size_t reg_len, const void *val,
size_t val_len)
@@ -338,15 +363,34 @@ static int tcan4x5x_init(struct m_can_classdev *cdev)
return ret;
}
+static int tcan4x5x_disable_wake(struct m_can_classdev *cdev)
+{
+ struct tcan4x5x_priv *tcan4x5x = cdev->device_data;
+
+ return regmap_update_bits(tcan4x5x->regmap, TCAN4X5X_CONFIG,
+ TCAN4X5X_DISABLE_WAKE_MSK, 0x00);
+}
+
+static int tcan4x5x_disable_state(struct m_can_classdev *cdev)
+{
+ struct tcan4x5x_priv *tcan4x5x = cdev->device_data;
+
+ return regmap_update_bits(tcan4x5x->regmap, TCAN4X5X_CONFIG,
+ TCAN4X5X_DISABLE_INH_MSK, 0x01);
+}
+
static int tcan4x5x_parse_config(struct m_can_classdev *cdev)
{
struct tcan4x5x_priv *tcan4x5x = cdev->device_data;
+ int ret;
tcan4x5x->device_wake_gpio = devm_gpiod_get(cdev->dev, "device-wake",
GPIOD_OUT_HIGH);
if (IS_ERR(tcan4x5x->device_wake_gpio)) {
- dev_err(cdev->dev, "device-wake gpio not defined\n");
- return -EINVAL;
+ if (PTR_ERR(tcan4x5x->device_wake_gpio) == -EPROBE_DEFER)
+ return -EPROBE_DEFER;
+
+ tcan4x5x_disable_wake(cdev);
}
tcan4x5x->reset_gpio = devm_gpiod_get_optional(cdev->dev, "reset",
@@ -354,16 +398,17 @@ static int tcan4x5x_parse_config(struct m_can_classdev *cdev)
if (IS_ERR(tcan4x5x->reset_gpio))
tcan4x5x->reset_gpio = NULL;
+ ret = tcan4x5x_reset(tcan4x5x);
+ if (ret)
+ return ret;
+
tcan4x5x->device_state_gpio = devm_gpiod_get_optional(cdev->dev,
"device-state",
GPIOD_IN);
- if (IS_ERR(tcan4x5x->device_state_gpio))
+ if (IS_ERR(tcan4x5x->device_state_gpio)) {
tcan4x5x->device_state_gpio = NULL;
-
- tcan4x5x->power = devm_regulator_get_optional(cdev->dev,
- "vsup");
- if (PTR_ERR(tcan4x5x->power) == -EPROBE_DEFER)
- return -EPROBE_DEFER;
+ tcan4x5x_disable_state(cdev);
+ }
return 0;
}
@@ -398,6 +443,12 @@ static int tcan4x5x_can_probe(struct spi_device *spi)
if (!priv)
return -ENOMEM;
+ priv->power = devm_regulator_get_optional(&spi->dev, "vsup");
+ if (PTR_ERR(priv->power) == -EPROBE_DEFER)
+ return -EPROBE_DEFER;
+ else
+ priv->power = NULL;
+
mcan_class->device_data = priv;
m_can_class_get_clocks(mcan_class);
@@ -428,10 +479,6 @@ static int tcan4x5x_can_probe(struct spi_device *spi)
spi_set_drvdata(spi, priv);
- ret = tcan4x5x_parse_config(mcan_class);
- if (ret)
- goto out_clk;
-
/* Configure the SPI bus */
spi->bits_per_word = 32;
ret = spi_setup(spi);
@@ -441,7 +488,17 @@ static int tcan4x5x_can_probe(struct spi_device *spi)
priv->regmap = devm_regmap_init(&spi->dev, &tcan4x5x_bus,
&spi->dev, &tcan4x5x_regmap);
- tcan4x5x_power_enable(priv->power, 1);
+ ret = tcan4x5x_power_enable(priv->power, 1);
+ if (ret)
+ goto out_clk;
+
+ ret = tcan4x5x_parse_config(mcan_class);
+ if (ret)
+ goto out_power;
+
+ ret = tcan4x5x_init(mcan_class);
+ if (ret)
+ goto out_power;
ret = m_can_class_register(mcan_class);
if (ret)
diff --git a/drivers/net/can/mscan/mscan.c b/drivers/net/can/mscan/mscan.c
index 8caf7af0dee2..99101d7027a8 100644
--- a/drivers/net/can/mscan/mscan.c
+++ b/drivers/net/can/mscan/mscan.c
@@ -381,13 +381,12 @@ static int mscan_rx_poll(struct napi_struct *napi, int quota)
struct net_device *dev = napi->dev;
struct mscan_regs __iomem *regs = priv->reg_base;
struct net_device_stats *stats = &dev->stats;
- int npackets = 0;
- int ret = 1;
+ int work_done = 0;
struct sk_buff *skb;
struct can_frame *frame;
u8 canrflg;
- while (npackets < quota) {
+ while (work_done < quota) {
canrflg = in_8(&regs->canrflg);
if (!(canrflg & (MSCAN_RXF | MSCAN_ERR_IF)))
break;
@@ -408,18 +407,18 @@ static int mscan_rx_poll(struct napi_struct *napi, int quota)
stats->rx_packets++;
stats->rx_bytes += frame->can_dlc;
- npackets++;
+ work_done++;
netif_receive_skb(skb);
}
- if (!(in_8(&regs->canrflg) & (MSCAN_RXF | MSCAN_ERR_IF))) {
- napi_complete(&priv->napi);
- clear_bit(F_RX_PROGRESS, &priv->flags);
- if (priv->can.state < CAN_STATE_BUS_OFF)
- out_8(&regs->canrier, priv->shadow_canrier);
- ret = 0;
+ if (work_done < quota) {
+ if (likely(napi_complete_done(&priv->napi, work_done))) {
+ clear_bit(F_RX_PROGRESS, &priv->flags);
+ if (priv->can.state < CAN_STATE_BUS_OFF)
+ out_8(&regs->canrier, priv->shadow_canrier);
+ }
}
- return ret;
+ return work_done;
}
static irqreturn_t mscan_isr(int irq, void *dev_id)
diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c
index 2f74f6704c12..a4b4b742c80c 100644
--- a/drivers/net/can/usb/gs_usb.c
+++ b/drivers/net/can/usb/gs_usb.c
@@ -918,7 +918,7 @@ static int gs_usb_probe(struct usb_interface *intf,
GS_USB_BREQ_HOST_FORMAT,
USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_INTERFACE,
1,
- intf->altsetting[0].desc.bInterfaceNumber,
+ intf->cur_altsetting->desc.bInterfaceNumber,
hconf,
sizeof(*hconf),
1000);
@@ -941,7 +941,7 @@ static int gs_usb_probe(struct usb_interface *intf,
GS_USB_BREQ_DEVICE_CONFIG,
USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_INTERFACE,
1,
- intf->altsetting[0].desc.bInterfaceNumber,
+ intf->cur_altsetting->desc.bInterfaceNumber,
dconf,
sizeof(*dconf),
1000);
diff --git a/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c b/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c
index 5fc0be564274..7ab87a758754 100644
--- a/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c
+++ b/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c
@@ -1590,7 +1590,7 @@ static int kvaser_usb_hydra_setup_endpoints(struct kvaser_usb *dev)
struct usb_endpoint_descriptor *ep;
int i;
- iface_desc = &dev->intf->altsetting[0];
+ iface_desc = dev->intf->cur_altsetting;
for (i = 0; i < iface_desc->desc.bNumEndpoints; ++i) {
ep = &iface_desc->endpoint[i].desc;
diff --git a/drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c b/drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c
index 07d2f3aa2c02..1b9957f12459 100644
--- a/drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c
+++ b/drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c
@@ -608,7 +608,7 @@ static int kvaser_usb_leaf_simple_cmd_async(struct kvaser_usb_net_priv *priv,
struct kvaser_cmd *cmd;
int err;
- cmd = kmalloc(sizeof(*cmd), GFP_ATOMIC);
+ cmd = kzalloc(sizeof(*cmd), GFP_ATOMIC);
if (!cmd)
return -ENOMEM;
@@ -1140,7 +1140,7 @@ static int kvaser_usb_leaf_set_opt_mode(const struct kvaser_usb_net_priv *priv)
struct kvaser_cmd *cmd;
int rc;
- cmd = kmalloc(sizeof(*cmd), GFP_KERNEL);
+ cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);
if (!cmd)
return -ENOMEM;
@@ -1206,7 +1206,7 @@ static int kvaser_usb_leaf_flush_queue(struct kvaser_usb_net_priv *priv)
struct kvaser_cmd *cmd;
int rc;
- cmd = kmalloc(sizeof(*cmd), GFP_KERNEL);
+ cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);
if (!cmd)
return -ENOMEM;
@@ -1310,7 +1310,7 @@ static int kvaser_usb_leaf_setup_endpoints(struct kvaser_usb *dev)
struct usb_endpoint_descriptor *endpoint;
int i;
- iface_desc = &dev->intf->altsetting[0];
+ iface_desc = dev->intf->cur_altsetting;
for (i = 0; i < iface_desc->desc.bNumEndpoints; ++i) {
endpoint = &iface_desc->endpoint[i].desc;
diff --git a/drivers/net/can/xilinx_can.c b/drivers/net/can/xilinx_can.c
index 464af939cd8a..c1dbab8c896d 100644
--- a/drivers/net/can/xilinx_can.c
+++ b/drivers/net/can/xilinx_can.c
@@ -60,6 +60,8 @@ enum xcan_reg {
XCAN_TXMSG_BASE_OFFSET = 0x0100, /* TX Message Space */
XCAN_RXMSG_BASE_OFFSET = 0x1100, /* RX Message Space */
XCAN_RXMSG_2_BASE_OFFSET = 0x2100, /* RX Message Space */
+ XCAN_AFR_2_MASK_OFFSET = 0x0A00, /* Acceptance Filter MASK */
+ XCAN_AFR_2_ID_OFFSET = 0x0A04, /* Acceptance Filter ID */
};
#define XCAN_FRAME_ID_OFFSET(frame_base) ((frame_base) + 0x00)
@@ -1809,6 +1811,11 @@ static int xcan_probe(struct platform_device *pdev)
pm_runtime_put(&pdev->dev);
+ if (priv->devtype.flags & XCAN_FLAG_CANFD_2) {
+ priv->write_reg(priv, XCAN_AFR_2_ID_OFFSET, 0x00000000);
+ priv->write_reg(priv, XCAN_AFR_2_MASK_OFFSET, 0x00000000);
+ }
+
netdev_dbg(ndev, "reg_base=0x%p irq=%d clock=%d, tx buffers: actual %d, using %d\n",
priv->reg_base, ndev->irq, priv->can.clock.freq,
hw_tx_max, priv->tx_max);
diff --git a/drivers/net/dsa/Kconfig b/drivers/net/dsa/Kconfig
index c7667645f04a..2d38dbc9dd8c 100644
--- a/drivers/net/dsa/Kconfig
+++ b/drivers/net/dsa/Kconfig
@@ -54,6 +54,8 @@ source "drivers/net/dsa/mv88e6xxx/Kconfig"
source "drivers/net/dsa/ocelot/Kconfig"
+source "drivers/net/dsa/qca/Kconfig"
+
source "drivers/net/dsa/sja1105/Kconfig"
config NET_DSA_QCA8K
@@ -103,7 +105,6 @@ config NET_DSA_SMSC_LAN9303_MDIO
config NET_DSA_VITESSE_VSC73XX
tristate
- depends on OF
depends on NET_DSA
select FIXED_PHY
select VITESSE_PHY
@@ -114,7 +115,6 @@ config NET_DSA_VITESSE_VSC73XX
config NET_DSA_VITESSE_VSC73XX_SPI
tristate "Vitesse VSC7385/7388/7395/7398 SPI mode support"
- depends on OF
depends on NET_DSA
depends on SPI
select NET_DSA_VITESSE_VSC73XX
@@ -124,7 +124,6 @@ config NET_DSA_VITESSE_VSC73XX_SPI
config NET_DSA_VITESSE_VSC73XX_PLATFORM
tristate "Vitesse VSC7385/7388/7395/7398 Platform mode support"
- depends on OF
depends on NET_DSA
depends on HAS_IOMEM
select NET_DSA_VITESSE_VSC73XX
diff --git a/drivers/net/dsa/Makefile b/drivers/net/dsa/Makefile
index 9d384a32b3a2..4a943ccc2ca4 100644
--- a/drivers/net/dsa/Makefile
+++ b/drivers/net/dsa/Makefile
@@ -21,4 +21,5 @@ obj-y += b53/
obj-y += microchip/
obj-y += mv88e6xxx/
obj-y += ocelot/
+obj-y += qca/
obj-y += sja1105/
diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c
index 36828f210030..060497512159 100644
--- a/drivers/net/dsa/b53/b53_common.c
+++ b/drivers/net/dsa/b53/b53_common.c
@@ -347,7 +347,7 @@ static void b53_set_forwarding(struct b53_device *dev, int enable)
* frames should be flooded or not.
*/
b53_read8(dev, B53_CTRL_PAGE, B53_IP_MULTICAST_CTRL, &mgmt);
- mgmt |= B53_UC_FWD_EN | B53_MC_FWD_EN;
+ mgmt |= B53_UC_FWD_EN | B53_MC_FWD_EN | B53_IPMC_FWD_EN;
b53_write8(dev, B53_CTRL_PAGE, B53_IP_MULTICAST_CTRL, mgmt);
}
@@ -371,8 +371,6 @@ static void b53_enable_vlan(struct b53_device *dev, bool enable,
b53_read8(dev, B53_VLAN_PAGE, B53_VLAN_CTRL5, &vc5);
}
- mgmt &= ~SM_SW_FWD_MODE;
-
if (enable) {
vc0 |= VC0_VLAN_EN | VC0_VID_CHK_EN | VC0_VID_HASH_VID;
vc1 |= VC1_RX_MCST_UNTAG_EN | VC1_RX_MCST_FWD_EN;
@@ -526,6 +524,8 @@ int b53_enable_port(struct dsa_switch *ds, int port, struct phy_device *phy)
cpu_port = dsa_to_port(ds, port)->cpu_dp->index;
+ b53_br_egress_floods(ds, port, true, true);
+
if (dev->ops->irq_enable)
ret = dev->ops->irq_enable(dev, port);
if (ret)
@@ -571,9 +571,8 @@ EXPORT_SYMBOL(b53_disable_port);
void b53_brcm_hdr_setup(struct dsa_switch *ds, int port)
{
- bool tag_en = !(ds->ops->get_tag_protocol(ds, port) ==
- DSA_TAG_PROTO_NONE);
struct b53_device *dev = ds->priv;
+ bool tag_en = !(dev->tag_protocol == DSA_TAG_PROTO_NONE);
u8 hdr_ctl, val;
u16 reg;
@@ -593,6 +592,22 @@ void b53_brcm_hdr_setup(struct dsa_switch *ds, int port)
break;
}
+ /* Enable management mode if tagging is requested */
+ b53_read8(dev, B53_CTRL_PAGE, B53_SWITCH_MODE, &hdr_ctl);
+ if (tag_en)
+ hdr_ctl |= SM_SW_FWD_MODE;
+ else
+ hdr_ctl &= ~SM_SW_FWD_MODE;
+ b53_write8(dev, B53_CTRL_PAGE, B53_SWITCH_MODE, hdr_ctl);
+
+ /* Configure the appropriate IMP port */
+ b53_read8(dev, B53_MGMT_PAGE, B53_GLOBAL_CONFIG, &hdr_ctl);
+ if (port == 8)
+ hdr_ctl |= GC_FRM_MGMT_PORT_MII;
+ else if (port == 5)
+ hdr_ctl |= GC_FRM_MGMT_PORT_M;
+ b53_write8(dev, B53_MGMT_PAGE, B53_GLOBAL_CONFIG, hdr_ctl);
+
/* Enable Broadcom tags for IMP port */
b53_read8(dev, B53_MGMT_PAGE, B53_BRCM_HDR, &hdr_ctl);
if (tag_en)
@@ -641,6 +656,8 @@ static void b53_enable_cpu_port(struct b53_device *dev, int port)
b53_write8(dev, B53_CTRL_PAGE, B53_PORT_CTRL(port), port_ctrl);
b53_brcm_hdr_setup(dev->ds, port);
+
+ b53_br_egress_floods(dev->ds, port, true, true);
}
static void b53_enable_mib(struct b53_device *dev)
@@ -1821,19 +1838,26 @@ int b53_br_egress_floods(struct dsa_switch *ds, int port,
struct b53_device *dev = ds->priv;
u16 uc, mc;
- b53_read16(dev, B53_CTRL_PAGE, B53_UC_FWD_EN, &uc);
+ b53_read16(dev, B53_CTRL_PAGE, B53_UC_FLOOD_MASK, &uc);
if (unicast)
uc |= BIT(port);
else
uc &= ~BIT(port);
- b53_write16(dev, B53_CTRL_PAGE, B53_UC_FWD_EN, uc);
+ b53_write16(dev, B53_CTRL_PAGE, B53_UC_FLOOD_MASK, uc);
+
+ b53_read16(dev, B53_CTRL_PAGE, B53_MC_FLOOD_MASK, &mc);
+ if (multicast)
+ mc |= BIT(port);
+ else
+ mc &= ~BIT(port);
+ b53_write16(dev, B53_CTRL_PAGE, B53_MC_FLOOD_MASK, mc);
- b53_read16(dev, B53_CTRL_PAGE, B53_MC_FWD_EN, &mc);
+ b53_read16(dev, B53_CTRL_PAGE, B53_IPMC_FLOOD_MASK, &mc);
if (multicast)
mc |= BIT(port);
else
mc &= ~BIT(port);
- b53_write16(dev, B53_CTRL_PAGE, B53_MC_FWD_EN, mc);
+ b53_write16(dev, B53_CTRL_PAGE, B53_IPMC_FLOOD_MASK, mc);
return 0;
@@ -1855,36 +1879,57 @@ static bool b53_possible_cpu_port(struct dsa_switch *ds, int port)
return false;
}
-static bool b53_can_enable_brcm_tags(struct dsa_switch *ds, int port)
+static bool b53_can_enable_brcm_tags(struct dsa_switch *ds, int port,
+ enum dsa_tag_protocol tag_protocol)
{
bool ret = b53_possible_cpu_port(ds, port);
- if (!ret)
+ if (!ret) {
dev_warn(ds->dev, "Port %d is not Broadcom tag capable\n",
port);
+ return ret;
+ }
+
+ switch (tag_protocol) {
+ case DSA_TAG_PROTO_BRCM:
+ case DSA_TAG_PROTO_BRCM_PREPEND:
+ dev_warn(ds->dev,
+ "Port %d is stacked to Broadcom tag switch\n", port);
+ ret = false;
+ break;
+ default:
+ ret = true;
+ break;
+ }
+
return ret;
}
-enum dsa_tag_protocol b53_get_tag_protocol(struct dsa_switch *ds, int port)
+enum dsa_tag_protocol b53_get_tag_protocol(struct dsa_switch *ds, int port,
+ enum dsa_tag_protocol mprot)
{
struct b53_device *dev = ds->priv;
/* Older models (5325, 5365) support a different tag format that we do
- * not support in net/dsa/tag_brcm.c yet. 539x and 531x5 require managed
- * mode to be turned on which means we need to specifically manage ARL
- * misses on multicast addresses (TBD).
+ * not support in net/dsa/tag_brcm.c yet.
*/
- if (is5325(dev) || is5365(dev) || is539x(dev) || is531x5(dev) ||
- !b53_can_enable_brcm_tags(ds, port))
- return DSA_TAG_PROTO_NONE;
+ if (is5325(dev) || is5365(dev) ||
+ !b53_can_enable_brcm_tags(ds, port, mprot)) {
+ dev->tag_protocol = DSA_TAG_PROTO_NONE;
+ goto out;
+ }
/* Broadcom BCM58xx chips have a flow accelerator on Port 8
* which requires us to use the prepended Broadcom tag type
*/
- if (dev->chip_id == BCM58XX_DEVICE_ID && port == B53_CPU_PORT)
- return DSA_TAG_PROTO_BRCM_PREPEND;
+ if (dev->chip_id == BCM58XX_DEVICE_ID && port == B53_CPU_PORT) {
+ dev->tag_protocol = DSA_TAG_PROTO_BRCM_PREPEND;
+ goto out;
+ }
- return DSA_TAG_PROTO_BRCM;
+ dev->tag_protocol = DSA_TAG_PROTO_BRCM;
+out:
+ return dev->tag_protocol;
}
EXPORT_SYMBOL(b53_get_tag_protocol);
diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h
index 1877acf05081..3c30f3a7eb29 100644
--- a/drivers/net/dsa/b53/b53_priv.h
+++ b/drivers/net/dsa/b53/b53_priv.h
@@ -118,6 +118,7 @@ struct b53_device {
u8 jumbo_size_reg;
int reset_gpio;
u8 num_arl_entries;
+ enum dsa_tag_protocol tag_protocol;
/* used ports mask */
u16 enabled_ports;
@@ -359,7 +360,8 @@ int b53_mdb_del(struct dsa_switch *ds, int port,
const struct switchdev_obj_port_mdb *mdb);
int b53_mirror_add(struct dsa_switch *ds, int port,
struct dsa_mall_mirror_tc_entry *mirror, bool ingress);
-enum dsa_tag_protocol b53_get_tag_protocol(struct dsa_switch *ds, int port);
+enum dsa_tag_protocol b53_get_tag_protocol(struct dsa_switch *ds, int port,
+ enum dsa_tag_protocol mprot);
void b53_mirror_del(struct dsa_switch *ds, int port,
struct dsa_mall_mirror_tc_entry *mirror);
int b53_enable_port(struct dsa_switch *ds, int port, struct phy_device *phy);
diff --git a/drivers/net/dsa/bcm_sf2_cfp.c b/drivers/net/dsa/bcm_sf2_cfp.c
index f3f0c3f07391..1962c8330daa 100644
--- a/drivers/net/dsa/bcm_sf2_cfp.c
+++ b/drivers/net/dsa/bcm_sf2_cfp.c
@@ -358,7 +358,7 @@ static int bcm_sf2_cfp_ipv4_rule_set(struct bcm_sf2_priv *priv, int port,
return -EINVAL;
}
- ip_frag = be32_to_cpu(fs->m_ext.data[0]);
+ ip_frag = !!(be32_to_cpu(fs->h_ext.data[0]) & 1);
/* Locate the first rule available */
if (fs->location == RX_CLS_LOC_ANY)
@@ -569,7 +569,7 @@ static int bcm_sf2_cfp_rule_cmp(struct bcm_sf2_priv *priv, int port,
if (rule->fs.flow_type != fs->flow_type ||
rule->fs.ring_cookie != fs->ring_cookie ||
- rule->fs.m_ext.data[0] != fs->m_ext.data[0])
+ rule->fs.h_ext.data[0] != fs->h_ext.data[0])
continue;
switch (fs->flow_type & ~FLOW_EXT) {
@@ -621,7 +621,7 @@ static int bcm_sf2_cfp_ipv6_rule_set(struct bcm_sf2_priv *priv, int port,
return -EINVAL;
}
- ip_frag = be32_to_cpu(fs->m_ext.data[0]);
+ ip_frag = !!(be32_to_cpu(fs->h_ext.data[0]) & 1);
layout = &udf_tcpip6_layout;
slice_num = bcm_sf2_get_slice_number(layout, 0);
diff --git a/drivers/net/dsa/dsa_loop.c b/drivers/net/dsa/dsa_loop.c
index c8d7ef27fd72..fdcb70b9f0e4 100644
--- a/drivers/net/dsa/dsa_loop.c
+++ b/drivers/net/dsa/dsa_loop.c
@@ -61,7 +61,8 @@ struct dsa_loop_priv {
static struct phy_device *phydevs[PHY_MAX_ADDR];
static enum dsa_tag_protocol dsa_loop_get_protocol(struct dsa_switch *ds,
- int port)
+ int port,
+ enum dsa_tag_protocol mp)
{
dev_dbg(ds->dev, "%s: port: %d\n", __func__, port);
diff --git a/drivers/net/dsa/lan9303-core.c b/drivers/net/dsa/lan9303-core.c
index e3c333a8f45d..cc17a44dd3a8 100644
--- a/drivers/net/dsa/lan9303-core.c
+++ b/drivers/net/dsa/lan9303-core.c
@@ -883,7 +883,8 @@ static int lan9303_check_device(struct lan9303 *chip)
/* ---------------------------- DSA -----------------------------------*/
static enum dsa_tag_protocol lan9303_get_tag_protocol(struct dsa_switch *ds,
- int port)
+ int port,
+ enum dsa_tag_protocol mp)
{
return DSA_TAG_PROTO_LAN9303;
}
diff --git a/drivers/net/dsa/lantiq_gswip.c b/drivers/net/dsa/lantiq_gswip.c
index 955324968b74..0369c22fe3e1 100644
--- a/drivers/net/dsa/lantiq_gswip.c
+++ b/drivers/net/dsa/lantiq_gswip.c
@@ -841,7 +841,8 @@ static int gswip_setup(struct dsa_switch *ds)
}
static enum dsa_tag_protocol gswip_get_tag_protocol(struct dsa_switch *ds,
- int port)
+ int port,
+ enum dsa_tag_protocol mp)
{
return DSA_TAG_PROTO_GSWIP;
}
diff --git a/drivers/net/dsa/microchip/ksz8795.c b/drivers/net/dsa/microchip/ksz8795.c
index 24a5e99f7fd5..47d65b77caf7 100644
--- a/drivers/net/dsa/microchip/ksz8795.c
+++ b/drivers/net/dsa/microchip/ksz8795.c
@@ -645,7 +645,8 @@ static void ksz8795_w_phy(struct ksz_device *dev, u16 phy, u16 reg, u16 val)
}
static enum dsa_tag_protocol ksz8795_get_tag_protocol(struct dsa_switch *ds,
- int port)
+ int port,
+ enum dsa_tag_protocol mp)
{
return DSA_TAG_PROTO_KSZ8795;
}
diff --git a/drivers/net/dsa/microchip/ksz9477.c b/drivers/net/dsa/microchip/ksz9477.c
index 50ffc63d6231..9a51b8a4de5d 100644
--- a/drivers/net/dsa/microchip/ksz9477.c
+++ b/drivers/net/dsa/microchip/ksz9477.c
@@ -295,7 +295,8 @@ static void ksz9477_port_init_cnt(struct ksz_device *dev, int port)
}
static enum dsa_tag_protocol ksz9477_get_tag_protocol(struct dsa_switch *ds,
- int port)
+ int port,
+ enum dsa_tag_protocol mp)
{
enum dsa_tag_protocol proto = DSA_TAG_PROTO_KSZ9477;
struct ksz_device *dev = ds->priv;
diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c
index ed1ec10ec62b..022466ca1c19 100644
--- a/drivers/net/dsa/mt7530.c
+++ b/drivers/net/dsa/mt7530.c
@@ -1223,7 +1223,8 @@ mt7530_port_vlan_del(struct dsa_switch *ds, int port,
}
static enum dsa_tag_protocol
-mtk_get_tag_protocol(struct dsa_switch *ds, int port)
+mtk_get_tag_protocol(struct dsa_switch *ds, int port,
+ enum dsa_tag_protocol mp)
{
struct mt7530_priv *priv = ds->priv;
diff --git a/drivers/net/dsa/mv88e6060.c b/drivers/net/dsa/mv88e6060.c
index a5a37f47b320..24b8219fd607 100644
--- a/drivers/net/dsa/mv88e6060.c
+++ b/drivers/net/dsa/mv88e6060.c
@@ -43,7 +43,8 @@ static const char *mv88e6060_get_name(struct mii_bus *bus, int sw_addr)
}
static enum dsa_tag_protocol mv88e6060_get_tag_protocol(struct dsa_switch *ds,
- int port)
+ int port,
+ enum dsa_tag_protocol m)
{
return DSA_TAG_PROTO_TRAILER;
}
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 3bd988529178..04ef4d00f134 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -340,11 +340,14 @@ static int mv88e6xxx_g1_irq_setup(struct mv88e6xxx_chip *chip)
*/
irq_set_lockdep_class(chip->irq, &lock_key, &request_key);
+ snprintf(chip->irq_name, sizeof(chip->irq_name),
+ "mv88e6xxx-%s", dev_name(chip->dev));
+
mv88e6xxx_reg_unlock(chip);
err = request_threaded_irq(chip->irq, NULL,
mv88e6xxx_g1_irq_thread_fn,
IRQF_ONESHOT | IRQF_SHARED,
- dev_name(chip->dev), chip);
+ chip->irq_name, chip);
mv88e6xxx_reg_lock(chip);
if (err)
mv88e6xxx_g1_irq_free_common(chip);
@@ -2303,10 +2306,14 @@ static int mv88e6xxx_serdes_irq_request(struct mv88e6xxx_chip *chip, int port,
if (!irq)
return 0;
+ snprintf(dev_id->serdes_irq_name, sizeof(dev_id->serdes_irq_name),
+ "mv88e6xxx-%s-serdes-%d", dev_name(chip->dev), port);
+
/* Requesting the IRQ will trigger IRQ callbacks, so release the lock */
mv88e6xxx_reg_unlock(chip);
err = request_threaded_irq(irq, NULL, mv88e6xxx_serdes_irq_thread_fn,
- IRQF_ONESHOT, "mv88e6xxx-serdes", dev_id);
+ IRQF_ONESHOT, dev_id->serdes_irq_name,
+ dev_id);
mv88e6xxx_reg_lock(chip);
if (err)
return err;
@@ -4424,6 +4431,9 @@ static const struct mv88e6xxx_ops mv88e6390_ops = {
.gpio_ops = &mv88e6352_gpio_ops,
.avb_ops = &mv88e6390_avb_ops,
.ptp_ops = &mv88e6352_ptp_ops,
+ .serdes_get_sset_count = mv88e6390_serdes_get_sset_count,
+ .serdes_get_strings = mv88e6390_serdes_get_strings,
+ .serdes_get_stats = mv88e6390_serdes_get_stats,
.phylink_validate = mv88e6390_phylink_validate,
};
@@ -5207,7 +5217,8 @@ static struct mv88e6xxx_chip *mv88e6xxx_alloc_chip(struct device *dev)
}
static enum dsa_tag_protocol mv88e6xxx_get_tag_protocol(struct dsa_switch *ds,
- int port)
+ int port,
+ enum dsa_tag_protocol m)
{
struct mv88e6xxx_chip *chip = ds->priv;
diff --git a/drivers/net/dsa/mv88e6xxx/chip.h b/drivers/net/dsa/mv88e6xxx/chip.h
index 8a8e38bfb161..f332cb4b2fbf 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.h
+++ b/drivers/net/dsa/mv88e6xxx/chip.h
@@ -236,6 +236,7 @@ struct mv88e6xxx_port {
bool mirror_ingress;
bool mirror_egress;
unsigned int serdes_irq;
+ char serdes_irq_name[32];
};
struct mv88e6xxx_chip {
@@ -292,11 +293,16 @@ struct mv88e6xxx_chip {
struct mv88e6xxx_irq g1_irq;
struct mv88e6xxx_irq g2_irq;
int irq;
+ char irq_name[32];
int device_irq;
+ char device_irq_name[32];
int watchdog_irq;
+ char watchdog_irq_name[32];
int atu_prob_irq;
+ char atu_prob_irq_name[32];
int vtu_prob_irq;
+ char vtu_prob_irq_name[32];
struct kthread_worker *kworker;
struct kthread_delayed_work irq_poll_work;
diff --git a/drivers/net/dsa/mv88e6xxx/global1.c b/drivers/net/dsa/mv88e6xxx/global1.c
index 120a65d3e3ef..b016cc205f81 100644
--- a/drivers/net/dsa/mv88e6xxx/global1.c
+++ b/drivers/net/dsa/mv88e6xxx/global1.c
@@ -360,6 +360,11 @@ int mv88e6390_g1_set_cpu_port(struct mv88e6xxx_chip *chip, int port)
{
u16 ptr = MV88E6390_G1_MONITOR_MGMT_CTL_PTR_CPU_DEST;
+ /* Use the default high priority for management frames sent to
+ * the CPU.
+ */
+ port |= MV88E6390_G1_MONITOR_MGMT_CTL_PTR_CPU_DEST_MGMTPRI;
+
return mv88e6390_g1_monitor_write(chip, ptr, port);
}
diff --git a/drivers/net/dsa/mv88e6xxx/global1.h b/drivers/net/dsa/mv88e6xxx/global1.h
index bc5a6b2bb1e4..5324c6f4ae90 100644
--- a/drivers/net/dsa/mv88e6xxx/global1.h
+++ b/drivers/net/dsa/mv88e6xxx/global1.h
@@ -211,6 +211,7 @@
#define MV88E6390_G1_MONITOR_MGMT_CTL_PTR_INGRESS_DEST 0x2000
#define MV88E6390_G1_MONITOR_MGMT_CTL_PTR_EGRESS_DEST 0x2100
#define MV88E6390_G1_MONITOR_MGMT_CTL_PTR_CPU_DEST 0x3000
+#define MV88E6390_G1_MONITOR_MGMT_CTL_PTR_CPU_DEST_MGMTPRI 0x00e0
#define MV88E6390_G1_MONITOR_MGMT_CTL_DATA_MASK 0x00ff
/* Offset 0x1C: Global Control 2 */
diff --git a/drivers/net/dsa/mv88e6xxx/global1_atu.c b/drivers/net/dsa/mv88e6xxx/global1_atu.c
index bdcd25560dd2..bac9a8a68e50 100644
--- a/drivers/net/dsa/mv88e6xxx/global1_atu.c
+++ b/drivers/net/dsa/mv88e6xxx/global1_atu.c
@@ -425,9 +425,12 @@ int mv88e6xxx_g1_atu_prob_irq_setup(struct mv88e6xxx_chip *chip)
if (chip->atu_prob_irq < 0)
return chip->atu_prob_irq;
+ snprintf(chip->atu_prob_irq_name, sizeof(chip->atu_prob_irq_name),
+ "mv88e6xxx-%s-g1-atu-prob", dev_name(chip->dev));
+
err = request_threaded_irq(chip->atu_prob_irq, NULL,
mv88e6xxx_g1_atu_prob_irq_thread_fn,
- IRQF_ONESHOT, "mv88e6xxx-g1-atu-prob",
+ IRQF_ONESHOT, chip->atu_prob_irq_name,
chip);
if (err)
irq_dispose_mapping(chip->atu_prob_irq);
diff --git a/drivers/net/dsa/mv88e6xxx/global1_vtu.c b/drivers/net/dsa/mv88e6xxx/global1_vtu.c
index 33056a609e96..48390b7b18ad 100644
--- a/drivers/net/dsa/mv88e6xxx/global1_vtu.c
+++ b/drivers/net/dsa/mv88e6xxx/global1_vtu.c
@@ -631,9 +631,12 @@ int mv88e6xxx_g1_vtu_prob_irq_setup(struct mv88e6xxx_chip *chip)
if (chip->vtu_prob_irq < 0)
return chip->vtu_prob_irq;
+ snprintf(chip->vtu_prob_irq_name, sizeof(chip->vtu_prob_irq_name),
+ "mv88e6xxx-%s-g1-vtu-prob", dev_name(chip->dev));
+
err = request_threaded_irq(chip->vtu_prob_irq, NULL,
mv88e6xxx_g1_vtu_prob_irq_thread_fn,
- IRQF_ONESHOT, "mv88e6xxx-g1-vtu-prob",
+ IRQF_ONESHOT, chip->vtu_prob_irq_name,
chip);
if (err)
irq_dispose_mapping(chip->vtu_prob_irq);
diff --git a/drivers/net/dsa/mv88e6xxx/global2.c b/drivers/net/dsa/mv88e6xxx/global2.c
index 87bfe7c8c9cd..01503014b1ee 100644
--- a/drivers/net/dsa/mv88e6xxx/global2.c
+++ b/drivers/net/dsa/mv88e6xxx/global2.c
@@ -948,10 +948,13 @@ static int mv88e6xxx_g2_watchdog_setup(struct mv88e6xxx_chip *chip)
if (chip->watchdog_irq < 0)
return chip->watchdog_irq;
+ snprintf(chip->watchdog_irq_name, sizeof(chip->watchdog_irq_name),
+ "mv88e6xxx-%s-watchdog", dev_name(chip->dev));
+
err = request_threaded_irq(chip->watchdog_irq, NULL,
mv88e6xxx_g2_watchdog_thread_fn,
IRQF_ONESHOT | IRQF_TRIGGER_FALLING,
- "mv88e6xxx-watchdog", chip);
+ chip->watchdog_irq_name, chip);
if (err)
return err;
@@ -1114,9 +1117,12 @@ int mv88e6xxx_g2_irq_setup(struct mv88e6xxx_chip *chip)
goto out;
}
+ snprintf(chip->device_irq_name, sizeof(chip->device_irq_name),
+ "mv88e6xxx-%s-g2", dev_name(chip->dev));
+
err = request_threaded_irq(chip->device_irq, NULL,
mv88e6xxx_g2_irq_thread_fn,
- IRQF_ONESHOT, "mv88e6xxx-g2", chip);
+ IRQF_ONESHOT, chip->device_irq_name, chip);
if (err)
goto out;
diff --git a/drivers/net/dsa/mv88e6xxx/port.c b/drivers/net/dsa/mv88e6xxx/port.c
index 7fe256c5739d..0b43c650e100 100644
--- a/drivers/net/dsa/mv88e6xxx/port.c
+++ b/drivers/net/dsa/mv88e6xxx/port.c
@@ -393,7 +393,7 @@ phy_interface_t mv88e6390x_port_max_speed_mode(int port)
}
static int mv88e6xxx_port_set_cmode(struct mv88e6xxx_chip *chip, int port,
- phy_interface_t mode)
+ phy_interface_t mode, bool force)
{
u8 lane;
u16 cmode;
@@ -427,8 +427,8 @@ static int mv88e6xxx_port_set_cmode(struct mv88e6xxx_chip *chip, int port,
cmode = 0;
}
- /* cmode doesn't change, nothing to do for us */
- if (cmode == chip->ports[port].cmode)
+ /* cmode doesn't change, nothing to do for us unless forced */
+ if (cmode == chip->ports[port].cmode && !force)
return 0;
lane = mv88e6xxx_serdes_get_lane(chip, port);
@@ -484,7 +484,7 @@ int mv88e6390x_port_set_cmode(struct mv88e6xxx_chip *chip, int port,
if (port != 9 && port != 10)
return -EOPNOTSUPP;
- return mv88e6xxx_port_set_cmode(chip, port, mode);
+ return mv88e6xxx_port_set_cmode(chip, port, mode, false);
}
int mv88e6390_port_set_cmode(struct mv88e6xxx_chip *chip, int port,
@@ -504,7 +504,7 @@ int mv88e6390_port_set_cmode(struct mv88e6xxx_chip *chip, int port,
break;
}
- return mv88e6xxx_port_set_cmode(chip, port, mode);
+ return mv88e6xxx_port_set_cmode(chip, port, mode, false);
}
static int mv88e6341_port_set_cmode_writable(struct mv88e6xxx_chip *chip,
@@ -555,7 +555,7 @@ int mv88e6341_port_set_cmode(struct mv88e6xxx_chip *chip, int port,
if (err)
return err;
- return mv88e6xxx_port_set_cmode(chip, port, mode);
+ return mv88e6xxx_port_set_cmode(chip, port, mode, true);
}
int mv88e6185_port_get_cmode(struct mv88e6xxx_chip *chip, int port, u8 *cmode)
diff --git a/drivers/net/dsa/mv88e6xxx/serdes.c b/drivers/net/dsa/mv88e6xxx/serdes.c
index 902feb398746..8d8b3b74aee1 100644
--- a/drivers/net/dsa/mv88e6xxx/serdes.c
+++ b/drivers/net/dsa/mv88e6xxx/serdes.c
@@ -405,22 +405,116 @@ static int mv88e6390_serdes_power_sgmii(struct mv88e6xxx_chip *chip, u8 lane,
return err;
}
+struct mv88e6390_serdes_hw_stat {
+ char string[ETH_GSTRING_LEN];
+ int reg;
+};
+
+static struct mv88e6390_serdes_hw_stat mv88e6390_serdes_hw_stats[] = {
+ { "serdes_rx_pkts", 0xf021 },
+ { "serdes_rx_bytes", 0xf024 },
+ { "serdes_rx_pkts_error", 0xf027 },
+};
+
+int mv88e6390_serdes_get_sset_count(struct mv88e6xxx_chip *chip, int port)
+{
+ if (mv88e6390_serdes_get_lane(chip, port) == 0)
+ return 0;
+
+ return ARRAY_SIZE(mv88e6390_serdes_hw_stats);
+}
+
+int mv88e6390_serdes_get_strings(struct mv88e6xxx_chip *chip,
+ int port, uint8_t *data)
+{
+ struct mv88e6390_serdes_hw_stat *stat;
+ int i;
+
+ if (mv88e6390_serdes_get_lane(chip, port) == 0)
+ return 0;
+
+ for (i = 0; i < ARRAY_SIZE(mv88e6390_serdes_hw_stats); i++) {
+ stat = &mv88e6390_serdes_hw_stats[i];
+ memcpy(data + i * ETH_GSTRING_LEN, stat->string,
+ ETH_GSTRING_LEN);
+ }
+ return ARRAY_SIZE(mv88e6390_serdes_hw_stats);
+}
+
+static uint64_t mv88e6390_serdes_get_stat(struct mv88e6xxx_chip *chip, int lane,
+ struct mv88e6390_serdes_hw_stat *stat)
+{
+ u16 reg[3];
+ int err, i;
+
+ for (i = 0; i < 3; i++) {
+ err = mv88e6390_serdes_read(chip, lane, MDIO_MMD_PHYXS,
+ stat->reg + i, &reg[i]);
+ if (err) {
+ dev_err(chip->dev, "failed to read statistic\n");
+ return 0;
+ }
+ }
+
+ return reg[0] | ((u64)reg[1] << 16) | ((u64)reg[2] << 32);
+}
+
+int mv88e6390_serdes_get_stats(struct mv88e6xxx_chip *chip, int port,
+ uint64_t *data)
+{
+ struct mv88e6390_serdes_hw_stat *stat;
+ int lane;
+ int i;
+
+ lane = mv88e6390_serdes_get_lane(chip, port);
+ if (lane == 0)
+ return 0;
+
+ for (i = 0; i < ARRAY_SIZE(mv88e6390_serdes_hw_stats); i++) {
+ stat = &mv88e6390_serdes_hw_stats[i];
+ data[i] = mv88e6390_serdes_get_stat(chip, lane, stat);
+ }
+
+ return ARRAY_SIZE(mv88e6390_serdes_hw_stats);
+}
+
+static int mv88e6390_serdes_enable_checker(struct mv88e6xxx_chip *chip, u8 lane)
+{
+ u16 reg;
+ int err;
+
+ err = mv88e6390_serdes_read(chip, lane, MDIO_MMD_PHYXS,
+ MV88E6390_PG_CONTROL, &reg);
+ if (err)
+ return err;
+
+ reg |= MV88E6390_PG_CONTROL_ENABLE_PC;
+ return mv88e6390_serdes_write(chip, lane, MDIO_MMD_PHYXS,
+ MV88E6390_PG_CONTROL, reg);
+}
+
int mv88e6390_serdes_power(struct mv88e6xxx_chip *chip, int port, u8 lane,
bool up)
{
u8 cmode = chip->ports[port].cmode;
+ int err = 0;
switch (cmode) {
case MV88E6XXX_PORT_STS_CMODE_SGMII:
case MV88E6XXX_PORT_STS_CMODE_1000BASEX:
case MV88E6XXX_PORT_STS_CMODE_2500BASEX:
- return mv88e6390_serdes_power_sgmii(chip, lane, up);
+ err = mv88e6390_serdes_power_sgmii(chip, lane, up);
+ break;
case MV88E6XXX_PORT_STS_CMODE_XAUI:
case MV88E6XXX_PORT_STS_CMODE_RXAUI:
- return mv88e6390_serdes_power_10g(chip, lane, up);
+ err = mv88e6390_serdes_power_10g(chip, lane, up);
+ break;
}
- return 0;
+ if (!err && up)
+ err = mv88e6390_serdes_enable_checker(chip, lane);
+
+ return err;
}
static void mv88e6390_serdes_irq_link_sgmii(struct mv88e6xxx_chip *chip,
diff --git a/drivers/net/dsa/mv88e6xxx/serdes.h b/drivers/net/dsa/mv88e6xxx/serdes.h
index bd8df36ab537..d16ef4da20b0 100644
--- a/drivers/net/dsa/mv88e6xxx/serdes.h
+++ b/drivers/net/dsa/mv88e6xxx/serdes.h
@@ -74,6 +74,10 @@
#define MV88E6390_SGMII_PHY_STATUS_SPD_DPL_VALID BIT(11)
#define MV88E6390_SGMII_PHY_STATUS_LINK BIT(10)
+/* Packet generator pad packet checker */
+#define MV88E6390_PG_CONTROL 0xf010
+#define MV88E6390_PG_CONTROL_ENABLE_PC BIT(0)
+
u8 mv88e6341_serdes_get_lane(struct mv88e6xxx_chip *chip, int port);
u8 mv88e6352_serdes_get_lane(struct mv88e6xxx_chip *chip, int port);
u8 mv88e6390_serdes_get_lane(struct mv88e6xxx_chip *chip, int port);
@@ -99,6 +103,11 @@ int mv88e6352_serdes_get_strings(struct mv88e6xxx_chip *chip,
int port, uint8_t *data);
int mv88e6352_serdes_get_stats(struct mv88e6xxx_chip *chip, int port,
uint64_t *data);
+int mv88e6390_serdes_get_sset_count(struct mv88e6xxx_chip *chip, int port);
+int mv88e6390_serdes_get_strings(struct mv88e6xxx_chip *chip,
+ int port, uint8_t *data);
+int mv88e6390_serdes_get_stats(struct mv88e6xxx_chip *chip, int port,
+ uint64_t *data);
/* Return the (first) SERDES lane address a port is using, 0 otherwise. */
static inline u8 mv88e6xxx_serdes_get_lane(struct mv88e6xxx_chip *chip,
diff --git a/drivers/net/dsa/ocelot/Kconfig b/drivers/net/dsa/ocelot/Kconfig
index 0031ca814346..a5b7cca03d09 100644
--- a/drivers/net/dsa/ocelot/Kconfig
+++ b/drivers/net/dsa/ocelot/Kconfig
@@ -2,8 +2,11 @@
config NET_DSA_MSCC_FELIX
tristate "Ocelot / Felix Ethernet switch support"
depends on NET_DSA && PCI
+ depends on NET_VENDOR_MICROSEMI
+ depends on NET_VENDOR_FREESCALE
select MSCC_OCELOT_SWITCH
select NET_DSA_TAG_OCELOT
+ select FSL_ENETC_MDIO
help
This driver supports the VSC9959 network switch, which is a member of
the Vitesse / Microsemi / Microchip Ocelot family of switching cores.
diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c
index b7f92464815d..feccb6201660 100644
--- a/drivers/net/dsa/ocelot/felix.c
+++ b/drivers/net/dsa/ocelot/felix.c
@@ -2,16 +2,22 @@
/* Copyright 2019 NXP Semiconductors
*/
#include <uapi/linux/if_bridge.h>
+#include <soc/mscc/ocelot_qsys.h>
+#include <soc/mscc/ocelot_sys.h>
+#include <soc/mscc/ocelot_dev.h>
+#include <soc/mscc/ocelot_ana.h>
#include <soc/mscc/ocelot.h>
#include <linux/packing.h>
#include <linux/module.h>
+#include <linux/of_net.h>
#include <linux/pci.h>
#include <linux/of.h>
#include <net/dsa.h>
#include "felix.h"
static enum dsa_tag_protocol felix_get_tag_protocol(struct dsa_switch *ds,
- int port)
+ int port,
+ enum dsa_tag_protocol mp)
{
return DSA_TAG_PROTO_OCELOT;
}
@@ -26,14 +32,6 @@ static int felix_set_ageing_time(struct dsa_switch *ds,
return 0;
}
-static void felix_adjust_link(struct dsa_switch *ds, int port,
- struct phy_device *phydev)
-{
- struct ocelot *ocelot = ds->priv;
-
- ocelot_adjust_link(ocelot, port, phydev);
-}
-
static int felix_fdb_dump(struct dsa_switch *ds, int port,
dsa_fdb_dump_cb_t *cb, void *data)
{
@@ -155,6 +153,138 @@ static void felix_port_disable(struct dsa_switch *ds, int port)
return ocelot_port_disable(ocelot, port);
}
+static void felix_phylink_validate(struct dsa_switch *ds, int port,
+ unsigned long *supported,
+ struct phylink_link_state *state)
+{
+ struct ocelot *ocelot = ds->priv;
+ struct ocelot_port *ocelot_port = ocelot->ports[port];
+ __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, };
+
+ if (state->interface != PHY_INTERFACE_MODE_NA &&
+ state->interface != ocelot_port->phy_mode) {
+ bitmap_zero(supported, __ETHTOOL_LINK_MODE_MASK_NBITS);
+ return;
+ }
+
+ /* No half-duplex. */
+ phylink_set_port_modes(mask);
+ phylink_set(mask, Autoneg);
+ phylink_set(mask, Pause);
+ phylink_set(mask, Asym_Pause);
+ if (state->interface != PHY_INTERFACE_MODE_2500BASEX) {
+ phylink_set(mask, 10baseT_Full);
+ phylink_set(mask, 100baseT_Full);
+ phylink_set(mask, 1000baseT_Full);
+ }
+ /* The internal ports that run at 2.5G are overclocked GMII */
+ if (state->interface == PHY_INTERFACE_MODE_GMII ||
+ state->interface == PHY_INTERFACE_MODE_2500BASEX ||
+ state->interface == PHY_INTERFACE_MODE_USXGMII) {
+ phylink_set(mask, 2500baseT_Full);
+ phylink_set(mask, 2500baseX_Full);
+ }
+
+ bitmap_and(supported, supported, mask,
+ __ETHTOOL_LINK_MODE_MASK_NBITS);
+ bitmap_and(state->advertising, state->advertising, mask,
+ __ETHTOOL_LINK_MODE_MASK_NBITS);
+}
+
+static int felix_phylink_mac_pcs_get_state(struct dsa_switch *ds, int port,
+ struct phylink_link_state *state)
+{
+ struct ocelot *ocelot = ds->priv;
+ struct felix *felix = ocelot_to_felix(ocelot);
+
+ if (felix->info->pcs_link_state)
+ felix->info->pcs_link_state(ocelot, port, state);
+
+ return 0;
+}
+
+static void felix_phylink_mac_config(struct dsa_switch *ds, int port,
+ unsigned int link_an_mode,
+ const struct phylink_link_state *state)
+{
+ struct ocelot *ocelot = ds->priv;
+ struct ocelot_port *ocelot_port = ocelot->ports[port];
+ struct felix *felix = ocelot_to_felix(ocelot);
+ u32 mac_fc_cfg;
+
+ /* Take port out of reset by clearing the MAC_TX_RST, MAC_RX_RST and
+ * PORT_RST bits in CLOCK_CFG
+ */
+ ocelot_port_writel(ocelot_port, DEV_CLOCK_CFG_LINK_SPEED(state->speed),
+ DEV_CLOCK_CFG);
+
+ /* Flow control. Link speed is only used here to evaluate the time
+ * specification in incoming pause frames.
+ */
+ mac_fc_cfg = SYS_MAC_FC_CFG_FC_LINK_SPEED(state->speed);
+ if (state->pause & MLO_PAUSE_RX)
+ mac_fc_cfg |= SYS_MAC_FC_CFG_RX_FC_ENA;
+ if (state->pause & MLO_PAUSE_TX)
+ mac_fc_cfg |= SYS_MAC_FC_CFG_TX_FC_ENA |
+ SYS_MAC_FC_CFG_PAUSE_VAL_CFG(0xffff) |
+ SYS_MAC_FC_CFG_FC_LATENCY_CFG(0x7) |
+ SYS_MAC_FC_CFG_ZERO_PAUSE_ENA;
+ ocelot_write_rix(ocelot, mac_fc_cfg, SYS_MAC_FC_CFG, port);
+
+ ocelot_write_rix(ocelot, 0, ANA_POL_FLOWC, port);
+
+ if (felix->info->pcs_init)
+ felix->info->pcs_init(ocelot, port, link_an_mode, state);
+}
+
+static void felix_phylink_mac_an_restart(struct dsa_switch *ds, int port)
+{
+ struct ocelot *ocelot = ds->priv;
+ struct felix *felix = ocelot_to_felix(ocelot);
+
+ if (felix->info->pcs_an_restart)
+ felix->info->pcs_an_restart(ocelot, port);
+}
+
+static void felix_phylink_mac_link_down(struct dsa_switch *ds, int port,
+ unsigned int link_an_mode,
+ phy_interface_t interface)
+{
+ struct ocelot *ocelot = ds->priv;
+ struct ocelot_port *ocelot_port = ocelot->ports[port];
+
+ ocelot_port_writel(ocelot_port, 0, DEV_MAC_ENA_CFG);
+ ocelot_rmw_rix(ocelot, 0, QSYS_SWITCH_PORT_MODE_PORT_ENA,
+ QSYS_SWITCH_PORT_MODE, port);
+}
+
+static void felix_phylink_mac_link_up(struct dsa_switch *ds, int port,
+ unsigned int link_an_mode,
+ phy_interface_t interface,
+ struct phy_device *phydev)
+{
+ struct ocelot *ocelot = ds->priv;
+ struct ocelot_port *ocelot_port = ocelot->ports[port];
+
+ /* Enable MAC module */
+ ocelot_port_writel(ocelot_port, DEV_MAC_ENA_CFG_RX_ENA |
+ DEV_MAC_ENA_CFG_TX_ENA, DEV_MAC_ENA_CFG);
+
+ /* Enable receiving frames on the port, and activate auto-learning of
+ * MAC addresses.
+ */
+ ocelot_write_gix(ocelot, ANA_PORT_PORT_CFG_LEARNAUTO |
+ ANA_PORT_PORT_CFG_RECV_ENA |
+ ANA_PORT_PORT_CFG_PORTID_VAL(port),
+ ANA_PORT_PORT_CFG, port);
+
+ /* Core: Enable port for frame transfer */
+ ocelot_write_rix(ocelot, QSYS_SWITCH_PORT_MODE_INGRESS_DROP_MODE |
+ QSYS_SWITCH_PORT_MODE_SCH_NEXT_CFG(1) |
+ QSYS_SWITCH_PORT_MODE_PORT_ENA,
+ QSYS_SWITCH_PORT_MODE, port);
+}
+
static void felix_get_strings(struct dsa_switch *ds, int port,
u32 stringset, u8 *data)
{
@@ -185,10 +315,76 @@ static int felix_get_ts_info(struct dsa_switch *ds, int port,
return ocelot_get_ts_info(ocelot, port, info);
}
+static int felix_parse_ports_node(struct felix *felix,
+ struct device_node *ports_node,
+ phy_interface_t *port_phy_modes)
+{
+ struct ocelot *ocelot = &felix->ocelot;
+ struct device *dev = felix->ocelot.dev;
+ struct device_node *child;
+
+ for_each_child_of_node(ports_node, child) {
+ phy_interface_t phy_mode;
+ u32 port;
+ int err;
+
+ /* Get switch port number from DT */
+ if (of_property_read_u32(child, "reg", &port) < 0) {
+ dev_err(dev, "Port number not defined in device tree "
+ "(property \"reg\")\n");
+ of_node_put(child);
+ return -ENODEV;
+ }
+
+ /* Get PHY mode from DT */
+ err = of_get_phy_mode(child, &phy_mode);
+ if (err) {
+ dev_err(dev, "Failed to read phy-mode or "
+ "phy-interface-type property for port %d\n",
+ port);
+ of_node_put(child);
+ return -ENODEV;
+ }
+
+ err = felix->info->prevalidate_phy_mode(ocelot, port, phy_mode);
+ if (err < 0) {
+ dev_err(dev, "Unsupported PHY mode %s on port %d\n",
+ phy_modes(phy_mode), port);
+ return err;
+ }
+
+ port_phy_modes[port] = phy_mode;
+ }
+
+ return 0;
+}
+
+static int felix_parse_dt(struct felix *felix, phy_interface_t *port_phy_modes)
+{
+ struct device *dev = felix->ocelot.dev;
+ struct device_node *switch_node;
+ struct device_node *ports_node;
+ int err;
+
+ switch_node = dev->of_node;
+
+ ports_node = of_get_child_by_name(switch_node, "ports");
+ if (!ports_node) {
+ dev_err(dev, "Incorrect bindings: absent \"ports\" node\n");
+ return -ENODEV;
+ }
+
+ err = felix_parse_ports_node(felix, ports_node, port_phy_modes);
+ of_node_put(ports_node);
+
+ return err;
+}
+
static int felix_init_structs(struct felix *felix, int num_phys_ports)
{
struct ocelot *ocelot = &felix->ocelot;
- resource_size_t base;
+ phy_interface_t *port_phy_modes;
+ resource_size_t switch_base;
int port, i, err;
ocelot->num_phys_ports = num_phys_ports;
@@ -203,7 +399,19 @@ static int felix_init_structs(struct felix *felix, int num_phys_ports)
ocelot->shared_queue_sz = felix->info->shared_queue_sz;
ocelot->ops = felix->info->ops;
- base = pci_resource_start(felix->pdev, felix->info->pci_bar);
+ port_phy_modes = kcalloc(num_phys_ports, sizeof(phy_interface_t),
+ GFP_KERNEL);
+ if (!port_phy_modes)
+ return -ENOMEM;
+
+ err = felix_parse_dt(felix, port_phy_modes);
+ if (err) {
+ kfree(port_phy_modes);
+ return err;
+ }
+
+ switch_base = pci_resource_start(felix->pdev,
+ felix->info->switch_pci_bar);
for (i = 0; i < TARGET_MAX; i++) {
struct regmap *target;
@@ -214,13 +422,14 @@ static int felix_init_structs(struct felix *felix, int num_phys_ports)
res = &felix->info->target_io_res[i];
res->flags = IORESOURCE_MEM;
- res->start += base;
- res->end += base;
+ res->start += switch_base;
+ res->end += switch_base;
target = ocelot_regmap_init(ocelot, res);
if (IS_ERR(target)) {
dev_err(ocelot->dev,
"Failed to map device memory space\n");
+ kfree(port_phy_modes);
return PTR_ERR(target);
}
@@ -230,6 +439,7 @@ static int felix_init_structs(struct felix *felix, int num_phys_ports)
err = ocelot_regfields_init(ocelot, felix->info->regfields);
if (err) {
dev_err(ocelot->dev, "failed to init reg fields map\n");
+ kfree(port_phy_modes);
return err;
}
@@ -244,26 +454,37 @@ static int felix_init_structs(struct felix *felix, int num_phys_ports)
if (!ocelot_port) {
dev_err(ocelot->dev,
"failed to allocate port memory\n");
+ kfree(port_phy_modes);
return -ENOMEM;
}
res = &felix->info->port_io_res[port];
res->flags = IORESOURCE_MEM;
- res->start += base;
- res->end += base;
+ res->start += switch_base;
+ res->end += switch_base;
port_regs = devm_ioremap_resource(ocelot->dev, res);
if (IS_ERR(port_regs)) {
dev_err(ocelot->dev,
"failed to map registers for port %d\n", port);
+ kfree(port_phy_modes);
return PTR_ERR(port_regs);
}
+ ocelot_port->phy_mode = port_phy_modes[port];
ocelot_port->ocelot = ocelot;
ocelot_port->regs = port_regs;
ocelot->ports[port] = ocelot_port;
}
+ kfree(port_phy_modes);
+
+ if (felix->info->mdio_bus_alloc) {
+ err = felix->info->mdio_bus_alloc(ocelot);
+ if (err < 0)
+ return err;
+ }
+
return 0;
}
@@ -293,12 +514,22 @@ static int felix_setup(struct dsa_switch *ds)
OCELOT_TAG_PREFIX_LONG);
}
+ /* It looks like the MAC/PCS interrupt register - PM0_IEVENT (0x8040)
+ * isn't instantiated for the Felix PF.
+ * In-band AN may take a few ms to complete, so we need to poll.
+ */
+ ds->pcs_poll = true;
+
return 0;
}
static void felix_teardown(struct dsa_switch *ds)
{
struct ocelot *ocelot = ds->priv;
+ struct felix *felix = ocelot_to_felix(ocelot);
+
+ if (felix->info->mdio_bus_free)
+ felix->info->mdio_bus_free(ocelot);
/* stop workqueue thread */
ocelot_deinit(ocelot);
@@ -369,7 +600,12 @@ static const struct dsa_switch_ops felix_switch_ops = {
.get_ethtool_stats = felix_get_ethtool_stats,
.get_sset_count = felix_get_sset_count,
.get_ts_info = felix_get_ts_info,
- .adjust_link = felix_adjust_link,
+ .phylink_validate = felix_phylink_validate,
+ .phylink_mac_link_state = felix_phylink_mac_pcs_get_state,
+ .phylink_mac_config = felix_phylink_mac_config,
+ .phylink_mac_an_restart = felix_phylink_mac_an_restart,
+ .phylink_mac_link_down = felix_phylink_mac_link_down,
+ .phylink_mac_link_up = felix_phylink_mac_link_up,
.port_enable = felix_port_enable,
.port_disable = felix_port_disable,
.port_fdb_dump = felix_fdb_dump,
diff --git a/drivers/net/dsa/ocelot/felix.h b/drivers/net/dsa/ocelot/felix.h
index 204296e51d0c..3a7580015b62 100644
--- a/drivers/net/dsa/ocelot/felix.h
+++ b/drivers/net/dsa/ocelot/felix.h
@@ -10,6 +10,7 @@
struct felix_info {
struct resource *target_io_res;
struct resource *port_io_res;
+ struct resource *imdio_res;
const struct reg_field *regfields;
const u32 *const *map;
const struct ocelot_ops *ops;
@@ -17,7 +18,18 @@ struct felix_info {
const struct ocelot_stat_layout *stats_layout;
unsigned int num_stats;
int num_ports;
- int pci_bar;
+ int switch_pci_bar;
+ int imdio_pci_bar;
+ int (*mdio_bus_alloc)(struct ocelot *ocelot);
+ void (*mdio_bus_free)(struct ocelot *ocelot);
+ void (*pcs_init)(struct ocelot *ocelot, int port,
+ unsigned int link_an_mode,
+ const struct phylink_link_state *state);
+ void (*pcs_an_restart)(struct ocelot *ocelot, int port);
+ void (*pcs_link_state)(struct ocelot *ocelot, int port,
+ struct phylink_link_state *state);
+ int (*prevalidate_phy_mode)(struct ocelot *ocelot, int port,
+ phy_interface_t phy_mode);
};
extern struct felix_info felix_info_vsc9959;
@@ -32,6 +44,8 @@ struct felix {
struct pci_dev *pdev;
struct felix_info *info;
struct ocelot ocelot;
+ struct mii_bus *imdio;
+ struct phy_device **pcs;
};
#endif
diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c
index b9758b0d18c7..03482616faa7 100644
--- a/drivers/net/dsa/ocelot/felix_vsc9959.c
+++ b/drivers/net/dsa/ocelot/felix_vsc9959.c
@@ -2,12 +2,33 @@
/* Copyright 2017 Microsemi Corporation
* Copyright 2018-2019 NXP Semiconductors
*/
+#include <linux/fsl/enetc_mdio.h>
#include <soc/mscc/ocelot_sys.h>
#include <soc/mscc/ocelot.h>
#include <linux/iopoll.h>
#include <linux/pci.h>
#include "felix.h"
+/* TODO: should find a better place for these */
+#define USXGMII_BMCR_RESET BIT(15)
+#define USXGMII_BMCR_AN_EN BIT(12)
+#define USXGMII_BMCR_RST_AN BIT(9)
+#define USXGMII_BMSR_LNKS(status) (((status) & GENMASK(2, 2)) >> 2)
+#define USXGMII_BMSR_AN_CMPL(status) (((status) & GENMASK(5, 5)) >> 5)
+#define USXGMII_ADVERTISE_LNKS(x) (((x) << 15) & BIT(15))
+#define USXGMII_ADVERTISE_FDX BIT(12)
+#define USXGMII_ADVERTISE_SPEED(x) (((x) << 9) & GENMASK(11, 9))
+#define USXGMII_LPA_LNKS(lpa) ((lpa) >> 15)
+#define USXGMII_LPA_DUPLEX(lpa) (((lpa) & GENMASK(12, 12)) >> 12)
+#define USXGMII_LPA_SPEED(lpa) (((lpa) & GENMASK(11, 9)) >> 9)
+
+enum usxgmii_speed {
+ USXGMII_SPEED_10 = 0,
+ USXGMII_SPEED_100 = 1,
+ USXGMII_SPEED_1000 = 2,
+ USXGMII_SPEED_2500 = 4,
+};
+
static const u32 vsc9959_ana_regmap[] = {
REG(ANA_ADVLEARN, 0x0089a0),
REG(ANA_VLANMASK, 0x0089a4),
@@ -386,6 +407,15 @@ static struct resource vsc9959_port_io_res[] = {
},
};
+/* Port MAC 0 Internal MDIO bus through which the SerDes acting as an
+ * SGMII/QSGMII MAC PCS can be found.
+ */
+static struct resource vsc9959_imdio_res = {
+ .start = 0x8030,
+ .end = 0x8040,
+ .name = "imdio",
+};
+
static const struct reg_field vsc9959_regfields[] = {
[ANA_ADVLEARN_VLAN_CHK] = REG_FIELD(ANA_ADVLEARN, 6, 6),
[ANA_ADVLEARN_LEARN_MIRROR] = REG_FIELD(ANA_ADVLEARN, 0, 5),
@@ -565,13 +595,475 @@ static int vsc9959_reset(struct ocelot *ocelot)
return 0;
}
+static void vsc9959_pcs_an_restart_sgmii(struct phy_device *pcs)
+{
+ phy_set_bits(pcs, MII_BMCR, BMCR_ANRESTART);
+}
+
+static void vsc9959_pcs_an_restart_usxgmii(struct phy_device *pcs)
+{
+ phy_write_mmd(pcs, MDIO_MMD_VEND2, MII_BMCR,
+ USXGMII_BMCR_RESET |
+ USXGMII_BMCR_AN_EN |
+ USXGMII_BMCR_RST_AN);
+}
+
+static void vsc9959_pcs_an_restart(struct ocelot *ocelot, int port)
+{
+ struct felix *felix = ocelot_to_felix(ocelot);
+ struct phy_device *pcs = felix->pcs[port];
+
+ if (!pcs)
+ return;
+
+ switch (pcs->interface) {
+ case PHY_INTERFACE_MODE_SGMII:
+ case PHY_INTERFACE_MODE_QSGMII:
+ vsc9959_pcs_an_restart_sgmii(pcs);
+ break;
+ case PHY_INTERFACE_MODE_USXGMII:
+ vsc9959_pcs_an_restart_usxgmii(pcs);
+ break;
+ default:
+ dev_err(ocelot->dev, "Invalid PCS interface type %s\n",
+ phy_modes(pcs->interface));
+ break;
+ }
+}
+
+/* We enable SGMII AN only when the PHY has managed = "in-band-status" in the
+ * device tree. If we are in MLO_AN_PHY mode, we program directly state->speed
+ * into the PCS, which is retrieved out-of-band over MDIO. This also has the
+ * benefit of working with SGMII fixed-links, like downstream switches, where
+ * both link partners attempt to operate as AN slaves and therefore AN never
+ * completes. But it also has the disadvantage that some PHY chips don't pass
+ * traffic if SGMII AN is enabled but not completed (acknowledged by us), so
+ * setting MLO_AN_INBAND is actually required for those.
+ */
+static void vsc9959_pcs_init_sgmii(struct phy_device *pcs,
+ unsigned int link_an_mode,
+ const struct phylink_link_state *state)
+{
+ if (link_an_mode == MLO_AN_INBAND) {
+ /* SGMII spec requires tx_config_Reg[15:0] to be exactly 0x4001
+ * for the MAC PCS in order to acknowledge the AN.
+ */
+ phy_write(pcs, MII_ADVERTISE, ADVERTISE_SGMII |
+ ADVERTISE_LPACK);
+
+ phy_write(pcs, ENETC_PCS_IF_MODE,
+ ENETC_PCS_IF_MODE_SGMII_EN |
+ ENETC_PCS_IF_MODE_USE_SGMII_AN);
+
+ /* Adjust link timer for SGMII */
+ phy_write(pcs, ENETC_PCS_LINK_TIMER1,
+ ENETC_PCS_LINK_TIMER1_VAL);
+ phy_write(pcs, ENETC_PCS_LINK_TIMER2,
+ ENETC_PCS_LINK_TIMER2_VAL);
+
+ phy_write(pcs, MII_BMCR, BMCR_ANRESTART | BMCR_ANENABLE);
+ } else {
+ int speed;
+
+ if (state->duplex == DUPLEX_HALF) {
+ phydev_err(pcs, "Half duplex not supported\n");
+ return;
+ }
+ switch (state->speed) {
+ case SPEED_1000:
+ speed = ENETC_PCS_SPEED_1000;
+ break;
+ case SPEED_100:
+ speed = ENETC_PCS_SPEED_100;
+ break;
+ case SPEED_10:
+ speed = ENETC_PCS_SPEED_10;
+ break;
+ case SPEED_UNKNOWN:
+ /* Silently don't do anything */
+ return;
+ default:
+ phydev_err(pcs, "Invalid PCS speed %d\n", state->speed);
+ return;
+ }
+
+ phy_write(pcs, ENETC_PCS_IF_MODE,
+ ENETC_PCS_IF_MODE_SGMII_EN |
+ ENETC_PCS_IF_MODE_SGMII_SPEED(speed));
+
+ /* Yes, not a mistake: speed is given by IF_MODE. */
+ phy_write(pcs, MII_BMCR, BMCR_RESET |
+ BMCR_SPEED1000 |
+ BMCR_FULLDPLX);
+ }
+}
+
+/* 2500Base-X is SerDes protocol 7 on Felix and 6 on ENETC. It is a SerDes lane
+ * clocked at 3.125 GHz which encodes symbols with 8b/10b and does not have
+ * auto-negotiation of any link parameters. Electrically it is compatible with
+ * a single lane of XAUI.
+ * The hardware reference manual wants to call this mode SGMII, but it isn't
+ * really, since the fundamental features of SGMII:
+ * - Downgrading the link speed by duplicating symbols
+ * - Auto-negotiation
+ * are not there.
+ * The speed is configured at 1000 in the IF_MODE and BMCR MDIO registers
+ * because the clock frequency is actually given by a PLL configured in the
+ * Reset Configuration Word (RCW).
+ * Since there is no difference between fixed speed SGMII w/o AN and 802.3z w/o
+ * AN, we call this PHY interface type 2500Base-X. In case a PHY negotiates a
+ * lower link speed on line side, the system-side interface remains fixed at
+ * 2500 Mbps and we do rate adaptation through pause frames.
+ */
+static void vsc9959_pcs_init_2500basex(struct phy_device *pcs,
+ unsigned int link_an_mode,
+ const struct phylink_link_state *state)
+{
+ if (link_an_mode == MLO_AN_INBAND) {
+ phydev_err(pcs, "AN not supported on 3.125GHz SerDes lane\n");
+ return;
+ }
+
+ phy_write(pcs, ENETC_PCS_IF_MODE,
+ ENETC_PCS_IF_MODE_SGMII_EN |
+ ENETC_PCS_IF_MODE_SGMII_SPEED(ENETC_PCS_SPEED_2500));
+
+ phy_write(pcs, MII_BMCR, BMCR_SPEED1000 |
+ BMCR_FULLDPLX |
+ BMCR_RESET);
+}
+
+static void vsc9959_pcs_init_usxgmii(struct phy_device *pcs,
+ unsigned int link_an_mode,
+ const struct phylink_link_state *state)
+{
+ if (link_an_mode != MLO_AN_INBAND) {
+ phydev_err(pcs, "USXGMII only supports in-band AN for now\n");
+ return;
+ }
+
+ /* Configure device ability for the USXGMII Replicator */
+ phy_write_mmd(pcs, MDIO_MMD_VEND2, MII_ADVERTISE,
+ USXGMII_ADVERTISE_SPEED(USXGMII_SPEED_2500) |
+ USXGMII_ADVERTISE_LNKS(1) |
+ ADVERTISE_SGMII |
+ ADVERTISE_LPACK |
+ USXGMII_ADVERTISE_FDX);
+}
+
+static void vsc9959_pcs_init(struct ocelot *ocelot, int port,
+ unsigned int link_an_mode,
+ const struct phylink_link_state *state)
+{
+ struct felix *felix = ocelot_to_felix(ocelot);
+ struct phy_device *pcs = felix->pcs[port];
+
+ if (!pcs)
+ return;
+
+ /* The PCS does not implement the BMSR register fully, so capability
+ * detection via genphy_read_abilities does not work. Since we can get
+ * the PHY config word from the LPA register though, there is still
+ * value in using the generic phy_resolve_aneg_linkmode function. So
+ * populate the supported and advertising link modes manually here.
+ */
+ linkmode_set_bit_array(phy_basic_ports_array,
+ ARRAY_SIZE(phy_basic_ports_array),
+ pcs->supported);
+ linkmode_set_bit(ETHTOOL_LINK_MODE_10baseT_Full_BIT, pcs->supported);
+ linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT, pcs->supported);
+ linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, pcs->supported);
+ if (pcs->interface == PHY_INTERFACE_MODE_2500BASEX ||
+ pcs->interface == PHY_INTERFACE_MODE_USXGMII)
+ linkmode_set_bit(ETHTOOL_LINK_MODE_2500baseX_Full_BIT,
+ pcs->supported);
+ if (pcs->interface != PHY_INTERFACE_MODE_2500BASEX)
+ linkmode_set_bit(ETHTOOL_LINK_MODE_Autoneg_BIT,
+ pcs->supported);
+ phy_advertise_supported(pcs);
+
+ switch (pcs->interface) {
+ case PHY_INTERFACE_MODE_SGMII:
+ case PHY_INTERFACE_MODE_QSGMII:
+ vsc9959_pcs_init_sgmii(pcs, link_an_mode, state);
+ break;
+ case PHY_INTERFACE_MODE_2500BASEX:
+ vsc9959_pcs_init_2500basex(pcs, link_an_mode, state);
+ break;
+ case PHY_INTERFACE_MODE_USXGMII:
+ vsc9959_pcs_init_usxgmii(pcs, link_an_mode, state);
+ break;
+ default:
+ dev_err(ocelot->dev, "Unsupported link mode %s\n",
+ phy_modes(pcs->interface));
+ }
+}
+
+static void vsc9959_pcs_link_state_resolve(struct phy_device *pcs,
+ struct phylink_link_state *state)
+{
+ state->an_complete = pcs->autoneg_complete;
+ state->an_enabled = pcs->autoneg;
+ state->link = pcs->link;
+ state->duplex = pcs->duplex;
+ state->speed = pcs->speed;
+ /* SGMII AN does not negotiate flow control, but that's ok,
+ * since phylink already knows that, and does:
+ * link_state.pause |= pl->phy_state.pause;
+ */
+ state->pause = MLO_PAUSE_NONE;
+
+ phydev_dbg(pcs,
+ "mode=%s/%s/%s adv=%*pb lpa=%*pb link=%u an_enabled=%u an_complete=%u\n",
+ phy_modes(pcs->interface),
+ phy_speed_to_str(pcs->speed),
+ phy_duplex_to_str(pcs->duplex),
+ __ETHTOOL_LINK_MODE_MASK_NBITS, pcs->advertising,
+ __ETHTOOL_LINK_MODE_MASK_NBITS, pcs->lp_advertising,
+ pcs->link, pcs->autoneg, pcs->autoneg_complete);
+}
+
+static void vsc9959_pcs_link_state_sgmii(struct phy_device *pcs,
+ struct phylink_link_state *state)
+{
+ int err;
+
+ err = genphy_update_link(pcs);
+ if (err < 0)
+ return;
+
+ if (pcs->autoneg_complete) {
+ u16 lpa = phy_read(pcs, MII_LPA);
+
+ mii_lpa_to_linkmode_lpa_sgmii(pcs->lp_advertising, lpa);
+
+ phy_resolve_aneg_linkmode(pcs);
+ }
+}
+
+static void vsc9959_pcs_link_state_2500basex(struct phy_device *pcs,
+ struct phylink_link_state *state)
+{
+ int err;
+
+ err = genphy_update_link(pcs);
+ if (err < 0)
+ return;
+
+ pcs->speed = SPEED_2500;
+ pcs->asym_pause = true;
+ pcs->pause = true;
+}
+
+static void vsc9959_pcs_link_state_usxgmii(struct phy_device *pcs,
+ struct phylink_link_state *state)
+{
+ int status, lpa;
+
+ status = phy_read_mmd(pcs, MDIO_MMD_VEND2, MII_BMSR);
+ if (status < 0)
+ return;
+
+ pcs->autoneg = true;
+ pcs->autoneg_complete = USXGMII_BMSR_AN_CMPL(status);
+ pcs->link = USXGMII_BMSR_LNKS(status);
+
+ if (!pcs->link || !pcs->autoneg_complete)
+ return;
+
+ lpa = phy_read_mmd(pcs, MDIO_MMD_VEND2, MII_LPA);
+ if (lpa < 0)
+ return;
+
+ switch (USXGMII_LPA_SPEED(lpa)) {
+ case USXGMII_SPEED_10:
+ pcs->speed = SPEED_10;
+ break;
+ case USXGMII_SPEED_100:
+ pcs->speed = SPEED_100;
+ break;
+ case USXGMII_SPEED_1000:
+ pcs->speed = SPEED_1000;
+ break;
+ case USXGMII_SPEED_2500:
+ pcs->speed = SPEED_2500;
+ break;
+ default:
+ break;
+ }
+
+ pcs->link = USXGMII_LPA_LNKS(lpa);
+ if (USXGMII_LPA_DUPLEX(lpa))
+ pcs->duplex = DUPLEX_FULL;
+ else
+ pcs->duplex = DUPLEX_HALF;
+}
+
+static void vsc9959_pcs_link_state(struct ocelot *ocelot, int port,
+ struct phylink_link_state *state)
+{
+ struct felix *felix = ocelot_to_felix(ocelot);
+ struct phy_device *pcs = felix->pcs[port];
+
+ if (!pcs)
+ return;
+
+ pcs->speed = SPEED_UNKNOWN;
+ pcs->duplex = DUPLEX_UNKNOWN;
+ pcs->pause = 0;
+ pcs->asym_pause = 0;
+
+ switch (pcs->interface) {
+ case PHY_INTERFACE_MODE_SGMII:
+ case PHY_INTERFACE_MODE_QSGMII:
+ vsc9959_pcs_link_state_sgmii(pcs, state);
+ break;
+ case PHY_INTERFACE_MODE_2500BASEX:
+ vsc9959_pcs_link_state_2500basex(pcs, state);
+ break;
+ case PHY_INTERFACE_MODE_USXGMII:
+ vsc9959_pcs_link_state_usxgmii(pcs, state);
+ break;
+ default:
+ return;
+ }
+
+ vsc9959_pcs_link_state_resolve(pcs, state);
+}
+
+static int vsc9959_prevalidate_phy_mode(struct ocelot *ocelot, int port,
+ phy_interface_t phy_mode)
+{
+ switch (phy_mode) {
+ case PHY_INTERFACE_MODE_GMII:
+ /* Only supported on internal to-CPU ports */
+ if (port != 4 && port != 5)
+ return -ENOTSUPP;
+ return 0;
+ case PHY_INTERFACE_MODE_SGMII:
+ case PHY_INTERFACE_MODE_QSGMII:
+ case PHY_INTERFACE_MODE_USXGMII:
+ case PHY_INTERFACE_MODE_2500BASEX:
+ /* Not supported on internal to-CPU ports */
+ if (port == 4 || port == 5)
+ return -ENOTSUPP;
+ return 0;
+ default:
+ return -ENOTSUPP;
+ }
+}
+
static const struct ocelot_ops vsc9959_ops = {
.reset = vsc9959_reset,
};
+static int vsc9959_mdio_bus_alloc(struct ocelot *ocelot)
+{
+ struct felix *felix = ocelot_to_felix(ocelot);
+ struct enetc_mdio_priv *mdio_priv;
+ struct device *dev = ocelot->dev;
+ resource_size_t imdio_base;
+ void __iomem *imdio_regs;
+ struct resource *res;
+ struct enetc_hw *hw;
+ struct mii_bus *bus;
+ int port;
+ int rc;
+
+ felix->pcs = devm_kcalloc(dev, felix->info->num_ports,
+ sizeof(struct phy_device *),
+ GFP_KERNEL);
+ if (!felix->pcs) {
+ dev_err(dev, "failed to allocate array for PCS PHYs\n");
+ return -ENOMEM;
+ }
+
+ imdio_base = pci_resource_start(felix->pdev,
+ felix->info->imdio_pci_bar);
+
+ res = felix->info->imdio_res;
+ res->flags = IORESOURCE_MEM;
+ res->start += imdio_base;
+ res->end += imdio_base;
+
+ imdio_regs = devm_ioremap_resource(dev, res);
+ if (IS_ERR(imdio_regs)) {
+ dev_err(dev, "failed to map internal MDIO registers\n");
+ return PTR_ERR(imdio_regs);
+ }
+
+ hw = enetc_hw_alloc(dev, imdio_regs);
+ if (IS_ERR(hw)) {
+ dev_err(dev, "failed to allocate ENETC HW structure\n");
+ return PTR_ERR(hw);
+ }
+
+ bus = devm_mdiobus_alloc_size(dev, sizeof(*mdio_priv));
+ if (!bus)
+ return -ENOMEM;
+
+ bus->name = "VSC9959 internal MDIO bus";
+ bus->read = enetc_mdio_read;
+ bus->write = enetc_mdio_write;
+ bus->parent = dev;
+ mdio_priv = bus->priv;
+ mdio_priv->hw = hw;
+ /* This gets added to imdio_regs, which already maps addresses
+ * starting with the proper offset.
+ */
+ mdio_priv->mdio_base = 0;
+ snprintf(bus->id, MII_BUS_ID_SIZE, "%s-imdio", dev_name(dev));
+
+ /* Needed in order to initialize the bus mutex lock */
+ rc = mdiobus_register(bus);
+ if (rc < 0) {
+ dev_err(dev, "failed to register MDIO bus\n");
+ return rc;
+ }
+
+ felix->imdio = bus;
+
+ for (port = 0; port < felix->info->num_ports; port++) {
+ struct ocelot_port *ocelot_port = ocelot->ports[port];
+ struct phy_device *pcs;
+ bool is_c45 = false;
+
+ if (ocelot_port->phy_mode == PHY_INTERFACE_MODE_USXGMII)
+ is_c45 = true;
+
+ pcs = get_phy_device(felix->imdio, port, is_c45);
+ if (IS_ERR(pcs))
+ continue;
+
+ pcs->interface = ocelot_port->phy_mode;
+ felix->pcs[port] = pcs;
+
+ dev_info(dev, "Found PCS at internal MDIO address %d\n", port);
+ }
+
+ return 0;
+}
+
+static void vsc9959_mdio_bus_free(struct ocelot *ocelot)
+{
+ struct felix *felix = ocelot_to_felix(ocelot);
+ int port;
+
+ for (port = 0; port < ocelot->num_phys_ports; port++) {
+ struct phy_device *pcs = felix->pcs[port];
+
+ if (!pcs)
+ continue;
+
+ put_device(&pcs->mdio.dev);
+ }
+ mdiobus_unregister(felix->imdio);
+}
+
struct felix_info felix_info_vsc9959 = {
.target_io_res = vsc9959_target_io_res,
.port_io_res = vsc9959_port_io_res,
+ .imdio_res = &vsc9959_imdio_res,
.regfields = vsc9959_regfields,
.map = vsc9959_regmap,
.ops = &vsc9959_ops,
@@ -579,5 +1071,12 @@ struct felix_info felix_info_vsc9959 = {
.num_stats = ARRAY_SIZE(vsc9959_stats_layout),
.shared_queue_sz = 128 * 1024,
.num_ports = 6,
- .pci_bar = 4,
+ .switch_pci_bar = 4,
+ .imdio_pci_bar = 0,
+ .mdio_bus_alloc = vsc9959_mdio_bus_alloc,
+ .mdio_bus_free = vsc9959_mdio_bus_free,
+ .pcs_init = vsc9959_pcs_init,
+ .pcs_an_restart = vsc9959_pcs_an_restart,
+ .pcs_link_state = vsc9959_pcs_link_state,
+ .prevalidate_phy_mode = vsc9959_prevalidate_phy_mode,
};
diff --git a/drivers/net/dsa/qca/Kconfig b/drivers/net/dsa/qca/Kconfig
new file mode 100644
index 000000000000..e3c8d715a18f
--- /dev/null
+++ b/drivers/net/dsa/qca/Kconfig
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0-only
+config NET_DSA_AR9331
+ tristate "Qualcomm Atheros AR9331 Ethernet switch support"
+ depends on NET_DSA
+ select NET_DSA_TAG_AR9331
+ select REGMAP
+ ---help---
+ This enables support for the Qualcomm Atheros AR9331 built-in Ethernet
+ switch.
diff --git a/drivers/net/dsa/qca/Makefile b/drivers/net/dsa/qca/Makefile
new file mode 100644
index 000000000000..274022319066
--- /dev/null
+++ b/drivers/net/dsa/qca/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+obj-$(CONFIG_NET_DSA_AR9331) += ar9331.o
diff --git a/drivers/net/dsa/qca/ar9331.c b/drivers/net/dsa/qca/ar9331.c
new file mode 100644
index 000000000000..de25f99e995a
--- /dev/null
+++ b/drivers/net/dsa/qca/ar9331.c
@@ -0,0 +1,856 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (c) 2019 Pengutronix, Oleksij Rempel <[email protected]>
+/*
+ * +----------------------+
+ * GMAC1----RGMII----|--MAC0 |
+ * \---MDIO1----|--REGs |----MDIO3----\
+ * | | | +------+
+ * | | +--| |
+ * | MAC1-|----RMII--M-----| PHY0 |-o P0
+ * | | | | +------+
+ * | | | +--| |
+ * | MAC2-|----RMII--------| PHY1 |-o P1
+ * | | | | +------+
+ * | | | +--| |
+ * | MAC3-|----RMII--------| PHY2 |-o P2
+ * | | | | +------+
+ * | | | +--| |
+ * | MAC4-|----RMII--------| PHY3 |-o P3
+ * | | | | +------+
+ * | | | +--| |
+ * | MAC5-|--+-RMII--M-----|-PHY4-|-o P4
+ * | | | | +------+
+ * +----------------------+ | \--CFG_SW_PHY_SWAP
+ * GMAC0---------------RMII--------------------/ \-CFG_SW_PHY_ADDR_SWAP
+ * \---MDIO0--NC
+ *
+ * GMAC0 and MAC5 are connected together and use same PHY. Depending on
+ * configuration it can be PHY4 (default) or PHY0. Only GMAC0 or MAC5 can be
+ * used at same time. If GMAC0 is used (default) then MAC5 should be disabled.
+ *
+ * CFG_SW_PHY_SWAP - swap connections of PHY0 and PHY4. If this bit is not set
+ * PHY4 is connected to GMAC0/MAC5 bundle and PHY0 is connected to MAC1. If this
+ * bit is set, PHY4 is connected to MAC1 and PHY0 is connected to GMAC0/MAC5
+ * bundle.
+ *
+ * CFG_SW_PHY_ADDR_SWAP - swap addresses of PHY0 and PHY4
+ *
+ * CFG_SW_PHY_SWAP and CFG_SW_PHY_ADDR_SWAP are part of SoC specific register
+ * set and not related to switch internal registers.
+ */
+
+#include <linux/bitfield.h>
+#include <linux/module.h>
+#include <linux/of_irq.h>
+#include <linux/of_mdio.h>
+#include <linux/regmap.h>
+#include <linux/reset.h>
+#include <net/dsa.h>
+
+#define AR9331_SW_NAME "ar9331_switch"
+#define AR9331_SW_PORTS 6
+
+/* dummy reg to change page */
+#define AR9331_SW_REG_PAGE 0x40000
+
+/* Global Interrupt */
+#define AR9331_SW_REG_GINT 0x10
+#define AR9331_SW_REG_GINT_MASK 0x14
+#define AR9331_SW_GINT_PHY_INT BIT(2)
+
+#define AR9331_SW_REG_FLOOD_MASK 0x2c
+#define AR9331_SW_FLOOD_MASK_BROAD_TO_CPU BIT(26)
+
+#define AR9331_SW_REG_GLOBAL_CTRL 0x30
+#define AR9331_SW_GLOBAL_CTRL_MFS_M GENMASK(13, 0)
+
+#define AR9331_SW_REG_MDIO_CTRL 0x98
+#define AR9331_SW_MDIO_CTRL_BUSY BIT(31)
+#define AR9331_SW_MDIO_CTRL_MASTER_EN BIT(30)
+#define AR9331_SW_MDIO_CTRL_CMD_READ BIT(27)
+#define AR9331_SW_MDIO_CTRL_PHY_ADDR_M GENMASK(25, 21)
+#define AR9331_SW_MDIO_CTRL_REG_ADDR_M GENMASK(20, 16)
+#define AR9331_SW_MDIO_CTRL_DATA_M GENMASK(16, 0)
+
+#define AR9331_SW_REG_PORT_STATUS(_port) (0x100 + (_port) * 0x100)
+
+/* FLOW_LINK_EN - enable mac flow control config auto-neg with phy.
+ * If not set, mac can be config by software.
+ */
+#define AR9331_SW_PORT_STATUS_FLOW_LINK_EN BIT(12)
+
+/* LINK_EN - If set, MAC is configured from PHY link status.
+ * If not set, MAC should be configured by software.
+ */
+#define AR9331_SW_PORT_STATUS_LINK_EN BIT(9)
+#define AR9331_SW_PORT_STATUS_DUPLEX_MODE BIT(6)
+#define AR9331_SW_PORT_STATUS_RX_FLOW_EN BIT(5)
+#define AR9331_SW_PORT_STATUS_TX_FLOW_EN BIT(4)
+#define AR9331_SW_PORT_STATUS_RXMAC BIT(3)
+#define AR9331_SW_PORT_STATUS_TXMAC BIT(2)
+#define AR9331_SW_PORT_STATUS_SPEED_M GENMASK(1, 0)
+#define AR9331_SW_PORT_STATUS_SPEED_1000 2
+#define AR9331_SW_PORT_STATUS_SPEED_100 1
+#define AR9331_SW_PORT_STATUS_SPEED_10 0
+
+#define AR9331_SW_PORT_STATUS_MAC_MASK \
+ (AR9331_SW_PORT_STATUS_TXMAC | AR9331_SW_PORT_STATUS_RXMAC)
+
+#define AR9331_SW_PORT_STATUS_LINK_MASK \
+ (AR9331_SW_PORT_STATUS_LINK_EN | AR9331_SW_PORT_STATUS_FLOW_LINK_EN | \
+ AR9331_SW_PORT_STATUS_DUPLEX_MODE | \
+ AR9331_SW_PORT_STATUS_RX_FLOW_EN | AR9331_SW_PORT_STATUS_TX_FLOW_EN | \
+ AR9331_SW_PORT_STATUS_SPEED_M)
+
+/* Phy bypass mode
+ * ------------------------------------------------------------------------
+ * Bit: | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 |10 |11 |12 |13 |14 |15 |
+ *
+ * real | start | OP | PhyAddr | Reg Addr | TA |
+ * atheros| start | OP | 2'b00 |PhyAdd[2:0]| Reg Addr[4:0] | TA |
+ *
+ *
+ * Bit: |16 |17 |18 |19 |20 |21 |22 |23 |24 |25 |26 |27 |28 |29 |30 |31 |
+ * real | Data |
+ * atheros| Data |
+ *
+ * ------------------------------------------------------------------------
+ * Page address mode
+ * ------------------------------------------------------------------------
+ * Bit: | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 |10 |11 |12 |13 |14 |15 |
+ * real | start | OP | PhyAddr | Reg Addr | TA |
+ * atheros| start | OP | 2'b11 | 8'b0 | TA |
+ *
+ * Bit: |16 |17 |18 |19 |20 |21 |22 |23 |24 |25 |26 |27 |28 |29 |30 |31 |
+ * real | Data |
+ * atheros| | Page [9:0] |
+ */
+/* In case of Page Address mode, Bit[18:9] of 32 bit register address should be
+ * written to bits[9:0] of mdio data register.
+ */
+#define AR9331_SW_ADDR_PAGE GENMASK(18, 9)
+
+/* ------------------------------------------------------------------------
+ * Normal register access mode
+ * ------------------------------------------------------------------------
+ * Bit: | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 |10 |11 |12 |13 |14 |15 |
+ * real | start | OP | PhyAddr | Reg Addr | TA |
+ * atheros| start | OP | 2'b10 | low_addr[7:0] | TA |
+ *
+ * Bit: |16 |17 |18 |19 |20 |21 |22 |23 |24 |25 |26 |27 |28 |29 |30 |31 |
+ * real | Data |
+ * atheros| Data |
+ * ------------------------------------------------------------------------
+ */
+#define AR9331_SW_LOW_ADDR_PHY GENMASK(8, 6)
+#define AR9331_SW_LOW_ADDR_REG GENMASK(5, 1)
+
+#define AR9331_SW_MDIO_PHY_MODE_M GENMASK(4, 3)
+#define AR9331_SW_MDIO_PHY_MODE_PAGE 3
+#define AR9331_SW_MDIO_PHY_MODE_REG 2
+#define AR9331_SW_MDIO_PHY_MODE_BYPASS 0
+#define AR9331_SW_MDIO_PHY_ADDR_M GENMASK(2, 0)
+
+/* Empirical determined values */
+#define AR9331_SW_MDIO_POLL_SLEEP_US 1
+#define AR9331_SW_MDIO_POLL_TIMEOUT_US 20
+
+struct ar9331_sw_priv {
+ struct device *dev;
+ struct dsa_switch ds;
+ struct dsa_switch_ops ops;
+ struct irq_domain *irqdomain;
+ struct mii_bus *mbus; /* mdio master */
+ struct mii_bus *sbus; /* mdio slave */
+ struct regmap *regmap;
+ struct reset_control *sw_reset;
+};
+
+/* Warning: switch reset will reset last AR9331_SW_MDIO_PHY_MODE_PAGE request
+ * If some kind of optimization is used, the request should be repeated.
+ */
+static int ar9331_sw_reset(struct ar9331_sw_priv *priv)
+{
+ int ret;
+
+ ret = reset_control_assert(priv->sw_reset);
+ if (ret)
+ goto error;
+
+ /* AR9331 doc do not provide any information about proper reset
+ * sequence. The AR8136 (the closes switch to the AR9331) doc says:
+ * reset duration should be greater than 10ms. So, let's use this value
+ * for now.
+ */
+ usleep_range(10000, 15000);
+ ret = reset_control_deassert(priv->sw_reset);
+ if (ret)
+ goto error;
+ /* There is no information on how long should we wait after reset.
+ * AR8136 has an EEPROM and there is an Interrupt for EEPROM load
+ * status. AR9331 has no EEPROM support.
+ * For now, do not wait. In case AR8136 will be needed, the after
+ * reset delay can be added as well.
+ */
+
+ return 0;
+error:
+ dev_err_ratelimited(priv->dev, "%s: %i\n", __func__, ret);
+ return ret;
+}
+
+static int ar9331_sw_mbus_write(struct mii_bus *mbus, int port, int regnum,
+ u16 data)
+{
+ struct ar9331_sw_priv *priv = mbus->priv;
+ struct regmap *regmap = priv->regmap;
+ u32 val;
+ int ret;
+
+ ret = regmap_write(regmap, AR9331_SW_REG_MDIO_CTRL,
+ AR9331_SW_MDIO_CTRL_BUSY |
+ AR9331_SW_MDIO_CTRL_MASTER_EN |
+ FIELD_PREP(AR9331_SW_MDIO_CTRL_PHY_ADDR_M, port) |
+ FIELD_PREP(AR9331_SW_MDIO_CTRL_REG_ADDR_M, regnum) |
+ FIELD_PREP(AR9331_SW_MDIO_CTRL_DATA_M, data));
+ if (ret)
+ goto error;
+
+ ret = regmap_read_poll_timeout(regmap, AR9331_SW_REG_MDIO_CTRL, val,
+ !(val & AR9331_SW_MDIO_CTRL_BUSY),
+ AR9331_SW_MDIO_POLL_SLEEP_US,
+ AR9331_SW_MDIO_POLL_TIMEOUT_US);
+ if (ret)
+ goto error;
+
+ return 0;
+error:
+ dev_err_ratelimited(priv->dev, "PHY write error: %i\n", ret);
+ return ret;
+}
+
+static int ar9331_sw_mbus_read(struct mii_bus *mbus, int port, int regnum)
+{
+ struct ar9331_sw_priv *priv = mbus->priv;
+ struct regmap *regmap = priv->regmap;
+ u32 val;
+ int ret;
+
+ ret = regmap_write(regmap, AR9331_SW_REG_MDIO_CTRL,
+ AR9331_SW_MDIO_CTRL_BUSY |
+ AR9331_SW_MDIO_CTRL_MASTER_EN |
+ AR9331_SW_MDIO_CTRL_CMD_READ |
+ FIELD_PREP(AR9331_SW_MDIO_CTRL_PHY_ADDR_M, port) |
+ FIELD_PREP(AR9331_SW_MDIO_CTRL_REG_ADDR_M, regnum));
+ if (ret)
+ goto error;
+
+ ret = regmap_read_poll_timeout(regmap, AR9331_SW_REG_MDIO_CTRL, val,
+ !(val & AR9331_SW_MDIO_CTRL_BUSY),
+ AR9331_SW_MDIO_POLL_SLEEP_US,
+ AR9331_SW_MDIO_POLL_TIMEOUT_US);
+ if (ret)
+ goto error;
+
+ ret = regmap_read(regmap, AR9331_SW_REG_MDIO_CTRL, &val);
+ if (ret)
+ goto error;
+
+ return FIELD_GET(AR9331_SW_MDIO_CTRL_DATA_M, val);
+
+error:
+ dev_err_ratelimited(priv->dev, "PHY read error: %i\n", ret);
+ return ret;
+}
+
+static int ar9331_sw_mbus_init(struct ar9331_sw_priv *priv)
+{
+ struct device *dev = priv->dev;
+ struct mii_bus *mbus;
+ struct device_node *np, *mnp;
+ int ret;
+
+ np = dev->of_node;
+
+ mbus = devm_mdiobus_alloc(dev);
+ if (!mbus)
+ return -ENOMEM;
+
+ mbus->name = np->full_name;
+ snprintf(mbus->id, MII_BUS_ID_SIZE, "%pOF", np);
+
+ mbus->read = ar9331_sw_mbus_read;
+ mbus->write = ar9331_sw_mbus_write;
+ mbus->priv = priv;
+ mbus->parent = dev;
+
+ mnp = of_get_child_by_name(np, "mdio");
+ if (!mnp)
+ return -ENODEV;
+
+ ret = of_mdiobus_register(mbus, mnp);
+ of_node_put(mnp);
+ if (ret)
+ return ret;
+
+ priv->mbus = mbus;
+
+ return 0;
+}
+
+static int ar9331_sw_setup(struct dsa_switch *ds)
+{
+ struct ar9331_sw_priv *priv = (struct ar9331_sw_priv *)ds->priv;
+ struct regmap *regmap = priv->regmap;
+ int ret;
+
+ ret = ar9331_sw_reset(priv);
+ if (ret)
+ return ret;
+
+ /* Reset will set proper defaults. CPU - Port0 will be enabled and
+ * configured. All other ports (ports 1 - 5) are disabled
+ */
+ ret = ar9331_sw_mbus_init(priv);
+ if (ret)
+ return ret;
+
+ /* Do not drop broadcast frames */
+ ret = regmap_write_bits(regmap, AR9331_SW_REG_FLOOD_MASK,
+ AR9331_SW_FLOOD_MASK_BROAD_TO_CPU,
+ AR9331_SW_FLOOD_MASK_BROAD_TO_CPU);
+ if (ret)
+ goto error;
+
+ /* Set max frame size to the maximum supported value */
+ ret = regmap_write_bits(regmap, AR9331_SW_REG_GLOBAL_CTRL,
+ AR9331_SW_GLOBAL_CTRL_MFS_M,
+ AR9331_SW_GLOBAL_CTRL_MFS_M);
+ if (ret)
+ goto error;
+
+ return 0;
+error:
+ dev_err_ratelimited(priv->dev, "%s: %i\n", __func__, ret);
+ return ret;
+}
+
+static void ar9331_sw_port_disable(struct dsa_switch *ds, int port)
+{
+ struct ar9331_sw_priv *priv = (struct ar9331_sw_priv *)ds->priv;
+ struct regmap *regmap = priv->regmap;
+ int ret;
+
+ ret = regmap_write(regmap, AR9331_SW_REG_PORT_STATUS(port), 0);
+ if (ret)
+ dev_err_ratelimited(priv->dev, "%s: %i\n", __func__, ret);
+}
+
+static enum dsa_tag_protocol ar9331_sw_get_tag_protocol(struct dsa_switch *ds,
+ int port,
+ enum dsa_tag_protocol m)
+{
+ return DSA_TAG_PROTO_AR9331;
+}
+
+static void ar9331_sw_phylink_validate(struct dsa_switch *ds, int port,
+ unsigned long *supported,
+ struct phylink_link_state *state)
+{
+ __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, };
+
+ switch (port) {
+ case 0:
+ if (state->interface != PHY_INTERFACE_MODE_GMII)
+ goto unsupported;
+
+ phylink_set(mask, 1000baseT_Full);
+ phylink_set(mask, 1000baseT_Half);
+ break;
+ case 1:
+ case 2:
+ case 3:
+ case 4:
+ case 5:
+ if (state->interface != PHY_INTERFACE_MODE_INTERNAL)
+ goto unsupported;
+ break;
+ default:
+ bitmap_zero(supported, __ETHTOOL_LINK_MODE_MASK_NBITS);
+ dev_err(ds->dev, "Unsupported port: %i\n", port);
+ return;
+ }
+
+ phylink_set_port_modes(mask);
+ phylink_set(mask, Pause);
+ phylink_set(mask, Asym_Pause);
+
+ phylink_set(mask, 10baseT_Half);
+ phylink_set(mask, 10baseT_Full);
+ phylink_set(mask, 100baseT_Half);
+ phylink_set(mask, 100baseT_Full);
+
+ bitmap_and(supported, supported, mask,
+ __ETHTOOL_LINK_MODE_MASK_NBITS);
+ bitmap_and(state->advertising, state->advertising, mask,
+ __ETHTOOL_LINK_MODE_MASK_NBITS);
+
+ return;
+
+unsupported:
+ bitmap_zero(supported, __ETHTOOL_LINK_MODE_MASK_NBITS);
+ dev_err(ds->dev, "Unsupported interface: %d, port: %d\n",
+ state->interface, port);
+}
+
+static void ar9331_sw_phylink_mac_config(struct dsa_switch *ds, int port,
+ unsigned int mode,
+ const struct phylink_link_state *state)
+{
+ struct ar9331_sw_priv *priv = (struct ar9331_sw_priv *)ds->priv;
+ struct regmap *regmap = priv->regmap;
+ int ret;
+ u32 val;
+
+ switch (state->speed) {
+ case SPEED_1000:
+ val = AR9331_SW_PORT_STATUS_SPEED_1000;
+ break;
+ case SPEED_100:
+ val = AR9331_SW_PORT_STATUS_SPEED_100;
+ break;
+ case SPEED_10:
+ val = AR9331_SW_PORT_STATUS_SPEED_10;
+ break;
+ default:
+ return;
+ }
+
+ if (state->duplex)
+ val |= AR9331_SW_PORT_STATUS_DUPLEX_MODE;
+
+ if (state->pause & MLO_PAUSE_TX)
+ val |= AR9331_SW_PORT_STATUS_TX_FLOW_EN;
+
+ if (state->pause & MLO_PAUSE_RX)
+ val |= AR9331_SW_PORT_STATUS_RX_FLOW_EN;
+
+ ret = regmap_update_bits(regmap, AR9331_SW_REG_PORT_STATUS(port),
+ AR9331_SW_PORT_STATUS_LINK_MASK, val);
+ if (ret)
+ dev_err_ratelimited(priv->dev, "%s: %i\n", __func__, ret);
+}
+
+static void ar9331_sw_phylink_mac_link_down(struct dsa_switch *ds, int port,
+ unsigned int mode,
+ phy_interface_t interface)
+{
+ struct ar9331_sw_priv *priv = (struct ar9331_sw_priv *)ds->priv;
+ struct regmap *regmap = priv->regmap;
+ int ret;
+
+ ret = regmap_update_bits(regmap, AR9331_SW_REG_PORT_STATUS(port),
+ AR9331_SW_PORT_STATUS_MAC_MASK, 0);
+ if (ret)
+ dev_err_ratelimited(priv->dev, "%s: %i\n", __func__, ret);
+}
+
+static void ar9331_sw_phylink_mac_link_up(struct dsa_switch *ds, int port,
+ unsigned int mode,
+ phy_interface_t interface,
+ struct phy_device *phydev)
+{
+ struct ar9331_sw_priv *priv = (struct ar9331_sw_priv *)ds->priv;
+ struct regmap *regmap = priv->regmap;
+ int ret;
+
+ ret = regmap_update_bits(regmap, AR9331_SW_REG_PORT_STATUS(port),
+ AR9331_SW_PORT_STATUS_MAC_MASK,
+ AR9331_SW_PORT_STATUS_MAC_MASK);
+ if (ret)
+ dev_err_ratelimited(priv->dev, "%s: %i\n", __func__, ret);
+}
+
+static const struct dsa_switch_ops ar9331_sw_ops = {
+ .get_tag_protocol = ar9331_sw_get_tag_protocol,
+ .setup = ar9331_sw_setup,
+ .port_disable = ar9331_sw_port_disable,
+ .phylink_validate = ar9331_sw_phylink_validate,
+ .phylink_mac_config = ar9331_sw_phylink_mac_config,
+ .phylink_mac_link_down = ar9331_sw_phylink_mac_link_down,
+ .phylink_mac_link_up = ar9331_sw_phylink_mac_link_up,
+};
+
+static irqreturn_t ar9331_sw_irq(int irq, void *data)
+{
+ struct ar9331_sw_priv *priv = data;
+ struct regmap *regmap = priv->regmap;
+ u32 stat;
+ int ret;
+
+ ret = regmap_read(regmap, AR9331_SW_REG_GINT, &stat);
+ if (ret) {
+ dev_err(priv->dev, "can't read interrupt status\n");
+ return IRQ_NONE;
+ }
+
+ if (!stat)
+ return IRQ_NONE;
+
+ if (stat & AR9331_SW_GINT_PHY_INT) {
+ int child_irq;
+
+ child_irq = irq_find_mapping(priv->irqdomain, 0);
+ handle_nested_irq(child_irq);
+ }
+
+ ret = regmap_write(regmap, AR9331_SW_REG_GINT, stat);
+ if (ret) {
+ dev_err(priv->dev, "can't write interrupt status\n");
+ return IRQ_NONE;
+ }
+
+ return IRQ_HANDLED;
+}
+
+static void ar9331_sw_mask_irq(struct irq_data *d)
+{
+ struct ar9331_sw_priv *priv = irq_data_get_irq_chip_data(d);
+ struct regmap *regmap = priv->regmap;
+ int ret;
+
+ ret = regmap_update_bits(regmap, AR9331_SW_REG_GINT_MASK,
+ AR9331_SW_GINT_PHY_INT, 0);
+ if (ret)
+ dev_err(priv->dev, "could not mask IRQ\n");
+}
+
+static void ar9331_sw_unmask_irq(struct irq_data *d)
+{
+ struct ar9331_sw_priv *priv = irq_data_get_irq_chip_data(d);
+ struct regmap *regmap = priv->regmap;
+ int ret;
+
+ ret = regmap_update_bits(regmap, AR9331_SW_REG_GINT_MASK,
+ AR9331_SW_GINT_PHY_INT,
+ AR9331_SW_GINT_PHY_INT);
+ if (ret)
+ dev_err(priv->dev, "could not unmask IRQ\n");
+}
+
+static struct irq_chip ar9331_sw_irq_chip = {
+ .name = AR9331_SW_NAME,
+ .irq_mask = ar9331_sw_mask_irq,
+ .irq_unmask = ar9331_sw_unmask_irq,
+};
+
+static int ar9331_sw_irq_map(struct irq_domain *domain, unsigned int irq,
+ irq_hw_number_t hwirq)
+{
+ irq_set_chip_data(irq, domain->host_data);
+ irq_set_chip_and_handler(irq, &ar9331_sw_irq_chip, handle_simple_irq);
+ irq_set_nested_thread(irq, 1);
+ irq_set_noprobe(irq);
+
+ return 0;
+}
+
+static void ar9331_sw_irq_unmap(struct irq_domain *d, unsigned int irq)
+{
+ irq_set_nested_thread(irq, 0);
+ irq_set_chip_and_handler(irq, NULL, NULL);
+ irq_set_chip_data(irq, NULL);
+}
+
+static const struct irq_domain_ops ar9331_sw_irqdomain_ops = {
+ .map = ar9331_sw_irq_map,
+ .unmap = ar9331_sw_irq_unmap,
+ .xlate = irq_domain_xlate_onecell,
+};
+
+static int ar9331_sw_irq_init(struct ar9331_sw_priv *priv)
+{
+ struct device_node *np = priv->dev->of_node;
+ struct device *dev = priv->dev;
+ int ret, irq;
+
+ irq = of_irq_get(np, 0);
+ if (irq <= 0) {
+ dev_err(dev, "failed to get parent IRQ\n");
+ return irq ? irq : -EINVAL;
+ }
+
+ ret = devm_request_threaded_irq(dev, irq, NULL, ar9331_sw_irq,
+ IRQF_ONESHOT, AR9331_SW_NAME, priv);
+ if (ret) {
+ dev_err(dev, "unable to request irq: %d\n", ret);
+ return ret;
+ }
+
+ priv->irqdomain = irq_domain_add_linear(np, 1, &ar9331_sw_irqdomain_ops,
+ priv);
+ if (!priv->irqdomain) {
+ dev_err(dev, "failed to create IRQ domain\n");
+ return -EINVAL;
+ }
+
+ irq_set_parent(irq_create_mapping(priv->irqdomain, 0), irq);
+
+ return 0;
+}
+
+static int __ar9331_mdio_write(struct mii_bus *sbus, u8 mode, u16 reg, u16 val)
+{
+ u8 r, p;
+
+ p = FIELD_PREP(AR9331_SW_MDIO_PHY_MODE_M, mode) |
+ FIELD_GET(AR9331_SW_LOW_ADDR_PHY, reg);
+ r = FIELD_GET(AR9331_SW_LOW_ADDR_REG, reg);
+
+ return mdiobus_write(sbus, p, r, val);
+}
+
+static int __ar9331_mdio_read(struct mii_bus *sbus, u16 reg)
+{
+ u8 r, p;
+
+ p = FIELD_PREP(AR9331_SW_MDIO_PHY_MODE_M, AR9331_SW_MDIO_PHY_MODE_REG) |
+ FIELD_GET(AR9331_SW_LOW_ADDR_PHY, reg);
+ r = FIELD_GET(AR9331_SW_LOW_ADDR_REG, reg);
+
+ return mdiobus_read(sbus, p, r);
+}
+
+static int ar9331_mdio_read(void *ctx, const void *reg_buf, size_t reg_len,
+ void *val_buf, size_t val_len)
+{
+ struct ar9331_sw_priv *priv = ctx;
+ struct mii_bus *sbus = priv->sbus;
+ u32 reg = *(u32 *)reg_buf;
+ int ret;
+
+ if (reg == AR9331_SW_REG_PAGE) {
+ /* We cannot read the page selector register from hardware and
+ * we cache its value in regmap. Return all bits set here,
+ * that regmap will always write the page on first use.
+ */
+ *(u32 *)val_buf = GENMASK(9, 0);
+ return 0;
+ }
+
+ ret = __ar9331_mdio_read(sbus, reg);
+ if (ret < 0)
+ goto error;
+
+ *(u32 *)val_buf = ret;
+ ret = __ar9331_mdio_read(sbus, reg + 2);
+ if (ret < 0)
+ goto error;
+
+ *(u32 *)val_buf |= ret << 16;
+
+ return 0;
+error:
+ dev_err_ratelimited(&sbus->dev, "Bus error. Failed to read register.\n");
+ return ret;
+}
+
+static int ar9331_mdio_write(void *ctx, u32 reg, u32 val)
+{
+ struct ar9331_sw_priv *priv = (struct ar9331_sw_priv *)ctx;
+ struct mii_bus *sbus = priv->sbus;
+ int ret;
+
+ if (reg == AR9331_SW_REG_PAGE) {
+ ret = __ar9331_mdio_write(sbus, AR9331_SW_MDIO_PHY_MODE_PAGE,
+ 0, val);
+ if (ret < 0)
+ goto error;
+
+ return 0;
+ }
+
+ ret = __ar9331_mdio_write(sbus, AR9331_SW_MDIO_PHY_MODE_REG, reg, val);
+ if (ret < 0)
+ goto error;
+
+ ret = __ar9331_mdio_write(sbus, AR9331_SW_MDIO_PHY_MODE_REG, reg + 2,
+ val >> 16);
+ if (ret < 0)
+ goto error;
+
+ return 0;
+error:
+ dev_err_ratelimited(&sbus->dev, "Bus error. Failed to write register.\n");
+ return ret;
+}
+
+static int ar9331_sw_bus_write(void *context, const void *data, size_t count)
+{
+ u32 reg = *(u32 *)data;
+ u32 val = *((u32 *)data + 1);
+
+ return ar9331_mdio_write(context, reg, val);
+}
+
+static const struct regmap_range ar9331_valid_regs[] = {
+ regmap_reg_range(0x0, 0x0),
+ regmap_reg_range(0x10, 0x14),
+ regmap_reg_range(0x20, 0x24),
+ regmap_reg_range(0x2c, 0x30),
+ regmap_reg_range(0x40, 0x44),
+ regmap_reg_range(0x50, 0x78),
+ regmap_reg_range(0x80, 0x98),
+
+ regmap_reg_range(0x100, 0x120),
+ regmap_reg_range(0x200, 0x220),
+ regmap_reg_range(0x300, 0x320),
+ regmap_reg_range(0x400, 0x420),
+ regmap_reg_range(0x500, 0x520),
+ regmap_reg_range(0x600, 0x620),
+
+ regmap_reg_range(0x20000, 0x200a4),
+ regmap_reg_range(0x20100, 0x201a4),
+ regmap_reg_range(0x20200, 0x202a4),
+ regmap_reg_range(0x20300, 0x203a4),
+ regmap_reg_range(0x20400, 0x204a4),
+ regmap_reg_range(0x20500, 0x205a4),
+
+ /* dummy page selector reg */
+ regmap_reg_range(AR9331_SW_REG_PAGE, AR9331_SW_REG_PAGE),
+};
+
+static const struct regmap_range ar9331_nonvolatile_regs[] = {
+ regmap_reg_range(AR9331_SW_REG_PAGE, AR9331_SW_REG_PAGE),
+};
+
+static const struct regmap_range_cfg ar9331_regmap_range[] = {
+ {
+ .selector_reg = AR9331_SW_REG_PAGE,
+ .selector_mask = GENMASK(9, 0),
+ .selector_shift = 0,
+
+ .window_start = 0,
+ .window_len = 512,
+
+ .range_min = 0,
+ .range_max = AR9331_SW_REG_PAGE - 4,
+ },
+};
+
+static const struct regmap_access_table ar9331_register_set = {
+ .yes_ranges = ar9331_valid_regs,
+ .n_yes_ranges = ARRAY_SIZE(ar9331_valid_regs),
+};
+
+static const struct regmap_access_table ar9331_volatile_set = {
+ .no_ranges = ar9331_nonvolatile_regs,
+ .n_no_ranges = ARRAY_SIZE(ar9331_nonvolatile_regs),
+};
+
+static const struct regmap_config ar9331_mdio_regmap_config = {
+ .reg_bits = 32,
+ .val_bits = 32,
+ .reg_stride = 4,
+ .max_register = AR9331_SW_REG_PAGE,
+
+ .ranges = ar9331_regmap_range,
+ .num_ranges = ARRAY_SIZE(ar9331_regmap_range),
+
+ .volatile_table = &ar9331_volatile_set,
+ .wr_table = &ar9331_register_set,
+ .rd_table = &ar9331_register_set,
+
+ .cache_type = REGCACHE_RBTREE,
+};
+
+static struct regmap_bus ar9331_sw_bus = {
+ .reg_format_endian_default = REGMAP_ENDIAN_NATIVE,
+ .val_format_endian_default = REGMAP_ENDIAN_NATIVE,
+ .read = ar9331_mdio_read,
+ .write = ar9331_sw_bus_write,
+ .max_raw_read = 4,
+ .max_raw_write = 4,
+};
+
+static int ar9331_sw_probe(struct mdio_device *mdiodev)
+{
+ struct ar9331_sw_priv *priv;
+ struct dsa_switch *ds;
+ int ret;
+
+ priv = devm_kzalloc(&mdiodev->dev, sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ priv->regmap = devm_regmap_init(&mdiodev->dev, &ar9331_sw_bus, priv,
+ &ar9331_mdio_regmap_config);
+ if (IS_ERR(priv->regmap)) {
+ ret = PTR_ERR(priv->regmap);
+ dev_err(&mdiodev->dev, "regmap init failed: %d\n", ret);
+ return ret;
+ }
+
+ priv->sw_reset = devm_reset_control_get(&mdiodev->dev, "switch");
+ if (IS_ERR(priv->sw_reset)) {
+ dev_err(&mdiodev->dev, "missing switch reset\n");
+ return PTR_ERR(priv->sw_reset);
+ }
+
+ priv->sbus = mdiodev->bus;
+ priv->dev = &mdiodev->dev;
+
+ ret = ar9331_sw_irq_init(priv);
+ if (ret)
+ return ret;
+
+ ds = &priv->ds;
+ ds->dev = &mdiodev->dev;
+ ds->num_ports = AR9331_SW_PORTS;
+ ds->priv = priv;
+ priv->ops = ar9331_sw_ops;
+ ds->ops = &priv->ops;
+ dev_set_drvdata(&mdiodev->dev, priv);
+
+ ret = dsa_register_switch(ds);
+ if (ret)
+ goto err_remove_irq;
+
+ return 0;
+
+err_remove_irq:
+ irq_domain_remove(priv->irqdomain);
+
+ return ret;
+}
+
+static void ar9331_sw_remove(struct mdio_device *mdiodev)
+{
+ struct ar9331_sw_priv *priv = dev_get_drvdata(&mdiodev->dev);
+
+ irq_domain_remove(priv->irqdomain);
+ mdiobus_unregister(priv->mbus);
+ dsa_unregister_switch(&priv->ds);
+
+ reset_control_assert(priv->sw_reset);
+}
+
+static const struct of_device_id ar9331_sw_of_match[] = {
+ { .compatible = "qca,ar9331-switch" },
+ { },
+};
+
+static struct mdio_driver ar9331_sw_mdio_driver = {
+ .probe = ar9331_sw_probe,
+ .remove = ar9331_sw_remove,
+ .mdiodrv.driver = {
+ .name = AR9331_SW_NAME,
+ .of_match_table = ar9331_sw_of_match,
+ },
+};
+
+mdio_module_driver(ar9331_sw_mdio_driver);
+
+MODULE_AUTHOR("Oleksij Rempel <[email protected]>");
+MODULE_DESCRIPTION("Driver for Atheros AR9331 switch");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c
index e548289df31e..9f4205b4439b 100644
--- a/drivers/net/dsa/qca8k.c
+++ b/drivers/net/dsa/qca8k.c
@@ -1017,7 +1017,8 @@ qca8k_port_fdb_dump(struct dsa_switch *ds, int port,
}
static enum dsa_tag_protocol
-qca8k_get_tag_protocol(struct dsa_switch *ds, int port)
+qca8k_get_tag_protocol(struct dsa_switch *ds, int port,
+ enum dsa_tag_protocol mp)
{
return DSA_TAG_PROTO_QCA;
}
diff --git a/drivers/net/dsa/rtl8366rb.c b/drivers/net/dsa/rtl8366rb.c
index f5cc8b0a7c74..fd1977590cb4 100644
--- a/drivers/net/dsa/rtl8366rb.c
+++ b/drivers/net/dsa/rtl8366rb.c
@@ -964,7 +964,8 @@ static int rtl8366rb_setup(struct dsa_switch *ds)
}
static enum dsa_tag_protocol rtl8366_get_tag_protocol(struct dsa_switch *ds,
- int port)
+ int port,
+ enum dsa_tag_protocol mp)
{
/* For now, the RTL switches are handled without any custom tags.
*
diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index a51ac088c0bc..784e6b8166a0 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -426,14 +426,6 @@ static int sja1105_init_general_params(struct sja1105_private *priv)
.tpid2 = ETH_P_SJA1105,
};
struct sja1105_table *table;
- int i, k = 0;
-
- for (i = 0; i < SJA1105_NUM_PORTS; i++) {
- if (dsa_is_dsa_port(priv->ds, i))
- default_general_params.casc_port = i;
- else if (dsa_is_user_port(priv->ds, i))
- priv->ports[i].mgmt_slot = k++;
- }
table = &priv->static_config.tables[BLK_IDX_GENERAL_PARAMS];
@@ -1542,7 +1534,8 @@ static int sja1105_setup_8021q_tagging(struct dsa_switch *ds, bool enabled)
}
static enum dsa_tag_protocol
-sja1105_get_tag_protocol(struct dsa_switch *ds, int port)
+sja1105_get_tag_protocol(struct dsa_switch *ds, int port,
+ enum dsa_tag_protocol mp)
{
return DSA_TAG_PROTO_SJA1105;
}
@@ -1569,8 +1562,8 @@ static int sja1105_vlan_filtering(struct dsa_switch *ds, int port, bool enabled)
if (enabled) {
/* Enable VLAN filtering. */
- tpid = ETH_P_8021AD;
- tpid2 = ETH_P_8021Q;
+ tpid = ETH_P_8021Q;
+ tpid2 = ETH_P_8021AD;
} else {
/* Disable VLAN filtering. */
tpid = ETH_P_SJA1105;
@@ -1579,9 +1572,9 @@ static int sja1105_vlan_filtering(struct dsa_switch *ds, int port, bool enabled)
table = &priv->static_config.tables[BLK_IDX_GENERAL_PARAMS];
general_params = table->entries;
- /* EtherType used to identify outer tagged (S-tag) VLAN traffic */
- general_params->tpid = tpid;
/* EtherType used to identify inner tagged (C-tag) VLAN traffic */
+ general_params->tpid = tpid;
+ /* EtherType used to identify outer tagged (S-tag) VLAN traffic */
general_params->tpid2 = tpid2;
/* When VLAN filtering is on, we need to at least be able to
* decode management traffic through the "backup plan".
@@ -1740,6 +1733,16 @@ static int sja1105_setup(struct dsa_switch *ds)
static void sja1105_teardown(struct dsa_switch *ds)
{
struct sja1105_private *priv = ds->priv;
+ int port;
+
+ for (port = 0; port < SJA1105_NUM_PORTS; port++) {
+ struct sja1105_port *sp = &priv->ports[port];
+
+ if (!dsa_is_user_port(ds, port))
+ continue;
+
+ kthread_destroy_worker(sp->xmit_worker);
+ }
sja1105_tas_teardown(ds);
sja1105_ptp_clock_unregister(ds);
@@ -1761,6 +1764,18 @@ static int sja1105_port_enable(struct dsa_switch *ds, int port,
return 0;
}
+static void sja1105_port_disable(struct dsa_switch *ds, int port)
+{
+ struct sja1105_private *priv = ds->priv;
+ struct sja1105_port *sp = &priv->ports[port];
+
+ if (!dsa_is_user_port(ds, port))
+ return;
+
+ kthread_cancel_work_sync(&sp->xmit_work);
+ skb_queue_purge(&sp->xmit_queue);
+}
+
static int sja1105_mgmt_xmit(struct dsa_switch *ds, int port, int slot,
struct sk_buff *skb, bool takets)
{
@@ -1819,47 +1834,36 @@ static int sja1105_mgmt_xmit(struct dsa_switch *ds, int port, int slot,
return NETDEV_TX_OK;
}
+#define work_to_port(work) \
+ container_of((work), struct sja1105_port, xmit_work)
+#define tagger_to_sja1105(t) \
+ container_of((t), struct sja1105_private, tagger_data)
+
/* Deferred work is unfortunately necessary because setting up the management
* route cannot be done from atomit context (SPI transfer takes a sleepable
* lock on the bus)
*/
-static netdev_tx_t sja1105_port_deferred_xmit(struct dsa_switch *ds, int port,
- struct sk_buff *skb)
+static void sja1105_port_deferred_xmit(struct kthread_work *work)
{
- struct sja1105_private *priv = ds->priv;
- struct sja1105_port *sp = &priv->ports[port];
- int slot = sp->mgmt_slot;
- struct sk_buff *clone;
-
- /* The tragic fact about the switch having 4x2 slots for installing
- * management routes is that all of them except one are actually
- * useless.
- * If 2 slots are simultaneously configured for two BPDUs sent to the
- * same (multicast) DMAC but on different egress ports, the switch
- * would confuse them and redirect first frame it receives on the CPU
- * port towards the port configured on the numerically first slot
- * (therefore wrong port), then second received frame on second slot
- * (also wrong port).
- * So for all practical purposes, there needs to be a lock that
- * prevents that from happening. The slot used here is utterly useless
- * (could have simply been 0 just as fine), but we are doing it
- * nonetheless, in case a smarter idea ever comes up in the future.
- */
- mutex_lock(&priv->mgmt_lock);
+ struct sja1105_port *sp = work_to_port(work);
+ struct sja1105_tagger_data *tagger_data = sp->data;
+ struct sja1105_private *priv = tagger_to_sja1105(tagger_data);
+ int port = sp - priv->ports;
+ struct sk_buff *skb;
- /* The clone, if there, was made by dsa_skb_tx_timestamp */
- clone = DSA_SKB_CB(skb)->clone;
+ while ((skb = skb_dequeue(&sp->xmit_queue)) != NULL) {
+ struct sk_buff *clone = DSA_SKB_CB(skb)->clone;
- sja1105_mgmt_xmit(ds, port, slot, skb, !!clone);
+ mutex_lock(&priv->mgmt_lock);
- if (!clone)
- goto out;
+ sja1105_mgmt_xmit(priv->ds, port, 0, skb, !!clone);
- sja1105_ptp_txtstamp_skb(ds, slot, clone);
+ /* The clone, if there, was made by dsa_skb_tx_timestamp */
+ if (clone)
+ sja1105_ptp_txtstamp_skb(priv->ds, port, clone);
-out:
- mutex_unlock(&priv->mgmt_lock);
- return NETDEV_TX_OK;
+ mutex_unlock(&priv->mgmt_lock);
+ }
}
/* The MAXAGE setting belongs to the L2 Forwarding Parameters table,
@@ -1990,6 +1994,7 @@ static const struct dsa_switch_ops sja1105_switch_ops = {
.get_sset_count = sja1105_get_sset_count,
.get_ts_info = sja1105_get_ts_info,
.port_enable = sja1105_port_enable,
+ .port_disable = sja1105_port_disable,
.port_fdb_dump = sja1105_fdb_dump,
.port_fdb_add = sja1105_fdb_add,
.port_fdb_del = sja1105_fdb_del,
@@ -2003,7 +2008,6 @@ static const struct dsa_switch_ops sja1105_switch_ops = {
.port_mdb_prepare = sja1105_mdb_prepare,
.port_mdb_add = sja1105_mdb_add,
.port_mdb_del = sja1105_mdb_del,
- .port_deferred_xmit = sja1105_port_deferred_xmit,
.port_hwtstamp_get = sja1105_hwtstamp_get,
.port_hwtstamp_set = sja1105_hwtstamp_set,
.port_rxtstamp = sja1105_port_rxtstamp,
@@ -2055,7 +2059,7 @@ static int sja1105_probe(struct spi_device *spi)
struct device *dev = &spi->dev;
struct sja1105_private *priv;
struct dsa_switch *ds;
- int rc, i;
+ int rc, port;
if (!dev->of_node) {
dev_err(dev, "No DTS bindings for SJA1105 driver\n");
@@ -2120,15 +2124,42 @@ static int sja1105_probe(struct spi_device *spi)
return rc;
/* Connections between dsa_port and sja1105_port */
- for (i = 0; i < SJA1105_NUM_PORTS; i++) {
- struct sja1105_port *sp = &priv->ports[i];
+ for (port = 0; port < SJA1105_NUM_PORTS; port++) {
+ struct sja1105_port *sp = &priv->ports[port];
+ struct dsa_port *dp = dsa_to_port(ds, port);
+ struct net_device *slave;
- dsa_to_port(ds, i)->priv = sp;
- sp->dp = dsa_to_port(ds, i);
+ if (!dsa_is_user_port(ds, port))
+ continue;
+
+ dp->priv = sp;
+ sp->dp = dp;
sp->data = tagger_data;
+ slave = dp->slave;
+ kthread_init_work(&sp->xmit_work, sja1105_port_deferred_xmit);
+ sp->xmit_worker = kthread_create_worker(0, "%s_xmit",
+ slave->name);
+ if (IS_ERR(sp->xmit_worker)) {
+ rc = PTR_ERR(sp->xmit_worker);
+ dev_err(ds->dev,
+ "failed to create deferred xmit thread: %d\n",
+ rc);
+ goto out;
+ }
+ skb_queue_head_init(&sp->xmit_queue);
}
return 0;
+out:
+ while (port-- > 0) {
+ struct sja1105_port *sp = &priv->ports[port];
+
+ if (!dsa_is_user_port(ds, port))
+ continue;
+
+ kthread_destroy_worker(sp->xmit_worker);
+ }
+ return rc;
}
static int sja1105_remove(struct spi_device *spi)
diff --git a/drivers/net/dsa/sja1105/sja1105_ptp.c b/drivers/net/dsa/sja1105/sja1105_ptp.c
index 54258a25031d..a836fc38c4a4 100644
--- a/drivers/net/dsa/sja1105/sja1105_ptp.c
+++ b/drivers/net/dsa/sja1105/sja1105_ptp.c
@@ -83,6 +83,7 @@ static int sja1105_init_avb_params(struct sja1105_private *priv,
static int sja1105_change_rxtstamping(struct sja1105_private *priv,
bool on)
{
+ struct sja1105_ptp_data *ptp_data = &priv->ptp_data;
struct sja1105_general_params_entry *general_params;
struct sja1105_table *table;
int rc;
@@ -101,6 +102,8 @@ static int sja1105_change_rxtstamping(struct sja1105_private *priv,
kfree_skb(priv->tagger_data.stampable_skb);
priv->tagger_data.stampable_skb = NULL;
}
+ ptp_cancel_worker_sync(ptp_data->clock);
+ skb_queue_purge(&ptp_data->skb_rxtstamp_queue);
return sja1105_static_config_reload(priv, SJA1105_RX_HWTSTAMPING);
}
@@ -234,7 +237,7 @@ int sja1105_ptp_commit(struct dsa_switch *ds, struct sja1105_ptp_cmd *cmd,
if (rw == SPI_WRITE)
priv->info->ptp_cmd_packing(buf, cmd, PACK);
- rc = sja1105_xfer_buf(priv, SPI_WRITE, regs->ptp_control, buf,
+ rc = sja1105_xfer_buf(priv, rw, regs->ptp_control, buf,
SJA1105_SIZE_PTP_CMD);
if (rw == SPI_READ)
@@ -367,22 +370,16 @@ static int sja1105_ptpclkval_write(struct sja1105_private *priv, u64 ticks,
ptp_sts);
}
-#define rxtstamp_to_tagger(d) \
- container_of((d), struct sja1105_tagger_data, rxtstamp_work)
-#define tagger_to_sja1105(d) \
- container_of((d), struct sja1105_private, tagger_data)
-
-static void sja1105_rxtstamp_work(struct work_struct *work)
+static long sja1105_rxtstamp_work(struct ptp_clock_info *ptp)
{
- struct sja1105_tagger_data *tagger_data = rxtstamp_to_tagger(work);
- struct sja1105_private *priv = tagger_to_sja1105(tagger_data);
- struct sja1105_ptp_data *ptp_data = &priv->ptp_data;
+ struct sja1105_ptp_data *ptp_data = ptp_caps_to_data(ptp);
+ struct sja1105_private *priv = ptp_data_to_sja1105(ptp_data);
struct dsa_switch *ds = priv->ds;
struct sk_buff *skb;
mutex_lock(&ptp_data->lock);
- while ((skb = skb_dequeue(&tagger_data->skb_rxtstamp_queue)) != NULL) {
+ while ((skb = skb_dequeue(&ptp_data->skb_rxtstamp_queue)) != NULL) {
struct skb_shared_hwtstamps *shwt = skb_hwtstamps(skb);
u64 ticks, ts;
int rc;
@@ -404,6 +401,9 @@ static void sja1105_rxtstamp_work(struct work_struct *work)
}
mutex_unlock(&ptp_data->lock);
+
+ /* Don't restart */
+ return -1;
}
/* Called from dsa_skb_defer_rx_timestamp */
@@ -411,16 +411,16 @@ bool sja1105_port_rxtstamp(struct dsa_switch *ds, int port,
struct sk_buff *skb, unsigned int type)
{
struct sja1105_private *priv = ds->priv;
- struct sja1105_tagger_data *tagger_data = &priv->tagger_data;
+ struct sja1105_ptp_data *ptp_data = &priv->ptp_data;
- if (!test_bit(SJA1105_HWTS_RX_EN, &tagger_data->state))
+ if (!test_bit(SJA1105_HWTS_RX_EN, &priv->tagger_data.state))
return false;
/* We need to read the full PTP clock to reconstruct the Rx
* timestamp. For that we need a sleepable context.
*/
- skb_queue_tail(&tagger_data->skb_rxtstamp_queue, skb);
- schedule_work(&tagger_data->rxtstamp_work);
+ skb_queue_tail(&ptp_data->skb_rxtstamp_queue, skb);
+ ptp_schedule_worker(ptp_data->clock, 0);
return true;
}
@@ -628,11 +628,11 @@ int sja1105_ptp_clock_register(struct dsa_switch *ds)
.adjtime = sja1105_ptp_adjtime,
.gettimex64 = sja1105_ptp_gettimex,
.settime64 = sja1105_ptp_settime,
+ .do_aux_work = sja1105_rxtstamp_work,
.max_adj = SJA1105_MAX_ADJ_PPB,
};
- skb_queue_head_init(&tagger_data->skb_rxtstamp_queue);
- INIT_WORK(&tagger_data->rxtstamp_work, sja1105_rxtstamp_work);
+ skb_queue_head_init(&ptp_data->skb_rxtstamp_queue);
spin_lock_init(&tagger_data->meta_lock);
ptp_data->clock = ptp_clock_register(&ptp_data->caps, ds->dev);
@@ -653,13 +653,13 @@ void sja1105_ptp_clock_unregister(struct dsa_switch *ds)
if (IS_ERR_OR_NULL(ptp_data->clock))
return;
- cancel_work_sync(&priv->tagger_data.rxtstamp_work);
- skb_queue_purge(&priv->tagger_data.skb_rxtstamp_queue);
+ ptp_cancel_worker_sync(ptp_data->clock);
+ skb_queue_purge(&ptp_data->skb_rxtstamp_queue);
ptp_clock_unregister(ptp_data->clock);
ptp_data->clock = NULL;
}
-void sja1105_ptp_txtstamp_skb(struct dsa_switch *ds, int slot,
+void sja1105_ptp_txtstamp_skb(struct dsa_switch *ds, int port,
struct sk_buff *skb)
{
struct sja1105_private *priv = ds->priv;
@@ -679,7 +679,7 @@ void sja1105_ptp_txtstamp_skb(struct dsa_switch *ds, int slot,
goto out;
}
- rc = sja1105_ptpegr_ts_poll(ds, slot, &ts);
+ rc = sja1105_ptpegr_ts_poll(ds, port, &ts);
if (rc < 0) {
dev_err(ds->dev, "timed out polling for tstamp\n");
kfree_skb(skb);
diff --git a/drivers/net/dsa/sja1105/sja1105_ptp.h b/drivers/net/dsa/sja1105/sja1105_ptp.h
index 470f44b76318..6f4a19eec709 100644
--- a/drivers/net/dsa/sja1105/sja1105_ptp.h
+++ b/drivers/net/dsa/sja1105/sja1105_ptp.h
@@ -30,6 +30,7 @@ struct sja1105_ptp_cmd {
};
struct sja1105_ptp_data {
+ struct sk_buff_head skb_rxtstamp_queue;
struct ptp_clock_info caps;
struct ptp_clock *clock;
struct sja1105_ptp_cmd cmd;
diff --git a/drivers/net/dsa/sja1105/sja1105_static_config.c b/drivers/net/dsa/sja1105/sja1105_static_config.c
index 0d03e13e9909..63d2311817c4 100644
--- a/drivers/net/dsa/sja1105/sja1105_static_config.c
+++ b/drivers/net/dsa/sja1105/sja1105_static_config.c
@@ -142,6 +142,9 @@ static size_t sja1105et_general_params_entry_packing(void *buf, void *entry_ptr,
return size;
}
+/* TPID and TPID2 are intentionally reversed so that semantic
+ * compatibility with E/T is kept.
+ */
static size_t
sja1105pqrs_general_params_entry_packing(void *buf, void *entry_ptr,
enum packing_op op)
@@ -166,9 +169,9 @@ sja1105pqrs_general_params_entry_packing(void *buf, void *entry_ptr,
sja1105_packing(buf, &entry->mirr_port, 141, 139, size, op);
sja1105_packing(buf, &entry->vlmarker, 138, 107, size, op);
sja1105_packing(buf, &entry->vlmask, 106, 75, size, op);
- sja1105_packing(buf, &entry->tpid, 74, 59, size, op);
+ sja1105_packing(buf, &entry->tpid2, 74, 59, size, op);
sja1105_packing(buf, &entry->ignore2stf, 58, 58, size, op);
- sja1105_packing(buf, &entry->tpid2, 57, 42, size, op);
+ sja1105_packing(buf, &entry->tpid, 57, 42, size, op);
sja1105_packing(buf, &entry->queue_ts, 41, 41, size, op);
sja1105_packing(buf, &entry->egrmirrvid, 40, 29, size, op);
sja1105_packing(buf, &entry->egrmirrpcp, 28, 26, size, op);
diff --git a/drivers/net/dsa/sja1105/sja1105_tas.c b/drivers/net/dsa/sja1105/sja1105_tas.c
index 26b925b5dace..fa6750d973d7 100644
--- a/drivers/net/dsa/sja1105/sja1105_tas.c
+++ b/drivers/net/dsa/sja1105/sja1105_tas.c
@@ -477,11 +477,6 @@ int sja1105_setup_tc_taprio(struct dsa_switch *ds, int port,
if (admin->cycle_time_extension)
return -ENOTSUPP;
- if (!ns_to_sja1105_delta(admin->base_time)) {
- dev_err(ds->dev, "A base time of zero is not hardware-allowed\n");
- return -ERANGE;
- }
-
for (i = 0; i < admin->num_entries; i++) {
s64 delta_ns = admin->entries[i].interval;
s64 delta_cycles = ns_to_sja1105_delta(delta_ns);
diff --git a/drivers/net/dsa/vitesse-vsc73xx-core.c b/drivers/net/dsa/vitesse-vsc73xx-core.c
index 42c1574d45f2..6e21a2a5cf01 100644
--- a/drivers/net/dsa/vitesse-vsc73xx-core.c
+++ b/drivers/net/dsa/vitesse-vsc73xx-core.c
@@ -542,7 +542,8 @@ static int vsc73xx_phy_write(struct dsa_switch *ds, int phy, int regnum,
}
static enum dsa_tag_protocol vsc73xx_get_tag_protocol(struct dsa_switch *ds,
- int port)
+ int port,
+ enum dsa_tag_protocol mp)
{
/* The switch internally uses a 8 byte header with length,
* source port, tag, LPA and priority. This is supposedly
@@ -1111,7 +1112,9 @@ static int vsc73xx_gpio_probe(struct vsc73xx *vsc)
vsc->gc.ngpio = 4;
vsc->gc.owner = THIS_MODULE;
vsc->gc.parent = vsc->dev;
+#if IS_ENABLED(CONFIG_OF_GPIO)
vsc->gc.of_node = vsc->dev->of_node;
+#endif
vsc->gc.base = -1;
vsc->gc.get = vsc73xx_gpio_get;
vsc->gc.set = vsc73xx_gpio_set;
diff --git a/drivers/net/ethernet/3com/3c509.c b/drivers/net/ethernet/3com/3c509.c
index 3da97996bdf3..8cafd06ff0c4 100644
--- a/drivers/net/ethernet/3com/3c509.c
+++ b/drivers/net/ethernet/3com/3c509.c
@@ -196,7 +196,7 @@ static struct net_device_stats *el3_get_stats(struct net_device *dev);
static int el3_rx(struct net_device *dev);
static int el3_close(struct net_device *dev);
static void set_multicast_list(struct net_device *dev);
-static void el3_tx_timeout (struct net_device *dev);
+static void el3_tx_timeout (struct net_device *dev, unsigned int txqueue);
static void el3_down(struct net_device *dev);
static void el3_up(struct net_device *dev);
static const struct ethtool_ops ethtool_ops;
@@ -689,7 +689,7 @@ el3_open(struct net_device *dev)
}
static void
-el3_tx_timeout (struct net_device *dev)
+el3_tx_timeout (struct net_device *dev, unsigned int txqueue)
{
int ioaddr = dev->base_addr;
diff --git a/drivers/net/ethernet/3com/3c515.c b/drivers/net/ethernet/3com/3c515.c
index b15752267c8d..1e233e2f0a5a 100644
--- a/drivers/net/ethernet/3com/3c515.c
+++ b/drivers/net/ethernet/3com/3c515.c
@@ -371,7 +371,7 @@ static void corkscrew_timer(struct timer_list *t);
static netdev_tx_t corkscrew_start_xmit(struct sk_buff *skb,
struct net_device *dev);
static int corkscrew_rx(struct net_device *dev);
-static void corkscrew_timeout(struct net_device *dev);
+static void corkscrew_timeout(struct net_device *dev, unsigned int txqueue);
static int boomerang_rx(struct net_device *dev);
static irqreturn_t corkscrew_interrupt(int irq, void *dev_id);
static int corkscrew_close(struct net_device *dev);
@@ -961,7 +961,7 @@ static void corkscrew_timer(struct timer_list *t)
#endif /* AUTOMEDIA */
}
-static void corkscrew_timeout(struct net_device *dev)
+static void corkscrew_timeout(struct net_device *dev, unsigned int txqueue)
{
int i;
struct corkscrew_private *vp = netdev_priv(dev);
diff --git a/drivers/net/ethernet/3com/3c574_cs.c b/drivers/net/ethernet/3com/3c574_cs.c
index 3044a6f35f04..ef1c3151fbb2 100644
--- a/drivers/net/ethernet/3com/3c574_cs.c
+++ b/drivers/net/ethernet/3com/3c574_cs.c
@@ -234,7 +234,7 @@ static void update_stats(struct net_device *dev);
static struct net_device_stats *el3_get_stats(struct net_device *dev);
static int el3_rx(struct net_device *dev, int worklimit);
static int el3_close(struct net_device *dev);
-static void el3_tx_timeout(struct net_device *dev);
+static void el3_tx_timeout(struct net_device *dev, unsigned int txqueue);
static int el3_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
static void set_rx_mode(struct net_device *dev);
static void set_multicast_list(struct net_device *dev);
@@ -690,7 +690,7 @@ static int el3_open(struct net_device *dev)
return 0;
}
-static void el3_tx_timeout(struct net_device *dev)
+static void el3_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
unsigned int ioaddr = dev->base_addr;
diff --git a/drivers/net/ethernet/3com/3c589_cs.c b/drivers/net/ethernet/3com/3c589_cs.c
index 2b2695311bda..d47cde6c5f08 100644
--- a/drivers/net/ethernet/3com/3c589_cs.c
+++ b/drivers/net/ethernet/3com/3c589_cs.c
@@ -173,7 +173,7 @@ static void update_stats(struct net_device *dev);
static struct net_device_stats *el3_get_stats(struct net_device *dev);
static int el3_rx(struct net_device *dev);
static int el3_close(struct net_device *dev);
-static void el3_tx_timeout(struct net_device *dev);
+static void el3_tx_timeout(struct net_device *dev, unsigned int txqueue);
static void set_rx_mode(struct net_device *dev);
static void set_multicast_list(struct net_device *dev);
static const struct ethtool_ops netdev_ethtool_ops;
@@ -526,7 +526,7 @@ static int el3_open(struct net_device *dev)
return 0;
}
-static void el3_tx_timeout(struct net_device *dev)
+static void el3_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
unsigned int ioaddr = dev->base_addr;
diff --git a/drivers/net/ethernet/3com/3c59x.c b/drivers/net/ethernet/3com/3c59x.c
index 8785c2ff3825..a2b7f7ab8170 100644
--- a/drivers/net/ethernet/3com/3c59x.c
+++ b/drivers/net/ethernet/3com/3c59x.c
@@ -776,7 +776,7 @@ static void set_rx_mode(struct net_device *dev);
#ifdef CONFIG_PCI
static int vortex_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
#endif
-static void vortex_tx_timeout(struct net_device *dev);
+static void vortex_tx_timeout(struct net_device *dev, unsigned int txqueue);
static void acpi_set_WOL(struct net_device *dev);
static const struct ethtool_ops vortex_ethtool_ops;
static void set_8021q_mode(struct net_device *dev, int enable);
@@ -1548,7 +1548,7 @@ vortex_up(struct net_device *dev)
struct vortex_private *vp = netdev_priv(dev);
void __iomem *ioaddr = vp->ioaddr;
unsigned int config;
- int i, mii_reg1, mii_reg5, err = 0;
+ int i, mii_reg5, err = 0;
if (VORTEX_PCI(vp)) {
pci_set_power_state(VORTEX_PCI(vp), PCI_D0); /* Go active */
@@ -1605,7 +1605,7 @@ vortex_up(struct net_device *dev)
window_write32(vp, config, 3, Wn3_Config);
if (dev->if_port == XCVR_MII || dev->if_port == XCVR_NWAY) {
- mii_reg1 = mdio_read(dev, vp->phys[0], MII_BMSR);
+ mdio_read(dev, vp->phys[0], MII_BMSR);
mii_reg5 = mdio_read(dev, vp->phys[0], MII_LPA);
vp->partner_flow_ctrl = ((mii_reg5 & 0x0400) != 0);
vp->mii.full_duplex = vp->full_duplex;
@@ -1877,7 +1877,7 @@ leave_media_alone:
iowrite16(FakeIntr, ioaddr + EL3_CMD);
}
-static void vortex_tx_timeout(struct net_device *dev)
+static void vortex_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct vortex_private *vp = netdev_priv(dev);
void __iomem *ioaddr = vp->ioaddr;
diff --git a/drivers/net/ethernet/3com/typhoon.c b/drivers/net/ethernet/3com/typhoon.c
index be823c186517..14fce6658106 100644
--- a/drivers/net/ethernet/3com/typhoon.c
+++ b/drivers/net/ethernet/3com/typhoon.c
@@ -2013,7 +2013,7 @@ typhoon_stop_runtime(struct typhoon *tp, int wait_type)
}
static void
-typhoon_tx_timeout(struct net_device *dev)
+typhoon_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct typhoon *tp = netdev_priv(dev);
diff --git a/drivers/net/ethernet/8390/8390.c b/drivers/net/ethernet/8390/8390.c
index 78f3e532c600..0e0aa4016858 100644
--- a/drivers/net/ethernet/8390/8390.c
+++ b/drivers/net/ethernet/8390/8390.c
@@ -36,9 +36,9 @@ void ei_set_multicast_list(struct net_device *dev)
}
EXPORT_SYMBOL(ei_set_multicast_list);
-void ei_tx_timeout(struct net_device *dev)
+void ei_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
- __ei_tx_timeout(dev);
+ __ei_tx_timeout(dev, txqueue);
}
EXPORT_SYMBOL(ei_tx_timeout);
diff --git a/drivers/net/ethernet/8390/8390.h b/drivers/net/ethernet/8390/8390.h
index 3e2f2c2e7b58..529c728f334a 100644
--- a/drivers/net/ethernet/8390/8390.h
+++ b/drivers/net/ethernet/8390/8390.h
@@ -32,7 +32,7 @@ void NS8390_init(struct net_device *dev, int startp);
int ei_open(struct net_device *dev);
int ei_close(struct net_device *dev);
irqreturn_t ei_interrupt(int irq, void *dev_id);
-void ei_tx_timeout(struct net_device *dev);
+void ei_tx_timeout(struct net_device *dev, unsigned int txqueue);
netdev_tx_t ei_start_xmit(struct sk_buff *skb, struct net_device *dev);
void ei_set_multicast_list(struct net_device *dev);
struct net_device_stats *ei_get_stats(struct net_device *dev);
@@ -50,7 +50,7 @@ void NS8390p_init(struct net_device *dev, int startp);
int eip_open(struct net_device *dev);
int eip_close(struct net_device *dev);
irqreturn_t eip_interrupt(int irq, void *dev_id);
-void eip_tx_timeout(struct net_device *dev);
+void eip_tx_timeout(struct net_device *dev, unsigned int txqueue);
netdev_tx_t eip_start_xmit(struct sk_buff *skb, struct net_device *dev);
void eip_set_multicast_list(struct net_device *dev);
struct net_device_stats *eip_get_stats(struct net_device *dev);
diff --git a/drivers/net/ethernet/8390/8390p.c b/drivers/net/ethernet/8390/8390p.c
index 6cf36992a2c6..6834742057b3 100644
--- a/drivers/net/ethernet/8390/8390p.c
+++ b/drivers/net/ethernet/8390/8390p.c
@@ -41,9 +41,9 @@ void eip_set_multicast_list(struct net_device *dev)
}
EXPORT_SYMBOL(eip_set_multicast_list);
-void eip_tx_timeout(struct net_device *dev)
+void eip_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
- __ei_tx_timeout(dev);
+ __ei_tx_timeout(dev, txqueue);
}
EXPORT_SYMBOL(eip_tx_timeout);
diff --git a/drivers/net/ethernet/8390/axnet_cs.c b/drivers/net/ethernet/8390/axnet_cs.c
index 0b6bbf63f7ca..aeae7966a082 100644
--- a/drivers/net/ethernet/8390/axnet_cs.c
+++ b/drivers/net/ethernet/8390/axnet_cs.c
@@ -83,7 +83,7 @@ static netdev_tx_t axnet_start_xmit(struct sk_buff *skb,
struct net_device *dev);
static struct net_device_stats *get_stats(struct net_device *dev);
static void set_multicast_list(struct net_device *dev);
-static void axnet_tx_timeout(struct net_device *dev);
+static void axnet_tx_timeout(struct net_device *dev, unsigned int txqueue);
static irqreturn_t ei_irq_wrapper(int irq, void *dev_id);
static void ei_watchdog(struct timer_list *t);
static void axnet_reset_8390(struct net_device *dev);
@@ -903,7 +903,7 @@ static int ax_close(struct net_device *dev)
* completed (or failed) - i.e. never posted a Tx related interrupt.
*/
-static void axnet_tx_timeout(struct net_device *dev)
+static void axnet_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
long e8390_base = dev->base_addr;
struct ei_device *ei_local = netdev_priv(dev);
diff --git a/drivers/net/ethernet/8390/lib8390.c b/drivers/net/ethernet/8390/lib8390.c
index c9c55c9eab9f..babc92e2692e 100644
--- a/drivers/net/ethernet/8390/lib8390.c
+++ b/drivers/net/ethernet/8390/lib8390.c
@@ -251,7 +251,7 @@ static int __ei_close(struct net_device *dev)
* completed (or failed) - i.e. never posted a Tx related interrupt.
*/
-static void __ei_tx_timeout(struct net_device *dev)
+static void __ei_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
unsigned long e8390_base = dev->base_addr;
struct ei_device *ei_local = netdev_priv(dev);
diff --git a/drivers/net/ethernet/adaptec/starfire.c b/drivers/net/ethernet/adaptec/starfire.c
index 816540e6beac..165d18405b0c 100644
--- a/drivers/net/ethernet/adaptec/starfire.c
+++ b/drivers/net/ethernet/adaptec/starfire.c
@@ -576,7 +576,7 @@ static int mdio_read(struct net_device *dev, int phy_id, int location);
static void mdio_write(struct net_device *dev, int phy_id, int location, int value);
static int netdev_open(struct net_device *dev);
static void check_duplex(struct net_device *dev);
-static void tx_timeout(struct net_device *dev);
+static void tx_timeout(struct net_device *dev, unsigned int txqueue);
static void init_ring(struct net_device *dev);
static netdev_tx_t start_tx(struct sk_buff *skb, struct net_device *dev);
static irqreturn_t intr_handler(int irq, void *dev_instance);
@@ -1105,7 +1105,7 @@ static void check_duplex(struct net_device *dev)
}
-static void tx_timeout(struct net_device *dev)
+static void tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct netdev_private *np = netdev_priv(dev);
void __iomem *ioaddr = np->base;
diff --git a/drivers/net/ethernet/agere/et131x.c b/drivers/net/ethernet/agere/et131x.c
index 174344c450af..3c51d8c502ed 100644
--- a/drivers/net/ethernet/agere/et131x.c
+++ b/drivers/net/ethernet/agere/et131x.c
@@ -3811,7 +3811,7 @@ drop_err:
* specified by the 'tx_timeo" element in the net_device structure (see
* et131x_alloc_device() to see how this value is set).
*/
-static void et131x_tx_timeout(struct net_device *netdev)
+static void et131x_tx_timeout(struct net_device *netdev, unsigned int txqueue)
{
struct et131x_adapter *adapter = netdev_priv(netdev);
struct tx_ring *tx_ring = &adapter->tx_ring;
diff --git a/drivers/net/ethernet/allwinner/sun4i-emac.c b/drivers/net/ethernet/allwinner/sun4i-emac.c
index 0537df06a9b5..5ea806423e4c 100644
--- a/drivers/net/ethernet/allwinner/sun4i-emac.c
+++ b/drivers/net/ethernet/allwinner/sun4i-emac.c
@@ -407,7 +407,7 @@ static void emac_init_device(struct net_device *dev)
}
/* Our watchdog timed out. Called by the networking layer */
-static void emac_timeout(struct net_device *dev)
+static void emac_timeout(struct net_device *dev, unsigned int txqueue)
{
struct emac_board_info *db = netdev_priv(dev);
unsigned long flags;
diff --git a/drivers/net/ethernet/alteon/acenic.c b/drivers/net/ethernet/alteon/acenic.c
index 46b4207d3266..f366faf88eee 100644
--- a/drivers/net/ethernet/alteon/acenic.c
+++ b/drivers/net/ethernet/alteon/acenic.c
@@ -437,7 +437,7 @@ static const struct ethtool_ops ace_ethtool_ops = {
.set_link_ksettings = ace_set_link_ksettings,
};
-static void ace_watchdog(struct net_device *dev);
+static void ace_watchdog(struct net_device *dev, unsigned int txqueue);
static const struct net_device_ops ace_netdev_ops = {
.ndo_open = ace_open,
@@ -1542,7 +1542,7 @@ static void ace_set_rxtx_parms(struct net_device *dev, int jumbo)
}
-static void ace_watchdog(struct net_device *data)
+static void ace_watchdog(struct net_device *data, unsigned int txqueue)
{
struct net_device *dev = data;
struct ace_private *ap = netdev_priv(dev);
diff --git a/drivers/net/ethernet/amazon/ena/ena_com.h b/drivers/net/ethernet/amazon/ena/ena_com.h
index 7c941eba0bc9..0ce37d54ed10 100644
--- a/drivers/net/ethernet/amazon/ena/ena_com.h
+++ b/drivers/net/ethernet/amazon/ena/ena_com.h
@@ -72,7 +72,7 @@
/*****************************************************************************/
/* ENA adaptive interrupt moderation settings */
-#define ENA_INTR_INITIAL_TX_INTERVAL_USECS 196
+#define ENA_INTR_INITIAL_TX_INTERVAL_USECS 64
#define ENA_INTR_INITIAL_RX_INTERVAL_USECS 0
#define ENA_DEFAULT_INTR_DELAY_RESOLUTION 1
diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c
index a3250dcf7d53..b4e891d49a94 100644
--- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c
+++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c
@@ -315,10 +315,9 @@ static int ena_get_coalesce(struct net_device *net_dev,
ena_com_get_nonadaptive_moderation_interval_tx(ena_dev) *
ena_dev->intr_delay_resolution;
- if (!ena_com_get_adaptive_moderation_enabled(ena_dev))
- coalesce->rx_coalesce_usecs =
- ena_com_get_nonadaptive_moderation_interval_rx(ena_dev)
- * ena_dev->intr_delay_resolution;
+ coalesce->rx_coalesce_usecs =
+ ena_com_get_nonadaptive_moderation_interval_rx(ena_dev)
+ * ena_dev->intr_delay_resolution;
coalesce->use_adaptive_rx_coalesce =
ena_com_get_adaptive_moderation_enabled(ena_dev);
@@ -367,12 +366,6 @@ static int ena_set_coalesce(struct net_device *net_dev,
ena_update_tx_rings_intr_moderation(adapter);
- if (coalesce->use_adaptive_rx_coalesce) {
- if (!ena_com_get_adaptive_moderation_enabled(ena_dev))
- ena_com_enable_adaptive_moderation(ena_dev);
- return 0;
- }
-
rc = ena_com_update_nonadaptive_moderation_interval_rx(ena_dev,
coalesce->rx_coalesce_usecs);
if (rc)
@@ -380,10 +373,13 @@ static int ena_set_coalesce(struct net_device *net_dev,
ena_update_rx_rings_intr_moderation(adapter);
- if (!coalesce->use_adaptive_rx_coalesce) {
- if (ena_com_get_adaptive_moderation_enabled(ena_dev))
- ena_com_disable_adaptive_moderation(ena_dev);
- }
+ if (coalesce->use_adaptive_rx_coalesce &&
+ !ena_com_get_adaptive_moderation_enabled(ena_dev))
+ ena_com_enable_adaptive_moderation(ena_dev);
+
+ if (!coalesce->use_adaptive_rx_coalesce &&
+ ena_com_get_adaptive_moderation_enabled(ena_dev))
+ ena_com_disable_adaptive_moderation(ena_dev);
return 0;
}
@@ -744,7 +740,9 @@ static int ena_set_channels(struct net_device *netdev,
struct ena_adapter *adapter = netdev_priv(netdev);
u32 count = channels->combined_count;
/* The check for max value is already done in ethtool */
- if (count < ENA_MIN_NUM_IO_QUEUES)
+ if (count < ENA_MIN_NUM_IO_QUEUES ||
+ (ena_xdp_present(adapter) &&
+ !ena_xdp_legal_queue_count(adapter, channels->combined_count)))
return -EINVAL;
return ena_update_queue_count(adapter, count);
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index d46a912002ff..894e8c1a8cf1 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -36,7 +36,6 @@
#include <linux/cpu_rmap.h>
#endif /* CONFIG_RFS_ACCEL */
#include <linux/ethtool.h>
-#include <linux/if_vlan.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/numa.h>
@@ -47,6 +46,7 @@
#include <net/ip.h>
#include "ena_netdev.h"
+#include <linux/bpf_trace.h>
#include "ena_pci_id_tbl.h"
static char version[] = DEVICE_NAME " v" DRV_MODULE_VERSION "\n";
@@ -78,7 +78,37 @@ static void check_for_admin_com_state(struct ena_adapter *adapter);
static void ena_destroy_device(struct ena_adapter *adapter, bool graceful);
static int ena_restore_device(struct ena_adapter *adapter);
-static void ena_tx_timeout(struct net_device *dev)
+static void ena_init_io_rings(struct ena_adapter *adapter,
+ int first_index, int count);
+static void ena_init_napi_in_range(struct ena_adapter *adapter, int first_index,
+ int count);
+static void ena_del_napi_in_range(struct ena_adapter *adapter, int first_index,
+ int count);
+static int ena_setup_tx_resources(struct ena_adapter *adapter, int qid);
+static int ena_setup_tx_resources_in_range(struct ena_adapter *adapter,
+ int first_index,
+ int count);
+static int ena_create_io_tx_queue(struct ena_adapter *adapter, int qid);
+static void ena_free_tx_resources(struct ena_adapter *adapter, int qid);
+static int ena_clean_xdp_irq(struct ena_ring *xdp_ring, u32 budget);
+static void ena_destroy_all_tx_queues(struct ena_adapter *adapter);
+static void ena_free_all_io_tx_resources(struct ena_adapter *adapter);
+static void ena_napi_disable_in_range(struct ena_adapter *adapter,
+ int first_index, int count);
+static void ena_napi_enable_in_range(struct ena_adapter *adapter,
+ int first_index, int count);
+static int ena_up(struct ena_adapter *adapter);
+static void ena_down(struct ena_adapter *adapter);
+static void ena_unmask_interrupt(struct ena_ring *tx_ring,
+ struct ena_ring *rx_ring);
+static void ena_update_ring_numa_node(struct ena_ring *tx_ring,
+ struct ena_ring *rx_ring);
+static void ena_unmap_tx_buff(struct ena_ring *tx_ring,
+ struct ena_tx_buffer *tx_info);
+static int ena_create_io_tx_queues_in_range(struct ena_adapter *adapter,
+ int first_index, int count);
+
+static void ena_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct ena_adapter *adapter = netdev_priv(dev);
@@ -123,6 +153,448 @@ static int ena_change_mtu(struct net_device *dev, int new_mtu)
return ret;
}
+static int ena_xmit_common(struct net_device *dev,
+ struct ena_ring *ring,
+ struct ena_tx_buffer *tx_info,
+ struct ena_com_tx_ctx *ena_tx_ctx,
+ u16 next_to_use,
+ u32 bytes)
+{
+ struct ena_adapter *adapter = netdev_priv(dev);
+ int rc, nb_hw_desc;
+
+ if (unlikely(ena_com_is_doorbell_needed(ring->ena_com_io_sq,
+ ena_tx_ctx))) {
+ netif_dbg(adapter, tx_queued, dev,
+ "llq tx max burst size of queue %d achieved, writing doorbell to send burst\n",
+ ring->qid);
+ ena_com_write_sq_doorbell(ring->ena_com_io_sq);
+ }
+
+ /* prepare the packet's descriptors to dma engine */
+ rc = ena_com_prepare_tx(ring->ena_com_io_sq, ena_tx_ctx,
+ &nb_hw_desc);
+
+ /* In case there isn't enough space in the queue for the packet,
+ * we simply drop it. All other failure reasons of
+ * ena_com_prepare_tx() are fatal and therefore require a device reset.
+ */
+ if (unlikely(rc)) {
+ netif_err(adapter, tx_queued, dev,
+ "failed to prepare tx bufs\n");
+ u64_stats_update_begin(&ring->syncp);
+ ring->tx_stats.prepare_ctx_err++;
+ u64_stats_update_end(&ring->syncp);
+ if (rc != -ENOMEM) {
+ adapter->reset_reason =
+ ENA_REGS_RESET_DRIVER_INVALID_STATE;
+ set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
+ }
+ return rc;
+ }
+
+ u64_stats_update_begin(&ring->syncp);
+ ring->tx_stats.cnt++;
+ ring->tx_stats.bytes += bytes;
+ u64_stats_update_end(&ring->syncp);
+
+ tx_info->tx_descs = nb_hw_desc;
+ tx_info->last_jiffies = jiffies;
+ tx_info->print_once = 0;
+
+ ring->next_to_use = ENA_TX_RING_IDX_NEXT(next_to_use,
+ ring->ring_size);
+ return 0;
+}
+
+/* This is the XDP napi callback. XDP queues use a separate napi callback
+ * than Rx/Tx queues.
+ */
+static int ena_xdp_io_poll(struct napi_struct *napi, int budget)
+{
+ struct ena_napi *ena_napi = container_of(napi, struct ena_napi, napi);
+ u32 xdp_work_done, xdp_budget;
+ struct ena_ring *xdp_ring;
+ int napi_comp_call = 0;
+ int ret;
+
+ xdp_ring = ena_napi->xdp_ring;
+ xdp_ring->first_interrupt = ena_napi->first_interrupt;
+
+ xdp_budget = budget;
+
+ if (!test_bit(ENA_FLAG_DEV_UP, &xdp_ring->adapter->flags) ||
+ test_bit(ENA_FLAG_TRIGGER_RESET, &xdp_ring->adapter->flags)) {
+ napi_complete_done(napi, 0);
+ return 0;
+ }
+
+ xdp_work_done = ena_clean_xdp_irq(xdp_ring, xdp_budget);
+
+ /* If the device is about to reset or down, avoid unmask
+ * the interrupt and return 0 so NAPI won't reschedule
+ */
+ if (unlikely(!test_bit(ENA_FLAG_DEV_UP, &xdp_ring->adapter->flags))) {
+ napi_complete_done(napi, 0);
+ ret = 0;
+ } else if (xdp_budget > xdp_work_done) {
+ napi_comp_call = 1;
+ if (napi_complete_done(napi, xdp_work_done))
+ ena_unmask_interrupt(xdp_ring, NULL);
+ ena_update_ring_numa_node(xdp_ring, NULL);
+ ret = xdp_work_done;
+ } else {
+ ret = xdp_budget;
+ }
+
+ u64_stats_update_begin(&xdp_ring->syncp);
+ xdp_ring->tx_stats.napi_comp += napi_comp_call;
+ xdp_ring->tx_stats.tx_poll++;
+ u64_stats_update_end(&xdp_ring->syncp);
+
+ return ret;
+}
+
+static int ena_xdp_tx_map_buff(struct ena_ring *xdp_ring,
+ struct ena_tx_buffer *tx_info,
+ struct xdp_buff *xdp,
+ void **push_hdr,
+ u32 *push_len)
+{
+ struct ena_adapter *adapter = xdp_ring->adapter;
+ struct ena_com_buf *ena_buf;
+ dma_addr_t dma = 0;
+ u32 size;
+
+ tx_info->xdpf = convert_to_xdp_frame(xdp);
+ size = tx_info->xdpf->len;
+ ena_buf = tx_info->bufs;
+
+ /* llq push buffer */
+ *push_len = min_t(u32, size, xdp_ring->tx_max_header_size);
+ *push_hdr = tx_info->xdpf->data;
+
+ if (size - *push_len > 0) {
+ dma = dma_map_single(xdp_ring->dev,
+ *push_hdr + *push_len,
+ size - *push_len,
+ DMA_TO_DEVICE);
+ if (unlikely(dma_mapping_error(xdp_ring->dev, dma)))
+ goto error_report_dma_error;
+
+ tx_info->map_linear_data = 1;
+ tx_info->num_of_bufs = 1;
+ }
+
+ ena_buf->paddr = dma;
+ ena_buf->len = size;
+
+ return 0;
+
+error_report_dma_error:
+ u64_stats_update_begin(&xdp_ring->syncp);
+ xdp_ring->tx_stats.dma_mapping_err++;
+ u64_stats_update_end(&xdp_ring->syncp);
+ netdev_warn(adapter->netdev, "failed to map xdp buff\n");
+
+ xdp_return_frame_rx_napi(tx_info->xdpf);
+ tx_info->xdpf = NULL;
+ tx_info->num_of_bufs = 0;
+
+ return -EINVAL;
+}
+
+static int ena_xdp_xmit_buff(struct net_device *dev,
+ struct xdp_buff *xdp,
+ int qid,
+ struct ena_rx_buffer *rx_info)
+{
+ struct ena_adapter *adapter = netdev_priv(dev);
+ struct ena_com_tx_ctx ena_tx_ctx = {0};
+ struct ena_tx_buffer *tx_info;
+ struct ena_ring *xdp_ring;
+ u16 next_to_use, req_id;
+ int rc;
+ void *push_hdr;
+ u32 push_len;
+
+ xdp_ring = &adapter->tx_ring[qid];
+ next_to_use = xdp_ring->next_to_use;
+ req_id = xdp_ring->free_ids[next_to_use];
+ tx_info = &xdp_ring->tx_buffer_info[req_id];
+ tx_info->num_of_bufs = 0;
+ page_ref_inc(rx_info->page);
+ tx_info->xdp_rx_page = rx_info->page;
+
+ rc = ena_xdp_tx_map_buff(xdp_ring, tx_info, xdp, &push_hdr, &push_len);
+ if (unlikely(rc))
+ goto error_drop_packet;
+
+ ena_tx_ctx.ena_bufs = tx_info->bufs;
+ ena_tx_ctx.push_header = push_hdr;
+ ena_tx_ctx.num_bufs = tx_info->num_of_bufs;
+ ena_tx_ctx.req_id = req_id;
+ ena_tx_ctx.header_len = push_len;
+
+ rc = ena_xmit_common(dev,
+ xdp_ring,
+ tx_info,
+ &ena_tx_ctx,
+ next_to_use,
+ xdp->data_end - xdp->data);
+ if (rc)
+ goto error_unmap_dma;
+ /* trigger the dma engine. ena_com_write_sq_doorbell()
+ * has a mb
+ */
+ ena_com_write_sq_doorbell(xdp_ring->ena_com_io_sq);
+ u64_stats_update_begin(&xdp_ring->syncp);
+ xdp_ring->tx_stats.doorbells++;
+ u64_stats_update_end(&xdp_ring->syncp);
+
+ return NETDEV_TX_OK;
+
+error_unmap_dma:
+ ena_unmap_tx_buff(xdp_ring, tx_info);
+ tx_info->xdpf = NULL;
+error_drop_packet:
+
+ return NETDEV_TX_OK;
+}
+
+static int ena_xdp_execute(struct ena_ring *rx_ring,
+ struct xdp_buff *xdp,
+ struct ena_rx_buffer *rx_info)
+{
+ struct bpf_prog *xdp_prog;
+ u32 verdict = XDP_PASS;
+
+ rcu_read_lock();
+ xdp_prog = READ_ONCE(rx_ring->xdp_bpf_prog);
+
+ if (!xdp_prog)
+ goto out;
+
+ verdict = bpf_prog_run_xdp(xdp_prog, xdp);
+
+ if (verdict == XDP_TX)
+ ena_xdp_xmit_buff(rx_ring->netdev,
+ xdp,
+ rx_ring->qid + rx_ring->adapter->num_io_queues,
+ rx_info);
+ else if (unlikely(verdict == XDP_ABORTED))
+ trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict);
+ else if (unlikely(verdict > XDP_TX))
+ bpf_warn_invalid_xdp_action(verdict);
+out:
+ rcu_read_unlock();
+ return verdict;
+}
+
+static void ena_init_all_xdp_queues(struct ena_adapter *adapter)
+{
+ adapter->xdp_first_ring = adapter->num_io_queues;
+ adapter->xdp_num_queues = adapter->num_io_queues;
+
+ ena_init_io_rings(adapter,
+ adapter->xdp_first_ring,
+ adapter->xdp_num_queues);
+}
+
+static int ena_setup_and_create_all_xdp_queues(struct ena_adapter *adapter)
+{
+ int rc = 0;
+
+ rc = ena_setup_tx_resources_in_range(adapter, adapter->xdp_first_ring,
+ adapter->xdp_num_queues);
+ if (rc)
+ goto setup_err;
+
+ rc = ena_create_io_tx_queues_in_range(adapter,
+ adapter->xdp_first_ring,
+ adapter->xdp_num_queues);
+ if (rc)
+ goto create_err;
+
+ return 0;
+
+create_err:
+ ena_free_all_io_tx_resources(adapter);
+setup_err:
+ return rc;
+}
+
+/* Provides a way for both kernel and bpf-prog to know
+ * more about the RX-queue a given XDP frame arrived on.
+ */
+static int ena_xdp_register_rxq_info(struct ena_ring *rx_ring)
+{
+ int rc;
+
+ rc = xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev, rx_ring->qid);
+
+ if (rc) {
+ netif_err(rx_ring->adapter, ifup, rx_ring->netdev,
+ "Failed to register xdp rx queue info. RX queue num %d rc: %d\n",
+ rx_ring->qid, rc);
+ goto err;
+ }
+
+ rc = xdp_rxq_info_reg_mem_model(&rx_ring->xdp_rxq, MEM_TYPE_PAGE_SHARED,
+ NULL);
+
+ if (rc) {
+ netif_err(rx_ring->adapter, ifup, rx_ring->netdev,
+ "Failed to register xdp rx queue info memory model. RX queue num %d rc: %d\n",
+ rx_ring->qid, rc);
+ xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
+ }
+
+err:
+ return rc;
+}
+
+static void ena_xdp_unregister_rxq_info(struct ena_ring *rx_ring)
+{
+ xdp_rxq_info_unreg_mem_model(&rx_ring->xdp_rxq);
+ xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
+}
+
+void ena_xdp_exchange_program_rx_in_range(struct ena_adapter *adapter,
+ struct bpf_prog *prog,
+ int first,
+ int count)
+{
+ struct ena_ring *rx_ring;
+ int i = 0;
+
+ for (i = first; i < count; i++) {
+ rx_ring = &adapter->rx_ring[i];
+ xchg(&rx_ring->xdp_bpf_prog, prog);
+ if (prog) {
+ ena_xdp_register_rxq_info(rx_ring);
+ rx_ring->rx_headroom = XDP_PACKET_HEADROOM;
+ } else {
+ ena_xdp_unregister_rxq_info(rx_ring);
+ rx_ring->rx_headroom = 0;
+ }
+ }
+}
+
+void ena_xdp_exchange_program(struct ena_adapter *adapter,
+ struct bpf_prog *prog)
+{
+ struct bpf_prog *old_bpf_prog = xchg(&adapter->xdp_bpf_prog, prog);
+
+ ena_xdp_exchange_program_rx_in_range(adapter,
+ prog,
+ 0,
+ adapter->num_io_queues);
+
+ if (old_bpf_prog)
+ bpf_prog_put(old_bpf_prog);
+}
+
+static int ena_destroy_and_free_all_xdp_queues(struct ena_adapter *adapter)
+{
+ bool was_up;
+ int rc;
+
+ was_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags);
+
+ if (was_up)
+ ena_down(adapter);
+
+ adapter->xdp_first_ring = 0;
+ adapter->xdp_num_queues = 0;
+ ena_xdp_exchange_program(adapter, NULL);
+ if (was_up) {
+ rc = ena_up(adapter);
+ if (rc)
+ return rc;
+ }
+ return 0;
+}
+
+static int ena_xdp_set(struct net_device *netdev, struct netdev_bpf *bpf)
+{
+ struct ena_adapter *adapter = netdev_priv(netdev);
+ struct bpf_prog *prog = bpf->prog;
+ struct bpf_prog *old_bpf_prog;
+ int rc, prev_mtu;
+ bool is_up;
+
+ is_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags);
+ rc = ena_xdp_allowed(adapter);
+ if (rc == ENA_XDP_ALLOWED) {
+ old_bpf_prog = adapter->xdp_bpf_prog;
+ if (prog) {
+ if (!is_up) {
+ ena_init_all_xdp_queues(adapter);
+ } else if (!old_bpf_prog) {
+ ena_down(adapter);
+ ena_init_all_xdp_queues(adapter);
+ }
+ ena_xdp_exchange_program(adapter, prog);
+
+ if (is_up && !old_bpf_prog) {
+ rc = ena_up(adapter);
+ if (rc)
+ return rc;
+ }
+ } else if (old_bpf_prog) {
+ rc = ena_destroy_and_free_all_xdp_queues(adapter);
+ if (rc)
+ return rc;
+ }
+
+ prev_mtu = netdev->max_mtu;
+ netdev->max_mtu = prog ? ENA_XDP_MAX_MTU : adapter->max_mtu;
+
+ if (!old_bpf_prog)
+ netif_info(adapter, drv, adapter->netdev,
+ "xdp program set, changing the max_mtu from %d to %d",
+ prev_mtu, netdev->max_mtu);
+
+ } else if (rc == ENA_XDP_CURRENT_MTU_TOO_LARGE) {
+ netif_err(adapter, drv, adapter->netdev,
+ "Failed to set xdp program, the current MTU (%d) is larger than the maximum allowed MTU (%lu) while xdp is on",
+ netdev->mtu, ENA_XDP_MAX_MTU);
+ NL_SET_ERR_MSG_MOD(bpf->extack,
+ "Failed to set xdp program, the current MTU is larger than the maximum allowed MTU. Check the dmesg for more info");
+ return -EINVAL;
+ } else if (rc == ENA_XDP_NO_ENOUGH_QUEUES) {
+ netif_err(adapter, drv, adapter->netdev,
+ "Failed to set xdp program, the Rx/Tx channel count should be at most half of the maximum allowed channel count. The current queue count (%d), the maximal queue count (%d)\n",
+ adapter->num_io_queues, adapter->max_num_io_queues);
+ NL_SET_ERR_MSG_MOD(bpf->extack,
+ "Failed to set xdp program, there is no enough space for allocating XDP queues, Check the dmesg for more info");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/* This is the main xdp callback, it's used by the kernel to set/unset the xdp
+ * program as well as to query the current xdp program id.
+ */
+static int ena_xdp(struct net_device *netdev, struct netdev_bpf *bpf)
+{
+ struct ena_adapter *adapter = netdev_priv(netdev);
+
+ switch (bpf->command) {
+ case XDP_SETUP_PROG:
+ return ena_xdp_set(netdev, bpf);
+ case XDP_QUERY_PROG:
+ bpf->prog_id = adapter->xdp_bpf_prog ?
+ adapter->xdp_bpf_prog->aux->id : 0;
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
static int ena_init_rx_cpu_rmap(struct ena_adapter *adapter)
{
#ifdef CONFIG_RFS_ACCEL
@@ -164,7 +636,8 @@ static void ena_init_io_rings_common(struct ena_adapter *adapter,
u64_stats_init(&ring->syncp);
}
-static void ena_init_io_rings(struct ena_adapter *adapter)
+static void ena_init_io_rings(struct ena_adapter *adapter,
+ int first_index, int count)
{
struct ena_com_dev *ena_dev;
struct ena_ring *txr, *rxr;
@@ -172,13 +645,12 @@ static void ena_init_io_rings(struct ena_adapter *adapter)
ena_dev = adapter->ena_dev;
- for (i = 0; i < adapter->num_io_queues; i++) {
+ for (i = first_index; i < first_index + count; i++) {
txr = &adapter->tx_ring[i];
rxr = &adapter->rx_ring[i];
- /* TX/RX common ring state */
+ /* TX common ring state */
ena_init_io_rings_common(adapter, txr, i);
- ena_init_io_rings_common(adapter, rxr, i);
/* TX specific ring state */
txr->ring_size = adapter->requested_tx_ring_size;
@@ -188,14 +660,20 @@ static void ena_init_io_rings(struct ena_adapter *adapter)
txr->smoothed_interval =
ena_com_get_nonadaptive_moderation_interval_tx(ena_dev);
- /* RX specific ring state */
- rxr->ring_size = adapter->requested_rx_ring_size;
- rxr->rx_copybreak = adapter->rx_copybreak;
- rxr->sgl_size = adapter->max_rx_sgl_size;
- rxr->smoothed_interval =
- ena_com_get_nonadaptive_moderation_interval_rx(ena_dev);
- rxr->empty_rx_queue = 0;
- adapter->ena_napi[i].dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
+ /* Don't init RX queues for xdp queues */
+ if (!ENA_IS_XDP_INDEX(adapter, i)) {
+ /* RX common ring state */
+ ena_init_io_rings_common(adapter, rxr, i);
+
+ /* RX specific ring state */
+ rxr->ring_size = adapter->requested_rx_ring_size;
+ rxr->rx_copybreak = adapter->rx_copybreak;
+ rxr->sgl_size = adapter->max_rx_sgl_size;
+ rxr->smoothed_interval =
+ ena_com_get_nonadaptive_moderation_interval_rx(ena_dev);
+ rxr->empty_rx_queue = 0;
+ adapter->ena_napi[i].dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
+ }
}
}
@@ -285,16 +763,13 @@ static void ena_free_tx_resources(struct ena_adapter *adapter, int qid)
tx_ring->push_buf_intermediate_buf = NULL;
}
-/* ena_setup_all_tx_resources - allocate I/O Tx queues resources for All queues
- * @adapter: private structure
- *
- * Return 0 on success, negative on failure
- */
-static int ena_setup_all_tx_resources(struct ena_adapter *adapter)
+static int ena_setup_tx_resources_in_range(struct ena_adapter *adapter,
+ int first_index,
+ int count)
{
int i, rc = 0;
- for (i = 0; i < adapter->num_io_queues; i++) {
+ for (i = first_index; i < first_index + count; i++) {
rc = ena_setup_tx_resources(adapter, i);
if (rc)
goto err_setup_tx;
@@ -308,11 +783,20 @@ err_setup_tx:
"Tx queue %d: allocation failed\n", i);
/* rewind the index freeing the rings as we go */
- while (i--)
+ while (first_index < i--)
ena_free_tx_resources(adapter, i);
return rc;
}
+static void ena_free_all_io_tx_resources_in_range(struct ena_adapter *adapter,
+ int first_index, int count)
+{
+ int i;
+
+ for (i = first_index; i < first_index + count; i++)
+ ena_free_tx_resources(adapter, i);
+}
+
/* ena_free_all_io_tx_resources - Free I/O Tx Resources for All Queues
* @adapter: board private structure
*
@@ -320,10 +804,10 @@ err_setup_tx:
*/
static void ena_free_all_io_tx_resources(struct ena_adapter *adapter)
{
- int i;
-
- for (i = 0; i < adapter->num_io_queues; i++)
- ena_free_tx_resources(adapter, i);
+ ena_free_all_io_tx_resources_in_range(adapter,
+ 0,
+ adapter->xdp_num_queues +
+ adapter->num_io_queues);
}
static int validate_rx_req_id(struct ena_ring *rx_ring, u16 req_id)
@@ -495,8 +979,8 @@ static int ena_alloc_rx_page(struct ena_ring *rx_ring,
rx_info->page = page;
rx_info->page_offset = 0;
ena_buf = &rx_info->ena_buf;
- ena_buf->paddr = dma;
- ena_buf->len = ENA_PAGE_SIZE;
+ ena_buf->paddr = dma + rx_ring->rx_headroom;
+ ena_buf->len = ENA_PAGE_SIZE - rx_ring->rx_headroom;
return 0;
}
@@ -513,7 +997,9 @@ static void ena_free_rx_page(struct ena_ring *rx_ring,
return;
}
- dma_unmap_page(rx_ring->dev, ena_buf->paddr, ENA_PAGE_SIZE,
+ dma_unmap_page(rx_ring->dev,
+ ena_buf->paddr - rx_ring->rx_headroom,
+ ENA_PAGE_SIZE,
DMA_FROM_DEVICE);
__free_page(page);
@@ -620,8 +1106,8 @@ static void ena_free_all_rx_bufs(struct ena_adapter *adapter)
ena_free_rx_bufs(adapter, i);
}
-static void ena_unmap_tx_skb(struct ena_ring *tx_ring,
- struct ena_tx_buffer *tx_info)
+static void ena_unmap_tx_buff(struct ena_ring *tx_ring,
+ struct ena_tx_buffer *tx_info)
{
struct ena_com_buf *ena_buf;
u32 cnt;
@@ -675,7 +1161,7 @@ static void ena_free_tx_bufs(struct ena_ring *tx_ring)
tx_ring->qid, i);
}
- ena_unmap_tx_skb(tx_ring, tx_info);
+ ena_unmap_tx_buff(tx_ring, tx_info);
dev_kfree_skb_any(tx_info->skb);
}
@@ -688,7 +1174,7 @@ static void ena_free_all_tx_bufs(struct ena_adapter *adapter)
struct ena_ring *tx_ring;
int i;
- for (i = 0; i < adapter->num_io_queues; i++) {
+ for (i = 0; i < adapter->num_io_queues + adapter->xdp_num_queues; i++) {
tx_ring = &adapter->tx_ring[i];
ena_free_tx_bufs(tx_ring);
}
@@ -699,7 +1185,7 @@ static void ena_destroy_all_tx_queues(struct ena_adapter *adapter)
u16 ena_qid;
int i;
- for (i = 0; i < adapter->num_io_queues; i++) {
+ for (i = 0; i < adapter->num_io_queues + adapter->xdp_num_queues; i++) {
ena_qid = ENA_IO_TXQ_IDX(i);
ena_com_destroy_io_queue(adapter->ena_dev, ena_qid);
}
@@ -723,6 +1209,32 @@ static void ena_destroy_all_io_queues(struct ena_adapter *adapter)
ena_destroy_all_rx_queues(adapter);
}
+static int handle_invalid_req_id(struct ena_ring *ring, u16 req_id,
+ struct ena_tx_buffer *tx_info, bool is_xdp)
+{
+ if (tx_info)
+ netif_err(ring->adapter,
+ tx_done,
+ ring->netdev,
+ "tx_info doesn't have valid %s",
+ is_xdp ? "xdp frame" : "skb");
+ else
+ netif_err(ring->adapter,
+ tx_done,
+ ring->netdev,
+ "Invalid req_id: %hu\n",
+ req_id);
+
+ u64_stats_update_begin(&ring->syncp);
+ ring->tx_stats.bad_req_id++;
+ u64_stats_update_end(&ring->syncp);
+
+ /* Trigger device reset */
+ ring->adapter->reset_reason = ENA_REGS_RESET_INV_TX_REQ_ID;
+ set_bit(ENA_FLAG_TRIGGER_RESET, &ring->adapter->flags);
+ return -EFAULT;
+}
+
static int validate_tx_req_id(struct ena_ring *tx_ring, u16 req_id)
{
struct ena_tx_buffer *tx_info = NULL;
@@ -733,21 +1245,20 @@ static int validate_tx_req_id(struct ena_ring *tx_ring, u16 req_id)
return 0;
}
- if (tx_info)
- netif_err(tx_ring->adapter, tx_done, tx_ring->netdev,
- "tx_info doesn't have valid skb\n");
- else
- netif_err(tx_ring->adapter, tx_done, tx_ring->netdev,
- "Invalid req_id: %hu\n", req_id);
+ return handle_invalid_req_id(tx_ring, req_id, tx_info, false);
+}
- u64_stats_update_begin(&tx_ring->syncp);
- tx_ring->tx_stats.bad_req_id++;
- u64_stats_update_end(&tx_ring->syncp);
+static int validate_xdp_req_id(struct ena_ring *xdp_ring, u16 req_id)
+{
+ struct ena_tx_buffer *tx_info = NULL;
- /* Trigger device reset */
- tx_ring->adapter->reset_reason = ENA_REGS_RESET_INV_TX_REQ_ID;
- set_bit(ENA_FLAG_TRIGGER_RESET, &tx_ring->adapter->flags);
- return -EFAULT;
+ if (likely(req_id < xdp_ring->ring_size)) {
+ tx_info = &xdp_ring->tx_buffer_info[req_id];
+ if (likely(tx_info->xdpf))
+ return 0;
+ }
+
+ return handle_invalid_req_id(xdp_ring, req_id, tx_info, true);
}
static int ena_clean_tx_irq(struct ena_ring *tx_ring, u32 budget)
@@ -786,7 +1297,7 @@ static int ena_clean_tx_irq(struct ena_ring *tx_ring, u32 budget)
tx_info->skb = NULL;
tx_info->last_jiffies = 0;
- ena_unmap_tx_skb(tx_ring, tx_info);
+ ena_unmap_tx_buff(tx_ring, tx_info);
netif_dbg(tx_ring->adapter, tx_done, tx_ring->netdev,
"tx_poll: q %d skb %p completed\n", tx_ring->qid,
@@ -1037,6 +1548,33 @@ static void ena_set_rx_hash(struct ena_ring *rx_ring,
}
}
+int ena_xdp_handle_buff(struct ena_ring *rx_ring, struct xdp_buff *xdp)
+{
+ struct ena_rx_buffer *rx_info;
+ int ret;
+
+ rx_info = &rx_ring->rx_buffer_info[rx_ring->ena_bufs[0].req_id];
+ xdp->data = page_address(rx_info->page) +
+ rx_info->page_offset + rx_ring->rx_headroom;
+ xdp_set_data_meta_invalid(xdp);
+ xdp->data_hard_start = page_address(rx_info->page);
+ xdp->data_end = xdp->data + rx_ring->ena_bufs[0].len;
+ /* If for some reason we received a bigger packet than
+ * we expect, then we simply drop it
+ */
+ if (unlikely(rx_ring->ena_bufs[0].len > ENA_XDP_MAX_MTU))
+ return XDP_DROP;
+
+ ret = ena_xdp_execute(rx_ring, xdp, rx_info);
+
+ /* The xdp program might expand the headers */
+ if (ret == XDP_PASS) {
+ rx_info->page_offset = xdp->data - xdp->data_hard_start;
+ rx_ring->ena_bufs[0].len = xdp->data_end - xdp->data;
+ }
+
+ return ret;
+}
/* ena_clean_rx_irq - Cleanup RX irq
* @rx_ring: RX ring to clean
* @napi: napi handler
@@ -1048,23 +1586,27 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi,
u32 budget)
{
u16 next_to_clean = rx_ring->next_to_clean;
- u32 res_budget, work_done;
-
struct ena_com_rx_ctx ena_rx_ctx;
struct ena_adapter *adapter;
+ u32 res_budget, work_done;
+ int rx_copybreak_pkt = 0;
+ int refill_threshold;
struct sk_buff *skb;
int refill_required;
- int refill_threshold;
- int rc = 0;
+ struct xdp_buff xdp;
int total_len = 0;
- int rx_copybreak_pkt = 0;
+ int xdp_verdict;
+ int rc = 0;
int i;
netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
"%s qid %d\n", __func__, rx_ring->qid);
res_budget = budget;
+ xdp.rxq = &rx_ring->xdp_rxq;
do {
+ xdp_verdict = XDP_PASS;
+ skb = NULL;
ena_rx_ctx.ena_bufs = rx_ring->ena_bufs;
ena_rx_ctx.max_bufs = rx_ring->sgl_size;
ena_rx_ctx.descs = 0;
@@ -1082,12 +1624,22 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi,
rx_ring->qid, ena_rx_ctx.descs, ena_rx_ctx.l3_proto,
ena_rx_ctx.l4_proto, ena_rx_ctx.hash);
+ if (ena_xdp_present_ring(rx_ring))
+ xdp_verdict = ena_xdp_handle_buff(rx_ring, &xdp);
+
/* allocate skb and fill it */
- skb = ena_rx_skb(rx_ring, rx_ring->ena_bufs, ena_rx_ctx.descs,
- &next_to_clean);
+ if (xdp_verdict == XDP_PASS)
+ skb = ena_rx_skb(rx_ring,
+ rx_ring->ena_bufs,
+ ena_rx_ctx.descs,
+ &next_to_clean);
- /* exit if we failed to retrieve a buffer */
if (unlikely(!skb)) {
+ if (xdp_verdict == XDP_TX) {
+ ena_free_rx_page(rx_ring,
+ &rx_ring->rx_buffer_info[rx_ring->ena_bufs[0].req_id]);
+ res_budget--;
+ }
for (i = 0; i < ena_rx_ctx.descs; i++) {
rx_ring->free_ids[next_to_clean] =
rx_ring->ena_bufs[i].req_id;
@@ -1095,6 +1647,8 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi,
ENA_RX_RING_IDX_NEXT(next_to_clean,
rx_ring->ring_size);
}
+ if (xdp_verdict == XDP_TX || xdp_verdict == XDP_DROP)
+ continue;
break;
}
@@ -1188,9 +1742,14 @@ static void ena_unmask_interrupt(struct ena_ring *tx_ring,
struct ena_ring *rx_ring)
{
struct ena_eth_io_intr_reg intr_reg;
- u32 rx_interval = ena_com_get_adaptive_moderation_enabled(rx_ring->ena_dev) ?
- rx_ring->smoothed_interval :
- ena_com_get_nonadaptive_moderation_interval_rx(rx_ring->ena_dev);
+ u32 rx_interval = 0;
+ /* Rx ring can be NULL when for XDP tx queues which don't have an
+ * accompanying rx_ring pair.
+ */
+ if (rx_ring)
+ rx_interval = ena_com_get_adaptive_moderation_enabled(rx_ring->ena_dev) ?
+ rx_ring->smoothed_interval :
+ ena_com_get_nonadaptive_moderation_interval_rx(rx_ring->ena_dev);
/* Update intr register: rx intr delay,
* tx intr delay and interrupt unmask
@@ -1203,8 +1762,9 @@ static void ena_unmask_interrupt(struct ena_ring *tx_ring,
/* It is a shared MSI-X.
* Tx and Rx CQ have pointer to it.
* So we use one of them to reach the intr reg
+ * The Tx ring is used because the rx_ring is NULL for XDP queues
*/
- ena_com_unmask_intr(rx_ring->ena_com_io_cq, &intr_reg);
+ ena_com_unmask_intr(tx_ring->ena_com_io_cq, &intr_reg);
}
static void ena_update_ring_numa_node(struct ena_ring *tx_ring,
@@ -1222,24 +1782,84 @@ static void ena_update_ring_numa_node(struct ena_ring *tx_ring,
if (numa_node != NUMA_NO_NODE) {
ena_com_update_numa_node(tx_ring->ena_com_io_cq, numa_node);
- ena_com_update_numa_node(rx_ring->ena_com_io_cq, numa_node);
+ if (rx_ring)
+ ena_com_update_numa_node(rx_ring->ena_com_io_cq,
+ numa_node);
}
tx_ring->cpu = cpu;
- rx_ring->cpu = cpu;
+ if (rx_ring)
+ rx_ring->cpu = cpu;
return;
out:
put_cpu();
}
+static int ena_clean_xdp_irq(struct ena_ring *xdp_ring, u32 budget)
+{
+ u32 total_done = 0;
+ u16 next_to_clean;
+ u32 tx_bytes = 0;
+ int tx_pkts = 0;
+ u16 req_id;
+ int rc;
+
+ if (unlikely(!xdp_ring))
+ return 0;
+ next_to_clean = xdp_ring->next_to_clean;
+
+ while (tx_pkts < budget) {
+ struct ena_tx_buffer *tx_info;
+ struct xdp_frame *xdpf;
+
+ rc = ena_com_tx_comp_req_id_get(xdp_ring->ena_com_io_cq,
+ &req_id);
+ if (rc)
+ break;
+
+ rc = validate_xdp_req_id(xdp_ring, req_id);
+ if (rc)
+ break;
+
+ tx_info = &xdp_ring->tx_buffer_info[req_id];
+ xdpf = tx_info->xdpf;
+
+ tx_info->xdpf = NULL;
+ tx_info->last_jiffies = 0;
+ ena_unmap_tx_buff(xdp_ring, tx_info);
+
+ netif_dbg(xdp_ring->adapter, tx_done, xdp_ring->netdev,
+ "tx_poll: q %d skb %p completed\n", xdp_ring->qid,
+ xdpf);
+
+ tx_bytes += xdpf->len;
+ tx_pkts++;
+ total_done += tx_info->tx_descs;
+
+ __free_page(tx_info->xdp_rx_page);
+ xdp_ring->free_ids[next_to_clean] = req_id;
+ next_to_clean = ENA_TX_RING_IDX_NEXT(next_to_clean,
+ xdp_ring->ring_size);
+ }
+
+ xdp_ring->next_to_clean = next_to_clean;
+ ena_com_comp_ack(xdp_ring->ena_com_io_sq, total_done);
+ ena_com_update_dev_comp_head(xdp_ring->ena_com_io_cq);
+
+ netif_dbg(xdp_ring->adapter, tx_done, xdp_ring->netdev,
+ "tx_poll: q %d done. total pkts: %d\n",
+ xdp_ring->qid, tx_pkts);
+
+ return tx_pkts;
+}
+
static int ena_io_poll(struct napi_struct *napi, int budget)
{
struct ena_napi *ena_napi = container_of(napi, struct ena_napi, napi);
struct ena_ring *tx_ring, *rx_ring;
-
- u32 tx_work_done;
- u32 rx_work_done;
+ int tx_work_done;
+ int rx_work_done = 0;
int tx_budget;
int napi_comp_call = 0;
int ret;
@@ -1247,6 +1867,9 @@ static int ena_io_poll(struct napi_struct *napi, int budget)
tx_ring = ena_napi->tx_ring;
rx_ring = ena_napi->rx_ring;
+ tx_ring->first_interrupt = ena_napi->first_interrupt;
+ rx_ring->first_interrupt = ena_napi->first_interrupt;
+
tx_budget = tx_ring->ring_size / ENA_TX_POLL_BUDGET_DIVIDER;
if (!test_bit(ENA_FLAG_DEV_UP, &tx_ring->adapter->flags) ||
@@ -1256,7 +1879,11 @@ static int ena_io_poll(struct napi_struct *napi, int budget)
}
tx_work_done = ena_clean_tx_irq(tx_ring, tx_budget);
- rx_work_done = ena_clean_rx_irq(rx_ring, napi, budget);
+ /* On netpoll the budget is zero and the handler should only clean the
+ * tx completions.
+ */
+ if (likely(budget))
+ rx_work_done = ena_clean_rx_irq(rx_ring, napi, budget);
/* If the device is about to reset or down, avoid unmask
* the interrupt and return 0 so NAPI won't reschedule
@@ -1318,8 +1945,7 @@ static irqreturn_t ena_intr_msix_io(int irq, void *data)
{
struct ena_napi *ena_napi = data;
- ena_napi->tx_ring->first_interrupt = true;
- ena_napi->rx_ring->first_interrupt = true;
+ ena_napi->first_interrupt = true;
napi_schedule_irqoff(&ena_napi->napi);
@@ -1394,10 +2020,12 @@ static void ena_setup_io_intr(struct ena_adapter *adapter)
{
struct net_device *netdev;
int irq_idx, i, cpu;
+ int io_queue_count;
netdev = adapter->netdev;
+ io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues;
- for (i = 0; i < adapter->num_io_queues; i++) {
+ for (i = 0; i < io_queue_count; i++) {
irq_idx = ENA_IO_IRQ_IDX(i);
cpu = i % num_online_cpus();
@@ -1525,45 +2153,64 @@ static void ena_disable_io_intr_sync(struct ena_adapter *adapter)
synchronize_irq(adapter->irq_tbl[i].vector);
}
-static void ena_del_napi(struct ena_adapter *adapter)
+static void ena_del_napi_in_range(struct ena_adapter *adapter,
+ int first_index,
+ int count)
{
int i;
- for (i = 0; i < adapter->num_io_queues; i++)
- netif_napi_del(&adapter->ena_napi[i].napi);
+ for (i = first_index; i < first_index + count; i++) {
+ /* Check if napi was initialized before */
+ if (!ENA_IS_XDP_INDEX(adapter, i) ||
+ adapter->ena_napi[i].xdp_ring)
+ netif_napi_del(&adapter->ena_napi[i].napi);
+ else
+ WARN_ON(ENA_IS_XDP_INDEX(adapter, i) &&
+ adapter->ena_napi[i].xdp_ring);
+ }
}
-static void ena_init_napi(struct ena_adapter *adapter)
+static void ena_init_napi_in_range(struct ena_adapter *adapter,
+ int first_index, int count)
{
- struct ena_napi *napi;
+ struct ena_napi *napi = {0};
int i;
- for (i = 0; i < adapter->num_io_queues; i++) {
+ for (i = first_index; i < first_index + count; i++) {
napi = &adapter->ena_napi[i];
netif_napi_add(adapter->netdev,
&adapter->ena_napi[i].napi,
- ena_io_poll,
+ ENA_IS_XDP_INDEX(adapter, i) ? ena_xdp_io_poll : ena_io_poll,
ENA_NAPI_BUDGET);
- napi->rx_ring = &adapter->rx_ring[i];
- napi->tx_ring = &adapter->tx_ring[i];
+
+ if (!ENA_IS_XDP_INDEX(adapter, i)) {
+ napi->rx_ring = &adapter->rx_ring[i];
+ napi->tx_ring = &adapter->tx_ring[i];
+ } else {
+ napi->xdp_ring = &adapter->tx_ring[i];
+ }
napi->qid = i;
}
}
-static void ena_napi_disable_all(struct ena_adapter *adapter)
+static void ena_napi_disable_in_range(struct ena_adapter *adapter,
+ int first_index,
+ int count)
{
int i;
- for (i = 0; i < adapter->num_io_queues; i++)
+ for (i = first_index; i < first_index + count; i++)
napi_disable(&adapter->ena_napi[i].napi);
}
-static void ena_napi_enable_all(struct ena_adapter *adapter)
+static void ena_napi_enable_in_range(struct ena_adapter *adapter,
+ int first_index,
+ int count)
{
int i;
- for (i = 0; i < adapter->num_io_queues; i++)
+ for (i = first_index; i < first_index + count; i++)
napi_enable(&adapter->ena_napi[i].napi);
}
@@ -1578,7 +2225,7 @@ static int ena_rss_configure(struct ena_adapter *adapter)
rc = ena_rss_init_default(adapter);
if (rc && (rc != -EOPNOTSUPP)) {
netif_err(adapter, ifup, adapter->netdev,
- "Failed to init RSS rc: %d\n", rc);
+ "Failed to init RSS rc: %d\n", rc);
return rc;
}
}
@@ -1616,7 +2263,9 @@ static int ena_up_complete(struct ena_adapter *adapter)
/* enable transmits */
netif_tx_start_all_queues(adapter->netdev);
- ena_napi_enable_all(adapter);
+ ena_napi_enable_in_range(adapter,
+ 0,
+ adapter->xdp_num_queues + adapter->num_io_queues);
return 0;
}
@@ -1649,7 +2298,7 @@ static int ena_create_io_tx_queue(struct ena_adapter *adapter, int qid)
if (rc) {
netif_err(adapter, ifup, adapter->netdev,
"Failed to create I/O TX queue num %d rc: %d\n",
- qid, rc);
+ qid, rc);
return rc;
}
@@ -1668,12 +2317,13 @@ static int ena_create_io_tx_queue(struct ena_adapter *adapter, int qid)
return rc;
}
-static int ena_create_all_io_tx_queues(struct ena_adapter *adapter)
+static int ena_create_io_tx_queues_in_range(struct ena_adapter *adapter,
+ int first_index, int count)
{
struct ena_com_dev *ena_dev = adapter->ena_dev;
int rc, i;
- for (i = 0; i < adapter->num_io_queues; i++) {
+ for (i = first_index; i < first_index + count; i++) {
rc = ena_create_io_tx_queue(adapter, i);
if (rc)
goto create_err;
@@ -1682,7 +2332,7 @@ static int ena_create_all_io_tx_queues(struct ena_adapter *adapter)
return 0;
create_err:
- while (i--)
+ while (i-- > first_index)
ena_com_destroy_io_queue(ena_dev, ENA_IO_TXQ_IDX(i));
return rc;
@@ -1727,13 +2377,15 @@ static int ena_create_io_rx_queue(struct ena_adapter *adapter, int qid)
netif_err(adapter, ifup, adapter->netdev,
"Failed to get RX queue handlers. RX queue num %d rc: %d\n",
qid, rc);
- ena_com_destroy_io_queue(ena_dev, ena_qid);
- return rc;
+ goto err;
}
ena_com_update_numa_node(rx_ring->ena_com_io_cq, ctx.numa_node);
return rc;
+err:
+ ena_com_destroy_io_queue(ena_dev, ena_qid);
+ return rc;
}
static int ena_create_all_io_rx_queues(struct ena_adapter *adapter)
@@ -1760,7 +2412,8 @@ create_err:
}
static void set_io_rings_size(struct ena_adapter *adapter,
- int new_tx_size, int new_rx_size)
+ int new_tx_size,
+ int new_rx_size)
{
int i;
@@ -1794,14 +2447,24 @@ static int create_queues_with_size_backoff(struct ena_adapter *adapter)
* ones due to past queue allocation failures.
*/
set_io_rings_size(adapter, adapter->requested_tx_ring_size,
- adapter->requested_rx_ring_size);
+ adapter->requested_rx_ring_size);
while (1) {
- rc = ena_setup_all_tx_resources(adapter);
+ if (ena_xdp_present(adapter)) {
+ rc = ena_setup_and_create_all_xdp_queues(adapter);
+
+ if (rc)
+ goto err_setup_tx;
+ }
+ rc = ena_setup_tx_resources_in_range(adapter,
+ 0,
+ adapter->num_io_queues);
if (rc)
goto err_setup_tx;
- rc = ena_create_all_io_tx_queues(adapter);
+ rc = ena_create_io_tx_queues_in_range(adapter,
+ 0,
+ adapter->num_io_queues);
if (rc)
goto err_create_tx_queues;
@@ -1825,7 +2488,7 @@ err_setup_tx:
if (rc != -ENOMEM) {
netif_err(adapter, ifup, adapter->netdev,
"Queue creation failed with error code %d\n",
- rc);
+ rc);
return rc;
}
@@ -1848,7 +2511,7 @@ err_setup_tx:
new_rx_ring_size = cur_rx_ring_size / 2;
if (new_tx_ring_size < ENA_MIN_RING_SIZE ||
- new_rx_ring_size < ENA_MIN_RING_SIZE) {
+ new_rx_ring_size < ENA_MIN_RING_SIZE) {
netif_err(adapter, ifup, adapter->netdev,
"Queue creation failed with the smallest possible queue size of %d for both queues. Not retrying with smaller queues\n",
ENA_MIN_RING_SIZE);
@@ -1867,10 +2530,11 @@ err_setup_tx:
static int ena_up(struct ena_adapter *adapter)
{
- int rc, i;
+ int io_queue_count, rc, i;
netdev_dbg(adapter->netdev, "%s\n", __func__);
+ io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues;
ena_setup_io_intr(adapter);
/* napi poll functions should be initialized before running
@@ -1878,7 +2542,7 @@ static int ena_up(struct ena_adapter *adapter)
* interrupt, causing the ISR to fire immediately while the poll
* function wasn't set yet, causing a null dereference
*/
- ena_init_napi(adapter);
+ ena_init_napi_in_range(adapter, 0, io_queue_count);
rc = ena_request_io_irq(adapter);
if (rc)
@@ -1909,7 +2573,7 @@ static int ena_up(struct ena_adapter *adapter)
/* schedule napi in case we had pending packets
* from the last time we disable napi
*/
- for (i = 0; i < adapter->num_io_queues; i++)
+ for (i = 0; i < io_queue_count; i++)
napi_schedule(&adapter->ena_napi[i].napi);
return rc;
@@ -1922,13 +2586,15 @@ err_up:
err_create_queues_with_backoff:
ena_free_io_irq(adapter);
err_req_irq:
- ena_del_napi(adapter);
+ ena_del_napi_in_range(adapter, 0, io_queue_count);
return rc;
}
static void ena_down(struct ena_adapter *adapter)
{
+ int io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues;
+
netif_info(adapter, ifdown, adapter->netdev, "%s\n", __func__);
clear_bit(ENA_FLAG_DEV_UP, &adapter->flags);
@@ -1941,7 +2607,7 @@ static void ena_down(struct ena_adapter *adapter)
netif_tx_disable(adapter->netdev);
/* After this point the napi handler won't enable the tx queue */
- ena_napi_disable_all(adapter);
+ ena_napi_disable_in_range(adapter, 0, io_queue_count);
/* After destroy the queue there won't be any new interrupts */
@@ -1959,7 +2625,7 @@ static void ena_down(struct ena_adapter *adapter)
ena_disable_io_intr_sync(adapter);
ena_free_io_irq(adapter);
- ena_del_napi(adapter);
+ ena_del_napi_in_range(adapter, 0, io_queue_count);
ena_free_all_tx_bufs(adapter);
ena_free_all_rx_bufs(adapter);
@@ -2049,23 +2715,47 @@ int ena_update_queue_sizes(struct ena_adapter *adapter,
ena_close(adapter->netdev);
adapter->requested_tx_ring_size = new_tx_size;
adapter->requested_rx_ring_size = new_rx_size;
- ena_init_io_rings(adapter);
+ ena_init_io_rings(adapter,
+ 0,
+ adapter->xdp_num_queues +
+ adapter->num_io_queues);
return dev_was_up ? ena_up(adapter) : 0;
}
int ena_update_queue_count(struct ena_adapter *adapter, u32 new_channel_count)
{
struct ena_com_dev *ena_dev = adapter->ena_dev;
+ int prev_channel_count;
bool dev_was_up;
dev_was_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags);
ena_close(adapter->netdev);
+ prev_channel_count = adapter->num_io_queues;
adapter->num_io_queues = new_channel_count;
+ if (ena_xdp_present(adapter) &&
+ ena_xdp_allowed(adapter) == ENA_XDP_ALLOWED) {
+ adapter->xdp_first_ring = new_channel_count;
+ adapter->xdp_num_queues = new_channel_count;
+ if (prev_channel_count > new_channel_count)
+ ena_xdp_exchange_program_rx_in_range(adapter,
+ NULL,
+ new_channel_count,
+ prev_channel_count);
+ else
+ ena_xdp_exchange_program_rx_in_range(adapter,
+ adapter->xdp_bpf_prog,
+ prev_channel_count,
+ new_channel_count);
+ }
+
/* We need to destroy the rss table so that the indirection
* table will be reinitialized by ena_up()
*/
ena_com_rss_destroy(ena_dev);
- ena_init_io_rings(adapter);
+ ena_init_io_rings(adapter,
+ 0,
+ adapter->xdp_num_queues +
+ adapter->num_io_queues);
return dev_was_up ? ena_open(adapter->netdev) : 0;
}
@@ -2249,7 +2939,7 @@ error_report_dma_error:
tx_info->skb = NULL;
tx_info->num_of_bufs += i;
- ena_unmap_tx_skb(tx_ring, tx_info);
+ ena_unmap_tx_buff(tx_ring, tx_info);
return -EINVAL;
}
@@ -2264,7 +2954,7 @@ static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev)
struct netdev_queue *txq;
void *push_hdr;
u16 next_to_use, req_id, header_len;
- int qid, rc, nb_hw_desc;
+ int qid, rc;
netif_dbg(adapter, tx_queued, dev, "%s skb %p\n", __func__, skb);
/* Determine which tx ring we will be placed on */
@@ -2299,50 +2989,17 @@ static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev)
/* set flags and meta data */
ena_tx_csum(&ena_tx_ctx, skb);
- if (unlikely(ena_com_is_doorbell_needed(tx_ring->ena_com_io_sq, &ena_tx_ctx))) {
- netif_dbg(adapter, tx_queued, dev,
- "llq tx max burst size of queue %d achieved, writing doorbell to send burst\n",
- qid);
- ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq);
- }
-
- /* prepare the packet's descriptors to dma engine */
- rc = ena_com_prepare_tx(tx_ring->ena_com_io_sq, &ena_tx_ctx,
- &nb_hw_desc);
-
- /* ena_com_prepare_tx() can't fail due to overflow of tx queue,
- * since the number of free descriptors in the queue is checked
- * after sending the previous packet. In case there isn't enough
- * space in the queue for the next packet, it is stopped
- * until there is again enough available space in the queue.
- * All other failure reasons of ena_com_prepare_tx() are fatal
- * and therefore require a device reset.
- */
- if (unlikely(rc)) {
- netif_err(adapter, tx_queued, dev,
- "failed to prepare tx bufs\n");
- u64_stats_update_begin(&tx_ring->syncp);
- tx_ring->tx_stats.prepare_ctx_err++;
- u64_stats_update_end(&tx_ring->syncp);
- adapter->reset_reason = ENA_REGS_RESET_DRIVER_INVALID_STATE;
- set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
+ rc = ena_xmit_common(dev,
+ tx_ring,
+ tx_info,
+ &ena_tx_ctx,
+ next_to_use,
+ skb->len);
+ if (rc)
goto error_unmap_dma;
- }
netdev_tx_sent_queue(txq, skb->len);
- u64_stats_update_begin(&tx_ring->syncp);
- tx_ring->tx_stats.cnt++;
- tx_ring->tx_stats.bytes += skb->len;
- u64_stats_update_end(&tx_ring->syncp);
-
- tx_info->tx_descs = nb_hw_desc;
- tx_info->last_jiffies = jiffies;
- tx_info->print_once = 0;
-
- tx_ring->next_to_use = ENA_TX_RING_IDX_NEXT(next_to_use,
- tx_ring->ring_size);
-
/* stop the queue when no more space available, the packet can have up
* to sgl_size + 2. one for the meta descriptor and one for header
* (if the header is larger than tx_max_header_size).
@@ -2389,7 +3046,7 @@ static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev)
return NETDEV_TX_OK;
error_unmap_dma:
- ena_unmap_tx_skb(tx_ring, tx_info);
+ ena_unmap_tx_buff(tx_ring, tx_info);
tx_info->skb = NULL;
error_drop_packet:
@@ -2568,6 +3225,7 @@ static const struct net_device_ops ena_netdev_ops = {
.ndo_change_mtu = ena_change_mtu,
.ndo_set_mac_address = NULL,
.ndo_validate_addr = eth_validate_addr,
+ .ndo_bpf = ena_xdp,
};
static int ena_device_validate_params(struct ena_adapter *adapter,
@@ -2947,7 +3605,9 @@ static void check_for_missing_completions(struct ena_adapter *adapter)
struct ena_ring *tx_ring;
struct ena_ring *rx_ring;
int i, budget, rc;
+ int io_queue_count;
+ io_queue_count = adapter->xdp_num_queues + adapter->num_io_queues;
/* Make sure the driver doesn't turn the device in other process */
smp_rmb();
@@ -2962,7 +3622,7 @@ static void check_for_missing_completions(struct ena_adapter *adapter)
budget = ENA_MONITORED_TX_QUEUES;
- for (i = adapter->last_monitored_tx_qid; i < adapter->num_io_queues; i++) {
+ for (i = adapter->last_monitored_tx_qid; i < io_queue_count; i++) {
tx_ring = &adapter->tx_ring[i];
rx_ring = &adapter->rx_ring[i];
@@ -2970,7 +3630,8 @@ static void check_for_missing_completions(struct ena_adapter *adapter)
if (unlikely(rc))
return;
- rc = check_for_rx_interrupt_queue(adapter, rx_ring);
+ rc = !ENA_IS_XDP_INDEX(adapter, i) ?
+ check_for_rx_interrupt_queue(adapter, rx_ring) : 0;
if (unlikely(rc))
return;
@@ -2979,7 +3640,7 @@ static void check_for_missing_completions(struct ena_adapter *adapter)
break;
}
- adapter->last_monitored_tx_qid = i % adapter->num_io_queues;
+ adapter->last_monitored_tx_qid = i % io_queue_count;
}
/* trigger napi schedule after 2 consecutive detections */
@@ -3556,6 +4217,9 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
adapter->num_io_queues = max_num_io_queues;
adapter->max_num_io_queues = max_num_io_queues;
+ adapter->xdp_first_ring = 0;
+ adapter->xdp_num_queues = 0;
+
adapter->last_monitored_tx_qid = 0;
adapter->rx_copybreak = ENA_DEFAULT_RX_COPYBREAK;
@@ -3569,7 +4233,10 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
"Failed to query interrupt moderation feature\n");
goto err_netdev_destroy;
}
- ena_init_io_rings(adapter);
+ ena_init_io_rings(adapter,
+ 0,
+ adapter->xdp_num_queues +
+ adapter->num_io_queues);
netdev->netdev_ops = &ena_netdev_ops;
netdev->watchdog_timeo = TX_TIMEOUT;
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h
index bffd778f2ce3..094324fd0edc 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.h
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h
@@ -36,6 +36,7 @@
#include <linux/bitops.h>
#include <linux/dim.h>
#include <linux/etherdevice.h>
+#include <linux/if_vlan.h>
#include <linux/inetdevice.h>
#include <linux/interrupt.h>
#include <linux/netdevice.h>
@@ -142,6 +143,18 @@
#define ENA_MMIO_DISABLE_REG_READ BIT(0)
+/* The max MTU size is configured to be the ethernet frame size without
+ * the overhead of the ethernet header, which can have a VLAN header, and
+ * a frame check sequence (FCS).
+ * The buffer size we share with the device is defined to be ENA_PAGE_SIZE
+ */
+
+#define ENA_XDP_MAX_MTU (ENA_PAGE_SIZE - ETH_HLEN - ETH_FCS_LEN - \
+ VLAN_HLEN - XDP_PACKET_HEADROOM)
+
+#define ENA_IS_XDP_INDEX(adapter, index) (((index) >= (adapter)->xdp_first_ring) && \
+ ((index) < (adapter)->xdp_first_ring + (adapter)->xdp_num_queues))
+
struct ena_irq {
irq_handler_t handler;
void *data;
@@ -155,6 +168,8 @@ struct ena_napi {
struct napi_struct napi ____cacheline_aligned;
struct ena_ring *tx_ring;
struct ena_ring *rx_ring;
+ struct ena_ring *xdp_ring;
+ bool first_interrupt;
u32 qid;
struct dim dim;
};
@@ -180,6 +195,17 @@ struct ena_tx_buffer {
/* num of buffers used by this skb */
u32 num_of_bufs;
+ /* XDP buffer structure which is used for sending packets in
+ * the xdp queues
+ */
+ struct xdp_frame *xdpf;
+ /* The rx page for the rx buffer that was received in rx and
+ * re transmitted on xdp tx queues as a result of XDP_TX action.
+ * We need to free the page once we finished cleaning the buffer in
+ * clean_xdp_irq()
+ */
+ struct page *xdp_rx_page;
+
/* Indicate if bufs[0] map the linear data of the skb. */
u8 map_linear_data;
@@ -258,10 +284,13 @@ struct ena_ring {
struct ena_adapter *adapter;
struct ena_com_io_cq *ena_com_io_cq;
struct ena_com_io_sq *ena_com_io_sq;
+ struct bpf_prog *xdp_bpf_prog;
+ struct xdp_rxq_info xdp_rxq;
u16 next_to_use;
u16 next_to_clean;
u16 rx_copybreak;
+ u16 rx_headroom;
u16 qid;
u16 mtu;
u16 sgl_size;
@@ -379,6 +408,10 @@ struct ena_adapter {
u32 last_monitored_tx_qid;
enum ena_regs_reset_reason_types reset_reason;
+
+ struct bpf_prog *xdp_bpf_prog;
+ u32 xdp_first_ring;
+ u32 xdp_num_queues;
};
void ena_set_ethtool_ops(struct net_device *netdev);
@@ -390,8 +423,48 @@ void ena_dump_stats_to_buf(struct ena_adapter *adapter, u8 *buf);
int ena_update_queue_sizes(struct ena_adapter *adapter,
u32 new_tx_size,
u32 new_rx_size);
+
int ena_update_queue_count(struct ena_adapter *adapter, u32 new_channel_count);
int ena_get_sset_count(struct net_device *netdev, int sset);
+enum ena_xdp_errors_t {
+ ENA_XDP_ALLOWED = 0,
+ ENA_XDP_CURRENT_MTU_TOO_LARGE,
+ ENA_XDP_NO_ENOUGH_QUEUES,
+};
+
+static inline bool ena_xdp_queues_present(struct ena_adapter *adapter)
+{
+ return adapter->xdp_first_ring != 0;
+}
+
+static inline bool ena_xdp_present(struct ena_adapter *adapter)
+{
+ return !!adapter->xdp_bpf_prog;
+}
+
+static inline bool ena_xdp_present_ring(struct ena_ring *ring)
+{
+ return !!ring->xdp_bpf_prog;
+}
+
+static inline int ena_xdp_legal_queue_count(struct ena_adapter *adapter,
+ u32 queues)
+{
+ return 2 * queues <= adapter->max_num_io_queues;
+}
+
+static inline enum ena_xdp_errors_t ena_xdp_allowed(struct ena_adapter *adapter)
+{
+ enum ena_xdp_errors_t rc = ENA_XDP_ALLOWED;
+
+ if (adapter->netdev->mtu > ENA_XDP_MAX_MTU)
+ rc = ENA_XDP_CURRENT_MTU_TOO_LARGE;
+ else if (!ena_xdp_legal_queue_count(adapter, adapter->num_io_queues))
+ rc = ENA_XDP_NO_ENOUGH_QUEUES;
+
+ return rc;
+}
+
#endif /* !(ENA_H) */
diff --git a/drivers/net/ethernet/amd/7990.c b/drivers/net/ethernet/amd/7990.c
index ab30761003da..cf3562e82ca9 100644
--- a/drivers/net/ethernet/amd/7990.c
+++ b/drivers/net/ethernet/amd/7990.c
@@ -527,7 +527,7 @@ int lance_close(struct net_device *dev)
}
EXPORT_SYMBOL_GPL(lance_close);
-void lance_tx_timeout(struct net_device *dev)
+void lance_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
printk("lance_tx_timeout\n");
lance_reset(dev);
diff --git a/drivers/net/ethernet/amd/7990.h b/drivers/net/ethernet/amd/7990.h
index 741cdc392c6b..8266b3c1fefc 100644
--- a/drivers/net/ethernet/amd/7990.h
+++ b/drivers/net/ethernet/amd/7990.h
@@ -243,7 +243,7 @@ int lance_open(struct net_device *dev);
int lance_close(struct net_device *dev);
int lance_start_xmit(struct sk_buff *skb, struct net_device *dev);
void lance_set_multicast(struct net_device *dev);
-void lance_tx_timeout(struct net_device *dev);
+void lance_tx_timeout(struct net_device *dev, unsigned int txqueue);
#ifdef CONFIG_NET_POLL_CONTROLLER
void lance_poll(struct net_device *dev);
#endif
diff --git a/drivers/net/ethernet/amd/a2065.c b/drivers/net/ethernet/amd/a2065.c
index 212fe72a190b..2f808dbc8b0e 100644
--- a/drivers/net/ethernet/amd/a2065.c
+++ b/drivers/net/ethernet/amd/a2065.c
@@ -118,10 +118,6 @@ struct lance_private {
int auto_select; /* cable-selection by carrier */
unsigned short busmaster_regval;
-#ifdef CONFIG_SUNLANCE
- struct Linux_SBus_DMA *ledma; /* if set this points to ledma and arch=4m */
- int burst_sizes; /* ledma SBus burst sizes */
-#endif
struct timer_list multicast_timer;
struct net_device *dev;
};
@@ -522,7 +518,7 @@ static inline int lance_reset(struct net_device *dev)
return status;
}
-static void lance_tx_timeout(struct net_device *dev)
+static void lance_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct lance_private *lp = netdev_priv(dev);
volatile struct lance_regs *ll = lp->ll;
@@ -551,11 +547,10 @@ static netdev_tx_t lance_start_xmit(struct sk_buff *skb,
if (!lance_tx_buffs_avail(lp))
goto out_free;
-#ifdef DEBUG
/* dump the packet */
- print_hex_dump(KERN_DEBUG, "skb->data: ", DUMP_PREFIX_NONE,
- 16, 1, skb->data, 64, true);
-#endif
+ print_hex_dump_debug("skb->data: ", DUMP_PREFIX_NONE, 16, 1, skb->data,
+ 64, true);
+
entry = lp->tx_new & lp->tx_ring_mod_mask;
ib->btx_ring[entry].length = (-skblen) | 0xf000;
ib->btx_ring[entry].misc = 0;
diff --git a/drivers/net/ethernet/amd/am79c961a.c b/drivers/net/ethernet/amd/am79c961a.c
index 0842da492a64..1c53408f5d47 100644
--- a/drivers/net/ethernet/amd/am79c961a.c
+++ b/drivers/net/ethernet/amd/am79c961a.c
@@ -422,7 +422,7 @@ static void am79c961_setmulticastlist (struct net_device *dev)
spin_unlock_irqrestore(&priv->chip_lock, flags);
}
-static void am79c961_timeout(struct net_device *dev)
+static void am79c961_timeout(struct net_device *dev, unsigned int txqueue)
{
printk(KERN_WARNING "%s: transmit timed out, network cable problem?\n",
dev->name);
diff --git a/drivers/net/ethernet/amd/amd8111e.c b/drivers/net/ethernet/amd/amd8111e.c
index 573e88fc8ede..0f3b743425e8 100644
--- a/drivers/net/ethernet/amd/amd8111e.c
+++ b/drivers/net/ethernet/amd/amd8111e.c
@@ -1569,7 +1569,7 @@ static int amd8111e_enable_link_change(struct amd8111e_priv *lp)
* failed or the interface is locked up. This function will reinitialize
* the hardware.
*/
-static void amd8111e_tx_timeout(struct net_device *dev)
+static void amd8111e_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct amd8111e_priv *lp = netdev_priv(dev);
int err;
diff --git a/drivers/net/ethernet/amd/ariadne.c b/drivers/net/ethernet/amd/ariadne.c
index 4b6a5cb85dd2..5e0f645f5bde 100644
--- a/drivers/net/ethernet/amd/ariadne.c
+++ b/drivers/net/ethernet/amd/ariadne.c
@@ -530,7 +530,7 @@ static inline void ariadne_reset(struct net_device *dev)
netif_start_queue(dev);
}
-static void ariadne_tx_timeout(struct net_device *dev)
+static void ariadne_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
volatile struct Am79C960 *lance = (struct Am79C960 *)dev->base_addr;
diff --git a/drivers/net/ethernet/amd/atarilance.c b/drivers/net/ethernet/amd/atarilance.c
index d3d44e07afbc..4e36122609a3 100644
--- a/drivers/net/ethernet/amd/atarilance.c
+++ b/drivers/net/ethernet/amd/atarilance.c
@@ -346,7 +346,7 @@ static int lance_rx( struct net_device *dev );
static int lance_close( struct net_device *dev );
static void set_multicast_list( struct net_device *dev );
static int lance_set_mac_address( struct net_device *dev, void *addr );
-static void lance_tx_timeout (struct net_device *dev);
+static void lance_tx_timeout (struct net_device *dev, unsigned int txqueue);
/************************* End of Prototypes **************************/
@@ -727,7 +727,7 @@ static void lance_init_ring( struct net_device *dev )
/* XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX */
-static void lance_tx_timeout (struct net_device *dev)
+static void lance_tx_timeout (struct net_device *dev, unsigned int txqueue)
{
struct lance_private *lp = netdev_priv(dev);
struct lance_ioreg *IO = lp->iobase;
diff --git a/drivers/net/ethernet/amd/au1000_eth.c b/drivers/net/ethernet/amd/au1000_eth.c
index 1793950f0582..d832c9f4d306 100644
--- a/drivers/net/ethernet/amd/au1000_eth.c
+++ b/drivers/net/ethernet/amd/au1000_eth.c
@@ -1014,7 +1014,7 @@ static netdev_tx_t au1000_tx(struct sk_buff *skb, struct net_device *dev)
* The Tx ring has been full longer than the watchdog timeout
* value. The transmitter must be hung?
*/
-static void au1000_tx_timeout(struct net_device *dev)
+static void au1000_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
netdev_err(dev, "au1000_tx_timeout: dev=%p\n", dev);
au1000_reset_mac(dev);
diff --git a/drivers/net/ethernet/amd/declance.c b/drivers/net/ethernet/amd/declance.c
index dac4a2fcad6a..6592a2db9efb 100644
--- a/drivers/net/ethernet/amd/declance.c
+++ b/drivers/net/ethernet/amd/declance.c
@@ -884,7 +884,7 @@ static inline int lance_reset(struct net_device *dev)
return status;
}
-static void lance_tx_timeout(struct net_device *dev)
+static void lance_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct lance_private *lp = netdev_priv(dev);
volatile struct lance_regs *ll = lp->ll;
diff --git a/drivers/net/ethernet/amd/lance.c b/drivers/net/ethernet/amd/lance.c
index f90b454b1642..aff44241988c 100644
--- a/drivers/net/ethernet/amd/lance.c
+++ b/drivers/net/ethernet/amd/lance.c
@@ -306,7 +306,7 @@ static irqreturn_t lance_interrupt(int irq, void *dev_id);
static int lance_close(struct net_device *dev);
static struct net_device_stats *lance_get_stats(struct net_device *dev);
static void set_multicast_list(struct net_device *dev);
-static void lance_tx_timeout (struct net_device *dev);
+static void lance_tx_timeout (struct net_device *dev, unsigned int txqueue);
@@ -913,7 +913,7 @@ lance_restart(struct net_device *dev, unsigned int csr0_bits, int must_reinit)
}
-static void lance_tx_timeout (struct net_device *dev)
+static void lance_tx_timeout (struct net_device *dev, unsigned int txqueue)
{
struct lance_private *lp = (struct lance_private *) dev->ml_priv;
int ioaddr = dev->base_addr;
diff --git a/drivers/net/ethernet/amd/ni65.c b/drivers/net/ethernet/amd/ni65.c
index c6c2a54c1121..c38edf6f03a3 100644
--- a/drivers/net/ethernet/amd/ni65.c
+++ b/drivers/net/ethernet/amd/ni65.c
@@ -254,7 +254,7 @@ static int ni65_lance_reinit(struct net_device *dev);
static void ni65_init_lance(struct priv *p,unsigned char*,int,int);
static netdev_tx_t ni65_send_packet(struct sk_buff *skb,
struct net_device *dev);
-static void ni65_timeout(struct net_device *dev);
+static void ni65_timeout(struct net_device *dev, unsigned int txqueue);
static int ni65_close(struct net_device *dev);
static int ni65_alloc_buffer(struct net_device *dev);
static void ni65_free_buffer(struct priv *p);
@@ -1133,7 +1133,7 @@ static void ni65_recv_intr(struct net_device *dev,int csr0)
* kick xmitter ..
*/
-static void ni65_timeout(struct net_device *dev)
+static void ni65_timeout(struct net_device *dev, unsigned int txqueue)
{
int i;
struct priv *p = dev->ml_priv;
diff --git a/drivers/net/ethernet/amd/nmclan_cs.c b/drivers/net/ethernet/amd/nmclan_cs.c
index 9c152d85840d..023aecf6ab30 100644
--- a/drivers/net/ethernet/amd/nmclan_cs.c
+++ b/drivers/net/ethernet/amd/nmclan_cs.c
@@ -407,7 +407,7 @@ static int mace_open(struct net_device *dev);
static int mace_close(struct net_device *dev);
static netdev_tx_t mace_start_xmit(struct sk_buff *skb,
struct net_device *dev);
-static void mace_tx_timeout(struct net_device *dev);
+static void mace_tx_timeout(struct net_device *dev, unsigned int txqueue);
static irqreturn_t mace_interrupt(int irq, void *dev_id);
static struct net_device_stats *mace_get_stats(struct net_device *dev);
static int mace_rx(struct net_device *dev, unsigned char RxCnt);
@@ -837,7 +837,7 @@ mace_start_xmit
failed, put skb back into a list."
---------------------------------------------------------------------------- */
-static void mace_tx_timeout(struct net_device *dev)
+static void mace_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
mace_private *lp = netdev_priv(dev);
struct pcmcia_device *link = lp->p_dev;
diff --git a/drivers/net/ethernet/amd/pcnet32.c b/drivers/net/ethernet/amd/pcnet32.c
index f5ad12c10934..dc7d88227e76 100644
--- a/drivers/net/ethernet/amd/pcnet32.c
+++ b/drivers/net/ethernet/amd/pcnet32.c
@@ -314,7 +314,7 @@ static int pcnet32_open(struct net_device *);
static int pcnet32_init_ring(struct net_device *);
static netdev_tx_t pcnet32_start_xmit(struct sk_buff *,
struct net_device *);
-static void pcnet32_tx_timeout(struct net_device *dev);
+static void pcnet32_tx_timeout(struct net_device *dev, unsigned int txqueue);
static irqreturn_t pcnet32_interrupt(int, void *);
static int pcnet32_close(struct net_device *);
static struct net_device_stats *pcnet32_get_stats(struct net_device *);
@@ -2455,7 +2455,7 @@ static void pcnet32_restart(struct net_device *dev, unsigned int csr0_bits)
lp->a->write_csr(ioaddr, CSR0, csr0_bits);
}
-static void pcnet32_tx_timeout(struct net_device *dev)
+static void pcnet32_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct pcnet32_private *lp = netdev_priv(dev);
unsigned long ioaddr = dev->base_addr, flags;
diff --git a/drivers/net/ethernet/amd/sunlance.c b/drivers/net/ethernet/amd/sunlance.c
index ebcbf8ca4829..b00e00881253 100644
--- a/drivers/net/ethernet/amd/sunlance.c
+++ b/drivers/net/ethernet/amd/sunlance.c
@@ -1097,7 +1097,7 @@ static void lance_piozero(void __iomem *dest, int len)
sbus_writeb(0, piobuf);
}
-static void lance_tx_timeout(struct net_device *dev)
+static void lance_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct lance_private *lp = netdev_priv(dev);
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
index 98f8f2033154..b71f9b04a51e 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
@@ -2152,7 +2152,7 @@ static int xgbe_change_mtu(struct net_device *netdev, int mtu)
return 0;
}
-static void xgbe_tx_timeout(struct net_device *netdev)
+static void xgbe_tx_timeout(struct net_device *netdev, unsigned int txqueue)
{
struct xgbe_prv_data *pdata = netdev_priv(netdev);
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c b/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c
index a880f10e3e70..8083173f1a8f 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c
@@ -129,13 +129,13 @@ struct xgbe_stats {
#define XGMAC_MMC_STAT(_string, _var) \
{ _string, \
- FIELD_SIZEOF(struct xgbe_mmc_stats, _var), \
+ sizeof_field(struct xgbe_mmc_stats, _var), \
offsetof(struct xgbe_prv_data, mmc_stats._var), \
}
#define XGMAC_EXT_STAT(_string, _var) \
{ _string, \
- FIELD_SIZEOF(struct xgbe_ext_stats, _var), \
+ sizeof_field(struct xgbe_ext_stats, _var), \
offsetof(struct xgbe_prv_data, ext_stats._var), \
}
diff --git a/drivers/net/ethernet/apm/xgene-v2/main.c b/drivers/net/ethernet/apm/xgene-v2/main.c
index 02b4f3af02b5..c48f60996761 100644
--- a/drivers/net/ethernet/apm/xgene-v2/main.c
+++ b/drivers/net/ethernet/apm/xgene-v2/main.c
@@ -575,7 +575,7 @@ static void xge_free_pending_skb(struct net_device *ndev)
}
}
-static void xge_timeout(struct net_device *ndev)
+static void xge_timeout(struct net_device *ndev, unsigned int txqueue)
{
struct xge_pdata *pdata = netdev_priv(ndev);
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
index d8612131c55e..e284b6753725 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
@@ -859,7 +859,7 @@ static int xgene_enet_napi(struct napi_struct *napi, const int budget)
return processed;
}
-static void xgene_enet_timeout(struct net_device *ndev)
+static void xgene_enet_timeout(struct net_device *ndev, unsigned int txqueue)
{
struct xgene_enet_pdata *pdata = netdev_priv(ndev);
struct netdev_queue *txq;
diff --git a/drivers/net/ethernet/apple/macmace.c b/drivers/net/ethernet/apple/macmace.c
index 8d03578d5e8c..95d3061c61be 100644
--- a/drivers/net/ethernet/apple/macmace.c
+++ b/drivers/net/ethernet/apple/macmace.c
@@ -91,7 +91,7 @@ static int mace_set_address(struct net_device *dev, void *addr);
static void mace_reset(struct net_device *dev);
static irqreturn_t mace_interrupt(int irq, void *dev_id);
static irqreturn_t mace_dma_intr(int irq, void *dev_id);
-static void mace_tx_timeout(struct net_device *dev);
+static void mace_tx_timeout(struct net_device *dev, unsigned int txqueue);
static void __mace_set_address(struct net_device *dev, void *addr);
/*
@@ -600,7 +600,7 @@ static irqreturn_t mace_interrupt(int irq, void *dev_id)
return IRQ_HANDLED;
}
-static void mace_tx_timeout(struct net_device *dev)
+static void mace_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct mace_data *mp = netdev_priv(dev);
volatile struct mace *mb = mp->mace;
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
index a17a4da7bc15..c85e3e29012c 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
@@ -403,6 +403,8 @@ int aq_nic_start(struct aq_nic_s *self)
if (err < 0)
goto err_exit;
+ aq_nic_set_loopback(self);
+
err = self->aq_hw_ops->hw_start(self->aq_hw);
if (err < 0)
goto err_exit;
@@ -413,8 +415,6 @@ int aq_nic_start(struct aq_nic_s *self)
INIT_WORK(&self->service_task, aq_nic_service_task);
- aq_nic_set_loopback(self);
-
timer_setup(&self->service_timer, aq_nic_service_timer_cb, 0);
aq_nic_service_timer_cb(&self->service_timer);
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
index 58e891af6e09..ec041f78d063 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
@@ -1525,9 +1525,6 @@ const struct aq_hw_ops hw_atl_ops_b0 = {
.rx_extract_ts = hw_atl_b0_rx_extract_ts,
.extract_hwts = hw_atl_b0_extract_hwts,
.hw_set_offload = hw_atl_b0_hw_offload_set,
- .hw_get_hw_stats = hw_atl_utils_get_hw_stats,
- .hw_get_fw_version = hw_atl_utils_get_fw_version,
- .hw_set_offload = hw_atl_b0_hw_offload_set,
.hw_set_loopback = hw_atl_b0_set_loopback,
.hw_set_fc = hw_atl_b0_set_fc,
};
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c
index 8910b62e67ed..f547baa6c954 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c
@@ -667,9 +667,7 @@ int hw_atl_utils_mpi_get_link_status(struct aq_hw_s *self)
u32 speed;
mpi_state = hw_atl_utils_mpi_get_state(self);
- speed = mpi_state & (FW2X_RATE_100M | FW2X_RATE_1G |
- FW2X_RATE_2G5 | FW2X_RATE_5G |
- FW2X_RATE_10G);
+ speed = mpi_state >> HW_ATL_MPI_SPEED_SHIFT;
if (!speed) {
link_status->mbps = 0U;
diff --git a/drivers/net/ethernet/atheros/ag71xx.c b/drivers/net/ethernet/atheros/ag71xx.c
index 8f5021091eee..88e4e1500a8e 100644
--- a/drivers/net/ethernet/atheros/ag71xx.c
+++ b/drivers/net/ethernet/atheros/ag71xx.c
@@ -313,7 +313,7 @@ struct ag71xx {
struct ag71xx_desc *stop_desc;
dma_addr_t stop_desc_dma;
- int phy_if_mode;
+ phy_interface_t phy_if_mode;
struct delayed_work restart_work;
struct timer_list oom_timer;
@@ -1409,7 +1409,7 @@ static void ag71xx_oom_timer_handler(struct timer_list *t)
napi_schedule(&ag->napi);
}
-static void ag71xx_tx_timeout(struct net_device *ndev)
+static void ag71xx_tx_timeout(struct net_device *ndev, unsigned int txqueue)
{
struct ag71xx *ag = netdev_priv(ndev);
@@ -1744,7 +1744,7 @@ static int ag71xx_probe(struct platform_device *pdev)
eth_random_addr(ndev->dev_addr);
}
- err = of_get_phy_mode(np, ag->phy_if_mode);
+ err = of_get_phy_mode(np, &ag->phy_if_mode);
if (err) {
netif_err(ag, probe, ndev, "missing phy-mode property in DT\n");
goto err_free;
diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c
index d4bbcdfd691a..1dcbc486eca9 100644
--- a/drivers/net/ethernet/atheros/alx/main.c
+++ b/drivers/net/ethernet/atheros/alx/main.c
@@ -1553,7 +1553,7 @@ static netdev_tx_t alx_start_xmit(struct sk_buff *skb,
return alx_start_xmit_ring(skb, alx_tx_queue_mapping(alx, skb));
}
-static void alx_tx_timeout(struct net_device *dev)
+static void alx_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct alx_priv *alx = netdev_priv(dev);
diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
index 2b239ecea05f..4c0b1f8551dd 100644
--- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
+++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
@@ -350,7 +350,7 @@ static void atl1c_del_timer(struct atl1c_adapter *adapter)
* atl1c_tx_timeout - Respond to a Tx Hang
* @netdev: network interface device structure
*/
-static void atl1c_tx_timeout(struct net_device *netdev)
+static void atl1c_tx_timeout(struct net_device *netdev, unsigned int txqueue)
{
struct atl1c_adapter *adapter = netdev_priv(netdev);
diff --git a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c
index 4f7b65825c15..e0d89942d537 100644
--- a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c
+++ b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c
@@ -251,7 +251,7 @@ static void atl1e_cancel_work(struct atl1e_adapter *adapter)
* atl1e_tx_timeout - Respond to a Tx Hang
* @netdev: network interface device structure
*/
-static void atl1e_tx_timeout(struct net_device *netdev)
+static void atl1e_tx_timeout(struct net_device *netdev, unsigned int txqueue)
{
struct atl1e_adapter *adapter = netdev_priv(netdev);
diff --git a/drivers/net/ethernet/atheros/atlx/atl2.c b/drivers/net/ethernet/atheros/atlx/atl2.c
index 3aba38322717..b81a4e0c5b57 100644
--- a/drivers/net/ethernet/atheros/atlx/atl2.c
+++ b/drivers/net/ethernet/atheros/atlx/atl2.c
@@ -1001,7 +1001,7 @@ static int atl2_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
* atl2_tx_timeout - Respond to a Tx Hang
* @netdev: network interface device structure
*/
-static void atl2_tx_timeout(struct net_device *netdev)
+static void atl2_tx_timeout(struct net_device *netdev, unsigned int txqueue)
{
struct atl2_adapter *adapter = netdev_priv(netdev);
diff --git a/drivers/net/ethernet/atheros/atlx/atlx.c b/drivers/net/ethernet/atheros/atlx/atlx.c
index 505a22c703f7..0941d07d0833 100644
--- a/drivers/net/ethernet/atheros/atlx/atlx.c
+++ b/drivers/net/ethernet/atheros/atlx/atlx.c
@@ -183,7 +183,7 @@ static void atlx_clear_phy_int(struct atlx_adapter *adapter)
* atlx_tx_timeout - Respond to a Tx Hang
* @netdev: network interface device structure
*/
-static void atlx_tx_timeout(struct net_device *netdev)
+static void atlx_tx_timeout(struct net_device *netdev, unsigned int txqueue)
{
struct atlx_adapter *adapter = netdev_priv(netdev);
/* Do the reset outside of interrupt context */
diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c
index 035dbb1b2c98..a780b7215021 100644
--- a/drivers/net/ethernet/broadcom/b44.c
+++ b/drivers/net/ethernet/broadcom/b44.c
@@ -948,7 +948,7 @@ irq_ack:
return IRQ_RETVAL(handled);
}
-static void b44_tx_timeout(struct net_device *dev)
+static void b44_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct b44 *bp = netdev_priv(dev);
@@ -1516,8 +1516,10 @@ static int b44_magic_pattern(u8 *macaddr, u8 *ppattern, u8 *pmask, int offset)
int ethaddr_bytes = ETH_ALEN;
memset(ppattern + offset, 0xff, magicsync);
- for (j = 0; j < magicsync; j++)
- set_bit(len++, (unsigned long *) pmask);
+ for (j = 0; j < magicsync; j++) {
+ pmask[len >> 3] |= BIT(len & 7);
+ len++;
+ }
for (j = 0; j < B44_MAX_PATTERNS; j++) {
if ((B44_PATTERN_SIZE - len) >= ETH_ALEN)
@@ -1529,7 +1531,8 @@ static int b44_magic_pattern(u8 *macaddr, u8 *ppattern, u8 *pmask, int offset)
for (k = 0; k< ethaddr_bytes; k++) {
ppattern[offset + magicsync +
(j * ETH_ALEN) + k] = macaddr[k];
- set_bit(len++, (unsigned long *) pmask);
+ pmask[len >> 3] |= BIT(len & 7);
+ len++;
}
}
return len - 1;
diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c
index 825af709708e..1907e47fd0af 100644
--- a/drivers/net/ethernet/broadcom/bcmsysport.c
+++ b/drivers/net/ethernet/broadcom/bcmsysport.c
@@ -1354,7 +1354,7 @@ out:
return ret;
}
-static void bcm_sysport_tx_timeout(struct net_device *dev)
+static void bcm_sysport_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
netdev_warn(dev, "transmit timeout!\n");
@@ -2427,6 +2427,14 @@ static int bcm_sysport_probe(struct platform_device *pdev)
if (!of_id || !of_id->data)
return -EINVAL;
+ ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(40));
+ if (ret)
+ ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
+ if (ret) {
+ dev_err(&pdev->dev, "unable to set DMA mask: %d\n", ret);
+ return ret;
+ }
+
/* Fairly quickly we need to know the type of adapter we have */
params = of_id->data;
diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c
index fbc196b480b6..dbb7874607ca 100644
--- a/drivers/net/ethernet/broadcom/bnx2.c
+++ b/drivers/net/ethernet/broadcom/bnx2.c
@@ -6575,7 +6575,7 @@ bnx2_dump_state(struct bnx2 *bp)
}
static void
-bnx2_tx_timeout(struct net_device *dev)
+bnx2_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct bnx2 *bp = netdev_priv(dev);
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index 5e037a305b83..ee9e9290f112 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -4970,7 +4970,7 @@ int bnx2x_set_features(struct net_device *dev, netdev_features_t features)
return 0;
}
-void bnx2x_tx_timeout(struct net_device *dev)
+void bnx2x_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct bnx2x *bp = netdev_priv(dev);
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
index 8b08cb18e363..6f1352d51cb2 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
@@ -617,7 +617,7 @@ int bnx2x_set_features(struct net_device *dev, netdev_features_t features);
*
* @dev: net device
*/
-void bnx2x_tx_timeout(struct net_device *dev);
+void bnx2x_tx_timeout(struct net_device *dev, unsigned int txqueue);
/** bnx2x_get_c2s_mapping - read inner-to-outer vlan configuration
* c2s_map should have BNX2X_MAX_PRIORITY entries.
@@ -1109,7 +1109,7 @@ static inline u8 bnx2x_get_path_func_num(struct bnx2x *bp)
for (i = 0; i < E1H_FUNC_MAX / 2; i++) {
u32 func_config =
MF_CFG_RD(bp,
- func_mf_config[BP_PORT(bp) + 2 * i].
+ func_mf_config[BP_PATH(bp) + 2 * i].
config);
func_num +=
((func_config & FUNC_MF_CFG_FUNC_HIDE) ? 0 : 1);
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index 192ff8d5da32..741d865e4afc 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -9976,10 +9976,18 @@ static void bnx2x_recovery_failed(struct bnx2x *bp)
*/
static void bnx2x_parity_recover(struct bnx2x *bp)
{
- bool global = false;
u32 error_recovered, error_unrecovered;
- bool is_parity;
+ bool is_parity, global = false;
+#ifdef CONFIG_BNX2X_SRIOV
+ int vf_idx;
+
+ for (vf_idx = 0; vf_idx < bp->requested_nr_virtfn; vf_idx++) {
+ struct bnx2x_virtf *vf = BP_VF(bp, vf_idx);
+ if (vf)
+ vf->state = VF_LOST;
+ }
+#endif
DP(NETIF_MSG_HW, "Handling parity\n");
while (1) {
switch (bp->recovery_state) {
@@ -15402,6 +15410,7 @@ int bnx2x_configure_ptp_filters(struct bnx2x *bp)
REG_WR(bp, rule, BNX2X_PTP_TX_ON_RULE_MASK);
break;
case HWTSTAMP_TX_ONESTEP_SYNC:
+ case HWTSTAMP_TX_ONESTEP_P2P:
BNX2X_ERR("One-step timestamping is not supported\n");
return -ERANGE;
}
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h
index 7a6e82db4231..bacc8552bce1 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h
@@ -1536,8 +1536,11 @@ void bnx2x_get_rss_ind_table(struct bnx2x_rss_config_obj *rss_obj,
((MAX_MAC_CREDIT_E2 - GET_NUM_VFS_PER_PATH(bp) * VF_MAC_CREDIT_CNT) / \
func_num + GET_NUM_VFS_PER_PF(bp) * VF_MAC_CREDIT_CNT)
+#define BNX2X_VFS_VLAN_CREDIT(bp) \
+ (GET_NUM_VFS_PER_PATH(bp) * VF_VLAN_CREDIT_CNT)
+
#define PF_VLAN_CREDIT_E2(bp, func_num) \
- ((MAX_MAC_CREDIT_E2 - GET_NUM_VFS_PER_PATH(bp) * VF_VLAN_CREDIT_CNT) / \
+ ((MAX_VLAN_CREDIT_E2 - 1 - BNX2X_VFS_VLAN_CREDIT(bp)) / \
func_num + GET_NUM_VFS_PER_PF(bp) * VF_VLAN_CREDIT_CNT)
#endif /* BNX2X_SP_VERBS */
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
index b6ebd92ec565..3a716c015415 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h
@@ -139,6 +139,7 @@ struct bnx2x_virtf {
#define VF_ACQUIRED 1 /* VF acquired, but not initialized */
#define VF_ENABLED 2 /* VF Enabled */
#define VF_RESET 3 /* VF FLR'd, pending cleanup */
+#define VF_LOST 4 /* Recovery while VFs are loaded */
bool flr_clnup_stage; /* true during flr cleanup */
bool malicious; /* true if FW indicated so, until FLR */
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
index 0752b7fa4d9c..ea0e9394f898 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
@@ -2107,6 +2107,18 @@ static void bnx2x_vf_mbx_request(struct bnx2x *bp, struct bnx2x_virtf *vf,
{
int i;
+ if (vf->state == VF_LOST) {
+ /* Just ack the FW and return if VFs are lost
+ * in case of parity error. VFs are supposed to be timedout
+ * on waiting for PF response.
+ */
+ DP(BNX2X_MSG_IOV,
+ "VF 0x%x lost, not handling the request\n", vf->abs_vfid);
+
+ storm_memset_vf_mbx_ack(bp, vf->abs_vfid);
+ return;
+ }
+
/* check if tlv type is known */
if (bnx2x_tlv_supported(mbx->first_tlv.tl.type)) {
/* Lock the per vf op mutex and note the locker's identity.
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 85983f0e3134..33eb8cd6551e 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -944,6 +944,7 @@ static struct sk_buff *bnxt_rx_page_skb(struct bnxt *bp,
dma_addr -= bp->rx_dma_offset;
dma_unmap_page_attrs(&bp->pdev->dev, dma_addr, PAGE_SIZE, bp->rx_dir,
DMA_ATTR_WEAK_ORDERING);
+ page_pool_release_page(rxr->page_pool, page);
if (unlikely(!payload))
payload = eth_get_headlen(bp->dev, data_ptr, len);
@@ -2001,6 +2002,9 @@ static int bnxt_async_event_process(struct bnxt *bp,
case ASYNC_EVENT_CMPL_EVENT_ID_RESET_NOTIFY: {
u32 data1 = le32_to_cpu(cmpl->event_data1);
+ if (!bp->fw_health)
+ goto async_event_process_exit;
+
bp->fw_reset_timestamp = jiffies;
bp->fw_reset_min_dsecs = cmpl->timestamp_lo;
if (!bp->fw_reset_min_dsecs)
@@ -4421,8 +4425,9 @@ int bnxt_hwrm_func_drv_rgtr(struct bnxt *bp, unsigned long *bmap, int bmap_size,
FUNC_DRV_RGTR_REQ_ENABLES_ASYNC_EVENT_FWD);
req.os_type = cpu_to_le16(FUNC_DRV_RGTR_REQ_OS_TYPE_LINUX);
- flags = FUNC_DRV_RGTR_REQ_FLAGS_16BIT_VER_MODE |
- FUNC_DRV_RGTR_REQ_FLAGS_HOT_RESET_SUPPORT;
+ flags = FUNC_DRV_RGTR_REQ_FLAGS_16BIT_VER_MODE;
+ if (bp->fw_cap & BNXT_FW_CAP_HOT_RESET)
+ flags |= FUNC_DRV_RGTR_REQ_FLAGS_HOT_RESET_SUPPORT;
if (bp->fw_cap & BNXT_FW_CAP_ERROR_RECOVERY)
flags |= FUNC_DRV_RGTR_REQ_FLAGS_ERROR_RECOVERY_SUPPORT |
FUNC_DRV_RGTR_REQ_FLAGS_MASTER_SUPPORT;
@@ -6186,7 +6191,7 @@ static void bnxt_hwrm_set_coal_params(struct bnxt *bp,
tmr = bnxt_usec_to_coal_tmr(bp, hw_coal->coal_ticks_irq);
val = clamp_t(u16, tmr, 1,
coal_cap->cmpl_aggr_dma_tmr_during_int_max);
- req->cmpl_aggr_dma_tmr_during_int = cpu_to_le16(tmr);
+ req->cmpl_aggr_dma_tmr_during_int = cpu_to_le16(val);
req->enables |=
cpu_to_le16(BNXT_COAL_CMPL_AGGR_TMR_DURING_INT_ENABLE);
}
@@ -7115,14 +7120,6 @@ static int bnxt_hwrm_error_recovery_qcfg(struct bnxt *bp)
rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
if (rc)
goto err_recovery_out;
- if (!fw_health) {
- fw_health = kzalloc(sizeof(*fw_health), GFP_KERNEL);
- bp->fw_health = fw_health;
- if (!fw_health) {
- rc = -ENOMEM;
- goto err_recovery_out;
- }
- }
fw_health->flags = le32_to_cpu(resp->flags);
if ((fw_health->flags & ERROR_RECOVERY_QCFG_RESP_FLAGS_CO_CPU) &&
!(bp->fw_cap & BNXT_FW_CAP_KONG_MB_CHNL)) {
@@ -8796,6 +8793,9 @@ static int bnxt_hwrm_if_change(struct bnxt *bp, bool up)
if (fw_reset) {
if (!test_bit(BNXT_STATE_IN_FW_RESET, &bp->state))
bnxt_ulp_stop(bp);
+ bnxt_free_ctx_mem(bp);
+ kfree(bp->ctx);
+ bp->ctx = NULL;
rc = bnxt_fw_init_one(bp);
if (rc) {
set_bit(BNXT_STATE_ABORT_ERR, &bp->state);
@@ -9976,7 +9976,7 @@ static void bnxt_reset_task(struct bnxt *bp, bool silent)
}
}
-static void bnxt_tx_timeout(struct net_device *dev)
+static void bnxt_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct bnxt *bp = netdev_priv(dev);
@@ -9990,8 +9990,7 @@ static void bnxt_fw_health_check(struct bnxt *bp)
struct bnxt_fw_health *fw_health = bp->fw_health;
u32 val;
- if (!fw_health || !fw_health->enabled ||
- test_bit(BNXT_STATE_IN_FW_RESET, &bp->state))
+ if (!fw_health->enabled || test_bit(BNXT_STATE_IN_FW_RESET, &bp->state))
return;
if (fw_health->tmr_counter) {
@@ -10482,6 +10481,23 @@ static void bnxt_init_dflt_coal(struct bnxt *bp)
bp->stats_coal_ticks = BNXT_DEF_STATS_COAL_TICKS;
}
+static void bnxt_alloc_fw_health(struct bnxt *bp)
+{
+ if (bp->fw_health)
+ return;
+
+ if (!(bp->fw_cap & BNXT_FW_CAP_HOT_RESET) &&
+ !(bp->fw_cap & BNXT_FW_CAP_ERROR_RECOVERY))
+ return;
+
+ bp->fw_health = kzalloc(sizeof(*bp->fw_health), GFP_KERNEL);
+ if (!bp->fw_health) {
+ netdev_warn(bp->dev, "Failed to allocate fw_health\n");
+ bp->fw_cap &= ~BNXT_FW_CAP_HOT_RESET;
+ bp->fw_cap &= ~BNXT_FW_CAP_ERROR_RECOVERY;
+ }
+}
+
static int bnxt_fw_init_one_p1(struct bnxt *bp)
{
int rc;
@@ -10528,6 +10544,7 @@ static int bnxt_fw_init_one_p2(struct bnxt *bp)
netdev_warn(bp->dev, "hwrm query adv flow mgnt failure rc: %d\n",
rc);
+ bnxt_alloc_fw_health(bp);
rc = bnxt_hwrm_error_recovery_qcfg(bp);
if (rc)
netdev_warn(bp->dev, "hwrm query error recovery failure rc: %d\n",
@@ -10609,6 +10626,12 @@ static int bnxt_fw_init_one(struct bnxt *bp)
rc = bnxt_approve_mac(bp, bp->dev->dev_addr, false);
if (rc)
return rc;
+
+ /* In case fw capabilities have changed, destroy the unneeded
+ * reporters and create newly capable ones.
+ */
+ bnxt_dl_fw_reporters_destroy(bp, false);
+ bnxt_dl_fw_reporters_create(bp);
bnxt_fw_init_one_p3(bp);
return 0;
}
@@ -10751,8 +10774,7 @@ static void bnxt_fw_reset_task(struct work_struct *work)
bnxt_queue_fw_reset_work(bp, bp->fw_reset_min_dsecs * HZ / 10);
return;
case BNXT_FW_RESET_STATE_ENABLE_DEV:
- if (test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state) &&
- bp->fw_health) {
+ if (test_bit(BNXT_STATE_FW_FATAL_COND, &bp->state)) {
u32 val;
val = bnxt_fw_health_readl(bp,
@@ -10801,6 +10823,7 @@ static void bnxt_fw_reset_task(struct work_struct *work)
smp_mb__before_atomic();
clear_bit(BNXT_STATE_IN_FW_RESET, &bp->state);
bnxt_ulp_start(bp, rc);
+ bnxt_dl_health_recovery_done(bp);
bnxt_dl_health_status_update(bp, true);
rtnl_unlock();
break;
@@ -11396,11 +11419,11 @@ static void bnxt_remove_one(struct pci_dev *pdev)
struct net_device *dev = pci_get_drvdata(pdev);
struct bnxt *bp = netdev_priv(dev);
- if (BNXT_PF(bp)) {
+ if (BNXT_PF(bp))
bnxt_sriov_disable(bp);
- bnxt_dl_unregister(bp);
- }
+ bnxt_dl_fw_reporters_destroy(bp, true);
+ bnxt_dl_unregister(bp);
pci_disable_pcie_error_reporting(pdev);
unregister_netdev(dev);
bnxt_shutdown_tc(bp);
@@ -11415,6 +11438,8 @@ static void bnxt_remove_one(struct pci_dev *pdev)
bnxt_dcb_free(bp);
kfree(bp->edev);
bp->edev = NULL;
+ kfree(bp->fw_health);
+ bp->fw_health = NULL;
bnxt_cleanup_pci(bp);
bnxt_free_ctx_mem(bp);
kfree(bp->ctx);
@@ -11875,8 +11900,8 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
if (rc)
goto init_err_cleanup_tc;
- if (BNXT_PF(bp))
- bnxt_dl_register(bp);
+ bnxt_dl_register(bp);
+ bnxt_dl_fw_reporters_create(bp);
netdev_info(dev, "%s found at mem %lx, node addr %pM\n",
board_info[ent->driver_data].name,
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
index acb2dd64c023..0c3d224637b9 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c
@@ -39,11 +39,10 @@ static int bnxt_fw_reporter_diagnose(struct devlink_health_reporter *reporter,
struct netlink_ext_ack *extack)
{
struct bnxt *bp = devlink_health_reporter_priv(reporter);
- struct bnxt_fw_health *health = bp->fw_health;
u32 val, health_status;
int rc;
- if (!health || test_bit(BNXT_STATE_IN_FW_RESET, &bp->state))
+ if (test_bit(BNXT_STATE_IN_FW_RESET, &bp->state))
return 0;
val = bnxt_fw_health_readl(bp, BNXT_FW_HEALTH_REG);
@@ -90,7 +89,7 @@ static int bnxt_fw_reset_recover(struct devlink_health_reporter *reporter,
return -EOPNOTSUPP;
bnxt_fw_reset(bp);
- return 0;
+ return -EINPROGRESS;
}
static const
@@ -117,7 +116,7 @@ static int bnxt_fw_fatal_recover(struct devlink_health_reporter *reporter,
else if (event == BNXT_FW_EXCEPTION_SP_EVENT)
bnxt_fw_exception(bp);
- return 0;
+ return -EINPROGRESS;
}
static const
@@ -126,21 +125,15 @@ struct devlink_health_reporter_ops bnxt_dl_fw_fatal_reporter_ops = {
.recover = bnxt_fw_fatal_recover,
};
-static void bnxt_dl_fw_reporters_create(struct bnxt *bp)
+void bnxt_dl_fw_reporters_create(struct bnxt *bp)
{
struct bnxt_fw_health *health = bp->fw_health;
- if (!health)
+ if (!bp->dl || !health)
return;
- health->fw_reporter =
- devlink_health_reporter_create(bp->dl, &bnxt_dl_fw_reporter_ops,
- 0, false, bp);
- if (IS_ERR(health->fw_reporter)) {
- netdev_warn(bp->dev, "Failed to create FW health reporter, rc = %ld\n",
- PTR_ERR(health->fw_reporter));
- health->fw_reporter = NULL;
- }
+ if (!(bp->fw_cap & BNXT_FW_CAP_HOT_RESET) || health->fw_reset_reporter)
+ goto err_recovery;
health->fw_reset_reporter =
devlink_health_reporter_create(bp->dl,
@@ -150,8 +143,30 @@ static void bnxt_dl_fw_reporters_create(struct bnxt *bp)
netdev_warn(bp->dev, "Failed to create FW fatal health reporter, rc = %ld\n",
PTR_ERR(health->fw_reset_reporter));
health->fw_reset_reporter = NULL;
+ bp->fw_cap &= ~BNXT_FW_CAP_HOT_RESET;
+ }
+
+err_recovery:
+ if (!(bp->fw_cap & BNXT_FW_CAP_ERROR_RECOVERY))
+ return;
+
+ if (!health->fw_reporter) {
+ health->fw_reporter =
+ devlink_health_reporter_create(bp->dl,
+ &bnxt_dl_fw_reporter_ops,
+ 0, false, bp);
+ if (IS_ERR(health->fw_reporter)) {
+ netdev_warn(bp->dev, "Failed to create FW health reporter, rc = %ld\n",
+ PTR_ERR(health->fw_reporter));
+ health->fw_reporter = NULL;
+ bp->fw_cap &= ~BNXT_FW_CAP_ERROR_RECOVERY;
+ return;
+ }
}
+ if (health->fw_fatal_reporter)
+ return;
+
health->fw_fatal_reporter =
devlink_health_reporter_create(bp->dl,
&bnxt_dl_fw_fatal_reporter_ops,
@@ -160,24 +175,35 @@ static void bnxt_dl_fw_reporters_create(struct bnxt *bp)
netdev_warn(bp->dev, "Failed to create FW fatal health reporter, rc = %ld\n",
PTR_ERR(health->fw_fatal_reporter));
health->fw_fatal_reporter = NULL;
+ bp->fw_cap &= ~BNXT_FW_CAP_ERROR_RECOVERY;
}
}
-static void bnxt_dl_fw_reporters_destroy(struct bnxt *bp)
+void bnxt_dl_fw_reporters_destroy(struct bnxt *bp, bool all)
{
struct bnxt_fw_health *health = bp->fw_health;
- if (!health)
+ if (!bp->dl || !health)
return;
- if (health->fw_reporter)
- devlink_health_reporter_destroy(health->fw_reporter);
-
- if (health->fw_reset_reporter)
+ if ((all || !(bp->fw_cap & BNXT_FW_CAP_HOT_RESET)) &&
+ health->fw_reset_reporter) {
devlink_health_reporter_destroy(health->fw_reset_reporter);
+ health->fw_reset_reporter = NULL;
+ }
- if (health->fw_fatal_reporter)
+ if ((bp->fw_cap & BNXT_FW_CAP_ERROR_RECOVERY) && !all)
+ return;
+
+ if (health->fw_reporter) {
+ devlink_health_reporter_destroy(health->fw_reporter);
+ health->fw_reporter = NULL;
+ }
+
+ if (health->fw_fatal_reporter) {
devlink_health_reporter_destroy(health->fw_fatal_reporter);
+ health->fw_fatal_reporter = NULL;
+ }
}
void bnxt_devlink_health_report(struct bnxt *bp, unsigned long event)
@@ -185,9 +211,6 @@ void bnxt_devlink_health_report(struct bnxt *bp, unsigned long event)
struct bnxt_fw_health *fw_health = bp->fw_health;
struct bnxt_fw_reporter_ctx fw_reporter_ctx;
- if (!fw_health)
- return;
-
fw_reporter_ctx.sp_event = event;
switch (event) {
case BNXT_FW_RESET_NOTIFY_SP_EVENT:
@@ -239,6 +262,16 @@ void bnxt_dl_health_status_update(struct bnxt *bp, bool healthy)
health->fatal = false;
}
+void bnxt_dl_health_recovery_done(struct bnxt *bp)
+{
+ struct bnxt_fw_health *hlth = bp->fw_health;
+
+ if (hlth->fatal)
+ devlink_health_reporter_recovery_done(hlth->fw_fatal_reporter);
+ else
+ devlink_health_reporter_recovery_done(hlth->fw_reset_reporter);
+}
+
static const struct devlink_ops bnxt_dl_ops = {
#ifdef CONFIG_BNXT_SRIOV
.eswitch_mode_set = bnxt_dl_eswitch_mode_set,
@@ -247,6 +280,8 @@ static const struct devlink_ops bnxt_dl_ops = {
.flash_update = bnxt_dl_flash_update,
};
+static const struct devlink_ops bnxt_vf_dl_ops;
+
enum bnxt_dl_param_id {
BNXT_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX,
BNXT_DEVLINK_PARAM_ID_GRE_VER_CHECK,
@@ -460,7 +495,10 @@ int bnxt_dl_register(struct bnxt *bp)
return -ENOTSUPP;
}
- dl = devlink_alloc(&bnxt_dl_ops, sizeof(struct bnxt_dl));
+ if (BNXT_PF(bp))
+ dl = devlink_alloc(&bnxt_dl_ops, sizeof(struct bnxt_dl));
+ else
+ dl = devlink_alloc(&bnxt_vf_dl_ops, sizeof(struct bnxt_dl));
if (!dl) {
netdev_warn(bp->dev, "devlink_alloc failed");
return -ENOMEM;
@@ -479,6 +517,9 @@ int bnxt_dl_register(struct bnxt *bp)
goto err_dl_free;
}
+ if (!BNXT_PF(bp))
+ return 0;
+
rc = devlink_params_register(dl, bnxt_dl_params,
ARRAY_SIZE(bnxt_dl_params));
if (rc) {
@@ -506,8 +547,6 @@ int bnxt_dl_register(struct bnxt *bp)
devlink_params_publish(dl);
- bnxt_dl_fw_reporters_create(bp);
-
return 0;
err_dl_port_unreg:
@@ -530,12 +569,14 @@ void bnxt_dl_unregister(struct bnxt *bp)
if (!dl)
return;
- bnxt_dl_fw_reporters_destroy(bp);
- devlink_port_params_unregister(&bp->dl_port, bnxt_dl_port_params,
- ARRAY_SIZE(bnxt_dl_port_params));
- devlink_port_unregister(&bp->dl_port);
- devlink_params_unregister(dl, bnxt_dl_params,
- ARRAY_SIZE(bnxt_dl_params));
+ if (BNXT_PF(bp)) {
+ devlink_port_params_unregister(&bp->dl_port,
+ bnxt_dl_port_params,
+ ARRAY_SIZE(bnxt_dl_port_params));
+ devlink_port_unregister(&bp->dl_port);
+ devlink_params_unregister(dl, bnxt_dl_params,
+ ARRAY_SIZE(bnxt_dl_params));
+ }
devlink_unregister(dl);
devlink_free(dl);
}
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h
index 665d4bdcd8c0..08aaa4441c78 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.h
@@ -58,6 +58,9 @@ struct bnxt_dl_nvm_param {
void bnxt_devlink_health_report(struct bnxt *bp, unsigned long event);
void bnxt_dl_health_status_update(struct bnxt *bp, bool healthy);
+void bnxt_dl_health_recovery_done(struct bnxt *bp);
+void bnxt_dl_fw_reporters_create(struct bnxt *bp);
+void bnxt_dl_fw_reporters_destroy(struct bnxt *bp, bool all);
int bnxt_dl_register(struct bnxt *bp);
void bnxt_dl_unregister(struct bnxt *bp);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index 2ccf79cdcb1e..08d56ec7b68a 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -3071,8 +3071,15 @@ static int bnxt_hwrm_dbg_dma_data(struct bnxt *bp, void *msg, int msg_len,
}
}
- if (info->dest_buf)
- memcpy(info->dest_buf + off, dma_buf, len);
+ if (info->dest_buf) {
+ if ((info->seg_start + off + len) <=
+ BNXT_COREDUMP_BUF_LEN(info->buf_len)) {
+ memcpy(info->dest_buf + off, dma_buf, len);
+ } else {
+ rc = -ENOBUFS;
+ break;
+ }
+ }
if (cmn_req->req_type ==
cpu_to_le16(HWRM_DBG_COREDUMP_RETRIEVE))
@@ -3126,7 +3133,7 @@ static int bnxt_hwrm_dbg_coredump_initiate(struct bnxt *bp, u16 component_id,
static int bnxt_hwrm_dbg_coredump_retrieve(struct bnxt *bp, u16 component_id,
u16 segment_id, u32 *seg_len,
- void *buf, u32 offset)
+ void *buf, u32 buf_len, u32 offset)
{
struct hwrm_dbg_coredump_retrieve_input req = {0};
struct bnxt_hwrm_dbg_dma_info info = {NULL};
@@ -3141,8 +3148,11 @@ static int bnxt_hwrm_dbg_coredump_retrieve(struct bnxt *bp, u16 component_id,
seq_no);
info.data_len_off = offsetof(struct hwrm_dbg_coredump_retrieve_output,
data_len);
- if (buf)
+ if (buf) {
info.dest_buf = buf + offset;
+ info.buf_len = buf_len;
+ info.seg_start = offset;
+ }
rc = bnxt_hwrm_dbg_dma_data(bp, &req, sizeof(req), &info);
if (!rc)
@@ -3232,14 +3242,17 @@ bnxt_fill_coredump_record(struct bnxt *bp, struct bnxt_coredump_record *record,
static int bnxt_get_coredump(struct bnxt *bp, void *buf, u32 *dump_len)
{
u32 ver_get_resp_len = sizeof(struct hwrm_ver_get_output);
+ u32 offset = 0, seg_hdr_len, seg_record_len, buf_len = 0;
struct coredump_segment_record *seg_record = NULL;
- u32 offset = 0, seg_hdr_len, seg_record_len;
struct bnxt_coredump_segment_hdr seg_hdr;
struct bnxt_coredump coredump = {NULL};
time64_t start_time;
u16 start_utc;
int rc = 0, i;
+ if (buf)
+ buf_len = *dump_len;
+
start_time = ktime_get_real_seconds();
start_utc = sys_tz.tz_minuteswest * 60;
seg_hdr_len = sizeof(seg_hdr);
@@ -3272,6 +3285,12 @@ static int bnxt_get_coredump(struct bnxt *bp, void *buf, u32 *dump_len)
u32 duration = 0, seg_len = 0;
unsigned long start, end;
+ if (buf && ((offset + seg_hdr_len) >
+ BNXT_COREDUMP_BUF_LEN(buf_len))) {
+ rc = -ENOBUFS;
+ goto err;
+ }
+
start = jiffies;
rc = bnxt_hwrm_dbg_coredump_initiate(bp, comp_id, seg_id);
@@ -3284,9 +3303,11 @@ static int bnxt_get_coredump(struct bnxt *bp, void *buf, u32 *dump_len)
/* Write segment data into the buffer */
rc = bnxt_hwrm_dbg_coredump_retrieve(bp, comp_id, seg_id,
- &seg_len, buf,
+ &seg_len, buf, buf_len,
offset + seg_hdr_len);
- if (rc)
+ if (rc && rc == -ENOBUFS)
+ goto err;
+ else if (rc)
netdev_err(bp->dev,
"Failed to retrieve coredump for seg = %d\n",
seg_record->segment_id);
@@ -3316,7 +3337,8 @@ err:
rc);
kfree(coredump.data);
*dump_len += sizeof(struct bnxt_coredump_record);
-
+ if (rc == -ENOBUFS)
+ netdev_err(bp->dev, "Firmware returned large coredump buffer");
return rc;
}
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.h
index 4428d0abcbc1..3576d951727b 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.h
@@ -31,6 +31,8 @@ struct bnxt_coredump {
u16 total_segs;
};
+#define BNXT_COREDUMP_BUF_LEN(len) ((len) - sizeof(struct bnxt_coredump_record))
+
struct bnxt_hwrm_dbg_dma_info {
void *dest_buf;
int dest_buf_size;
@@ -38,6 +40,8 @@ struct bnxt_hwrm_dbg_dma_info {
u16 seq_off;
u16 data_len_off;
u16 segs;
+ u32 seg_start;
+ u32 buf_len;
};
struct hwrm_dbg_cmn_input {
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
index c601ff7b8f61..4a316c4b3fa8 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
@@ -113,8 +113,10 @@ static int bnxt_req_msix_vecs(struct bnxt_en_dev *edev, int ulp_id,
{
struct net_device *dev = edev->net;
struct bnxt *bp = netdev_priv(dev);
+ struct bnxt_hw_resc *hw_resc;
int max_idx, max_cp_rings;
int avail_msix, idx;
+ int total_vecs;
int rc = 0;
ASSERT_RTNL();
@@ -142,7 +144,10 @@ static int bnxt_req_msix_vecs(struct bnxt_en_dev *edev, int ulp_id,
}
edev->ulp_tbl[ulp_id].msix_base = idx;
edev->ulp_tbl[ulp_id].msix_requested = avail_msix;
- if (bp->total_irqs < (idx + avail_msix)) {
+ hw_resc = &bp->hw_resc;
+ total_vecs = idx + avail_msix;
+ if (bp->total_irqs < total_vecs ||
+ (BNXT_NEW_RM(bp) && hw_resc->resv_irqs < total_vecs)) {
if (netif_running(dev)) {
bnxt_close_nic(bp, true, false);
rc = bnxt_open_nic(bp, true, false);
@@ -156,7 +161,6 @@ static int bnxt_req_msix_vecs(struct bnxt_en_dev *edev, int ulp_id,
}
if (BNXT_NEW_RM(bp)) {
- struct bnxt_hw_resc *hw_resc = &bp->hw_resc;
int resv_msix;
resv_msix = hw_resc->resv_irqs - bp->cp_nr_rings;
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index 120fa05a39ff..3ee7917e3fc0 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -2,7 +2,7 @@
/*
* Broadcom GENET (Gigabit Ethernet) controller driver
*
- * Copyright (c) 2014-2017 Broadcom
+ * Copyright (c) 2014-2019 Broadcom
*/
#define pr_fmt(fmt) "bcmgenet: " fmt
@@ -508,8 +508,8 @@ static int bcmgenet_set_link_ksettings(struct net_device *dev,
return phy_ethtool_ksettings_set(dev->phydev, cmd);
}
-static int bcmgenet_set_rx_csum(struct net_device *dev,
- netdev_features_t wanted)
+static void bcmgenet_set_rx_csum(struct net_device *dev,
+ netdev_features_t wanted)
{
struct bcmgenet_priv *priv = netdev_priv(dev);
u32 rbuf_chk_ctrl;
@@ -521,7 +521,7 @@ static int bcmgenet_set_rx_csum(struct net_device *dev,
/* enable rx checksumming */
if (rx_csum_en)
- rbuf_chk_ctrl |= RBUF_RXCHK_EN;
+ rbuf_chk_ctrl |= RBUF_RXCHK_EN | RBUF_L3_PARSE_DIS;
else
rbuf_chk_ctrl &= ~RBUF_RXCHK_EN;
priv->desc_rxchk_en = rx_csum_en;
@@ -535,12 +535,10 @@ static int bcmgenet_set_rx_csum(struct net_device *dev,
rbuf_chk_ctrl &= ~RBUF_SKIP_FCS;
bcmgenet_rbuf_writel(priv, rbuf_chk_ctrl, RBUF_CHK_CTRL);
-
- return 0;
}
-static int bcmgenet_set_tx_csum(struct net_device *dev,
- netdev_features_t wanted)
+static void bcmgenet_set_tx_csum(struct net_device *dev,
+ netdev_features_t wanted)
{
struct bcmgenet_priv *priv = netdev_priv(dev);
bool desc_64b_en;
@@ -549,7 +547,7 @@ static int bcmgenet_set_tx_csum(struct net_device *dev,
tbuf_ctrl = bcmgenet_tbuf_ctrl_get(priv);
rbuf_ctrl = bcmgenet_rbuf_readl(priv, RBUF_CTRL);
- desc_64b_en = !!(wanted & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM));
+ desc_64b_en = !!(wanted & NETIF_F_HW_CSUM);
/* enable 64 bytes descriptor in both directions (RBUF and TBUF) */
if (desc_64b_en) {
@@ -563,21 +561,27 @@ static int bcmgenet_set_tx_csum(struct net_device *dev,
bcmgenet_tbuf_ctrl_set(priv, tbuf_ctrl);
bcmgenet_rbuf_writel(priv, rbuf_ctrl, RBUF_CTRL);
-
- return 0;
}
static int bcmgenet_set_features(struct net_device *dev,
netdev_features_t features)
{
- netdev_features_t changed = features ^ dev->features;
- netdev_features_t wanted = dev->wanted_features;
- int ret = 0;
+ struct bcmgenet_priv *priv = netdev_priv(dev);
+ u32 reg;
+ int ret;
- if (changed & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM))
- ret = bcmgenet_set_tx_csum(dev, wanted);
- if (changed & (NETIF_F_RXCSUM))
- ret = bcmgenet_set_rx_csum(dev, wanted);
+ ret = clk_prepare_enable(priv->clk);
+ if (ret)
+ return ret;
+
+ /* Make sure we reflect the value of CRC_CMD_FWD */
+ reg = bcmgenet_umac_readl(priv, UMAC_CMD);
+ priv->crc_fwd_en = !!(reg & CMD_CRC_FWD);
+
+ bcmgenet_set_tx_csum(dev, features);
+ bcmgenet_set_rx_csum(dev, features);
+
+ clk_disable_unprepare(priv->clk);
return ret;
}
@@ -857,6 +861,9 @@ static const struct bcmgenet_stats bcmgenet_gstrings_stats[] = {
STAT_GENET_SOFT_MIB("alloc_rx_buff_failed", mib.alloc_rx_buff_failed),
STAT_GENET_SOFT_MIB("rx_dma_failed", mib.rx_dma_failed),
STAT_GENET_SOFT_MIB("tx_dma_failed", mib.tx_dma_failed),
+ STAT_GENET_SOFT_MIB("tx_realloc_tsb", mib.tx_realloc_tsb),
+ STAT_GENET_SOFT_MIB("tx_realloc_tsb_failed",
+ mib.tx_realloc_tsb_failed),
/* Per TX queues */
STAT_GENET_Q(0),
STAT_GENET_Q(1),
@@ -1483,6 +1490,7 @@ static void bcmgenet_tx_reclaim_all(struct net_device *dev)
static struct sk_buff *bcmgenet_put_tx_csum(struct net_device *dev,
struct sk_buff *skb)
{
+ struct bcmgenet_priv *priv = netdev_priv(dev);
struct status_64 *status = NULL;
struct sk_buff *new_skb;
u16 offset;
@@ -1495,12 +1503,15 @@ static struct sk_buff *bcmgenet_put_tx_csum(struct net_device *dev,
* enough headroom for us to insert 64B status block.
*/
new_skb = skb_realloc_headroom(skb, sizeof(*status));
- dev_kfree_skb(skb);
if (!new_skb) {
+ dev_kfree_skb_any(skb);
+ priv->mib.tx_realloc_tsb_failed++;
dev->stats.tx_dropped++;
return NULL;
}
+ dev_consume_skb_any(skb);
skb = new_skb;
+ priv->mib.tx_realloc_tsb++;
}
skb_push(skb, sizeof(*status));
@@ -1516,24 +1527,19 @@ static struct sk_buff *bcmgenet_put_tx_csum(struct net_device *dev,
ip_proto = ipv6_hdr(skb)->nexthdr;
break;
default:
- return skb;
+ /* don't use UDP flag */
+ ip_proto = 0;
+ break;
}
offset = skb_checksum_start_offset(skb) - sizeof(*status);
tx_csum_info = (offset << STATUS_TX_CSUM_START_SHIFT) |
- (offset + skb->csum_offset);
+ (offset + skb->csum_offset) |
+ STATUS_TX_CSUM_LV;
- /* Set the length valid bit for TCP and UDP and just set
- * the special UDP flag for IPv4, else just set to 0.
- */
- if (ip_proto == IPPROTO_TCP || ip_proto == IPPROTO_UDP) {
- tx_csum_info |= STATUS_TX_CSUM_LV;
- if (ip_proto == IPPROTO_UDP &&
- ip_ver == htons(ETH_P_IP))
- tx_csum_info |= STATUS_TX_CSUM_PROTO_UDP;
- } else {
- tx_csum_info = 0;
- }
+ /* Set the special UDP flag for UDP */
+ if (ip_proto == IPPROTO_UDP)
+ tx_csum_info |= STATUS_TX_CSUM_PROTO_UDP;
status->tx_csum_info = tx_csum_info;
}
@@ -1744,7 +1750,6 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_rx_ring *ring,
unsigned int bytes_processed = 0;
unsigned int p_index, mask;
unsigned int discards;
- unsigned int chksum_ok = 0;
/* Clear status before servicing to reduce spurious interrupts */
if (ring->index == DESC_INDEX) {
@@ -1795,9 +1800,15 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_rx_ring *ring,
dmadesc_get_length_status(priv, cb->bd_addr);
} else {
struct status_64 *status;
+ __be16 rx_csum;
status = (struct status_64 *)skb->data;
dma_length_status = status->length_status;
+ rx_csum = (__force __be16)(status->rx_csum & 0xffff);
+ if (priv->desc_rxchk_en) {
+ skb->csum = (__force __wsum)ntohs(rx_csum);
+ skb->ip_summed = CHECKSUM_COMPLETE;
+ }
}
/* DMA flags and length are still valid no matter how
@@ -1840,18 +1851,12 @@ static unsigned int bcmgenet_desc_rx(struct bcmgenet_rx_ring *ring,
goto next;
} /* error packet */
- chksum_ok = (dma_flag & priv->dma_rx_chk_bit) &&
- priv->desc_rxchk_en;
-
skb_put(skb, len);
if (priv->desc_64b_en) {
skb_pull(skb, 64);
len -= 64;
}
- if (likely(chksum_ok))
- skb->ip_summed = CHECKSUM_UNNECESSARY;
-
/* remove hardware 2bytes added for IP alignment */
skb_pull(skb, 2);
len -= 2;
@@ -2886,9 +2891,10 @@ static int bcmgenet_open(struct net_device *dev)
init_umac(priv);
- /* Make sure we reflect the value of CRC_CMD_FWD */
- reg = bcmgenet_umac_readl(priv, UMAC_CMD);
- priv->crc_fwd_en = !!(reg & CMD_CRC_FWD);
+ /* Apply features again in case we changed them while interface was
+ * down
+ */
+ bcmgenet_set_features(dev, dev->features);
bcmgenet_set_hw_addr(priv, dev->dev_addr);
@@ -3055,7 +3061,7 @@ static void bcmgenet_dump_tx_queue(struct bcmgenet_tx_ring *ring)
ring->cb_ptr, ring->end_ptr);
}
-static void bcmgenet_timeout(struct net_device *dev)
+static void bcmgenet_timeout(struct net_device *dev, unsigned int txqueue)
{
struct bcmgenet_priv *priv = netdev_priv(dev);
u32 int0_enable = 0;
@@ -3327,19 +3333,15 @@ static void bcmgenet_set_hw_params(struct bcmgenet_priv *priv)
if (GENET_IS_V5(priv) || GENET_IS_V4(priv)) {
bcmgenet_dma_regs = bcmgenet_dma_regs_v3plus;
genet_dma_ring_regs = genet_dma_ring_regs_v4;
- priv->dma_rx_chk_bit = DMA_RX_CHK_V3PLUS;
} else if (GENET_IS_V3(priv)) {
bcmgenet_dma_regs = bcmgenet_dma_regs_v3plus;
genet_dma_ring_regs = genet_dma_ring_regs_v123;
- priv->dma_rx_chk_bit = DMA_RX_CHK_V3PLUS;
} else if (GENET_IS_V2(priv)) {
bcmgenet_dma_regs = bcmgenet_dma_regs_v2;
genet_dma_ring_regs = genet_dma_ring_regs_v123;
- priv->dma_rx_chk_bit = DMA_RX_CHK_V12;
} else if (GENET_IS_V1(priv)) {
bcmgenet_dma_regs = bcmgenet_dma_regs_v1;
genet_dma_ring_regs = genet_dma_ring_regs_v123;
- priv->dma_rx_chk_bit = DMA_RX_CHK_V12;
}
/* enum genet_version starts at 1 */
@@ -3535,9 +3537,11 @@ static int bcmgenet_probe(struct platform_device *pdev)
priv->msg_enable = netif_msg_init(-1, GENET_MSG_DEFAULT);
- /* Set hardware features */
- dev->hw_features |= NETIF_F_SG | NETIF_F_IP_CSUM |
- NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM;
+ /* Set default features */
+ dev->features |= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_HW_CSUM |
+ NETIF_F_RXCSUM;
+ dev->hw_features |= dev->features;
+ dev->vlan_features |= dev->features;
/* Request the WOL interrupt and advertise suspend if available */
priv->wol_irq_disabled = true;
@@ -3574,6 +3578,14 @@ static int bcmgenet_probe(struct platform_device *pdev)
bcmgenet_set_hw_params(priv);
+ err = -EIO;
+ if (priv->hw_params->flags & GENET_HAS_40BITS)
+ err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(40));
+ if (err)
+ err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
+ if (err)
+ goto err;
+
/* Mii wait queue */
init_waitqueue_head(&priv->wq);
/* Always use RX_BUF_LENGTH (2KB) buffer for all chips */
@@ -3689,6 +3701,9 @@ static int bcmgenet_resume(struct device *d)
genphy_config_aneg(dev->phydev);
bcmgenet_mii_config(priv->dev, false);
+ /* Restore enabled features */
+ bcmgenet_set_features(dev, dev->features);
+
bcmgenet_set_hw_addr(priv, dev->dev_addr);
if (priv->internal_phy) {
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
index a5659197598f..61a6fe9f4cec 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
@@ -144,6 +144,8 @@ struct bcmgenet_mib_counters {
u32 alloc_rx_buff_failed;
u32 rx_dma_failed;
u32 tx_dma_failed;
+ u32 tx_realloc_tsb;
+ u32 tx_realloc_tsb_failed;
};
#define UMAC_HD_BKP_CTRL 0x004
@@ -251,6 +253,7 @@ struct bcmgenet_mib_counters {
#define RBUF_CHK_CTRL 0x14
#define RBUF_RXCHK_EN (1 << 0)
#define RBUF_SKIP_FCS (1 << 4)
+#define RBUF_L3_PARSE_DIS (1 << 5)
#define RBUF_ENERGY_CTRL 0x9c
#define RBUF_EEE_EN (1 << 0)
@@ -663,7 +666,6 @@ struct bcmgenet_priv {
bool desc_rxchk_en;
bool crc_fwd_en;
- unsigned int dma_rx_chk_bit;
u32 dma_max_burst_length;
u32 msg_enable;
diff --git a/drivers/net/ethernet/broadcom/sb1250-mac.c b/drivers/net/ethernet/broadcom/sb1250-mac.c
index 1604ad32e920..80ff52527233 100644
--- a/drivers/net/ethernet/broadcom/sb1250-mac.c
+++ b/drivers/net/ethernet/broadcom/sb1250-mac.c
@@ -294,7 +294,7 @@ static int sbmac_set_duplex(struct sbmac_softc *s, enum sbmac_duplex duplex,
enum sbmac_fc fc);
static int sbmac_open(struct net_device *dev);
-static void sbmac_tx_timeout (struct net_device *dev);
+static void sbmac_tx_timeout (struct net_device *dev, unsigned int txqueue);
static void sbmac_set_rx_mode(struct net_device *dev);
static int sbmac_mii_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
static int sbmac_close(struct net_device *dev);
@@ -2419,7 +2419,7 @@ static void sbmac_mii_poll(struct net_device *dev)
}
-static void sbmac_tx_timeout (struct net_device *dev)
+static void sbmac_tx_timeout (struct net_device *dev, unsigned int txqueue)
{
struct sbmac_softc *sc = netdev_priv(dev);
unsigned long flags;
diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index ca3aa1250dd1..88466255bf66 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -7645,7 +7645,7 @@ static void tg3_poll_controller(struct net_device *dev)
}
#endif
-static void tg3_tx_timeout(struct net_device *dev)
+static void tg3_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct tg3 *tp = netdev_priv(dev);
@@ -7874,8 +7874,8 @@ static netdev_tx_t tg3_start_xmit(struct sk_buff *, struct net_device *);
static int tg3_tso_bug(struct tg3 *tp, struct tg3_napi *tnapi,
struct netdev_queue *txq, struct sk_buff *skb)
{
- struct sk_buff *segs, *nskb;
u32 frag_cnt_est = skb_shinfo(skb)->gso_segs * 3;
+ struct sk_buff *segs, *seg, *next;
/* Estimate the number of fragments in the worst case */
if (unlikely(tg3_tx_avail(tnapi) <= frag_cnt_est)) {
@@ -7898,12 +7898,10 @@ static int tg3_tso_bug(struct tg3 *tp, struct tg3_napi *tnapi,
if (IS_ERR(segs) || !segs)
goto tg3_tso_bug_end;
- do {
- nskb = segs;
- segs = segs->next;
- nskb->next = NULL;
- tg3_start_xmit(nskb, tp->dev);
- } while (segs);
+ skb_list_walk_safe(segs, seg, next) {
+ skb_mark_not_on_list(seg);
+ tg3_start_xmit(seg, tp->dev);
+ }
tg3_tso_bug_end:
dev_consume_skb_any(skb);
diff --git a/drivers/net/ethernet/brocade/bna/bfa_ioc.c b/drivers/net/ethernet/brocade/bna/bfa_ioc.c
index 4042c2185e98..e17bfc87da90 100644
--- a/drivers/net/ethernet/brocade/bna/bfa_ioc.c
+++ b/drivers/net/ethernet/brocade/bna/bfa_ioc.c
@@ -1124,11 +1124,10 @@ bfa_nw_ioc_sem_release(void __iomem *sem_reg)
static void
bfa_ioc_fwver_clear(struct bfa_ioc *ioc)
{
- u32 pgnum, pgoff, loff = 0;
+ u32 pgnum, loff = 0;
int i;
pgnum = PSS_SMEM_PGNUM(ioc->ioc_regs.smem_pg0, loff);
- pgoff = PSS_SMEM_PGOFF(loff);
writel(pgnum, ioc->ioc_regs.host_page_num_fn);
for (i = 0; i < (sizeof(struct bfi_ioc_image_hdr) / sizeof(u32)); i++) {
diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h
index 19fe4f4867c7..dbf7070fcdba 100644
--- a/drivers/net/ethernet/cadence/macb.h
+++ b/drivers/net/ethernet/cadence/macb.h
@@ -630,10 +630,17 @@
#define GEM_CLK_DIV96 5
/* Constants for MAN register */
-#define MACB_MAN_SOF 1
-#define MACB_MAN_WRITE 1
-#define MACB_MAN_READ 2
-#define MACB_MAN_CODE 2
+#define MACB_MAN_C22_SOF 1
+#define MACB_MAN_C22_WRITE 1
+#define MACB_MAN_C22_READ 2
+#define MACB_MAN_C22_CODE 2
+
+#define MACB_MAN_C45_SOF 0
+#define MACB_MAN_C45_ADDR 0
+#define MACB_MAN_C45_WRITE 1
+#define MACB_MAN_C45_POST_READ_INCR 2
+#define MACB_MAN_C45_READ 3
+#define MACB_MAN_C45_CODE 2
/* Capability mask bits */
#define MACB_CAPS_ISR_CLEAR_ON_WRITE 0x00000001
diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index 9c767ee252ac..1c547ee0d444 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -337,11 +337,30 @@ static int macb_mdio_read(struct mii_bus *bus, int mii_id, int regnum)
if (status < 0)
goto mdio_read_exit;
- macb_writel(bp, MAN, (MACB_BF(SOF, MACB_MAN_SOF)
- | MACB_BF(RW, MACB_MAN_READ)
- | MACB_BF(PHYA, mii_id)
- | MACB_BF(REGA, regnum)
- | MACB_BF(CODE, MACB_MAN_CODE)));
+ if (regnum & MII_ADDR_C45) {
+ macb_writel(bp, MAN, (MACB_BF(SOF, MACB_MAN_C45_SOF)
+ | MACB_BF(RW, MACB_MAN_C45_ADDR)
+ | MACB_BF(PHYA, mii_id)
+ | MACB_BF(REGA, (regnum >> 16) & 0x1F)
+ | MACB_BF(DATA, regnum & 0xFFFF)
+ | MACB_BF(CODE, MACB_MAN_C45_CODE)));
+
+ status = macb_mdio_wait_for_idle(bp);
+ if (status < 0)
+ goto mdio_read_exit;
+
+ macb_writel(bp, MAN, (MACB_BF(SOF, MACB_MAN_C45_SOF)
+ | MACB_BF(RW, MACB_MAN_C45_READ)
+ | MACB_BF(PHYA, mii_id)
+ | MACB_BF(REGA, (regnum >> 16) & 0x1F)
+ | MACB_BF(CODE, MACB_MAN_C45_CODE)));
+ } else {
+ macb_writel(bp, MAN, (MACB_BF(SOF, MACB_MAN_C22_SOF)
+ | MACB_BF(RW, MACB_MAN_C22_READ)
+ | MACB_BF(PHYA, mii_id)
+ | MACB_BF(REGA, regnum)
+ | MACB_BF(CODE, MACB_MAN_C22_CODE)));
+ }
status = macb_mdio_wait_for_idle(bp);
if (status < 0)
@@ -370,12 +389,32 @@ static int macb_mdio_write(struct mii_bus *bus, int mii_id, int regnum,
if (status < 0)
goto mdio_write_exit;
- macb_writel(bp, MAN, (MACB_BF(SOF, MACB_MAN_SOF)
- | MACB_BF(RW, MACB_MAN_WRITE)
- | MACB_BF(PHYA, mii_id)
- | MACB_BF(REGA, regnum)
- | MACB_BF(CODE, MACB_MAN_CODE)
- | MACB_BF(DATA, value)));
+ if (regnum & MII_ADDR_C45) {
+ macb_writel(bp, MAN, (MACB_BF(SOF, MACB_MAN_C45_SOF)
+ | MACB_BF(RW, MACB_MAN_C45_ADDR)
+ | MACB_BF(PHYA, mii_id)
+ | MACB_BF(REGA, (regnum >> 16) & 0x1F)
+ | MACB_BF(DATA, regnum & 0xFFFF)
+ | MACB_BF(CODE, MACB_MAN_C45_CODE)));
+
+ status = macb_mdio_wait_for_idle(bp);
+ if (status < 0)
+ goto mdio_write_exit;
+
+ macb_writel(bp, MAN, (MACB_BF(SOF, MACB_MAN_C45_SOF)
+ | MACB_BF(RW, MACB_MAN_C45_WRITE)
+ | MACB_BF(PHYA, mii_id)
+ | MACB_BF(REGA, (regnum >> 16) & 0x1F)
+ | MACB_BF(CODE, MACB_MAN_C45_CODE)
+ | MACB_BF(DATA, value)));
+ } else {
+ macb_writel(bp, MAN, (MACB_BF(SOF, MACB_MAN_C22_SOF)
+ | MACB_BF(RW, MACB_MAN_C22_WRITE)
+ | MACB_BF(PHYA, mii_id)
+ | MACB_BF(REGA, regnum)
+ | MACB_BF(CODE, MACB_MAN_C22_CODE)
+ | MACB_BF(DATA, value)));
+ }
status = macb_mdio_wait_for_idle(bp);
if (status < 0)
@@ -664,9 +703,30 @@ static int macb_mii_probe(struct net_device *dev)
return 0;
}
+static int macb_mdiobus_register(struct macb *bp)
+{
+ struct device_node *child, *np = bp->pdev->dev.of_node;
+
+ /* Only create the PHY from the device tree if at least one PHY is
+ * described. Otherwise scan the entire MDIO bus. We do this to support
+ * old device tree that did not follow the best practices and did not
+ * describe their network PHYs.
+ */
+ for_each_available_child_of_node(np, child)
+ if (of_mdiobus_child_is_phy(child)) {
+ /* The loop increments the child refcount,
+ * decrement it before returning.
+ */
+ of_node_put(child);
+
+ return of_mdiobus_register(bp->mii_bus, np);
+ }
+
+ return mdiobus_register(bp->mii_bus);
+}
+
static int macb_mii_init(struct macb *bp)
{
- struct device_node *np;
int err = -ENXIO;
/* Enable management port */
@@ -688,9 +748,7 @@ static int macb_mii_init(struct macb *bp)
dev_set_drvdata(&bp->dev->dev, bp->mii_bus);
- np = bp->pdev->dev.of_node;
-
- err = of_mdiobus_register(bp->mii_bus, np);
+ err = macb_mdiobus_register(bp);
if (err)
goto err_out_free_mdiobus;
@@ -4069,7 +4127,7 @@ static int fu540_c000_clk_init(struct platform_device *pdev, struct clk **pclk,
mgmt->rate = 0;
mgmt->hw.init = &init;
- *tx_clk = clk_register(NULL, &mgmt->hw);
+ *tx_clk = devm_clk_register(&pdev->dev, &mgmt->hw);
if (IS_ERR(*tx_clk))
return PTR_ERR(*tx_clk);
@@ -4397,7 +4455,6 @@ err_out_free_netdev:
err_disable_clocks:
clk_disable_unprepare(tx_clk);
- clk_unregister(tx_clk);
clk_disable_unprepare(hclk);
clk_disable_unprepare(pclk);
clk_disable_unprepare(rx_clk);
@@ -4427,7 +4484,6 @@ static int macb_remove(struct platform_device *pdev)
pm_runtime_dont_use_autosuspend(&pdev->dev);
if (!pm_runtime_suspended(&pdev->dev)) {
clk_disable_unprepare(bp->tx_clk);
- clk_unregister(bp->tx_clk);
clk_disable_unprepare(bp->hclk);
clk_disable_unprepare(bp->pclk);
clk_disable_unprepare(bp->rx_clk);
diff --git a/drivers/net/ethernet/calxeda/xgmac.c b/drivers/net/ethernet/calxeda/xgmac.c
index af04a2c81adb..05a3d067c3fc 100644
--- a/drivers/net/ethernet/calxeda/xgmac.c
+++ b/drivers/net/ethernet/calxeda/xgmac.c
@@ -1251,7 +1251,7 @@ static int xgmac_poll(struct napi_struct *napi, int budget)
* netdev structure and arrange for the device to be reset to a sane state
* in order to transmit a new packet.
*/
-static void xgmac_tx_timeout(struct net_device *dev)
+static void xgmac_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct xgmac_priv *priv = netdev_priv(dev);
schedule_work(&priv->tx_timeout_work);
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c
index 7f3b2e3b0868..eab05b5534ea 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c
@@ -2562,7 +2562,7 @@ lio_xmit_failed:
/** \brief Network device Tx timeout
* @param netdev pointer to network device
*/
-static void liquidio_tx_timeout(struct net_device *netdev)
+static void liquidio_tx_timeout(struct net_device *netdev, unsigned int txqueue)
{
struct lio *lio;
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
index 370d76822ee0..7a77544a54f5 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
@@ -1628,7 +1628,7 @@ lio_xmit_failed:
/** \brief Network device Tx timeout
* @param netdev pointer to network device
*/
-static void liquidio_tx_timeout(struct net_device *netdev)
+static void liquidio_tx_timeout(struct net_device *netdev, unsigned int txqueue)
{
struct lio *lio;
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c
index f3f2e71431ac..600de587d7a9 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_rep.c
@@ -31,7 +31,7 @@ static int lio_vf_rep_open(struct net_device *ndev);
static int lio_vf_rep_stop(struct net_device *ndev);
static netdev_tx_t lio_vf_rep_pkt_xmit(struct sk_buff *skb,
struct net_device *ndev);
-static void lio_vf_rep_tx_timeout(struct net_device *netdev);
+static void lio_vf_rep_tx_timeout(struct net_device *netdev, unsigned int txqueue);
static int lio_vf_rep_phys_port_name(struct net_device *dev,
char *buf, size_t len);
static void lio_vf_rep_get_stats64(struct net_device *dev,
@@ -172,7 +172,7 @@ lio_vf_rep_stop(struct net_device *ndev)
}
static void
-lio_vf_rep_tx_timeout(struct net_device *ndev)
+lio_vf_rep_tx_timeout(struct net_device *ndev, unsigned int txqueue)
{
netif_trans_update(ndev);
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_console.c b/drivers/net/ethernet/cavium/liquidio/octeon_console.c
index 0cc2338d8d2a..dfc77507b159 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_console.c
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_console.c
@@ -205,11 +205,11 @@ static int __cvmx_bootmem_check_version(struct octeon_device *oct,
major_version = (u32)__cvmx_bootmem_desc_get(
oct, oct->bootmem_desc_addr,
offsetof(struct cvmx_bootmem_desc, major_version),
- FIELD_SIZEOF(struct cvmx_bootmem_desc, major_version));
+ sizeof_field(struct cvmx_bootmem_desc, major_version));
minor_version = (u32)__cvmx_bootmem_desc_get(
oct, oct->bootmem_desc_addr,
offsetof(struct cvmx_bootmem_desc, minor_version),
- FIELD_SIZEOF(struct cvmx_bootmem_desc, minor_version));
+ sizeof_field(struct cvmx_bootmem_desc, minor_version));
dev_dbg(&oct->pci_dev->dev, "%s: major_version=%d\n", __func__,
major_version);
@@ -237,13 +237,13 @@ static const struct cvmx_bootmem_named_block_desc
oct, named_addr,
offsetof(struct cvmx_bootmem_named_block_desc,
base_addr),
- FIELD_SIZEOF(
+ sizeof_field(
struct cvmx_bootmem_named_block_desc,
base_addr));
desc->size = __cvmx_bootmem_desc_get(oct, named_addr,
offsetof(struct cvmx_bootmem_named_block_desc,
size),
- FIELD_SIZEOF(
+ sizeof_field(
struct cvmx_bootmem_named_block_desc,
size));
@@ -268,20 +268,20 @@ static u64 cvmx_bootmem_phy_named_block_find(struct octeon_device *oct,
oct, oct->bootmem_desc_addr,
offsetof(struct cvmx_bootmem_desc,
named_block_array_addr),
- FIELD_SIZEOF(struct cvmx_bootmem_desc,
+ sizeof_field(struct cvmx_bootmem_desc,
named_block_array_addr));
u32 num_blocks = (u32)__cvmx_bootmem_desc_get(
oct, oct->bootmem_desc_addr,
offsetof(struct cvmx_bootmem_desc,
nb_num_blocks),
- FIELD_SIZEOF(struct cvmx_bootmem_desc,
+ sizeof_field(struct cvmx_bootmem_desc,
nb_num_blocks));
u32 name_length = (u32)__cvmx_bootmem_desc_get(
oct, oct->bootmem_desc_addr,
offsetof(struct cvmx_bootmem_desc,
named_block_name_len),
- FIELD_SIZEOF(struct cvmx_bootmem_desc,
+ sizeof_field(struct cvmx_bootmem_desc,
named_block_name_len));
u64 named_addr = named_block_array_addr;
@@ -292,7 +292,7 @@ static u64 cvmx_bootmem_phy_named_block_find(struct octeon_device *oct,
offsetof(
struct cvmx_bootmem_named_block_desc,
size),
- FIELD_SIZEOF(
+ sizeof_field(
struct cvmx_bootmem_named_block_desc,
size));
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
index f28409279ea4..016957285f99 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
@@ -1741,7 +1741,7 @@ static void nicvf_get_stats64(struct net_device *netdev,
}
-static void nicvf_tx_timeout(struct net_device *dev)
+static void nicvf_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct nicvf *nic = netdev_priv(dev);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index a70ac2097892..8b7d156f79d3 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -56,6 +56,7 @@
#include <asm/io.h>
#include "t4_chip_type.h"
#include "cxgb4_uld.h"
+#include "t4fw_api.h"
#define CH_WARN(adap, fmt, ...) dev_warn(adap->pdev_dev, fmt, ## __VA_ARGS__)
extern struct list_head adapter_list;
@@ -68,6 +69,16 @@ extern struct mutex uld_mutex;
#define ETHTXQ_STOP_THRES \
(1 + DIV_ROUND_UP((3 * MAX_SKB_FRAGS) / 2 + (MAX_SKB_FRAGS & 1), 8))
+#define FW_PARAM_DEV(param) \
+ (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) | \
+ FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_##param))
+
+#define FW_PARAM_PFVF(param) \
+ (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_PFVF) | \
+ FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_PFVF_##param) | \
+ FW_PARAMS_PARAM_Y_V(0) | \
+ FW_PARAMS_PARAM_Z_V(0))
+
enum {
MAX_NPORTS = 4, /* max # of ports */
SERNUM_LEN = 24, /* Serial # length */
@@ -504,6 +515,7 @@ struct link_config {
enum cc_pause requested_fc; /* flow control user has requested */
enum cc_pause fc; /* actual link flow control */
+ enum cc_pause advertised_fc; /* actual advertised flow control */
enum cc_fec requested_fec; /* Forward Error Correction: */
enum cc_fec fec; /* requested and actual in use */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
index 93868dca186a..ee3aab563b1d 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
@@ -3048,6 +3048,9 @@ static int sge_queue_entries(const struct adapter *adap)
int tot_uld_entries = 0;
int i;
+ if (!is_uld(adap))
+ goto lld_only;
+
mutex_lock(&uld_mutex);
for (i = 0; i < CXGB4_TX_MAX; i++)
tot_uld_entries += sge_qinfo_uld_txq_entries(adap, i);
@@ -3058,6 +3061,7 @@ static int sge_queue_entries(const struct adapter *adap)
}
mutex_unlock(&uld_mutex);
+lld_only:
return DIV_ROUND_UP(adap->sge.ethqsets, 4) +
(adap->sge.eohw_txq ? DIV_ROUND_UP(adap->sge.eoqsets, 4) : 0) +
tot_uld_entries +
@@ -3171,14 +3175,12 @@ static const struct file_operations mem_debugfs_fops = {
static int tid_info_show(struct seq_file *seq, void *v)
{
- unsigned int tid_start = 0;
struct adapter *adap = seq->private;
- const struct tid_info *t = &adap->tids;
- enum chip_type chip = CHELSIO_CHIP_VERSION(adap->params.chip);
-
- if (chip > CHELSIO_T5)
- tid_start = t4_read_reg(adap, LE_DB_ACTIVE_TABLE_START_INDEX_A);
+ const struct tid_info *t;
+ enum chip_type chip;
+ t = &adap->tids;
+ chip = CHELSIO_CHIP_VERSION(adap->params.chip);
if (t4_read_reg(adap, LE_DB_CONFIG_A) & HASHEN_F) {
unsigned int sb;
seq_printf(seq, "Connections in use: %u\n",
@@ -3190,9 +3192,9 @@ static int tid_info_show(struct seq_file *seq, void *v)
sb = t4_read_reg(adap, LE_DB_SRVR_START_INDEX_A);
if (sb) {
- seq_printf(seq, "TID range: %u..%u/%u..%u", tid_start,
+ seq_printf(seq, "TID range: %u..%u/%u..%u", t->tid_base,
sb - 1, adap->tids.hash_base,
- t->ntids - 1);
+ t->tid_base + t->ntids - 1);
seq_printf(seq, ", in use: %u/%u\n",
atomic_read(&t->tids_in_use),
atomic_read(&t->hash_tids_in_use));
@@ -3201,14 +3203,14 @@ static int tid_info_show(struct seq_file *seq, void *v)
t->aftid_base,
t->aftid_end,
adap->tids.hash_base,
- t->ntids - 1);
+ t->tid_base + t->ntids - 1);
seq_printf(seq, ", in use: %u/%u\n",
atomic_read(&t->tids_in_use),
atomic_read(&t->hash_tids_in_use));
} else {
seq_printf(seq, "TID range: %u..%u",
adap->tids.hash_base,
- t->ntids - 1);
+ t->tid_base + t->ntids - 1);
seq_printf(seq, ", in use: %u\n",
atomic_read(&t->hash_tids_in_use));
}
@@ -3216,8 +3218,8 @@ static int tid_info_show(struct seq_file *seq, void *v)
seq_printf(seq, "Connections in use: %u\n",
atomic_read(&t->conns_in_use));
- seq_printf(seq, "TID range: %u..%u", tid_start,
- tid_start + t->ntids - 1);
+ seq_printf(seq, "TID range: %u..%u", t->tid_base,
+ t->tid_base + t->ntids - 1);
seq_printf(seq, ", in use: %u\n",
atomic_read(&t->tids_in_use));
}
@@ -3240,6 +3242,9 @@ static int tid_info_show(struct seq_file *seq, void *v)
seq_printf(seq, "SFTID range: %u..%u in use: %u\n",
t->sftid_base, t->sftid_base + t->nsftids - 2,
t->sftids_in_use);
+ if (t->nhpftids)
+ seq_printf(seq, "HPFTID range: %u..%u\n", t->hpftid_base,
+ t->hpftid_base + t->nhpftids - 1);
if (t->ntids)
seq_printf(seq, "HW TID usage: %u IP users, %u IPv6 users\n",
t4_read_reg(adap, LE_DB_ACT_CNT_IPV4_A),
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
index 20ab3b6285a2..c837382ee522 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
@@ -807,8 +807,8 @@ static void get_pauseparam(struct net_device *dev,
struct port_info *p = netdev_priv(dev);
epause->autoneg = (p->link_cfg.requested_fc & PAUSE_AUTONEG) != 0;
- epause->rx_pause = (p->link_cfg.fc & PAUSE_RX) != 0;
- epause->tx_pause = (p->link_cfg.fc & PAUSE_TX) != 0;
+ epause->rx_pause = (p->link_cfg.advertised_fc & PAUSE_RX) != 0;
+ epause->tx_pause = (p->link_cfg.advertised_fc & PAUSE_TX) != 0;
}
static int set_pauseparam(struct net_device *dev,
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
index 1d39fca11810..2a2938bbb93a 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
@@ -361,20 +361,22 @@ static int get_filter_count(struct adapter *adapter, unsigned int fidx,
tcb_base = t4_read_reg(adapter, TP_CMM_TCB_BASE_A);
if (is_hashfilter(adapter) && hash) {
- if (fidx < adapter->tids.ntids) {
- f = adapter->tids.tid_tab[fidx];
- if (!f)
- return -EINVAL;
- } else {
+ if (tid_out_of_range(&adapter->tids, fidx))
return -E2BIG;
- }
+ f = adapter->tids.tid_tab[fidx - adapter->tids.tid_base];
+ if (!f)
+ return -EINVAL;
} else {
- if ((fidx != (adapter->tids.nftids +
- adapter->tids.nsftids - 1)) &&
- fidx >= adapter->tids.nftids)
+ if ((fidx != (adapter->tids.nftids + adapter->tids.nsftids +
+ adapter->tids.nhpftids - 1)) &&
+ fidx >= (adapter->tids.nftids + adapter->tids.nhpftids))
return -E2BIG;
- f = &adapter->tids.ftid_tab[fidx];
+ if (fidx < adapter->tids.nhpftids)
+ f = &adapter->tids.hpftid_tab[fidx];
+ else
+ f = &adapter->tids.ftid_tab[fidx -
+ adapter->tids.nhpftids];
if (!f->valid)
return -EINVAL;
}
@@ -480,6 +482,7 @@ int cxgb4_get_free_ftid(struct net_device *dev, int family)
ftid -= n;
}
spin_unlock_bh(&t->ftid_lock);
+ ftid += t->nhpftids;
return found ? ftid : -ENOMEM;
}
@@ -507,6 +510,24 @@ static int cxgb4_set_ftid(struct tid_info *t, int fidx, int family,
return 0;
}
+static int cxgb4_set_hpftid(struct tid_info *t, int fidx, int family)
+{
+ spin_lock_bh(&t->ftid_lock);
+
+ if (test_bit(fidx, t->hpftid_bmap)) {
+ spin_unlock_bh(&t->ftid_lock);
+ return -EBUSY;
+ }
+
+ if (family == PF_INET)
+ __set_bit(fidx, t->hpftid_bmap);
+ else
+ bitmap_allocate_region(t->hpftid_bmap, fidx, 1);
+
+ spin_unlock_bh(&t->ftid_lock);
+ return 0;
+}
+
static void cxgb4_clear_ftid(struct tid_info *t, int fidx, int family,
unsigned int chip_ver)
{
@@ -522,33 +543,58 @@ static void cxgb4_clear_ftid(struct tid_info *t, int fidx, int family,
spin_unlock_bh(&t->ftid_lock);
}
+static void cxgb4_clear_hpftid(struct tid_info *t, int fidx, int family)
+{
+ spin_lock_bh(&t->ftid_lock);
+
+ if (family == PF_INET)
+ __clear_bit(fidx, t->hpftid_bmap);
+ else
+ bitmap_release_region(t->hpftid_bmap, fidx, 1);
+
+ spin_unlock_bh(&t->ftid_lock);
+}
+
bool cxgb4_filter_prio_in_range(struct net_device *dev, u32 idx, u32 prio)
{
+ struct filter_entry *prev_fe, *next_fe, *tab;
struct adapter *adap = netdev2adap(dev);
- struct filter_entry *prev_fe, *next_fe;
+ u32 prev_ftid, next_ftid, max_tid;
struct tid_info *t = &adap->tids;
- u32 prev_ftid, next_ftid;
+ unsigned long *bmap;
bool valid = true;
+ if (idx < t->nhpftids) {
+ bmap = t->hpftid_bmap;
+ tab = t->hpftid_tab;
+ max_tid = t->nhpftids;
+ } else {
+ idx -= t->nhpftids;
+ bmap = t->ftid_bmap;
+ tab = t->ftid_tab;
+ max_tid = t->nftids;
+ }
+
/* Only insert the rule if both of the following conditions
* are met:
* 1. The immediate previous rule has priority <= @prio.
* 2. The immediate next rule has priority >= @prio.
*/
spin_lock_bh(&t->ftid_lock);
+
/* Don't insert if there's a rule already present at @idx. */
- if (test_bit(idx, t->ftid_bmap)) {
+ if (test_bit(idx, bmap)) {
valid = false;
goto out_unlock;
}
- next_ftid = find_next_bit(t->ftid_bmap, t->nftids, idx);
- if (next_ftid >= t->nftids)
+ next_ftid = find_next_bit(bmap, max_tid, idx);
+ if (next_ftid >= max_tid)
next_ftid = idx;
- next_fe = &adap->tids.ftid_tab[next_ftid];
+ next_fe = &tab[next_ftid];
- prev_ftid = find_last_bit(t->ftid_bmap, idx);
+ prev_ftid = find_last_bit(bmap, idx);
if (prev_ftid >= idx)
prev_ftid = idx;
@@ -558,13 +604,13 @@ bool cxgb4_filter_prio_in_range(struct net_device *dev, u32 idx, u32 prio)
* accordingly.
*/
if (CHELSIO_CHIP_VERSION(adap->params.chip) < CHELSIO_T6) {
- prev_fe = &adap->tids.ftid_tab[prev_ftid & ~0x3];
+ prev_fe = &tab[prev_ftid & ~0x3];
if (!prev_fe->fs.type)
- prev_fe = &adap->tids.ftid_tab[prev_ftid];
+ prev_fe = &tab[prev_ftid];
} else {
- prev_fe = &adap->tids.ftid_tab[prev_ftid & ~0x1];
+ prev_fe = &tab[prev_ftid & ~0x1];
if (!prev_fe->fs.type)
- prev_fe = &adap->tids.ftid_tab[prev_ftid];
+ prev_fe = &tab[prev_ftid];
}
if ((prev_fe->valid && prio < prev_fe->fs.tc_prio) ||
@@ -579,11 +625,16 @@ out_unlock:
/* Delete the filter at a specified index. */
static int del_filter_wr(struct adapter *adapter, int fidx)
{
- struct filter_entry *f = &adapter->tids.ftid_tab[fidx];
struct fw_filter_wr *fwr;
+ struct filter_entry *f;
struct sk_buff *skb;
unsigned int len;
+ if (fidx < adapter->tids.nhpftids)
+ f = &adapter->tids.hpftid_tab[fidx];
+ else
+ f = &adapter->tids.ftid_tab[fidx - adapter->tids.nhpftids];
+
len = sizeof(*fwr);
skb = alloc_skb(len, GFP_KERNEL);
@@ -609,10 +660,15 @@ static int del_filter_wr(struct adapter *adapter, int fidx)
*/
int set_filter_wr(struct adapter *adapter, int fidx)
{
- struct filter_entry *f = &adapter->tids.ftid_tab[fidx];
struct fw_filter2_wr *fwr;
+ struct filter_entry *f;
struct sk_buff *skb;
+ if (fidx < adapter->tids.nhpftids)
+ f = &adapter->tids.hpftid_tab[fidx];
+ else
+ f = &adapter->tids.ftid_tab[fidx - adapter->tids.nhpftids];
+
skb = alloc_skb(sizeof(*fwr), GFP_KERNEL);
if (!skb)
return -ENOMEM;
@@ -762,10 +818,14 @@ int delete_filter(struct adapter *adapter, unsigned int fidx)
struct filter_entry *f;
int ret;
- if (fidx >= adapter->tids.nftids + adapter->tids.nsftids)
+ if (fidx >= adapter->tids.nftids + adapter->tids.nsftids +
+ adapter->tids.nhpftids)
return -EINVAL;
- f = &adapter->tids.ftid_tab[fidx];
+ if (fidx < adapter->tids.nhpftids)
+ f = &adapter->tids.hpftid_tab[fidx];
+ else
+ f = &adapter->tids.ftid_tab[fidx - adapter->tids.nhpftids];
ret = writable_filter(f);
if (ret)
return ret;
@@ -811,12 +871,22 @@ void clear_all_filters(struct adapter *adapter)
struct net_device *dev = adapter->port[0];
unsigned int i;
+ if (adapter->tids.hpftid_tab) {
+ struct filter_entry *f = &adapter->tids.hpftid_tab[0];
+
+ for (i = 0; i < adapter->tids.nhpftids; i++, f++)
+ if (f->valid || f->pending)
+ cxgb4_del_filter(dev, i, &f->fs);
+ }
+
if (adapter->tids.ftid_tab) {
struct filter_entry *f = &adapter->tids.ftid_tab[0];
unsigned int max_ftid = adapter->tids.nftids +
- adapter->tids.nsftids;
+ adapter->tids.nsftids +
+ adapter->tids.nhpftids;
+
/* Clear all TCAM filters */
- for (i = 0; i < max_ftid; i++, f++)
+ for (i = adapter->tids.nhpftids; i < max_ftid; i++, f++)
if (f->valid || f->pending)
cxgb4_del_filter(dev, i, &f->fs);
}
@@ -1319,17 +1389,17 @@ out_err:
* filter specification in order to facilitate signaling completion of the
* operation.
*/
-int __cxgb4_set_filter(struct net_device *dev, int filter_id,
+int __cxgb4_set_filter(struct net_device *dev, int ftid,
struct ch_filter_specification *fs,
struct filter_ctx *ctx)
{
struct adapter *adapter = netdev2adap(dev);
- unsigned int chip_ver = CHELSIO_CHIP_VERSION(adapter->params.chip);
- unsigned int max_fidx, fidx;
- struct filter_entry *f;
+ unsigned int max_fidx, fidx, chip_ver;
+ int iq, ret, filter_id = ftid;
+ struct filter_entry *f, *tab;
u32 iconf;
- int iq, ret;
+ chip_ver = CHELSIO_CHIP_VERSION(adapter->params.chip);
if (fs->hash) {
if (is_hashfilter(adapter))
return cxgb4_set_hash_filter(dev, fs, ctx);
@@ -1338,7 +1408,7 @@ int __cxgb4_set_filter(struct net_device *dev, int filter_id,
return -EINVAL;
}
- max_fidx = adapter->tids.nftids;
+ max_fidx = adapter->tids.nftids + adapter->tids.nhpftids;
if (filter_id != (max_fidx + adapter->tids.nsftids - 1) &&
filter_id >= max_fidx)
return -E2BIG;
@@ -1353,6 +1423,13 @@ int __cxgb4_set_filter(struct net_device *dev, int filter_id,
if (iq < 0)
return iq;
+ if (fs->prio) {
+ tab = &adapter->tids.hpftid_tab[0];
+ } else {
+ tab = &adapter->tids.ftid_tab[0];
+ filter_id = ftid - adapter->tids.nhpftids;
+ }
+
/* IPv6 filters occupy four slots and must be aligned on
* four-slot boundaries. IPv4 filters only occupy a single
* slot and have no alignment requirements but writing a new
@@ -1373,9 +1450,8 @@ int __cxgb4_set_filter(struct net_device *dev, int filter_id,
else
fidx = filter_id & ~0x1;
- if (fidx != filter_id &&
- adapter->tids.ftid_tab[fidx].fs.type) {
- f = &adapter->tids.ftid_tab[fidx];
+ if (fidx != filter_id && tab[fidx].fs.type) {
+ f = &tab[fidx];
if (f->valid) {
dev_err(adapter->pdev_dev,
"Invalid location. IPv6 requires 4 slots and is occupying slots %u to %u\n",
@@ -1399,7 +1475,7 @@ int __cxgb4_set_filter(struct net_device *dev, int filter_id,
*/
for (fidx = filter_id + 1; fidx < filter_id + 4;
fidx++) {
- f = &adapter->tids.ftid_tab[fidx];
+ f = &tab[fidx];
if (f->valid) {
dev_err(adapter->pdev_dev,
"Invalid location. IPv6 requires 4 slots and an IPv4 filter exists at %u\n",
@@ -1415,7 +1491,7 @@ int __cxgb4_set_filter(struct net_device *dev, int filter_id,
return -EINVAL;
/* Check overlapping IPv4 filter slot */
fidx = filter_id + 1;
- f = &adapter->tids.ftid_tab[fidx];
+ f = &tab[fidx];
if (f->valid) {
pr_err("%s: IPv6 filter requires 2 indices. IPv4 filter already present at %d. Please remove IPv4 filter first.\n",
__func__, fidx);
@@ -1427,36 +1503,35 @@ int __cxgb4_set_filter(struct net_device *dev, int filter_id,
/* Check to make sure that provided filter index is not
* already in use by someone else
*/
- f = &adapter->tids.ftid_tab[filter_id];
+ f = &tab[filter_id];
if (f->valid)
return -EBUSY;
- fidx = filter_id + adapter->tids.ftid_base;
- ret = cxgb4_set_ftid(&adapter->tids, filter_id,
- fs->type ? PF_INET6 : PF_INET,
- chip_ver);
+ if (fs->prio) {
+ fidx = filter_id + adapter->tids.hpftid_base;
+ ret = cxgb4_set_hpftid(&adapter->tids, filter_id,
+ fs->type ? PF_INET6 : PF_INET);
+ } else {
+ fidx = filter_id + adapter->tids.ftid_base;
+ ret = cxgb4_set_ftid(&adapter->tids, filter_id,
+ fs->type ? PF_INET6 : PF_INET,
+ chip_ver);
+ }
+
if (ret)
return ret;
/* Check t make sure the filter requested is writable ... */
ret = writable_filter(f);
- if (ret) {
- /* Clear the bits we have set above */
- cxgb4_clear_ftid(&adapter->tids, filter_id,
- fs->type ? PF_INET6 : PF_INET,
- chip_ver);
- return ret;
- }
+ if (ret)
+ goto free_tid;
if (is_t6(adapter->params.chip) && fs->type &&
ipv6_addr_type((const struct in6_addr *)fs->val.lip) !=
IPV6_ADDR_ANY) {
ret = cxgb4_clip_get(dev, (const u32 *)&fs->val.lip, 1);
- if (ret) {
- cxgb4_clear_ftid(&adapter->tids, filter_id, PF_INET6,
- chip_ver);
- return ret;
- }
+ if (ret)
+ goto free_tid;
}
/* Convert the filter specification into our internal format.
@@ -1487,7 +1562,7 @@ int __cxgb4_set_filter(struct net_device *dev, int filter_id,
f->fs.mask.vni,
0, 1, 1);
if (ret < 0)
- goto free_clip;
+ goto free_tid;
f->fs.val.ovlan = ret;
f->fs.mask.ovlan = 0x1ff;
@@ -1501,21 +1576,22 @@ int __cxgb4_set_filter(struct net_device *dev, int filter_id,
*/
f->ctx = ctx;
f->tid = fidx; /* Save the actual tid */
- ret = set_filter_wr(adapter, filter_id);
- if (ret) {
+ ret = set_filter_wr(adapter, ftid);
+ if (ret)
+ goto free_tid;
+
+ return ret;
+
+free_tid:
+ if (f->fs.prio)
+ cxgb4_clear_hpftid(&adapter->tids, filter_id,
+ fs->type ? PF_INET6 : PF_INET);
+ else
cxgb4_clear_ftid(&adapter->tids, filter_id,
fs->type ? PF_INET6 : PF_INET,
chip_ver);
- clear_filter(adapter, f);
- }
-
- return ret;
-free_clip:
- if (is_t6(adapter->params.chip) && f->fs.type)
- cxgb4_clip_release(f->dev, (const u32 *)&f->fs.val.lip, 1);
- cxgb4_clear_ftid(&adapter->tids, filter_id,
- fs->type ? PF_INET6 : PF_INET, chip_ver);
+ clear_filter(adapter, f);
return ret;
}
@@ -1537,7 +1613,7 @@ static int cxgb4_del_hash_filter(struct net_device *dev, int filter_id,
netdev_dbg(dev, "%s: filter_id = %d ; nftids = %d\n",
__func__, filter_id, adapter->tids.nftids);
- if (filter_id > adapter->tids.ntids)
+ if (tid_out_of_range(t, filter_id))
return -E2BIG;
f = lookup_tid(t, filter_id);
@@ -1590,11 +1666,11 @@ int __cxgb4_del_filter(struct net_device *dev, int filter_id,
struct filter_ctx *ctx)
{
struct adapter *adapter = netdev2adap(dev);
- unsigned int chip_ver = CHELSIO_CHIP_VERSION(adapter->params.chip);
+ unsigned int max_fidx, chip_ver;
struct filter_entry *f;
- unsigned int max_fidx;
int ret;
+ chip_ver = CHELSIO_CHIP_VERSION(adapter->params.chip);
if (fs && fs->hash) {
if (is_hashfilter(adapter))
return cxgb4_del_hash_filter(dev, filter_id, ctx);
@@ -1603,21 +1679,31 @@ int __cxgb4_del_filter(struct net_device *dev, int filter_id,
return -EINVAL;
}
- max_fidx = adapter->tids.nftids;
+ max_fidx = adapter->tids.nftids + adapter->tids.nhpftids;
if (filter_id != (max_fidx + adapter->tids.nsftids - 1) &&
filter_id >= max_fidx)
return -E2BIG;
- f = &adapter->tids.ftid_tab[filter_id];
+ if (filter_id < adapter->tids.nhpftids)
+ f = &adapter->tids.hpftid_tab[filter_id];
+ else
+ f = &adapter->tids.ftid_tab[filter_id - adapter->tids.nhpftids];
+
ret = writable_filter(f);
if (ret)
return ret;
if (f->valid) {
f->ctx = ctx;
- cxgb4_clear_ftid(&adapter->tids, filter_id,
- f->fs.type ? PF_INET6 : PF_INET,
- chip_ver);
+ if (f->fs.prio)
+ cxgb4_clear_hpftid(&adapter->tids,
+ f->tid - adapter->tids.hpftid_base,
+ f->fs.type ? PF_INET6 : PF_INET);
+ else
+ cxgb4_clear_ftid(&adapter->tids,
+ f->tid - adapter->tids.ftid_base,
+ f->fs.type ? PF_INET6 : PF_INET,
+ chip_ver);
return del_filter_wr(adapter, filter_id);
}
@@ -1842,11 +1928,18 @@ void filter_rpl(struct adapter *adap, const struct cpl_set_tcb_rpl *rpl)
max_fidx = adap->tids.nftids + adap->tids.nsftids;
/* Get the corresponding filter entry for this tid */
if (adap->tids.ftid_tab) {
- /* Check this in normal filter region */
- idx = tid - adap->tids.ftid_base;
- if (idx >= max_fidx)
- return;
- f = &adap->tids.ftid_tab[idx];
+ idx = tid - adap->tids.hpftid_base;
+ if (idx < adap->tids.nhpftids) {
+ f = &adap->tids.hpftid_tab[idx];
+ } else {
+ /* Check this in normal filter region */
+ idx = tid - adap->tids.ftid_base;
+ if (idx >= max_fidx)
+ return;
+ f = &adap->tids.ftid_tab[idx];
+ idx += adap->tids.nhpftids;
+ }
+
if (f->tid != tid)
return;
}
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 12ff69b3ba91..1930e39f195e 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -804,6 +804,26 @@ static int setup_ppod_edram(struct adapter *adap)
return 0;
}
+static void adap_config_hpfilter(struct adapter *adapter)
+{
+ u32 param, val = 0;
+ int ret;
+
+ /* Enable HP filter region. Older fw will fail this request and
+ * it is fine.
+ */
+ param = FW_PARAM_DEV(HPFILTER_REGION_SUPPORT);
+ ret = t4_set_params(adapter, adapter->mbox, adapter->pf, 0,
+ 1, &param, &val);
+
+ /* An error means FW doesn't know about HP filter support,
+ * it's not a problem, don't return an error.
+ */
+ if (ret < 0)
+ dev_err(adapter->pdev_dev,
+ "HP filter region isn't supported by FW\n");
+}
+
/**
* cxgb4_write_rss - write the RSS table for a given port
* @pi: the port
@@ -1427,8 +1447,8 @@ static void mk_tid_release(struct sk_buff *skb, unsigned int chan,
static void cxgb4_queue_tid_release(struct tid_info *t, unsigned int chan,
unsigned int tid)
{
- void **p = &t->tid_tab[tid];
struct adapter *adap = container_of(t, struct adapter, tids);
+ void **p = &t->tid_tab[tid - t->tid_base];
spin_lock_bh(&adap->tid_release_lock);
*p = adap->tid_release_head;
@@ -1480,13 +1500,13 @@ static void process_tid_release_list(struct work_struct *work)
void cxgb4_remove_tid(struct tid_info *t, unsigned int chan, unsigned int tid,
unsigned short family)
{
- struct sk_buff *skb;
struct adapter *adap = container_of(t, struct adapter, tids);
+ struct sk_buff *skb;
- WARN_ON(tid >= t->ntids);
+ WARN_ON(tid_out_of_range(&adap->tids, tid));
- if (t->tid_tab[tid]) {
- t->tid_tab[tid] = NULL;
+ if (t->tid_tab[tid - adap->tids.tid_base]) {
+ t->tid_tab[tid - adap->tids.tid_base] = NULL;
atomic_dec(&t->conns_in_use);
if (t->hash_base && (tid >= t->hash_base)) {
if (family == AF_INET6)
@@ -1518,6 +1538,7 @@ static int tid_init(struct tid_info *t)
struct adapter *adap = container_of(t, struct adapter, tids);
unsigned int max_ftids = t->nftids + t->nsftids;
unsigned int natids = t->natids;
+ unsigned int hpftid_bmap_size;
unsigned int eotid_bmap_size;
unsigned int stid_bmap_size;
unsigned int ftid_bmap_size;
@@ -1525,12 +1546,15 @@ static int tid_init(struct tid_info *t)
stid_bmap_size = BITS_TO_LONGS(t->nstids + t->nsftids);
ftid_bmap_size = BITS_TO_LONGS(t->nftids);
+ hpftid_bmap_size = BITS_TO_LONGS(t->nhpftids);
eotid_bmap_size = BITS_TO_LONGS(t->neotids);
size = t->ntids * sizeof(*t->tid_tab) +
natids * sizeof(*t->atid_tab) +
t->nstids * sizeof(*t->stid_tab) +
t->nsftids * sizeof(*t->stid_tab) +
stid_bmap_size * sizeof(long) +
+ t->nhpftids * sizeof(*t->hpftid_tab) +
+ hpftid_bmap_size * sizeof(long) +
max_ftids * sizeof(*t->ftid_tab) +
ftid_bmap_size * sizeof(long) +
t->neotids * sizeof(*t->eotid_tab) +
@@ -1543,7 +1567,9 @@ static int tid_init(struct tid_info *t)
t->atid_tab = (union aopen_entry *)&t->tid_tab[t->ntids];
t->stid_tab = (struct serv_entry *)&t->atid_tab[natids];
t->stid_bmap = (unsigned long *)&t->stid_tab[t->nstids + t->nsftids];
- t->ftid_tab = (struct filter_entry *)&t->stid_bmap[stid_bmap_size];
+ t->hpftid_tab = (struct filter_entry *)&t->stid_bmap[stid_bmap_size];
+ t->hpftid_bmap = (unsigned long *)&t->hpftid_tab[t->nhpftids];
+ t->ftid_tab = (struct filter_entry *)&t->hpftid_bmap[hpftid_bmap_size];
t->ftid_bmap = (unsigned long *)&t->ftid_tab[max_ftids];
t->eotid_tab = (struct eotid_entry *)&t->ftid_bmap[ftid_bmap_size];
t->eotid_bmap = (unsigned long *)&t->eotid_tab[t->neotids];
@@ -1578,6 +1604,8 @@ static int tid_init(struct tid_info *t)
bitmap_zero(t->eotid_bmap, t->neotids);
}
+ if (t->nhpftids)
+ bitmap_zero(t->hpftid_bmap, t->nhpftids);
bitmap_zero(t->ftid_bmap, t->nftids);
return 0;
}
@@ -4351,6 +4379,7 @@ static int adap_init0_config(struct adapter *adapter, int reset)
"HMA configuration failed with error %d\n", ret);
if (is_t6(adapter->params.chip)) {
+ adap_config_hpfilter(adapter);
ret = setup_ppod_edram(adapter);
if (!ret)
dev_info(adapter->pdev_dev, "Successfully enabled "
@@ -4660,16 +4689,6 @@ static int adap_init0(struct adapter *adap, int vpd_skip)
/*
* Grab some of our basic fundamental operating parameters.
*/
-#define FW_PARAM_DEV(param) \
- (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) | \
- FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_##param))
-
-#define FW_PARAM_PFVF(param) \
- FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_PFVF) | \
- FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_PFVF_##param)| \
- FW_PARAMS_PARAM_Y_V(0) | \
- FW_PARAMS_PARAM_Z_V(0)
-
params[0] = FW_PARAM_PFVF(EQ_START);
params[1] = FW_PARAM_PFVF(L2T_START);
params[2] = FW_PARAM_PFVF(L2T_END);
@@ -4687,6 +4706,16 @@ static int adap_init0(struct adapter *adap, int vpd_skip)
adap->sge.ingr_start = val[5];
if (CHELSIO_CHIP_VERSION(adap->params.chip) > CHELSIO_T5) {
+ params[0] = FW_PARAM_PFVF(HPFILTER_START);
+ params[1] = FW_PARAM_PFVF(HPFILTER_END);
+ ret = t4_query_params(adap, adap->mbox, adap->pf, 0, 2,
+ params, val);
+ if (ret < 0)
+ goto bye;
+
+ adap->tids.hpftid_base = val[0];
+ adap->tids.nhpftids = val[1] - val[0] + 1;
+
/* Read the raw mps entries. In T6, the last 2 tcam entries
* are reserved for raw mac addresses (rawf = 2, one per port).
*/
@@ -4698,6 +4727,9 @@ static int adap_init0(struct adapter *adap, int vpd_skip)
adap->rawf_start = val[0];
adap->rawf_cnt = val[1] - val[0] + 1;
}
+
+ adap->tids.tid_base =
+ t4_read_reg(adap, LE_DB_ACTIVE_TABLE_START_INDEX_A);
}
/* qids (ingress/egress) returned from firmware can be anywhere
@@ -5050,8 +5082,6 @@ static int adap_init0(struct adapter *adap, int vpd_skip)
}
adap->params.crypto = ntohs(caps_cmd.cryptocaps);
}
-#undef FW_PARAM_PFVF
-#undef FW_PARAM_DEV
/* The MTU/MSS Table is initialized by now, so load their values. If
* we're initializing the adapter, then we'll make any modifications
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c
index 0fa80bef575d..bb5513bdd293 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_flower.c
@@ -672,10 +672,14 @@ int cxgb4_tc_flower_replace(struct net_device *dev,
* 0 to driver. However, the hardware TCAM index
* starts from 0. Hence, the -1 here.
*/
- if (cls->common.prio <= adap->tids.nftids)
+ if (cls->common.prio <= (adap->tids.nftids +
+ adap->tids.nhpftids)) {
fidx = cls->common.prio - 1;
- else
+ if (fidx < adap->tids.nhpftids)
+ fs->prio = 1;
+ } else {
fidx = cxgb4_get_free_ftid(dev, inet_family);
+ }
/* Only insert FLOWER rule if its priority doesn't
* conflict with existing rules in the LETCAM.
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_matchall.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_matchall.c
index 102b370fbd3e..24c3c2dc7171 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_matchall.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_matchall.c
@@ -137,7 +137,7 @@ static int cxgb4_matchall_alloc_filter(struct net_device *dev,
* -1 here. 1 slot is enough to create a wildcard matchall
* VIID rule.
*/
- if (cls->common.prio <= adap->tids.nftids)
+ if (cls->common.prio <= (adap->tids.nftids + adap->tids.nhpftids))
fidx = cls->common.prio - 1;
else
fidx = cxgb4_get_free_ftid(dev, PF_INET);
@@ -156,6 +156,8 @@ static int cxgb4_matchall_alloc_filter(struct net_device *dev,
fs = &tc_port_matchall->ingress.fs;
memset(fs, 0, sizeof(*fs));
+ if (fidx < adap->tids.nhpftids)
+ fs->prio = 1;
fs->tc_prio = cls->common.prio;
fs->tc_cookie = cls->cookie;
fs->hitcnts = 1;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.c
index 477973d2e341..8971dddcdb7a 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.c
@@ -145,6 +145,10 @@ static int cxgb4_mqprio_alloc_hw_resources(struct net_device *dev)
kfree(adap->sge.eohw_rxq);
return -ENOMEM;
}
+
+ refcount_set(&adap->tc_mqprio->refcnt, 1);
+ } else {
+ refcount_inc(&adap->tc_mqprio->refcnt);
}
if (!(adap->flags & CXGB4_USING_MSIX))
@@ -205,7 +209,6 @@ static int cxgb4_mqprio_alloc_hw_resources(struct net_device *dev)
cxgb4_enable_rx(adap, &eorxq->rspq);
}
- refcount_inc(&adap->tc_mqprio->refcnt);
return 0;
out_free_msix:
@@ -234,9 +237,10 @@ out_free_queues:
t4_sge_free_ethofld_txq(adap, eotxq);
}
- kfree(adap->sge.eohw_txq);
- kfree(adap->sge.eohw_rxq);
-
+ if (refcount_dec_and_test(&adap->tc_mqprio->refcnt)) {
+ kfree(adap->sge.eohw_txq);
+ kfree(adap->sge.eohw_rxq);
+ }
return ret;
}
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c
index 133f8623ba86..269b8d9e25e0 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c
@@ -176,7 +176,7 @@ int cxgb4_config_knode(struct net_device *dev, struct tc_cls_u32_offload *cls)
/* Only insert U32 rule if its priority doesn't conflict with
* existing rules in the LETCAM.
*/
- if (filter_id >= adapter->tids.nftids ||
+ if (filter_id >= adapter->tids.nftids + adapter->tids.nhpftids ||
!cxgb4_filter_prio_in_range(dev, filter_id, cls->common.prio)) {
NL_SET_ERR_MSG_MOD(extack,
"No free LETCAM index available");
@@ -199,6 +199,8 @@ int cxgb4_config_knode(struct net_device *dev, struct tc_cls_u32_offload *cls)
memset(&fs, 0, sizeof(fs));
+ if (filter_id < adapter->tids.nhpftids)
+ fs.prio = 1;
fs.tc_prio = cls->common.prio;
fs.tc_cookie = cls->knode.handle;
@@ -355,6 +357,7 @@ int cxgb4_delete_knode(struct net_device *dev, struct tc_cls_u32_offload *cls)
unsigned int filter_id, max_tids, i, j;
struct cxgb4_link *link = NULL;
struct cxgb4_tc_u32_table *t;
+ struct filter_entry *f;
u32 handle, uhtid;
int ret;
@@ -363,8 +366,15 @@ int cxgb4_delete_knode(struct net_device *dev, struct tc_cls_u32_offload *cls)
/* Fetch the location to delete the filter. */
filter_id = TC_U32_NODE(cls->knode.handle) - 1;
- if (filter_id >= adapter->tids.nftids ||
- cls->knode.handle != adapter->tids.ftid_tab[filter_id].fs.tc_cookie)
+ if (filter_id >= adapter->tids.nftids + adapter->tids.nhpftids)
+ return -ERANGE;
+
+ if (filter_id < adapter->tids.nhpftids)
+ f = &adapter->tids.hpftid_tab[filter_id];
+ else
+ f = &adapter->tids.ftid_tab[filter_id - adapter->tids.nhpftids];
+
+ if (cls->knode.handle != f->fs.tc_cookie)
return -ERANGE;
t = adapter->tc_u32;
@@ -445,7 +455,7 @@ void cxgb4_cleanup_tc_u32(struct adapter *adap)
struct cxgb4_tc_u32_table *cxgb4_init_tc_u32(struct adapter *adap)
{
- unsigned int max_tids = adap->tids.nftids;
+ unsigned int max_tids = adap->tids.nftids + adap->tids.nhpftids;
struct cxgb4_tc_u32_table *t;
unsigned int i;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
index 861b25d28ed6..d9d27bc1ae67 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
@@ -99,6 +99,7 @@ struct eotid_entry {
*/
struct tid_info {
void **tid_tab;
+ unsigned int tid_base;
unsigned int ntids;
struct serv_entry *stid_tab;
@@ -111,6 +112,11 @@ struct tid_info {
unsigned int natids;
unsigned int atid_base;
+ struct filter_entry *hpftid_tab;
+ unsigned long *hpftid_bmap;
+ unsigned int nhpftids;
+ unsigned int hpftid_base;
+
struct filter_entry *ftid_tab;
unsigned long *ftid_bmap;
unsigned int nftids;
@@ -147,9 +153,15 @@ struct tid_info {
static inline void *lookup_tid(const struct tid_info *t, unsigned int tid)
{
+ tid -= t->tid_base;
return tid < t->ntids ? t->tid_tab[tid] : NULL;
}
+static inline bool tid_out_of_range(const struct tid_info *t, unsigned int tid)
+{
+ return ((tid - t->tid_base) >= t->ntids);
+}
+
static inline void *lookup_atid(const struct tid_info *t, unsigned int atid)
{
return atid < t->natids ? t->atid_tab[atid].data : NULL;
@@ -171,7 +183,7 @@ static inline void *lookup_stid(const struct tid_info *t, unsigned int stid)
static inline void cxgb4_insert_tid(struct tid_info *t, void *data,
unsigned int tid, unsigned short family)
{
- t->tid_tab[tid] = data;
+ t->tid_tab[tid - t->tid_base] = data;
if (t->hash_base && (tid >= t->hash_base)) {
if (family == AF_INET6)
atomic_add(2, &t->hash_tids_in_use);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
index 19d18acfc9a6..844fdcf55118 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
@@ -4089,7 +4089,8 @@ static inline fw_port_cap32_t cc_to_fwcap_pause(enum cc_pause cc_pause)
if (cc_pause & PAUSE_TX)
fw_pause |= FW_PORT_CAP32_802_3_PAUSE;
else
- fw_pause |= FW_PORT_CAP32_802_3_ASM_DIR;
+ fw_pause |= FW_PORT_CAP32_802_3_ASM_DIR |
+ FW_PORT_CAP32_802_3_PAUSE;
} else if (cc_pause & PAUSE_TX) {
fw_pause |= FW_PORT_CAP32_802_3_ASM_DIR;
}
@@ -8563,17 +8564,17 @@ static fw_port_cap32_t lstatus_to_fwcap(u32 lstatus)
void t4_handle_get_port_info(struct port_info *pi, const __be64 *rpl)
{
const struct fw_port_cmd *cmd = (const void *)rpl;
- int action = FW_PORT_CMD_ACTION_G(be32_to_cpu(cmd->action_to_len16));
- struct adapter *adapter = pi->adapter;
+ fw_port_cap32_t pcaps, acaps, lpacaps, linkattr;
struct link_config *lc = &pi->link_cfg;
- int link_ok, linkdnrc;
- enum fw_port_type port_type;
+ struct adapter *adapter = pi->adapter;
+ unsigned int speed, fc, fec, adv_fc;
enum fw_port_module_type mod_type;
- unsigned int speed, fc, fec;
- fw_port_cap32_t pcaps, acaps, lpacaps, linkattr;
+ int action, link_ok, linkdnrc;
+ enum fw_port_type port_type;
/* Extract the various fields from the Port Information message.
*/
+ action = FW_PORT_CMD_ACTION_G(be32_to_cpu(cmd->action_to_len16));
switch (action) {
case FW_PORT_ACTION_GET_PORT_INFO: {
u32 lstatus = be32_to_cpu(cmd->u.info.lstatus_to_modtype);
@@ -8611,6 +8612,7 @@ void t4_handle_get_port_info(struct port_info *pi, const __be64 *rpl)
}
fec = fwcap_to_cc_fec(acaps);
+ adv_fc = fwcap_to_cc_pause(acaps);
fc = fwcap_to_cc_pause(linkattr);
speed = fwcap_to_speed(linkattr);
@@ -8667,7 +8669,9 @@ void t4_handle_get_port_info(struct port_info *pi, const __be64 *rpl)
}
if (link_ok != lc->link_ok || speed != lc->speed ||
- fc != lc->fc || fec != lc->fec) { /* something changed */
+ fc != lc->fc || adv_fc != lc->advertised_fc ||
+ fec != lc->fec) {
+ /* something changed */
if (!link_ok && lc->link_ok) {
lc->link_down_rc = linkdnrc;
dev_warn_ratelimited(adapter->pdev_dev,
@@ -8677,6 +8681,7 @@ void t4_handle_get_port_info(struct port_info *pi, const __be64 *rpl)
}
lc->link_ok = link_ok;
lc->speed = speed;
+ lc->advertised_fc = adv_fc;
lc->fc = fc;
lc->fec = fec;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
index ac4fb43bdec6..accad1101ad1 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
@@ -1321,6 +1321,7 @@ enum fw_params_param_dev {
FW_PARAMS_PARAM_DEV_RDMA_WRITE_WITH_IMM = 0x21,
FW_PARAMS_PARAM_DEV_PPOD_EDRAM = 0x23,
FW_PARAMS_PARAM_DEV_RI_WRITE_CMPL_WR = 0x24,
+ FW_PARAMS_PARAM_DEV_HPFILTER_REGION_SUPPORT = 0x26,
FW_PARAMS_PARAM_DEV_OPAQUE_VIID_SMT_EXTN = 0x27,
FW_PARAMS_PARAM_DEV_HASHFILTER_WITH_OFLD = 0x28,
FW_PARAMS_PARAM_DEV_DBQ_TIMER = 0x29,
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
index f6fc0875d5b0..f4d41f968afa 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
@@ -1690,8 +1690,8 @@ static void cxgb4vf_get_pauseparam(struct net_device *dev,
struct port_info *pi = netdev_priv(dev);
pauseparam->autoneg = (pi->link_cfg.requested_fc & PAUSE_AUTONEG) != 0;
- pauseparam->rx_pause = (pi->link_cfg.fc & PAUSE_RX) != 0;
- pauseparam->tx_pause = (pi->link_cfg.fc & PAUSE_TX) != 0;
+ pauseparam->rx_pause = (pi->link_cfg.advertised_fc & PAUSE_RX) != 0;
+ pauseparam->tx_pause = (pi->link_cfg.advertised_fc & PAUSE_TX) != 0;
}
/*
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h
index ccca67cf4487..57cfd10a99ec 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h
@@ -135,6 +135,7 @@ struct link_config {
enum cc_pause requested_fc; /* flow control user has requested */
enum cc_pause fc; /* actual link flow control */
+ enum cc_pause advertised_fc; /* actual advertised flow control */
enum cc_fec auto_fec; /* Forward Error Correction: */
enum cc_fec requested_fec; /* "automatic" (IEEE 802.3), */
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c
index 8a389d617a23..9d49ff211cc1 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c
@@ -1913,16 +1913,16 @@ static const char *t4vf_link_down_rc_str(unsigned char link_down_rc)
static void t4vf_handle_get_port_info(struct port_info *pi,
const struct fw_port_cmd *cmd)
{
- int action = FW_PORT_CMD_ACTION_G(be32_to_cpu(cmd->action_to_len16));
- struct adapter *adapter = pi->adapter;
+ fw_port_cap32_t pcaps, acaps, lpacaps, linkattr;
struct link_config *lc = &pi->link_cfg;
- int link_ok, linkdnrc;
- enum fw_port_type port_type;
+ struct adapter *adapter = pi->adapter;
+ unsigned int speed, fc, fec, adv_fc;
enum fw_port_module_type mod_type;
- unsigned int speed, fc, fec;
- fw_port_cap32_t pcaps, acaps, lpacaps, linkattr;
+ int action, link_ok, linkdnrc;
+ enum fw_port_type port_type;
/* Extract the various fields from the Port Information message. */
+ action = FW_PORT_CMD_ACTION_G(be32_to_cpu(cmd->action_to_len16));
switch (action) {
case FW_PORT_ACTION_GET_PORT_INFO: {
u32 lstatus = be32_to_cpu(cmd->u.info.lstatus_to_modtype);
@@ -1982,6 +1982,7 @@ static void t4vf_handle_get_port_info(struct port_info *pi,
}
fec = fwcap_to_cc_fec(acaps);
+ adv_fc = fwcap_to_cc_pause(acaps);
fc = fwcap_to_cc_pause(linkattr);
speed = fwcap_to_speed(linkattr);
@@ -2012,7 +2013,9 @@ static void t4vf_handle_get_port_info(struct port_info *pi,
}
if (link_ok != lc->link_ok || speed != lc->speed ||
- fc != lc->fc || fec != lc->fec) { /* something changed */
+ fc != lc->fc || adv_fc != lc->advertised_fc ||
+ fec != lc->fec) {
+ /* something changed */
if (!link_ok && lc->link_ok) {
lc->link_down_rc = linkdnrc;
dev_warn_ratelimited(adapter->pdev_dev,
@@ -2022,6 +2025,7 @@ static void t4vf_handle_get_port_info(struct port_info *pi,
}
lc->link_ok = link_ok;
lc->speed = speed;
+ lc->advertised_fc = adv_fc;
lc->fc = fc;
lc->fec = fec;
diff --git a/drivers/net/ethernet/cirrus/cs89x0.c b/drivers/net/ethernet/cirrus/cs89x0.c
index c9aebcde403a..33ace3307059 100644
--- a/drivers/net/ethernet/cirrus/cs89x0.c
+++ b/drivers/net/ethernet/cirrus/cs89x0.c
@@ -1128,7 +1128,7 @@ net_get_stats(struct net_device *dev)
return &dev->stats;
}
-static void net_timeout(struct net_device *dev)
+static void net_timeout(struct net_device *dev, unsigned int txqueue)
{
/* If we get here, some higher level has decided we are broken.
There should really be a "kick me" function call instead. */
diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c
index acb2856936d2..bbd7b3175f09 100644
--- a/drivers/net/ethernet/cisco/enic/enic_main.c
+++ b/drivers/net/ethernet/cisco/enic/enic_main.c
@@ -1095,7 +1095,7 @@ static void enic_set_rx_mode(struct net_device *netdev)
}
/* netif_tx_lock held, BHs disabled */
-static void enic_tx_timeout(struct net_device *netdev)
+static void enic_tx_timeout(struct net_device *netdev, unsigned int txqueue)
{
struct enic *enic = netdev_priv(netdev);
schedule_work(&enic->tx_hang_reset);
diff --git a/drivers/net/ethernet/cortina/gemini.c b/drivers/net/ethernet/cortina/gemini.c
index a8f4c69252ff..f30fa8e6ef80 100644
--- a/drivers/net/ethernet/cortina/gemini.c
+++ b/drivers/net/ethernet/cortina/gemini.c
@@ -576,6 +576,8 @@ static int gmac_setup_txqs(struct net_device *netdev)
if (port->txq_dma_base & ~DMA_Q_BASE_MASK) {
dev_warn(geth->dev, "TX queue base is not aligned\n");
+ dma_free_coherent(geth->dev, len * sizeof(*desc_ring),
+ desc_ring, port->txq_dma_base);
kfree(skb_tab);
return -ENOMEM;
}
@@ -1296,7 +1298,7 @@ out_drop:
return NETDEV_TX_OK;
}
-static void gmac_tx_timeout(struct net_device *netdev)
+static void gmac_tx_timeout(struct net_device *netdev, unsigned int txqueue)
{
netdev_err(netdev, "Tx timeout\n");
gmac_dump_dma_state(netdev);
diff --git a/drivers/net/ethernet/davicom/dm9000.c b/drivers/net/ethernet/davicom/dm9000.c
index cce90b5925d9..1ea3372775e6 100644
--- a/drivers/net/ethernet/davicom/dm9000.c
+++ b/drivers/net/ethernet/davicom/dm9000.c
@@ -964,7 +964,7 @@ dm9000_init_dm9000(struct net_device *dev)
}
/* Our watchdog timed out. Called by the networking layer */
-static void dm9000_timeout(struct net_device *dev)
+static void dm9000_timeout(struct net_device *dev, unsigned int txqueue)
{
struct board_info *db = netdev_priv(dev);
u8 reg_save;
diff --git a/drivers/net/ethernet/dec/tulip/de2104x.c b/drivers/net/ethernet/dec/tulip/de2104x.c
index f1a2da15dd0a..fd3c2abf74b5 100644
--- a/drivers/net/ethernet/dec/tulip/de2104x.c
+++ b/drivers/net/ethernet/dec/tulip/de2104x.c
@@ -1436,7 +1436,7 @@ static int de_close (struct net_device *dev)
return 0;
}
-static void de_tx_timeout (struct net_device *dev)
+static void de_tx_timeout (struct net_device *dev, unsigned int txqueue)
{
struct de_private *de = netdev_priv(dev);
const int irq = de->pdev->irq;
diff --git a/drivers/net/ethernet/dec/tulip/dmfe.c b/drivers/net/ethernet/dec/tulip/dmfe.c
index 0efdbd1a4a6f..32d470d4122a 100644
--- a/drivers/net/ethernet/dec/tulip/dmfe.c
+++ b/drivers/net/ethernet/dec/tulip/dmfe.c
@@ -2214,15 +2214,16 @@ static int __init dmfe_init_module(void)
if (cr6set)
dmfe_cr6_user_set = cr6set;
- switch(mode) {
- case DMFE_10MHF:
+ switch (mode) {
+ case DMFE_10MHF:
case DMFE_100MHF:
case DMFE_10MFD:
case DMFE_100MFD:
case DMFE_1M_HPNA:
dmfe_media_mode = mode;
break;
- default:dmfe_media_mode = DMFE_AUTO;
+ default:
+ dmfe_media_mode = DMFE_AUTO;
break;
}
diff --git a/drivers/net/ethernet/dec/tulip/tulip_core.c b/drivers/net/ethernet/dec/tulip/tulip_core.c
index 3e3e08698876..9e9d9eee29d9 100644
--- a/drivers/net/ethernet/dec/tulip/tulip_core.c
+++ b/drivers/net/ethernet/dec/tulip/tulip_core.c
@@ -255,7 +255,7 @@ MODULE_DEVICE_TABLE(pci, tulip_pci_tbl);
const char tulip_media_cap[32] =
{0,0,0,16, 3,19,16,24, 27,4,7,5, 0,20,23,20, 28,31,0,0, };
-static void tulip_tx_timeout(struct net_device *dev);
+static void tulip_tx_timeout(struct net_device *dev, unsigned int txqueue);
static void tulip_init_ring(struct net_device *dev);
static void tulip_free_ring(struct net_device *dev);
static netdev_tx_t tulip_start_xmit(struct sk_buff *skb,
@@ -534,7 +534,7 @@ free_ring:
}
-static void tulip_tx_timeout(struct net_device *dev)
+static void tulip_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct tulip_private *tp = netdev_priv(dev);
void __iomem *ioaddr = tp->base_addr;
diff --git a/drivers/net/ethernet/dec/tulip/uli526x.c b/drivers/net/ethernet/dec/tulip/uli526x.c
index b1f30b194300..117ffe08800d 100644
--- a/drivers/net/ethernet/dec/tulip/uli526x.c
+++ b/drivers/net/ethernet/dec/tulip/uli526x.c
@@ -1809,8 +1809,8 @@ static int __init uli526x_init_module(void)
if (cr6set)
uli526x_cr6_user_set = cr6set;
- switch (mode) {
- case ULI526X_10MHF:
+ switch (mode) {
+ case ULI526X_10MHF:
case ULI526X_100MHF:
case ULI526X_10MFD:
case ULI526X_100MFD:
diff --git a/drivers/net/ethernet/dec/tulip/winbond-840.c b/drivers/net/ethernet/dec/tulip/winbond-840.c
index 70cb2d689c2c..7f136488e67c 100644
--- a/drivers/net/ethernet/dec/tulip/winbond-840.c
+++ b/drivers/net/ethernet/dec/tulip/winbond-840.c
@@ -331,7 +331,7 @@ static void netdev_timer(struct timer_list *t);
static void init_rxtx_rings(struct net_device *dev);
static void free_rxtx_rings(struct netdev_private *np);
static void init_registers(struct net_device *dev);
-static void tx_timeout(struct net_device *dev);
+static void tx_timeout(struct net_device *dev, unsigned int txqueue);
static int alloc_ringdesc(struct net_device *dev);
static void free_ringdesc(struct netdev_private *np);
static netdev_tx_t start_tx(struct sk_buff *skb, struct net_device *dev);
@@ -921,7 +921,7 @@ static void init_registers(struct net_device *dev)
iowrite32(0, ioaddr + RxStartDemand);
}
-static void tx_timeout(struct net_device *dev)
+static void tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct netdev_private *np = netdev_priv(dev);
void __iomem *ioaddr = np->base_addr;
diff --git a/drivers/net/ethernet/dlink/dl2k.c b/drivers/net/ethernet/dlink/dl2k.c
index 55e720d2ea0c..26c5da032b1e 100644
--- a/drivers/net/ethernet/dlink/dl2k.c
+++ b/drivers/net/ethernet/dlink/dl2k.c
@@ -66,7 +66,7 @@ static const int multicast_filter_limit = 0x40;
static int rio_open (struct net_device *dev);
static void rio_timer (struct timer_list *t);
-static void rio_tx_timeout (struct net_device *dev);
+static void rio_tx_timeout (struct net_device *dev, unsigned int txqueue);
static netdev_tx_t start_xmit (struct sk_buff *skb, struct net_device *dev);
static irqreturn_t rio_interrupt (int irq, void *dev_instance);
static void rio_free_tx (struct net_device *dev, int irq);
@@ -696,7 +696,7 @@ rio_timer (struct timer_list *t)
}
static void
-rio_tx_timeout (struct net_device *dev)
+rio_tx_timeout (struct net_device *dev, unsigned int txqueue)
{
struct netdev_private *np = netdev_priv(dev);
void __iomem *ioaddr = np->ioaddr;
diff --git a/drivers/net/ethernet/dlink/sundance.c b/drivers/net/ethernet/dlink/sundance.c
index 4a37a69764ce..b91387c456ba 100644
--- a/drivers/net/ethernet/dlink/sundance.c
+++ b/drivers/net/ethernet/dlink/sundance.c
@@ -432,7 +432,7 @@ static int mdio_wait_link(struct net_device *dev, int wait);
static int netdev_open(struct net_device *dev);
static void check_duplex(struct net_device *dev);
static void netdev_timer(struct timer_list *t);
-static void tx_timeout(struct net_device *dev);
+static void tx_timeout(struct net_device *dev, unsigned int txqueue);
static void init_ring(struct net_device *dev);
static netdev_tx_t start_tx(struct sk_buff *skb, struct net_device *dev);
static int reset_tx (struct net_device *dev);
@@ -969,7 +969,7 @@ static void netdev_timer(struct timer_list *t)
add_timer(&np->timer);
}
-static void tx_timeout(struct net_device *dev)
+static void tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct netdev_private *np = netdev_priv(dev);
void __iomem *ioaddr = np->base;
diff --git a/drivers/net/ethernet/emulex/benet/be_ethtool.c b/drivers/net/ethernet/emulex/benet/be_ethtool.c
index 5bb5abf99588..022a54a1805b 100644
--- a/drivers/net/ethernet/emulex/benet/be_ethtool.c
+++ b/drivers/net/ethernet/emulex/benet/be_ethtool.c
@@ -23,7 +23,7 @@ struct be_ethtool_stat {
};
enum {DRVSTAT_TX, DRVSTAT_RX, DRVSTAT};
-#define FIELDINFO(_struct, field) FIELD_SIZEOF(_struct, field), \
+#define FIELDINFO(_struct, field) sizeof_field(_struct, field), \
offsetof(_struct, field)
#define DRVSTAT_TX_INFO(field) #field, DRVSTAT_TX,\
FIELDINFO(struct be_tx_stats, field)
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index 39eb7d525043..56f59db6ebf2 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -1417,7 +1417,7 @@ drop:
return NETDEV_TX_OK;
}
-static void be_tx_timeout(struct net_device *netdev)
+static void be_tx_timeout(struct net_device *netdev, unsigned int txqueue)
{
struct be_adapter *adapter = netdev_priv(netdev);
struct device *dev = &adapter->pdev->dev;
diff --git a/drivers/net/ethernet/ethoc.c b/drivers/net/ethernet/ethoc.c
index ea4f17f5cce7..66406da16b60 100644
--- a/drivers/net/ethernet/ethoc.c
+++ b/drivers/net/ethernet/ethoc.c
@@ -869,7 +869,7 @@ static int ethoc_change_mtu(struct net_device *dev, int new_mtu)
return -ENOSYS;
}
-static void ethoc_tx_timeout(struct net_device *dev)
+static void ethoc_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct ethoc *priv = netdev_priv(dev);
u32 pending = ethoc_read(priv, INT_SOURCE);
diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c
index 8ed85037f021..48b3b72fe02e 100644
--- a/drivers/net/ethernet/faraday/ftgmac100.c
+++ b/drivers/net/ethernet/faraday/ftgmac100.c
@@ -1545,7 +1545,7 @@ static int ftgmac100_do_ioctl(struct net_device *netdev, struct ifreq *ifr, int
return phy_mii_ioctl(netdev->phydev, ifr, cmd);
}
-static void ftgmac100_tx_timeout(struct net_device *netdev)
+static void ftgmac100_tx_timeout(struct net_device *netdev, unsigned int txqueue)
{
struct ftgmac100 *priv = netdev_priv(netdev);
diff --git a/drivers/net/ethernet/fealnx.c b/drivers/net/ethernet/fealnx.c
index c24fd56a2c71..84f10970299a 100644
--- a/drivers/net/ethernet/fealnx.c
+++ b/drivers/net/ethernet/fealnx.c
@@ -428,7 +428,7 @@ static void getlinktype(struct net_device *dev);
static void getlinkstatus(struct net_device *dev);
static void netdev_timer(struct timer_list *t);
static void reset_timer(struct timer_list *t);
-static void fealnx_tx_timeout(struct net_device *dev);
+static void fealnx_tx_timeout(struct net_device *dev, unsigned int txqueue);
static void init_ring(struct net_device *dev);
static netdev_tx_t start_tx(struct sk_buff *skb, struct net_device *dev);
static irqreturn_t intr_handler(int irq, void *dev_instance);
@@ -1191,7 +1191,7 @@ static void reset_timer(struct timer_list *t)
}
-static void fealnx_tx_timeout(struct net_device *dev)
+static void fealnx_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct netdev_private *np = netdev_priv(dev);
void __iomem *ioaddr = np->mem;
diff --git a/drivers/net/ethernet/freescale/Makefile b/drivers/net/ethernet/freescale/Makefile
index 6a93293d31e0..67c436400352 100644
--- a/drivers/net/ethernet/freescale/Makefile
+++ b/drivers/net/ethernet/freescale/Makefile
@@ -25,4 +25,5 @@ obj-$(CONFIG_FSL_DPAA_ETH) += dpaa/
obj-$(CONFIG_FSL_DPAA2_ETH) += dpaa2/
obj-$(CONFIG_FSL_ENETC) += enetc/
+obj-$(CONFIG_FSL_ENETC_MDIO) += enetc/
obj-$(CONFIG_FSL_ENETC_VF) += enetc/
diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
index 6a9d12dad5d9..09dbcd819d84 100644
--- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
+++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
@@ -288,7 +288,7 @@ static int dpaa_stop(struct net_device *net_dev)
return err;
}
-static void dpaa_tx_timeout(struct net_device *net_dev)
+static void dpaa_tx_timeout(struct net_device *net_dev, unsigned int txqueue)
{
struct dpaa_percpu_priv *percpu_priv;
const struct dpaa_priv *priv;
@@ -1719,7 +1719,7 @@ static struct sk_buff *sg_fd_to_skb(const struct dpaa_priv *priv,
int page_offset;
unsigned int sz;
int *count_ptr;
- int i;
+ int i, j;
vaddr = phys_to_virt(addr);
WARN_ON(!IS_ALIGNED((unsigned long)vaddr, SMP_CACHE_BYTES));
@@ -1736,14 +1736,14 @@ static struct sk_buff *sg_fd_to_skb(const struct dpaa_priv *priv,
WARN_ON(!IS_ALIGNED((unsigned long)sg_vaddr,
SMP_CACHE_BYTES));
+ dma_unmap_page(priv->rx_dma_dev, sg_addr,
+ DPAA_BP_RAW_SIZE, DMA_FROM_DEVICE);
+
/* We may use multiple Rx pools */
dpaa_bp = dpaa_bpid2pool(sgt[i].bpid);
if (!dpaa_bp)
goto free_buffers;
- count_ptr = this_cpu_ptr(dpaa_bp->percpu_count);
- dma_unmap_page(priv->rx_dma_dev, sg_addr,
- DPAA_BP_RAW_SIZE, DMA_FROM_DEVICE);
if (!skb) {
sz = dpaa_bp->size +
SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
@@ -1786,7 +1786,9 @@ static struct sk_buff *sg_fd_to_skb(const struct dpaa_priv *priv,
skb_add_rx_frag(skb, i - 1, head_page, frag_off,
frag_len, dpaa_bp->size);
}
+
/* Update the pool count for the current {cpu x bpool} */
+ count_ptr = this_cpu_ptr(dpaa_bp->percpu_count);
(*count_ptr)--;
if (qm_sg_entry_is_final(&sgt[i]))
@@ -1800,26 +1802,25 @@ static struct sk_buff *sg_fd_to_skb(const struct dpaa_priv *priv,
return skb;
free_buffers:
- /* compensate sw bpool counter changes */
- for (i--; i >= 0; i--) {
- dpaa_bp = dpaa_bpid2pool(sgt[i].bpid);
- if (dpaa_bp) {
- count_ptr = this_cpu_ptr(dpaa_bp->percpu_count);
- (*count_ptr)++;
- }
- }
/* free all the SG entries */
- for (i = 0; i < DPAA_SGT_MAX_ENTRIES ; i++) {
- sg_addr = qm_sg_addr(&sgt[i]);
+ for (j = 0; j < DPAA_SGT_MAX_ENTRIES ; j++) {
+ sg_addr = qm_sg_addr(&sgt[j]);
sg_vaddr = phys_to_virt(sg_addr);
+ /* all pages 0..i were unmaped */
+ if (j > i)
+ dma_unmap_page(priv->rx_dma_dev, qm_sg_addr(&sgt[j]),
+ DPAA_BP_RAW_SIZE, DMA_FROM_DEVICE);
free_pages((unsigned long)sg_vaddr, 0);
- dpaa_bp = dpaa_bpid2pool(sgt[i].bpid);
- if (dpaa_bp) {
- count_ptr = this_cpu_ptr(dpaa_bp->percpu_count);
- (*count_ptr)--;
+ /* counters 0..i-1 were decremented */
+ if (j >= i) {
+ dpaa_bp = dpaa_bpid2pool(sgt[j].bpid);
+ if (dpaa_bp) {
+ count_ptr = this_cpu_ptr(dpaa_bp->percpu_count);
+ (*count_ptr)--;
+ }
}
- if (qm_sg_entry_is_final(&sgt[i]))
+ if (qm_sg_entry_is_final(&sgt[j]))
break;
}
/* free the SGT fragment */
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.c
index a9503aea527f..cc1b7f85e433 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ptp.c
@@ -27,6 +27,20 @@ static int dpaa2_ptp_enable(struct ptp_clock_info *ptp,
mc_dev = to_fsl_mc_device(dev);
switch (rq->type) {
+ case PTP_CLK_REQ_EXTTS:
+ switch (rq->extts.index) {
+ case 0:
+ bit = DPRTC_EVENT_ETS1;
+ break;
+ case 1:
+ bit = DPRTC_EVENT_ETS2;
+ break;
+ default:
+ return -EINVAL;
+ }
+ if (on)
+ extts_clean_up(ptp_qoriq, rq->extts.index, false);
+ break;
case PTP_CLK_REQ_PPS:
bit = DPRTC_EVENT_PPS;
break;
@@ -96,6 +110,12 @@ static irqreturn_t dpaa2_ptp_irq_handler_thread(int irq, void *priv)
ptp_clock_event(ptp_qoriq->clock, &event);
}
+ if (status & DPRTC_EVENT_ETS1)
+ extts_clean_up(ptp_qoriq, 0, true);
+
+ if (status & DPRTC_EVENT_ETS2)
+ extts_clean_up(ptp_qoriq, 1, true);
+
err = dprtc_clear_irq_status(mc_dev->mc_io, 0, mc_dev->mc_handle,
DPRTC_IRQ_INDEX, status);
if (unlikely(err)) {
@@ -160,10 +180,10 @@ static int dpaa2_ptp_probe(struct fsl_mc_device *mc_dev)
irq = mc_dev->irqs[0];
ptp_qoriq->irq = irq->msi_desc->irq;
- err = devm_request_threaded_irq(dev, ptp_qoriq->irq, NULL,
- dpaa2_ptp_irq_handler_thread,
- IRQF_NO_SUSPEND | IRQF_ONESHOT,
- dev_name(dev), ptp_qoriq);
+ err = request_threaded_irq(ptp_qoriq->irq, NULL,
+ dpaa2_ptp_irq_handler_thread,
+ IRQF_NO_SUSPEND | IRQF_ONESHOT,
+ dev_name(dev), ptp_qoriq);
if (err < 0) {
dev_err(dev, "devm_request_threaded_irq(): %d\n", err);
goto err_free_mc_irq;
@@ -173,18 +193,20 @@ static int dpaa2_ptp_probe(struct fsl_mc_device *mc_dev)
DPRTC_IRQ_INDEX, 1);
if (err < 0) {
dev_err(dev, "dprtc_set_irq_enable(): %d\n", err);
- goto err_free_mc_irq;
+ goto err_free_threaded_irq;
}
err = ptp_qoriq_init(ptp_qoriq, base, &dpaa2_ptp_caps);
if (err)
- goto err_free_mc_irq;
+ goto err_free_threaded_irq;
dpaa2_phc_index = ptp_qoriq->phc_index;
dev_set_drvdata(dev, ptp_qoriq);
return 0;
+err_free_threaded_irq:
+ free_irq(ptp_qoriq->irq, ptp_qoriq);
err_free_mc_irq:
fsl_mc_free_irqs(mc_dev);
err_unmap:
diff --git a/drivers/net/ethernet/freescale/dpaa2/dprtc-cmd.h b/drivers/net/ethernet/freescale/dpaa2/dprtc-cmd.h
index 4ac05bfef338..96ffeb948f08 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dprtc-cmd.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dprtc-cmd.h
@@ -9,9 +9,11 @@
/* Command versioning */
#define DPRTC_CMD_BASE_VERSION 1
+#define DPRTC_CMD_VERSION_2 2
#define DPRTC_CMD_ID_OFFSET 4
#define DPRTC_CMD(id) (((id) << DPRTC_CMD_ID_OFFSET) | DPRTC_CMD_BASE_VERSION)
+#define DPRTC_CMD_V2(id) (((id) << DPRTC_CMD_ID_OFFSET) | DPRTC_CMD_VERSION_2)
/* Command IDs */
#define DPRTC_CMDID_CLOSE DPRTC_CMD(0x800)
@@ -19,7 +21,7 @@
#define DPRTC_CMDID_SET_IRQ_ENABLE DPRTC_CMD(0x012)
#define DPRTC_CMDID_GET_IRQ_ENABLE DPRTC_CMD(0x013)
-#define DPRTC_CMDID_SET_IRQ_MASK DPRTC_CMD(0x014)
+#define DPRTC_CMDID_SET_IRQ_MASK DPRTC_CMD_V2(0x014)
#define DPRTC_CMDID_GET_IRQ_MASK DPRTC_CMD(0x015)
#define DPRTC_CMDID_GET_IRQ_STATUS DPRTC_CMD(0x016)
#define DPRTC_CMDID_CLEAR_IRQ_STATUS DPRTC_CMD(0x017)
diff --git a/drivers/net/ethernet/freescale/dpaa2/dprtc.h b/drivers/net/ethernet/freescale/dpaa2/dprtc.h
index 311c184e1aef..05c413719e55 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dprtc.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dprtc.h
@@ -20,6 +20,8 @@ struct fsl_mc_io;
#define DPRTC_IRQ_INDEX 0
#define DPRTC_EVENT_PPS 0x08000000
+#define DPRTC_EVENT_ETS1 0x00800000
+#define DPRTC_EVENT_ETS2 0x00400000
int dprtc_open(struct fsl_mc_io *mc_io,
u32 cmd_flags,
diff --git a/drivers/net/ethernet/freescale/enetc/Kconfig b/drivers/net/ethernet/freescale/enetc/Kconfig
index edad4ca46327..fe942de19597 100644
--- a/drivers/net/ethernet/freescale/enetc/Kconfig
+++ b/drivers/net/ethernet/freescale/enetc/Kconfig
@@ -2,6 +2,7 @@
config FSL_ENETC
tristate "ENETC PF driver"
depends on PCI && PCI_MSI && (ARCH_LAYERSCAPE || COMPILE_TEST)
+ select FSL_ENETC_MDIO
select PHYLIB
help
This driver supports NXP ENETC gigabit ethernet controller PCIe
diff --git a/drivers/net/ethernet/freescale/enetc/Makefile b/drivers/net/ethernet/freescale/enetc/Makefile
index d0db33e5b6b7..74f7ac253b8b 100644
--- a/drivers/net/ethernet/freescale/enetc/Makefile
+++ b/drivers/net/ethernet/freescale/enetc/Makefile
@@ -3,7 +3,7 @@
common-objs := enetc.o enetc_cbdr.o enetc_ethtool.o
obj-$(CONFIG_FSL_ENETC) += fsl-enetc.o
-fsl-enetc-y := enetc_pf.o enetc_mdio.o $(common-objs)
+fsl-enetc-y := enetc_pf.o $(common-objs)
fsl-enetc-$(CONFIG_PCI_IOV) += enetc_msg.o
fsl-enetc-$(CONFIG_FSL_ENETC_QOS) += enetc_qos.o
diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c
index 17739906c966..1f79e36116a3 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc.c
@@ -149,11 +149,21 @@ static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb,
if (enetc_tx_csum(skb, &temp_bd))
flags |= ENETC_TXBD_FLAGS_CSUM | ENETC_TXBD_FLAGS_L4CS;
+ else if (tx_ring->tsd_enable)
+ flags |= ENETC_TXBD_FLAGS_TSE | ENETC_TXBD_FLAGS_TXSTART;
/* first BD needs frm_len and offload flags set */
temp_bd.frm_len = cpu_to_le16(skb->len);
temp_bd.flags = flags;
+ if (flags & ENETC_TXBD_FLAGS_TSE) {
+ u32 temp;
+
+ temp = (skb->skb_mstamp_ns >> 5 & ENETC_TXBD_TXSTART_MASK)
+ | (flags << ENETC_TXBD_FLAGS_OFFSET);
+ temp_bd.txstart = cpu_to_le32(temp);
+ }
+
if (flags & ENETC_TXBD_FLAGS_EX) {
u8 e_flags = 0;
*txbd = temp_bd;
@@ -227,6 +237,8 @@ static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb,
enetc_bdr_idx_inc(tx_ring, &i);
tx_ring->next_to_use = i;
+ skb_tx_timestamp(skb);
+
/* let H/W know BD ring has been updated */
enetc_wr_reg(tx_ring->tpir, i); /* includes wmb() */
@@ -1503,6 +1515,8 @@ int enetc_setup_tc(struct net_device *ndev, enum tc_setup_type type,
return enetc_setup_tc_taprio(ndev, type_data);
case TC_SETUP_QDISC_CBS:
return enetc_setup_tc_cbs(ndev, type_data);
+ case TC_SETUP_QDISC_ETF:
+ return enetc_setup_tc_txtime(ndev, type_data);
default:
return -EOPNOTSUPP;
}
diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h b/drivers/net/ethernet/freescale/enetc/enetc.h
index 7ee0da6d0015..dd4a227ffc7a 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.h
+++ b/drivers/net/ethernet/freescale/enetc/enetc.h
@@ -72,6 +72,7 @@ struct enetc_bdr {
struct enetc_ring_stats stats;
dma_addr_t bd_dma_base;
+ u8 tsd_enable; /* Time specific departure */
} ____cacheline_aligned_in_smp;
static inline void enetc_bdr_idx_inc(struct enetc_bdr *bdr, int *i)
@@ -256,8 +257,10 @@ int enetc_send_cmd(struct enetc_si *si, struct enetc_cbd *cbd);
int enetc_setup_tc_taprio(struct net_device *ndev, void *type_data);
void enetc_sched_speed_set(struct net_device *ndev);
int enetc_setup_tc_cbs(struct net_device *ndev, void *type_data);
+int enetc_setup_tc_txtime(struct net_device *ndev, void *type_data);
#else
#define enetc_setup_tc_taprio(ndev, type_data) -EOPNOTSUPP
#define enetc_sched_speed_set(ndev) (void)0
#define enetc_setup_tc_cbs(ndev, type_data) -EOPNOTSUPP
+#define enetc_setup_tc_txtime(ndev, type_data) -EOPNOTSUPP
#endif
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c
index 880a8ed8bb47..301ee0dde02d 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c
@@ -579,6 +579,7 @@ static int enetc_get_ts_info(struct net_device *ndev,
(1 << HWTSTAMP_FILTER_ALL);
#else
info->so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE |
+ SOF_TIMESTAMPING_TX_SOFTWARE |
SOF_TIMESTAMPING_SOFTWARE;
#endif
return 0;
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_hw.h b/drivers/net/ethernet/freescale/enetc/enetc_hw.h
index 51f543ef37a8..62554f28ce07 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_hw.h
+++ b/drivers/net/ethernet/freescale/enetc/enetc_hw.h
@@ -200,6 +200,7 @@ enum enetc_bdr_type {TX, RX};
#define ENETC_PFPMR 0x1900
#define ENETC_PFPMR_PMACE BIT(1)
#define ENETC_PFPMR_MWLM BIT(0)
+#define ENETC_EMDIO_BASE 0x1c00
#define ENETC_PSIUMHFR0(n, err) (((err) ? 0x1d08 : 0x1d00) + (n) * 0x10)
#define ENETC_PSIUMHFR1(n) (0x1d04 + (n) * 0x10)
#define ENETC_PSIMMHFR0(n, err) (((err) ? 0x1d00 : 0x1d08) + (n) * 0x10)
@@ -358,6 +359,7 @@ union enetc_tx_bd {
u8 l4_csoff;
u8 flags;
}; /* default layout */
+ __le32 txstart;
__le32 lstatus;
};
};
@@ -378,11 +380,14 @@ union enetc_tx_bd {
};
#define ENETC_TXBD_FLAGS_L4CS BIT(0)
+#define ENETC_TXBD_FLAGS_TSE BIT(1)
#define ENETC_TXBD_FLAGS_W BIT(2)
#define ENETC_TXBD_FLAGS_CSUM BIT(3)
+#define ENETC_TXBD_FLAGS_TXSTART BIT(4)
#define ENETC_TXBD_FLAGS_EX BIT(6)
#define ENETC_TXBD_FLAGS_F BIT(7)
-
+#define ENETC_TXBD_TXSTART_MASK GENMASK(24, 0)
+#define ENETC_TXBD_FLAGS_OFFSET 24
static inline void enetc_clear_tx_bd(union enetc_tx_bd *txbd)
{
memset(txbd, 0, sizeof(*txbd));
@@ -615,3 +620,7 @@ struct enetc_cbd {
/* Port time gating capability register */
#define ENETC_QBV_PTGCAPR_OFFSET 0x11a08
#define ENETC_QBV_MAX_GCL_LEN_MASK GENMASK(15, 0)
+
+/* Port time specific departure */
+#define ENETC_PTCTSDR(n) (0x1210 + 4 * (n))
+#define ENETC_TSDE BIT(31)
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_mdio.c b/drivers/net/ethernet/freescale/enetc/enetc_mdio.c
index 149883c8f0b8..48c32a171afa 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_mdio.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_mdio.c
@@ -1,41 +1,56 @@
// SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause)
/* Copyright 2019 NXP */
+#include <linux/fsl/enetc_mdio.h>
#include <linux/mdio.h>
#include <linux/of_mdio.h>
#include <linux/iopoll.h>
#include <linux/of.h>
-#include "enetc_mdio.h"
+#include "enetc_pf.h"
-#define ENETC_MDIO_REG_OFFSET 0x1c00
#define ENETC_MDIO_CFG 0x0 /* MDIO configuration and status */
#define ENETC_MDIO_CTL 0x4 /* MDIO control */
#define ENETC_MDIO_DATA 0x8 /* MDIO data */
#define ENETC_MDIO_ADDR 0xc /* MDIO address */
-#define enetc_mdio_rd(hw, off) \
- enetc_port_rd(hw, ENETC_##off + ENETC_MDIO_REG_OFFSET)
-#define enetc_mdio_wr(hw, off, val) \
- enetc_port_wr(hw, ENETC_##off + ENETC_MDIO_REG_OFFSET, val)
-#define enetc_mdio_rd_reg(off) enetc_mdio_rd(hw, off)
+static inline u32 _enetc_mdio_rd(struct enetc_mdio_priv *mdio_priv, int off)
+{
+ return enetc_port_rd(mdio_priv->hw, mdio_priv->mdio_base + off);
+}
+
+static inline void _enetc_mdio_wr(struct enetc_mdio_priv *mdio_priv, int off,
+ u32 val)
+{
+ enetc_port_wr(mdio_priv->hw, mdio_priv->mdio_base + off, val);
+}
-#define ENETC_MDC_DIV 258
+#define enetc_mdio_rd(mdio_priv, off) \
+ _enetc_mdio_rd(mdio_priv, ENETC_##off)
+#define enetc_mdio_wr(mdio_priv, off, val) \
+ _enetc_mdio_wr(mdio_priv, ENETC_##off, val)
+#define enetc_mdio_rd_reg(off) enetc_mdio_rd(mdio_priv, off)
#define MDIO_CFG_CLKDIV(x) ((((x) >> 1) & 0xff) << 8)
#define MDIO_CFG_BSY BIT(0)
#define MDIO_CFG_RD_ER BIT(1)
+#define MDIO_CFG_HOLD(x) (((x) << 2) & GENMASK(4, 2))
#define MDIO_CFG_ENC45 BIT(6)
/* external MDIO only - driven on neg MDC edge */
#define MDIO_CFG_NEG BIT(23)
+#define ENETC_EMDIO_CFG \
+ (MDIO_CFG_HOLD(2) | \
+ MDIO_CFG_CLKDIV(258) | \
+ MDIO_CFG_NEG)
+
#define MDIO_CTL_DEV_ADDR(x) ((x) & 0x1f)
#define MDIO_CTL_PORT_ADDR(x) (((x) & 0x1f) << 5)
#define MDIO_CTL_READ BIT(15)
#define MDIO_DATA(x) ((x) & 0xffff)
#define TIMEOUT 1000
-static int enetc_mdio_wait_complete(struct enetc_hw *hw)
+static int enetc_mdio_wait_complete(struct enetc_mdio_priv *mdio_priv)
{
u32 val;
@@ -46,12 +61,11 @@ static int enetc_mdio_wait_complete(struct enetc_hw *hw)
int enetc_mdio_write(struct mii_bus *bus, int phy_id, int regnum, u16 value)
{
struct enetc_mdio_priv *mdio_priv = bus->priv;
- struct enetc_hw *hw = mdio_priv->hw;
u32 mdio_ctl, mdio_cfg;
u16 dev_addr;
int ret;
- mdio_cfg = MDIO_CFG_CLKDIV(ENETC_MDC_DIV) | MDIO_CFG_NEG;
+ mdio_cfg = ENETC_EMDIO_CFG;
if (regnum & MII_ADDR_C45) {
dev_addr = (regnum >> 16) & 0x1f;
mdio_cfg |= MDIO_CFG_ENC45;
@@ -61,44 +75,44 @@ int enetc_mdio_write(struct mii_bus *bus, int phy_id, int regnum, u16 value)
mdio_cfg &= ~MDIO_CFG_ENC45;
}
- enetc_mdio_wr(hw, MDIO_CFG, mdio_cfg);
+ enetc_mdio_wr(mdio_priv, MDIO_CFG, mdio_cfg);
- ret = enetc_mdio_wait_complete(hw);
+ ret = enetc_mdio_wait_complete(mdio_priv);
if (ret)
return ret;
/* set port and dev addr */
mdio_ctl = MDIO_CTL_PORT_ADDR(phy_id) | MDIO_CTL_DEV_ADDR(dev_addr);
- enetc_mdio_wr(hw, MDIO_CTL, mdio_ctl);
+ enetc_mdio_wr(mdio_priv, MDIO_CTL, mdio_ctl);
/* set the register address */
if (regnum & MII_ADDR_C45) {
- enetc_mdio_wr(hw, MDIO_ADDR, regnum & 0xffff);
+ enetc_mdio_wr(mdio_priv, MDIO_ADDR, regnum & 0xffff);
- ret = enetc_mdio_wait_complete(hw);
+ ret = enetc_mdio_wait_complete(mdio_priv);
if (ret)
return ret;
}
/* write the value */
- enetc_mdio_wr(hw, MDIO_DATA, MDIO_DATA(value));
+ enetc_mdio_wr(mdio_priv, MDIO_DATA, MDIO_DATA(value));
- ret = enetc_mdio_wait_complete(hw);
+ ret = enetc_mdio_wait_complete(mdio_priv);
if (ret)
return ret;
return 0;
}
+EXPORT_SYMBOL_GPL(enetc_mdio_write);
int enetc_mdio_read(struct mii_bus *bus, int phy_id, int regnum)
{
struct enetc_mdio_priv *mdio_priv = bus->priv;
- struct enetc_hw *hw = mdio_priv->hw;
u32 mdio_ctl, mdio_cfg;
u16 dev_addr, value;
int ret;
- mdio_cfg = MDIO_CFG_CLKDIV(ENETC_MDC_DIV) | MDIO_CFG_NEG;
+ mdio_cfg = ENETC_EMDIO_CFG;
if (regnum & MII_ADDR_C45) {
dev_addr = (regnum >> 16) & 0x1f;
mdio_cfg |= MDIO_CFG_ENC45;
@@ -107,86 +121,56 @@ int enetc_mdio_read(struct mii_bus *bus, int phy_id, int regnum)
mdio_cfg &= ~MDIO_CFG_ENC45;
}
- enetc_mdio_wr(hw, MDIO_CFG, mdio_cfg);
+ enetc_mdio_wr(mdio_priv, MDIO_CFG, mdio_cfg);
- ret = enetc_mdio_wait_complete(hw);
+ ret = enetc_mdio_wait_complete(mdio_priv);
if (ret)
return ret;
/* set port and device addr */
mdio_ctl = MDIO_CTL_PORT_ADDR(phy_id) | MDIO_CTL_DEV_ADDR(dev_addr);
- enetc_mdio_wr(hw, MDIO_CTL, mdio_ctl);
+ enetc_mdio_wr(mdio_priv, MDIO_CTL, mdio_ctl);
/* set the register address */
if (regnum & MII_ADDR_C45) {
- enetc_mdio_wr(hw, MDIO_ADDR, regnum & 0xffff);
+ enetc_mdio_wr(mdio_priv, MDIO_ADDR, regnum & 0xffff);
- ret = enetc_mdio_wait_complete(hw);
+ ret = enetc_mdio_wait_complete(mdio_priv);
if (ret)
return ret;
}
/* initiate the read */
- enetc_mdio_wr(hw, MDIO_CTL, mdio_ctl | MDIO_CTL_READ);
+ enetc_mdio_wr(mdio_priv, MDIO_CTL, mdio_ctl | MDIO_CTL_READ);
- ret = enetc_mdio_wait_complete(hw);
+ ret = enetc_mdio_wait_complete(mdio_priv);
if (ret)
return ret;
/* return all Fs if nothing was there */
- if (enetc_mdio_rd(hw, MDIO_CFG) & MDIO_CFG_RD_ER) {
+ if (enetc_mdio_rd(mdio_priv, MDIO_CFG) & MDIO_CFG_RD_ER) {
dev_dbg(&bus->dev,
"Error while reading PHY%d reg at %d.%hhu\n",
phy_id, dev_addr, regnum);
return 0xffff;
}
- value = enetc_mdio_rd(hw, MDIO_DATA) & 0xffff;
+ value = enetc_mdio_rd(mdio_priv, MDIO_DATA) & 0xffff;
return value;
}
+EXPORT_SYMBOL_GPL(enetc_mdio_read);
-int enetc_mdio_probe(struct enetc_pf *pf)
+struct enetc_hw *enetc_hw_alloc(struct device *dev, void __iomem *port_regs)
{
- struct device *dev = &pf->si->pdev->dev;
- struct enetc_mdio_priv *mdio_priv;
- struct device_node *np;
- struct mii_bus *bus;
- int err;
-
- bus = devm_mdiobus_alloc_size(dev, sizeof(*mdio_priv));
- if (!bus)
- return -ENOMEM;
-
- bus->name = "Freescale ENETC MDIO Bus";
- bus->read = enetc_mdio_read;
- bus->write = enetc_mdio_write;
- bus->parent = dev;
- mdio_priv = bus->priv;
- mdio_priv->hw = &pf->si->hw;
- snprintf(bus->id, MII_BUS_ID_SIZE, "%s", dev_name(dev));
-
- np = of_get_child_by_name(dev->of_node, "mdio");
- if (!np) {
- dev_err(dev, "MDIO node missing\n");
- return -EINVAL;
- }
-
- err = of_mdiobus_register(bus, np);
- if (err) {
- of_node_put(np);
- dev_err(dev, "cannot register MDIO bus\n");
- return err;
- }
+ struct enetc_hw *hw;
- of_node_put(np);
- pf->mdio = bus;
+ hw = devm_kzalloc(dev, sizeof(*hw), GFP_KERNEL);
+ if (!hw)
+ return ERR_PTR(-ENOMEM);
- return 0;
-}
+ hw->port = port_regs;
-void enetc_mdio_remove(struct enetc_pf *pf)
-{
- if (pf->mdio)
- mdiobus_unregister(pf->mdio);
+ return hw;
}
+EXPORT_SYMBOL_GPL(enetc_hw_alloc);
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_mdio.h b/drivers/net/ethernet/freescale/enetc/enetc_mdio.h
deleted file mode 100644
index 60c9a3889824..000000000000
--- a/drivers/net/ethernet/freescale/enetc/enetc_mdio.h
+++ /dev/null
@@ -1,12 +0,0 @@
-/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
-/* Copyright 2019 NXP */
-
-#include <linux/phy.h>
-#include "enetc_pf.h"
-
-struct enetc_mdio_priv {
- struct enetc_hw *hw;
-};
-
-int enetc_mdio_write(struct mii_bus *bus, int phy_id, int regnum, u16 value);
-int enetc_mdio_read(struct mii_bus *bus, int phy_id, int regnum);
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pci_mdio.c b/drivers/net/ethernet/freescale/enetc/enetc_pci_mdio.c
index fbd41ce01f06..ebc635f8a4cc 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_pci_mdio.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_pci_mdio.c
@@ -1,7 +1,8 @@
// SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause)
/* Copyright 2019 NXP */
+#include <linux/fsl/enetc_mdio.h>
#include <linux/of_mdio.h>
-#include "enetc_mdio.h"
+#include "enetc_pf.h"
#define ENETC_MDIO_DEV_ID 0xee01
#define ENETC_MDIO_DEV_NAME "FSL PCIe IE Central MDIO"
@@ -13,17 +14,29 @@ static int enetc_pci_mdio_probe(struct pci_dev *pdev,
{
struct enetc_mdio_priv *mdio_priv;
struct device *dev = &pdev->dev;
+ void __iomem *port_regs;
struct enetc_hw *hw;
struct mii_bus *bus;
int err;
- hw = devm_kzalloc(dev, sizeof(*hw), GFP_KERNEL);
- if (!hw)
- return -ENOMEM;
+ port_regs = pci_iomap(pdev, 0, 0);
+ if (!port_regs) {
+ dev_err(dev, "iomap failed\n");
+ err = -ENXIO;
+ goto err_ioremap;
+ }
+
+ hw = enetc_hw_alloc(dev, port_regs);
+ if (IS_ERR(hw)) {
+ err = PTR_ERR(hw);
+ goto err_hw_alloc;
+ }
bus = devm_mdiobus_alloc_size(dev, sizeof(*mdio_priv));
- if (!bus)
- return -ENOMEM;
+ if (!bus) {
+ err = -ENOMEM;
+ goto err_mdiobus_alloc;
+ }
bus->name = ENETC_MDIO_BUS_NAME;
bus->read = enetc_mdio_read;
@@ -31,13 +44,14 @@ static int enetc_pci_mdio_probe(struct pci_dev *pdev,
bus->parent = dev;
mdio_priv = bus->priv;
mdio_priv->hw = hw;
+ mdio_priv->mdio_base = ENETC_EMDIO_BASE;
snprintf(bus->id, MII_BUS_ID_SIZE, "%s", dev_name(dev));
pcie_flr(pdev);
err = pci_enable_device_mem(pdev);
if (err) {
dev_err(dev, "device enable failed\n");
- return err;
+ goto err_pci_enable;
}
err = pci_request_region(pdev, 0, KBUILD_MODNAME);
@@ -46,13 +60,6 @@ static int enetc_pci_mdio_probe(struct pci_dev *pdev,
goto err_pci_mem_reg;
}
- hw->port = pci_iomap(pdev, 0, 0);
- if (!hw->port) {
- err = -ENXIO;
- dev_err(dev, "iomap failed\n");
- goto err_ioremap;
- }
-
err = of_mdiobus_register(bus, dev->of_node);
if (err)
goto err_mdiobus_reg;
@@ -62,12 +69,14 @@ static int enetc_pci_mdio_probe(struct pci_dev *pdev,
return 0;
err_mdiobus_reg:
- iounmap(mdio_priv->hw->port);
-err_ioremap:
pci_release_mem_regions(pdev);
err_pci_mem_reg:
pci_disable_device(pdev);
-
+err_pci_enable:
+err_mdiobus_alloc:
+ iounmap(port_regs);
+err_hw_alloc:
+err_ioremap:
return err;
}
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
index e7482d483b28..fc0d7d99e9a1 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
@@ -2,6 +2,7 @@
/* Copyright 2017-2019 NXP */
#include <linux/module.h>
+#include <linux/fsl/enetc_mdio.h>
#include <linux/of_mdio.h>
#include <linux/of_net.h>
#include "enetc_pf.h"
@@ -749,6 +750,52 @@ static void enetc_pf_netdev_setup(struct enetc_si *si, struct net_device *ndev,
enetc_get_primary_mac_addr(&si->hw, ndev->dev_addr);
}
+static int enetc_mdio_probe(struct enetc_pf *pf)
+{
+ struct device *dev = &pf->si->pdev->dev;
+ struct enetc_mdio_priv *mdio_priv;
+ struct device_node *np;
+ struct mii_bus *bus;
+ int err;
+
+ bus = devm_mdiobus_alloc_size(dev, sizeof(*mdio_priv));
+ if (!bus)
+ return -ENOMEM;
+
+ bus->name = "Freescale ENETC MDIO Bus";
+ bus->read = enetc_mdio_read;
+ bus->write = enetc_mdio_write;
+ bus->parent = dev;
+ mdio_priv = bus->priv;
+ mdio_priv->hw = &pf->si->hw;
+ mdio_priv->mdio_base = ENETC_EMDIO_BASE;
+ snprintf(bus->id, MII_BUS_ID_SIZE, "%s", dev_name(dev));
+
+ np = of_get_child_by_name(dev->of_node, "mdio");
+ if (!np) {
+ dev_err(dev, "MDIO node missing\n");
+ return -EINVAL;
+ }
+
+ err = of_mdiobus_register(bus, np);
+ if (err) {
+ of_node_put(np);
+ dev_err(dev, "cannot register MDIO bus\n");
+ return err;
+ }
+
+ of_node_put(np);
+ pf->mdio = bus;
+
+ return 0;
+}
+
+static void enetc_mdio_remove(struct enetc_pf *pf)
+{
+ if (pf->mdio)
+ mdiobus_unregister(pf->mdio);
+}
+
static int enetc_of_get_phy(struct enetc_ndev_priv *priv)
{
struct enetc_pf *pf = enetc_si_priv(priv->si);
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.h b/drivers/net/ethernet/freescale/enetc/enetc_pf.h
index 10dd1b53bb08..59e65a6f6c3e 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_pf.h
+++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.h
@@ -49,7 +49,3 @@ struct enetc_pf {
int enetc_msg_psi_init(struct enetc_pf *pf);
void enetc_msg_psi_free(struct enetc_pf *pf);
void enetc_msg_handle_rxmsg(struct enetc_pf *pf, int mbox_id, u16 *status);
-
-/* MDIO */
-int enetc_mdio_probe(struct enetc_pf *pf);
-void enetc_mdio_remove(struct enetc_pf *pf);
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_qos.c b/drivers/net/ethernet/freescale/enetc/enetc_qos.c
index 2e99438cb1bf..00382b7c5bd8 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_qos.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_qos.c
@@ -156,6 +156,11 @@ int enetc_setup_tc_taprio(struct net_device *ndev, void *type_data)
int err;
int i;
+ /* TSD and Qbv are mutually exclusive in hardware */
+ for (i = 0; i < priv->num_tx_rings; i++)
+ if (priv->tx_ring[i]->tsd_enable)
+ return -EBUSY;
+
for (i = 0; i < priv->num_tx_rings; i++)
enetc_set_bdr_prio(&priv->si->hw,
priv->tx_ring[i]->index,
@@ -192,7 +197,6 @@ int enetc_setup_tc_cbs(struct net_device *ndev, void *type_data)
u32 hi_credit_bit, hi_credit_reg;
u32 max_interference_size;
u32 port_frame_max_size;
- u32 tc_max_sized_frame;
u8 tc = cbs->queue;
u8 prio_top, prio_next;
int bw_sum = 0;
@@ -250,7 +254,7 @@ int enetc_setup_tc_cbs(struct net_device *ndev, void *type_data)
return -EINVAL;
}
- tc_max_sized_frame = enetc_port_rd(&si->hw, ENETC_PTCMSDUR(tc));
+ enetc_port_rd(&si->hw, ENETC_PTCMSDUR(tc));
/* For top prio TC, the max_interfrence_size is maxSizedFrame.
*
@@ -298,3 +302,33 @@ int enetc_setup_tc_cbs(struct net_device *ndev, void *type_data)
return 0;
}
+
+int enetc_setup_tc_txtime(struct net_device *ndev, void *type_data)
+{
+ struct enetc_ndev_priv *priv = netdev_priv(ndev);
+ struct tc_etf_qopt_offload *qopt = type_data;
+ u8 tc_nums = netdev_get_num_tc(ndev);
+ int tc;
+
+ if (!tc_nums)
+ return -EOPNOTSUPP;
+
+ tc = qopt->queue;
+
+ if (tc < 0 || tc >= priv->num_tx_rings)
+ return -EINVAL;
+
+ /* Do not support TXSTART and TX CSUM offload simutaniously */
+ if (ndev->features & NETIF_F_CSUM_MASK)
+ return -EBUSY;
+
+ /* TSD and Qbv are mutually exclusive in hardware */
+ if (enetc_rd(&priv->si->hw, ENETC_QBV_PTGCR_OFFSET) & ENETC_QBV_TGE)
+ return -EBUSY;
+
+ priv->tx_ring[tc]->tsd_enable = qopt->enable;
+ enetc_port_wr(&priv->si->hw, ENETC_PTCTSDR(tc),
+ qopt->enable ? ENETC_TSDE : 0);
+
+ return 0;
+}
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 05c1899f6628..4432a59904c7 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -1141,7 +1141,7 @@ fec_stop(struct net_device *ndev)
static void
-fec_timeout(struct net_device *ndev)
+fec_timeout(struct net_device *ndev, unsigned int txqueue)
{
struct fec_enet_private *fep = netdev_priv(ndev);
@@ -2199,8 +2199,14 @@ static void fec_enet_get_regs(struct net_device *ndev,
{
struct fec_enet_private *fep = netdev_priv(ndev);
u32 __iomem *theregs = (u32 __iomem *)fep->hwp;
+ struct device *dev = &fep->pdev->dev;
u32 *buf = (u32 *)regbuf;
u32 i, off;
+ int ret;
+
+ ret = pm_runtime_get_sync(dev);
+ if (ret < 0)
+ return;
regs->version = fec_enet_register_version;
@@ -2216,6 +2222,9 @@ static void fec_enet_get_regs(struct net_device *ndev,
off >>= 2;
buf[off] = readl(&theregs[off]);
}
+
+ pm_runtime_mark_last_busy(dev);
+ pm_runtime_put_autosuspend(dev);
}
static int fec_enet_get_ts_info(struct net_device *ndev,
diff --git a/drivers/net/ethernet/freescale/fec_mpc52xx.c b/drivers/net/ethernet/freescale/fec_mpc52xx.c
index 30cdb246d020..de5278485062 100644
--- a/drivers/net/ethernet/freescale/fec_mpc52xx.c
+++ b/drivers/net/ethernet/freescale/fec_mpc52xx.c
@@ -84,7 +84,7 @@ static int debug = -1; /* the above default */
module_param(debug, int, 0);
MODULE_PARM_DESC(debug, "debugging messages level");
-static void mpc52xx_fec_tx_timeout(struct net_device *dev)
+static void mpc52xx_fec_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct mpc52xx_fec_priv *priv = netdev_priv(dev);
unsigned long flags;
diff --git a/drivers/net/ethernet/freescale/fman/mac.c b/drivers/net/ethernet/freescale/fman/mac.c
index f0806ace1ae2..55f2122c3217 100644
--- a/drivers/net/ethernet/freescale/fman/mac.c
+++ b/drivers/net/ethernet/freescale/fman/mac.c
@@ -692,7 +692,7 @@ static int mac_probe(struct platform_device *_of_dev)
mac_dev->res = __devm_request_region(dev,
fman_get_mem_region(priv->fman),
- res.start, res.end + 1 - res.start,
+ res.start, resource_size(&res),
"mac");
if (!mac_dev->res) {
dev_err(dev, "__devm_request_mem_region(mac) failed\n");
@@ -701,7 +701,7 @@ static int mac_probe(struct platform_device *_of_dev)
}
priv->vaddr = devm_ioremap(dev, mac_dev->res->start,
- mac_dev->res->end + 1 - mac_dev->res->start);
+ resource_size(mac_dev->res));
if (!priv->vaddr) {
dev_err(dev, "devm_ioremap() failed\n");
err = -EIO;
diff --git a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c
index 3981c06f082f..80903cd58468 100644
--- a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c
+++ b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c
@@ -641,7 +641,7 @@ static void fs_timeout_work(struct work_struct *work)
netif_wake_queue(dev);
}
-static void fs_timeout(struct net_device *dev)
+static void fs_timeout(struct net_device *dev, unsigned int txqueue)
{
struct fs_enet_private *fep = netdev_priv(dev);
diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c
index 72868a28b621..f7e5cafe89a9 100644
--- a/drivers/net/ethernet/freescale/gianfar.c
+++ b/drivers/net/ethernet/freescale/gianfar.c
@@ -2093,7 +2093,7 @@ static void gfar_reset_task(struct work_struct *work)
reset_gfar(priv->ndev);
}
-static void gfar_timeout(struct net_device *dev)
+static void gfar_timeout(struct net_device *dev, unsigned int txqueue)
{
struct gfar_private *priv = netdev_priv(dev);
@@ -2205,13 +2205,17 @@ static void gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue)
skb_dirtytx = tx_queue->skb_dirtytx;
while ((skb = tx_queue->tx_skbuff[skb_dirtytx])) {
+ bool do_tstamp;
+
+ do_tstamp = (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) &&
+ priv->hwts_tx_en;
frags = skb_shinfo(skb)->nr_frags;
/* When time stamping, one additional TxBD must be freed.
* Also, we need to dma_unmap_single() the TxPAL.
*/
- if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS))
+ if (unlikely(do_tstamp))
nr_txbds = frags + 2;
else
nr_txbds = frags + 1;
@@ -2225,7 +2229,7 @@ static void gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue)
(lstatus & BD_LENGTH_MASK))
break;
- if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS)) {
+ if (unlikely(do_tstamp)) {
next = next_txbd(bdp, base, tx_ring_size);
buflen = be16_to_cpu(next->length) +
GMAC_FCB_LEN + GMAC_TXPAL_LEN;
@@ -2235,7 +2239,7 @@ static void gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue)
dma_unmap_single(priv->dev, be32_to_cpu(bdp->bufPtr),
buflen, DMA_TO_DEVICE);
- if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS)) {
+ if (unlikely(do_tstamp)) {
struct skb_shared_hwtstamps shhwtstamps;
u64 *ns = (u64 *)(((uintptr_t)skb->data + 0x10) &
~0x7UL);
diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c
index f839fa94ebdd..0d101c00286f 100644
--- a/drivers/net/ethernet/freescale/ucc_geth.c
+++ b/drivers/net/ethernet/freescale/ucc_geth.c
@@ -3545,7 +3545,7 @@ static void ucc_geth_timeout_work(struct work_struct *work)
* ucc_geth_timeout gets called when a packet has not been
* transmitted after a set amount of time.
*/
-static void ucc_geth_timeout(struct net_device *dev)
+static void ucc_geth_timeout(struct net_device *dev, unsigned int txqueue)
{
struct ucc_geth_private *ugeth = netdev_priv(dev);
diff --git a/drivers/net/ethernet/fujitsu/fmvj18x_cs.c b/drivers/net/ethernet/fujitsu/fmvj18x_cs.c
index 1eca0fdb9933..a7b7a4aace79 100644
--- a/drivers/net/ethernet/fujitsu/fmvj18x_cs.c
+++ b/drivers/net/ethernet/fujitsu/fmvj18x_cs.c
@@ -93,7 +93,7 @@ static irqreturn_t fjn_interrupt(int irq, void *dev_id);
static void fjn_rx(struct net_device *dev);
static void fjn_reset(struct net_device *dev);
static void set_rx_mode(struct net_device *dev);
-static void fjn_tx_timeout(struct net_device *dev);
+static void fjn_tx_timeout(struct net_device *dev, unsigned int txqueue);
static const struct ethtool_ops netdev_ethtool_ops;
/*
@@ -774,7 +774,7 @@ static irqreturn_t fjn_interrupt(int dummy, void *dev_id)
/*====================================================================*/
-static void fjn_tx_timeout(struct net_device *dev)
+static void fjn_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct local_info *lp = netdev_priv(dev);
unsigned int ioaddr = dev->base_addr;
diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c
index 9b7a8db9860f..e032563ceefd 100644
--- a/drivers/net/ethernet/google/gve/gve_main.c
+++ b/drivers/net/ethernet/google/gve/gve_main.c
@@ -845,7 +845,7 @@ static void gve_turnup(struct gve_priv *priv)
gve_set_napi_enabled(priv);
}
-static void gve_tx_timeout(struct net_device *dev)
+static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct gve_priv *priv = netdev_priv(dev);
diff --git a/drivers/net/ethernet/google/gve/gve_rx.c b/drivers/net/ethernet/google/gve/gve_rx.c
index edec61dfc868..9f52e72ff641 100644
--- a/drivers/net/ethernet/google/gve/gve_rx.c
+++ b/drivers/net/ethernet/google/gve/gve_rx.c
@@ -418,8 +418,6 @@ bool gve_clean_rx_done(struct gve_rx_ring *rx, int budget,
rx->cnt = cnt;
rx->fill_cnt += work_done;
- /* restock desc ring slots */
- dma_wmb(); /* Ensure descs are visible before ringing doorbell */
gve_rx_write_doorbell(priv, rx);
return gve_rx_work_pending(rx);
}
diff --git a/drivers/net/ethernet/google/gve/gve_tx.c b/drivers/net/ethernet/google/gve/gve_tx.c
index f4889431f9b7..d0244feb0301 100644
--- a/drivers/net/ethernet/google/gve/gve_tx.c
+++ b/drivers/net/ethernet/google/gve/gve_tx.c
@@ -487,10 +487,6 @@ netdev_tx_t gve_tx(struct sk_buff *skb, struct net_device *dev)
* may have added descriptors without ringing the doorbell.
*/
- /* Ensure tx descs from a prior gve_tx are visible before
- * ringing doorbell.
- */
- dma_wmb();
gve_tx_put_doorbell(priv, tx->q_resources, tx->req);
return NETDEV_TX_BUSY;
}
@@ -505,8 +501,6 @@ netdev_tx_t gve_tx(struct sk_buff *skb, struct net_device *dev)
if (!netif_xmit_stopped(tx->netdev_txq) && netdev_xmit_more())
return NETDEV_TX_OK;
- /* Ensure tx descs are visible before ringing doorbell */
- dma_wmb();
gve_tx_put_doorbell(priv, tx->q_resources, tx->req);
return NETDEV_TX_OK;
}
diff --git a/drivers/net/ethernet/hisilicon/hip04_eth.c b/drivers/net/ethernet/hisilicon/hip04_eth.c
index 3e9b6d543c77..d9718b87279d 100644
--- a/drivers/net/ethernet/hisilicon/hip04_eth.c
+++ b/drivers/net/ethernet/hisilicon/hip04_eth.c
@@ -543,9 +543,9 @@ hip04_mac_start_xmit(struct sk_buff *skb, struct net_device *ndev)
skb_tx_timestamp(skb);
hip04_set_xmit_desc(priv, phys);
- priv->tx_head = TX_NEXT(tx_head);
count++;
netdev_sent_queue(ndev, skb->len);
+ priv->tx_head = TX_NEXT(tx_head);
stats->tx_bytes += skb->len;
stats->tx_packets++;
@@ -779,7 +779,7 @@ static int hip04_mac_stop(struct net_device *ndev)
return 0;
}
-static void hip04_timeout(struct net_device *ndev)
+static void hip04_timeout(struct net_device *ndev, unsigned int txqueue)
{
struct hip04_priv *priv = netdev_priv(ndev);
diff --git a/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c b/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c
index 247de9105d10..4fb776920a93 100644
--- a/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c
+++ b/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c
@@ -893,7 +893,7 @@ static void hix5hd2_tx_timeout_task(struct work_struct *work)
hix5hd2_net_open(priv->netdev);
}
-static void hix5hd2_net_timeout(struct net_device *dev)
+static void hix5hd2_net_timeout(struct net_device *dev, unsigned int txqueue)
{
struct hix5hd2_priv *priv = netdev_priv(dev);
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
index 14ab20491fd0..e45553ec114a 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
@@ -1485,7 +1485,7 @@ static int hns_nic_net_stop(struct net_device *ndev)
static void hns_tx_timeout_reset(struct hns_nic_priv *priv);
#define HNS_TX_TIMEO_LIMIT (40 * HZ)
-static void hns_nic_net_timeout(struct net_device *ndev)
+static void hns_nic_net_timeout(struct net_device *ndev, unsigned int txqueue)
{
struct hns_nic_priv *priv = netdev_priv(ndev);
diff --git a/drivers/net/ethernet/hisilicon/hns3/Makefile b/drivers/net/ethernet/hisilicon/hns3/Makefile
index d01bf536eb86..7aa2fac76c5e 100644
--- a/drivers/net/ethernet/hisilicon/hns3/Makefile
+++ b/drivers/net/ethernet/hisilicon/hns3/Makefile
@@ -3,6 +3,8 @@
# Makefile for the HISILICON network device drivers.
#
+ccflags-y += -I$(srctree)/$(src)
+
obj-$(CONFIG_HNS3) += hns3pf/
obj-$(CONFIG_HNS3) += hns3vf/
diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index 3b5e2d7251e7..a3e4081b84ba 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -164,11 +164,7 @@ enum hnae3_reset_type {
HNAE3_IMP_RESET,
HNAE3_UNKNOWN_RESET,
HNAE3_NONE_RESET,
-};
-
-enum hnae3_flr_state {
- HNAE3_FLR_DOWN,
- HNAE3_FLR_DONE,
+ HNAE3_MAX_RESET,
};
enum hnae3_port_base_vlan_state {
@@ -575,8 +571,7 @@ struct hnae3_ae_algo {
const struct pci_device_id *pdev_id_table;
};
-#define HNAE3_INT_NAME_EXT_LEN 32 /* Max extra information length */
-#define HNAE3_INT_NAME_LEN (IFNAMSIZ + HNAE3_INT_NAME_EXT_LEN)
+#define HNAE3_INT_NAME_LEN 32
#define HNAE3_ITR_COUNTDOWN_START 100
struct hnae3_tc_info {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index 69545dd6c938..e240d99f7ca8 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -24,6 +24,12 @@
#include "hnae3.h"
#include "hns3_enet.h"
+/* All hns3 tracepoints are defined by the include below, which
+ * must be included exactly once across the whole kernel with
+ * CREATE_TRACE_POINTS defined
+ */
+#define CREATE_TRACE_POINTS
+#include "hns3_trace.h"
#define hns3_set_field(origin, shift, val) ((origin) |= ((val) << (shift)))
#define hns3_tx_bd_count(S) DIV_ROUND_UP(S, HNS3_MAX_BD_SIZE)
@@ -127,18 +133,21 @@ static int hns3_nic_init_irq(struct hns3_nic_priv *priv)
continue;
if (tqp_vectors->tx_group.ring && tqp_vectors->rx_group.ring) {
- snprintf(tqp_vectors->name, HNAE3_INT_NAME_LEN - 1,
- "%s-%s-%d", priv->netdev->name, "TxRx",
- txrx_int_idx++);
+ snprintf(tqp_vectors->name, HNAE3_INT_NAME_LEN,
+ "%s-%s-%s-%d", hns3_driver_name,
+ pci_name(priv->ae_handle->pdev),
+ "TxRx", txrx_int_idx++);
txrx_int_idx++;
} else if (tqp_vectors->rx_group.ring) {
- snprintf(tqp_vectors->name, HNAE3_INT_NAME_LEN - 1,
- "%s-%s-%d", priv->netdev->name, "Rx",
- rx_int_idx++);
+ snprintf(tqp_vectors->name, HNAE3_INT_NAME_LEN,
+ "%s-%s-%s-%d", hns3_driver_name,
+ pci_name(priv->ae_handle->pdev),
+ "Rx", rx_int_idx++);
} else if (tqp_vectors->tx_group.ring) {
- snprintf(tqp_vectors->name, HNAE3_INT_NAME_LEN - 1,
- "%s-%s-%d", priv->netdev->name, "Tx",
- tx_int_idx++);
+ snprintf(tqp_vectors->name, HNAE3_INT_NAME_LEN,
+ "%s-%s-%s-%d", hns3_driver_name,
+ pci_name(priv->ae_handle->pdev),
+ "Tx", tx_int_idx++);
} else {
/* Skip this unused q_vector */
continue;
@@ -155,6 +164,8 @@ static int hns3_nic_init_irq(struct hns3_nic_priv *priv)
return ret;
}
+ disable_irq(tqp_vectors->vector_irq);
+
irq_set_affinity_hint(tqp_vectors->vector_irq,
&tqp_vectors->affinity_mask);
@@ -173,6 +184,7 @@ static void hns3_mask_vector_irq(struct hns3_enet_tqp_vector *tqp_vector,
static void hns3_vector_enable(struct hns3_enet_tqp_vector *tqp_vector)
{
napi_enable(&tqp_vector->napi);
+ enable_irq(tqp_vector->vector_irq);
/* enable vector */
hns3_mask_vector_irq(tqp_vector, 1);
@@ -372,18 +384,6 @@ static int hns3_nic_net_up(struct net_device *netdev)
if (ret)
return ret;
- /* the device can work without cpu rmap, only aRFS needs it */
- ret = hns3_set_rx_cpu_rmap(netdev);
- if (ret)
- netdev_warn(netdev, "set rx cpu rmap fail, ret=%d!\n", ret);
-
- /* get irq resource for all vectors */
- ret = hns3_nic_init_irq(priv);
- if (ret) {
- netdev_err(netdev, "init irq failed! ret=%d\n", ret);
- goto free_rmap;
- }
-
clear_bit(HNS3_NIC_STATE_DOWN, &priv->state);
/* enable the vectors */
@@ -396,22 +396,15 @@ static int hns3_nic_net_up(struct net_device *netdev)
/* start the ae_dev */
ret = h->ae_algo->ops->start ? h->ae_algo->ops->start(h) : 0;
- if (ret)
- goto out_start_err;
-
- return 0;
-
-out_start_err:
- set_bit(HNS3_NIC_STATE_DOWN, &priv->state);
- while (j--)
- hns3_tqp_disable(h->kinfo.tqp[j]);
+ if (ret) {
+ set_bit(HNS3_NIC_STATE_DOWN, &priv->state);
+ while (j--)
+ hns3_tqp_disable(h->kinfo.tqp[j]);
- for (j = i - 1; j >= 0; j--)
- hns3_vector_disable(&priv->tqp_vector[j]);
+ for (j = i - 1; j >= 0; j--)
+ hns3_vector_disable(&priv->tqp_vector[j]);
+ }
- hns3_nic_uninit_irq(priv);
-free_rmap:
- hns3_free_rx_cpu_rmap(netdev);
return ret;
}
@@ -508,11 +501,6 @@ static void hns3_nic_net_down(struct net_device *netdev)
if (ops->stop)
ops->stop(priv->ae_handle);
- hns3_free_rx_cpu_rmap(netdev);
-
- /* free irq resources */
- hns3_nic_uninit_irq(priv);
-
/* delay ring buffer clearing to hns3_reset_notify_uninit_enet
* during reset process, because driver may not be able
* to disable the ring through firmware when downing the netdev.
@@ -734,6 +722,8 @@ static int hns3_set_tso(struct sk_buff *skb, u32 *paylen,
/* get MSS for TSO */
*mss = skb_shinfo(skb)->gso_size;
+ trace_hns3_tso(skb);
+
return 0;
}
@@ -1138,6 +1128,7 @@ static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv,
desc->tx.bdtp_fe_sc_vld_ra_ri =
cpu_to_le16(BIT(HNS3_TXD_VLD_B));
+ trace_hns3_tx_desc(ring, ring->next_to_use);
ring_ptr_move_fw(ring, next_to_use);
return HNS3_LIKELY_BD_NUM;
}
@@ -1161,6 +1152,7 @@ static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv,
desc->tx.bdtp_fe_sc_vld_ra_ri =
cpu_to_le16(BIT(HNS3_TXD_VLD_B));
+ trace_hns3_tx_desc(ring, ring->next_to_use);
/* move ring pointer to next */
ring_ptr_move_fw(ring, next_to_use);
@@ -1286,6 +1278,14 @@ static bool hns3_skb_need_linearized(struct sk_buff *skb, unsigned int *bd_size,
return false;
}
+void hns3_shinfo_pack(struct skb_shared_info *shinfo, __u32 *size)
+{
+ int i = 0;
+
+ for (i = 0; i < MAX_SKB_FRAGS; i++)
+ size[i] = skb_frag_size(&shinfo->frags[i]);
+}
+
static int hns3_nic_maybe_stop_tx(struct hns3_enet_ring *ring,
struct net_device *netdev,
struct sk_buff *skb)
@@ -1297,8 +1297,10 @@ static int hns3_nic_maybe_stop_tx(struct hns3_enet_ring *ring,
bd_num = hns3_tx_bd_num(skb, bd_size);
if (unlikely(bd_num > HNS3_MAX_NON_TSO_BD_NUM)) {
if (bd_num <= HNS3_MAX_TSO_BD_NUM && skb_is_gso(skb) &&
- !hns3_skb_need_linearized(skb, bd_size, bd_num))
+ !hns3_skb_need_linearized(skb, bd_size, bd_num)) {
+ trace_hns3_over_8bd(skb);
goto out;
+ }
if (__skb_linearize(skb))
return -ENOMEM;
@@ -1306,8 +1308,10 @@ static int hns3_nic_maybe_stop_tx(struct hns3_enet_ring *ring,
bd_num = hns3_tx_bd_count(skb->len);
if ((skb_is_gso(skb) && bd_num > HNS3_MAX_TSO_BD_NUM) ||
(!skb_is_gso(skb) &&
- bd_num > HNS3_MAX_NON_TSO_BD_NUM))
+ bd_num > HNS3_MAX_NON_TSO_BD_NUM)) {
+ trace_hns3_over_8bd(skb);
return -ENOMEM;
+ }
u64_stats_update_begin(&ring->syncp);
ring->stats.tx_copy++;
@@ -1448,6 +1452,7 @@ out:
(ring->desc_num - 1);
ring->desc[pre_ntu].tx.bdtp_fe_sc_vld_ra_ri |=
cpu_to_le16(BIT(HNS3_TXD_FE_B));
+ trace_hns3_tx_desc(ring, pre_ntu);
/* Complete translate all packets */
dev_queue = netdev_get_tx_queue(netdev, ring->queue_index);
@@ -1556,6 +1561,37 @@ static int hns3_nic_set_features(struct net_device *netdev,
return 0;
}
+static netdev_features_t hns3_features_check(struct sk_buff *skb,
+ struct net_device *dev,
+ netdev_features_t features)
+{
+#define HNS3_MAX_HDR_LEN 480U
+#define HNS3_MAX_L4_HDR_LEN 60U
+
+ size_t len;
+
+ if (skb->ip_summed != CHECKSUM_PARTIAL)
+ return features;
+
+ if (skb->encapsulation)
+ len = skb_inner_transport_header(skb) - skb->data;
+ else
+ len = skb_transport_header(skb) - skb->data;
+
+ /* Assume L4 is 60 byte as TCP is the only protocol with a
+ * a flexible value, and it's max len is 60 bytes.
+ */
+ len += HNS3_MAX_L4_HDR_LEN;
+
+ /* Hardware only supports checksum on the skb with a max header
+ * len of 480 bytes.
+ */
+ if (len > HNS3_MAX_HDR_LEN)
+ features &= ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
+
+ return features;
+}
+
static void hns3_nic_get_stats64(struct net_device *netdev,
struct rtnl_link_stats64 *stats)
{
@@ -1869,7 +1905,7 @@ static bool hns3_get_tx_timeo_queue_info(struct net_device *ndev)
return true;
}
-static void hns3_nic_net_timeout(struct net_device *ndev)
+static void hns3_nic_net_timeout(struct net_device *ndev, unsigned int txqueue)
{
struct hns3_nic_priv *priv = netdev_priv(ndev);
struct hnae3_handle *h = priv->ae_handle;
@@ -1970,6 +2006,7 @@ static const struct net_device_ops hns3_nic_netdev_ops = {
.ndo_do_ioctl = hns3_nic_do_ioctl,
.ndo_change_mtu = hns3_nic_change_mtu,
.ndo_set_features = hns3_nic_set_features,
+ .ndo_features_check = hns3_features_check,
.ndo_get_stats64 = hns3_nic_get_stats64,
.ndo_setup_tc = hns3_nic_setup_tc,
.ndo_set_rx_mode = hns3_nic_set_rx_mode,
@@ -2668,6 +2705,9 @@ static int hns3_gro_complete(struct sk_buff *skb, u32 l234info)
skb->csum_start = (unsigned char *)th - skb->head;
skb->csum_offset = offsetof(struct tcphdr, check);
skb->ip_summed = CHECKSUM_PARTIAL;
+
+ trace_hns3_gro(skb);
+
return 0;
}
@@ -2788,7 +2828,6 @@ static bool hns3_parse_vlan_tag(struct hns3_enet_ring *ring,
static int hns3_alloc_skb(struct hns3_enet_ring *ring, unsigned int length,
unsigned char *va)
{
-#define HNS3_NEED_ADD_FRAG 1
struct hns3_desc_cb *desc_cb = &ring->desc_cb[ring->next_to_clean];
struct net_device *netdev = ring_to_netdev(ring);
struct sk_buff *skb;
@@ -2805,6 +2844,7 @@ static int hns3_alloc_skb(struct hns3_enet_ring *ring, unsigned int length,
return -ENOMEM;
}
+ trace_hns3_rx_desc(ring);
prefetchw(skb->data);
ring->pending_buf = 1;
@@ -2832,33 +2872,19 @@ static int hns3_alloc_skb(struct hns3_enet_ring *ring, unsigned int length,
desc_cb);
ring_ptr_move_fw(ring, next_to_clean);
- return HNS3_NEED_ADD_FRAG;
+ return 0;
}
-static int hns3_add_frag(struct hns3_enet_ring *ring, struct hns3_desc *desc,
- bool pending)
+static int hns3_add_frag(struct hns3_enet_ring *ring)
{
struct sk_buff *skb = ring->skb;
struct sk_buff *head_skb = skb;
struct sk_buff *new_skb;
struct hns3_desc_cb *desc_cb;
- struct hns3_desc *pre_desc;
+ struct hns3_desc *desc;
u32 bd_base_info;
- int pre_bd;
- /* if there is pending bd, the SW param next_to_clean has moved
- * to next and the next is NULL
- */
- if (pending) {
- pre_bd = (ring->next_to_clean - 1 + ring->desc_num) %
- ring->desc_num;
- pre_desc = &ring->desc[pre_bd];
- bd_base_info = le32_to_cpu(pre_desc->rx.bd_base_info);
- } else {
- bd_base_info = le32_to_cpu(desc->rx.bd_base_info);
- }
-
- while (!(bd_base_info & BIT(HNS3_RXD_FE_B))) {
+ do {
desc = &ring->desc[ring->next_to_clean];
desc_cb = &ring->desc_cb[ring->next_to_clean];
bd_base_info = le32_to_cpu(desc->rx.bd_base_info);
@@ -2893,9 +2919,10 @@ static int hns3_add_frag(struct hns3_enet_ring *ring, struct hns3_desc *desc,
}
hns3_nic_reuse_page(skb, ring->frag_num++, ring, 0, desc_cb);
+ trace_hns3_rx_desc(ring);
ring_ptr_move_fw(ring, next_to_clean);
ring->pending_buf++;
- }
+ } while (!(bd_base_info & BIT(HNS3_RXD_FE_B)));
return 0;
}
@@ -3063,28 +3090,23 @@ static int hns3_handle_rx_bd(struct hns3_enet_ring *ring)
if (ret < 0) /* alloc buffer fail */
return ret;
- if (ret > 0) { /* need add frag */
- ret = hns3_add_frag(ring, desc, false);
+ if (!(bd_base_info & BIT(HNS3_RXD_FE_B))) { /* need add frag */
+ ret = hns3_add_frag(ring);
if (ret)
return ret;
-
- /* As the head data may be changed when GRO enable, copy
- * the head data in after other data rx completed
- */
- memcpy(skb->data, ring->va,
- ALIGN(ring->pull_len, sizeof(long)));
}
} else {
- ret = hns3_add_frag(ring, desc, true);
+ ret = hns3_add_frag(ring);
if (ret)
return ret;
+ }
- /* As the head data may be changed when GRO enable, copy
- * the head data in after other data rx completed
- */
+ /* As the head data may be changed when GRO enable, copy
+ * the head data in after other data rx completed
+ */
+ if (skb->len > HNS3_RX_HEAD_SIZE)
memcpy(skb->data, ring->va,
ALIGN(ring->pull_len, sizeof(long)));
- }
ret = hns3_handle_bdinfo(ring, skb);
if (unlikely(ret)) {
@@ -3590,26 +3612,25 @@ static void hns3_nic_uninit_vector_data(struct hns3_nic_priv *priv)
if (!tqp_vector->rx_group.ring && !tqp_vector->tx_group.ring)
continue;
- hns3_get_vector_ring_chain(tqp_vector, &vector_ring_chain);
+ /* Since the mapping can be overwritten, when fail to get the
+ * chain between vector and ring, we should go on to deal with
+ * the remaining options.
+ */
+ if (hns3_get_vector_ring_chain(tqp_vector, &vector_ring_chain))
+ dev_warn(priv->dev, "failed to get ring chain\n");
h->ae_algo->ops->unmap_ring_from_vector(h,
tqp_vector->vector_irq, &vector_ring_chain);
hns3_free_vector_ring_chain(tqp_vector, &vector_ring_chain);
- if (tqp_vector->irq_init_flag == HNS3_VECTOR_INITED) {
- irq_set_affinity_hint(tqp_vector->vector_irq, NULL);
- free_irq(tqp_vector->vector_irq, tqp_vector);
- tqp_vector->irq_init_flag = HNS3_VECTOR_NOT_INITED;
- }
-
hns3_clear_ring_group(&tqp_vector->rx_group);
hns3_clear_ring_group(&tqp_vector->tx_group);
netif_napi_del(&priv->tqp_vector[i].napi);
}
}
-static int hns3_nic_dealloc_vector_data(struct hns3_nic_priv *priv)
+static void hns3_nic_dealloc_vector_data(struct hns3_nic_priv *priv)
{
struct hnae3_handle *h = priv->ae_handle;
struct pci_dev *pdev = h->pdev;
@@ -3621,11 +3642,10 @@ static int hns3_nic_dealloc_vector_data(struct hns3_nic_priv *priv)
tqp_vector = &priv->tqp_vector[i];
ret = h->ae_algo->ops->put_vector(h, tqp_vector->vector_irq);
if (ret)
- return ret;
+ return;
}
devm_kfree(&pdev->dev, priv->tqp_vector);
- return 0;
}
static void hns3_ring_get_cfg(struct hnae3_queue *q, struct hns3_nic_priv *priv,
@@ -4024,6 +4044,18 @@ static int hns3_client_init(struct hnae3_handle *handle)
goto out_reg_netdev_fail;
}
+ /* the device can work without cpu rmap, only aRFS needs it */
+ ret = hns3_set_rx_cpu_rmap(netdev);
+ if (ret)
+ dev_warn(priv->dev, "set rx cpu rmap fail, ret=%d\n", ret);
+
+ ret = hns3_nic_init_irq(priv);
+ if (ret) {
+ dev_err(priv->dev, "init irq failed! ret=%d\n", ret);
+ hns3_free_rx_cpu_rmap(netdev);
+ goto out_init_irq_fail;
+ }
+
ret = hns3_client_start(handle);
if (ret) {
dev_err(priv->dev, "hns3_client_start fail! ret=%d\n", ret);
@@ -4045,6 +4077,9 @@ static int hns3_client_init(struct hnae3_handle *handle)
return ret;
out_client_start:
+ hns3_free_rx_cpu_rmap(netdev);
+ hns3_nic_uninit_irq(priv);
+out_init_irq_fail:
unregister_netdev(netdev);
out_reg_netdev_fail:
hns3_uninit_phy(netdev);
@@ -4082,15 +4117,17 @@ static void hns3_client_uninit(struct hnae3_handle *handle, bool reset)
goto out_netdev_free;
}
+ hns3_free_rx_cpu_rmap(netdev);
+
+ hns3_nic_uninit_irq(priv);
+
hns3_del_all_fd_rules(netdev, true);
hns3_clear_all_ring(handle, true);
hns3_nic_uninit_vector_data(priv);
- ret = hns3_nic_dealloc_vector_data(priv);
- if (ret)
- netdev_err(netdev, "dealloc vector error\n");
+ hns3_nic_dealloc_vector_data(priv);
ret = hns3_uninit_all_ring(priv);
if (ret)
@@ -4417,17 +4454,32 @@ static int hns3_reset_notify_init_enet(struct hnae3_handle *handle)
if (ret)
goto err_uninit_vector;
+ /* the device can work without cpu rmap, only aRFS needs it */
+ ret = hns3_set_rx_cpu_rmap(netdev);
+ if (ret)
+ dev_warn(priv->dev, "set rx cpu rmap fail, ret=%d\n", ret);
+
+ ret = hns3_nic_init_irq(priv);
+ if (ret) {
+ dev_err(priv->dev, "init irq failed! ret=%d\n", ret);
+ hns3_free_rx_cpu_rmap(netdev);
+ goto err_init_irq_fail;
+ }
+
ret = hns3_client_start(handle);
if (ret) {
dev_err(priv->dev, "hns3_client_start fail! ret=%d\n", ret);
- goto err_uninit_ring;
+ goto err_client_start_fail;
}
set_bit(HNS3_NIC_STATE_INITED, &priv->state);
return ret;
-err_uninit_ring:
+err_client_start_fail:
+ hns3_free_rx_cpu_rmap(netdev);
+ hns3_nic_uninit_irq(priv);
+err_init_irq_fail:
hns3_uninit_all_ring(priv);
err_uninit_vector:
hns3_nic_uninit_vector_data(priv);
@@ -4477,6 +4529,8 @@ static int hns3_reset_notify_uninit_enet(struct hnae3_handle *handle)
return 0;
}
+ hns3_free_rx_cpu_rmap(netdev);
+ hns3_nic_uninit_irq(priv);
hns3_clear_all_ring(handle, true);
hns3_reset_tx_queue(priv->ae_handle);
@@ -4484,9 +4538,7 @@ static int hns3_reset_notify_uninit_enet(struct hnae3_handle *handle)
hns3_store_coal(priv);
- ret = hns3_nic_dealloc_vector_data(priv);
- if (ret)
- netdev_err(netdev, "dealloc vector error\n");
+ hns3_nic_dealloc_vector_data(priv);
ret = hns3_uninit_all_ring(priv);
if (ret)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
index 9d47abd5c37c..abefd7a179f7 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
@@ -673,4 +673,5 @@ void hns3_dbg_init(struct hnae3_handle *handle);
void hns3_dbg_uninit(struct hnae3_handle *handle);
void hns3_dbg_register_debugfs(const char *debugfs_dir_name);
void hns3_dbg_unregister_debugfs(void);
+void hns3_shinfo_pack(struct skb_shared_info *shinfo, __u32 *size);
#endif
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_trace.h b/drivers/net/ethernet/hisilicon/hns3/hns3_trace.h
new file mode 100644
index 000000000000..7bddcca148a5
--- /dev/null
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_trace.h
@@ -0,0 +1,139 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/* Copyright (c) 2018-2019 Hisilicon Limited. */
+
+/* This must be outside ifdef _HNS3_TRACE_H */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM hns3
+
+#if !defined(_HNS3_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ)
+#define _HNS3_TRACE_H_
+
+#include <linux/tracepoint.h>
+
+#define DESC_NR (sizeof(struct hns3_desc) / sizeof(u32))
+
+DECLARE_EVENT_CLASS(hns3_skb_template,
+ TP_PROTO(struct sk_buff *skb),
+ TP_ARGS(skb),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, headlen)
+ __field(unsigned int, len)
+ __field(__u8, nr_frags)
+ __field(__u8, ip_summed)
+ __field(unsigned int, hdr_len)
+ __field(unsigned short, gso_size)
+ __field(unsigned short, gso_segs)
+ __field(unsigned int, gso_type)
+ __field(bool, fraglist)
+ __array(__u32, size, MAX_SKB_FRAGS)
+ ),
+
+ TP_fast_assign(
+ __entry->headlen = skb_headlen(skb);
+ __entry->len = skb->len;
+ __entry->nr_frags = skb_shinfo(skb)->nr_frags;
+ __entry->gso_size = skb_shinfo(skb)->gso_size;
+ __entry->gso_segs = skb_shinfo(skb)->gso_segs;
+ __entry->gso_type = skb_shinfo(skb)->gso_type;
+ __entry->hdr_len = skb->encapsulation ?
+ skb_inner_transport_offset(skb) + inner_tcp_hdrlen(skb) :
+ skb_transport_offset(skb) + tcp_hdrlen(skb);
+ __entry->ip_summed = skb->ip_summed;
+ __entry->fraglist = skb_has_frag_list(skb);
+ hns3_shinfo_pack(skb_shinfo(skb), __entry->size);
+ ),
+
+ TP_printk(
+ "len: %u, %u, %u, cs: %u, gso: %u, %u, %x, frag(%d %u): %s",
+ __entry->headlen, __entry->len, __entry->hdr_len,
+ __entry->ip_summed, __entry->gso_size, __entry->gso_segs,
+ __entry->gso_type, __entry->fraglist, __entry->nr_frags,
+ __print_array(__entry->size, MAX_SKB_FRAGS, sizeof(__u32))
+ )
+);
+
+DEFINE_EVENT(hns3_skb_template, hns3_over_8bd,
+ TP_PROTO(struct sk_buff *skb),
+ TP_ARGS(skb));
+
+DEFINE_EVENT(hns3_skb_template, hns3_gro,
+ TP_PROTO(struct sk_buff *skb),
+ TP_ARGS(skb));
+
+DEFINE_EVENT(hns3_skb_template, hns3_tso,
+ TP_PROTO(struct sk_buff *skb),
+ TP_ARGS(skb));
+
+TRACE_EVENT(hns3_tx_desc,
+ TP_PROTO(struct hns3_enet_ring *ring, int cur_ntu),
+ TP_ARGS(ring, cur_ntu),
+
+ TP_STRUCT__entry(
+ __field(int, index)
+ __field(int, ntu)
+ __field(int, ntc)
+ __field(dma_addr_t, desc_dma)
+ __array(u32, desc, DESC_NR)
+ __string(devname, ring->tqp->handle->kinfo.netdev->name)
+ ),
+
+ TP_fast_assign(
+ __entry->index = ring->tqp->tqp_index;
+ __entry->ntu = ring->next_to_use;
+ __entry->ntc = ring->next_to_clean;
+ __entry->desc_dma = ring->desc_dma_addr,
+ memcpy(__entry->desc, &ring->desc[cur_ntu],
+ sizeof(struct hns3_desc));
+ __assign_str(devname, ring->tqp->handle->kinfo.netdev->name);
+ ),
+
+ TP_printk(
+ "%s-%d-%d/%d desc(%pad): %s",
+ __get_str(devname), __entry->index, __entry->ntu,
+ __entry->ntc, &__entry->desc_dma,
+ __print_array(__entry->desc, DESC_NR, sizeof(u32))
+ )
+);
+
+TRACE_EVENT(hns3_rx_desc,
+ TP_PROTO(struct hns3_enet_ring *ring),
+ TP_ARGS(ring),
+
+ TP_STRUCT__entry(
+ __field(int, index)
+ __field(int, ntu)
+ __field(int, ntc)
+ __field(dma_addr_t, desc_dma)
+ __field(dma_addr_t, buf_dma)
+ __array(u32, desc, DESC_NR)
+ __string(devname, ring->tqp->handle->kinfo.netdev->name)
+ ),
+
+ TP_fast_assign(
+ __entry->index = ring->tqp->tqp_index;
+ __entry->ntu = ring->next_to_use;
+ __entry->ntc = ring->next_to_clean;
+ __entry->desc_dma = ring->desc_dma_addr;
+ __entry->buf_dma = ring->desc_cb[ring->next_to_clean].dma;
+ memcpy(__entry->desc, &ring->desc[ring->next_to_clean],
+ sizeof(struct hns3_desc));
+ __assign_str(devname, ring->tqp->handle->kinfo.netdev->name);
+ ),
+
+ TP_printk(
+ "%s-%d-%d/%d desc(%pad) buf(%pad): %s",
+ __get_str(devname), __entry->index, __entry->ntu,
+ __entry->ntc, &__entry->desc_dma, &__entry->buf_dma,
+ __print_array(__entry->desc, DESC_NR, sizeof(u32))
+ )
+);
+
+#endif /* _HNS3_TRACE_H_ */
+
+/* This must be outside ifdef _HNS3_TRACE_H */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE hns3_trace
+#include <trace/define_trace.h>
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
index 940ead3970d1..7f509eff562e 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
@@ -479,19 +479,6 @@ static void hclge_cmd_uninit_regs(struct hclge_hw *hw)
hclge_write_dev(hw, HCLGE_NIC_CRQ_TAIL_REG, 0);
}
-static void hclge_destroy_queue(struct hclge_cmq_ring *ring)
-{
- spin_lock(&ring->lock);
- hclge_free_cmd_desc(ring);
- spin_unlock(&ring->lock);
-}
-
-static void hclge_destroy_cmd_queue(struct hclge_hw *hw)
-{
- hclge_destroy_queue(&hw->cmq.csq);
- hclge_destroy_queue(&hw->cmq.crq);
-}
-
void hclge_cmd_uninit(struct hclge_dev *hdev)
{
spin_lock_bh(&hdev->hw.cmq.csq.lock);
@@ -501,5 +488,6 @@ void hclge_cmd_uninit(struct hclge_dev *hdev)
spin_unlock(&hdev->hw.cmq.crq.lock);
spin_unlock_bh(&hdev->hw.cmq.csq.lock);
- hclge_destroy_cmd_queue(&hdev->hw);
+ hclge_free_cmd_desc(&hdev->hw.cmq.csq);
+ hclge_free_cmd_desc(&hdev->hw.cmq.crq);
}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
index d97da67f07a1..96498d9b4754 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
@@ -6,6 +6,7 @@
#include <linux/types.h>
#include <linux/io.h>
#include <linux/etherdevice.h>
+#include "hnae3.h"
#define HCLGE_CMDQ_TX_TIMEOUT 30000
#define HCLGE_DESC_DATA_LEN 6
@@ -63,6 +64,7 @@ enum hclge_cmd_status {
struct hclge_misc_vector {
u8 __iomem *addr;
int vector_irq;
+ char name[HNAE3_INT_NAME_LEN];
};
struct hclge_cmq {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
index 112df34b3869..f3d4cbd28913 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
@@ -886,8 +886,8 @@ static void hclge_dbg_dump_mng_table(struct hclge_dev *hdev)
}
}
-static void hclge_dbg_fd_tcam_read(struct hclge_dev *hdev, u8 stage,
- bool sel_x, u32 loc)
+static int hclge_dbg_fd_tcam_read(struct hclge_dev *hdev, u8 stage,
+ bool sel_x, u32 loc)
{
struct hclge_fd_tcam_config_1_cmd *req1;
struct hclge_fd_tcam_config_2_cmd *req2;
@@ -912,7 +912,7 @@ static void hclge_dbg_fd_tcam_read(struct hclge_dev *hdev, u8 stage,
ret = hclge_cmd_send(&hdev->hw, desc, 3);
if (ret)
- return;
+ return ret;
dev_info(&hdev->pdev->dev, " read result tcam key %s(%u):\n",
sel_x ? "x" : "y", loc);
@@ -931,16 +931,76 @@ static void hclge_dbg_fd_tcam_read(struct hclge_dev *hdev, u8 stage,
req = (u32 *)req3->tcam_data;
for (i = 0; i < 5; i++)
dev_info(&hdev->pdev->dev, "%08x\n", *req++);
+
+ return ret;
+}
+
+static int hclge_dbg_get_rules_location(struct hclge_dev *hdev, u16 *rule_locs)
+{
+ struct hclge_fd_rule *rule;
+ struct hlist_node *node;
+ int cnt = 0;
+
+ spin_lock_bh(&hdev->fd_rule_lock);
+ hlist_for_each_entry_safe(rule, node, &hdev->fd_rule_list, rule_node) {
+ rule_locs[cnt] = rule->location;
+ cnt++;
+ }
+ spin_unlock_bh(&hdev->fd_rule_lock);
+
+ if (cnt != hdev->hclge_fd_rule_num)
+ return -EINVAL;
+
+ return cnt;
}
static void hclge_dbg_fd_tcam(struct hclge_dev *hdev)
{
- u32 i;
+ int i, ret, rule_cnt;
+ u16 *rule_locs;
- for (i = 0; i < hdev->fd_cfg.rule_num[0]; i++) {
- hclge_dbg_fd_tcam_read(hdev, 0, true, i);
- hclge_dbg_fd_tcam_read(hdev, 0, false, i);
+ if (!hnae3_dev_fd_supported(hdev)) {
+ dev_err(&hdev->pdev->dev,
+ "Only FD-supported dev supports dump fd tcam\n");
+ return;
}
+
+ if (!hdev->hclge_fd_rule_num ||
+ !hdev->fd_cfg.rule_num[HCLGE_FD_STAGE_1])
+ return;
+
+ rule_locs = kcalloc(hdev->fd_cfg.rule_num[HCLGE_FD_STAGE_1],
+ sizeof(u16), GFP_KERNEL);
+ if (!rule_locs)
+ return;
+
+ rule_cnt = hclge_dbg_get_rules_location(hdev, rule_locs);
+ if (rule_cnt <= 0) {
+ dev_err(&hdev->pdev->dev,
+ "failed to get rule number, ret = %d\n", rule_cnt);
+ kfree(rule_locs);
+ return;
+ }
+
+ for (i = 0; i < rule_cnt; i++) {
+ ret = hclge_dbg_fd_tcam_read(hdev, 0, true, rule_locs[i]);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "failed to get fd tcam key x, ret = %d\n", ret);
+ kfree(rule_locs);
+ return;
+ }
+
+ ret = hclge_dbg_fd_tcam_read(hdev, 0, false, rule_locs[i]);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "failed to get fd tcam key y, ret = %d\n", ret);
+ kfree(rule_locs);
+ return;
+ }
+ }
+
+ kfree(rule_locs);
}
void hclge_dbg_dump_rst_info(struct hclge_dev *hdev)
@@ -976,6 +1036,14 @@ void hclge_dbg_dump_rst_info(struct hclge_dev *hdev)
dev_info(&hdev->pdev->dev, "hdev state: 0x%lx\n", hdev->state);
}
+static void hclge_dbg_dump_serv_info(struct hclge_dev *hdev)
+{
+ dev_info(&hdev->pdev->dev, "last_serv_processed: %lu\n",
+ hdev->last_serv_processed);
+ dev_info(&hdev->pdev->dev, "last_serv_cnt: %lu\n",
+ hdev->serv_processed_cnt);
+}
+
static void hclge_dbg_get_m7_stats_info(struct hclge_dev *hdev)
{
struct hclge_desc *desc_src, *desc_tmp;
@@ -1227,6 +1295,8 @@ int hclge_dbg_run_cmd(struct hnae3_handle *handle, const char *cmd_buf)
hclge_dbg_dump_reg_cmd(hdev, &cmd_buf[sizeof(DUMP_REG)]);
} else if (strncmp(cmd_buf, "dump reset info", 15) == 0) {
hclge_dbg_dump_rst_info(hdev);
+ } else if (strncmp(cmd_buf, "dump serv info", 14) == 0) {
+ hclge_dbg_dump_serv_info(hdev);
} else if (strncmp(cmd_buf, "dump m7 info", 12) == 0) {
hclge_dbg_get_m7_stats_info(hdev);
} else if (strncmp(cmd_buf, "dump ncl_config", 15) == 0) {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
index dc66b4e13377..f8127d72d645 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c
@@ -505,7 +505,7 @@ static const struct hclge_hw_error hclge_ssu_mem_ecc_err_int[] = {
static const struct hclge_hw_error hclge_ssu_port_based_err_int[] = {
{ .int_msk = BIT(0), .msg = "roc_pkt_without_key_port",
- .reset_level = HNAE3_GLOBAL_RESET },
+ .reset_level = HNAE3_FUNC_RESET },
{ .int_msk = BIT(1), .msg = "tpu_pkt_without_key_port",
.reset_level = HNAE3_GLOBAL_RESET },
{ .int_msk = BIT(2), .msg = "igu_pkt_without_key_port",
@@ -599,7 +599,7 @@ static const struct hclge_hw_error hclge_ssu_ets_tcg_int[] = {
static const struct hclge_hw_error hclge_ssu_port_based_pf_int[] = {
{ .int_msk = BIT(0), .msg = "roc_pkt_without_key_port",
- .reset_level = HNAE3_GLOBAL_RESET },
+ .reset_level = HNAE3_FUNC_RESET },
{ .int_msk = BIT(9), .msg = "low_water_line_err_port",
.reset_level = HNAE3_NONE_RESET },
{ .int_msk = BIT(10), .msg = "hi_water_line_err_port",
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index d862e9ba27e1..76e8aa40e1bc 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -72,6 +72,8 @@ static int hclge_set_default_loopback(struct hclge_dev *hdev);
static struct hnae3_ae_algo ae_algo;
+static struct workqueue_struct *hclge_wq;
+
static const struct pci_device_id ae_algo_pci_tbl[] = {
{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_GE), 0},
{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_25GE), 0},
@@ -416,7 +418,7 @@ static int hclge_mac_update_stats_defective(struct hclge_dev *hdev)
{
#define HCLGE_MAC_CMD_NUM 21
- u64 *data = (u64 *)(&hdev->hw_stats.mac_stats);
+ u64 *data = (u64 *)(&hdev->mac_stats);
struct hclge_desc desc[HCLGE_MAC_CMD_NUM];
__le64 *desc_data;
int i, k, n;
@@ -453,7 +455,7 @@ static int hclge_mac_update_stats_defective(struct hclge_dev *hdev)
static int hclge_mac_update_stats_complete(struct hclge_dev *hdev, u32 desc_num)
{
- u64 *data = (u64 *)(&hdev->hw_stats.mac_stats);
+ u64 *data = (u64 *)(&hdev->mac_stats);
struct hclge_desc *desc;
__le64 *desc_data;
u16 i, k, n;
@@ -802,7 +804,7 @@ static void hclge_get_stats(struct hnae3_handle *handle, u64 *data)
struct hclge_dev *hdev = vport->back;
u64 *p;
- p = hclge_comm_get_stats(&hdev->hw_stats.mac_stats, g_mac_stats_string,
+ p = hclge_comm_get_stats(&hdev->mac_stats, g_mac_stats_string,
ARRAY_SIZE(g_mac_stats_string), data);
p = hclge_tqps_get_stats(handle, p);
}
@@ -815,8 +817,8 @@ static void hclge_get_mac_stat(struct hnae3_handle *handle,
hclge_update_stats(handle, NULL);
- mac_stats->tx_pause_cnt = hdev->hw_stats.mac_stats.mac_tx_mac_pause_num;
- mac_stats->rx_pause_cnt = hdev->hw_stats.mac_stats.mac_rx_mac_pause_num;
+ mac_stats->tx_pause_cnt = hdev->mac_stats.mac_tx_mac_pause_num;
+ mac_stats->rx_pause_cnt = hdev->mac_stats.mac_rx_mac_pause_num;
}
static int hclge_parse_func_status(struct hclge_dev *hdev,
@@ -2665,31 +2667,27 @@ static int hclge_mac_init(struct hclge_dev *hdev)
static void hclge_mbx_task_schedule(struct hclge_dev *hdev)
{
- if (!test_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state) &&
+ if (!test_bit(HCLGE_STATE_REMOVING, &hdev->state) &&
!test_and_set_bit(HCLGE_STATE_MBX_SERVICE_SCHED, &hdev->state))
- queue_work_on(cpumask_first(&hdev->affinity_mask), system_wq,
- &hdev->mbx_service_task);
+ mod_delayed_work_on(cpumask_first(&hdev->affinity_mask),
+ hclge_wq, &hdev->service_task, 0);
}
static void hclge_reset_task_schedule(struct hclge_dev *hdev)
{
if (!test_bit(HCLGE_STATE_REMOVING, &hdev->state) &&
!test_and_set_bit(HCLGE_STATE_RST_SERVICE_SCHED, &hdev->state))
- queue_work_on(cpumask_first(&hdev->affinity_mask), system_wq,
- &hdev->rst_service_task);
+ mod_delayed_work_on(cpumask_first(&hdev->affinity_mask),
+ hclge_wq, &hdev->service_task, 0);
}
void hclge_task_schedule(struct hclge_dev *hdev, unsigned long delay_time)
{
- if (!test_bit(HCLGE_STATE_DOWN, &hdev->state) &&
- !test_bit(HCLGE_STATE_REMOVING, &hdev->state) &&
- !test_and_set_bit(HCLGE_STATE_SERVICE_SCHED, &hdev->state)) {
- hdev->hw_stats.stats_timer++;
- hdev->fd_arfs_expire_timer++;
+ if (!test_bit(HCLGE_STATE_REMOVING, &hdev->state) &&
+ !test_bit(HCLGE_STATE_RST_FAIL, &hdev->state))
mod_delayed_work_on(cpumask_first(&hdev->affinity_mask),
- system_wq, &hdev->service_task,
+ hclge_wq, &hdev->service_task,
delay_time);
- }
}
static int hclge_get_mac_link_status(struct hclge_dev *hdev)
@@ -2748,6 +2746,10 @@ static void hclge_update_link_status(struct hclge_dev *hdev)
if (!client)
return;
+
+ if (test_and_set_bit(HCLGE_STATE_LINK_UPDATING, &hdev->state))
+ return;
+
state = hclge_get_mac_phy_link(hdev);
if (state != hdev->hw.mac.link) {
for (i = 0; i < hdev->num_vmdq_vport + 1; i++) {
@@ -2761,6 +2763,8 @@ static void hclge_update_link_status(struct hclge_dev *hdev)
}
hdev->hw.mac.link = state;
}
+
+ clear_bit(HCLGE_STATE_LINK_UPDATING, &hdev->state);
}
static void hclge_update_port_capability(struct hclge_mac *mac)
@@ -2831,6 +2835,12 @@ static int hclge_get_sfp_info(struct hclge_dev *hdev, struct hclge_mac *mac)
return ret;
}
+ /* In some case, mac speed get from IMP may be 0, it shouldn't be
+ * set to mac->speed.
+ */
+ if (!le32_to_cpu(resp->speed))
+ return 0;
+
mac->speed = le32_to_cpu(resp->speed);
/* if resp->speed_ability is 0, it means it's an old version
* firmware, do not update these params
@@ -2940,6 +2950,9 @@ static int hclge_get_vf_config(struct hnae3_handle *handle, int vf,
ivf->trusted = vport->vf_info.trusted;
ivf->min_tx_rate = 0;
ivf->max_tx_rate = vport->vf_info.max_tx_rate;
+ ivf->vlan = vport->port_base_vlan_cfg.vlan_info.vlan_tag;
+ ivf->vlan_proto = htons(vport->port_base_vlan_cfg.vlan_info.vlan_proto);
+ ivf->qos = vport->port_base_vlan_cfg.vlan_info.qos;
ether_addr_copy(ivf->mac, vport->vf_info.mac);
return 0;
@@ -2998,8 +3011,6 @@ static u32 hclge_check_event_cause(struct hclge_dev *hdev, u32 *clearval)
/* check for vector0 msix event source */
if (msix_src_reg & HCLGE_VECTOR0_REG_MSIX_MASK) {
- dev_info(&hdev->pdev->dev, "received event 0x%x\n",
- msix_src_reg);
*clearval = msix_src_reg;
return HCLGE_VECTOR0_EVENT_ERR;
}
@@ -3172,8 +3183,10 @@ static int hclge_misc_irq_init(struct hclge_dev *hdev)
hclge_get_misc_vector(hdev);
/* this would be explicitly freed in the end */
+ snprintf(hdev->misc_vector.name, HNAE3_INT_NAME_LEN, "%s-misc-%s",
+ HCLGE_NAME, pci_name(hdev->pdev));
ret = request_irq(hdev->misc_vector.vector_irq, hclge_misc_irq_handle,
- 0, "hclge_misc", hdev);
+ 0, hdev->misc_vector.name, hdev);
if (ret) {
hclge_free_vector(hdev, 0);
dev_err(&hdev->pdev->dev, "request misc irq(%d) fail\n",
@@ -3247,7 +3260,8 @@ static int hclge_notify_roce_client(struct hclge_dev *hdev,
static int hclge_reset_wait(struct hclge_dev *hdev)
{
#define HCLGE_RESET_WATI_MS 100
-#define HCLGE_RESET_WAIT_CNT 200
+#define HCLGE_RESET_WAIT_CNT 350
+
u32 val, reg, reg_bit;
u32 cnt = 0;
@@ -3264,8 +3278,6 @@ static int hclge_reset_wait(struct hclge_dev *hdev)
reg = HCLGE_FUN_RST_ING;
reg_bit = HCLGE_FUN_RST_ING_B;
break;
- case HNAE3_FLR_RESET:
- break;
default:
dev_err(&hdev->pdev->dev,
"Wait for unsupported reset type: %d\n",
@@ -3273,20 +3285,6 @@ static int hclge_reset_wait(struct hclge_dev *hdev)
return -EINVAL;
}
- if (hdev->reset_type == HNAE3_FLR_RESET) {
- while (!test_bit(HNAE3_FLR_DONE, &hdev->flr_state) &&
- cnt++ < HCLGE_RESET_WAIT_CNT)
- msleep(HCLGE_RESET_WATI_MS);
-
- if (!test_bit(HNAE3_FLR_DONE, &hdev->flr_state)) {
- dev_err(&hdev->pdev->dev,
- "flr wait timeout: %u\n", cnt);
- return -EBUSY;
- }
-
- return 0;
- }
-
val = hclge_read_dev(&hdev->hw, reg);
while (hnae3_get_bit(val, reg_bit) && cnt < HCLGE_RESET_WAIT_CNT) {
msleep(HCLGE_RESET_WATI_MS);
@@ -3352,7 +3350,19 @@ static int hclge_set_all_vf_rst(struct hclge_dev *hdev, bool reset)
return 0;
}
-static int hclge_func_reset_sync_vf(struct hclge_dev *hdev)
+static void hclge_mailbox_service_task(struct hclge_dev *hdev)
+{
+ if (!test_and_clear_bit(HCLGE_STATE_MBX_SERVICE_SCHED, &hdev->state) ||
+ test_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state) ||
+ test_and_set_bit(HCLGE_STATE_MBX_HANDLING, &hdev->state))
+ return;
+
+ hclge_mbx_handler(hdev);
+
+ clear_bit(HCLGE_STATE_MBX_HANDLING, &hdev->state);
+}
+
+static void hclge_func_reset_sync_vf(struct hclge_dev *hdev)
{
struct hclge_pf_rst_sync_cmd *req;
struct hclge_desc desc;
@@ -3363,26 +3373,28 @@ static int hclge_func_reset_sync_vf(struct hclge_dev *hdev)
hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_QUERY_VF_RST_RDY, true);
do {
+ /* vf need to down netdev by mbx during PF or FLR reset */
+ hclge_mailbox_service_task(hdev);
+
ret = hclge_cmd_send(&hdev->hw, &desc, 1);
/* for compatible with old firmware, wait
* 100 ms for VF to stop IO
*/
if (ret == -EOPNOTSUPP) {
msleep(HCLGE_RESET_SYNC_TIME);
- return 0;
+ return;
} else if (ret) {
- dev_err(&hdev->pdev->dev, "sync with VF fail %d!\n",
- ret);
- return ret;
+ dev_warn(&hdev->pdev->dev, "sync with VF fail %d!\n",
+ ret);
+ return;
} else if (req->all_vf_ready) {
- return 0;
+ return;
}
msleep(HCLGE_PF_RESET_SYNC_TIME);
hclge_cmd_reuse_desc(&desc, true);
} while (cnt++ < HCLGE_PF_RESET_SYNC_CNT);
- dev_err(&hdev->pdev->dev, "sync with VF timeout!\n");
- return -ETIME;
+ dev_warn(&hdev->pdev->dev, "sync with VF timeout!\n");
}
void hclge_report_hw_error(struct hclge_dev *hdev,
@@ -3462,12 +3474,6 @@ static void hclge_do_reset(struct hclge_dev *hdev)
set_bit(HNAE3_FUNC_RESET, &hdev->reset_pending);
hclge_reset_task_schedule(hdev);
break;
- case HNAE3_FLR_RESET:
- dev_info(&pdev->dev, "FLR requested\n");
- /* schedule again to check later */
- set_bit(HNAE3_FLR_RESET, &hdev->reset_pending);
- hclge_reset_task_schedule(hdev);
- break;
default:
dev_warn(&pdev->dev,
"Unsupported reset type: %d\n", hdev->reset_type);
@@ -3483,10 +3489,15 @@ static enum hnae3_reset_type hclge_get_reset_level(struct hnae3_ae_dev *ae_dev,
/* first, resolve any unknown reset type to the known type(s) */
if (test_bit(HNAE3_UNKNOWN_RESET, addr)) {
+ u32 msix_sts_reg = hclge_read_dev(&hdev->hw,
+ HCLGE_VECTOR0_PF_OTHER_INT_STS_REG);
/* we will intentionally ignore any errors from this function
* as we will end up in *some* reset request in any case
*/
- hclge_handle_hw_msix_error(hdev, addr);
+ if (hclge_handle_hw_msix_error(hdev, addr))
+ dev_info(&hdev->pdev->dev, "received msix interrupt 0x%x\n",
+ msix_sts_reg);
+
clear_bit(HNAE3_UNKNOWN_RESET, addr);
/* We defered the clearing of the error event which caused
* interrupt since it was not posssible to do that in
@@ -3551,23 +3562,6 @@ static void hclge_clear_reset_cause(struct hclge_dev *hdev)
hclge_enable_vector(&hdev->misc_vector, true);
}
-static int hclge_reset_prepare_down(struct hclge_dev *hdev)
-{
- int ret = 0;
-
- switch (hdev->reset_type) {
- case HNAE3_FUNC_RESET:
- /* fall through */
- case HNAE3_FLR_RESET:
- ret = hclge_set_all_vf_rst(hdev, true);
- break;
- default:
- break;
- }
-
- return ret;
-}
-
static void hclge_reset_handshake(struct hclge_dev *hdev, bool enable)
{
u32 reg_val;
@@ -3581,6 +3575,19 @@ static void hclge_reset_handshake(struct hclge_dev *hdev, bool enable)
hclge_write_dev(&hdev->hw, HCLGE_NIC_CSQ_DEPTH_REG, reg_val);
}
+static int hclge_func_reset_notify_vf(struct hclge_dev *hdev)
+{
+ int ret;
+
+ ret = hclge_set_all_vf_rst(hdev, true);
+ if (ret)
+ return ret;
+
+ hclge_func_reset_sync_vf(hdev);
+
+ return 0;
+}
+
static int hclge_reset_prepare_wait(struct hclge_dev *hdev)
{
u32 reg_val;
@@ -3588,10 +3595,7 @@ static int hclge_reset_prepare_wait(struct hclge_dev *hdev)
switch (hdev->reset_type) {
case HNAE3_FUNC_RESET:
- /* to confirm whether all running VF is ready
- * before request PF reset
- */
- ret = hclge_func_reset_sync_vf(hdev);
+ ret = hclge_func_reset_notify_vf(hdev);
if (ret)
return ret;
@@ -3611,16 +3615,9 @@ static int hclge_reset_prepare_wait(struct hclge_dev *hdev)
hdev->rst_stats.pf_rst_cnt++;
break;
case HNAE3_FLR_RESET:
- /* to confirm whether all running VF is ready
- * before request PF reset
- */
- ret = hclge_func_reset_sync_vf(hdev);
+ ret = hclge_func_reset_notify_vf(hdev);
if (ret)
return ret;
-
- set_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state);
- set_bit(HNAE3_FLR_DOWN, &hdev->flr_state);
- hdev->rst_stats.flr_rst_cnt++;
break;
case HNAE3_IMP_RESET:
hclge_handle_imp_error(hdev);
@@ -3672,6 +3669,8 @@ static bool hclge_reset_err_handle(struct hclge_dev *hdev)
hclge_dbg_dump_rst_info(hdev);
+ set_bit(HCLGE_STATE_RST_FAIL, &hdev->state);
+
return false;
}
@@ -3747,10 +3746,9 @@ static int hclge_reset_stack(struct hclge_dev *hdev)
return hclge_notify_client(hdev, HNAE3_RESTORE_CLIENT);
}
-static void hclge_reset(struct hclge_dev *hdev)
+static int hclge_reset_prepare(struct hclge_dev *hdev)
{
struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev);
- enum hnae3_reset_type reset_level;
int ret;
/* Initialize ae_dev reset status as well, in case enet layer wants to
@@ -3761,45 +3759,41 @@ static void hclge_reset(struct hclge_dev *hdev)
/* perform reset of the stack & ae device for a client */
ret = hclge_notify_roce_client(hdev, HNAE3_DOWN_CLIENT);
if (ret)
- goto err_reset;
-
- ret = hclge_reset_prepare_down(hdev);
- if (ret)
- goto err_reset;
+ return ret;
rtnl_lock();
ret = hclge_notify_client(hdev, HNAE3_DOWN_CLIENT);
- if (ret)
- goto err_reset_lock;
-
rtnl_unlock();
-
- ret = hclge_reset_prepare_wait(hdev);
if (ret)
- goto err_reset;
+ return ret;
- if (hclge_reset_wait(hdev))
- goto err_reset;
+ return hclge_reset_prepare_wait(hdev);
+}
+
+static int hclge_reset_rebuild(struct hclge_dev *hdev)
+{
+ struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev);
+ enum hnae3_reset_type reset_level;
+ int ret;
hdev->rst_stats.hw_reset_done_cnt++;
ret = hclge_notify_roce_client(hdev, HNAE3_UNINIT_CLIENT);
if (ret)
- goto err_reset;
+ return ret;
rtnl_lock();
-
ret = hclge_reset_stack(hdev);
+ rtnl_unlock();
if (ret)
- goto err_reset_lock;
+ return ret;
hclge_clear_reset_cause(hdev);
ret = hclge_reset_prepare_up(hdev);
if (ret)
- goto err_reset_lock;
+ return ret;
- rtnl_unlock();
ret = hclge_notify_roce_client(hdev, HNAE3_INIT_CLIENT);
/* ignore RoCE notify error if it fails HCLGE_RESET_MAX_FAIL_CNT - 1
@@ -3807,24 +3801,23 @@ static void hclge_reset(struct hclge_dev *hdev)
*/
if (ret &&
hdev->rst_stats.reset_fail_cnt < HCLGE_RESET_MAX_FAIL_CNT - 1)
- goto err_reset;
+ return ret;
rtnl_lock();
-
ret = hclge_notify_client(hdev, HNAE3_UP_CLIENT);
- if (ret)
- goto err_reset_lock;
-
rtnl_unlock();
+ if (ret)
+ return ret;
ret = hclge_notify_roce_client(hdev, HNAE3_UP_CLIENT);
if (ret)
- goto err_reset;
+ return ret;
hdev->last_reset_time = jiffies;
hdev->rst_stats.reset_fail_cnt = 0;
hdev->rst_stats.reset_done_cnt++;
ae_dev->reset_type = HNAE3_NONE_RESET;
+ clear_bit(HCLGE_STATE_RST_FAIL, &hdev->state);
/* if default_reset_request has a higher level reset request,
* it should be handled as soon as possible. since some errors
@@ -3835,10 +3828,22 @@ static void hclge_reset(struct hclge_dev *hdev)
if (reset_level != HNAE3_NONE_RESET)
set_bit(reset_level, &hdev->reset_request);
+ return 0;
+}
+
+static void hclge_reset(struct hclge_dev *hdev)
+{
+ if (hclge_reset_prepare(hdev))
+ goto err_reset;
+
+ if (hclge_reset_wait(hdev))
+ goto err_reset;
+
+ if (hclge_reset_rebuild(hdev))
+ goto err_reset;
+
return;
-err_reset_lock:
- rtnl_unlock();
err_reset:
if (hclge_reset_err_handle(hdev))
hclge_reset_task_schedule(hdev);
@@ -3939,34 +3944,18 @@ static void hclge_reset_subtask(struct hclge_dev *hdev)
hdev->reset_type = HNAE3_NONE_RESET;
}
-static void hclge_reset_service_task(struct work_struct *work)
+static void hclge_reset_service_task(struct hclge_dev *hdev)
{
- struct hclge_dev *hdev =
- container_of(work, struct hclge_dev, rst_service_task);
-
- if (test_and_set_bit(HCLGE_STATE_RST_HANDLING, &hdev->state))
+ if (!test_and_clear_bit(HCLGE_STATE_RST_SERVICE_SCHED, &hdev->state))
return;
- clear_bit(HCLGE_STATE_RST_SERVICE_SCHED, &hdev->state);
+ down(&hdev->reset_sem);
+ set_bit(HCLGE_STATE_RST_HANDLING, &hdev->state);
hclge_reset_subtask(hdev);
clear_bit(HCLGE_STATE_RST_HANDLING, &hdev->state);
-}
-
-static void hclge_mailbox_service_task(struct work_struct *work)
-{
- struct hclge_dev *hdev =
- container_of(work, struct hclge_dev, mbx_service_task);
-
- if (test_and_set_bit(HCLGE_STATE_MBX_HANDLING, &hdev->state))
- return;
-
- clear_bit(HCLGE_STATE_MBX_SERVICE_SCHED, &hdev->state);
-
- hclge_mbx_handler(hdev);
-
- clear_bit(HCLGE_STATE_MBX_HANDLING, &hdev->state);
+ up(&hdev->reset_sem);
}
static void hclge_update_vport_alive(struct hclge_dev *hdev)
@@ -3986,29 +3975,62 @@ static void hclge_update_vport_alive(struct hclge_dev *hdev)
}
}
-static void hclge_service_task(struct work_struct *work)
+static void hclge_periodic_service_task(struct hclge_dev *hdev)
{
- struct hclge_dev *hdev =
- container_of(work, struct hclge_dev, service_task.work);
+ unsigned long delta = round_jiffies_relative(HZ);
- clear_bit(HCLGE_STATE_SERVICE_SCHED, &hdev->state);
+ /* Always handle the link updating to make sure link state is
+ * updated when it is triggered by mbx.
+ */
+ hclge_update_link_status(hdev);
- if (hdev->hw_stats.stats_timer >= HCLGE_STATS_TIMER_INTERVAL) {
- hclge_update_stats_for_all(hdev);
- hdev->hw_stats.stats_timer = 0;
+ if (time_is_after_jiffies(hdev->last_serv_processed + HZ)) {
+ delta = jiffies - hdev->last_serv_processed;
+
+ if (delta < round_jiffies_relative(HZ)) {
+ delta = round_jiffies_relative(HZ) - delta;
+ goto out;
+ }
}
- hclge_update_port_info(hdev);
- hclge_update_link_status(hdev);
+ hdev->serv_processed_cnt++;
hclge_update_vport_alive(hdev);
+
+ if (test_bit(HCLGE_STATE_DOWN, &hdev->state)) {
+ hdev->last_serv_processed = jiffies;
+ goto out;
+ }
+
+ if (!(hdev->serv_processed_cnt % HCLGE_STATS_TIMER_INTERVAL))
+ hclge_update_stats_for_all(hdev);
+
+ hclge_update_port_info(hdev);
hclge_sync_vlan_filter(hdev);
- if (hdev->fd_arfs_expire_timer >= HCLGE_FD_ARFS_EXPIRE_TIMER_INTERVAL) {
+ if (!(hdev->serv_processed_cnt % HCLGE_ARFS_EXPIRE_INTERVAL))
hclge_rfs_filter_expire(hdev);
- hdev->fd_arfs_expire_timer = 0;
- }
- hclge_task_schedule(hdev, round_jiffies_relative(HZ));
+ hdev->last_serv_processed = jiffies;
+
+out:
+ hclge_task_schedule(hdev, delta);
+}
+
+static void hclge_service_task(struct work_struct *work)
+{
+ struct hclge_dev *hdev =
+ container_of(work, struct hclge_dev, service_task.work);
+
+ hclge_reset_service_task(hdev);
+ hclge_mailbox_service_task(hdev);
+ hclge_periodic_service_task(hdev);
+
+ /* Handle reset and mbx again in case periodical task delays the
+ * handling by calling hclge_task_schedule() in
+ * hclge_periodic_service_task().
+ */
+ hclge_reset_service_task(hdev);
+ hclge_mailbox_service_task(hdev);
}
struct hclge_vport *hclge_get_vport(struct hnae3_handle *handle)
@@ -4654,7 +4676,7 @@ static int hclge_map_ring_to_vector(struct hnae3_handle *handle, int vector,
vector_id = hclge_get_vector_index(hdev, vector);
if (vector_id < 0) {
dev_err(&hdev->pdev->dev,
- "Get vector index fail. vector_id =%d\n", vector_id);
+ "failed to get vector index. vector=%d\n", vector);
return vector_id;
}
@@ -6734,6 +6756,19 @@ static void hclge_reset_tqp_stats(struct hnae3_handle *handle)
}
}
+static void hclge_flush_link_update(struct hclge_dev *hdev)
+{
+#define HCLGE_FLUSH_LINK_TIMEOUT 100000
+
+ unsigned long last = hdev->serv_processed_cnt;
+ int i = 0;
+
+ while (test_bit(HCLGE_STATE_LINK_UPDATING, &hdev->state) &&
+ i++ < HCLGE_FLUSH_LINK_TIMEOUT &&
+ last == hdev->serv_processed_cnt)
+ usleep_range(1, 1);
+}
+
static void hclge_set_timer_task(struct hnae3_handle *handle, bool enable)
{
struct hclge_vport *vport = hclge_get_vport(handle);
@@ -6742,12 +6777,12 @@ static void hclge_set_timer_task(struct hnae3_handle *handle, bool enable)
if (enable) {
hclge_task_schedule(hdev, round_jiffies_relative(HZ));
} else {
- /* Set the DOWN flag here to disable the service to be
- * scheduled again
- */
+ /* Set the DOWN flag here to disable link updating */
set_bit(HCLGE_STATE_DOWN, &hdev->state);
- cancel_delayed_work_sync(&hdev->service_task);
- clear_bit(HCLGE_STATE_SERVICE_SCHED, &hdev->state);
+
+ /* flush memory to make sure DOWN is seen by service task */
+ smp_mb__before_atomic();
+ hclge_flush_link_update(hdev);
}
}
@@ -7483,7 +7518,6 @@ void hclge_uninit_vport_mac_table(struct hclge_dev *hdev)
struct hclge_vport *vport;
int i;
- mutex_lock(&hdev->vport_cfg_mutex);
for (i = 0; i < hdev->num_alloc_vport; i++) {
vport = &hdev->vport[i];
list_for_each_entry_safe(mac, tmp, &vport->uc_mac_list, node) {
@@ -7496,7 +7530,6 @@ void hclge_uninit_vport_mac_table(struct hclge_dev *hdev)
kfree(mac);
}
}
- mutex_unlock(&hdev->vport_cfg_mutex);
}
static int hclge_get_mac_ethertype_cmd_status(struct hclge_dev *hdev,
@@ -8257,7 +8290,6 @@ void hclge_uninit_vport_vlan_table(struct hclge_dev *hdev)
struct hclge_vport *vport;
int i;
- mutex_lock(&hdev->vport_cfg_mutex);
for (i = 0; i < hdev->num_alloc_vport; i++) {
vport = &hdev->vport[i];
list_for_each_entry_safe(vlan, tmp, &vport->vlan_list, node) {
@@ -8265,7 +8297,6 @@ void hclge_uninit_vport_vlan_table(struct hclge_dev *hdev)
kfree(vlan);
}
}
- mutex_unlock(&hdev->vport_cfg_mutex);
}
static void hclge_restore_vlan_table(struct hnae3_handle *handle)
@@ -8277,7 +8308,6 @@ static void hclge_restore_vlan_table(struct hnae3_handle *handle)
u16 state, vlan_id;
int i;
- mutex_lock(&hdev->vport_cfg_mutex);
for (i = 0; i < hdev->num_alloc_vport; i++) {
vport = &hdev->vport[i];
vlan_proto = vport->port_base_vlan_cfg.vlan_info.vlan_proto;
@@ -8303,8 +8333,6 @@ static void hclge_restore_vlan_table(struct hnae3_handle *handle)
break;
}
}
-
- mutex_unlock(&hdev->vport_cfg_mutex);
}
int hclge_en_hw_strip_rxvtag(struct hnae3_handle *handle, bool enable)
@@ -9256,6 +9284,7 @@ static void hclge_state_init(struct hclge_dev *hdev)
set_bit(HCLGE_STATE_DOWN, &hdev->state);
clear_bit(HCLGE_STATE_RST_SERVICE_SCHED, &hdev->state);
clear_bit(HCLGE_STATE_RST_HANDLING, &hdev->state);
+ clear_bit(HCLGE_STATE_RST_FAIL, &hdev->state);
clear_bit(HCLGE_STATE_MBX_SERVICE_SCHED, &hdev->state);
clear_bit(HCLGE_STATE_MBX_HANDLING, &hdev->state);
}
@@ -9269,38 +9298,57 @@ static void hclge_state_uninit(struct hclge_dev *hdev)
del_timer_sync(&hdev->reset_timer);
if (hdev->service_task.work.func)
cancel_delayed_work_sync(&hdev->service_task);
- if (hdev->rst_service_task.func)
- cancel_work_sync(&hdev->rst_service_task);
- if (hdev->mbx_service_task.func)
- cancel_work_sync(&hdev->mbx_service_task);
}
static void hclge_flr_prepare(struct hnae3_ae_dev *ae_dev)
{
-#define HCLGE_FLR_WAIT_MS 100
-#define HCLGE_FLR_WAIT_CNT 50
- struct hclge_dev *hdev = ae_dev->priv;
- int cnt = 0;
+#define HCLGE_FLR_RETRY_WAIT_MS 500
+#define HCLGE_FLR_RETRY_CNT 5
- clear_bit(HNAE3_FLR_DOWN, &hdev->flr_state);
- clear_bit(HNAE3_FLR_DONE, &hdev->flr_state);
- set_bit(HNAE3_FLR_RESET, &hdev->default_reset_request);
- hclge_reset_event(hdev->pdev, NULL);
+ struct hclge_dev *hdev = ae_dev->priv;
+ int retry_cnt = 0;
+ int ret;
- while (!test_bit(HNAE3_FLR_DOWN, &hdev->flr_state) &&
- cnt++ < HCLGE_FLR_WAIT_CNT)
- msleep(HCLGE_FLR_WAIT_MS);
+retry:
+ down(&hdev->reset_sem);
+ set_bit(HCLGE_STATE_RST_HANDLING, &hdev->state);
+ hdev->reset_type = HNAE3_FLR_RESET;
+ ret = hclge_reset_prepare(hdev);
+ if (ret) {
+ dev_err(&hdev->pdev->dev, "fail to prepare FLR, ret=%d\n",
+ ret);
+ if (hdev->reset_pending ||
+ retry_cnt++ < HCLGE_FLR_RETRY_CNT) {
+ dev_err(&hdev->pdev->dev,
+ "reset_pending:0x%lx, retry_cnt:%d\n",
+ hdev->reset_pending, retry_cnt);
+ clear_bit(HCLGE_STATE_RST_HANDLING, &hdev->state);
+ up(&hdev->reset_sem);
+ msleep(HCLGE_FLR_RETRY_WAIT_MS);
+ goto retry;
+ }
+ }
- if (!test_bit(HNAE3_FLR_DOWN, &hdev->flr_state))
- dev_err(&hdev->pdev->dev,
- "flr wait down timeout: %d\n", cnt);
+ /* disable misc vector before FLR done */
+ hclge_enable_vector(&hdev->misc_vector, false);
+ set_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state);
+ hdev->rst_stats.flr_rst_cnt++;
}
static void hclge_flr_done(struct hnae3_ae_dev *ae_dev)
{
struct hclge_dev *hdev = ae_dev->priv;
+ int ret;
+
+ hclge_enable_vector(&hdev->misc_vector, true);
- set_bit(HNAE3_FLR_DONE, &hdev->flr_state);
+ ret = hclge_reset_rebuild(hdev);
+ if (ret)
+ dev_err(&hdev->pdev->dev, "fail to rebuild, ret=%d\n", ret);
+
+ hdev->reset_type = HNAE3_NONE_RESET;
+ clear_bit(HCLGE_STATE_RST_HANDLING, &hdev->state);
+ up(&hdev->reset_sem);
}
static void hclge_clear_resetting_state(struct hclge_dev *hdev)
@@ -9342,8 +9390,8 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev)
hdev->mps = ETH_FRAME_LEN + ETH_FCS_LEN + 2 * VLAN_HLEN;
mutex_init(&hdev->vport_lock);
- mutex_init(&hdev->vport_cfg_mutex);
spin_lock_init(&hdev->fd_rule_lock);
+ sema_init(&hdev->reset_sem, 1);
ret = hclge_pci_init(hdev);
if (ret) {
@@ -9477,8 +9525,6 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev)
timer_setup(&hdev->reset_timer, hclge_reset_timer, 0);
INIT_DELAYED_WORK(&hdev->service_task, hclge_service_task);
- INIT_WORK(&hdev->rst_service_task, hclge_reset_service_task);
- INIT_WORK(&hdev->mbx_service_task, hclge_mailbox_service_task);
/* Setup affinity after service timer setup because add_timer_on
* is called in affinity notify.
@@ -9512,6 +9558,8 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev)
dev_info(&hdev->pdev->dev, "%s driver initialization finished.\n",
HCLGE_DRIVER_NAME);
+ hclge_task_schedule(hdev, round_jiffies_relative(HZ));
+
return 0;
err_mdiobus_unreg:
@@ -9534,7 +9582,7 @@ out:
static void hclge_stats_clear(struct hclge_dev *hdev)
{
- memset(&hdev->hw_stats, 0, sizeof(hdev->hw_stats));
+ memset(&hdev->mac_stats, 0, sizeof(hdev->mac_stats));
}
static int hclge_set_mac_spoofchk(struct hclge_dev *hdev, int vf, bool enable)
@@ -9895,7 +9943,6 @@ static void hclge_uninit_ae_dev(struct hnae3_ae_dev *ae_dev)
mutex_destroy(&hdev->vport_lock);
hclge_uninit_vport_mac_table(hdev);
hclge_uninit_vport_vlan_table(hdev);
- mutex_destroy(&hdev->vport_cfg_mutex);
ae_dev->priv = NULL;
}
@@ -10240,7 +10287,7 @@ static int hclge_get_dfx_reg_len(struct hclge_dev *hdev, int *len)
return ret;
}
- data_len_per_desc = FIELD_SIZEOF(struct hclge_desc, data);
+ data_len_per_desc = sizeof_field(struct hclge_desc, data);
*len = 0;
for (i = 0; i < dfx_reg_type_num; i++) {
bd_num = bd_num_list[i];
@@ -10611,6 +10658,12 @@ static int hclge_init(void)
{
pr_info("%s is initializing\n", HCLGE_NAME);
+ hclge_wq = alloc_workqueue("%s", WQ_MEM_RECLAIM, 0, HCLGE_NAME);
+ if (!hclge_wq) {
+ pr_err("%s: failed to create workqueue\n", HCLGE_NAME);
+ return -ENOMEM;
+ }
+
hnae3_register_ae_algo(&ae_algo);
return 0;
@@ -10619,6 +10672,7 @@ static int hclge_init(void)
static void hclge_exit(void)
{
hnae3_unregister_ae_algo(&ae_algo);
+ destroy_workqueue(hclge_wq);
}
module_init(hclge_init);
module_exit(hclge_exit);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
index ebb4c6e9aed3..1c1d6b358897 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
@@ -208,13 +208,14 @@ enum HCLGE_DEV_STATE {
HCLGE_STATE_NIC_REGISTERED,
HCLGE_STATE_ROCE_REGISTERED,
HCLGE_STATE_SERVICE_INITED,
- HCLGE_STATE_SERVICE_SCHED,
HCLGE_STATE_RST_SERVICE_SCHED,
HCLGE_STATE_RST_HANDLING,
HCLGE_STATE_MBX_SERVICE_SCHED,
HCLGE_STATE_MBX_HANDLING,
HCLGE_STATE_STATISTICS_UPDATING,
HCLGE_STATE_CMD_DISABLE,
+ HCLGE_STATE_LINK_UPDATING,
+ HCLGE_STATE_RST_FAIL,
HCLGE_STATE_MAX
};
@@ -454,11 +455,7 @@ struct hclge_mac_stats {
u64 mac_rx_ctrl_pkt_num;
};
-#define HCLGE_STATS_TIMER_INTERVAL (60 * 5)
-struct hclge_hw_stats {
- struct hclge_mac_stats mac_stats;
- u32 stats_timer;
-};
+#define HCLGE_STATS_TIMER_INTERVAL 300UL
struct hclge_vlan_type_cfg {
u16 rx_ot_fst_vlan_type;
@@ -549,7 +546,7 @@ struct key_info {
/* assigned by firmware, the real filter number for each pf may be less */
#define MAX_FD_FILTER_NUM 4096
-#define HCLGE_FD_ARFS_EXPIRE_TIMER_INTERVAL 5
+#define HCLGE_ARFS_EXPIRE_INTERVAL 5UL
enum HCLGE_FD_ACTIVE_RULE_TYPE {
HCLGE_FD_RULE_NONE,
@@ -712,7 +709,7 @@ struct hclge_dev {
struct hnae3_ae_dev *ae_dev;
struct hclge_hw hw;
struct hclge_misc_vector misc_vector;
- struct hclge_hw_stats hw_stats;
+ struct hclge_mac_stats mac_stats;
unsigned long state;
unsigned long flr_state;
unsigned long last_reset_time;
@@ -723,6 +720,7 @@ struct hclge_dev {
unsigned long reset_request; /* reset has been requested */
unsigned long reset_pending; /* client rst is pending to be served */
struct hclge_rst_stats rst_stats;
+ struct semaphore reset_sem; /* protect reset process */
u32 fw_version;
u16 num_vmdq_vport; /* Num vmdq vport this PF has set up */
u16 num_tqps; /* Num task queue pairs of this PF */
@@ -774,8 +772,6 @@ struct hclge_dev {
unsigned long service_timer_previous;
struct timer_list reset_timer;
struct delayed_work service_task;
- struct work_struct rst_service_task;
- struct work_struct mbx_service_task;
bool cur_promisc;
int num_alloc_vfs; /* Actual number of VFs allocated */
@@ -811,7 +807,8 @@ struct hclge_dev {
struct hlist_head fd_rule_list;
spinlock_t fd_rule_lock; /* protect fd_rule_list and fd_bmap */
u16 hclge_fd_rule_num;
- u16 fd_arfs_expire_timer;
+ unsigned long serv_processed_cnt;
+ unsigned long last_serv_processed;
unsigned long fd_bmap[BITS_TO_LONGS(MAX_FD_FILTER_NUM)];
enum HCLGE_FD_ACTIVE_RULE_TYPE fd_active_type;
u8 fd_en;
@@ -825,8 +822,6 @@ struct hclge_dev {
u16 share_umv_size;
struct mutex umv_mutex; /* protect share_umv_size */
- struct mutex vport_cfg_mutex; /* Protect stored vf table */
-
DECLARE_KFIFO(mac_tnl_log, struct hclge_mac_tnl_stats,
HCLGE_MAC_TNL_LOG_SIZE);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
index 0b433ebe6a2d..a3c0822191a9 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
@@ -86,10 +86,12 @@ static int hclge_send_mbx_msg(struct hclge_vport *vport, u8 *msg, u16 msg_len,
int hclge_inform_reset_assert_to_vf(struct hclge_vport *vport)
{
struct hclge_dev *hdev = vport->back;
- enum hnae3_reset_type reset_type;
+ u16 reset_type;
u8 msg_data[2];
u8 dest_vfid;
+ BUILD_BUG_ON(HNAE3_MAX_RESET > U16_MAX);
+
dest_vfid = (u8)vport->vport_id;
if (hdev->reset_type == HNAE3_FUNC_RESET)
@@ -635,7 +637,6 @@ static void hclge_handle_link_change_event(struct hclge_dev *hdev,
#define LINK_STATUS_OFFSET 1
#define LINK_FAIL_CODE_OFFSET 2
- clear_bit(HCLGE_STATE_SERVICE_SCHED, &hdev->state);
hclge_task_schedule(hdev, 0);
if (!req->msg[LINK_STATUS_OFFSET])
@@ -798,13 +799,11 @@ void hclge_mbx_handler(struct hclge_dev *hdev)
hclge_get_link_mode(vport, req);
break;
case HCLGE_MBX_GET_VF_FLR_STATUS:
- mutex_lock(&hdev->vport_cfg_mutex);
hclge_rm_vport_all_mac_table(vport, true,
HCLGE_MAC_ADDR_UC);
hclge_rm_vport_all_mac_table(vport, true,
HCLGE_MAC_ADDR_MC);
hclge_rm_vport_all_vlan_table(vport, true);
- mutex_unlock(&hdev->vport_cfg_mutex);
break;
case HCLGE_MBX_GET_MEDIA_TYPE:
ret = hclge_get_vf_media_type(vport, req);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
index fbc39a2480d0..180224eab1ca 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
@@ -614,7 +614,7 @@ static void hclge_tm_vport_tc_info_update(struct hclge_vport *vport)
}
memcpy(kinfo->prio_tc, hdev->tm_info.prio_tc,
- FIELD_SIZEOF(struct hnae3_knic_private_info, prio_tc));
+ sizeof_field(struct hnae3_knic_private_info, prio_tc));
}
static void hclge_tm_vport_info_update(struct hclge_dev *hdev)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c
index af2245e3bb95..f38d236ebf4f 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c
@@ -443,7 +443,7 @@ void hclgevf_cmd_uninit(struct hclgevf_dev *hdev)
{
spin_lock_bh(&hdev->hw.cmq.csq.lock);
spin_lock(&hdev->hw.cmq.crq.lock);
- clear_bit(HCLGEVF_STATE_CMD_DISABLE, &hdev->state);
+ set_bit(HCLGEVF_STATE_CMD_DISABLE, &hdev->state);
hclgevf_cmd_uninit_regs(&hdev->hw);
spin_unlock(&hdev->hw.cmq.crq.lock);
spin_unlock_bh(&hdev->hw.cmq.csq.lock);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
index 25d78a5aaa34..b26b8ad170d0 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
@@ -16,6 +16,8 @@
static int hclgevf_reset_hdev(struct hclgevf_dev *hdev);
static struct hnae3_ae_algo ae_algovf;
+static struct workqueue_struct *hclgevf_wq;
+
static const struct pci_device_id ae_algovf_pci_tbl[] = {
{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_100G_VF), 0},
{PCI_VDEVICE(HUAWEI, HNAE3_DEV_ID_100G_RDMA_DCB_PFC_VF), 0},
@@ -440,6 +442,9 @@ void hclgevf_update_link_status(struct hclgevf_dev *hdev, int link_state)
struct hnae3_client *rclient;
struct hnae3_client *client;
+ if (test_and_set_bit(HCLGEVF_STATE_LINK_UPDATING, &hdev->state))
+ return;
+
client = handle->client;
rclient = hdev->roce_client;
@@ -452,6 +457,8 @@ void hclgevf_update_link_status(struct hclgevf_dev *hdev, int link_state)
rclient->ops->link_status_change(rhandle, !!link_state);
hdev->hw.mac.link = link_state;
}
+
+ clear_bit(HCLGEVF_STATE_LINK_UPDATING, &hdev->state);
}
static void hclgevf_update_link_mode(struct hclgevf_dev *hdev)
@@ -1404,32 +1411,6 @@ static int hclgevf_notify_client(struct hclgevf_dev *hdev,
return ret;
}
-static void hclgevf_flr_done(struct hnae3_ae_dev *ae_dev)
-{
- struct hclgevf_dev *hdev = ae_dev->priv;
-
- set_bit(HNAE3_FLR_DONE, &hdev->flr_state);
-}
-
-static int hclgevf_flr_poll_timeout(struct hclgevf_dev *hdev,
- unsigned long delay_us,
- unsigned long wait_cnt)
-{
- unsigned long cnt = 0;
-
- while (!test_bit(HNAE3_FLR_DONE, &hdev->flr_state) &&
- cnt++ < wait_cnt)
- usleep_range(delay_us, delay_us * 2);
-
- if (!test_bit(HNAE3_FLR_DONE, &hdev->flr_state)) {
- dev_err(&hdev->pdev->dev,
- "flr wait timeout\n");
- return -ETIMEDOUT;
- }
-
- return 0;
-}
-
static int hclgevf_reset_wait(struct hclgevf_dev *hdev)
{
#define HCLGEVF_RESET_WAIT_US 20000
@@ -1440,11 +1421,7 @@ static int hclgevf_reset_wait(struct hclgevf_dev *hdev)
u32 val;
int ret;
- if (hdev->reset_type == HNAE3_FLR_RESET)
- return hclgevf_flr_poll_timeout(hdev,
- HCLGEVF_RESET_WAIT_US,
- HCLGEVF_RESET_WAIT_CNT);
- else if (hdev->reset_type == HNAE3_VF_RESET)
+ if (hdev->reset_type == HNAE3_VF_RESET)
ret = readl_poll_timeout(hdev->hw.io_base +
HCLGEVF_VF_RST_ING, val,
!(val & HCLGEVF_VF_RST_ING_BIT),
@@ -1516,7 +1493,8 @@ static int hclgevf_reset_stack(struct hclgevf_dev *hdev)
/* clear handshake status with IMP */
hclgevf_reset_handshake(hdev, false);
- return 0;
+ /* bring up the nic to enable TX/RX again */
+ return hclgevf_notify_client(hdev, HNAE3_UP_CLIENT);
}
static int hclgevf_reset_prepare_wait(struct hclgevf_dev *hdev)
@@ -1525,18 +1503,10 @@ static int hclgevf_reset_prepare_wait(struct hclgevf_dev *hdev)
int ret = 0;
- switch (hdev->reset_type) {
- case HNAE3_VF_FUNC_RESET:
+ if (hdev->reset_type == HNAE3_VF_FUNC_RESET) {
ret = hclgevf_send_mbx_msg(hdev, HCLGE_MBX_RESET, 0, NULL,
0, true, NULL, sizeof(u8));
hdev->rst_stats.vf_func_rst_cnt++;
- break;
- case HNAE3_FLR_RESET:
- set_bit(HNAE3_FLR_DOWN, &hdev->flr_state);
- hdev->rst_stats.flr_rst_cnt++;
- break;
- default:
- break;
}
set_bit(HCLGEVF_STATE_CMD_DISABLE, &hdev->state);
@@ -1591,11 +1561,12 @@ static void hclgevf_reset_err_handle(struct hclgevf_dev *hdev)
set_bit(HCLGEVF_RESET_PENDING, &hdev->reset_state);
hclgevf_reset_task_schedule(hdev);
} else {
+ set_bit(HCLGEVF_STATE_RST_FAIL, &hdev->state);
hclgevf_dump_rst_info(hdev);
}
}
-static int hclgevf_reset(struct hclgevf_dev *hdev)
+static int hclgevf_reset_prepare(struct hclgevf_dev *hdev)
{
struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev);
int ret;
@@ -1605,61 +1576,64 @@ static int hclgevf_reset(struct hclgevf_dev *hdev)
*/
ae_dev->reset_type = hdev->reset_type;
hdev->rst_stats.rst_cnt++;
- rtnl_lock();
+ rtnl_lock();
/* bring down the nic to stop any ongoing TX/RX */
ret = hclgevf_notify_client(hdev, HNAE3_DOWN_CLIENT);
- if (ret)
- goto err_reset_lock;
-
rtnl_unlock();
-
- ret = hclgevf_reset_prepare_wait(hdev);
if (ret)
- goto err_reset;
+ return ret;
- /* check if VF could successfully fetch the hardware reset completion
- * status from the hardware
- */
- ret = hclgevf_reset_wait(hdev);
- if (ret) {
- /* can't do much in this situation, will disable VF */
- dev_err(&hdev->pdev->dev,
- "VF failed(=%d) to fetch H/W reset completion status\n",
- ret);
- goto err_reset;
- }
+ return hclgevf_reset_prepare_wait(hdev);
+}
+
+static int hclgevf_reset_rebuild(struct hclgevf_dev *hdev)
+{
+ struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev);
+ int ret;
hdev->rst_stats.hw_rst_done_cnt++;
rtnl_lock();
-
/* now, re-initialize the nic client and ae device */
ret = hclgevf_reset_stack(hdev);
+ rtnl_unlock();
if (ret) {
dev_err(&hdev->pdev->dev, "failed to reset VF stack\n");
- goto err_reset_lock;
+ return ret;
}
- /* bring up the nic to enable TX/RX again */
- ret = hclgevf_notify_client(hdev, HNAE3_UP_CLIENT);
- if (ret)
- goto err_reset_lock;
-
- rtnl_unlock();
-
hdev->last_reset_time = jiffies;
ae_dev->reset_type = HNAE3_NONE_RESET;
hdev->rst_stats.rst_done_cnt++;
hdev->rst_stats.rst_fail_cnt = 0;
+ clear_bit(HCLGEVF_STATE_RST_FAIL, &hdev->state);
+
+ return 0;
+}
+
+static void hclgevf_reset(struct hclgevf_dev *hdev)
+{
+ if (hclgevf_reset_prepare(hdev))
+ goto err_reset;
+
+ /* check if VF could successfully fetch the hardware reset completion
+ * status from the hardware
+ */
+ if (hclgevf_reset_wait(hdev)) {
+ /* can't do much in this situation, will disable VF */
+ dev_err(&hdev->pdev->dev,
+ "failed to fetch H/W reset completion status\n");
+ goto err_reset;
+ }
+
+ if (hclgevf_reset_rebuild(hdev))
+ goto err_reset;
+
+ return;
- return ret;
-err_reset_lock:
- rtnl_unlock();
err_reset:
hclgevf_reset_err_handle(hdev);
-
- return ret;
}
static enum hnae3_reset_type hclgevf_get_reset_level(struct hclgevf_dev *hdev,
@@ -1722,25 +1696,60 @@ static void hclgevf_set_def_reset_request(struct hnae3_ae_dev *ae_dev,
set_bit(rst_type, &hdev->default_reset_request);
}
+static void hclgevf_enable_vector(struct hclgevf_misc_vector *vector, bool en)
+{
+ writel(en ? 1 : 0, vector->addr);
+}
+
static void hclgevf_flr_prepare(struct hnae3_ae_dev *ae_dev)
{
-#define HCLGEVF_FLR_WAIT_MS 100
-#define HCLGEVF_FLR_WAIT_CNT 50
+#define HCLGEVF_FLR_RETRY_WAIT_MS 500
+#define HCLGEVF_FLR_RETRY_CNT 5
+
+ struct hclgevf_dev *hdev = ae_dev->priv;
+ int retry_cnt = 0;
+ int ret;
+
+retry:
+ down(&hdev->reset_sem);
+ set_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state);
+ hdev->reset_type = HNAE3_FLR_RESET;
+ ret = hclgevf_reset_prepare(hdev);
+ if (ret) {
+ dev_err(&hdev->pdev->dev, "fail to prepare FLR, ret=%d\n",
+ ret);
+ if (hdev->reset_pending ||
+ retry_cnt++ < HCLGEVF_FLR_RETRY_CNT) {
+ dev_err(&hdev->pdev->dev,
+ "reset_pending:0x%lx, retry_cnt:%d\n",
+ hdev->reset_pending, retry_cnt);
+ clear_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state);
+ up(&hdev->reset_sem);
+ msleep(HCLGEVF_FLR_RETRY_WAIT_MS);
+ goto retry;
+ }
+ }
+
+ /* disable misc vector before FLR done */
+ hclgevf_enable_vector(&hdev->misc_vector, false);
+ hdev->rst_stats.flr_rst_cnt++;
+}
+
+static void hclgevf_flr_done(struct hnae3_ae_dev *ae_dev)
+{
struct hclgevf_dev *hdev = ae_dev->priv;
- int cnt = 0;
+ int ret;
- clear_bit(HNAE3_FLR_DOWN, &hdev->flr_state);
- clear_bit(HNAE3_FLR_DONE, &hdev->flr_state);
- set_bit(HNAE3_FLR_RESET, &hdev->default_reset_request);
- hclgevf_reset_event(hdev->pdev, NULL);
+ hclgevf_enable_vector(&hdev->misc_vector, true);
- while (!test_bit(HNAE3_FLR_DOWN, &hdev->flr_state) &&
- cnt++ < HCLGEVF_FLR_WAIT_CNT)
- msleep(HCLGEVF_FLR_WAIT_MS);
+ ret = hclgevf_reset_rebuild(hdev);
+ if (ret)
+ dev_warn(&hdev->pdev->dev, "fail to rebuild, ret=%d\n",
+ ret);
- if (!test_bit(HNAE3_FLR_DOWN, &hdev->flr_state))
- dev_err(&hdev->pdev->dev,
- "flr wait down timeout: %d\n", cnt);
+ hdev->reset_type = HNAE3_NONE_RESET;
+ clear_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state);
+ up(&hdev->reset_sem);
}
static u32 hclgevf_get_fw_version(struct hnae3_handle *handle)
@@ -1767,62 +1776,37 @@ static void hclgevf_get_misc_vector(struct hclgevf_dev *hdev)
void hclgevf_reset_task_schedule(struct hclgevf_dev *hdev)
{
- if (!test_bit(HCLGEVF_STATE_RST_SERVICE_SCHED, &hdev->state) &&
- !test_bit(HCLGEVF_STATE_REMOVING, &hdev->state)) {
- set_bit(HCLGEVF_STATE_RST_SERVICE_SCHED, &hdev->state);
- schedule_work(&hdev->rst_service_task);
- }
+ if (!test_bit(HCLGEVF_STATE_REMOVING, &hdev->state) &&
+ !test_and_set_bit(HCLGEVF_STATE_RST_SERVICE_SCHED,
+ &hdev->state))
+ mod_delayed_work(hclgevf_wq, &hdev->service_task, 0);
}
void hclgevf_mbx_task_schedule(struct hclgevf_dev *hdev)
{
- if (!test_bit(HCLGEVF_STATE_MBX_SERVICE_SCHED, &hdev->state) &&
- !test_bit(HCLGEVF_STATE_MBX_HANDLING, &hdev->state)) {
- set_bit(HCLGEVF_STATE_MBX_SERVICE_SCHED, &hdev->state);
- schedule_work(&hdev->mbx_service_task);
- }
+ if (!test_bit(HCLGEVF_STATE_REMOVING, &hdev->state) &&
+ !test_and_set_bit(HCLGEVF_STATE_MBX_SERVICE_SCHED,
+ &hdev->state))
+ mod_delayed_work(hclgevf_wq, &hdev->service_task, 0);
}
-static void hclgevf_task_schedule(struct hclgevf_dev *hdev)
+static void hclgevf_task_schedule(struct hclgevf_dev *hdev,
+ unsigned long delay)
{
- if (!test_bit(HCLGEVF_STATE_DOWN, &hdev->state) &&
- !test_and_set_bit(HCLGEVF_STATE_SERVICE_SCHED, &hdev->state))
- schedule_work(&hdev->service_task);
-}
-
-static void hclgevf_deferred_task_schedule(struct hclgevf_dev *hdev)
-{
- /* if we have any pending mailbox event then schedule the mbx task */
- if (hdev->mbx_event_pending)
- hclgevf_mbx_task_schedule(hdev);
-
- if (test_bit(HCLGEVF_RESET_PENDING, &hdev->reset_state))
- hclgevf_reset_task_schedule(hdev);
+ if (!test_bit(HCLGEVF_STATE_REMOVING, &hdev->state) &&
+ !test_bit(HCLGEVF_STATE_RST_FAIL, &hdev->state))
+ mod_delayed_work(hclgevf_wq, &hdev->service_task, delay);
}
-static void hclgevf_service_timer(struct timer_list *t)
-{
- struct hclgevf_dev *hdev = from_timer(hdev, t, service_timer);
-
- mod_timer(&hdev->service_timer, jiffies +
- HCLGEVF_GENERAL_TASK_INTERVAL * HZ);
-
- hdev->stats_timer++;
- hclgevf_task_schedule(hdev);
-}
-
-static void hclgevf_reset_service_task(struct work_struct *work)
+static void hclgevf_reset_service_task(struct hclgevf_dev *hdev)
{
#define HCLGEVF_MAX_RESET_ATTEMPTS_CNT 3
- struct hclgevf_dev *hdev =
- container_of(work, struct hclgevf_dev, rst_service_task);
- int ret;
-
- if (test_and_set_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state))
+ if (!test_and_clear_bit(HCLGEVF_STATE_RST_SERVICE_SCHED, &hdev->state))
return;
- clear_bit(HCLGEVF_STATE_RST_SERVICE_SCHED, &hdev->state);
+ down(&hdev->reset_sem);
+ set_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state);
if (test_and_clear_bit(HCLGEVF_RESET_PENDING,
&hdev->reset_state)) {
@@ -1836,12 +1820,8 @@ static void hclgevf_reset_service_task(struct work_struct *work)
hdev->last_reset_time = jiffies;
while ((hdev->reset_type =
hclgevf_get_reset_level(hdev, &hdev->reset_pending))
- != HNAE3_NONE_RESET) {
- ret = hclgevf_reset(hdev);
- if (ret)
- dev_err(&hdev->pdev->dev,
- "VF stack reset failed %d.\n", ret);
- }
+ != HNAE3_NONE_RESET)
+ hclgevf_reset(hdev);
} else if (test_and_clear_bit(HCLGEVF_RESET_REQUESTED,
&hdev->reset_state)) {
/* we could be here when either of below happens:
@@ -1883,41 +1863,27 @@ static void hclgevf_reset_service_task(struct work_struct *work)
}
clear_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state);
+ up(&hdev->reset_sem);
}
-static void hclgevf_mailbox_service_task(struct work_struct *work)
+static void hclgevf_mailbox_service_task(struct hclgevf_dev *hdev)
{
- struct hclgevf_dev *hdev;
-
- hdev = container_of(work, struct hclgevf_dev, mbx_service_task);
+ if (!test_and_clear_bit(HCLGEVF_STATE_MBX_SERVICE_SCHED, &hdev->state))
+ return;
if (test_and_set_bit(HCLGEVF_STATE_MBX_HANDLING, &hdev->state))
return;
- clear_bit(HCLGEVF_STATE_MBX_SERVICE_SCHED, &hdev->state);
-
hclgevf_mbx_async_handler(hdev);
clear_bit(HCLGEVF_STATE_MBX_HANDLING, &hdev->state);
}
-static void hclgevf_keep_alive_timer(struct timer_list *t)
-{
- struct hclgevf_dev *hdev = from_timer(hdev, t, keep_alive_timer);
-
- schedule_work(&hdev->keep_alive_task);
- mod_timer(&hdev->keep_alive_timer, jiffies +
- HCLGEVF_KEEP_ALIVE_TASK_INTERVAL * HZ);
-}
-
-static void hclgevf_keep_alive_task(struct work_struct *work)
+static void hclgevf_keep_alive(struct hclgevf_dev *hdev)
{
- struct hclgevf_dev *hdev;
u8 respmsg;
int ret;
- hdev = container_of(work, struct hclgevf_dev, keep_alive_task);
-
if (test_bit(HCLGEVF_STATE_CMD_DISABLE, &hdev->state))
return;
@@ -1928,19 +1894,32 @@ static void hclgevf_keep_alive_task(struct work_struct *work)
"VF sends keep alive cmd failed(=%d)\n", ret);
}
-static void hclgevf_service_task(struct work_struct *work)
+static void hclgevf_periodic_service_task(struct hclgevf_dev *hdev)
{
- struct hnae3_handle *handle;
- struct hclgevf_dev *hdev;
+ unsigned long delta = round_jiffies_relative(HZ);
+ struct hnae3_handle *handle = &hdev->nic;
- hdev = container_of(work, struct hclgevf_dev, service_task);
- handle = &hdev->nic;
+ if (time_is_after_jiffies(hdev->last_serv_processed + HZ)) {
+ delta = jiffies - hdev->last_serv_processed;
- if (hdev->stats_timer >= HCLGEVF_STATS_TIMER_INTERVAL) {
- hclgevf_tqps_update_stats(handle);
- hdev->stats_timer = 0;
+ if (delta < round_jiffies_relative(HZ)) {
+ delta = round_jiffies_relative(HZ) - delta;
+ goto out;
+ }
}
+ hdev->serv_processed_cnt++;
+ if (!(hdev->serv_processed_cnt % HCLGEVF_KEEP_ALIVE_TASK_INTERVAL))
+ hclgevf_keep_alive(hdev);
+
+ if (test_bit(HCLGEVF_STATE_DOWN, &hdev->state)) {
+ hdev->last_serv_processed = jiffies;
+ goto out;
+ }
+
+ if (!(hdev->serv_processed_cnt % HCLGEVF_STATS_TIMER_INTERVAL))
+ hclgevf_tqps_update_stats(handle);
+
/* request the link status from the PF. PF would be able to tell VF
* about such updates in future so we might remove this later
*/
@@ -1950,9 +1929,27 @@ static void hclgevf_service_task(struct work_struct *work)
hclgevf_sync_vlan_filter(hdev);
- hclgevf_deferred_task_schedule(hdev);
+ hdev->last_serv_processed = jiffies;
- clear_bit(HCLGEVF_STATE_SERVICE_SCHED, &hdev->state);
+out:
+ hclgevf_task_schedule(hdev, delta);
+}
+
+static void hclgevf_service_task(struct work_struct *work)
+{
+ struct hclgevf_dev *hdev = container_of(work, struct hclgevf_dev,
+ service_task.work);
+
+ hclgevf_reset_service_task(hdev);
+ hclgevf_mailbox_service_task(hdev);
+ hclgevf_periodic_service_task(hdev);
+
+ /* Handle reset and mbx again in case periodical task delays the
+ * handling by calling hclgevf_task_schedule() in
+ * hclgevf_periodic_service_task()
+ */
+ hclgevf_reset_service_task(hdev);
+ hclgevf_mailbox_service_task(hdev);
}
static void hclgevf_clear_event_cause(struct hclgevf_dev *hdev, u32 regclr)
@@ -2010,11 +2007,6 @@ static enum hclgevf_evt_cause hclgevf_check_evt_cause(struct hclgevf_dev *hdev,
return HCLGEVF_VECTOR0_EVENT_OTHER;
}
-static void hclgevf_enable_vector(struct hclgevf_misc_vector *vector, bool en)
-{
- writel(en ? 1 : 0, vector->addr);
-}
-
static irqreturn_t hclgevf_misc_irq_handle(int irq, void *data)
{
enum hclgevf_evt_cause event_cause;
@@ -2189,16 +2181,31 @@ static int hclgevf_init_vlan_config(struct hclgevf_dev *hdev)
false);
}
+static void hclgevf_flush_link_update(struct hclgevf_dev *hdev)
+{
+#define HCLGEVF_FLUSH_LINK_TIMEOUT 100000
+
+ unsigned long last = hdev->serv_processed_cnt;
+ int i = 0;
+
+ while (test_bit(HCLGEVF_STATE_LINK_UPDATING, &hdev->state) &&
+ i++ < HCLGEVF_FLUSH_LINK_TIMEOUT &&
+ last == hdev->serv_processed_cnt)
+ usleep_range(1, 1);
+}
+
static void hclgevf_set_timer_task(struct hnae3_handle *handle, bool enable)
{
struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
if (enable) {
- mod_timer(&hdev->service_timer, jiffies + HZ);
+ hclgevf_task_schedule(hdev, 0);
} else {
- del_timer_sync(&hdev->service_timer);
- cancel_work_sync(&hdev->service_task);
- clear_bit(HCLGEVF_STATE_SERVICE_SCHED, &hdev->state);
+ set_bit(HCLGEVF_STATE_DOWN, &hdev->state);
+
+ /* flush memory to make sure DOWN is seen by service task */
+ smp_mb__before_atomic();
+ hclgevf_flush_link_update(hdev);
}
}
@@ -2245,16 +2252,12 @@ static int hclgevf_set_alive(struct hnae3_handle *handle, bool alive)
static int hclgevf_client_start(struct hnae3_handle *handle)
{
- struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
int ret;
ret = hclgevf_set_alive(handle, true);
if (ret)
return ret;
- mod_timer(&hdev->keep_alive_timer, jiffies +
- HCLGEVF_KEEP_ALIVE_TASK_INTERVAL * HZ);
-
return 0;
}
@@ -2267,27 +2270,18 @@ static void hclgevf_client_stop(struct hnae3_handle *handle)
if (ret)
dev_warn(&hdev->pdev->dev,
"%s failed %d\n", __func__, ret);
-
- del_timer_sync(&hdev->keep_alive_timer);
- cancel_work_sync(&hdev->keep_alive_task);
}
static void hclgevf_state_init(struct hclgevf_dev *hdev)
{
- /* setup tasks for the MBX */
- INIT_WORK(&hdev->mbx_service_task, hclgevf_mailbox_service_task);
clear_bit(HCLGEVF_STATE_MBX_SERVICE_SCHED, &hdev->state);
clear_bit(HCLGEVF_STATE_MBX_HANDLING, &hdev->state);
+ clear_bit(HCLGEVF_STATE_RST_FAIL, &hdev->state);
- /* setup tasks for service timer */
- timer_setup(&hdev->service_timer, hclgevf_service_timer, 0);
-
- INIT_WORK(&hdev->service_task, hclgevf_service_task);
- clear_bit(HCLGEVF_STATE_SERVICE_SCHED, &hdev->state);
-
- INIT_WORK(&hdev->rst_service_task, hclgevf_reset_service_task);
+ INIT_DELAYED_WORK(&hdev->service_task, hclgevf_service_task);
mutex_init(&hdev->mbx_resp.mbx_mutex);
+ sema_init(&hdev->reset_sem, 1);
/* bring the device down */
set_bit(HCLGEVF_STATE_DOWN, &hdev->state);
@@ -2298,18 +2292,8 @@ static void hclgevf_state_uninit(struct hclgevf_dev *hdev)
set_bit(HCLGEVF_STATE_DOWN, &hdev->state);
set_bit(HCLGEVF_STATE_REMOVING, &hdev->state);
- if (hdev->keep_alive_timer.function)
- del_timer_sync(&hdev->keep_alive_timer);
- if (hdev->keep_alive_task.func)
- cancel_work_sync(&hdev->keep_alive_task);
- if (hdev->service_timer.function)
- del_timer_sync(&hdev->service_timer);
- if (hdev->service_task.func)
- cancel_work_sync(&hdev->service_task);
- if (hdev->mbx_service_task.func)
- cancel_work_sync(&hdev->mbx_service_task);
- if (hdev->rst_service_task.func)
- cancel_work_sync(&hdev->rst_service_task);
+ if (hdev->service_task.work.func)
+ cancel_delayed_work_sync(&hdev->service_task);
mutex_destroy(&hdev->mbx_resp.mbx_mutex);
}
@@ -2383,8 +2367,10 @@ static int hclgevf_misc_irq_init(struct hclgevf_dev *hdev)
hclgevf_get_misc_vector(hdev);
+ snprintf(hdev->misc_vector.name, HNAE3_INT_NAME_LEN, "%s-misc-%s",
+ HCLGEVF_NAME, pci_name(hdev->pdev));
ret = request_irq(hdev->misc_vector.vector_irq, hclgevf_misc_irq_handle,
- 0, "hclgevf_cmd", hdev);
+ 0, hdev->misc_vector.name, hdev);
if (ret) {
dev_err(&hdev->pdev->dev, "VF failed to request misc irq(%d)\n",
hdev->misc_vector.vector_irq);
@@ -2807,6 +2793,8 @@ static int hclgevf_init_hdev(struct hclgevf_dev *hdev)
dev_info(&hdev->pdev->dev, "finished initializing %s driver\n",
HCLGEVF_DRIVER_NAME);
+ hclgevf_task_schedule(hdev, round_jiffies_relative(HZ));
+
return 0;
err_config:
@@ -2838,7 +2826,6 @@ static void hclgevf_uninit_hdev(struct hclgevf_dev *hdev)
static int hclgevf_init_ae_dev(struct hnae3_ae_dev *ae_dev)
{
struct pci_dev *pdev = ae_dev->pdev;
- struct hclgevf_dev *hdev;
int ret;
ret = hclgevf_alloc_hdev(ae_dev);
@@ -2853,10 +2840,6 @@ static int hclgevf_init_ae_dev(struct hnae3_ae_dev *ae_dev)
return ret;
}
- hdev = ae_dev->priv;
- timer_setup(&hdev->keep_alive_timer, hclgevf_keep_alive_timer, 0);
- INIT_WORK(&hdev->keep_alive_task, hclgevf_keep_alive_task);
-
return 0;
}
@@ -3213,6 +3196,12 @@ static int hclgevf_init(void)
{
pr_info("%s is initializing\n", HCLGEVF_NAME);
+ hclgevf_wq = alloc_workqueue("%s", WQ_MEM_RECLAIM, 0, HCLGEVF_NAME);
+ if (!hclgevf_wq) {
+ pr_err("%s: failed to create workqueue\n", HCLGEVF_NAME);
+ return -ENOMEM;
+ }
+
hnae3_register_ae_algo(&ae_algovf);
return 0;
@@ -3221,6 +3210,7 @@ static int hclgevf_init(void)
static void hclgevf_exit(void)
{
hnae3_unregister_ae_algo(&ae_algovf);
+ destroy_workqueue(hclgevf_wq);
}
module_init(hclgevf_init);
module_exit(hclgevf_exit);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
index 2f4c81bf4169..fee8d97f323c 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
@@ -142,12 +142,13 @@ enum hclgevf_states {
HCLGEVF_STATE_REMOVING,
HCLGEVF_STATE_NIC_REGISTERED,
/* task states */
- HCLGEVF_STATE_SERVICE_SCHED,
HCLGEVF_STATE_RST_SERVICE_SCHED,
HCLGEVF_STATE_RST_HANDLING,
HCLGEVF_STATE_MBX_SERVICE_SCHED,
HCLGEVF_STATE_MBX_HANDLING,
HCLGEVF_STATE_CMD_DISABLE,
+ HCLGEVF_STATE_LINK_UPDATING,
+ HCLGEVF_STATE_RST_FAIL,
};
struct hclgevf_mac {
@@ -220,6 +221,7 @@ struct hclgevf_rss_cfg {
struct hclgevf_misc_vector {
u8 __iomem *addr;
int vector_irq;
+ char name[HNAE3_INT_NAME_LEN];
};
struct hclgevf_rst_stats {
@@ -251,6 +253,7 @@ struct hclgevf_dev {
unsigned long reset_state; /* requested, pending */
struct hclgevf_rst_stats rst_stats;
u32 reset_attempts;
+ struct semaphore reset_sem; /* protect reset process */
u32 fw_version;
u16 num_tqps; /* num task queue pairs of this PF */
@@ -283,12 +286,7 @@ struct hclgevf_dev {
struct hclgevf_mbx_resp_status mbx_resp; /* mailbox response */
struct hclgevf_mbx_arq_ring arq; /* mailbox async rx queue */
- struct timer_list service_timer;
- struct timer_list keep_alive_timer;
- struct work_struct service_task;
- struct work_struct keep_alive_task;
- struct work_struct rst_service_task;
- struct work_struct mbx_service_task;
+ struct delayed_work service_task;
struct hclgevf_tqp *htqp;
@@ -298,7 +296,8 @@ struct hclgevf_dev {
struct hnae3_client *nic_client;
struct hnae3_client *roce_client;
u32 flag;
- u32 stats_timer;
+ unsigned long serv_processed_cnt;
+ unsigned long last_serv_processed;
};
static inline bool hclgevf_is_reset_pending(struct hclgevf_dev *hdev)
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_ethtool.c b/drivers/net/ethernet/huawei/hinic/hinic_ethtool.c
index 60ec48fe4144..966aea949c0b 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_ethtool.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_ethtool.c
@@ -450,7 +450,7 @@ static u32 hinic_get_rxfh_indir_size(struct net_device *netdev)
#define HINIC_FUNC_STAT(_stat_item) { \
.name = #_stat_item, \
- .size = FIELD_SIZEOF(struct hinic_vport_stats, _stat_item), \
+ .size = sizeof_field(struct hinic_vport_stats, _stat_item), \
.offset = offsetof(struct hinic_vport_stats, _stat_item) \
}
@@ -477,7 +477,7 @@ static struct hinic_stats hinic_function_stats[] = {
#define HINIC_PORT_STAT(_stat_item) { \
.name = #_stat_item, \
- .size = FIELD_SIZEOF(struct hinic_phy_port_stats, _stat_item), \
+ .size = sizeof_field(struct hinic_phy_port_stats, _stat_item), \
.offset = offsetof(struct hinic_phy_port_stats, _stat_item) \
}
@@ -571,7 +571,7 @@ static struct hinic_stats hinic_port_stats[] = {
#define HINIC_TXQ_STAT(_stat_item) { \
.name = "txq%d_"#_stat_item, \
- .size = FIELD_SIZEOF(struct hinic_txq_stats, _stat_item), \
+ .size = sizeof_field(struct hinic_txq_stats, _stat_item), \
.offset = offsetof(struct hinic_txq_stats, _stat_item) \
}
@@ -586,7 +586,7 @@ static struct hinic_stats hinic_tx_queue_stats[] = {
#define HINIC_RXQ_STAT(_stat_item) { \
.name = "rxq%d_"#_stat_item, \
- .size = FIELD_SIZEOF(struct hinic_rxq_stats, _stat_item), \
+ .size = sizeof_field(struct hinic_rxq_stats, _stat_item), \
.offset = offsetof(struct hinic_rxq_stats, _stat_item) \
}
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_main.c b/drivers/net/ethernet/huawei/hinic/hinic_main.c
index 2411ad270c98..02a14f5e7fe3 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_main.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_main.c
@@ -766,7 +766,7 @@ static void hinic_set_rx_mode(struct net_device *netdev)
queue_work(nic_dev->workq, &rx_mode_work->work);
}
-static void hinic_tx_timeout(struct net_device *netdev)
+static void hinic_tx_timeout(struct net_device *netdev, unsigned int txqueue)
{
struct hinic_dev *nic_dev = netdev_priv(netdev);
diff --git a/drivers/net/ethernet/i825xx/82596.c b/drivers/net/ethernet/i825xx/82596.c
index 92929750f832..bef676d93339 100644
--- a/drivers/net/ethernet/i825xx/82596.c
+++ b/drivers/net/ethernet/i825xx/82596.c
@@ -363,7 +363,7 @@ static netdev_tx_t i596_start_xmit(struct sk_buff *skb, struct net_device *dev);
static irqreturn_t i596_interrupt(int irq, void *dev_id);
static int i596_close(struct net_device *dev);
static void i596_add_cmd(struct net_device *dev, struct i596_cmd *cmd);
-static void i596_tx_timeout (struct net_device *dev);
+static void i596_tx_timeout (struct net_device *dev, unsigned int txqueue);
static void print_eth(unsigned char *buf, char *str);
static void set_multicast_list(struct net_device *dev);
@@ -1019,7 +1019,7 @@ err_irq_dev:
return res;
}
-static void i596_tx_timeout (struct net_device *dev)
+static void i596_tx_timeout (struct net_device *dev, unsigned int txqueue)
{
struct i596_private *lp = dev->ml_priv;
int ioaddr = dev->base_addr;
diff --git a/drivers/net/ethernet/i825xx/ether1.c b/drivers/net/ethernet/i825xx/ether1.c
index bb3b8adbe4f0..a0bfb509e002 100644
--- a/drivers/net/ethernet/i825xx/ether1.c
+++ b/drivers/net/ethernet/i825xx/ether1.c
@@ -66,7 +66,7 @@ static netdev_tx_t ether1_sendpacket(struct sk_buff *skb,
static irqreturn_t ether1_interrupt(int irq, void *dev_id);
static int ether1_close(struct net_device *dev);
static void ether1_setmulticastlist(struct net_device *dev);
-static void ether1_timeout(struct net_device *dev);
+static void ether1_timeout(struct net_device *dev, unsigned int txqueue);
/* ------------------------------------------------------------------------- */
@@ -650,7 +650,7 @@ ether1_open (struct net_device *dev)
}
static void
-ether1_timeout(struct net_device *dev)
+ether1_timeout(struct net_device *dev, unsigned int txqueue)
{
printk(KERN_WARNING "%s: transmit timeout, network cable problem?\n",
dev->name);
diff --git a/drivers/net/ethernet/i825xx/lib82596.c b/drivers/net/ethernet/i825xx/lib82596.c
index f9742af7f142..b03757e169e4 100644
--- a/drivers/net/ethernet/i825xx/lib82596.c
+++ b/drivers/net/ethernet/i825xx/lib82596.c
@@ -351,7 +351,7 @@ static netdev_tx_t i596_start_xmit(struct sk_buff *skb, struct net_device *dev);
static irqreturn_t i596_interrupt(int irq, void *dev_id);
static int i596_close(struct net_device *dev);
static void i596_add_cmd(struct net_device *dev, struct i596_cmd *cmd);
-static void i596_tx_timeout (struct net_device *dev);
+static void i596_tx_timeout (struct net_device *dev, unsigned int txqueue);
static void print_eth(unsigned char *buf, char *str);
static void set_multicast_list(struct net_device *dev);
static inline void ca(struct net_device *dev);
@@ -936,7 +936,7 @@ out_remove_rx_bufs:
return -EAGAIN;
}
-static void i596_tx_timeout (struct net_device *dev)
+static void i596_tx_timeout (struct net_device *dev, unsigned int txqueue)
{
struct i596_private *lp = netdev_priv(dev);
diff --git a/drivers/net/ethernet/i825xx/sun3_82586.c b/drivers/net/ethernet/i825xx/sun3_82586.c
index 1a86184d44c0..4564ee02c95f 100644
--- a/drivers/net/ethernet/i825xx/sun3_82586.c
+++ b/drivers/net/ethernet/i825xx/sun3_82586.c
@@ -125,7 +125,7 @@ static netdev_tx_t sun3_82586_send_packet(struct sk_buff *,
struct net_device *);
static struct net_device_stats *sun3_82586_get_stats(struct net_device *dev);
static void set_multicast_list(struct net_device *dev);
-static void sun3_82586_timeout(struct net_device *dev);
+static void sun3_82586_timeout(struct net_device *dev, unsigned int txqueue);
#if 0
static void sun3_82586_dump(struct net_device *,void *);
#endif
@@ -965,7 +965,7 @@ static void startrecv586(struct net_device *dev)
WAIT_4_SCB_CMD_RUC(); /* wait for accept cmd. (no timeout!!) */
}
-static void sun3_82586_timeout(struct net_device *dev)
+static void sun3_82586_timeout(struct net_device *dev, unsigned int txqueue)
{
struct priv *p = netdev_priv(dev);
#ifndef NO_NOPCOMMANDS
diff --git a/drivers/net/ethernet/ibm/ehea/ehea_main.c b/drivers/net/ethernet/ibm/ehea/ehea_main.c
index 13e30eba5349..0273fb7a9d01 100644
--- a/drivers/net/ethernet/ibm/ehea/ehea_main.c
+++ b/drivers/net/ethernet/ibm/ehea/ehea_main.c
@@ -2786,7 +2786,7 @@ out:
return;
}
-static void ehea_tx_watchdog(struct net_device *dev)
+static void ehea_tx_watchdog(struct net_device *dev, unsigned int txqueue)
{
struct ehea_port *port = netdev_priv(dev);
diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c
index 2e40425d8a34..b7fc17756c51 100644
--- a/drivers/net/ethernet/ibm/emac/core.c
+++ b/drivers/net/ethernet/ibm/emac/core.c
@@ -776,7 +776,7 @@ static void emac_reset_work(struct work_struct *work)
mutex_unlock(&dev->link_lock);
}
-static void emac_tx_timeout(struct net_device *ndev)
+static void emac_tx_timeout(struct net_device *ndev, unsigned int txqueue)
{
struct emac_instance *dev = netdev_priv(ndev);
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index c90080781924..c75239d8820f 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -184,7 +184,7 @@ static int ibmvnic_wait_for_completion(struct ibmvnic_adapter *adapter,
netdev_err(netdev, "Device down!\n");
return -ENODEV;
}
- if (retry--)
+ if (!retry--)
break;
if (wait_for_completion_timeout(comp_done, div_timeout))
return 0;
@@ -2282,7 +2282,7 @@ err:
return -ret;
}
-static void ibmvnic_tx_timeout(struct net_device *dev)
+static void ibmvnic_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct ibmvnic_adapter *adapter = netdev_priv(dev);
diff --git a/drivers/net/ethernet/intel/e100.c b/drivers/net/ethernet/intel/e100.c
index a65d5a9ba7db..1b8d015ebfb0 100644
--- a/drivers/net/ethernet/intel/e100.c
+++ b/drivers/net/ethernet/intel/e100.c
@@ -2316,7 +2316,7 @@ static void e100_down(struct nic *nic)
e100_rx_clean_list(nic);
}
-static void e100_tx_timeout(struct net_device *netdev)
+static void e100_tx_timeout(struct net_device *netdev, unsigned int txqueue)
{
struct nic *nic = netdev_priv(netdev);
diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c
index aca97b084003..2bced34c19ba 100644
--- a/drivers/net/ethernet/intel/e1000/e1000_main.c
+++ b/drivers/net/ethernet/intel/e1000/e1000_main.c
@@ -134,7 +134,7 @@ static int e1000_mii_ioctl(struct net_device *netdev, struct ifreq *ifr,
int cmd);
static void e1000_enter_82542_rst(struct e1000_adapter *adapter);
static void e1000_leave_82542_rst(struct e1000_adapter *adapter);
-static void e1000_tx_timeout(struct net_device *dev);
+static void e1000_tx_timeout(struct net_device *dev, unsigned int txqueue);
static void e1000_reset_task(struct work_struct *work);
static void e1000_smartspeed(struct e1000_adapter *adapter);
static int e1000_82547_fifo_workaround(struct e1000_adapter *adapter,
@@ -3488,7 +3488,7 @@ exit:
* e1000_tx_timeout - Respond to a Tx Hang
* @netdev: network interface device structure
**/
-static void e1000_tx_timeout(struct net_device *netdev)
+static void e1000_tx_timeout(struct net_device *netdev, unsigned int txqueue)
{
struct e1000_adapter *adapter = netdev_priv(netdev);
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index fe7997c18a10..8797913b2702 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -4723,7 +4723,7 @@ int e1000e_close(struct net_device *netdev)
e1000_free_irq(adapter);
/* Link status message must follow this format */
- pr_info("%s NIC Link is Down\n", netdev->name);
+ netdev_info(netdev, "NIC Link is Down\n");
}
napi_disable(&adapter->napi);
@@ -5073,12 +5073,13 @@ static void e1000_print_link_info(struct e1000_adapter *adapter)
u32 ctrl = er32(CTRL);
/* Link status message must follow this format for user tools */
- pr_info("%s NIC Link is Up %d Mbps %s Duplex, Flow Control: %s\n",
- adapter->netdev->name, adapter->link_speed,
- adapter->link_duplex == FULL_DUPLEX ? "Full" : "Half",
- (ctrl & E1000_CTRL_TFCE) && (ctrl & E1000_CTRL_RFCE) ? "Rx/Tx" :
- (ctrl & E1000_CTRL_RFCE) ? "Rx" :
- (ctrl & E1000_CTRL_TFCE) ? "Tx" : "None");
+ netdev_info(adapter->netdev,
+ "NIC Link is Up %d Mbps %s Duplex, Flow Control: %s\n",
+ adapter->link_speed,
+ adapter->link_duplex == FULL_DUPLEX ? "Full" : "Half",
+ (ctrl & E1000_CTRL_TFCE) && (ctrl & E1000_CTRL_RFCE) ? "Rx/Tx" :
+ (ctrl & E1000_CTRL_RFCE) ? "Rx" :
+ (ctrl & E1000_CTRL_TFCE) ? "Tx" : "None");
}
static bool e1000e_has_link(struct e1000_adapter *adapter)
@@ -5307,7 +5308,7 @@ static void e1000_watchdog_task(struct work_struct *work)
adapter->link_speed = 0;
adapter->link_duplex = 0;
/* Link status message must follow this format */
- pr_info("%s NIC Link is Down\n", adapter->netdev->name);
+ netdev_info(netdev, "NIC Link is Down\n");
netif_carrier_off(netdev);
netif_stop_queue(netdev);
if (!test_bit(__E1000_DOWN, &adapter->state))
@@ -5929,7 +5930,7 @@ static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb,
* e1000_tx_timeout - Respond to a Tx Hang
* @netdev: network interface device structure
**/
-static void e1000_tx_timeout(struct net_device *netdev)
+static void e1000_tx_timeout(struct net_device *netdev, unsigned int txqueue)
{
struct e1000_adapter *adapter = netdev_priv(netdev);
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c b/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c
index c681d2d28107..68edf55ac906 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c
@@ -18,7 +18,7 @@ struct fm10k_stats {
#define FM10K_STAT_FIELDS(_type, _name, _stat) { \
.stat_string = _name, \
- .sizeof_stat = FIELD_SIZEOF(_type, _stat), \
+ .sizeof_stat = sizeof_field(_type, _stat), \
.stat_offset = offsetof(_type, _stat) \
}
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
index 68baee04dc58..ba2566e2123d 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
@@ -697,7 +697,7 @@ static netdev_tx_t fm10k_xmit_frame(struct sk_buff *skb, struct net_device *dev)
* fm10k_tx_timeout - Respond to a Tx Hang
* @netdev: network interface device structure
**/
-static void fm10k_tx_timeout(struct net_device *netdev)
+static void fm10k_tx_timeout(struct net_device *netdev, unsigned int txqueue)
{
struct fm10k_intfc *interface = netdev_priv(netdev);
bool real_tx_hang = false;
diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index cb6367334ca7..4833187bd259 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -1152,7 +1152,7 @@ void i40e_set_fec_in_flags(u8 fec_cfg, u32 *flags);
static inline bool i40e_enabled_xdp_vsi(struct i40e_vsi *vsi)
{
- return !!vsi->xdp_prog;
+ return !!READ_ONCE(vsi->xdp_prog);
}
int i40e_create_queue_channel(struct i40e_vsi *vsi, struct i40e_channel *ch);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index d24d8731bef0..317f3f1458db 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -43,7 +43,7 @@ struct i40e_stats {
*/
#define I40E_STAT(_type, _name, _stat) { \
.stat_string = _name, \
- .sizeof_stat = FIELD_SIZEOF(_type, _stat), \
+ .sizeof_stat = sizeof_field(_type, _stat), \
.stat_offset = offsetof(_type, _stat) \
}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c b/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c
index be24d42280d8..a3da422ab05b 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c
@@ -659,7 +659,7 @@ i40e_status i40e_shutdown_lan_hmc(struct i40e_hw *hw)
#define I40E_HMC_STORE(_struct, _ele) \
offsetof(struct _struct, _ele), \
- FIELD_SIZEOF(struct _struct, _ele)
+ sizeof_field(struct _struct, _ele)
struct i40e_context_ele {
u16 offset;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 1ccabeafa44c..33912cf964eb 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -301,7 +301,7 @@ void i40e_service_event_schedule(struct i40e_pf *pf)
* device is munged, not just the one netdev port, so go for the full
* reset.
**/
-static void i40e_tx_timeout(struct net_device *netdev)
+static void i40e_tx_timeout(struct net_device *netdev, unsigned int txqueue)
{
struct i40e_netdev_priv *np = netdev_priv(netdev);
struct i40e_vsi *vsi = np->vsi;
@@ -6823,8 +6823,8 @@ void i40e_down(struct i40e_vsi *vsi)
for (i = 0; i < vsi->num_queue_pairs; i++) {
i40e_clean_tx_ring(vsi->tx_rings[i]);
if (i40e_enabled_xdp_vsi(vsi)) {
- /* Make sure that in-progress ndo_xdp_xmit
- * calls are completed.
+ /* Make sure that in-progress ndo_xdp_xmit and
+ * ndo_xsk_wakeup calls are completed.
*/
synchronize_rcu();
i40e_clean_tx_ring(vsi->xdp_rings[i]);
@@ -12546,8 +12546,12 @@ static int i40e_xdp_setup(struct i40e_vsi *vsi,
old_prog = xchg(&vsi->xdp_prog, prog);
- if (need_reset)
+ if (need_reset) {
+ if (!prog)
+ /* Wait until ndo_xsk_wakeup completes. */
+ synchronize_rcu();
i40e_reset_and_rebuild(pf, true, true);
+ }
for (i = 0; i < vsi->num_queue_pairs; i++)
WRITE_ONCE(vsi->rx_rings[i]->xdp_prog, vsi->xdp_prog);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
index d07e1a890428..42058fad6a3c 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
@@ -269,7 +269,7 @@ static bool i40e_alloc_buffer_zc(struct i40e_ring *rx_ring,
bi->handle = xsk_umem_adjust_offset(umem, handle, umem->headroom);
- xsk_umem_discard_addr(umem);
+ xsk_umem_release_addr(umem);
return true;
}
@@ -306,7 +306,7 @@ static bool i40e_alloc_buffer_slow_zc(struct i40e_ring *rx_ring,
bi->handle = xsk_umem_adjust_offset(umem, handle, umem->headroom);
- xsk_umem_discard_addr_rq(umem);
+ xsk_umem_release_addr_rq(umem);
return true;
}
@@ -787,8 +787,12 @@ int i40e_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags)
{
struct i40e_netdev_priv *np = netdev_priv(dev);
struct i40e_vsi *vsi = np->vsi;
+ struct i40e_pf *pf = vsi->back;
struct i40e_ring *ring;
+ if (test_bit(__I40E_CONFIG_BUSY, pf->state))
+ return -ENETDOWN;
+
if (test_bit(__I40E_VSI_DOWN, vsi->state))
return -ENETDOWN;
diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
index dad3eec8ccd8..84c3d8d97ef6 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
@@ -42,7 +42,7 @@ struct iavf_stats {
*/
#define IAVF_STAT(_type, _name, _stat) { \
.stat_string = _name, \
- .sizeof_stat = FIELD_SIZEOF(_type, _stat), \
+ .sizeof_stat = sizeof_field(_type, _stat), \
.stat_offset = offsetof(_type, _stat) \
}
diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
index 821987da5698..0a8824871618 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
@@ -159,7 +159,7 @@ void iavf_schedule_reset(struct iavf_adapter *adapter)
* iavf_tx_timeout - Respond to a Tx Hang
* @netdev: network interface device structure
**/
-static void iavf_tx_timeout(struct net_device *netdev)
+static void iavf_tx_timeout(struct net_device *netdev, unsigned int txqueue)
{
struct iavf_adapter *adapter = netdev_priv(netdev);
diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index f972dce8aebb..cb10abb14e11 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -174,6 +174,8 @@ struct ice_sw {
struct ice_pf *pf;
u16 sw_id; /* switch ID for this switch */
u16 bridge_mode; /* VEB/VEPA/Port Virtualizer */
+ struct ice_vsi *dflt_vsi; /* default VSI for this switch */
+ u8 dflt_vsi_ena:1; /* true if above dflt_vsi is enabled */
};
enum ice_state {
@@ -275,6 +277,7 @@ struct ice_vsi {
u8 current_isup:1; /* Sync 'link up' logging */
u8 stat_offsets_loaded:1;
u8 vlan_ena:1;
+ u16 num_vlan;
/* queue information */
u8 tx_mapping_mode; /* ICE_MAP_MODE_[CONTIG|SCATTER] */
@@ -462,12 +465,13 @@ static inline void ice_set_ring_xdp(struct ice_ring *ring)
static inline struct xdp_umem *ice_xsk_umem(struct ice_ring *ring)
{
struct xdp_umem **umems = ring->vsi->xsk_umems;
- int qid = ring->q_index;
+ u16 qid = ring->q_index;
if (ice_ring_is_xdp(ring))
qid -= ring->vsi->num_xdp_txq;
- if (!umems || !umems[qid] || !ice_is_xdp_ena_vsi(ring->vsi))
+ if (qid >= ring->vsi->num_xsk_umems || !umems || !umems[qid] ||
+ !ice_is_xdp_ena_vsi(ring->vsi))
return NULL;
return umems[qid];
diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c
index 77d6a0291e97..d8e975cceb21 100644
--- a/drivers/net/ethernet/intel/ice/ice_base.c
+++ b/drivers/net/ethernet/intel/ice/ice_base.c
@@ -93,7 +93,8 @@ static int ice_pf_rxq_wait(struct ice_pf *pf, int pf_q, bool ena)
* @vsi: the VSI being configured
* @v_idx: index of the vector in the VSI struct
*
- * We allocate one q_vector. If allocation fails we return -ENOMEM.
+ * We allocate one q_vector and set default value for ITR setting associated
+ * with this q_vector. If allocation fails we return -ENOMEM.
*/
static int ice_vsi_alloc_q_vector(struct ice_vsi *vsi, int v_idx)
{
@@ -108,6 +109,8 @@ static int ice_vsi_alloc_q_vector(struct ice_vsi *vsi, int v_idx)
q_vector->vsi = vsi;
q_vector->v_idx = v_idx;
+ q_vector->tx.itr_setting = ICE_DFLT_TX_ITR;
+ q_vector->rx.itr_setting = ICE_DFLT_RX_ITR;
if (vsi->type == ICE_VSI_VF)
goto out;
/* only set affinity_mask if the CPU is online */
@@ -299,6 +302,7 @@ int ice_setup_rx_ctx(struct ice_ring *ring)
if (ring->vsi->type == ICE_VSI_PF) {
if (!xdp_rxq_info_is_reg(&ring->xdp_rxq))
+ /* coverity[check_return] */
xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev,
ring->q_index);
@@ -323,7 +327,9 @@ int ice_setup_rx_ctx(struct ice_ring *ring)
dev_info(&vsi->back->pdev->dev, "Registered XDP mem model MEM_TYPE_ZERO_COPY on Rx ring %d\n",
ring->q_index);
} else {
+ ring->zca.free = NULL;
if (!xdp_rxq_info_is_reg(&ring->xdp_rxq))
+ /* coverity[check_return] */
xdp_rxq_info_reg(&ring->xdp_rxq,
ring->netdev,
ring->q_index);
@@ -674,10 +680,6 @@ void ice_cfg_itr(struct ice_hw *hw, struct ice_q_vector *q_vector)
if (q_vector->num_ring_rx) {
struct ice_ring_container *rc = &q_vector->rx;
- /* if this value is set then don't overwrite with default */
- if (!rc->itr_setting)
- rc->itr_setting = ICE_DFLT_RX_ITR;
-
rc->target_itr = ITR_TO_REG(rc->itr_setting);
rc->next_update = jiffies + 1;
rc->current_itr = rc->target_itr;
@@ -688,10 +690,6 @@ void ice_cfg_itr(struct ice_hw *hw, struct ice_q_vector *q_vector)
if (q_vector->num_ring_tx) {
struct ice_ring_container *rc = &q_vector->tx;
- /* if this value is set then don't overwrite with default */
- if (!rc->itr_setting)
- rc->itr_setting = ICE_DFLT_TX_ITR;
-
rc->target_itr = ITR_TO_REG(rc->itr_setting);
rc->next_update = jiffies + 1;
rc->current_itr = rc->target_itr;
diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
index fb1d930470c7..a03b4fdc01e6 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.c
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -7,25 +7,6 @@
#define ICE_PF_RESET_WAIT_COUNT 200
-#define ICE_PROG_FLEX_ENTRY(hw, rxdid, mdid, idx) \
- wr32((hw), GLFLXP_RXDID_FLX_WRD_##idx(rxdid), \
- ((ICE_RX_OPC_MDID << \
- GLFLXP_RXDID_FLX_WRD_##idx##_RXDID_OPCODE_S) & \
- GLFLXP_RXDID_FLX_WRD_##idx##_RXDID_OPCODE_M) | \
- (((mdid) << GLFLXP_RXDID_FLX_WRD_##idx##_PROT_MDID_S) & \
- GLFLXP_RXDID_FLX_WRD_##idx##_PROT_MDID_M))
-
-#define ICE_PROG_FLG_ENTRY(hw, rxdid, flg_0, flg_1, flg_2, flg_3, idx) \
- wr32((hw), GLFLXP_RXDID_FLAGS(rxdid, idx), \
- (((flg_0) << GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_S) & \
- GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_M) | \
- (((flg_1) << GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_1_S) & \
- GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_1_M) | \
- (((flg_2) << GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_2_S) & \
- GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_2_M) | \
- (((flg_3) << GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_3_S) & \
- GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_3_M))
-
/**
* ice_set_mac_type - Sets MAC type
* @hw: pointer to the HW structure
@@ -348,88 +329,6 @@ ice_aq_get_link_info(struct ice_port_info *pi, bool ena_lse,
}
/**
- * ice_init_flex_flags
- * @hw: pointer to the hardware structure
- * @prof_id: Rx Descriptor Builder profile ID
- *
- * Function to initialize Rx flex flags
- */
-static void ice_init_flex_flags(struct ice_hw *hw, enum ice_rxdid prof_id)
-{
- u8 idx = 0;
-
- /* Flex-flag fields (0-2) are programmed with FLG64 bits with layout:
- * flexiflags0[5:0] - TCP flags, is_packet_fragmented, is_packet_UDP_GRE
- * flexiflags1[3:0] - Not used for flag programming
- * flexiflags2[7:0] - Tunnel and VLAN types
- * 2 invalid fields in last index
- */
- switch (prof_id) {
- /* Rx flex flags are currently programmed for the NIC profiles only.
- * Different flag bit programming configurations can be added per
- * profile as needed.
- */
- case ICE_RXDID_FLEX_NIC:
- case ICE_RXDID_FLEX_NIC_2:
- ICE_PROG_FLG_ENTRY(hw, prof_id, ICE_FLG_PKT_FRG,
- ICE_FLG_UDP_GRE, ICE_FLG_PKT_DSI,
- ICE_FLG_FIN, idx++);
- /* flex flag 1 is not used for flexi-flag programming, skipping
- * these four FLG64 bits.
- */
- ICE_PROG_FLG_ENTRY(hw, prof_id, ICE_FLG_SYN, ICE_FLG_RST,
- ICE_FLG_PKT_DSI, ICE_FLG_PKT_DSI, idx++);
- ICE_PROG_FLG_ENTRY(hw, prof_id, ICE_FLG_PKT_DSI,
- ICE_FLG_PKT_DSI, ICE_FLG_EVLAN_x8100,
- ICE_FLG_EVLAN_x9100, idx++);
- ICE_PROG_FLG_ENTRY(hw, prof_id, ICE_FLG_VLAN_x8100,
- ICE_FLG_TNL_VLAN, ICE_FLG_TNL_MAC,
- ICE_FLG_TNL0, idx++);
- ICE_PROG_FLG_ENTRY(hw, prof_id, ICE_FLG_TNL1, ICE_FLG_TNL2,
- ICE_FLG_PKT_DSI, ICE_FLG_PKT_DSI, idx);
- break;
-
- default:
- ice_debug(hw, ICE_DBG_INIT,
- "Flag programming for profile ID %d not supported\n",
- prof_id);
- }
-}
-
-/**
- * ice_init_flex_flds
- * @hw: pointer to the hardware structure
- * @prof_id: Rx Descriptor Builder profile ID
- *
- * Function to initialize flex descriptors
- */
-static void ice_init_flex_flds(struct ice_hw *hw, enum ice_rxdid prof_id)
-{
- enum ice_flex_rx_mdid mdid;
-
- switch (prof_id) {
- case ICE_RXDID_FLEX_NIC:
- case ICE_RXDID_FLEX_NIC_2:
- ICE_PROG_FLEX_ENTRY(hw, prof_id, ICE_RX_MDID_HASH_LOW, 0);
- ICE_PROG_FLEX_ENTRY(hw, prof_id, ICE_RX_MDID_HASH_HIGH, 1);
- ICE_PROG_FLEX_ENTRY(hw, prof_id, ICE_RX_MDID_FLOW_ID_LOWER, 2);
-
- mdid = (prof_id == ICE_RXDID_FLEX_NIC_2) ?
- ICE_RX_MDID_SRC_VSI : ICE_RX_MDID_FLOW_ID_HIGH;
-
- ICE_PROG_FLEX_ENTRY(hw, prof_id, mdid, 3);
-
- ice_init_flex_flags(hw, prof_id);
- break;
-
- default:
- ice_debug(hw, ICE_DBG_INIT,
- "Field init for profile ID %d not supported\n",
- prof_id);
- }
-}
-
-/**
* ice_init_fltr_mgmt_struct - initializes filter management list and locks
* @hw: pointer to the HW struct
*/
@@ -882,9 +781,6 @@ enum ice_status ice_init_hw(struct ice_hw *hw)
if (status)
goto err_unroll_fltr_mgmt_struct;
-
- ice_init_flex_flds(hw, ICE_RXDID_FLEX_NIC);
- ice_init_flex_flds(hw, ICE_RXDID_FLEX_NIC_2);
status = ice_init_hw_tbls(hw);
if (status)
goto err_unroll_fltr_mgmt_struct;
diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
index d3d3ec29def9..0664e5b8d130 100644
--- a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
@@ -396,6 +396,12 @@ dcb_error:
prev_cfg->etscfg.tcbwtable[0] = ICE_TC_MAX_BW;
prev_cfg->etscfg.tsatable[0] = ICE_IEEE_TSA_ETS;
memcpy(&prev_cfg->etsrec, &prev_cfg->etscfg, sizeof(prev_cfg->etsrec));
+ /* Coverity warns the return code of ice_pf_dcb_cfg() is not checked
+ * here as is done for other calls to that function. That check is
+ * not necessary since this is in this function's error cleanup path.
+ * Suppress the Coverity warning with the following comment...
+ */
+ /* coverity[check_return] */
ice_pf_dcb_cfg(pf, prev_cfg, false);
kfree(prev_cfg);
}
diff --git a/drivers/net/ethernet/intel/ice/ice_devids.h b/drivers/net/ethernet/intel/ice/ice_devids.h
index f8d5c661d0ba..ce63017c56c7 100644
--- a/drivers/net/ethernet/intel/ice/ice_devids.h
+++ b/drivers/net/ethernet/intel/ice/ice_devids.h
@@ -11,5 +11,23 @@
#define ICE_DEV_ID_E810C_QSFP 0x1592
/* Intel(R) Ethernet Controller E810-C for SFP */
#define ICE_DEV_ID_E810C_SFP 0x1593
+/* Intel(R) Ethernet Connection E822-C for backplane */
+#define ICE_DEV_ID_E822C_BACKPLANE 0x1890
+/* Intel(R) Ethernet Connection E822-C for QSFP */
+#define ICE_DEV_ID_E822C_QSFP 0x1891
+/* Intel(R) Ethernet Connection E822-C for SFP */
+#define ICE_DEV_ID_E822C_SFP 0x1892
+/* Intel(R) Ethernet Connection E822-C/X557-AT 10GBASE-T */
+#define ICE_DEV_ID_E822C_10G_BASE_T 0x1893
+/* Intel(R) Ethernet Connection E822-C 1GbE */
+#define ICE_DEV_ID_E822C_SGMII 0x1894
+/* Intel(R) Ethernet Connection E822-X for backplane */
+#define ICE_DEV_ID_E822X_BACKPLANE 0x1897
+/* Intel(R) Ethernet Connection E822-L for SFP */
+#define ICE_DEV_ID_E822L_SFP 0x1898
+/* Intel(R) Ethernet Connection E822-L/X557-AT 10GBASE-T */
+#define ICE_DEV_ID_E822L_10G_BASE_T 0x1899
+/* Intel(R) Ethernet Connection E822-L 1GbE */
+#define ICE_DEV_ID_E822L_SGMII 0x189A
#endif /* _ICE_DEVIDS_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index aec3c6c379df..f395457b728f 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -15,7 +15,7 @@ struct ice_stats {
#define ICE_STAT(_type, _name, _stat) { \
.stat_string = _name, \
- .sizeof_stat = FIELD_SIZEOF(_type, _stat), \
+ .sizeof_stat = sizeof_field(_type, _stat), \
.stat_offset = offsetof(_type, _stat) \
}
@@ -36,10 +36,10 @@ static int ice_q_stats_len(struct net_device *netdev)
#define ICE_VSI_STATS_LEN ARRAY_SIZE(ice_gstrings_vsi_stats)
#define ICE_PFC_STATS_LEN ( \
- (FIELD_SIZEOF(struct ice_pf, stats.priority_xoff_rx) + \
- FIELD_SIZEOF(struct ice_pf, stats.priority_xon_rx) + \
- FIELD_SIZEOF(struct ice_pf, stats.priority_xoff_tx) + \
- FIELD_SIZEOF(struct ice_pf, stats.priority_xon_tx)) \
+ (sizeof_field(struct ice_pf, stats.priority_xoff_rx) + \
+ sizeof_field(struct ice_pf, stats.priority_xon_rx) + \
+ sizeof_field(struct ice_pf, stats.priority_xoff_tx) + \
+ sizeof_field(struct ice_pf, stats.priority_xon_tx)) \
/ sizeof(u64))
#define ICE_ALL_STATS_LEN(n) (ICE_PF_STATS_LEN + ICE_PFC_STATS_LEN + \
ICE_VSI_STATS_LEN + ice_q_stats_len(n))
@@ -283,12 +283,15 @@ out:
*/
static bool ice_active_vfs(struct ice_pf *pf)
{
- struct ice_vf *vf = pf->vf;
int i;
- for (i = 0; i < pf->num_alloc_vfs; i++, vf++)
+ ice_for_each_vf(pf, i) {
+ struct ice_vf *vf = &pf->vf[i];
+
if (test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states))
return true;
+ }
+
return false;
}
@@ -3585,6 +3588,53 @@ ice_set_q_coalesce(struct ice_vsi *vsi, struct ethtool_coalesce *ec, int q_num)
}
/**
+ * ice_is_coalesce_param_invalid - check for unsupported coalesce parameters
+ * @netdev: pointer to the netdev associated with this query
+ * @ec: ethtool structure to fill with driver's coalesce settings
+ *
+ * Print netdev info if driver doesn't support one of the parameters
+ * and return error. When any parameters will be implemented, remove only
+ * this parameter from param array.
+ */
+static int
+ice_is_coalesce_param_invalid(struct net_device *netdev,
+ struct ethtool_coalesce *ec)
+{
+ struct ice_ethtool_not_used {
+ u32 value;
+ const char *name;
+ } param[] = {
+ {ec->stats_block_coalesce_usecs, "stats-block-usecs"},
+ {ec->rate_sample_interval, "sample-interval"},
+ {ec->pkt_rate_low, "pkt-rate-low"},
+ {ec->pkt_rate_high, "pkt-rate-high"},
+ {ec->rx_max_coalesced_frames, "rx-frames"},
+ {ec->rx_coalesce_usecs_irq, "rx-usecs-irq"},
+ {ec->rx_max_coalesced_frames_irq, "rx-frames-irq"},
+ {ec->tx_max_coalesced_frames, "tx-frames"},
+ {ec->tx_coalesce_usecs_irq, "tx-usecs-irq"},
+ {ec->tx_max_coalesced_frames_irq, "tx-frames-irq"},
+ {ec->rx_coalesce_usecs_low, "rx-usecs-low"},
+ {ec->rx_max_coalesced_frames_low, "rx-frames-low"},
+ {ec->tx_coalesce_usecs_low, "tx-usecs-low"},
+ {ec->tx_max_coalesced_frames_low, "tx-frames-low"},
+ {ec->rx_max_coalesced_frames_high, "rx-frames-high"},
+ {ec->tx_max_coalesced_frames_high, "tx-frames-high"}
+ };
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(param); i++) {
+ if (param[i].value) {
+ netdev_info(netdev, "Setting %s not supported\n",
+ param[i].name);
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+/**
* __ice_set_coalesce - set ITR/INTRL values for the device
* @netdev: pointer to the netdev associated with this query
* @ec: ethtool structure to fill with driver's coalesce settings
@@ -3600,6 +3650,9 @@ __ice_set_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec,
struct ice_netdev_priv *np = netdev_priv(netdev);
struct ice_vsi *vsi = np->vsi;
+ if (ice_is_coalesce_param_invalid(netdev, ec))
+ return -EINVAL;
+
if (q_num < 0) {
int v_idx;
diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
index e8f32350fed2..f2cababf2561 100644
--- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
+++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
@@ -60,15 +60,6 @@
#define PRTDCB_GENS_DCBX_STATUS_M ICE_M(0x7, 0)
#define GL_PREEXT_L2_PMASK0(_i) (0x0020F0FC + ((_i) * 4))
#define GL_PREEXT_L2_PMASK1(_i) (0x0020F108 + ((_i) * 4))
-#define GLFLXP_RXDID_FLAGS(_i, _j) (0x0045D000 + ((_i) * 4 + (_j) * 256))
-#define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_S 0
-#define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_M ICE_M(0x3F, 0)
-#define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_1_S 8
-#define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_1_M ICE_M(0x3F, 8)
-#define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_2_S 16
-#define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_2_M ICE_M(0x3F, 16)
-#define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_3_S 24
-#define GLFLXP_RXDID_FLAGS_FLEXIFLAG_4N_3_M ICE_M(0x3F, 24)
#define GLFLXP_RXDID_FLX_WRD_0(_i) (0x0045c800 + ((_i) * 4))
#define GLFLXP_RXDID_FLX_WRD_0_PROT_MDID_S 0
#define GLFLXP_RXDID_FLX_WRD_0_PROT_MDID_M ICE_M(0xFF, 0)
diff --git a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
index ad34f22d44ef..0997d352709b 100644
--- a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
+++ b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
@@ -302,7 +302,7 @@ struct ice_ctx_ele {
#define ICE_CTX_STORE(_struct, _ele, _width, _lsb) { \
.offset = offsetof(struct _struct, _ele), \
- .size_of = FIELD_SIZEOF(struct _struct, _ele), \
+ .size_of = sizeof_field(struct _struct, _ele), \
.width = _width, \
.lsb = _lsb, \
}
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index e7449248fab4..4cfad81ba496 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -817,12 +817,23 @@ static int ice_vsi_init(struct ice_vsi *vsi, bool init_vsi)
ctxt->info.valid_sections |=
cpu_to_le16(ICE_AQ_VSI_PROP_RXQ_MAP_VALID);
- /* Enable MAC Antispoof with new VSI being initialized or updated */
- if (vsi->type == ICE_VSI_VF && pf->vf[vsi->vf_id].spoofchk) {
+ /* enable/disable MAC and VLAN anti-spoof when spoofchk is on/off
+ * respectively
+ */
+ if (vsi->type == ICE_VSI_VF) {
ctxt->info.valid_sections |=
cpu_to_le16(ICE_AQ_VSI_PROP_SECURITY_VALID);
- ctxt->info.sec_flags |=
- ICE_AQ_VSI_SEC_FLAG_ENA_MAC_ANTI_SPOOF;
+ if (pf->vf[vsi->vf_id].spoofchk) {
+ ctxt->info.sec_flags |=
+ ICE_AQ_VSI_SEC_FLAG_ENA_MAC_ANTI_SPOOF |
+ (ICE_AQ_VSI_SEC_TX_VLAN_PRUNE_ENA <<
+ ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S);
+ } else {
+ ctxt->info.sec_flags &=
+ ~(ICE_AQ_VSI_SEC_FLAG_ENA_MAC_ANTI_SPOOF |
+ (ICE_AQ_VSI_SEC_TX_VLAN_PRUNE_ENA <<
+ ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S));
+ }
}
/* Allow control frames out of main VSI */
@@ -1636,22 +1647,14 @@ int ice_cfg_vlan_pruning(struct ice_vsi *vsi, bool ena, bool vlan_promisc)
ctxt->info = vsi->info;
- if (ena) {
- ctxt->info.sec_flags |=
- ICE_AQ_VSI_SEC_TX_VLAN_PRUNE_ENA <<
- ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S;
+ if (ena)
ctxt->info.sw_flags2 |= ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA;
- } else {
- ctxt->info.sec_flags &=
- ~(ICE_AQ_VSI_SEC_TX_VLAN_PRUNE_ENA <<
- ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S);
+ else
ctxt->info.sw_flags2 &= ~ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA;
- }
if (!vlan_promisc)
ctxt->info.valid_sections =
- cpu_to_le16(ICE_AQ_VSI_PROP_SECURITY_VALID |
- ICE_AQ_VSI_PROP_SW_VALID);
+ cpu_to_le16(ICE_AQ_VSI_PROP_SW_VALID);
status = ice_update_vsi(&pf->hw, vsi->idx, ctxt, NULL);
if (status) {
@@ -1661,7 +1664,6 @@ int ice_cfg_vlan_pruning(struct ice_vsi *vsi, bool ena, bool vlan_promisc)
goto err_out;
}
- vsi->info.sec_flags = ctxt->info.sec_flags;
vsi->info.sw_flags2 = ctxt->info.sw_flags2;
kfree(ctxt);
@@ -2402,6 +2404,97 @@ int ice_vsi_release(struct ice_vsi *vsi)
}
/**
+ * ice_vsi_rebuild_update_coalesce - set coalesce for a q_vector
+ * @q_vector: pointer to q_vector which is being updated
+ * @coalesce: pointer to array of struct with stored coalesce
+ *
+ * Set coalesce param in q_vector and update these parameters in HW.
+ */
+static void
+ice_vsi_rebuild_update_coalesce(struct ice_q_vector *q_vector,
+ struct ice_coalesce_stored *coalesce)
+{
+ struct ice_ring_container *rx_rc = &q_vector->rx;
+ struct ice_ring_container *tx_rc = &q_vector->tx;
+ struct ice_hw *hw = &q_vector->vsi->back->hw;
+
+ tx_rc->itr_setting = coalesce->itr_tx;
+ rx_rc->itr_setting = coalesce->itr_rx;
+
+ /* dynamic ITR values will be updated during Tx/Rx */
+ if (!ITR_IS_DYNAMIC(tx_rc->itr_setting))
+ wr32(hw, GLINT_ITR(tx_rc->itr_idx, q_vector->reg_idx),
+ ITR_REG_ALIGN(tx_rc->itr_setting) >>
+ ICE_ITR_GRAN_S);
+ if (!ITR_IS_DYNAMIC(rx_rc->itr_setting))
+ wr32(hw, GLINT_ITR(rx_rc->itr_idx, q_vector->reg_idx),
+ ITR_REG_ALIGN(rx_rc->itr_setting) >>
+ ICE_ITR_GRAN_S);
+
+ q_vector->intrl = coalesce->intrl;
+ wr32(hw, GLINT_RATE(q_vector->reg_idx),
+ ice_intrl_usec_to_reg(q_vector->intrl, hw->intrl_gran));
+}
+
+/**
+ * ice_vsi_rebuild_get_coalesce - get coalesce from all q_vectors
+ * @vsi: VSI connected with q_vectors
+ * @coalesce: array of struct with stored coalesce
+ *
+ * Returns array size.
+ */
+static int
+ice_vsi_rebuild_get_coalesce(struct ice_vsi *vsi,
+ struct ice_coalesce_stored *coalesce)
+{
+ int i;
+
+ ice_for_each_q_vector(vsi, i) {
+ struct ice_q_vector *q_vector = vsi->q_vectors[i];
+
+ coalesce[i].itr_tx = q_vector->tx.itr_setting;
+ coalesce[i].itr_rx = q_vector->rx.itr_setting;
+ coalesce[i].intrl = q_vector->intrl;
+ }
+
+ return vsi->num_q_vectors;
+}
+
+/**
+ * ice_vsi_rebuild_set_coalesce - set coalesce from earlier saved arrays
+ * @vsi: VSI connected with q_vectors
+ * @coalesce: pointer to array of struct with stored coalesce
+ * @size: size of coalesce array
+ *
+ * Before this function, ice_vsi_rebuild_get_coalesce should be called to save
+ * ITR params in arrays. If size is 0 or coalesce wasn't stored set coalesce
+ * to default value.
+ */
+static void
+ice_vsi_rebuild_set_coalesce(struct ice_vsi *vsi,
+ struct ice_coalesce_stored *coalesce, int size)
+{
+ int i;
+
+ if ((size && !coalesce) || !vsi)
+ return;
+
+ for (i = 0; i < size && i < vsi->num_q_vectors; i++)
+ ice_vsi_rebuild_update_coalesce(vsi->q_vectors[i],
+ &coalesce[i]);
+
+ for (; i < vsi->num_q_vectors; i++) {
+ struct ice_coalesce_stored coalesce_dflt = {
+ .itr_tx = ICE_DFLT_TX_ITR,
+ .itr_rx = ICE_DFLT_RX_ITR,
+ .intrl = 0
+ };
+ ice_vsi_rebuild_update_coalesce(vsi->q_vectors[i],
+ &coalesce_dflt);
+ }
+}
+
+/**
* ice_vsi_rebuild - Rebuild VSI after reset
* @vsi: VSI to be rebuild
* @init_vsi: is this an initialization or a reconfigure of the VSI
@@ -2411,6 +2504,8 @@ int ice_vsi_release(struct ice_vsi *vsi)
int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi)
{
u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 };
+ struct ice_coalesce_stored *coalesce;
+ int prev_num_q_vectors = 0;
struct ice_vf *vf = NULL;
enum ice_status status;
struct ice_pf *pf;
@@ -2423,6 +2518,11 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi)
if (vsi->type == ICE_VSI_VF)
vf = &pf->vf[vsi->vf_id];
+ coalesce = kcalloc(vsi->num_q_vectors,
+ sizeof(struct ice_coalesce_stored), GFP_KERNEL);
+ if (coalesce)
+ prev_num_q_vectors = ice_vsi_rebuild_get_coalesce(vsi,
+ coalesce);
ice_rm_vsi_lan_cfg(vsi->port_info, vsi->idx);
ice_vsi_free_q_vectors(vsi);
@@ -2535,6 +2635,9 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi)
return ice_schedule_reset(pf, ICE_RESET_PFR);
}
}
+ ice_vsi_rebuild_set_coalesce(vsi, coalesce, prev_num_q_vectors);
+ kfree(coalesce);
+
return 0;
err_vectors:
@@ -2549,6 +2652,7 @@ err_rings:
err_vsi:
ice_vsi_clear(vsi);
set_bit(__ICE_RESET_FAILED, pf->state);
+ kfree(coalesce);
return ret;
}
@@ -2740,3 +2844,121 @@ cfg_mac_fltr_exit:
ice_free_fltr_list(&vsi->back->pdev->dev, &tmp_add_list);
return status;
}
+
+/**
+ * ice_is_dflt_vsi_in_use - check if the default forwarding VSI is being used
+ * @sw: switch to check if its default forwarding VSI is free
+ *
+ * Return true if the default forwarding VSI is already being used, else returns
+ * false signalling that it's available to use.
+ */
+bool ice_is_dflt_vsi_in_use(struct ice_sw *sw)
+{
+ return (sw->dflt_vsi && sw->dflt_vsi_ena);
+}
+
+/**
+ * ice_is_vsi_dflt_vsi - check if the VSI passed in is the default VSI
+ * @sw: switch for the default forwarding VSI to compare against
+ * @vsi: VSI to compare against default forwarding VSI
+ *
+ * If this VSI passed in is the default forwarding VSI then return true, else
+ * return false
+ */
+bool ice_is_vsi_dflt_vsi(struct ice_sw *sw, struct ice_vsi *vsi)
+{
+ return (sw->dflt_vsi == vsi && sw->dflt_vsi_ena);
+}
+
+/**
+ * ice_set_dflt_vsi - set the default forwarding VSI
+ * @sw: switch used to assign the default forwarding VSI
+ * @vsi: VSI getting set as the default forwarding VSI on the switch
+ *
+ * If the VSI passed in is already the default VSI and it's enabled just return
+ * success.
+ *
+ * If there is already a default VSI on the switch and it's enabled then return
+ * -EEXIST since there can only be one default VSI per switch.
+ *
+ * Otherwise try to set the VSI passed in as the switch's default VSI and
+ * return the result.
+ */
+int ice_set_dflt_vsi(struct ice_sw *sw, struct ice_vsi *vsi)
+{
+ enum ice_status status;
+ struct device *dev;
+
+ if (!sw || !vsi)
+ return -EINVAL;
+
+ dev = ice_pf_to_dev(vsi->back);
+
+ /* the VSI passed in is already the default VSI */
+ if (ice_is_vsi_dflt_vsi(sw, vsi)) {
+ dev_dbg(dev, "VSI %d passed in is already the default forwarding VSI, nothing to do\n",
+ vsi->vsi_num);
+ return 0;
+ }
+
+ /* another VSI is already the default VSI for this switch */
+ if (ice_is_dflt_vsi_in_use(sw)) {
+ dev_err(dev,
+ "Default forwarding VSI %d already in use, disable it and try again\n",
+ sw->dflt_vsi->vsi_num);
+ return -EEXIST;
+ }
+
+ status = ice_cfg_dflt_vsi(&vsi->back->hw, vsi->idx, true, ICE_FLTR_RX);
+ if (status) {
+ dev_err(dev,
+ "Failed to set VSI %d as the default forwarding VSI, error %d\n",
+ vsi->vsi_num, status);
+ return -EIO;
+ }
+
+ sw->dflt_vsi = vsi;
+ sw->dflt_vsi_ena = true;
+
+ return 0;
+}
+
+/**
+ * ice_clear_dflt_vsi - clear the default forwarding VSI
+ * @sw: switch used to clear the default VSI
+ *
+ * If the switch has no default VSI or it's not enabled then return error.
+ *
+ * Otherwise try to clear the default VSI and return the result.
+ */
+int ice_clear_dflt_vsi(struct ice_sw *sw)
+{
+ struct ice_vsi *dflt_vsi;
+ enum ice_status status;
+ struct device *dev;
+
+ if (!sw)
+ return -EINVAL;
+
+ dev = ice_pf_to_dev(sw->pf);
+
+ dflt_vsi = sw->dflt_vsi;
+
+ /* there is no default VSI configured */
+ if (!ice_is_dflt_vsi_in_use(sw))
+ return -ENODEV;
+
+ status = ice_cfg_dflt_vsi(&dflt_vsi->back->hw, dflt_vsi->idx, false,
+ ICE_FLTR_RX);
+ if (status) {
+ dev_err(dev,
+ "Failed to clear the default forwarding VSI %d, error %d\n",
+ dflt_vsi->vsi_num, status);
+ return -EIO;
+ }
+
+ sw->dflt_vsi = NULL;
+ sw->dflt_vsi_ena = false;
+
+ return 0;
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h
index 6e31e30aba39..68fd0d4505c2 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.h
+++ b/drivers/net/ethernet/intel/ice/ice_lib.h
@@ -103,4 +103,12 @@ enum ice_status
ice_vsi_cfg_mac_fltr(struct ice_vsi *vsi, const u8 *macaddr, bool set);
bool ice_is_safe_mode(struct ice_pf *pf);
+
+bool ice_is_dflt_vsi_in_use(struct ice_sw *sw);
+
+bool ice_is_vsi_dflt_vsi(struct ice_sw *sw, struct ice_vsi *vsi);
+
+int ice_set_dflt_vsi(struct ice_sw *sw, struct ice_vsi *vsi);
+
+int ice_clear_dflt_vsi(struct ice_sw *sw);
#endif /* !_ICE_LIB_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 69bff085acf7..bf539483e25e 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -379,25 +379,29 @@ static int ice_vsi_sync_fltr(struct ice_vsi *vsi)
clear_bit(ICE_VSI_FLAG_PROMISC_CHANGED, vsi->flags);
if (vsi->current_netdev_flags & IFF_PROMISC) {
/* Apply Rx filter rule to get traffic from wire */
- status = ice_cfg_dflt_vsi(hw, vsi->idx, true,
- ICE_FLTR_RX);
- if (status) {
- netdev_err(netdev, "Error setting default VSI %i Rx rule\n",
- vsi->vsi_num);
- vsi->current_netdev_flags &= ~IFF_PROMISC;
- err = -EIO;
- goto out_promisc;
+ if (!ice_is_dflt_vsi_in_use(pf->first_sw)) {
+ err = ice_set_dflt_vsi(pf->first_sw, vsi);
+ if (err && err != -EEXIST) {
+ netdev_err(netdev,
+ "Error %d setting default VSI %i Rx rule\n",
+ err, vsi->vsi_num);
+ vsi->current_netdev_flags &=
+ ~IFF_PROMISC;
+ goto out_promisc;
+ }
}
} else {
/* Clear Rx filter to remove traffic from wire */
- status = ice_cfg_dflt_vsi(hw, vsi->idx, false,
- ICE_FLTR_RX);
- if (status) {
- netdev_err(netdev, "Error clearing default VSI %i Rx rule\n",
- vsi->vsi_num);
- vsi->current_netdev_flags |= IFF_PROMISC;
- err = -EIO;
- goto out_promisc;
+ if (ice_is_vsi_dflt_vsi(pf->first_sw, vsi)) {
+ err = ice_clear_dflt_vsi(pf->first_sw);
+ if (err) {
+ netdev_err(netdev,
+ "Error %d clearing default VSI %i Rx rule\n",
+ err, vsi->vsi_num);
+ vsi->current_netdev_flags |=
+ IFF_PROMISC;
+ goto out_promisc;
+ }
}
}
}
@@ -472,7 +476,7 @@ ice_prepare_for_reset(struct ice_pf *pf)
ice_vc_notify_reset(pf);
/* Disable VFs until reset is completed */
- for (i = 0; i < pf->num_alloc_vfs; i++)
+ ice_for_each_vf(pf, i)
ice_set_vf_state_qs_dis(&pf->vf[i]);
/* clear SW filtering DB */
@@ -840,8 +844,7 @@ ice_link_event(struct ice_pf *pf, struct ice_port_info *pi, bool link_up,
ice_vsi_link_event(vsi, link_up);
ice_print_link_msg(vsi, link_up);
- if (pf->num_alloc_vfs)
- ice_vc_notify_link_state(pf);
+ ice_vc_notify_link_state(pf);
return result;
}
@@ -1291,7 +1294,7 @@ static void ice_handle_mdd_event(struct ice_pf *pf)
}
/* check to see if one of the VFs caused the MDD */
- for (i = 0; i < pf->num_alloc_vfs; i++) {
+ ice_for_each_vf(pf, i) {
struct ice_vf *vf = &pf->vf[i];
bool vf_mdd_detected = false;
@@ -2330,7 +2333,8 @@ static void ice_set_netdev_features(struct net_device *netdev)
NETIF_F_HW_VLAN_CTAG_TX |
NETIF_F_HW_VLAN_CTAG_RX;
- tso_features = NETIF_F_TSO;
+ tso_features = NETIF_F_TSO |
+ NETIF_F_GSO_UDP_L4;
/* set features that user can change */
netdev->hw_features = dflt_features | csumo_features |
@@ -3568,6 +3572,15 @@ static const struct pci_device_id ice_pci_tbl[] = {
{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E810C_BACKPLANE), 0 },
{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E810C_QSFP), 0 },
{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E810C_SFP), 0 },
+ { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822C_BACKPLANE), 0 },
+ { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822C_QSFP), 0 },
+ { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822C_SFP), 0 },
+ { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822C_10G_BASE_T), 0 },
+ { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822C_SGMII), 0 },
+ { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822X_BACKPLANE), 0 },
+ { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822L_SFP), 0 },
+ { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822L_10G_BASE_T), 0 },
+ { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822L_SGMII), 0 },
/* required last entry */
{ 0, }
};
@@ -4670,6 +4683,13 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type)
goto err_init_ctrlq;
}
+ if (pf->first_sw->dflt_vsi_ena)
+ dev_info(dev,
+ "Clearing default VSI, re-enable after reset completes\n");
+ /* clear the default VSI configuration if it exists */
+ pf->first_sw->dflt_vsi = NULL;
+ pf->first_sw->dflt_vsi_ena = false;
+
ice_clear_pxe_mode(hw);
ret = ice_get_caps(hw);
@@ -4825,7 +4845,7 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu)
}
}
- netdev_info(netdev, "changed MTU to %d\n", new_mtu);
+ netdev_dbg(netdev, "changed MTU to %d\n", new_mtu);
return 0;
}
@@ -5060,7 +5080,7 @@ ice_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
* ice_tx_timeout - Respond to a Tx Hang
* @netdev: network interface device structure
*/
-static void ice_tx_timeout(struct net_device *netdev)
+static void ice_tx_timeout(struct net_device *netdev, unsigned int txqueue)
{
struct ice_netdev_priv *np = netdev_priv(netdev);
struct ice_ring *tx_ring = NULL;
diff --git a/drivers/net/ethernet/intel/ice/ice_nvm.c b/drivers/net/ethernet/intel/ice/ice_nvm.c
index 57c73f613f32..7525ac50742e 100644
--- a/drivers/net/ethernet/intel/ice/ice_nvm.c
+++ b/drivers/net/ethernet/intel/ice/ice_nvm.c
@@ -289,6 +289,18 @@ enum ice_status ice_init_nvm(struct ice_hw *hw)
nvm->eetrack = (eetrack_hi << 16) | eetrack_lo;
+ /* the following devices do not have boot_cfg_tlv yet */
+ if (hw->device_id == ICE_DEV_ID_E822C_BACKPLANE ||
+ hw->device_id == ICE_DEV_ID_E822C_QSFP ||
+ hw->device_id == ICE_DEV_ID_E822C_10G_BASE_T ||
+ hw->device_id == ICE_DEV_ID_E822C_SGMII ||
+ hw->device_id == ICE_DEV_ID_E822C_SFP ||
+ hw->device_id == ICE_DEV_ID_E822X_BACKPLANE ||
+ hw->device_id == ICE_DEV_ID_E822L_SFP ||
+ hw->device_id == ICE_DEV_ID_E822L_10G_BASE_T ||
+ hw->device_id == ICE_DEV_ID_E822L_SGMII)
+ return status;
+
status = ice_get_pfa_module_tlv(hw, &boot_cfg_tlv, &boot_cfg_tlv_len,
ICE_SR_BOOT_CFG_PTR);
if (status) {
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index 2c212f64d99f..fd17ace6b226 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -1071,13 +1071,16 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
ice_put_rx_buf(rx_ring, rx_buf);
continue;
construct_skb:
- if (skb)
+ if (skb) {
ice_add_rx_frag(rx_ring, rx_buf, skb, size);
- else if (ice_ring_uses_build_skb(rx_ring))
- skb = ice_build_skb(rx_ring, rx_buf, &xdp);
- else
+ } else if (likely(xdp.data)) {
+ if (ice_ring_uses_build_skb(rx_ring))
+ skb = ice_build_skb(rx_ring, rx_buf, &xdp);
+ else
+ skb = ice_construct_skb(rx_ring, rx_buf, &xdp);
+ } else {
skb = ice_construct_skb(rx_ring, rx_buf, &xdp);
-
+ }
/* exit if we failed to retrieve a buffer */
if (!skb) {
rx_ring->rx_stats.alloc_buf_failed++;
@@ -1925,6 +1928,7 @@ int ice_tso(struct ice_tx_buf *first, struct ice_tx_offload_params *off)
} ip;
union {
struct tcphdr *tcp;
+ struct udphdr *udp;
unsigned char *hdr;
} l4;
u64 cd_mss, cd_tso_len;
@@ -1958,10 +1962,18 @@ int ice_tso(struct ice_tx_buf *first, struct ice_tx_offload_params *off)
/* remove payload length from checksum */
paylen = skb->len - l4_start;
- csum_replace_by_diff(&l4.tcp->check, (__force __wsum)htonl(paylen));
- /* compute length of segmentation header */
- off->header_len = (l4.tcp->doff * 4) + l4_start;
+ if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) {
+ csum_replace_by_diff(&l4.udp->check,
+ (__force __wsum)htonl(paylen));
+ /* compute length of UDP segmentation header */
+ off->header_len = sizeof(l4.udp) + l4_start;
+ } else {
+ csum_replace_by_diff(&l4.tcp->check,
+ (__force __wsum)htonl(paylen));
+ /* compute length of TCP segmentation header */
+ off->header_len = (l4.tcp->doff * 4) + l4_start;
+ }
/* update gso_segs and bytecount */
first->gso_segs = skb_shinfo(skb)->gso_segs;
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h
index a84cc0e6dd27..a86270696df1 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.h
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
@@ -341,6 +341,12 @@ struct ice_ring_container {
u16 itr_setting;
};
+struct ice_coalesce_stored {
+ u16 itr_tx;
+ u16 itr_rx;
+ u8 intrl;
+};
+
/* iterator for handling rings in ring container */
#define ice_for_each_ring(pos, head) \
for (pos = (head).ring; pos; pos = pos->next)
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
index edb374296d1f..82b1e7a4cb92 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
@@ -35,37 +35,6 @@ static int ice_check_vf_init(struct ice_pf *pf, struct ice_vf *vf)
}
/**
- * ice_err_to_virt err - translate errors for VF return code
- * @ice_err: error return code
- */
-static enum virtchnl_status_code ice_err_to_virt_err(enum ice_status ice_err)
-{
- switch (ice_err) {
- case ICE_SUCCESS:
- return VIRTCHNL_STATUS_SUCCESS;
- case ICE_ERR_BAD_PTR:
- case ICE_ERR_INVAL_SIZE:
- case ICE_ERR_DEVICE_NOT_SUPPORTED:
- case ICE_ERR_PARAM:
- case ICE_ERR_CFG:
- return VIRTCHNL_STATUS_ERR_PARAM;
- case ICE_ERR_NO_MEMORY:
- return VIRTCHNL_STATUS_ERR_NO_MEMORY;
- case ICE_ERR_NOT_READY:
- case ICE_ERR_RESET_FAILED:
- case ICE_ERR_FW_API_VER:
- case ICE_ERR_AQ_ERROR:
- case ICE_ERR_AQ_TIMEOUT:
- case ICE_ERR_AQ_FULL:
- case ICE_ERR_AQ_NO_WORK:
- case ICE_ERR_AQ_EMPTY:
- return VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR;
- default:
- return VIRTCHNL_STATUS_ERR_NOT_SUPPORTED;
- }
-}
-
-/**
* ice_vc_vf_broadcast - Broadcast a message to all VFs on PF
* @pf: pointer to the PF structure
* @v_opcode: operation code
@@ -78,10 +47,11 @@ ice_vc_vf_broadcast(struct ice_pf *pf, enum virtchnl_ops v_opcode,
enum virtchnl_status_code v_retval, u8 *msg, u16 msglen)
{
struct ice_hw *hw = &pf->hw;
- struct ice_vf *vf = pf->vf;
int i;
- for (i = 0; i < pf->num_alloc_vfs; i++, vf++) {
+ ice_for_each_vf(pf, i) {
+ struct ice_vf *vf = &pf->vf[i];
+
/* Not all vfs are enabled so skip the ones that are not */
if (!test_bit(ICE_VF_STATE_INIT, vf->vf_states) &&
!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states))
@@ -121,26 +91,6 @@ ice_set_pfe_link(struct ice_vf *vf, struct virtchnl_pf_event *pfe,
}
/**
- * ice_set_pfe_link_forced - Force the virtchnl_pf_event link speed/status
- * @vf: pointer to the VF structure
- * @pfe: pointer to the virtchnl_pf_event to set link speed/status for
- * @link_up: whether or not to set the link up/down
- */
-static void
-ice_set_pfe_link_forced(struct ice_vf *vf, struct virtchnl_pf_event *pfe,
- bool link_up)
-{
- u16 link_speed;
-
- if (link_up)
- link_speed = ICE_AQ_LINK_SPEED_100GB;
- else
- link_speed = ICE_AQ_LINK_SPEED_UNKNOWN;
-
- ice_set_pfe_link(vf, pfe, link_speed, link_up);
-}
-
-/**
* ice_vc_notify_vf_link_state - Inform a VF of link status
* @vf: pointer to the VF structure
*
@@ -160,13 +110,17 @@ static void ice_vc_notify_vf_link_state(struct ice_vf *vf)
pfe.severity = PF_EVENT_SEVERITY_INFO;
/* Always report link is down if the VF queues aren't enabled */
- if (!vf->num_qs_ena)
+ if (!vf->num_qs_ena) {
ice_set_pfe_link(vf, &pfe, ICE_AQ_LINK_SPEED_UNKNOWN, false);
- else if (vf->link_forced)
- ice_set_pfe_link_forced(vf, &pfe, vf->link_up);
- else
- ice_set_pfe_link(vf, &pfe, ls->link_speed, ls->link_info &
- ICE_AQ_LINK_UP);
+ } else if (vf->link_forced) {
+ u16 link_speed = vf->link_up ?
+ ls->link_speed : ICE_AQ_LINK_SPEED_UNKNOWN;
+
+ ice_set_pfe_link(vf, &pfe, link_speed, vf->link_up);
+ } else {
+ ice_set_pfe_link(vf, &pfe, ls->link_speed,
+ ls->link_info & ICE_AQ_LINK_UP);
+ }
ice_aq_send_msg_to_vf(hw, vf->vf_id, VIRTCHNL_OP_EVENT,
VIRTCHNL_STATUS_SUCCESS, (u8 *)&pfe,
@@ -331,7 +285,7 @@ void ice_free_vfs(struct ice_pf *pf)
usleep_range(1000, 2000);
/* Avoid wait time by stopping all VFs at the same time */
- for (i = 0; i < pf->num_alloc_vfs; i++)
+ ice_for_each_vf(pf, i)
if (test_bit(ICE_VF_STATE_QS_ENA, pf->vf[i].vf_states))
ice_dis_vf_qs(&pf->vf[i]);
@@ -991,10 +945,17 @@ static void ice_cleanup_and_realloc_vf(struct ice_vf *vf)
/* reallocate VF resources to finish resetting the VSI state */
if (!ice_alloc_vf_res(vf)) {
+ struct ice_vsi *vsi;
+
ice_ena_vf_mappings(vf);
set_bit(ICE_VF_STATE_ACTIVE, vf->vf_states);
clear_bit(ICE_VF_STATE_DIS, vf->vf_states);
- vf->num_vlan = 0;
+
+ vsi = pf->vsi[vf->lan_vsi_idx];
+ if (ice_vsi_add_vlan(vsi, 0))
+ dev_warn(ice_pf_to_dev(pf),
+ "Failed to add VLAN 0 filter for VF %d, MDD events will trigger. Reset the VF, disable spoofchk, or enable 8021q module on the guest",
+ vf->vf_id);
}
/* Tell the VF driver the reset is done. This needs to be done only
@@ -1023,7 +984,7 @@ ice_vf_set_vsi_promisc(struct ice_vf *vf, struct ice_vsi *vsi, u8 promisc_m,
struct ice_hw *hw;
hw = &pf->hw;
- if (vf->num_vlan) {
+ if (vsi->num_vlan) {
status = ice_set_vlan_vsi_promisc(hw, vsi->idx, promisc_m,
rm_promisc);
} else if (vf->port_vlan_id) {
@@ -1070,7 +1031,7 @@ static bool ice_config_res_vfs(struct ice_pf *pf)
ice_irq_dynamic_ena(hw, NULL, NULL);
/* Finish resetting each VF and allocate resources */
- for (v = 0; v < pf->num_alloc_vfs; v++) {
+ ice_for_each_vf(pf, v) {
struct ice_vf *vf = &pf->vf[v];
vf->num_vf_qs = pf->num_vf_qps;
@@ -1113,10 +1074,10 @@ bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr)
return false;
/* Begin reset on all VFs at once */
- for (v = 0; v < pf->num_alloc_vfs; v++)
+ ice_for_each_vf(pf, v)
ice_trigger_vf_reset(&pf->vf[v], is_vflr, true);
- for (v = 0; v < pf->num_alloc_vfs; v++) {
+ ice_for_each_vf(pf, v) {
struct ice_vsi *vsi;
vf = &pf->vf[v];
@@ -1161,7 +1122,7 @@ bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr)
dev_warn(dev, "VF reset check timeout\n");
/* free VF resources to begin resetting the VSI state */
- for (v = 0; v < pf->num_alloc_vfs; v++) {
+ ice_for_each_vf(pf, v) {
vf = &pf->vf[v];
ice_free_vf_res(vf);
@@ -1273,7 +1234,7 @@ static bool ice_reset_vf(struct ice_vf *vf, bool is_vflr)
*/
if (test_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states) ||
test_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states)) {
- if (vf->port_vlan_id || vf->num_vlan)
+ if (vf->port_vlan_id || vsi->num_vlan)
promisc_m = ICE_UCAST_VLAN_PROMISC_BITS;
else
promisc_m = ICE_UCAST_PROMISC_BITS;
@@ -1301,7 +1262,7 @@ void ice_vc_notify_link_state(struct ice_pf *pf)
{
int i;
- for (i = 0; i < pf->num_alloc_vfs; i++)
+ ice_for_each_vf(pf, i)
ice_vc_notify_vf_link_state(&pf->vf[i]);
}
@@ -1385,9 +1346,10 @@ static int ice_alloc_vfs(struct ice_pf *pf, u16 num_alloc_vfs)
goto err_pci_disable_sriov;
}
pf->vf = vfs;
+ pf->num_alloc_vfs = num_alloc_vfs;
/* apply default profile */
- for (i = 0; i < num_alloc_vfs; i++) {
+ ice_for_each_vf(pf, i) {
vfs[i].pf = pf;
vfs[i].vf_sw_id = pf->first_sw;
vfs[i].vf_id = i;
@@ -1396,7 +1358,6 @@ static int ice_alloc_vfs(struct ice_pf *pf, u16 num_alloc_vfs)
set_bit(ICE_VIRTCHNL_VF_CAP_L2, &vfs[i].vf_caps);
vfs[i].spoofchk = true;
}
- pf->num_alloc_vfs = num_alloc_vfs;
/* VF resources get allocated with initialization */
if (!ice_config_res_vfs(pf)) {
@@ -1535,7 +1496,7 @@ void ice_process_vflr_event(struct ice_pf *pf)
!pf->num_alloc_vfs)
return;
- for (vf_id = 0; vf_id < pf->num_alloc_vfs; vf_id++) {
+ ice_for_each_vf(pf, vf_id) {
struct ice_vf *vf = &pf->vf[vf_id];
u32 reg_idx, bit_idx;
@@ -1918,6 +1879,89 @@ error_param:
}
/**
+ * ice_set_vf_spoofchk
+ * @netdev: network interface device structure
+ * @vf_id: VF identifier
+ * @ena: flag to enable or disable feature
+ *
+ * Enable or disable VF spoof checking
+ */
+int ice_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool ena)
+{
+ struct ice_netdev_priv *np = netdev_priv(netdev);
+ struct ice_pf *pf = np->vsi->back;
+ struct ice_vsi_ctx *ctx;
+ struct ice_vsi *vf_vsi;
+ enum ice_status status;
+ struct device *dev;
+ struct ice_vf *vf;
+ int ret = 0;
+
+ dev = ice_pf_to_dev(pf);
+ if (ice_validate_vf_id(pf, vf_id))
+ return -EINVAL;
+
+ vf = &pf->vf[vf_id];
+
+ if (ice_check_vf_init(pf, vf))
+ return -EBUSY;
+
+ vf_vsi = pf->vsi[vf->lan_vsi_idx];
+ if (!vf_vsi) {
+ netdev_err(netdev, "VSI %d for VF %d is null\n",
+ vf->lan_vsi_idx, vf->vf_id);
+ return -EINVAL;
+ }
+
+ if (vf_vsi->type != ICE_VSI_VF) {
+ netdev_err(netdev,
+ "Type %d of VSI %d for VF %d is no ICE_VSI_VF\n",
+ vf_vsi->type, vf_vsi->vsi_num, vf->vf_id);
+ return -ENODEV;
+ }
+
+ if (ena == vf->spoofchk) {
+ dev_dbg(dev, "VF spoofchk already %s\n", ena ? "ON" : "OFF");
+ return 0;
+ }
+
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+
+ ctx->info.sec_flags = vf_vsi->info.sec_flags;
+ ctx->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_SECURITY_VALID);
+ if (ena) {
+ ctx->info.sec_flags |=
+ ICE_AQ_VSI_SEC_FLAG_ENA_MAC_ANTI_SPOOF |
+ (ICE_AQ_VSI_SEC_TX_VLAN_PRUNE_ENA <<
+ ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S);
+ } else {
+ ctx->info.sec_flags &=
+ ~(ICE_AQ_VSI_SEC_FLAG_ENA_MAC_ANTI_SPOOF |
+ (ICE_AQ_VSI_SEC_TX_VLAN_PRUNE_ENA <<
+ ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S));
+ }
+
+ status = ice_update_vsi(&pf->hw, vf_vsi->idx, ctx, NULL);
+ if (status) {
+ dev_err(dev,
+ "Failed to %sable spoofchk on VF %d VSI %d\n error %d",
+ ena ? "en" : "dis", vf->vf_id, vf_vsi->vsi_num, status);
+ ret = -EIO;
+ goto out;
+ }
+
+ /* only update spoofchk state and VSI context on success */
+ vf_vsi->info.sec_flags = ctx->info.sec_flags;
+ vf->spoofchk = ena;
+
+out:
+ kfree(ctx);
+ return ret;
+}
+
+/**
* ice_vc_get_stats_msg
* @vf: pointer to the VF info
* @msg: pointer to the msg buffer
@@ -2409,6 +2453,83 @@ static bool ice_can_vf_change_mac(struct ice_vf *vf)
}
/**
+ * ice_vc_add_mac_addr - attempt to add the MAC address passed in
+ * @vf: pointer to the VF info
+ * @vsi: pointer to the VF's VSI
+ * @mac_addr: MAC address to add
+ */
+static int
+ice_vc_add_mac_addr(struct ice_vf *vf, struct ice_vsi *vsi, u8 *mac_addr)
+{
+ struct device *dev = ice_pf_to_dev(vf->pf);
+ enum ice_status status;
+
+ /* default unicast MAC already added */
+ if (ether_addr_equal(mac_addr, vf->dflt_lan_addr.addr))
+ return 0;
+
+ if (is_unicast_ether_addr(mac_addr) && !ice_can_vf_change_mac(vf)) {
+ dev_err(dev, "VF attempting to override administratively set MAC address, bring down and up the VF interface to resume normal operation\n");
+ return -EPERM;
+ }
+
+ status = ice_vsi_cfg_mac_fltr(vsi, mac_addr, true);
+ if (status == ICE_ERR_ALREADY_EXISTS) {
+ dev_err(dev, "MAC %pM already exists for VF %d\n", mac_addr,
+ vf->vf_id);
+ return -EEXIST;
+ } else if (status) {
+ dev_err(dev, "Failed to add MAC %pM for VF %d\n, error %d\n",
+ mac_addr, vf->vf_id, status);
+ return -EIO;
+ }
+
+ /* only set dflt_lan_addr once */
+ if (is_zero_ether_addr(vf->dflt_lan_addr.addr) &&
+ is_unicast_ether_addr(mac_addr))
+ ether_addr_copy(vf->dflt_lan_addr.addr, mac_addr);
+
+ vf->num_mac++;
+
+ return 0;
+}
+
+/**
+ * ice_vc_del_mac_addr - attempt to delete the MAC address passed in
+ * @vf: pointer to the VF info
+ * @vsi: pointer to the VF's VSI
+ * @mac_addr: MAC address to delete
+ */
+static int
+ice_vc_del_mac_addr(struct ice_vf *vf, struct ice_vsi *vsi, u8 *mac_addr)
+{
+ struct device *dev = ice_pf_to_dev(vf->pf);
+ enum ice_status status;
+
+ if (!ice_can_vf_change_mac(vf) &&
+ ether_addr_equal(mac_addr, vf->dflt_lan_addr.addr))
+ return 0;
+
+ status = ice_vsi_cfg_mac_fltr(vsi, mac_addr, false);
+ if (status == ICE_ERR_DOES_NOT_EXIST) {
+ dev_err(dev, "MAC %pM does not exist for VF %d\n", mac_addr,
+ vf->vf_id);
+ return -ENOENT;
+ } else if (status) {
+ dev_err(dev, "Failed to delete MAC %pM for VF %d, error %d\n",
+ mac_addr, vf->vf_id, status);
+ return -EIO;
+ }
+
+ if (ether_addr_equal(mac_addr, vf->dflt_lan_addr.addr))
+ eth_zero_addr(vf->dflt_lan_addr.addr);
+
+ vf->num_mac--;
+
+ return 0;
+}
+
+/**
* ice_vc_handle_mac_addr_msg
* @vf: pointer to the VF info
* @msg: pointer to the msg buffer
@@ -2419,23 +2540,23 @@ static bool ice_can_vf_change_mac(struct ice_vf *vf)
static int
ice_vc_handle_mac_addr_msg(struct ice_vf *vf, u8 *msg, bool set)
{
+ int (*ice_vc_cfg_mac)
+ (struct ice_vf *vf, struct ice_vsi *vsi, u8 *mac_addr);
enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
struct virtchnl_ether_addr_list *al =
(struct virtchnl_ether_addr_list *)msg;
struct ice_pf *pf = vf->pf;
enum virtchnl_ops vc_op;
- enum ice_status status;
struct ice_vsi *vsi;
- struct device *dev;
- int mac_count = 0;
int i;
- dev = ice_pf_to_dev(pf);
-
- if (set)
+ if (set) {
vc_op = VIRTCHNL_OP_ADD_ETH_ADDR;
- else
+ ice_vc_cfg_mac = ice_vc_add_mac_addr;
+ } else {
vc_op = VIRTCHNL_OP_DEL_ETH_ADDR;
+ ice_vc_cfg_mac = ice_vc_del_mac_addr;
+ }
if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states) ||
!ice_vc_isvalid_vsi_id(vf, al->vsi_id)) {
@@ -2443,14 +2564,15 @@ ice_vc_handle_mac_addr_msg(struct ice_vf *vf, u8 *msg, bool set)
goto handle_mac_exit;
}
+ /* If this VF is not privileged, then we can't add more than a
+ * limited number of addresses. Check to make sure that the
+ * additions do not push us over the limit.
+ */
if (set && !ice_is_vf_trusted(vf) &&
(vf->num_mac + al->num_elements) > ICE_MAX_MACADDR_PER_VF) {
- dev_err(dev,
+ dev_err(ice_pf_to_dev(pf),
"Can't add more MAC addresses, because VF-%d is not trusted, switch the VF to trusted mode in order to add more functionalities\n",
vf->vf_id);
- /* There is no need to let VF know about not being trusted
- * to add more MAC addr, so we can just return success message.
- */
v_ret = VIRTCHNL_STATUS_ERR_PARAM;
goto handle_mac_exit;
}
@@ -2462,70 +2584,22 @@ ice_vc_handle_mac_addr_msg(struct ice_vf *vf, u8 *msg, bool set)
}
for (i = 0; i < al->num_elements; i++) {
- u8 *maddr = al->list[i].addr;
+ u8 *mac_addr = al->list[i].addr;
+ int result;
- if (ether_addr_equal(maddr, vf->dflt_lan_addr.addr) ||
- is_broadcast_ether_addr(maddr)) {
- if (set) {
- /* VF is trying to add filters that the PF
- * already added. Just continue.
- */
- dev_info(dev,
- "MAC %pM already set for VF %d\n",
- maddr, vf->vf_id);
- continue;
- } else {
- /* VF can't remove dflt_lan_addr/bcast MAC */
- dev_err(dev,
- "VF can't remove default MAC address or MAC %pM programmed by PF for VF %d\n",
- maddr, vf->vf_id);
- continue;
- }
- }
-
- /* check for the invalid cases and bail if necessary */
- if (is_zero_ether_addr(maddr)) {
- dev_err(dev,
- "invalid MAC %pM provided for VF %d\n",
- maddr, vf->vf_id);
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
- goto handle_mac_exit;
- }
-
- if (is_unicast_ether_addr(maddr) &&
- !ice_can_vf_change_mac(vf)) {
- dev_err(dev,
- "can't change unicast MAC for untrusted VF %d\n",
- vf->vf_id);
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
- goto handle_mac_exit;
- }
+ if (is_broadcast_ether_addr(mac_addr) ||
+ is_zero_ether_addr(mac_addr))
+ continue;
- /* program the updated filter list */
- status = ice_vsi_cfg_mac_fltr(vsi, maddr, set);
- if (status == ICE_ERR_DOES_NOT_EXIST ||
- status == ICE_ERR_ALREADY_EXISTS) {
- dev_info(dev,
- "can't %s MAC filters %pM for VF %d, error %d\n",
- set ? "add" : "remove", maddr, vf->vf_id,
- status);
- } else if (status) {
- dev_err(dev,
- "can't %s MAC filters for VF %d, error %d\n",
- set ? "add" : "remove", vf->vf_id, status);
- v_ret = ice_err_to_virt_err(status);
+ result = ice_vc_cfg_mac(vf, vsi, mac_addr);
+ if (result == -EEXIST || result == -ENOENT) {
+ continue;
+ } else if (result) {
+ v_ret = VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR;
goto handle_mac_exit;
}
-
- mac_count++;
}
- /* Track number of MAC filters programmed for the VF VSI */
- if (set)
- vf->num_mac += mac_count;
- else
- vf->num_mac -= mac_count;
-
handle_mac_exit:
/* send the response to the VF */
return ice_vc_send_msg_to_vf(vf, vc_op, v_ret, NULL, 0);
@@ -2744,17 +2818,6 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v)
goto error_param;
}
- if (add_v && !ice_is_vf_trusted(vf) &&
- vf->num_vlan >= ICE_MAX_VLAN_PER_VF) {
- dev_info(dev,
- "VF-%d is not trusted, switch the VF to trusted mode, in order to add more VLAN addresses\n",
- vf->vf_id);
- /* There is no need to let VF know about being not trusted,
- * so we can just return success message here
- */
- goto error_param;
- }
-
for (i = 0; i < vfl->num_elements; i++) {
if (vfl->vlan_id[i] > ICE_MAX_VLANID) {
v_ret = VIRTCHNL_STATUS_ERR_PARAM;
@@ -2771,6 +2834,17 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v)
goto error_param;
}
+ if (add_v && !ice_is_vf_trusted(vf) &&
+ vsi->num_vlan >= ICE_MAX_VLAN_PER_VF) {
+ dev_info(dev,
+ "VF-%d is not trusted, switch the VF to trusted mode, in order to add more VLAN addresses\n",
+ vf->vf_id);
+ /* There is no need to let VF know about being not trusted,
+ * so we can just return success message here
+ */
+ goto error_param;
+ }
+
if (vsi->info.pvid) {
v_ret = VIRTCHNL_STATUS_ERR_PARAM;
goto error_param;
@@ -2785,7 +2859,7 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v)
u16 vid = vfl->vlan_id[i];
if (!ice_is_vf_trusted(vf) &&
- vf->num_vlan >= ICE_MAX_VLAN_PER_VF) {
+ vsi->num_vlan >= ICE_MAX_VLAN_PER_VF) {
dev_info(dev,
"VF-%d is not trusted, switch the VF to trusted mode, in order to add more VLAN addresses\n",
vf->vf_id);
@@ -2796,12 +2870,20 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v)
goto error_param;
}
- if (ice_vsi_add_vlan(vsi, vid)) {
+ /* we add VLAN 0 by default for each VF so we can enable
+ * Tx VLAN anti-spoof without triggering MDD events so
+ * we don't need to add it again here
+ */
+ if (!vid)
+ continue;
+
+ status = ice_vsi_add_vlan(vsi, vid);
+ if (status) {
v_ret = VIRTCHNL_STATUS_ERR_PARAM;
goto error_param;
}
- vf->num_vlan++;
+ vsi->num_vlan++;
/* Enable VLAN pruning when VLAN is added */
if (!vlan_promisc) {
status = ice_cfg_vlan_pruning(vsi, true, false);
@@ -2837,21 +2919,29 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v)
*/
int num_vf_vlan;
- num_vf_vlan = vf->num_vlan;
+ num_vf_vlan = vsi->num_vlan;
for (i = 0; i < vfl->num_elements && i < num_vf_vlan; i++) {
u16 vid = vfl->vlan_id[i];
+ /* we add VLAN 0 by default for each VF so we can enable
+ * Tx VLAN anti-spoof without triggering MDD events so
+ * we don't want a VIRTCHNL request to remove it
+ */
+ if (!vid)
+ continue;
+
/* Make sure ice_vsi_kill_vlan is successful before
* updating VLAN information
*/
- if (ice_vsi_kill_vlan(vsi, vid)) {
+ status = ice_vsi_kill_vlan(vsi, vid);
+ if (status) {
v_ret = VIRTCHNL_STATUS_ERR_PARAM;
goto error_param;
}
- vf->num_vlan--;
+ vsi->num_vlan--;
/* Disable VLAN pruning when the last VLAN is removed */
- if (!vf->num_vlan)
+ if (!vsi->num_vlan)
ice_cfg_vlan_pruning(vsi, false, false);
/* Disable Unicast/Multicast VLAN promiscuous mode */
@@ -3165,65 +3255,6 @@ ice_get_vf_cfg(struct net_device *netdev, int vf_id, struct ifla_vf_info *ivi)
}
/**
- * ice_set_vf_spoofchk
- * @netdev: network interface device structure
- * @vf_id: VF identifier
- * @ena: flag to enable or disable feature
- *
- * Enable or disable VF spoof checking
- */
-int ice_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool ena)
-{
- struct ice_pf *pf = ice_netdev_to_pf(netdev);
- struct ice_vsi *vsi = pf->vsi[0];
- struct ice_vsi_ctx *ctx;
- enum ice_status status;
- struct device *dev;
- struct ice_vf *vf;
- int ret = 0;
-
- dev = ice_pf_to_dev(pf);
- if (ice_validate_vf_id(pf, vf_id))
- return -EINVAL;
-
- vf = &pf->vf[vf_id];
- if (ice_check_vf_init(pf, vf))
- return -EBUSY;
-
- if (ena == vf->spoofchk) {
- dev_dbg(dev, "VF spoofchk already %s\n",
- ena ? "ON" : "OFF");
- return 0;
- }
-
- ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
- if (!ctx)
- return -ENOMEM;
-
- ctx->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_SECURITY_VALID);
-
- if (ena) {
- ctx->info.sec_flags |= ICE_AQ_VSI_SEC_FLAG_ENA_MAC_ANTI_SPOOF;
- ctx->info.sw_flags2 |= ICE_AQ_VSI_SW_FLAG_RX_PRUNE_EN_M;
- }
-
- status = ice_update_vsi(&pf->hw, vsi->idx, ctx, NULL);
- if (status) {
- dev_dbg(dev,
- "Error %d, failed to update VSI* parameters\n", status);
- ret = -EIO;
- goto out;
- }
-
- vf->spoofchk = ena;
- vsi->info.sec_flags = ctx->info.sec_flags;
- vsi->info.sw_flags2 = ctx->info.sw_flags2;
-out:
- kfree(ctx);
- return ret;
-}
-
-/**
* ice_wait_on_vf_reset
* @vf: The VF being resseting
*
@@ -3344,28 +3375,18 @@ int ice_set_vf_trust(struct net_device *netdev, int vf_id, bool trusted)
int ice_set_vf_link_state(struct net_device *netdev, int vf_id, int link_state)
{
struct ice_pf *pf = ice_netdev_to_pf(netdev);
- struct virtchnl_pf_event pfe = { 0 };
- struct ice_link_status *ls;
struct ice_vf *vf;
- struct ice_hw *hw;
if (ice_validate_vf_id(pf, vf_id))
return -EINVAL;
vf = &pf->vf[vf_id];
- hw = &pf->hw;
- ls = &pf->hw.port_info->phy.link_info;
-
if (ice_check_vf_init(pf, vf))
return -EBUSY;
- pfe.event = VIRTCHNL_EVENT_LINK_CHANGE;
- pfe.severity = PF_EVENT_SEVERITY_INFO;
-
switch (link_state) {
case IFLA_VF_LINK_STATE_AUTO:
vf->link_forced = false;
- vf->link_up = ls->link_info & ICE_AQ_LINK_UP;
break;
case IFLA_VF_LINK_STATE_ENABLE:
vf->link_forced = true;
@@ -3379,15 +3400,7 @@ int ice_set_vf_link_state(struct net_device *netdev, int vf_id, int link_state)
return -EINVAL;
}
- if (vf->link_forced)
- ice_set_pfe_link_forced(vf, &pfe, vf->link_up);
- else
- ice_set_pfe_link(vf, &pfe, ls->link_speed, vf->link_up);
-
- /* Notify the VF of its new link state */
- ice_aq_send_msg_to_vf(hw, vf->vf_id, VIRTCHNL_OP_EVENT,
- VIRTCHNL_STATUS_SUCCESS, (u8 *)&pfe,
- sizeof(pfe), NULL);
+ ice_vc_notify_vf_link_state(vf);
return 0;
}
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
index 88aa65d5cb31..4647d636ed36 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
@@ -40,6 +40,9 @@
#define ICE_DFLT_INTR_PER_VF (ICE_DFLT_QS_PER_VF + 1)
#define ICE_MAX_VF_RESET_WAIT 15
+#define ice_for_each_vf(pf, i) \
+ for ((i) = 0; (i) < (pf)->num_alloc_vfs; (i)++)
+
/* Specific VF states */
enum ice_vf_states {
ICE_VF_STATE_INIT = 0, /* PF is initializing VF */
@@ -91,7 +94,6 @@ struct ice_vf {
unsigned long vf_caps; /* VF's adv. capabilities */
u8 num_req_qs; /* num of queue pairs requested by VF */
u16 num_mac;
- u16 num_vlan;
u16 num_vf_qs; /* num of queue configured per VF */
u16 num_qs_ena; /* total num of Tx/Rx queue enabled */
};
diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c
index cf9b8b22d24f..0e401f116d54 100644
--- a/drivers/net/ethernet/intel/ice/ice_xsk.c
+++ b/drivers/net/ethernet/intel/ice/ice_xsk.c
@@ -414,7 +414,8 @@ ice_xsk_umem_enable(struct ice_vsi *vsi, struct xdp_umem *umem, u16 qid)
if (vsi->type != ICE_VSI_PF)
return -EINVAL;
- vsi->num_xsk_umems = min_t(u16, vsi->num_rxq, vsi->num_txq);
+ if (!vsi->num_xsk_umems)
+ vsi->num_xsk_umems = min_t(u16, vsi->num_rxq, vsi->num_txq);
if (qid >= vsi->num_xsk_umems)
return -EINVAL;
@@ -555,7 +556,7 @@ ice_alloc_buf_fast_zc(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf)
rx_buf->handle = handle + umem->headroom;
- xsk_umem_discard_addr(umem);
+ xsk_umem_release_addr(umem);
return true;
}
@@ -591,7 +592,7 @@ ice_alloc_buf_slow_zc(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf)
rx_buf->handle = handle + umem->headroom;
- xsk_umem_discard_addr_rq(umem);
+ xsk_umem_release_addr_rq(umem);
return true;
}
diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h
index ca54e268d157..49b5fa9d4783 100644
--- a/drivers/net/ethernet/intel/igb/igb.h
+++ b/drivers/net/ethernet/intel/igb/igb.h
@@ -661,6 +661,7 @@ void igb_configure_tx_ring(struct igb_adapter *, struct igb_ring *);
void igb_configure_rx_ring(struct igb_adapter *, struct igb_ring *);
void igb_setup_tctl(struct igb_adapter *);
void igb_setup_rctl(struct igb_adapter *);
+void igb_setup_srrctl(struct igb_adapter *, struct igb_ring *);
netdev_tx_t igb_xmit_frame_ring(struct sk_buff *, struct igb_ring *);
void igb_alloc_rx_buffers(struct igb_ring *, u16);
void igb_update_stats(struct igb_adapter *);
diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c
index 3182b059bf55..43c438365389 100644
--- a/drivers/net/ethernet/intel/igb/igb_ethtool.c
+++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c
@@ -26,7 +26,7 @@ struct igb_stats {
#define IGB_STAT(_name, _stat) { \
.stat_string = _name, \
- .sizeof_stat = FIELD_SIZEOF(struct igb_adapter, _stat), \
+ .sizeof_stat = sizeof_field(struct igb_adapter, _stat), \
.stat_offset = offsetof(struct igb_adapter, _stat) \
}
static const struct igb_stats igb_gstrings_stats[] = {
@@ -76,7 +76,7 @@ static const struct igb_stats igb_gstrings_stats[] = {
#define IGB_NETDEV_STAT(_net_stat) { \
.stat_string = __stringify(_net_stat), \
- .sizeof_stat = FIELD_SIZEOF(struct rtnl_link_stats64, _net_stat), \
+ .sizeof_stat = sizeof_field(struct rtnl_link_stats64, _net_stat), \
.stat_offset = offsetof(struct rtnl_link_stats64, _net_stat) \
}
static const struct igb_stats igb_gstrings_net_stats[] = {
@@ -396,6 +396,7 @@ static int igb_set_pauseparam(struct net_device *netdev,
struct igb_adapter *adapter = netdev_priv(netdev);
struct e1000_hw *hw = &adapter->hw;
int retval = 0;
+ int i;
/* 100basefx does not support setting link flow control */
if (hw->dev_spec._82575.eth_flags.e100_base_fx)
@@ -428,6 +429,13 @@ static int igb_set_pauseparam(struct net_device *netdev,
retval = ((hw->phy.media_type == e1000_media_type_copper) ?
igb_force_mac_fc(hw) : igb_setup_link(hw));
+
+ /* Make sure SRRCTL considers new fc settings for each ring */
+ for (i = 0; i < adapter->num_rx_queues; i++) {
+ struct igb_ring *ring = adapter->rx_ring[i];
+
+ igb_setup_srrctl(adapter, ring);
+ }
}
clear_bit(__IGB_RESETTING, &adapter->state);
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index 98346eb064d5..b46bff8fe056 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -146,7 +146,7 @@ static int igb_poll(struct napi_struct *, int);
static bool igb_clean_tx_irq(struct igb_q_vector *, int);
static int igb_clean_rx_irq(struct igb_q_vector *, int);
static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
-static void igb_tx_timeout(struct net_device *);
+static void igb_tx_timeout(struct net_device *, unsigned int txqueue);
static void igb_reset_task(struct work_struct *);
static void igb_vlan_mode(struct net_device *netdev,
netdev_features_t features);
@@ -4468,6 +4468,37 @@ static inline void igb_set_vmolr(struct igb_adapter *adapter,
}
/**
+ * igb_setup_srrctl - configure the split and replication receive control
+ * registers
+ * @adapter: Board private structure
+ * @ring: receive ring to be configured
+ **/
+void igb_setup_srrctl(struct igb_adapter *adapter, struct igb_ring *ring)
+{
+ struct e1000_hw *hw = &adapter->hw;
+ int reg_idx = ring->reg_idx;
+ u32 srrctl = 0;
+
+ srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
+ if (ring_uses_large_buffer(ring))
+ srrctl |= IGB_RXBUFFER_3072 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
+ else
+ srrctl |= IGB_RXBUFFER_2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
+ srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
+ if (hw->mac.type >= e1000_82580)
+ srrctl |= E1000_SRRCTL_TIMESTAMP;
+ /* Only set Drop Enable if VFs allocated, or we are supporting multiple
+ * queues and rx flow control is disabled
+ */
+ if (adapter->vfs_allocated_count ||
+ (!(hw->fc.current_mode & e1000_fc_rx_pause) &&
+ adapter->num_rx_queues > 1))
+ srrctl |= E1000_SRRCTL_DROP_EN;
+
+ wr32(E1000_SRRCTL(reg_idx), srrctl);
+}
+
+/**
* igb_configure_rx_ring - Configure a receive ring after Reset
* @adapter: board private structure
* @ring: receive ring to be configured
@@ -4481,7 +4512,7 @@ void igb_configure_rx_ring(struct igb_adapter *adapter,
union e1000_adv_rx_desc *rx_desc;
u64 rdba = ring->dma;
int reg_idx = ring->reg_idx;
- u32 srrctl = 0, rxdctl = 0;
+ u32 rxdctl = 0;
/* disable the queue */
wr32(E1000_RXDCTL(reg_idx), 0);
@@ -4499,19 +4530,7 @@ void igb_configure_rx_ring(struct igb_adapter *adapter,
writel(0, ring->tail);
/* set descriptor configuration */
- srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
- if (ring_uses_large_buffer(ring))
- srrctl |= IGB_RXBUFFER_3072 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
- else
- srrctl |= IGB_RXBUFFER_2048 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
- srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
- if (hw->mac.type >= e1000_82580)
- srrctl |= E1000_SRRCTL_TIMESTAMP;
- /* Only set Drop Enable if we are supporting multiple queues */
- if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
- srrctl |= E1000_SRRCTL_DROP_EN;
-
- wr32(E1000_SRRCTL(reg_idx), srrctl);
+ igb_setup_srrctl(adapter, ring);
/* set filtering for VMDQ pools */
igb_set_vmolr(adapter, reg_idx & 0x7, true);
@@ -6184,7 +6203,7 @@ static netdev_tx_t igb_xmit_frame(struct sk_buff *skb,
* igb_tx_timeout - Respond to a Tx Hang
* @netdev: network interface device structure
**/
-static void igb_tx_timeout(struct net_device *netdev)
+static void igb_tx_timeout(struct net_device *netdev, unsigned int txqueue)
{
struct igb_adapter *adapter = netdev_priv(netdev);
struct e1000_hw *hw = &adapter->hw;
diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c
index 6003dc3ff5fd..5b1800c3ba82 100644
--- a/drivers/net/ethernet/intel/igbvf/netdev.c
+++ b/drivers/net/ethernet/intel/igbvf/netdev.c
@@ -2375,7 +2375,7 @@ static netdev_tx_t igbvf_xmit_frame(struct sk_buff *skb,
* igbvf_tx_timeout - Respond to a Tx Hang
* @netdev: network interface device structure
**/
-static void igbvf_tx_timeout(struct net_device *netdev)
+static void igbvf_tx_timeout(struct net_device *netdev, unsigned int txqueue)
{
struct igbvf_adapter *adapter = netdev_priv(netdev);
diff --git a/drivers/net/ethernet/intel/igc/Makefile b/drivers/net/ethernet/intel/igc/Makefile
index 88c6f88baac5..49fb1e1965cd 100644
--- a/drivers/net/ethernet/intel/igc/Makefile
+++ b/drivers/net/ethernet/intel/igc/Makefile
@@ -8,4 +8,4 @@
obj-$(CONFIG_IGC) += igc.o
igc-objs := igc_main.o igc_mac.o igc_i225.o igc_base.o igc_nvm.o igc_phy.o \
-igc_ethtool.o
+igc_ethtool.o igc_ptp.o
diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h
index 0868677d43ed..52066bdbbad0 100644
--- a/drivers/net/ethernet/intel/igc/igc.h
+++ b/drivers/net/ethernet/intel/igc/igc.h
@@ -10,6 +10,9 @@
#include <linux/vmalloc.h>
#include <linux/ethtool.h>
#include <linux/sctp.h>
+#include <linux/ptp_clock_kernel.h>
+#include <linux/timecounter.h>
+#include <linux/net_tstamp.h>
#include "igc_hw.h"
@@ -45,11 +48,15 @@ extern char igc_driver_version[];
#define IGC_REGS_LEN 740
#define IGC_RETA_SIZE 128
+/* flags controlling PTP/1588 function */
+#define IGC_PTP_ENABLED BIT(0)
+
/* Interrupt defines */
#define IGC_START_ITR 648 /* ~6000 ints/sec */
#define IGC_FLAG_HAS_MSI BIT(0)
#define IGC_FLAG_QUEUE_PAIRS BIT(3)
#define IGC_FLAG_DMAC BIT(4)
+#define IGC_FLAG_PTP BIT(8)
#define IGC_FLAG_NEED_LINK_UPDATE BIT(9)
#define IGC_FLAG_MEDIA_RESET BIT(10)
#define IGC_FLAG_MAS_ENABLE BIT(12)
@@ -100,6 +107,20 @@ extern char igc_driver_version[];
#define AUTO_ALL_MODES 0
#define IGC_RX_HDR_LEN IGC_RXBUFFER_256
+/* Transmit and receive latency (for PTP timestamps) */
+/* FIXME: These values were estimated using the ones that i210 has as
+ * basis, they seem to provide good numbers with ptp4l/phc2sys, but we
+ * need to confirm them.
+ */
+#define IGC_I225_TX_LATENCY_10 9542
+#define IGC_I225_TX_LATENCY_100 1024
+#define IGC_I225_TX_LATENCY_1000 178
+#define IGC_I225_TX_LATENCY_2500 64
+#define IGC_I225_RX_LATENCY_10 20662
+#define IGC_I225_RX_LATENCY_100 2213
+#define IGC_I225_RX_LATENCY_1000 448
+#define IGC_I225_RX_LATENCY_2500 160
+
/* RX and TX descriptor control thresholds.
* PTHRESH - MAC will consider prefetch if it has fewer than this number of
* descriptors available in its onboard memory.
@@ -370,6 +391,8 @@ struct igc_adapter {
struct timer_list dma_err_timer;
struct timer_list phy_info_timer;
+ u32 wol;
+ u32 en_mng_pt;
u16 link_speed;
u16 link_duplex;
@@ -430,6 +453,20 @@ struct igc_adapter {
unsigned long link_check_timeout;
struct igc_info ei;
+
+ struct ptp_clock *ptp_clock;
+ struct ptp_clock_info ptp_caps;
+ struct work_struct ptp_tx_work;
+ struct sk_buff *ptp_tx_skb;
+ struct hwtstamp_config tstamp_config;
+ unsigned long ptp_tx_start;
+ unsigned long last_rx_ptp_check;
+ unsigned long last_rx_timestamp;
+ unsigned int ptp_flags;
+ /* System time value lock */
+ spinlock_t tmreg_lock;
+ struct cyclecounter cc;
+ struct timecounter tc;
};
/* igc_desc_unused - calculate if we have unused descriptors */
@@ -513,6 +550,16 @@ int igc_add_filter(struct igc_adapter *adapter,
int igc_erase_filter(struct igc_adapter *adapter,
struct igc_nfc_filter *input);
+void igc_ptp_init(struct igc_adapter *adapter);
+void igc_ptp_reset(struct igc_adapter *adapter);
+void igc_ptp_stop(struct igc_adapter *adapter);
+void igc_ptp_rx_rgtstamp(struct igc_q_vector *q_vector, struct sk_buff *skb);
+void igc_ptp_rx_pktstamp(struct igc_q_vector *q_vector, void *va,
+ struct sk_buff *skb);
+int igc_ptp_set_ts_config(struct net_device *netdev, struct ifreq *ifr);
+int igc_ptp_get_ts_config(struct net_device *netdev, struct ifreq *ifr);
+void igc_ptp_tx_hang(struct igc_adapter *adapter);
+
#define igc_rx_pg_size(_ring) (PAGE_SIZE << igc_rx_pg_order(_ring))
#define IGC_TXD_DCMD (IGC_ADVTXD_DCMD_EOP | IGC_ADVTXD_DCMD_RS)
diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h
index f3788f0b95b4..2121fc34e300 100644
--- a/drivers/net/ethernet/intel/igc/igc_defines.h
+++ b/drivers/net/ethernet/intel/igc/igc_defines.h
@@ -10,6 +10,37 @@
#define IGC_CTRL_EXT_DRV_LOAD 0x10000000 /* Drv loaded bit for FW */
+/* Definitions for power management and wakeup registers */
+/* Wake Up Control */
+#define IGC_WUC_PME_EN 0x00000002 /* PME Enable */
+
+/* Wake Up Filter Control */
+#define IGC_WUFC_LNKC 0x00000001 /* Link Status Change Wakeup Enable */
+#define IGC_WUFC_MC 0x00000008 /* Directed Multicast Wakeup Enable */
+
+#define IGC_CTRL_ADVD3WUC 0x00100000 /* D3 WUC */
+
+/* Wake Up Status */
+#define IGC_WUS_EX 0x00000004 /* Directed Exact */
+#define IGC_WUS_ARPD 0x00000020 /* Directed ARP Request */
+#define IGC_WUS_IPV4 0x00000040 /* Directed IPv4 */
+#define IGC_WUS_IPV6 0x00000080 /* Directed IPv6 */
+#define IGC_WUS_NSD 0x00000400 /* Directed IPv6 Neighbor Solicitation */
+
+/* Packet types that are enabled for wake packet delivery */
+#define WAKE_PKT_WUS ( \
+ IGC_WUS_EX | \
+ IGC_WUS_ARPD | \
+ IGC_WUS_IPV4 | \
+ IGC_WUS_IPV6 | \
+ IGC_WUS_NSD)
+
+/* Wake Up Packet Length */
+#define IGC_WUPL_MASK 0x00000FFF
+
+/* Wake Up Packet Memory stores the first 128 bytes of the wake up packet */
+#define IGC_WUPM_BYTES 128
+
/* Physical Func Reset Done Indication */
#define IGC_CTRL_EXT_LINK_MODE_MASK 0x00C00000
@@ -187,6 +218,7 @@
#define IGC_ICR_RXDMT0 BIT(4) /* Rx desc min. threshold (0) */
#define IGC_ICR_RXO BIT(6) /* Rx overrun */
#define IGC_ICR_RXT0 BIT(7) /* Rx timer intr (ring 0) */
+#define IGC_ICR_TS BIT(19) /* Time Sync Interrupt */
#define IGC_ICR_DRSTA BIT(30) /* Device Reset Asserted */
/* If this bit asserted, the driver should claim the interrupt */
@@ -209,6 +241,7 @@
#define IGC_IMS_DRSTA IGC_ICR_DRSTA /* Device Reset Asserted */
#define IGC_IMS_RXT0 IGC_ICR_RXT0 /* Rx timer intr */
#define IGC_IMS_RXDMT0 IGC_ICR_RXDMT0 /* Rx desc min. threshold */
+#define IGC_IMS_TS IGC_ICR_TS /* Time Sync Interrupt */
#define IGC_QVECTOR_MASK 0x7FFC /* Q-vector mask */
#define IGC_ITR_VAL_MASK 0x04 /* ITR value mask */
@@ -281,12 +314,21 @@
#define IGC_RCTL_RDMTS_HALF 0x00000000 /* Rx desc min thresh size */
#define IGC_RCTL_BAM 0x00008000 /* broadcast enable */
+/* Split Replication Receive Control */
+#define IGC_SRRCTL_TIMESTAMP 0x40000000
+#define IGC_SRRCTL_TIMER1SEL(timer) (((timer) & 0x3) << 14)
+#define IGC_SRRCTL_TIMER0SEL(timer) (((timer) & 0x3) << 17)
+
/* Receive Descriptor bit definitions */
#define IGC_RXD_STAT_EOP 0x02 /* End of Packet */
#define IGC_RXD_STAT_IXSM 0x04 /* Ignore checksum */
#define IGC_RXD_STAT_UDPCS 0x10 /* UDP xsum calculated */
#define IGC_RXD_STAT_TCPCS 0x20 /* TCP xsum calculated */
+/* Advanced Receive Descriptor bit definitions */
+#define IGC_RXDADV_STAT_TSIP 0x08000 /* timestamp in packet */
+#define IGC_RXDADV_STAT_TS 0x10000 /* Pkt was time stamped */
+
#define IGC_RXDEXT_STATERR_CE 0x01000000
#define IGC_RXDEXT_STATERR_SE 0x02000000
#define IGC_RXDEXT_STATERR_SEQ 0x04000000
@@ -323,6 +365,61 @@
#define I225_RXPBSIZE_DEFAULT 0x000000A2 /* RXPBSIZE default */
#define I225_TXPBSIZE_DEFAULT 0x04000014 /* TXPBSIZE default */
+#define IGC_RXPBS_CFG_TS_EN 0x80000000 /* Timestamp in Rx buffer */
+
+/* Time Sync Interrupt Causes */
+#define IGC_TSICR_SYS_WRAP BIT(0) /* SYSTIM Wrap around. */
+#define IGC_TSICR_TXTS BIT(1) /* Transmit Timestamp. */
+#define IGC_TSICR_TT0 BIT(3) /* Target Time 0 Trigger. */
+#define IGC_TSICR_TT1 BIT(4) /* Target Time 1 Trigger. */
+#define IGC_TSICR_AUTT0 BIT(5) /* Auxiliary Timestamp 0 Taken. */
+#define IGC_TSICR_AUTT1 BIT(6) /* Auxiliary Timestamp 1 Taken. */
+
+#define IGC_TSICR_INTERRUPTS IGC_TSICR_TXTS
+
+/* PTP Queue Filter */
+#define IGC_ETQF_1588 BIT(30)
+
+#define IGC_FTQF_VF_BP 0x00008000
+#define IGC_FTQF_1588_TIME_STAMP 0x08000000
+#define IGC_FTQF_MASK 0xF0000000
+#define IGC_FTQF_MASK_PROTO_BP 0x10000000
+
+/* Time Sync Receive Control bit definitions */
+#define IGC_TSYNCRXCTL_VALID 0x00000001 /* Rx timestamp valid */
+#define IGC_TSYNCRXCTL_TYPE_MASK 0x0000000E /* Rx type mask */
+#define IGC_TSYNCRXCTL_TYPE_L2_V2 0x00
+#define IGC_TSYNCRXCTL_TYPE_L4_V1 0x02
+#define IGC_TSYNCRXCTL_TYPE_L2_L4_V2 0x04
+#define IGC_TSYNCRXCTL_TYPE_ALL 0x08
+#define IGC_TSYNCRXCTL_TYPE_EVENT_V2 0x0A
+#define IGC_TSYNCRXCTL_ENABLED 0x00000010 /* enable Rx timestamping */
+#define IGC_TSYNCRXCTL_SYSCFI 0x00000020 /* Sys clock frequency */
+#define IGC_TSYNCRXCTL_RXSYNSIG 0x00000400 /* Sample RX tstamp in PHY sop */
+
+/* Time Sync Receive Configuration */
+#define IGC_TSYNCRXCFG_PTP_V1_CTRLT_MASK 0x000000FF
+#define IGC_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE 0x00
+#define IGC_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE 0x01
+
+/* Immediate Interrupt Receive */
+#define IGC_IMIR_CLEAR_MASK 0xF001FFFF /* IMIR Reg Clear Mask */
+#define IGC_IMIR_PORT_BYPASS 0x20000 /* IMIR Port Bypass Bit */
+#define IGC_IMIR_PRIORITY_SHIFT 29 /* IMIR Priority Shift */
+#define IGC_IMIREXT_CLEAR_MASK 0x7FFFF /* IMIREXT Reg Clear Mask */
+
+/* Immediate Interrupt Receive Extended */
+#define IGC_IMIREXT_CTRL_BP 0x00080000 /* Bypass check of ctrl bits */
+#define IGC_IMIREXT_SIZE_BP 0x00001000 /* Packet size bypass */
+
+/* Time Sync Transmit Control bit definitions */
+#define IGC_TSYNCTXCTL_VALID 0x00000001 /* Tx timestamp valid */
+#define IGC_TSYNCTXCTL_ENABLED 0x00000010 /* enable Tx timestamping */
+#define IGC_TSYNCTXCTL_MAX_ALLOWED_DLY_MASK 0x0000F000 /* max delay */
+#define IGC_TSYNCTXCTL_SYNC_COMP_ERR 0x20000000 /* sync err */
+#define IGC_TSYNCTXCTL_SYNC_COMP 0x40000000 /* sync complete */
+#define IGC_TSYNCTXCTL_START_SYNC 0x80000000 /* initiate sync */
+#define IGC_TSYNCTXCTL_TXSYNSIG 0x00000020 /* Sample TX tstamp in PHY sop */
/* Receive Checksum Control */
#define IGC_RXCSUM_CRCOFL 0x00000800 /* CRC32 offload enable */
diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c
index ac98f1d96892..ee07011e13e9 100644
--- a/drivers/net/ethernet/intel/igc/igc_ethtool.c
+++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c
@@ -16,7 +16,7 @@ struct igc_stats {
#define IGC_STAT(_name, _stat) { \
.stat_string = _name, \
- .sizeof_stat = FIELD_SIZEOF(struct igc_adapter, _stat), \
+ .sizeof_stat = sizeof_field(struct igc_adapter, _stat), \
.stat_offset = offsetof(struct igc_adapter, _stat) \
}
@@ -67,7 +67,7 @@ static const struct igc_stats igc_gstrings_stats[] = {
#define IGC_NETDEV_STAT(_net_stat) { \
.stat_string = __stringify(_net_stat), \
- .sizeof_stat = FIELD_SIZEOF(struct rtnl_link_stats64, _net_stat), \
+ .sizeof_stat = sizeof_field(struct rtnl_link_stats64, _net_stat), \
.stat_offset = offsetof(struct rtnl_link_stats64, _net_stat) \
}
@@ -1600,6 +1600,39 @@ static int igc_set_channels(struct net_device *netdev,
return 0;
}
+static int igc_get_ts_info(struct net_device *dev,
+ struct ethtool_ts_info *info)
+{
+ struct igc_adapter *adapter = netdev_priv(dev);
+
+ if (adapter->ptp_clock)
+ info->phc_index = ptp_clock_index(adapter->ptp_clock);
+ else
+ info->phc_index = -1;
+
+ switch (adapter->hw.mac.type) {
+ case igc_i225:
+ info->so_timestamping =
+ SOF_TIMESTAMPING_TX_SOFTWARE |
+ SOF_TIMESTAMPING_RX_SOFTWARE |
+ SOF_TIMESTAMPING_SOFTWARE |
+ SOF_TIMESTAMPING_TX_HARDWARE |
+ SOF_TIMESTAMPING_RX_HARDWARE |
+ SOF_TIMESTAMPING_RAW_HARDWARE;
+
+ info->tx_types =
+ BIT(HWTSTAMP_TX_OFF) |
+ BIT(HWTSTAMP_TX_ON);
+
+ info->rx_filters = BIT(HWTSTAMP_FILTER_NONE);
+ info->rx_filters |= BIT(HWTSTAMP_FILTER_ALL);
+
+ return 0;
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
static u32 igc_get_priv_flags(struct net_device *netdev)
{
struct igc_adapter *adapter = netdev_priv(netdev);
@@ -1847,6 +1880,7 @@ static const struct ethtool_ops igc_ethtool_ops = {
.get_rxfh_indir_size = igc_get_rxfh_indir_size,
.get_rxfh = igc_get_rxfh,
.set_rxfh = igc_set_rxfh,
+ .get_ts_info = igc_get_ts_info,
.get_channels = igc_get_channels,
.set_channels = igc_set_channels,
.get_priv_flags = igc_get_priv_flags,
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 9700527dd797..c359f3d9fb25 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -8,6 +8,7 @@
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/ip.h>
+#include <linux/pm_runtime.h>
#include <net/ipv6.h>
@@ -50,25 +51,6 @@ static const struct pci_device_id igc_pci_tbl[] = {
MODULE_DEVICE_TABLE(pci, igc_pci_tbl);
-/* forward declaration */
-static void igc_clean_tx_ring(struct igc_ring *tx_ring);
-static int igc_sw_init(struct igc_adapter *);
-static void igc_configure(struct igc_adapter *adapter);
-static void igc_power_down_link(struct igc_adapter *adapter);
-static void igc_set_default_mac_filter(struct igc_adapter *adapter);
-static void igc_set_rx_mode(struct net_device *netdev);
-static void igc_write_itr(struct igc_q_vector *q_vector);
-static void igc_assign_vector(struct igc_q_vector *q_vector, int msix_vector);
-static void igc_free_q_vector(struct igc_adapter *adapter, int v_idx);
-static void igc_set_interrupt_capability(struct igc_adapter *adapter,
- bool msix);
-static void igc_free_q_vectors(struct igc_adapter *adapter);
-static void igc_irq_disable(struct igc_adapter *adapter);
-static void igc_irq_enable(struct igc_adapter *adapter);
-static void igc_configure_msix(struct igc_adapter *adapter);
-static bool igc_alloc_mapped_page(struct igc_ring *rx_ring,
- struct igc_rx_buffer *bi);
-
enum latency_range {
lowest_latency = 0,
low_latency = 1,
@@ -76,6 +58,16 @@ enum latency_range {
latency_invalid = 255
};
+/**
+ * igc_power_down_link - Power down the phy/serdes link
+ * @adapter: address of board private structure
+ */
+static void igc_power_down_link(struct igc_adapter *adapter)
+{
+ if (adapter->hw.phy.media_type == igc_media_type_copper)
+ igc_power_down_phy_copper_base(&adapter->hw);
+}
+
void igc_reset(struct igc_adapter *adapter)
{
struct pci_dev *pdev = adapter->pdev;
@@ -110,11 +102,14 @@ void igc_reset(struct igc_adapter *adapter)
if (!netif_running(adapter->netdev))
igc_power_down_link(adapter);
+ /* Re-enable PTP, where applicable. */
+ igc_ptp_reset(adapter);
+
igc_get_phy_info(hw);
}
/**
- * igc_power_up_link - Power up the phy/serdes link
+ * igc_power_up_link - Power up the phy link
* @adapter: address of board private structure
*/
static void igc_power_up_link(struct igc_adapter *adapter)
@@ -128,16 +123,6 @@ static void igc_power_up_link(struct igc_adapter *adapter)
}
/**
- * igc_power_down_link - Power down the phy/serdes link
- * @adapter: address of board private structure
- */
-static void igc_power_down_link(struct igc_adapter *adapter)
-{
- if (adapter->hw.phy.media_type == igc_media_type_copper)
- igc_power_down_phy_copper_base(&adapter->hw);
-}
-
-/**
* igc_release_hw_control - release control of the h/w to f/w
* @adapter: address of board private structure
*
@@ -176,43 +161,6 @@ static void igc_get_hw_control(struct igc_adapter *adapter)
}
/**
- * igc_free_tx_resources - Free Tx Resources per Queue
- * @tx_ring: Tx descriptor ring for a specific queue
- *
- * Free all transmit software resources
- */
-void igc_free_tx_resources(struct igc_ring *tx_ring)
-{
- igc_clean_tx_ring(tx_ring);
-
- vfree(tx_ring->tx_buffer_info);
- tx_ring->tx_buffer_info = NULL;
-
- /* if not set, then don't free */
- if (!tx_ring->desc)
- return;
-
- dma_free_coherent(tx_ring->dev, tx_ring->size,
- tx_ring->desc, tx_ring->dma);
-
- tx_ring->desc = NULL;
-}
-
-/**
- * igc_free_all_tx_resources - Free Tx Resources for All Queues
- * @adapter: board private structure
- *
- * Free all transmit software resources
- */
-static void igc_free_all_tx_resources(struct igc_adapter *adapter)
-{
- int i;
-
- for (i = 0; i < adapter->num_tx_queues; i++)
- igc_free_tx_resources(adapter->tx_ring[i]);
-}
-
-/**
* igc_clean_tx_ring - Free Tx Buffers
* @tx_ring: ring to be cleaned
*/
@@ -274,6 +222,43 @@ static void igc_clean_tx_ring(struct igc_ring *tx_ring)
}
/**
+ * igc_free_tx_resources - Free Tx Resources per Queue
+ * @tx_ring: Tx descriptor ring for a specific queue
+ *
+ * Free all transmit software resources
+ */
+void igc_free_tx_resources(struct igc_ring *tx_ring)
+{
+ igc_clean_tx_ring(tx_ring);
+
+ vfree(tx_ring->tx_buffer_info);
+ tx_ring->tx_buffer_info = NULL;
+
+ /* if not set, then don't free */
+ if (!tx_ring->desc)
+ return;
+
+ dma_free_coherent(tx_ring->dev, tx_ring->size,
+ tx_ring->desc, tx_ring->dma);
+
+ tx_ring->desc = NULL;
+}
+
+/**
+ * igc_free_all_tx_resources - Free Tx Resources for All Queues
+ * @adapter: board private structure
+ *
+ * Free all transmit software resources
+ */
+static void igc_free_all_tx_resources(struct igc_adapter *adapter)
+{
+ int i;
+
+ for (i = 0; i < adapter->num_tx_queues; i++)
+ igc_free_tx_resources(adapter->tx_ring[i]);
+}
+
+/**
* igc_clean_all_tx_rings - Free Tx Buffers for all queues
* @adapter: board private structure
*/
@@ -771,6 +756,51 @@ static void igc_setup_tctl(struct igc_adapter *adapter)
}
/**
+ * igc_rar_set_index - Sync RAL[index] and RAH[index] registers with MAC table
+ * @adapter: address of board private structure
+ * @index: Index of the RAR entry which need to be synced with MAC table
+ */
+static void igc_rar_set_index(struct igc_adapter *adapter, u32 index)
+{
+ u8 *addr = adapter->mac_table[index].addr;
+ struct igc_hw *hw = &adapter->hw;
+ u32 rar_low, rar_high;
+
+ /* HW expects these to be in network order when they are plugged
+ * into the registers which are little endian. In order to guarantee
+ * that ordering we need to do an leXX_to_cpup here in order to be
+ * ready for the byteswap that occurs with writel
+ */
+ rar_low = le32_to_cpup((__le32 *)(addr));
+ rar_high = le16_to_cpup((__le16 *)(addr + 4));
+
+ /* Indicate to hardware the Address is Valid. */
+ if (adapter->mac_table[index].state & IGC_MAC_STATE_IN_USE) {
+ if (is_valid_ether_addr(addr))
+ rar_high |= IGC_RAH_AV;
+
+ rar_high |= IGC_RAH_POOL_1 <<
+ adapter->mac_table[index].queue;
+ }
+
+ wr32(IGC_RAL(index), rar_low);
+ wrfl();
+ wr32(IGC_RAH(index), rar_high);
+ wrfl();
+}
+
+/* Set default MAC address for the PF in the first RAR entry */
+static void igc_set_default_mac_filter(struct igc_adapter *adapter)
+{
+ struct igc_mac_addr *mac_table = &adapter->mac_table[0];
+
+ ether_addr_copy(mac_table->addr, adapter->hw.mac.addr);
+ mac_table->state = IGC_MAC_STATE_DEFAULT | IGC_MAC_STATE_IN_USE;
+
+ igc_rar_set_index(adapter, 0);
+}
+
+/**
* igc_set_mac - Change the Ethernet Address of the NIC
* @netdev: network interface device structure
* @p: pointer to an address structure
@@ -957,6 +987,11 @@ static inline int igc_maybe_stop_tx(struct igc_ring *tx_ring, const u16 size)
return __igc_maybe_stop_tx(tx_ring, size);
}
+#define IGC_SET_FLAG(_input, _flag, _result) \
+ (((_flag) <= (_result)) ? \
+ ((u32)((_input) & (_flag)) * ((_result) / (_flag))) : \
+ ((u32)((_input) & (_flag)) / ((_flag) / (_result))))
+
static u32 igc_tx_cmd_type(struct sk_buff *skb, u32 tx_flags)
{
/* set type for advanced descriptor with frame checksum insertion */
@@ -964,6 +999,10 @@ static u32 igc_tx_cmd_type(struct sk_buff *skb, u32 tx_flags)
IGC_ADVTXD_DCMD_DEXT |
IGC_ADVTXD_DCMD_IFCS;
+ /* set timestamp bit if present */
+ cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_TSTAMP,
+ (IGC_ADVTXD_MAC_TSTAMP));
+
return cmd_type;
}
@@ -1162,6 +1201,26 @@ static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb,
first->bytecount = skb->len;
first->gso_segs = 1;
+ if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
+ struct igc_adapter *adapter = netdev_priv(tx_ring->netdev);
+
+ /* FIXME: add support for retrieving timestamps from
+ * the other timer registers before skipping the
+ * timestamping request.
+ */
+ if (adapter->tstamp_config.tx_type == HWTSTAMP_TX_ON &&
+ !test_and_set_bit_lock(__IGC_PTP_TX_IN_PROGRESS,
+ &adapter->state)) {
+ skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
+ tx_flags |= IGC_TX_FLAGS_TSTAMP;
+
+ adapter->ptp_tx_skb = skb_get(skb);
+ adapter->ptp_tx_start = jiffies;
+ } else {
+ adapter->tx_hwtstamp_skipped++;
+ }
+ }
+
/* record initial flags and protocol */
first->tx_flags = tx_flags;
first->protocol = protocol;
@@ -1269,6 +1328,10 @@ static void igc_process_skb_fields(struct igc_ring *rx_ring,
igc_rx_checksum(rx_ring, rx_desc, skb);
+ if (igc_test_staterr(rx_desc, IGC_RXDADV_STAT_TS) &&
+ !igc_test_staterr(rx_desc, IGC_RXDADV_STAT_TSIP))
+ igc_ptp_rx_rgtstamp(rx_ring->q_vector, skb);
+
skb_record_rx_queue(skb, rx_ring->queue_index);
skb->protocol = eth_type_trans(skb, rx_ring->netdev);
@@ -1388,6 +1451,12 @@ static struct sk_buff *igc_construct_skb(struct igc_ring *rx_ring,
if (unlikely(!skb))
return NULL;
+ if (unlikely(igc_test_staterr(rx_desc, IGC_RXDADV_STAT_TSIP))) {
+ igc_ptp_rx_pktstamp(rx_ring->q_vector, va, skb);
+ va += IGC_TS_HDR_LEN;
+ size -= IGC_TS_HDR_LEN;
+ }
+
/* Determine available headroom for copy */
headlen = size;
if (headlen > IGC_RX_HDR_LEN)
@@ -1485,7 +1554,6 @@ static bool igc_can_reuse_rx_page(struct igc_rx_buffer *rx_buffer)
* igc_is_non_eop - process handling of non-EOP buffers
* @rx_ring: Rx ring being processed
* @rx_desc: Rx descriptor for current buffer
- * @skb: current socket buffer containing buffer in progress
*
* This function updates next to clean. If the buffer is an EOP buffer
* this function exits returning false, otherwise it will place the
@@ -1565,9 +1633,56 @@ static void igc_put_rx_buffer(struct igc_ring *rx_ring,
rx_buffer->page = NULL;
}
+static inline unsigned int igc_rx_offset(struct igc_ring *rx_ring)
+{
+ return ring_uses_build_skb(rx_ring) ? IGC_SKB_PAD : 0;
+}
+
+static bool igc_alloc_mapped_page(struct igc_ring *rx_ring,
+ struct igc_rx_buffer *bi)
+{
+ struct page *page = bi->page;
+ dma_addr_t dma;
+
+ /* since we are recycling buffers we should seldom need to alloc */
+ if (likely(page))
+ return true;
+
+ /* alloc new page for storage */
+ page = dev_alloc_pages(igc_rx_pg_order(rx_ring));
+ if (unlikely(!page)) {
+ rx_ring->rx_stats.alloc_failed++;
+ return false;
+ }
+
+ /* map page for use */
+ dma = dma_map_page_attrs(rx_ring->dev, page, 0,
+ igc_rx_pg_size(rx_ring),
+ DMA_FROM_DEVICE,
+ IGC_RX_DMA_ATTR);
+
+ /* if mapping failed free memory back to system since
+ * there isn't much point in holding memory we can't use
+ */
+ if (dma_mapping_error(rx_ring->dev, dma)) {
+ __free_page(page);
+
+ rx_ring->rx_stats.alloc_failed++;
+ return false;
+ }
+
+ bi->dma = dma;
+ bi->page = page;
+ bi->page_offset = igc_rx_offset(rx_ring);
+ bi->pagecnt_bias = 1;
+
+ return true;
+}
+
/**
* igc_alloc_rx_buffers - Replace used receive buffers; packet split
- * @adapter: address of board private structure
+ * @rx_ring: rx descriptor ring
+ * @cleaned_count: number of buffers to clean
*/
static void igc_alloc_rx_buffers(struct igc_ring *rx_ring, u16 cleaned_count)
{
@@ -1725,52 +1840,6 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget)
return total_packets;
}
-static inline unsigned int igc_rx_offset(struct igc_ring *rx_ring)
-{
- return ring_uses_build_skb(rx_ring) ? IGC_SKB_PAD : 0;
-}
-
-static bool igc_alloc_mapped_page(struct igc_ring *rx_ring,
- struct igc_rx_buffer *bi)
-{
- struct page *page = bi->page;
- dma_addr_t dma;
-
- /* since we are recycling buffers we should seldom need to alloc */
- if (likely(page))
- return true;
-
- /* alloc new page for storage */
- page = dev_alloc_pages(igc_rx_pg_order(rx_ring));
- if (unlikely(!page)) {
- rx_ring->rx_stats.alloc_failed++;
- return false;
- }
-
- /* map page for use */
- dma = dma_map_page_attrs(rx_ring->dev, page, 0,
- igc_rx_pg_size(rx_ring),
- DMA_FROM_DEVICE,
- IGC_RX_DMA_ATTR);
-
- /* if mapping failed free memory back to system since
- * there isn't much point in holding memory we can't use
- */
- if (dma_mapping_error(rx_ring->dev, dma)) {
- __free_page(page);
-
- rx_ring->rx_stats.alloc_failed++;
- return false;
- }
-
- bi->dma = dma;
- bi->page = page;
- bi->page_offset = igc_rx_offset(rx_ring);
- bi->pagecnt_bias = 1;
-
- return true;
-}
-
/**
* igc_clean_tx_irq - Reclaim resources after transmit completes
* @q_vector: pointer to q_vector containing needed info
@@ -1942,6 +2011,1128 @@ static bool igc_clean_tx_irq(struct igc_q_vector *q_vector, int napi_budget)
return !!budget;
}
+static void igc_nfc_filter_restore(struct igc_adapter *adapter)
+{
+ struct igc_nfc_filter *rule;
+
+ spin_lock(&adapter->nfc_lock);
+
+ hlist_for_each_entry(rule, &adapter->nfc_filter_list, nfc_node)
+ igc_add_filter(adapter, rule);
+
+ spin_unlock(&adapter->nfc_lock);
+}
+
+/* If the filter to be added and an already existing filter express
+ * the same address and address type, it should be possible to only
+ * override the other configurations, for example the queue to steer
+ * traffic.
+ */
+static bool igc_mac_entry_can_be_used(const struct igc_mac_addr *entry,
+ const u8 *addr, const u8 flags)
+{
+ if (!(entry->state & IGC_MAC_STATE_IN_USE))
+ return true;
+
+ if ((entry->state & IGC_MAC_STATE_SRC_ADDR) !=
+ (flags & IGC_MAC_STATE_SRC_ADDR))
+ return false;
+
+ if (!ether_addr_equal(addr, entry->addr))
+ return false;
+
+ return true;
+}
+
+/* Add a MAC filter for 'addr' directing matching traffic to 'queue',
+ * 'flags' is used to indicate what kind of match is made, match is by
+ * default for the destination address, if matching by source address
+ * is desired the flag IGC_MAC_STATE_SRC_ADDR can be used.
+ */
+static int igc_add_mac_filter(struct igc_adapter *adapter,
+ const u8 *addr, const u8 queue)
+{
+ struct igc_hw *hw = &adapter->hw;
+ int rar_entries = hw->mac.rar_entry_count;
+ int i;
+
+ if (is_zero_ether_addr(addr))
+ return -EINVAL;
+
+ /* Search for the first empty entry in the MAC table.
+ * Do not touch entries at the end of the table reserved for the VF MAC
+ * addresses.
+ */
+ for (i = 0; i < rar_entries; i++) {
+ if (!igc_mac_entry_can_be_used(&adapter->mac_table[i],
+ addr, 0))
+ continue;
+
+ ether_addr_copy(adapter->mac_table[i].addr, addr);
+ adapter->mac_table[i].queue = queue;
+ adapter->mac_table[i].state |= IGC_MAC_STATE_IN_USE;
+
+ igc_rar_set_index(adapter, i);
+ return i;
+ }
+
+ return -ENOSPC;
+}
+
+/* Remove a MAC filter for 'addr' directing matching traffic to
+ * 'queue', 'flags' is used to indicate what kind of match need to be
+ * removed, match is by default for the destination address, if
+ * matching by source address is to be removed the flag
+ * IGC_MAC_STATE_SRC_ADDR can be used.
+ */
+static int igc_del_mac_filter(struct igc_adapter *adapter,
+ const u8 *addr, const u8 queue)
+{
+ struct igc_hw *hw = &adapter->hw;
+ int rar_entries = hw->mac.rar_entry_count;
+ int i;
+
+ if (is_zero_ether_addr(addr))
+ return -EINVAL;
+
+ /* Search for matching entry in the MAC table based on given address
+ * and queue. Do not touch entries at the end of the table reserved
+ * for the VF MAC addresses.
+ */
+ for (i = 0; i < rar_entries; i++) {
+ if (!(adapter->mac_table[i].state & IGC_MAC_STATE_IN_USE))
+ continue;
+ if (adapter->mac_table[i].state != 0)
+ continue;
+ if (adapter->mac_table[i].queue != queue)
+ continue;
+ if (!ether_addr_equal(adapter->mac_table[i].addr, addr))
+ continue;
+
+ /* When a filter for the default address is "deleted",
+ * we return it to its initial configuration
+ */
+ if (adapter->mac_table[i].state & IGC_MAC_STATE_DEFAULT) {
+ adapter->mac_table[i].state =
+ IGC_MAC_STATE_DEFAULT | IGC_MAC_STATE_IN_USE;
+ adapter->mac_table[i].queue = 0;
+ } else {
+ adapter->mac_table[i].state = 0;
+ adapter->mac_table[i].queue = 0;
+ memset(adapter->mac_table[i].addr, 0, ETH_ALEN);
+ }
+
+ igc_rar_set_index(adapter, i);
+ return 0;
+ }
+
+ return -ENOENT;
+}
+
+static int igc_uc_sync(struct net_device *netdev, const unsigned char *addr)
+{
+ struct igc_adapter *adapter = netdev_priv(netdev);
+ int ret;
+
+ ret = igc_add_mac_filter(adapter, addr, adapter->num_rx_queues);
+
+ return min_t(int, ret, 0);
+}
+
+static int igc_uc_unsync(struct net_device *netdev, const unsigned char *addr)
+{
+ struct igc_adapter *adapter = netdev_priv(netdev);
+
+ igc_del_mac_filter(adapter, addr, adapter->num_rx_queues);
+
+ return 0;
+}
+
+/**
+ * igc_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
+ * @netdev: network interface device structure
+ *
+ * The set_rx_mode entry point is called whenever the unicast or multicast
+ * address lists or the network interface flags are updated. This routine is
+ * responsible for configuring the hardware for proper unicast, multicast,
+ * promiscuous mode, and all-multi behavior.
+ */
+static void igc_set_rx_mode(struct net_device *netdev)
+{
+ struct igc_adapter *adapter = netdev_priv(netdev);
+ struct igc_hw *hw = &adapter->hw;
+ u32 rctl = 0, rlpml = MAX_JUMBO_FRAME_SIZE;
+ int count;
+
+ /* Check for Promiscuous and All Multicast modes */
+ if (netdev->flags & IFF_PROMISC) {
+ rctl |= IGC_RCTL_UPE | IGC_RCTL_MPE;
+ } else {
+ if (netdev->flags & IFF_ALLMULTI) {
+ rctl |= IGC_RCTL_MPE;
+ } else {
+ /* Write addresses to the MTA, if the attempt fails
+ * then we should just turn on promiscuous mode so
+ * that we can at least receive multicast traffic
+ */
+ count = igc_write_mc_addr_list(netdev);
+ if (count < 0)
+ rctl |= IGC_RCTL_MPE;
+ }
+ }
+
+ /* Write addresses to available RAR registers, if there is not
+ * sufficient space to store all the addresses then enable
+ * unicast promiscuous mode
+ */
+ if (__dev_uc_sync(netdev, igc_uc_sync, igc_uc_unsync))
+ rctl |= IGC_RCTL_UPE;
+
+ /* update state of unicast and multicast */
+ rctl |= rd32(IGC_RCTL) & ~(IGC_RCTL_UPE | IGC_RCTL_MPE);
+ wr32(IGC_RCTL, rctl);
+
+#if (PAGE_SIZE < 8192)
+ if (adapter->max_frame_size <= IGC_MAX_FRAME_BUILD_SKB)
+ rlpml = IGC_MAX_FRAME_BUILD_SKB;
+#endif
+ wr32(IGC_RLPML, rlpml);
+}
+
+/**
+ * igc_configure - configure the hardware for RX and TX
+ * @adapter: private board structure
+ */
+static void igc_configure(struct igc_adapter *adapter)
+{
+ struct net_device *netdev = adapter->netdev;
+ int i = 0;
+
+ igc_get_hw_control(adapter);
+ igc_set_rx_mode(netdev);
+
+ igc_setup_tctl(adapter);
+ igc_setup_mrqc(adapter);
+ igc_setup_rctl(adapter);
+
+ igc_nfc_filter_restore(adapter);
+ igc_configure_tx(adapter);
+ igc_configure_rx(adapter);
+
+ igc_rx_fifo_flush_base(&adapter->hw);
+
+ /* call igc_desc_unused which always leaves
+ * at least 1 descriptor unused to make sure
+ * next_to_use != next_to_clean
+ */
+ for (i = 0; i < adapter->num_rx_queues; i++) {
+ struct igc_ring *ring = adapter->rx_ring[i];
+
+ igc_alloc_rx_buffers(ring, igc_desc_unused(ring));
+ }
+}
+
+/**
+ * igc_write_ivar - configure ivar for given MSI-X vector
+ * @hw: pointer to the HW structure
+ * @msix_vector: vector number we are allocating to a given ring
+ * @index: row index of IVAR register to write within IVAR table
+ * @offset: column offset of in IVAR, should be multiple of 8
+ *
+ * The IVAR table consists of 2 columns,
+ * each containing an cause allocation for an Rx and Tx ring, and a
+ * variable number of rows depending on the number of queues supported.
+ */
+static void igc_write_ivar(struct igc_hw *hw, int msix_vector,
+ int index, int offset)
+{
+ u32 ivar = array_rd32(IGC_IVAR0, index);
+
+ /* clear any bits that are currently set */
+ ivar &= ~((u32)0xFF << offset);
+
+ /* write vector and valid bit */
+ ivar |= (msix_vector | IGC_IVAR_VALID) << offset;
+
+ array_wr32(IGC_IVAR0, index, ivar);
+}
+
+static void igc_assign_vector(struct igc_q_vector *q_vector, int msix_vector)
+{
+ struct igc_adapter *adapter = q_vector->adapter;
+ struct igc_hw *hw = &adapter->hw;
+ int rx_queue = IGC_N0_QUEUE;
+ int tx_queue = IGC_N0_QUEUE;
+
+ if (q_vector->rx.ring)
+ rx_queue = q_vector->rx.ring->reg_idx;
+ if (q_vector->tx.ring)
+ tx_queue = q_vector->tx.ring->reg_idx;
+
+ switch (hw->mac.type) {
+ case igc_i225:
+ if (rx_queue > IGC_N0_QUEUE)
+ igc_write_ivar(hw, msix_vector,
+ rx_queue >> 1,
+ (rx_queue & 0x1) << 4);
+ if (tx_queue > IGC_N0_QUEUE)
+ igc_write_ivar(hw, msix_vector,
+ tx_queue >> 1,
+ ((tx_queue & 0x1) << 4) + 8);
+ q_vector->eims_value = BIT(msix_vector);
+ break;
+ default:
+ WARN_ONCE(hw->mac.type != igc_i225, "Wrong MAC type\n");
+ break;
+ }
+
+ /* add q_vector eims value to global eims_enable_mask */
+ adapter->eims_enable_mask |= q_vector->eims_value;
+
+ /* configure q_vector to set itr on first interrupt */
+ q_vector->set_itr = 1;
+}
+
+/**
+ * igc_configure_msix - Configure MSI-X hardware
+ * @adapter: Pointer to adapter structure
+ *
+ * igc_configure_msix sets up the hardware to properly
+ * generate MSI-X interrupts.
+ */
+static void igc_configure_msix(struct igc_adapter *adapter)
+{
+ struct igc_hw *hw = &adapter->hw;
+ int i, vector = 0;
+ u32 tmp;
+
+ adapter->eims_enable_mask = 0;
+
+ /* set vector for other causes, i.e. link changes */
+ switch (hw->mac.type) {
+ case igc_i225:
+ /* Turn on MSI-X capability first, or our settings
+ * won't stick. And it will take days to debug.
+ */
+ wr32(IGC_GPIE, IGC_GPIE_MSIX_MODE |
+ IGC_GPIE_PBA | IGC_GPIE_EIAME |
+ IGC_GPIE_NSICR);
+
+ /* enable msix_other interrupt */
+ adapter->eims_other = BIT(vector);
+ tmp = (vector++ | IGC_IVAR_VALID) << 8;
+
+ wr32(IGC_IVAR_MISC, tmp);
+ break;
+ default:
+ /* do nothing, since nothing else supports MSI-X */
+ break;
+ } /* switch (hw->mac.type) */
+
+ adapter->eims_enable_mask |= adapter->eims_other;
+
+ for (i = 0; i < adapter->num_q_vectors; i++)
+ igc_assign_vector(adapter->q_vector[i], vector++);
+
+ wrfl();
+}
+
+/**
+ * igc_irq_enable - Enable default interrupt generation settings
+ * @adapter: board private structure
+ */
+static void igc_irq_enable(struct igc_adapter *adapter)
+{
+ struct igc_hw *hw = &adapter->hw;
+
+ if (adapter->msix_entries) {
+ u32 ims = IGC_IMS_LSC | IGC_IMS_DOUTSYNC | IGC_IMS_DRSTA;
+ u32 regval = rd32(IGC_EIAC);
+
+ wr32(IGC_EIAC, regval | adapter->eims_enable_mask);
+ regval = rd32(IGC_EIAM);
+ wr32(IGC_EIAM, regval | adapter->eims_enable_mask);
+ wr32(IGC_EIMS, adapter->eims_enable_mask);
+ wr32(IGC_IMS, ims);
+ } else {
+ wr32(IGC_IMS, IMS_ENABLE_MASK | IGC_IMS_DRSTA);
+ wr32(IGC_IAM, IMS_ENABLE_MASK | IGC_IMS_DRSTA);
+ }
+}
+
+/**
+ * igc_irq_disable - Mask off interrupt generation on the NIC
+ * @adapter: board private structure
+ */
+static void igc_irq_disable(struct igc_adapter *adapter)
+{
+ struct igc_hw *hw = &adapter->hw;
+
+ if (adapter->msix_entries) {
+ u32 regval = rd32(IGC_EIAM);
+
+ wr32(IGC_EIAM, regval & ~adapter->eims_enable_mask);
+ wr32(IGC_EIMC, adapter->eims_enable_mask);
+ regval = rd32(IGC_EIAC);
+ wr32(IGC_EIAC, regval & ~adapter->eims_enable_mask);
+ }
+
+ wr32(IGC_IAM, 0);
+ wr32(IGC_IMC, ~0);
+ wrfl();
+
+ if (adapter->msix_entries) {
+ int vector = 0, i;
+
+ synchronize_irq(adapter->msix_entries[vector++].vector);
+
+ for (i = 0; i < adapter->num_q_vectors; i++)
+ synchronize_irq(adapter->msix_entries[vector++].vector);
+ } else {
+ synchronize_irq(adapter->pdev->irq);
+ }
+}
+
+void igc_set_flag_queue_pairs(struct igc_adapter *adapter,
+ const u32 max_rss_queues)
+{
+ /* Determine if we need to pair queues. */
+ /* If rss_queues > half of max_rss_queues, pair the queues in
+ * order to conserve interrupts due to limited supply.
+ */
+ if (adapter->rss_queues > (max_rss_queues / 2))
+ adapter->flags |= IGC_FLAG_QUEUE_PAIRS;
+ else
+ adapter->flags &= ~IGC_FLAG_QUEUE_PAIRS;
+}
+
+unsigned int igc_get_max_rss_queues(struct igc_adapter *adapter)
+{
+ unsigned int max_rss_queues;
+
+ /* Determine the maximum number of RSS queues supported. */
+ max_rss_queues = IGC_MAX_RX_QUEUES;
+
+ return max_rss_queues;
+}
+
+static void igc_init_queue_configuration(struct igc_adapter *adapter)
+{
+ u32 max_rss_queues;
+
+ max_rss_queues = igc_get_max_rss_queues(adapter);
+ adapter->rss_queues = min_t(u32, max_rss_queues, num_online_cpus());
+
+ igc_set_flag_queue_pairs(adapter, max_rss_queues);
+}
+
+/**
+ * igc_reset_q_vector - Reset config for interrupt vector
+ * @adapter: board private structure to initialize
+ * @v_idx: Index of vector to be reset
+ *
+ * If NAPI is enabled it will delete any references to the
+ * NAPI struct. This is preparation for igc_free_q_vector.
+ */
+static void igc_reset_q_vector(struct igc_adapter *adapter, int v_idx)
+{
+ struct igc_q_vector *q_vector = adapter->q_vector[v_idx];
+
+ /* if we're coming from igc_set_interrupt_capability, the vectors are
+ * not yet allocated
+ */
+ if (!q_vector)
+ return;
+
+ if (q_vector->tx.ring)
+ adapter->tx_ring[q_vector->tx.ring->queue_index] = NULL;
+
+ if (q_vector->rx.ring)
+ adapter->rx_ring[q_vector->rx.ring->queue_index] = NULL;
+
+ netif_napi_del(&q_vector->napi);
+}
+
+/**
+ * igc_free_q_vector - Free memory allocated for specific interrupt vector
+ * @adapter: board private structure to initialize
+ * @v_idx: Index of vector to be freed
+ *
+ * This function frees the memory allocated to the q_vector.
+ */
+static void igc_free_q_vector(struct igc_adapter *adapter, int v_idx)
+{
+ struct igc_q_vector *q_vector = adapter->q_vector[v_idx];
+
+ adapter->q_vector[v_idx] = NULL;
+
+ /* igc_get_stats64() might access the rings on this vector,
+ * we must wait a grace period before freeing it.
+ */
+ if (q_vector)
+ kfree_rcu(q_vector, rcu);
+}
+
+/**
+ * igc_free_q_vectors - Free memory allocated for interrupt vectors
+ * @adapter: board private structure to initialize
+ *
+ * This function frees the memory allocated to the q_vectors. In addition if
+ * NAPI is enabled it will delete any references to the NAPI struct prior
+ * to freeing the q_vector.
+ */
+static void igc_free_q_vectors(struct igc_adapter *adapter)
+{
+ int v_idx = adapter->num_q_vectors;
+
+ adapter->num_tx_queues = 0;
+ adapter->num_rx_queues = 0;
+ adapter->num_q_vectors = 0;
+
+ while (v_idx--) {
+ igc_reset_q_vector(adapter, v_idx);
+ igc_free_q_vector(adapter, v_idx);
+ }
+}
+
+/**
+ * igc_update_itr - update the dynamic ITR value based on statistics
+ * @q_vector: pointer to q_vector
+ * @ring_container: ring info to update the itr for
+ *
+ * Stores a new ITR value based on packets and byte
+ * counts during the last interrupt. The advantage of per interrupt
+ * computation is faster updates and more accurate ITR for the current
+ * traffic pattern. Constants in this function were computed
+ * based on theoretical maximum wire speed and thresholds were set based
+ * on testing data as well as attempting to minimize response time
+ * while increasing bulk throughput.
+ * NOTE: These calculations are only valid when operating in a single-
+ * queue environment.
+ */
+static void igc_update_itr(struct igc_q_vector *q_vector,
+ struct igc_ring_container *ring_container)
+{
+ unsigned int packets = ring_container->total_packets;
+ unsigned int bytes = ring_container->total_bytes;
+ u8 itrval = ring_container->itr;
+
+ /* no packets, exit with status unchanged */
+ if (packets == 0)
+ return;
+
+ switch (itrval) {
+ case lowest_latency:
+ /* handle TSO and jumbo frames */
+ if (bytes / packets > 8000)
+ itrval = bulk_latency;
+ else if ((packets < 5) && (bytes > 512))
+ itrval = low_latency;
+ break;
+ case low_latency: /* 50 usec aka 20000 ints/s */
+ if (bytes > 10000) {
+ /* this if handles the TSO accounting */
+ if (bytes / packets > 8000)
+ itrval = bulk_latency;
+ else if ((packets < 10) || ((bytes / packets) > 1200))
+ itrval = bulk_latency;
+ else if ((packets > 35))
+ itrval = lowest_latency;
+ } else if (bytes / packets > 2000) {
+ itrval = bulk_latency;
+ } else if (packets <= 2 && bytes < 512) {
+ itrval = lowest_latency;
+ }
+ break;
+ case bulk_latency: /* 250 usec aka 4000 ints/s */
+ if (bytes > 25000) {
+ if (packets > 35)
+ itrval = low_latency;
+ } else if (bytes < 1500) {
+ itrval = low_latency;
+ }
+ break;
+ }
+
+ /* clear work counters since we have the values we need */
+ ring_container->total_bytes = 0;
+ ring_container->total_packets = 0;
+
+ /* write updated itr to ring container */
+ ring_container->itr = itrval;
+}
+
+static void igc_set_itr(struct igc_q_vector *q_vector)
+{
+ struct igc_adapter *adapter = q_vector->adapter;
+ u32 new_itr = q_vector->itr_val;
+ u8 current_itr = 0;
+
+ /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
+ switch (adapter->link_speed) {
+ case SPEED_10:
+ case SPEED_100:
+ current_itr = 0;
+ new_itr = IGC_4K_ITR;
+ goto set_itr_now;
+ default:
+ break;
+ }
+
+ igc_update_itr(q_vector, &q_vector->tx);
+ igc_update_itr(q_vector, &q_vector->rx);
+
+ current_itr = max(q_vector->rx.itr, q_vector->tx.itr);
+
+ /* conservative mode (itr 3) eliminates the lowest_latency setting */
+ if (current_itr == lowest_latency &&
+ ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
+ (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
+ current_itr = low_latency;
+
+ switch (current_itr) {
+ /* counts and packets in update_itr are dependent on these numbers */
+ case lowest_latency:
+ new_itr = IGC_70K_ITR; /* 70,000 ints/sec */
+ break;
+ case low_latency:
+ new_itr = IGC_20K_ITR; /* 20,000 ints/sec */
+ break;
+ case bulk_latency:
+ new_itr = IGC_4K_ITR; /* 4,000 ints/sec */
+ break;
+ default:
+ break;
+ }
+
+set_itr_now:
+ if (new_itr != q_vector->itr_val) {
+ /* this attempts to bias the interrupt rate towards Bulk
+ * by adding intermediate steps when interrupt rate is
+ * increasing
+ */
+ new_itr = new_itr > q_vector->itr_val ?
+ max((new_itr * q_vector->itr_val) /
+ (new_itr + (q_vector->itr_val >> 2)),
+ new_itr) : new_itr;
+ /* Don't write the value here; it resets the adapter's
+ * internal timer, and causes us to delay far longer than
+ * we should between interrupts. Instead, we write the ITR
+ * value at the beginning of the next interrupt so the timing
+ * ends up being correct.
+ */
+ q_vector->itr_val = new_itr;
+ q_vector->set_itr = 1;
+ }
+}
+
+static void igc_reset_interrupt_capability(struct igc_adapter *adapter)
+{
+ int v_idx = adapter->num_q_vectors;
+
+ if (adapter->msix_entries) {
+ pci_disable_msix(adapter->pdev);
+ kfree(adapter->msix_entries);
+ adapter->msix_entries = NULL;
+ } else if (adapter->flags & IGC_FLAG_HAS_MSI) {
+ pci_disable_msi(adapter->pdev);
+ }
+
+ while (v_idx--)
+ igc_reset_q_vector(adapter, v_idx);
+}
+
+/**
+ * igc_set_interrupt_capability - set MSI or MSI-X if supported
+ * @adapter: Pointer to adapter structure
+ * @msix: boolean value for MSI-X capability
+ *
+ * Attempt to configure interrupts using the best available
+ * capabilities of the hardware and kernel.
+ */
+static void igc_set_interrupt_capability(struct igc_adapter *adapter,
+ bool msix)
+{
+ int numvecs, i;
+ int err;
+
+ if (!msix)
+ goto msi_only;
+ adapter->flags |= IGC_FLAG_HAS_MSIX;
+
+ /* Number of supported queues. */
+ adapter->num_rx_queues = adapter->rss_queues;
+
+ adapter->num_tx_queues = adapter->rss_queues;
+
+ /* start with one vector for every Rx queue */
+ numvecs = adapter->num_rx_queues;
+
+ /* if Tx handler is separate add 1 for every Tx queue */
+ if (!(adapter->flags & IGC_FLAG_QUEUE_PAIRS))
+ numvecs += adapter->num_tx_queues;
+
+ /* store the number of vectors reserved for queues */
+ adapter->num_q_vectors = numvecs;
+
+ /* add 1 vector for link status interrupts */
+ numvecs++;
+
+ adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
+ GFP_KERNEL);
+
+ if (!adapter->msix_entries)
+ return;
+
+ /* populate entry values */
+ for (i = 0; i < numvecs; i++)
+ adapter->msix_entries[i].entry = i;
+
+ err = pci_enable_msix_range(adapter->pdev,
+ adapter->msix_entries,
+ numvecs,
+ numvecs);
+ if (err > 0)
+ return;
+
+ kfree(adapter->msix_entries);
+ adapter->msix_entries = NULL;
+
+ igc_reset_interrupt_capability(adapter);
+
+msi_only:
+ adapter->flags &= ~IGC_FLAG_HAS_MSIX;
+
+ adapter->rss_queues = 1;
+ adapter->flags |= IGC_FLAG_QUEUE_PAIRS;
+ adapter->num_rx_queues = 1;
+ adapter->num_tx_queues = 1;
+ adapter->num_q_vectors = 1;
+ if (!pci_enable_msi(adapter->pdev))
+ adapter->flags |= IGC_FLAG_HAS_MSI;
+}
+
+/**
+ * igc_update_ring_itr - update the dynamic ITR value based on packet size
+ * @q_vector: pointer to q_vector
+ *
+ * Stores a new ITR value based on strictly on packet size. This
+ * algorithm is less sophisticated than that used in igc_update_itr,
+ * due to the difficulty of synchronizing statistics across multiple
+ * receive rings. The divisors and thresholds used by this function
+ * were determined based on theoretical maximum wire speed and testing
+ * data, in order to minimize response time while increasing bulk
+ * throughput.
+ * NOTE: This function is called only when operating in a multiqueue
+ * receive environment.
+ */
+static void igc_update_ring_itr(struct igc_q_vector *q_vector)
+{
+ struct igc_adapter *adapter = q_vector->adapter;
+ int new_val = q_vector->itr_val;
+ int avg_wire_size = 0;
+ unsigned int packets;
+
+ /* For non-gigabit speeds, just fix the interrupt rate at 4000
+ * ints/sec - ITR timer value of 120 ticks.
+ */
+ switch (adapter->link_speed) {
+ case SPEED_10:
+ case SPEED_100:
+ new_val = IGC_4K_ITR;
+ goto set_itr_val;
+ default:
+ break;
+ }
+
+ packets = q_vector->rx.total_packets;
+ if (packets)
+ avg_wire_size = q_vector->rx.total_bytes / packets;
+
+ packets = q_vector->tx.total_packets;
+ if (packets)
+ avg_wire_size = max_t(u32, avg_wire_size,
+ q_vector->tx.total_bytes / packets);
+
+ /* if avg_wire_size isn't set no work was done */
+ if (!avg_wire_size)
+ goto clear_counts;
+
+ /* Add 24 bytes to size to account for CRC, preamble, and gap */
+ avg_wire_size += 24;
+
+ /* Don't starve jumbo frames */
+ avg_wire_size = min(avg_wire_size, 3000);
+
+ /* Give a little boost to mid-size frames */
+ if (avg_wire_size > 300 && avg_wire_size < 1200)
+ new_val = avg_wire_size / 3;
+ else
+ new_val = avg_wire_size / 2;
+
+ /* conservative mode (itr 3) eliminates the lowest_latency setting */
+ if (new_val < IGC_20K_ITR &&
+ ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
+ (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
+ new_val = IGC_20K_ITR;
+
+set_itr_val:
+ if (new_val != q_vector->itr_val) {
+ q_vector->itr_val = new_val;
+ q_vector->set_itr = 1;
+ }
+clear_counts:
+ q_vector->rx.total_bytes = 0;
+ q_vector->rx.total_packets = 0;
+ q_vector->tx.total_bytes = 0;
+ q_vector->tx.total_packets = 0;
+}
+
+static void igc_ring_irq_enable(struct igc_q_vector *q_vector)
+{
+ struct igc_adapter *adapter = q_vector->adapter;
+ struct igc_hw *hw = &adapter->hw;
+
+ if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) ||
+ (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) {
+ if (adapter->num_q_vectors == 1)
+ igc_set_itr(q_vector);
+ else
+ igc_update_ring_itr(q_vector);
+ }
+
+ if (!test_bit(__IGC_DOWN, &adapter->state)) {
+ if (adapter->msix_entries)
+ wr32(IGC_EIMS, q_vector->eims_value);
+ else
+ igc_irq_enable(adapter);
+ }
+}
+
+static void igc_add_ring(struct igc_ring *ring,
+ struct igc_ring_container *head)
+{
+ head->ring = ring;
+ head->count++;
+}
+
+/**
+ * igc_cache_ring_register - Descriptor ring to register mapping
+ * @adapter: board private structure to initialize
+ *
+ * Once we know the feature-set enabled for the device, we'll cache
+ * the register offset the descriptor ring is assigned to.
+ */
+static void igc_cache_ring_register(struct igc_adapter *adapter)
+{
+ int i = 0, j = 0;
+
+ switch (adapter->hw.mac.type) {
+ case igc_i225:
+ /* Fall through */
+ default:
+ for (; i < adapter->num_rx_queues; i++)
+ adapter->rx_ring[i]->reg_idx = i;
+ for (; j < adapter->num_tx_queues; j++)
+ adapter->tx_ring[j]->reg_idx = j;
+ break;
+ }
+}
+
+/**
+ * igc_poll - NAPI Rx polling callback
+ * @napi: napi polling structure
+ * @budget: count of how many packets we should handle
+ */
+static int igc_poll(struct napi_struct *napi, int budget)
+{
+ struct igc_q_vector *q_vector = container_of(napi,
+ struct igc_q_vector,
+ napi);
+ bool clean_complete = true;
+ int work_done = 0;
+
+ if (q_vector->tx.ring)
+ clean_complete = igc_clean_tx_irq(q_vector, budget);
+
+ if (q_vector->rx.ring) {
+ int cleaned = igc_clean_rx_irq(q_vector, budget);
+
+ work_done += cleaned;
+ if (cleaned >= budget)
+ clean_complete = false;
+ }
+
+ /* If all work not completed, return budget and keep polling */
+ if (!clean_complete)
+ return budget;
+
+ /* Exit the polling mode, but don't re-enable interrupts if stack might
+ * poll us due to busy-polling
+ */
+ if (likely(napi_complete_done(napi, work_done)))
+ igc_ring_irq_enable(q_vector);
+
+ return min(work_done, budget - 1);
+}
+
+/**
+ * igc_alloc_q_vector - Allocate memory for a single interrupt vector
+ * @adapter: board private structure to initialize
+ * @v_count: q_vectors allocated on adapter, used for ring interleaving
+ * @v_idx: index of vector in adapter struct
+ * @txr_count: total number of Tx rings to allocate
+ * @txr_idx: index of first Tx ring to allocate
+ * @rxr_count: total number of Rx rings to allocate
+ * @rxr_idx: index of first Rx ring to allocate
+ *
+ * We allocate one q_vector. If allocation fails we return -ENOMEM.
+ */
+static int igc_alloc_q_vector(struct igc_adapter *adapter,
+ unsigned int v_count, unsigned int v_idx,
+ unsigned int txr_count, unsigned int txr_idx,
+ unsigned int rxr_count, unsigned int rxr_idx)
+{
+ struct igc_q_vector *q_vector;
+ struct igc_ring *ring;
+ int ring_count;
+
+ /* igc only supports 1 Tx and/or 1 Rx queue per vector */
+ if (txr_count > 1 || rxr_count > 1)
+ return -ENOMEM;
+
+ ring_count = txr_count + rxr_count;
+
+ /* allocate q_vector and rings */
+ q_vector = adapter->q_vector[v_idx];
+ if (!q_vector)
+ q_vector = kzalloc(struct_size(q_vector, ring, ring_count),
+ GFP_KERNEL);
+ else
+ memset(q_vector, 0, struct_size(q_vector, ring, ring_count));
+ if (!q_vector)
+ return -ENOMEM;
+
+ /* initialize NAPI */
+ netif_napi_add(adapter->netdev, &q_vector->napi,
+ igc_poll, 64);
+
+ /* tie q_vector and adapter together */
+ adapter->q_vector[v_idx] = q_vector;
+ q_vector->adapter = adapter;
+
+ /* initialize work limits */
+ q_vector->tx.work_limit = adapter->tx_work_limit;
+
+ /* initialize ITR configuration */
+ q_vector->itr_register = adapter->io_addr + IGC_EITR(0);
+ q_vector->itr_val = IGC_START_ITR;
+
+ /* initialize pointer to rings */
+ ring = q_vector->ring;
+
+ /* initialize ITR */
+ if (rxr_count) {
+ /* rx or rx/tx vector */
+ if (!adapter->rx_itr_setting || adapter->rx_itr_setting > 3)
+ q_vector->itr_val = adapter->rx_itr_setting;
+ } else {
+ /* tx only vector */
+ if (!adapter->tx_itr_setting || adapter->tx_itr_setting > 3)
+ q_vector->itr_val = adapter->tx_itr_setting;
+ }
+
+ if (txr_count) {
+ /* assign generic ring traits */
+ ring->dev = &adapter->pdev->dev;
+ ring->netdev = adapter->netdev;
+
+ /* configure backlink on ring */
+ ring->q_vector = q_vector;
+
+ /* update q_vector Tx values */
+ igc_add_ring(ring, &q_vector->tx);
+
+ /* apply Tx specific ring traits */
+ ring->count = adapter->tx_ring_count;
+ ring->queue_index = txr_idx;
+
+ /* assign ring to adapter */
+ adapter->tx_ring[txr_idx] = ring;
+
+ /* push pointer to next ring */
+ ring++;
+ }
+
+ if (rxr_count) {
+ /* assign generic ring traits */
+ ring->dev = &adapter->pdev->dev;
+ ring->netdev = adapter->netdev;
+
+ /* configure backlink on ring */
+ ring->q_vector = q_vector;
+
+ /* update q_vector Rx values */
+ igc_add_ring(ring, &q_vector->rx);
+
+ /* apply Rx specific ring traits */
+ ring->count = adapter->rx_ring_count;
+ ring->queue_index = rxr_idx;
+
+ /* assign ring to adapter */
+ adapter->rx_ring[rxr_idx] = ring;
+ }
+
+ return 0;
+}
+
+/**
+ * igc_alloc_q_vectors - Allocate memory for interrupt vectors
+ * @adapter: board private structure to initialize
+ *
+ * We allocate one q_vector per queue interrupt. If allocation fails we
+ * return -ENOMEM.
+ */
+static int igc_alloc_q_vectors(struct igc_adapter *adapter)
+{
+ int rxr_remaining = adapter->num_rx_queues;
+ int txr_remaining = adapter->num_tx_queues;
+ int rxr_idx = 0, txr_idx = 0, v_idx = 0;
+ int q_vectors = adapter->num_q_vectors;
+ int err;
+
+ if (q_vectors >= (rxr_remaining + txr_remaining)) {
+ for (; rxr_remaining; v_idx++) {
+ err = igc_alloc_q_vector(adapter, q_vectors, v_idx,
+ 0, 0, 1, rxr_idx);
+
+ if (err)
+ goto err_out;
+
+ /* update counts and index */
+ rxr_remaining--;
+ rxr_idx++;
+ }
+ }
+
+ for (; v_idx < q_vectors; v_idx++) {
+ int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - v_idx);
+ int tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - v_idx);
+
+ err = igc_alloc_q_vector(adapter, q_vectors, v_idx,
+ tqpv, txr_idx, rqpv, rxr_idx);
+
+ if (err)
+ goto err_out;
+
+ /* update counts and index */
+ rxr_remaining -= rqpv;
+ txr_remaining -= tqpv;
+ rxr_idx++;
+ txr_idx++;
+ }
+
+ return 0;
+
+err_out:
+ adapter->num_tx_queues = 0;
+ adapter->num_rx_queues = 0;
+ adapter->num_q_vectors = 0;
+
+ while (v_idx--)
+ igc_free_q_vector(adapter, v_idx);
+
+ return -ENOMEM;
+}
+
+/**
+ * igc_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
+ * @adapter: Pointer to adapter structure
+ * @msix: boolean for MSI-X capability
+ *
+ * This function initializes the interrupts and allocates all of the queues.
+ */
+static int igc_init_interrupt_scheme(struct igc_adapter *adapter, bool msix)
+{
+ struct pci_dev *pdev = adapter->pdev;
+ int err = 0;
+
+ igc_set_interrupt_capability(adapter, msix);
+
+ err = igc_alloc_q_vectors(adapter);
+ if (err) {
+ dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
+ goto err_alloc_q_vectors;
+ }
+
+ igc_cache_ring_register(adapter);
+
+ return 0;
+
+err_alloc_q_vectors:
+ igc_reset_interrupt_capability(adapter);
+ return err;
+}
+
+/**
+ * igc_sw_init - Initialize general software structures (struct igc_adapter)
+ * @adapter: board private structure to initialize
+ *
+ * igc_sw_init initializes the Adapter private data structure.
+ * Fields are initialized based on PCI device information and
+ * OS network device settings (MTU size).
+ */
+static int igc_sw_init(struct igc_adapter *adapter)
+{
+ struct net_device *netdev = adapter->netdev;
+ struct pci_dev *pdev = adapter->pdev;
+ struct igc_hw *hw = &adapter->hw;
+
+ int size = sizeof(struct igc_mac_addr) * hw->mac.rar_entry_count;
+
+ pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
+
+ /* set default ring sizes */
+ adapter->tx_ring_count = IGC_DEFAULT_TXD;
+ adapter->rx_ring_count = IGC_DEFAULT_RXD;
+
+ /* set default ITR values */
+ adapter->rx_itr_setting = IGC_DEFAULT_ITR;
+ adapter->tx_itr_setting = IGC_DEFAULT_ITR;
+
+ /* set default work limits */
+ adapter->tx_work_limit = IGC_DEFAULT_TX_WORK;
+
+ /* adjust max frame to be at least the size of a standard frame */
+ adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN +
+ VLAN_HLEN;
+ adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
+
+ spin_lock_init(&adapter->nfc_lock);
+ spin_lock_init(&adapter->stats64_lock);
+ /* Assume MSI-X interrupts, will be checked during IRQ allocation */
+ adapter->flags |= IGC_FLAG_HAS_MSIX;
+
+ adapter->mac_table = kzalloc(size, GFP_ATOMIC);
+ if (!adapter->mac_table)
+ return -ENOMEM;
+
+ igc_init_queue_configuration(adapter);
+
+ /* This call may decrease the number of queues */
+ if (igc_init_interrupt_scheme(adapter, true)) {
+ dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
+ return -ENOMEM;
+ }
+
+ /* Explicitly disable IRQ since the NIC can be in any state. */
+ igc_irq_disable(adapter);
+
+ set_bit(__IGC_DOWN, &adapter->state);
+
+ return 0;
+}
+
/**
* igc_up - Open the interface and prepare it to handle traffic
* @adapter: board private structure
@@ -2163,18 +3354,6 @@ static void igc_nfc_filter_exit(struct igc_adapter *adapter)
spin_unlock(&adapter->nfc_lock);
}
-static void igc_nfc_filter_restore(struct igc_adapter *adapter)
-{
- struct igc_nfc_filter *rule;
-
- spin_lock(&adapter->nfc_lock);
-
- hlist_for_each_entry(rule, &adapter->nfc_filter_list, nfc_node)
- igc_add_filter(adapter, rule);
-
- spin_unlock(&adapter->nfc_lock);
-}
-
/**
* igc_down - Close the interface
* @adapter: board private structure
@@ -2398,105 +3577,6 @@ igc_features_check(struct sk_buff *skb, struct net_device *dev,
return features;
}
-/**
- * igc_configure - configure the hardware for RX and TX
- * @adapter: private board structure
- */
-static void igc_configure(struct igc_adapter *adapter)
-{
- struct net_device *netdev = adapter->netdev;
- int i = 0;
-
- igc_get_hw_control(adapter);
- igc_set_rx_mode(netdev);
-
- igc_setup_tctl(adapter);
- igc_setup_mrqc(adapter);
- igc_setup_rctl(adapter);
-
- igc_nfc_filter_restore(adapter);
- igc_configure_tx(adapter);
- igc_configure_rx(adapter);
-
- igc_rx_fifo_flush_base(&adapter->hw);
-
- /* call igc_desc_unused which always leaves
- * at least 1 descriptor unused to make sure
- * next_to_use != next_to_clean
- */
- for (i = 0; i < adapter->num_rx_queues; i++) {
- struct igc_ring *ring = adapter->rx_ring[i];
-
- igc_alloc_rx_buffers(ring, igc_desc_unused(ring));
- }
-}
-
-/**
- * igc_rar_set_index - Sync RAL[index] and RAH[index] registers with MAC table
- * @adapter: address of board private structure
- * @index: Index of the RAR entry which need to be synced with MAC table
- */
-static void igc_rar_set_index(struct igc_adapter *adapter, u32 index)
-{
- u8 *addr = adapter->mac_table[index].addr;
- struct igc_hw *hw = &adapter->hw;
- u32 rar_low, rar_high;
-
- /* HW expects these to be in network order when they are plugged
- * into the registers which are little endian. In order to guarantee
- * that ordering we need to do an leXX_to_cpup here in order to be
- * ready for the byteswap that occurs with writel
- */
- rar_low = le32_to_cpup((__le32 *)(addr));
- rar_high = le16_to_cpup((__le16 *)(addr + 4));
-
- /* Indicate to hardware the Address is Valid. */
- if (adapter->mac_table[index].state & IGC_MAC_STATE_IN_USE) {
- if (is_valid_ether_addr(addr))
- rar_high |= IGC_RAH_AV;
-
- rar_high |= IGC_RAH_POOL_1 <<
- adapter->mac_table[index].queue;
- }
-
- wr32(IGC_RAL(index), rar_low);
- wrfl();
- wr32(IGC_RAH(index), rar_high);
- wrfl();
-}
-
-/* Set default MAC address for the PF in the first RAR entry */
-static void igc_set_default_mac_filter(struct igc_adapter *adapter)
-{
- struct igc_mac_addr *mac_table = &adapter->mac_table[0];
-
- ether_addr_copy(mac_table->addr, adapter->hw.mac.addr);
- mac_table->state = IGC_MAC_STATE_DEFAULT | IGC_MAC_STATE_IN_USE;
-
- igc_rar_set_index(adapter, 0);
-}
-
-/* If the filter to be added and an already existing filter express
- * the same address and address type, it should be possible to only
- * override the other configurations, for example the queue to steer
- * traffic.
- */
-static bool igc_mac_entry_can_be_used(const struct igc_mac_addr *entry,
- const u8 *addr, const u8 flags)
-{
- if (!(entry->state & IGC_MAC_STATE_IN_USE))
- return true;
-
- if ((entry->state & IGC_MAC_STATE_SRC_ADDR) !=
- (flags & IGC_MAC_STATE_SRC_ADDR))
- return false;
-
- if (!ether_addr_equal(addr, entry->addr))
- return false;
-
- return true;
-}
-
/* Add a MAC filter for 'addr' directing matching traffic to 'queue',
* 'flags' is used to indicate what kind of match is made, match is by
* default for the destination address, if matching by source address
@@ -2597,159 +3677,20 @@ int igc_del_mac_steering_filter(struct igc_adapter *adapter,
IGC_MAC_STATE_QUEUE_STEERING | flags);
}
-/* Add a MAC filter for 'addr' directing matching traffic to 'queue',
- * 'flags' is used to indicate what kind of match is made, match is by
- * default for the destination address, if matching by source address
- * is desired the flag IGC_MAC_STATE_SRC_ADDR can be used.
- */
-static int igc_add_mac_filter(struct igc_adapter *adapter,
- const u8 *addr, const u8 queue)
-{
- struct igc_hw *hw = &adapter->hw;
- int rar_entries = hw->mac.rar_entry_count;
- int i;
-
- if (is_zero_ether_addr(addr))
- return -EINVAL;
-
- /* Search for the first empty entry in the MAC table.
- * Do not touch entries at the end of the table reserved for the VF MAC
- * addresses.
- */
- for (i = 0; i < rar_entries; i++) {
- if (!igc_mac_entry_can_be_used(&adapter->mac_table[i],
- addr, 0))
- continue;
-
- ether_addr_copy(adapter->mac_table[i].addr, addr);
- adapter->mac_table[i].queue = queue;
- adapter->mac_table[i].state |= IGC_MAC_STATE_IN_USE;
-
- igc_rar_set_index(adapter, i);
- return i;
- }
-
- return -ENOSPC;
-}
-
-/* Remove a MAC filter for 'addr' directing matching traffic to
- * 'queue', 'flags' is used to indicate what kind of match need to be
- * removed, match is by default for the destination address, if
- * matching by source address is to be removed the flag
- * IGC_MAC_STATE_SRC_ADDR can be used.
- */
-static int igc_del_mac_filter(struct igc_adapter *adapter,
- const u8 *addr, const u8 queue)
-{
- struct igc_hw *hw = &adapter->hw;
- int rar_entries = hw->mac.rar_entry_count;
- int i;
-
- if (is_zero_ether_addr(addr))
- return -EINVAL;
-
- /* Search for matching entry in the MAC table based on given address
- * and queue. Do not touch entries at the end of the table reserved
- * for the VF MAC addresses.
- */
- for (i = 0; i < rar_entries; i++) {
- if (!(adapter->mac_table[i].state & IGC_MAC_STATE_IN_USE))
- continue;
- if (adapter->mac_table[i].state != 0)
- continue;
- if (adapter->mac_table[i].queue != queue)
- continue;
- if (!ether_addr_equal(adapter->mac_table[i].addr, addr))
- continue;
-
- /* When a filter for the default address is "deleted",
- * we return it to its initial configuration
- */
- if (adapter->mac_table[i].state & IGC_MAC_STATE_DEFAULT) {
- adapter->mac_table[i].state =
- IGC_MAC_STATE_DEFAULT | IGC_MAC_STATE_IN_USE;
- adapter->mac_table[i].queue = 0;
- } else {
- adapter->mac_table[i].state = 0;
- adapter->mac_table[i].queue = 0;
- memset(adapter->mac_table[i].addr, 0, ETH_ALEN);
- }
-
- igc_rar_set_index(adapter, i);
- return 0;
- }
-
- return -ENOENT;
-}
-
-static int igc_uc_sync(struct net_device *netdev, const unsigned char *addr)
+static void igc_tsync_interrupt(struct igc_adapter *adapter)
{
- struct igc_adapter *adapter = netdev_priv(netdev);
- int ret;
-
- ret = igc_add_mac_filter(adapter, addr, adapter->num_rx_queues);
-
- return min_t(int, ret, 0);
-}
-
-static int igc_uc_unsync(struct net_device *netdev, const unsigned char *addr)
-{
- struct igc_adapter *adapter = netdev_priv(netdev);
-
- igc_del_mac_filter(adapter, addr, adapter->num_rx_queues);
-
- return 0;
-}
-
-/**
- * igc_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
- * @netdev: network interface device structure
- *
- * The set_rx_mode entry point is called whenever the unicast or multicast
- * address lists or the network interface flags are updated. This routine is
- * responsible for configuring the hardware for proper unicast, multicast,
- * promiscuous mode, and all-multi behavior.
- */
-static void igc_set_rx_mode(struct net_device *netdev)
-{
- struct igc_adapter *adapter = netdev_priv(netdev);
struct igc_hw *hw = &adapter->hw;
- u32 rctl = 0, rlpml = MAX_JUMBO_FRAME_SIZE;
- int count;
+ u32 tsicr = rd32(IGC_TSICR);
+ u32 ack = 0;
- /* Check for Promiscuous and All Multicast modes */
- if (netdev->flags & IFF_PROMISC) {
- rctl |= IGC_RCTL_UPE | IGC_RCTL_MPE;
- } else {
- if (netdev->flags & IFF_ALLMULTI) {
- rctl |= IGC_RCTL_MPE;
- } else {
- /* Write addresses to the MTA, if the attempt fails
- * then we should just turn on promiscuous mode so
- * that we can at least receive multicast traffic
- */
- count = igc_write_mc_addr_list(netdev);
- if (count < 0)
- rctl |= IGC_RCTL_MPE;
- }
+ if (tsicr & IGC_TSICR_TXTS) {
+ /* retrieve hardware timestamp */
+ schedule_work(&adapter->ptp_tx_work);
+ ack |= IGC_TSICR_TXTS;
}
- /* Write addresses to available RAR registers, if there is not
- * sufficient space to store all the addresses then enable
- * unicast promiscuous mode
- */
- if (__dev_uc_sync(netdev, igc_uc_sync, igc_uc_unsync))
- rctl |= IGC_RCTL_UPE;
-
- /* update state of unicast and multicast */
- rctl |= rd32(IGC_RCTL) & ~(IGC_RCTL_UPE | IGC_RCTL_MPE);
- wr32(IGC_RCTL, rctl);
-
-#if (PAGE_SIZE < 8192)
- if (adapter->max_frame_size <= IGC_MAX_FRAME_BUILD_SKB)
- rlpml = IGC_MAX_FRAME_BUILD_SKB;
-#endif
- wr32(IGC_RLPML, rlpml);
+ /* acknowledge the interrupts */
+ wr32(IGC_TSICR, ack);
}
/**
@@ -2779,114 +3720,28 @@ static irqreturn_t igc_msix_other(int irq, void *data)
mod_timer(&adapter->watchdog_timer, jiffies + 1);
}
+ if (icr & IGC_ICR_TS)
+ igc_tsync_interrupt(adapter);
+
wr32(IGC_EIMS, adapter->eims_other);
return IRQ_HANDLED;
}
-/**
- * igc_write_ivar - configure ivar for given MSI-X vector
- * @hw: pointer to the HW structure
- * @msix_vector: vector number we are allocating to a given ring
- * @index: row index of IVAR register to write within IVAR table
- * @offset: column offset of in IVAR, should be multiple of 8
- *
- * The IVAR table consists of 2 columns,
- * each containing an cause allocation for an Rx and Tx ring, and a
- * variable number of rows depending on the number of queues supported.
- */
-static void igc_write_ivar(struct igc_hw *hw, int msix_vector,
- int index, int offset)
-{
- u32 ivar = array_rd32(IGC_IVAR0, index);
-
- /* clear any bits that are currently set */
- ivar &= ~((u32)0xFF << offset);
-
- /* write vector and valid bit */
- ivar |= (msix_vector | IGC_IVAR_VALID) << offset;
-
- array_wr32(IGC_IVAR0, index, ivar);
-}
-
-static void igc_assign_vector(struct igc_q_vector *q_vector, int msix_vector)
-{
- struct igc_adapter *adapter = q_vector->adapter;
- struct igc_hw *hw = &adapter->hw;
- int rx_queue = IGC_N0_QUEUE;
- int tx_queue = IGC_N0_QUEUE;
-
- if (q_vector->rx.ring)
- rx_queue = q_vector->rx.ring->reg_idx;
- if (q_vector->tx.ring)
- tx_queue = q_vector->tx.ring->reg_idx;
-
- switch (hw->mac.type) {
- case igc_i225:
- if (rx_queue > IGC_N0_QUEUE)
- igc_write_ivar(hw, msix_vector,
- rx_queue >> 1,
- (rx_queue & 0x1) << 4);
- if (tx_queue > IGC_N0_QUEUE)
- igc_write_ivar(hw, msix_vector,
- tx_queue >> 1,
- ((tx_queue & 0x1) << 4) + 8);
- q_vector->eims_value = BIT(msix_vector);
- break;
- default:
- WARN_ONCE(hw->mac.type != igc_i225, "Wrong MAC type\n");
- break;
- }
-
- /* add q_vector eims value to global eims_enable_mask */
- adapter->eims_enable_mask |= q_vector->eims_value;
-
- /* configure q_vector to set itr on first interrupt */
- q_vector->set_itr = 1;
-}
-
-/**
- * igc_configure_msix - Configure MSI-X hardware
- * @adapter: Pointer to adapter structure
- *
- * igc_configure_msix sets up the hardware to properly
- * generate MSI-X interrupts.
- */
-static void igc_configure_msix(struct igc_adapter *adapter)
+static void igc_write_itr(struct igc_q_vector *q_vector)
{
- struct igc_hw *hw = &adapter->hw;
- int i, vector = 0;
- u32 tmp;
-
- adapter->eims_enable_mask = 0;
-
- /* set vector for other causes, i.e. link changes */
- switch (hw->mac.type) {
- case igc_i225:
- /* Turn on MSI-X capability first, or our settings
- * won't stick. And it will take days to debug.
- */
- wr32(IGC_GPIE, IGC_GPIE_MSIX_MODE |
- IGC_GPIE_PBA | IGC_GPIE_EIAME |
- IGC_GPIE_NSICR);
-
- /* enable msix_other interrupt */
- adapter->eims_other = BIT(vector);
- tmp = (vector++ | IGC_IVAR_VALID) << 8;
+ u32 itr_val = q_vector->itr_val & IGC_QVECTOR_MASK;
- wr32(IGC_IVAR_MISC, tmp);
- break;
- default:
- /* do nothing, since nothing else supports MSI-X */
- break;
- } /* switch (hw->mac.type) */
+ if (!q_vector->set_itr)
+ return;
- adapter->eims_enable_mask |= adapter->eims_other;
+ if (!itr_val)
+ itr_val = IGC_ITR_VAL_MASK;
- for (i = 0; i < adapter->num_q_vectors; i++)
- igc_assign_vector(adapter->q_vector[i], vector++);
+ itr_val |= IGC_EITR_CNT_IGNR;
- wrfl();
+ writel(itr_val, q_vector->itr_register);
+ q_vector->set_itr = 0;
}
static irqreturn_t igc_msix_ring(int irq, void *data)
@@ -2961,49 +3816,6 @@ err_out:
}
/**
- * igc_reset_q_vector - Reset config for interrupt vector
- * @adapter: board private structure to initialize
- * @v_idx: Index of vector to be reset
- *
- * If NAPI is enabled it will delete any references to the
- * NAPI struct. This is preparation for igc_free_q_vector.
- */
-static void igc_reset_q_vector(struct igc_adapter *adapter, int v_idx)
-{
- struct igc_q_vector *q_vector = adapter->q_vector[v_idx];
-
- /* if we're coming from igc_set_interrupt_capability, the vectors are
- * not yet allocated
- */
- if (!q_vector)
- return;
-
- if (q_vector->tx.ring)
- adapter->tx_ring[q_vector->tx.ring->queue_index] = NULL;
-
- if (q_vector->rx.ring)
- adapter->rx_ring[q_vector->rx.ring->queue_index] = NULL;
-
- netif_napi_del(&q_vector->napi);
-}
-
-static void igc_reset_interrupt_capability(struct igc_adapter *adapter)
-{
- int v_idx = adapter->num_q_vectors;
-
- if (adapter->msix_entries) {
- pci_disable_msix(adapter->pdev);
- kfree(adapter->msix_entries);
- adapter->msix_entries = NULL;
- } else if (adapter->flags & IGC_FLAG_HAS_MSI) {
- pci_disable_msi(adapter->pdev);
- }
-
- while (v_idx--)
- igc_reset_q_vector(adapter, v_idx);
-}
-
-/**
* igc_clear_interrupt_scheme - reset the device to a state of no interrupts
* @adapter: Pointer to adapter structure
*
@@ -3016,48 +3828,6 @@ static void igc_clear_interrupt_scheme(struct igc_adapter *adapter)
igc_reset_interrupt_capability(adapter);
}
-/**
- * igc_free_q_vectors - Free memory allocated for interrupt vectors
- * @adapter: board private structure to initialize
- *
- * This function frees the memory allocated to the q_vectors. In addition if
- * NAPI is enabled it will delete any references to the NAPI struct prior
- * to freeing the q_vector.
- */
-static void igc_free_q_vectors(struct igc_adapter *adapter)
-{
- int v_idx = adapter->num_q_vectors;
-
- adapter->num_tx_queues = 0;
- adapter->num_rx_queues = 0;
- adapter->num_q_vectors = 0;
-
- while (v_idx--) {
- igc_reset_q_vector(adapter, v_idx);
- igc_free_q_vector(adapter, v_idx);
- }
-}
-
-/**
- * igc_free_q_vector - Free memory allocated for specific interrupt vector
- * @adapter: board private structure to initialize
- * @v_idx: Index of vector to be freed
- *
- * This function frees the memory allocated to the q_vector.
- */
-static void igc_free_q_vector(struct igc_adapter *adapter, int v_idx)
-{
- struct igc_q_vector *q_vector = adapter->q_vector[v_idx];
-
- adapter->q_vector[v_idx] = NULL;
-
- /* igc_get_stats64() might access the rings on this vector,
- * we must wait a grace period before freeing it.
- */
- if (q_vector)
- kfree_rcu(q_vector, rcu);
-}
-
/* Need to wait a few seconds after link up to get diagnostic information from
* the phy
*/
@@ -3109,7 +3879,7 @@ bool igc_has_link(struct igc_adapter *adapter)
/**
* igc_watchdog - Timer Call-back
- * @data: pointer to adapter cast into an unsigned long
+ * @t: timer for the watchdog
*/
static void igc_watchdog(struct timer_list *t)
{
@@ -3282,6 +4052,8 @@ no_wait:
wr32(IGC_ICS, IGC_ICS_RXDMT0);
}
+ igc_ptp_tx_hang(adapter);
+
/* Reset the timer */
if (!test_bit(__IGC_DOWN, &adapter->state)) {
if (adapter->flags & IGC_FLAG_NEED_LINK_UPDATE)
@@ -3294,149 +4066,6 @@ no_wait:
}
/**
- * igc_update_ring_itr - update the dynamic ITR value based on packet size
- * @q_vector: pointer to q_vector
- *
- * Stores a new ITR value based on strictly on packet size. This
- * algorithm is less sophisticated than that used in igc_update_itr,
- * due to the difficulty of synchronizing statistics across multiple
- * receive rings. The divisors and thresholds used by this function
- * were determined based on theoretical maximum wire speed and testing
- * data, in order to minimize response time while increasing bulk
- * throughput.
- * NOTE: This function is called only when operating in a multiqueue
- * receive environment.
- */
-static void igc_update_ring_itr(struct igc_q_vector *q_vector)
-{
- struct igc_adapter *adapter = q_vector->adapter;
- int new_val = q_vector->itr_val;
- int avg_wire_size = 0;
- unsigned int packets;
-
- /* For non-gigabit speeds, just fix the interrupt rate at 4000
- * ints/sec - ITR timer value of 120 ticks.
- */
- switch (adapter->link_speed) {
- case SPEED_10:
- case SPEED_100:
- new_val = IGC_4K_ITR;
- goto set_itr_val;
- default:
- break;
- }
-
- packets = q_vector->rx.total_packets;
- if (packets)
- avg_wire_size = q_vector->rx.total_bytes / packets;
-
- packets = q_vector->tx.total_packets;
- if (packets)
- avg_wire_size = max_t(u32, avg_wire_size,
- q_vector->tx.total_bytes / packets);
-
- /* if avg_wire_size isn't set no work was done */
- if (!avg_wire_size)
- goto clear_counts;
-
- /* Add 24 bytes to size to account for CRC, preamble, and gap */
- avg_wire_size += 24;
-
- /* Don't starve jumbo frames */
- avg_wire_size = min(avg_wire_size, 3000);
-
- /* Give a little boost to mid-size frames */
- if (avg_wire_size > 300 && avg_wire_size < 1200)
- new_val = avg_wire_size / 3;
- else
- new_val = avg_wire_size / 2;
-
- /* conservative mode (itr 3) eliminates the lowest_latency setting */
- if (new_val < IGC_20K_ITR &&
- ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
- (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
- new_val = IGC_20K_ITR;
-
-set_itr_val:
- if (new_val != q_vector->itr_val) {
- q_vector->itr_val = new_val;
- q_vector->set_itr = 1;
- }
-clear_counts:
- q_vector->rx.total_bytes = 0;
- q_vector->rx.total_packets = 0;
- q_vector->tx.total_bytes = 0;
- q_vector->tx.total_packets = 0;
-}
-
-/**
- * igc_update_itr - update the dynamic ITR value based on statistics
- * @q_vector: pointer to q_vector
- * @ring_container: ring info to update the itr for
- *
- * Stores a new ITR value based on packets and byte
- * counts during the last interrupt. The advantage of per interrupt
- * computation is faster updates and more accurate ITR for the current
- * traffic pattern. Constants in this function were computed
- * based on theoretical maximum wire speed and thresholds were set based
- * on testing data as well as attempting to minimize response time
- * while increasing bulk throughput.
- * NOTE: These calculations are only valid when operating in a single-
- * queue environment.
- */
-static void igc_update_itr(struct igc_q_vector *q_vector,
- struct igc_ring_container *ring_container)
-{
- unsigned int packets = ring_container->total_packets;
- unsigned int bytes = ring_container->total_bytes;
- u8 itrval = ring_container->itr;
-
- /* no packets, exit with status unchanged */
- if (packets == 0)
- return;
-
- switch (itrval) {
- case lowest_latency:
- /* handle TSO and jumbo frames */
- if (bytes / packets > 8000)
- itrval = bulk_latency;
- else if ((packets < 5) && (bytes > 512))
- itrval = low_latency;
- break;
- case low_latency: /* 50 usec aka 20000 ints/s */
- if (bytes > 10000) {
- /* this if handles the TSO accounting */
- if (bytes / packets > 8000)
- itrval = bulk_latency;
- else if ((packets < 10) || ((bytes / packets) > 1200))
- itrval = bulk_latency;
- else if ((packets > 35))
- itrval = lowest_latency;
- } else if (bytes / packets > 2000) {
- itrval = bulk_latency;
- } else if (packets <= 2 && bytes < 512) {
- itrval = lowest_latency;
- }
- break;
- case bulk_latency: /* 250 usec aka 4000 ints/s */
- if (bytes > 25000) {
- if (packets > 35)
- itrval = low_latency;
- } else if (bytes < 1500) {
- itrval = low_latency;
- }
- break;
- }
-
- /* clear work counters since we have the values we need */
- ring_container->total_bytes = 0;
- ring_container->total_packets = 0;
-
- /* write updated itr to ring container */
- ring_container->itr = itrval;
-}
-
-/**
* igc_intr_msi - Interrupt Handler
* @irq: interrupt number
* @data: pointer to a network interface device structure
@@ -3513,424 +4142,6 @@ static irqreturn_t igc_intr(int irq, void *data)
return IRQ_HANDLED;
}
-static void igc_set_itr(struct igc_q_vector *q_vector)
-{
- struct igc_adapter *adapter = q_vector->adapter;
- u32 new_itr = q_vector->itr_val;
- u8 current_itr = 0;
-
- /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
- switch (adapter->link_speed) {
- case SPEED_10:
- case SPEED_100:
- current_itr = 0;
- new_itr = IGC_4K_ITR;
- goto set_itr_now;
- default:
- break;
- }
-
- igc_update_itr(q_vector, &q_vector->tx);
- igc_update_itr(q_vector, &q_vector->rx);
-
- current_itr = max(q_vector->rx.itr, q_vector->tx.itr);
-
- /* conservative mode (itr 3) eliminates the lowest_latency setting */
- if (current_itr == lowest_latency &&
- ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
- (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
- current_itr = low_latency;
-
- switch (current_itr) {
- /* counts and packets in update_itr are dependent on these numbers */
- case lowest_latency:
- new_itr = IGC_70K_ITR; /* 70,000 ints/sec */
- break;
- case low_latency:
- new_itr = IGC_20K_ITR; /* 20,000 ints/sec */
- break;
- case bulk_latency:
- new_itr = IGC_4K_ITR; /* 4,000 ints/sec */
- break;
- default:
- break;
- }
-
-set_itr_now:
- if (new_itr != q_vector->itr_val) {
- /* this attempts to bias the interrupt rate towards Bulk
- * by adding intermediate steps when interrupt rate is
- * increasing
- */
- new_itr = new_itr > q_vector->itr_val ?
- max((new_itr * q_vector->itr_val) /
- (new_itr + (q_vector->itr_val >> 2)),
- new_itr) : new_itr;
- /* Don't write the value here; it resets the adapter's
- * internal timer, and causes us to delay far longer than
- * we should between interrupts. Instead, we write the ITR
- * value at the beginning of the next interrupt so the timing
- * ends up being correct.
- */
- q_vector->itr_val = new_itr;
- q_vector->set_itr = 1;
- }
-}
-
-static void igc_ring_irq_enable(struct igc_q_vector *q_vector)
-{
- struct igc_adapter *adapter = q_vector->adapter;
- struct igc_hw *hw = &adapter->hw;
-
- if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) ||
- (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) {
- if (adapter->num_q_vectors == 1)
- igc_set_itr(q_vector);
- else
- igc_update_ring_itr(q_vector);
- }
-
- if (!test_bit(__IGC_DOWN, &adapter->state)) {
- if (adapter->msix_entries)
- wr32(IGC_EIMS, q_vector->eims_value);
- else
- igc_irq_enable(adapter);
- }
-}
-
-/**
- * igc_poll - NAPI Rx polling callback
- * @napi: napi polling structure
- * @budget: count of how many packets we should handle
- */
-static int igc_poll(struct napi_struct *napi, int budget)
-{
- struct igc_q_vector *q_vector = container_of(napi,
- struct igc_q_vector,
- napi);
- bool clean_complete = true;
- int work_done = 0;
-
- if (q_vector->tx.ring)
- clean_complete = igc_clean_tx_irq(q_vector, budget);
-
- if (q_vector->rx.ring) {
- int cleaned = igc_clean_rx_irq(q_vector, budget);
-
- work_done += cleaned;
- if (cleaned >= budget)
- clean_complete = false;
- }
-
- /* If all work not completed, return budget and keep polling */
- if (!clean_complete)
- return budget;
-
- /* Exit the polling mode, but don't re-enable interrupts if stack might
- * poll us due to busy-polling
- */
- if (likely(napi_complete_done(napi, work_done)))
- igc_ring_irq_enable(q_vector);
-
- return min(work_done, budget - 1);
-}
-
-/**
- * igc_set_interrupt_capability - set MSI or MSI-X if supported
- * @adapter: Pointer to adapter structure
- *
- * Attempt to configure interrupts using the best available
- * capabilities of the hardware and kernel.
- */
-static void igc_set_interrupt_capability(struct igc_adapter *adapter,
- bool msix)
-{
- int numvecs, i;
- int err;
-
- if (!msix)
- goto msi_only;
- adapter->flags |= IGC_FLAG_HAS_MSIX;
-
- /* Number of supported queues. */
- adapter->num_rx_queues = adapter->rss_queues;
-
- adapter->num_tx_queues = adapter->rss_queues;
-
- /* start with one vector for every Rx queue */
- numvecs = adapter->num_rx_queues;
-
- /* if Tx handler is separate add 1 for every Tx queue */
- if (!(adapter->flags & IGC_FLAG_QUEUE_PAIRS))
- numvecs += adapter->num_tx_queues;
-
- /* store the number of vectors reserved for queues */
- adapter->num_q_vectors = numvecs;
-
- /* add 1 vector for link status interrupts */
- numvecs++;
-
- adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
- GFP_KERNEL);
-
- if (!adapter->msix_entries)
- return;
-
- /* populate entry values */
- for (i = 0; i < numvecs; i++)
- adapter->msix_entries[i].entry = i;
-
- err = pci_enable_msix_range(adapter->pdev,
- adapter->msix_entries,
- numvecs,
- numvecs);
- if (err > 0)
- return;
-
- kfree(adapter->msix_entries);
- adapter->msix_entries = NULL;
-
- igc_reset_interrupt_capability(adapter);
-
-msi_only:
- adapter->flags &= ~IGC_FLAG_HAS_MSIX;
-
- adapter->rss_queues = 1;
- adapter->flags |= IGC_FLAG_QUEUE_PAIRS;
- adapter->num_rx_queues = 1;
- adapter->num_tx_queues = 1;
- adapter->num_q_vectors = 1;
- if (!pci_enable_msi(adapter->pdev))
- adapter->flags |= IGC_FLAG_HAS_MSI;
-}
-
-static void igc_add_ring(struct igc_ring *ring,
- struct igc_ring_container *head)
-{
- head->ring = ring;
- head->count++;
-}
-
-/**
- * igc_alloc_q_vector - Allocate memory for a single interrupt vector
- * @adapter: board private structure to initialize
- * @v_count: q_vectors allocated on adapter, used for ring interleaving
- * @v_idx: index of vector in adapter struct
- * @txr_count: total number of Tx rings to allocate
- * @txr_idx: index of first Tx ring to allocate
- * @rxr_count: total number of Rx rings to allocate
- * @rxr_idx: index of first Rx ring to allocate
- *
- * We allocate one q_vector. If allocation fails we return -ENOMEM.
- */
-static int igc_alloc_q_vector(struct igc_adapter *adapter,
- unsigned int v_count, unsigned int v_idx,
- unsigned int txr_count, unsigned int txr_idx,
- unsigned int rxr_count, unsigned int rxr_idx)
-{
- struct igc_q_vector *q_vector;
- struct igc_ring *ring;
- int ring_count;
-
- /* igc only supports 1 Tx and/or 1 Rx queue per vector */
- if (txr_count > 1 || rxr_count > 1)
- return -ENOMEM;
-
- ring_count = txr_count + rxr_count;
-
- /* allocate q_vector and rings */
- q_vector = adapter->q_vector[v_idx];
- if (!q_vector)
- q_vector = kzalloc(struct_size(q_vector, ring, ring_count),
- GFP_KERNEL);
- else
- memset(q_vector, 0, struct_size(q_vector, ring, ring_count));
- if (!q_vector)
- return -ENOMEM;
-
- /* initialize NAPI */
- netif_napi_add(adapter->netdev, &q_vector->napi,
- igc_poll, 64);
-
- /* tie q_vector and adapter together */
- adapter->q_vector[v_idx] = q_vector;
- q_vector->adapter = adapter;
-
- /* initialize work limits */
- q_vector->tx.work_limit = adapter->tx_work_limit;
-
- /* initialize ITR configuration */
- q_vector->itr_register = adapter->io_addr + IGC_EITR(0);
- q_vector->itr_val = IGC_START_ITR;
-
- /* initialize pointer to rings */
- ring = q_vector->ring;
-
- /* initialize ITR */
- if (rxr_count) {
- /* rx or rx/tx vector */
- if (!adapter->rx_itr_setting || adapter->rx_itr_setting > 3)
- q_vector->itr_val = adapter->rx_itr_setting;
- } else {
- /* tx only vector */
- if (!adapter->tx_itr_setting || adapter->tx_itr_setting > 3)
- q_vector->itr_val = adapter->tx_itr_setting;
- }
-
- if (txr_count) {
- /* assign generic ring traits */
- ring->dev = &adapter->pdev->dev;
- ring->netdev = adapter->netdev;
-
- /* configure backlink on ring */
- ring->q_vector = q_vector;
-
- /* update q_vector Tx values */
- igc_add_ring(ring, &q_vector->tx);
-
- /* apply Tx specific ring traits */
- ring->count = adapter->tx_ring_count;
- ring->queue_index = txr_idx;
-
- /* assign ring to adapter */
- adapter->tx_ring[txr_idx] = ring;
-
- /* push pointer to next ring */
- ring++;
- }
-
- if (rxr_count) {
- /* assign generic ring traits */
- ring->dev = &adapter->pdev->dev;
- ring->netdev = adapter->netdev;
-
- /* configure backlink on ring */
- ring->q_vector = q_vector;
-
- /* update q_vector Rx values */
- igc_add_ring(ring, &q_vector->rx);
-
- /* apply Rx specific ring traits */
- ring->count = adapter->rx_ring_count;
- ring->queue_index = rxr_idx;
-
- /* assign ring to adapter */
- adapter->rx_ring[rxr_idx] = ring;
- }
-
- return 0;
-}
-
-/**
- * igc_alloc_q_vectors - Allocate memory for interrupt vectors
- * @adapter: board private structure to initialize
- *
- * We allocate one q_vector per queue interrupt. If allocation fails we
- * return -ENOMEM.
- */
-static int igc_alloc_q_vectors(struct igc_adapter *adapter)
-{
- int rxr_remaining = adapter->num_rx_queues;
- int txr_remaining = adapter->num_tx_queues;
- int rxr_idx = 0, txr_idx = 0, v_idx = 0;
- int q_vectors = adapter->num_q_vectors;
- int err;
-
- if (q_vectors >= (rxr_remaining + txr_remaining)) {
- for (; rxr_remaining; v_idx++) {
- err = igc_alloc_q_vector(adapter, q_vectors, v_idx,
- 0, 0, 1, rxr_idx);
-
- if (err)
- goto err_out;
-
- /* update counts and index */
- rxr_remaining--;
- rxr_idx++;
- }
- }
-
- for (; v_idx < q_vectors; v_idx++) {
- int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - v_idx);
- int tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - v_idx);
-
- err = igc_alloc_q_vector(adapter, q_vectors, v_idx,
- tqpv, txr_idx, rqpv, rxr_idx);
-
- if (err)
- goto err_out;
-
- /* update counts and index */
- rxr_remaining -= rqpv;
- txr_remaining -= tqpv;
- rxr_idx++;
- txr_idx++;
- }
-
- return 0;
-
-err_out:
- adapter->num_tx_queues = 0;
- adapter->num_rx_queues = 0;
- adapter->num_q_vectors = 0;
-
- while (v_idx--)
- igc_free_q_vector(adapter, v_idx);
-
- return -ENOMEM;
-}
-
-/**
- * igc_cache_ring_register - Descriptor ring to register mapping
- * @adapter: board private structure to initialize
- *
- * Once we know the feature-set enabled for the device, we'll cache
- * the register offset the descriptor ring is assigned to.
- */
-static void igc_cache_ring_register(struct igc_adapter *adapter)
-{
- int i = 0, j = 0;
-
- switch (adapter->hw.mac.type) {
- case igc_i225:
- /* Fall through */
- default:
- for (; i < adapter->num_rx_queues; i++)
- adapter->rx_ring[i]->reg_idx = i;
- for (; j < adapter->num_tx_queues; j++)
- adapter->tx_ring[j]->reg_idx = j;
- break;
- }
-}
-
-/**
- * igc_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
- * @adapter: Pointer to adapter structure
- *
- * This function initializes the interrupts and allocates all of the queues.
- */
-static int igc_init_interrupt_scheme(struct igc_adapter *adapter, bool msix)
-{
- struct pci_dev *pdev = adapter->pdev;
- int err = 0;
-
- igc_set_interrupt_capability(adapter, msix);
-
- err = igc_alloc_q_vectors(adapter);
- if (err) {
- dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
- goto err_alloc_q_vectors;
- }
-
- igc_cache_ring_register(adapter);
-
- return 0;
-
-err_alloc_q_vectors:
- igc_reset_interrupt_capability(adapter);
- return err;
-}
-
static void igc_free_irq(struct igc_adapter *adapter)
{
if (adapter->msix_entries) {
@@ -3947,62 +4158,6 @@ static void igc_free_irq(struct igc_adapter *adapter)
}
/**
- * igc_irq_disable - Mask off interrupt generation on the NIC
- * @adapter: board private structure
- */
-static void igc_irq_disable(struct igc_adapter *adapter)
-{
- struct igc_hw *hw = &adapter->hw;
-
- if (adapter->msix_entries) {
- u32 regval = rd32(IGC_EIAM);
-
- wr32(IGC_EIAM, regval & ~adapter->eims_enable_mask);
- wr32(IGC_EIMC, adapter->eims_enable_mask);
- regval = rd32(IGC_EIAC);
- wr32(IGC_EIAC, regval & ~adapter->eims_enable_mask);
- }
-
- wr32(IGC_IAM, 0);
- wr32(IGC_IMC, ~0);
- wrfl();
-
- if (adapter->msix_entries) {
- int vector = 0, i;
-
- synchronize_irq(adapter->msix_entries[vector++].vector);
-
- for (i = 0; i < adapter->num_q_vectors; i++)
- synchronize_irq(adapter->msix_entries[vector++].vector);
- } else {
- synchronize_irq(adapter->pdev->irq);
- }
-}
-
-/**
- * igc_irq_enable - Enable default interrupt generation settings
- * @adapter: board private structure
- */
-static void igc_irq_enable(struct igc_adapter *adapter)
-{
- struct igc_hw *hw = &adapter->hw;
-
- if (adapter->msix_entries) {
- u32 ims = IGC_IMS_LSC | IGC_IMS_DOUTSYNC | IGC_IMS_DRSTA;
- u32 regval = rd32(IGC_EIAC);
-
- wr32(IGC_EIAC, regval | adapter->eims_enable_mask);
- regval = rd32(IGC_EIAM);
- wr32(IGC_EIAM, regval | adapter->eims_enable_mask);
- wr32(IGC_EIMS, adapter->eims_enable_mask);
- wr32(IGC_IMS, ims);
- } else {
- wr32(IGC_IMS, IMS_ENABLE_MASK | IGC_IMS_DRSTA);
- wr32(IGC_IAM, IMS_ENABLE_MASK | IGC_IMS_DRSTA);
- }
-}
-
-/**
* igc_request_irq - initialize interrupts
* @adapter: Pointer to adapter structure
*
@@ -4056,25 +4211,10 @@ request_done:
return err;
}
-static void igc_write_itr(struct igc_q_vector *q_vector)
-{
- u32 itr_val = q_vector->itr_val & IGC_QVECTOR_MASK;
-
- if (!q_vector->set_itr)
- return;
-
- if (!itr_val)
- itr_val = IGC_ITR_VAL_MASK;
-
- itr_val |= IGC_EITR_CNT_IGNR;
-
- writel(itr_val, q_vector->itr_register);
- q_vector->set_itr = 0;
-}
-
/**
- * igc_open - Called when a network interface is made active
+ * __igc_open - Called when a network interface is made active
* @netdev: network interface device structure
+ * @resuming: boolean indicating if the device is resuming
*
* Returns 0 on success, negative value on failure
*
@@ -4164,8 +4304,9 @@ static int igc_open(struct net_device *netdev)
}
/**
- * igc_close - Disables a network interface
+ * __igc_close - Disables a network interface
* @netdev: network interface device structure
+ * @suspending: boolean indicating the device is suspending
*
* Returns 0, this is not allowed to fail
*
@@ -4199,6 +4340,24 @@ static int igc_close(struct net_device *netdev)
return 0;
}
+/**
+ * igc_ioctl - Access the hwtstamp interface
+ * @netdev: network interface device structure
+ * @ifreq: interface request data
+ * @cmd: ioctl command
+ **/
+static int igc_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
+{
+ switch (cmd) {
+ case SIOCGHWTSTAMP:
+ return igc_ptp_get_ts_config(netdev, ifr);
+ case SIOCSHWTSTAMP:
+ return igc_ptp_set_ts_config(netdev, ifr);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
static const struct net_device_ops igc_netdev_ops = {
.ndo_open = igc_open,
.ndo_stop = igc_close,
@@ -4210,6 +4369,7 @@ static const struct net_device_ops igc_netdev_ops = {
.ndo_fix_features = igc_fix_features,
.ndo_set_features = igc_set_features,
.ndo_features_check = igc_features_check,
+ .ndo_do_ioctl = igc_ioctl,
};
/* PCIe configuration access */
@@ -4345,32 +4505,26 @@ static int igc_probe(struct pci_dev *pdev,
struct net_device *netdev;
struct igc_hw *hw;
const struct igc_info *ei = igc_info_tbl[ent->driver_data];
- int err;
+ int err, pci_using_dac;
err = pci_enable_device_mem(pdev);
if (err)
return err;
- err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
+ pci_using_dac = 0;
+ err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
if (!err) {
- err = dma_set_coherent_mask(&pdev->dev,
- DMA_BIT_MASK(64));
+ pci_using_dac = 1;
} else {
- err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
+ err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
if (err) {
- err = dma_set_coherent_mask(&pdev->dev,
- DMA_BIT_MASK(32));
- if (err) {
- dev_err(&pdev->dev, "igc: Wrong DMA config\n");
- goto err_dma;
- }
+ dev_err(&pdev->dev,
+ "No usable DMA configuration, aborting\n");
+ goto err_dma;
}
}
- err = pci_request_selected_regions(pdev,
- pci_select_bars(pdev,
- IORESOURCE_MEM),
- igc_driver_name);
+ err = pci_request_mem_regions(pdev, igc_driver_name);
if (err)
goto err_pci_reg;
@@ -4433,6 +4587,7 @@ static int igc_probe(struct pci_dev *pdev,
goto err_sw_init;
/* Add supported features to the features list*/
+ netdev->features |= NETIF_F_SG;
netdev->features |= NETIF_F_RXCSUM;
netdev->features |= NETIF_F_HW_CSUM;
netdev->features |= NETIF_F_SCTP_CRC;
@@ -4446,6 +4601,9 @@ static int igc_probe(struct pci_dev *pdev,
netdev->hw_features |= NETIF_F_NTUPLE;
netdev->hw_features |= netdev->features;
+ if (pci_using_dac)
+ netdev->features |= NETIF_F_HIGHDMA;
+
/* MTU range: 68 - 9216 */
netdev->min_mtu = ETH_MIN_MTU;
netdev->max_mtu = MAX_STD_JUMBO_FRAME_SIZE;
@@ -4512,6 +4670,9 @@ static int igc_probe(struct pci_dev *pdev,
/* carrier off reporting is important to ethtool even BEFORE open */
netif_carrier_off(netdev);
+ /* do hw tstamp init after resetting */
+ igc_ptp_init(adapter);
+
/* Check if Media Autosense is enabled */
adapter->ei = *ei;
@@ -4532,8 +4693,7 @@ err_sw_init:
err_ioremap:
free_netdev(netdev);
err_alloc_etherdev:
- pci_release_selected_regions(pdev,
- pci_select_bars(pdev, IORESOURCE_MEM));
+ pci_release_mem_regions(pdev);
err_pci_reg:
err_dma:
pci_disable_device(pdev);
@@ -4554,6 +4714,8 @@ static void igc_remove(struct pci_dev *pdev)
struct net_device *netdev = pci_get_drvdata(pdev);
struct igc_adapter *adapter = netdev_priv(netdev);
+ igc_ptp_stop(adapter);
+
set_bit(__IGC_DOWN, &adapter->state);
del_timer_sync(&adapter->watchdog_timer);
@@ -4580,105 +4742,216 @@ static void igc_remove(struct pci_dev *pdev)
pci_disable_device(pdev);
}
-static struct pci_driver igc_driver = {
- .name = igc_driver_name,
- .id_table = igc_pci_tbl,
- .probe = igc_probe,
- .remove = igc_remove,
-};
-
-void igc_set_flag_queue_pairs(struct igc_adapter *adapter,
- const u32 max_rss_queues)
+static int __igc_shutdown(struct pci_dev *pdev, bool *enable_wake,
+ bool runtime)
{
- /* Determine if we need to pair queues. */
- /* If rss_queues > half of max_rss_queues, pair the queues in
- * order to conserve interrupts due to limited supply.
- */
- if (adapter->rss_queues > (max_rss_queues / 2))
- adapter->flags |= IGC_FLAG_QUEUE_PAIRS;
+ struct net_device *netdev = pci_get_drvdata(pdev);
+ struct igc_adapter *adapter = netdev_priv(netdev);
+ u32 wufc = runtime ? IGC_WUFC_LNKC : adapter->wol;
+ struct igc_hw *hw = &adapter->hw;
+ u32 ctrl, rctl, status;
+ bool wake;
+
+ rtnl_lock();
+ netif_device_detach(netdev);
+
+ if (netif_running(netdev))
+ __igc_close(netdev, true);
+
+ igc_clear_interrupt_scheme(adapter);
+ rtnl_unlock();
+
+ status = rd32(IGC_STATUS);
+ if (status & IGC_STATUS_LU)
+ wufc &= ~IGC_WUFC_LNKC;
+
+ if (wufc) {
+ igc_setup_rctl(adapter);
+ igc_set_rx_mode(netdev);
+
+ /* turn on all-multi mode if wake on multicast is enabled */
+ if (wufc & IGC_WUFC_MC) {
+ rctl = rd32(IGC_RCTL);
+ rctl |= IGC_RCTL_MPE;
+ wr32(IGC_RCTL, rctl);
+ }
+
+ ctrl = rd32(IGC_CTRL);
+ ctrl |= IGC_CTRL_ADVD3WUC;
+ wr32(IGC_CTRL, ctrl);
+
+ /* Allow time for pending master requests to run */
+ igc_disable_pcie_master(hw);
+
+ wr32(IGC_WUC, IGC_WUC_PME_EN);
+ wr32(IGC_WUFC, wufc);
+ } else {
+ wr32(IGC_WUC, 0);
+ wr32(IGC_WUFC, 0);
+ }
+
+ wake = wufc || adapter->en_mng_pt;
+ if (!wake)
+ igc_power_down_link(adapter);
else
- adapter->flags &= ~IGC_FLAG_QUEUE_PAIRS;
-}
+ igc_power_up_link(adapter);
-unsigned int igc_get_max_rss_queues(struct igc_adapter *adapter)
-{
- unsigned int max_rss_queues;
+ if (enable_wake)
+ *enable_wake = wake;
- /* Determine the maximum number of RSS queues supported. */
- max_rss_queues = IGC_MAX_RX_QUEUES;
+ /* Release control of h/w to f/w. If f/w is AMT enabled, this
+ * would have already happened in close and is redundant.
+ */
+ igc_release_hw_control(adapter);
- return max_rss_queues;
+ pci_disable_device(pdev);
+
+ return 0;
}
-static void igc_init_queue_configuration(struct igc_adapter *adapter)
+#ifdef CONFIG_PM
+static int __maybe_unused igc_runtime_suspend(struct device *dev)
{
- u32 max_rss_queues;
-
- max_rss_queues = igc_get_max_rss_queues(adapter);
- adapter->rss_queues = min_t(u32, max_rss_queues, num_online_cpus());
-
- igc_set_flag_queue_pairs(adapter, max_rss_queues);
+ return __igc_shutdown(to_pci_dev(dev), NULL, 1);
}
-/**
- * igc_sw_init - Initialize general software structures (struct igc_adapter)
- * @adapter: board private structure to initialize
- *
- * igc_sw_init initializes the Adapter private data structure.
- * Fields are initialized based on PCI device information and
- * OS network device settings (MTU size).
- */
-static int igc_sw_init(struct igc_adapter *adapter)
+static void igc_deliver_wake_packet(struct net_device *netdev)
{
- struct net_device *netdev = adapter->netdev;
- struct pci_dev *pdev = adapter->pdev;
+ struct igc_adapter *adapter = netdev_priv(netdev);
struct igc_hw *hw = &adapter->hw;
+ struct sk_buff *skb;
+ u32 wupl;
- int size = sizeof(struct igc_mac_addr) * hw->mac.rar_entry_count;
+ wupl = rd32(IGC_WUPL) & IGC_WUPL_MASK;
- pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
+ /* WUPM stores only the first 128 bytes of the wake packet.
+ * Read the packet only if we have the whole thing.
+ */
+ if (wupl == 0 || wupl > IGC_WUPM_BYTES)
+ return;
- /* set default ring sizes */
- adapter->tx_ring_count = IGC_DEFAULT_TXD;
- adapter->rx_ring_count = IGC_DEFAULT_RXD;
+ skb = netdev_alloc_skb_ip_align(netdev, IGC_WUPM_BYTES);
+ if (!skb)
+ return;
- /* set default ITR values */
- adapter->rx_itr_setting = IGC_DEFAULT_ITR;
- adapter->tx_itr_setting = IGC_DEFAULT_ITR;
+ skb_put(skb, wupl);
- /* set default work limits */
- adapter->tx_work_limit = IGC_DEFAULT_TX_WORK;
+ /* Ensure reads are 32-bit aligned */
+ wupl = roundup(wupl, 4);
- /* adjust max frame to be at least the size of a standard frame */
- adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN +
- VLAN_HLEN;
- adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
+ memcpy_fromio(skb->data, hw->hw_addr + IGC_WUPM_REG(0), wupl);
- spin_lock_init(&adapter->nfc_lock);
- spin_lock_init(&adapter->stats64_lock);
- /* Assume MSI-X interrupts, will be checked during IRQ allocation */
- adapter->flags |= IGC_FLAG_HAS_MSIX;
+ skb->protocol = eth_type_trans(skb, netdev);
+ netif_rx(skb);
+}
- adapter->mac_table = kzalloc(size, GFP_ATOMIC);
- if (!adapter->mac_table)
- return -ENOMEM;
+static int __maybe_unused igc_resume(struct device *dev)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct net_device *netdev = pci_get_drvdata(pdev);
+ struct igc_adapter *adapter = netdev_priv(netdev);
+ struct igc_hw *hw = &adapter->hw;
+ u32 err, val;
- igc_init_queue_configuration(adapter);
+ pci_set_power_state(pdev, PCI_D0);
+ pci_restore_state(pdev);
+ pci_save_state(pdev);
+
+ if (!pci_device_is_present(pdev))
+ return -ENODEV;
+ err = pci_enable_device_mem(pdev);
+ if (err) {
+ dev_err(&pdev->dev,
+ "igc: Cannot enable PCI device from suspend\n");
+ return err;
+ }
+ pci_set_master(pdev);
+
+ pci_enable_wake(pdev, PCI_D3hot, 0);
+ pci_enable_wake(pdev, PCI_D3cold, 0);
- /* This call may decrease the number of queues */
if (igc_init_interrupt_scheme(adapter, true)) {
dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
return -ENOMEM;
}
- /* Explicitly disable IRQ since the NIC can be in any state. */
- igc_irq_disable(adapter);
+ igc_reset(adapter);
- set_bit(__IGC_DOWN, &adapter->state);
+ /* let the f/w know that the h/w is now under the control of the
+ * driver.
+ */
+ igc_get_hw_control(adapter);
- return 0;
+ val = rd32(IGC_WUS);
+ if (val & WAKE_PKT_WUS)
+ igc_deliver_wake_packet(netdev);
+
+ wr32(IGC_WUS, ~0);
+
+ rtnl_lock();
+ if (!err && netif_running(netdev))
+ err = __igc_open(netdev, true);
+
+ if (!err)
+ netif_device_attach(netdev);
+ rtnl_unlock();
+
+ return err;
+}
+
+static int __maybe_unused igc_runtime_resume(struct device *dev)
+{
+ return igc_resume(dev);
+}
+
+static int __maybe_unused igc_suspend(struct device *dev)
+{
+ return __igc_shutdown(to_pci_dev(dev), NULL, 0);
+}
+
+static int __maybe_unused igc_runtime_idle(struct device *dev)
+{
+ struct net_device *netdev = dev_get_drvdata(dev);
+ struct igc_adapter *adapter = netdev_priv(netdev);
+
+ if (!igc_has_link(adapter))
+ pm_schedule_suspend(dev, MSEC_PER_SEC * 5);
+
+ return -EBUSY;
+}
+#endif /* CONFIG_PM */
+
+static void igc_shutdown(struct pci_dev *pdev)
+{
+ bool wake;
+
+ __igc_shutdown(pdev, &wake, 0);
+
+ if (system_state == SYSTEM_POWER_OFF) {
+ pci_wake_from_d3(pdev, wake);
+ pci_set_power_state(pdev, PCI_D3hot);
+ }
}
+#ifdef CONFIG_PM
+static const struct dev_pm_ops igc_pm_ops = {
+ SET_SYSTEM_SLEEP_PM_OPS(igc_suspend, igc_resume)
+ SET_RUNTIME_PM_OPS(igc_runtime_suspend, igc_runtime_resume,
+ igc_runtime_idle)
+};
+#endif
+
+static struct pci_driver igc_driver = {
+ .name = igc_driver_name,
+ .id_table = igc_pci_tbl,
+ .probe = igc_probe,
+ .remove = igc_remove,
+#ifdef CONFIG_PM
+ .driver.pm = &igc_pm_ops,
+#endif
+ .shutdown = igc_shutdown,
+};
+
/**
* igc_reinit_queues - return error
* @adapter: pointer to adapter structure
diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c
new file mode 100644
index 000000000000..693506587198
--- /dev/null
+++ b/drivers/net/ethernet/intel/igc/igc_ptp.c
@@ -0,0 +1,716 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 Intel Corporation */
+
+#include "igc.h"
+
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/pci.h>
+#include <linux/ptp_classify.h>
+#include <linux/clocksource.h>
+
+#define INCVALUE_MASK 0x7fffffff
+#define ISGN 0x80000000
+
+#define IGC_SYSTIM_OVERFLOW_PERIOD (HZ * 60 * 9)
+#define IGC_PTP_TX_TIMEOUT (HZ * 15)
+
+/* SYSTIM read access for I225 */
+static void igc_ptp_read_i225(struct igc_adapter *adapter,
+ struct timespec64 *ts)
+{
+ struct igc_hw *hw = &adapter->hw;
+ u32 sec, nsec;
+
+ /* The timestamp latches on lowest register read. For I210/I211, the
+ * lowest register is SYSTIMR. Since we only need to provide nanosecond
+ * resolution, we can ignore it.
+ */
+ rd32(IGC_SYSTIMR);
+ nsec = rd32(IGC_SYSTIML);
+ sec = rd32(IGC_SYSTIMH);
+
+ ts->tv_sec = sec;
+ ts->tv_nsec = nsec;
+}
+
+static void igc_ptp_write_i225(struct igc_adapter *adapter,
+ const struct timespec64 *ts)
+{
+ struct igc_hw *hw = &adapter->hw;
+
+ /* Writing the SYSTIMR register is not necessary as it only
+ * provides sub-nanosecond resolution.
+ */
+ wr32(IGC_SYSTIML, ts->tv_nsec);
+ wr32(IGC_SYSTIMH, ts->tv_sec);
+}
+
+static int igc_ptp_adjfine_i225(struct ptp_clock_info *ptp, long scaled_ppm)
+{
+ struct igc_adapter *igc = container_of(ptp, struct igc_adapter,
+ ptp_caps);
+ struct igc_hw *hw = &igc->hw;
+ int neg_adj = 0;
+ u64 rate;
+ u32 inca;
+
+ if (scaled_ppm < 0) {
+ neg_adj = 1;
+ scaled_ppm = -scaled_ppm;
+ }
+ rate = scaled_ppm;
+ rate <<= 14;
+ rate = div_u64(rate, 78125);
+
+ inca = rate & INCVALUE_MASK;
+ if (neg_adj)
+ inca |= ISGN;
+
+ wr32(IGC_TIMINCA, inca);
+
+ return 0;
+}
+
+static int igc_ptp_adjtime_i225(struct ptp_clock_info *ptp, s64 delta)
+{
+ struct igc_adapter *igc = container_of(ptp, struct igc_adapter,
+ ptp_caps);
+ struct timespec64 now, then = ns_to_timespec64(delta);
+ unsigned long flags;
+
+ spin_lock_irqsave(&igc->tmreg_lock, flags);
+
+ igc_ptp_read_i225(igc, &now);
+ now = timespec64_add(now, then);
+ igc_ptp_write_i225(igc, (const struct timespec64 *)&now);
+
+ spin_unlock_irqrestore(&igc->tmreg_lock, flags);
+
+ return 0;
+}
+
+static int igc_ptp_gettimex64_i225(struct ptp_clock_info *ptp,
+ struct timespec64 *ts,
+ struct ptp_system_timestamp *sts)
+{
+ struct igc_adapter *igc = container_of(ptp, struct igc_adapter,
+ ptp_caps);
+ struct igc_hw *hw = &igc->hw;
+ unsigned long flags;
+
+ spin_lock_irqsave(&igc->tmreg_lock, flags);
+
+ ptp_read_system_prets(sts);
+ rd32(IGC_SYSTIMR);
+ ptp_read_system_postts(sts);
+ ts->tv_nsec = rd32(IGC_SYSTIML);
+ ts->tv_sec = rd32(IGC_SYSTIMH);
+
+ spin_unlock_irqrestore(&igc->tmreg_lock, flags);
+
+ return 0;
+}
+
+static int igc_ptp_settime_i225(struct ptp_clock_info *ptp,
+ const struct timespec64 *ts)
+{
+ struct igc_adapter *igc = container_of(ptp, struct igc_adapter,
+ ptp_caps);
+ unsigned long flags;
+
+ spin_lock_irqsave(&igc->tmreg_lock, flags);
+
+ igc_ptp_write_i225(igc, ts);
+
+ spin_unlock_irqrestore(&igc->tmreg_lock, flags);
+
+ return 0;
+}
+
+static int igc_ptp_feature_enable_i225(struct ptp_clock_info *ptp,
+ struct ptp_clock_request *rq, int on)
+{
+ return -EOPNOTSUPP;
+}
+
+/**
+ * igc_ptp_systim_to_hwtstamp - convert system time value to HW timestamp
+ * @adapter: board private structure
+ * @hwtstamps: timestamp structure to update
+ * @systim: unsigned 64bit system time value
+ *
+ * We need to convert the system time value stored in the RX/TXSTMP registers
+ * into a hwtstamp which can be used by the upper level timestamping functions.
+ **/
+static void igc_ptp_systim_to_hwtstamp(struct igc_adapter *adapter,
+ struct skb_shared_hwtstamps *hwtstamps,
+ u64 systim)
+{
+ switch (adapter->hw.mac.type) {
+ case igc_i225:
+ memset(hwtstamps, 0, sizeof(*hwtstamps));
+ /* Upper 32 bits contain s, lower 32 bits contain ns. */
+ hwtstamps->hwtstamp = ktime_set(systim >> 32,
+ systim & 0xFFFFFFFF);
+ break;
+ default:
+ break;
+ }
+}
+
+/**
+ * igc_ptp_rx_pktstamp - retrieve Rx per packet timestamp
+ * @q_vector: Pointer to interrupt specific structure
+ * @va: Pointer to address containing Rx buffer
+ * @skb: Buffer containing timestamp and packet
+ *
+ * This function is meant to retrieve the first timestamp from the
+ * first buffer of an incoming frame. The value is stored in little
+ * endian format starting on byte 0. There's a second timestamp
+ * starting on byte 8.
+ **/
+void igc_ptp_rx_pktstamp(struct igc_q_vector *q_vector, void *va,
+ struct sk_buff *skb)
+{
+ struct igc_adapter *adapter = q_vector->adapter;
+ __le64 *regval = (__le64 *)va;
+ int adjust = 0;
+
+ /* The timestamp is recorded in little endian format.
+ * DWORD: | 0 | 1 | 2 | 3
+ * Field: | Timer0 Low | Timer0 High | Timer1 Low | Timer1 High
+ */
+ igc_ptp_systim_to_hwtstamp(adapter, skb_hwtstamps(skb),
+ le64_to_cpu(regval[0]));
+
+ /* adjust timestamp for the RX latency based on link speed */
+ if (adapter->hw.mac.type == igc_i225) {
+ switch (adapter->link_speed) {
+ case SPEED_10:
+ adjust = IGC_I225_RX_LATENCY_10;
+ break;
+ case SPEED_100:
+ adjust = IGC_I225_RX_LATENCY_100;
+ break;
+ case SPEED_1000:
+ adjust = IGC_I225_RX_LATENCY_1000;
+ break;
+ case SPEED_2500:
+ adjust = IGC_I225_RX_LATENCY_2500;
+ break;
+ }
+ }
+ skb_hwtstamps(skb)->hwtstamp =
+ ktime_sub_ns(skb_hwtstamps(skb)->hwtstamp, adjust);
+}
+
+/**
+ * igc_ptp_rx_rgtstamp - retrieve Rx timestamp stored in register
+ * @q_vector: Pointer to interrupt specific structure
+ * @skb: Buffer containing timestamp and packet
+ *
+ * This function is meant to retrieve a timestamp from the internal registers
+ * of the adapter and store it in the skb.
+ */
+void igc_ptp_rx_rgtstamp(struct igc_q_vector *q_vector,
+ struct sk_buff *skb)
+{
+ struct igc_adapter *adapter = q_vector->adapter;
+ struct igc_hw *hw = &adapter->hw;
+ u64 regval;
+
+ /* If this bit is set, then the RX registers contain the time
+ * stamp. No other packet will be time stamped until we read
+ * these registers, so read the registers to make them
+ * available again. Because only one packet can be time
+ * stamped at a time, we know that the register values must
+ * belong to this one here and therefore we don't need to
+ * compare any of the additional attributes stored for it.
+ *
+ * If nothing went wrong, then it should have a shared
+ * tx_flags that we can turn into a skb_shared_hwtstamps.
+ */
+ if (!(rd32(IGC_TSYNCRXCTL) & IGC_TSYNCRXCTL_VALID))
+ return;
+
+ regval = rd32(IGC_RXSTMPL);
+ regval |= (u64)rd32(IGC_RXSTMPH) << 32;
+
+ igc_ptp_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
+
+ /* Update the last_rx_timestamp timer in order to enable watchdog check
+ * for error case of latched timestamp on a dropped packet.
+ */
+ adapter->last_rx_timestamp = jiffies;
+}
+
+/**
+ * igc_ptp_enable_tstamp_rxqueue - Enable RX timestamp for a queue
+ * @rx_ring: Pointer to RX queue
+ * @timer: Index for timer
+ *
+ * This function enables RX timestamping for a queue, and selects
+ * which 1588 timer will provide the timestamp.
+ */
+static void igc_ptp_enable_tstamp_rxqueue(struct igc_adapter *adapter,
+ struct igc_ring *rx_ring, u8 timer)
+{
+ struct igc_hw *hw = &adapter->hw;
+ int reg_idx = rx_ring->reg_idx;
+ u32 srrctl = rd32(IGC_SRRCTL(reg_idx));
+
+ srrctl |= IGC_SRRCTL_TIMESTAMP;
+ srrctl |= IGC_SRRCTL_TIMER1SEL(timer);
+ srrctl |= IGC_SRRCTL_TIMER0SEL(timer);
+
+ wr32(IGC_SRRCTL(reg_idx), srrctl);
+}
+
+static void igc_ptp_enable_tstamp_all_rxqueues(struct igc_adapter *adapter,
+ u8 timer)
+{
+ int i;
+
+ for (i = 0; i < adapter->num_rx_queues; i++) {
+ struct igc_ring *ring = adapter->rx_ring[i];
+
+ igc_ptp_enable_tstamp_rxqueue(adapter, ring, timer);
+ }
+}
+
+/**
+ * igc_ptp_set_timestamp_mode - setup hardware for timestamping
+ * @adapter: networking device structure
+ * @config: hwtstamp configuration
+ *
+ * Outgoing time stamping can be enabled and disabled. Play nice and
+ * disable it when requested, although it shouldn't case any overhead
+ * when no packet needs it. At most one packet in the queue may be
+ * marked for time stamping, otherwise it would be impossible to tell
+ * for sure to which packet the hardware time stamp belongs.
+ *
+ * Incoming time stamping has to be configured via the hardware
+ * filters. Not all combinations are supported, in particular event
+ * type has to be specified. Matching the kind of event packet is
+ * not supported, with the exception of "all V2 events regardless of
+ * level 2 or 4".
+ *
+ */
+static int igc_ptp_set_timestamp_mode(struct igc_adapter *adapter,
+ struct hwtstamp_config *config)
+{
+ u32 tsync_tx_ctl = IGC_TSYNCTXCTL_ENABLED;
+ u32 tsync_rx_ctl = IGC_TSYNCRXCTL_ENABLED;
+ struct igc_hw *hw = &adapter->hw;
+ u32 tsync_rx_cfg = 0;
+ bool is_l4 = false;
+ bool is_l2 = false;
+ u32 regval;
+
+ /* reserved for future extensions */
+ if (config->flags)
+ return -EINVAL;
+
+ switch (config->tx_type) {
+ case HWTSTAMP_TX_OFF:
+ tsync_tx_ctl = 0;
+ case HWTSTAMP_TX_ON:
+ break;
+ default:
+ return -ERANGE;
+ }
+
+ switch (config->rx_filter) {
+ case HWTSTAMP_FILTER_NONE:
+ tsync_rx_ctl = 0;
+ break;
+ case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
+ tsync_rx_ctl |= IGC_TSYNCRXCTL_TYPE_L4_V1;
+ tsync_rx_cfg = IGC_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
+ is_l4 = true;
+ break;
+ case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
+ tsync_rx_ctl |= IGC_TSYNCRXCTL_TYPE_L4_V1;
+ tsync_rx_cfg = IGC_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
+ is_l4 = true;
+ break;
+ case HWTSTAMP_FILTER_PTP_V2_EVENT:
+ case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
+ case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
+ case HWTSTAMP_FILTER_PTP_V2_SYNC:
+ case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
+ case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
+ case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+ case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
+ case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
+ tsync_rx_ctl |= IGC_TSYNCRXCTL_TYPE_EVENT_V2;
+ config->rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
+ is_l2 = true;
+ is_l4 = true;
+ break;
+ case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
+ case HWTSTAMP_FILTER_NTP_ALL:
+ case HWTSTAMP_FILTER_ALL:
+ tsync_rx_ctl |= IGC_TSYNCRXCTL_TYPE_ALL;
+ config->rx_filter = HWTSTAMP_FILTER_ALL;
+ break;
+ /* fall through */
+ default:
+ config->rx_filter = HWTSTAMP_FILTER_NONE;
+ return -ERANGE;
+ }
+
+ /* Per-packet timestamping only works if all packets are
+ * timestamped, so enable timestamping in all packets as long
+ * as one Rx filter was configured.
+ */
+ if (tsync_rx_ctl) {
+ tsync_rx_ctl = IGC_TSYNCRXCTL_ENABLED;
+ tsync_rx_ctl |= IGC_TSYNCRXCTL_TYPE_ALL;
+ tsync_rx_ctl |= IGC_TSYNCRXCTL_RXSYNSIG;
+ config->rx_filter = HWTSTAMP_FILTER_ALL;
+ is_l2 = true;
+ is_l4 = true;
+
+ if (hw->mac.type == igc_i225) {
+ regval = rd32(IGC_RXPBS);
+ regval |= IGC_RXPBS_CFG_TS_EN;
+ wr32(IGC_RXPBS, regval);
+
+ /* FIXME: For now, only support retrieving RX
+ * timestamps from timer 0
+ */
+ igc_ptp_enable_tstamp_all_rxqueues(adapter, 0);
+ }
+ }
+
+ if (tsync_tx_ctl) {
+ tsync_tx_ctl = IGC_TSYNCTXCTL_ENABLED;
+ tsync_tx_ctl |= IGC_TSYNCTXCTL_TXSYNSIG;
+ }
+
+ /* enable/disable TX */
+ regval = rd32(IGC_TSYNCTXCTL);
+ regval &= ~IGC_TSYNCTXCTL_ENABLED;
+ regval |= tsync_tx_ctl;
+ wr32(IGC_TSYNCTXCTL, regval);
+
+ /* enable/disable RX */
+ regval = rd32(IGC_TSYNCRXCTL);
+ regval &= ~(IGC_TSYNCRXCTL_ENABLED | IGC_TSYNCRXCTL_TYPE_MASK);
+ regval |= tsync_rx_ctl;
+ wr32(IGC_TSYNCRXCTL, regval);
+
+ /* define which PTP packets are time stamped */
+ wr32(IGC_TSYNCRXCFG, tsync_rx_cfg);
+
+ /* define ethertype filter for timestamped packets */
+ if (is_l2)
+ wr32(IGC_ETQF(3),
+ (IGC_ETQF_FILTER_ENABLE | /* enable filter */
+ IGC_ETQF_1588 | /* enable timestamping */
+ ETH_P_1588)); /* 1588 eth protocol type */
+ else
+ wr32(IGC_ETQF(3), 0);
+
+ /* L4 Queue Filter[3]: filter by destination port and protocol */
+ if (is_l4) {
+ u32 ftqf = (IPPROTO_UDP /* UDP */
+ | IGC_FTQF_VF_BP /* VF not compared */
+ | IGC_FTQF_1588_TIME_STAMP /* Enable Timestamp */
+ | IGC_FTQF_MASK); /* mask all inputs */
+ ftqf &= ~IGC_FTQF_MASK_PROTO_BP; /* enable protocol check */
+
+ wr32(IGC_IMIR(3), htons(PTP_EV_PORT));
+ wr32(IGC_IMIREXT(3),
+ (IGC_IMIREXT_SIZE_BP | IGC_IMIREXT_CTRL_BP));
+ wr32(IGC_FTQF(3), ftqf);
+ } else {
+ wr32(IGC_FTQF(3), IGC_FTQF_MASK);
+ }
+ wrfl();
+
+ /* clear TX/RX time stamp registers, just to be sure */
+ regval = rd32(IGC_TXSTMPL);
+ regval = rd32(IGC_TXSTMPH);
+ regval = rd32(IGC_RXSTMPL);
+ regval = rd32(IGC_RXSTMPH);
+
+ return 0;
+}
+
+void igc_ptp_tx_hang(struct igc_adapter *adapter)
+{
+ bool timeout = time_is_before_jiffies(adapter->ptp_tx_start +
+ IGC_PTP_TX_TIMEOUT);
+ struct igc_hw *hw = &adapter->hw;
+
+ if (!adapter->ptp_tx_skb)
+ return;
+
+ if (!test_bit(__IGC_PTP_TX_IN_PROGRESS, &adapter->state))
+ return;
+
+ /* If we haven't received a timestamp within the timeout, it is
+ * reasonable to assume that it will never occur, so we can unlock the
+ * timestamp bit when this occurs.
+ */
+ if (timeout) {
+ cancel_work_sync(&adapter->ptp_tx_work);
+ dev_kfree_skb_any(adapter->ptp_tx_skb);
+ adapter->ptp_tx_skb = NULL;
+ clear_bit_unlock(__IGC_PTP_TX_IN_PROGRESS, &adapter->state);
+ adapter->tx_hwtstamp_timeouts++;
+ /* Clear the Tx valid bit in TSYNCTXCTL register to enable
+ * interrupt
+ */
+ rd32(IGC_TXSTMPH);
+ dev_warn(&adapter->pdev->dev, "clearing Tx timestamp hang\n");
+ }
+}
+
+/**
+ * igc_ptp_tx_hwtstamp - utility function which checks for TX time stamp
+ * @adapter: Board private structure
+ *
+ * If we were asked to do hardware stamping and such a time stamp is
+ * available, then it must have been for this skb here because we only
+ * allow only one such packet into the queue.
+ */
+static void igc_ptp_tx_hwtstamp(struct igc_adapter *adapter)
+{
+ struct sk_buff *skb = adapter->ptp_tx_skb;
+ struct skb_shared_hwtstamps shhwtstamps;
+ struct igc_hw *hw = &adapter->hw;
+ u64 regval;
+
+ regval = rd32(IGC_TXSTMPL);
+ regval |= (u64)rd32(IGC_TXSTMPH) << 32;
+ igc_ptp_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
+
+ /* Clear the lock early before calling skb_tstamp_tx so that
+ * applications are not woken up before the lock bit is clear. We use
+ * a copy of the skb pointer to ensure other threads can't change it
+ * while we're notifying the stack.
+ */
+ adapter->ptp_tx_skb = NULL;
+ clear_bit_unlock(__IGC_PTP_TX_IN_PROGRESS, &adapter->state);
+
+ /* Notify the stack and free the skb after we've unlocked */
+ skb_tstamp_tx(skb, &shhwtstamps);
+ dev_kfree_skb_any(skb);
+}
+
+/**
+ * igc_ptp_tx_work
+ * @work: pointer to work struct
+ *
+ * This work function polls the TSYNCTXCTL valid bit to determine when a
+ * timestamp has been taken for the current stored skb.
+ */
+void igc_ptp_tx_work(struct work_struct *work)
+{
+ struct igc_adapter *adapter = container_of(work, struct igc_adapter,
+ ptp_tx_work);
+ struct igc_hw *hw = &adapter->hw;
+ u32 tsynctxctl;
+
+ if (!adapter->ptp_tx_skb)
+ return;
+
+ if (time_is_before_jiffies(adapter->ptp_tx_start +
+ IGC_PTP_TX_TIMEOUT)) {
+ dev_kfree_skb_any(adapter->ptp_tx_skb);
+ adapter->ptp_tx_skb = NULL;
+ clear_bit_unlock(__IGC_PTP_TX_IN_PROGRESS, &adapter->state);
+ adapter->tx_hwtstamp_timeouts++;
+ /* Clear the tx valid bit in TSYNCTXCTL register to enable
+ * interrupt
+ */
+ rd32(IGC_TXSTMPH);
+ dev_warn(&adapter->pdev->dev, "clearing Tx timestamp hang\n");
+ return;
+ }
+
+ tsynctxctl = rd32(IGC_TSYNCTXCTL);
+ if (tsynctxctl & IGC_TSYNCTXCTL_VALID)
+ igc_ptp_tx_hwtstamp(adapter);
+ else
+ /* reschedule to check later */
+ schedule_work(&adapter->ptp_tx_work);
+}
+
+/**
+ * igc_ptp_set_ts_config - set hardware time stamping config
+ * @netdev: network interface device structure
+ * @ifreq: interface request data
+ *
+ **/
+int igc_ptp_set_ts_config(struct net_device *netdev, struct ifreq *ifr)
+{
+ struct igc_adapter *adapter = netdev_priv(netdev);
+ struct hwtstamp_config config;
+ int err;
+
+ if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
+ return -EFAULT;
+
+ err = igc_ptp_set_timestamp_mode(adapter, &config);
+ if (err)
+ return err;
+
+ /* save these settings for future reference */
+ memcpy(&adapter->tstamp_config, &config,
+ sizeof(adapter->tstamp_config));
+
+ return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
+ -EFAULT : 0;
+}
+
+/**
+ * igc_ptp_get_ts_config - get hardware time stamping config
+ * @netdev: network interface device structure
+ * @ifreq: interface request data
+ *
+ * Get the hwtstamp_config settings to return to the user. Rather than attempt
+ * to deconstruct the settings from the registers, just return a shadow copy
+ * of the last known settings.
+ **/
+int igc_ptp_get_ts_config(struct net_device *netdev, struct ifreq *ifr)
+{
+ struct igc_adapter *adapter = netdev_priv(netdev);
+ struct hwtstamp_config *config = &adapter->tstamp_config;
+
+ return copy_to_user(ifr->ifr_data, config, sizeof(*config)) ?
+ -EFAULT : 0;
+}
+
+/**
+ * igc_ptp_init - Initialize PTP functionality
+ * @adapter: Board private structure
+ *
+ * This function is called at device probe to initialize the PTP
+ * functionality.
+ */
+void igc_ptp_init(struct igc_adapter *adapter)
+{
+ struct net_device *netdev = adapter->netdev;
+ struct igc_hw *hw = &adapter->hw;
+
+ switch (hw->mac.type) {
+ case igc_i225:
+ snprintf(adapter->ptp_caps.name, 16, "%pm", netdev->dev_addr);
+ adapter->ptp_caps.owner = THIS_MODULE;
+ adapter->ptp_caps.max_adj = 62499999;
+ adapter->ptp_caps.adjfine = igc_ptp_adjfine_i225;
+ adapter->ptp_caps.adjtime = igc_ptp_adjtime_i225;
+ adapter->ptp_caps.gettimex64 = igc_ptp_gettimex64_i225;
+ adapter->ptp_caps.settime64 = igc_ptp_settime_i225;
+ adapter->ptp_caps.enable = igc_ptp_feature_enable_i225;
+ break;
+ default:
+ adapter->ptp_clock = NULL;
+ return;
+ }
+
+ spin_lock_init(&adapter->tmreg_lock);
+ INIT_WORK(&adapter->ptp_tx_work, igc_ptp_tx_work);
+
+ adapter->tstamp_config.rx_filter = HWTSTAMP_FILTER_NONE;
+ adapter->tstamp_config.tx_type = HWTSTAMP_TX_OFF;
+
+ igc_ptp_reset(adapter);
+
+ adapter->ptp_clock = ptp_clock_register(&adapter->ptp_caps,
+ &adapter->pdev->dev);
+ if (IS_ERR(adapter->ptp_clock)) {
+ adapter->ptp_clock = NULL;
+ dev_err(&adapter->pdev->dev, "ptp_clock_register failed\n");
+ } else if (adapter->ptp_clock) {
+ dev_info(&adapter->pdev->dev, "added PHC on %s\n",
+ adapter->netdev->name);
+ adapter->ptp_flags |= IGC_PTP_ENABLED;
+ }
+}
+
+/**
+ * igc_ptp_suspend - Disable PTP work items and prepare for suspend
+ * @adapter: Board private structure
+ *
+ * This function stops the overflow check work and PTP Tx timestamp work, and
+ * will prepare the device for OS suspend.
+ */
+void igc_ptp_suspend(struct igc_adapter *adapter)
+{
+ if (!(adapter->ptp_flags & IGC_PTP_ENABLED))
+ return;
+
+ cancel_work_sync(&adapter->ptp_tx_work);
+ if (adapter->ptp_tx_skb) {
+ dev_kfree_skb_any(adapter->ptp_tx_skb);
+ adapter->ptp_tx_skb = NULL;
+ clear_bit_unlock(__IGC_PTP_TX_IN_PROGRESS, &adapter->state);
+ }
+}
+
+/**
+ * igc_ptp_stop - Disable PTP device and stop the overflow check.
+ * @adapter: Board private structure.
+ *
+ * This function stops the PTP support and cancels the delayed work.
+ **/
+void igc_ptp_stop(struct igc_adapter *adapter)
+{
+ igc_ptp_suspend(adapter);
+
+ if (adapter->ptp_clock) {
+ ptp_clock_unregister(adapter->ptp_clock);
+ dev_info(&adapter->pdev->dev, "removed PHC on %s\n",
+ adapter->netdev->name);
+ adapter->ptp_flags &= ~IGC_PTP_ENABLED;
+ }
+}
+
+/**
+ * igc_ptp_reset - Re-enable the adapter for PTP following a reset.
+ * @adapter: Board private structure.
+ *
+ * This function handles the reset work required to re-enable the PTP device.
+ **/
+void igc_ptp_reset(struct igc_adapter *adapter)
+{
+ struct igc_hw *hw = &adapter->hw;
+ unsigned long flags;
+
+ /* reset the tstamp_config */
+ igc_ptp_set_timestamp_mode(adapter, &adapter->tstamp_config);
+
+ spin_lock_irqsave(&adapter->tmreg_lock, flags);
+
+ switch (adapter->hw.mac.type) {
+ case igc_i225:
+ wr32(IGC_TSAUXC, 0x0);
+ wr32(IGC_TSSDP, 0x0);
+ wr32(IGC_TSIM, IGC_TSICR_INTERRUPTS);
+ wr32(IGC_IMS, IGC_IMS_TS);
+ break;
+ default:
+ /* No work to do. */
+ goto out;
+ }
+
+ /* Re-initialize the timer. */
+ if (hw->mac.type == igc_i225) {
+ struct timespec64 ts64 = ktime_to_timespec64(ktime_get_real());
+
+ igc_ptp_write_i225(adapter, &ts64);
+ } else {
+ timecounter_init(&adapter->tc, &adapter->cc,
+ ktime_to_ns(ktime_get_real()));
+ }
+out:
+ spin_unlock_irqrestore(&adapter->tmreg_lock, flags);
+
+ wrfl();
+}
diff --git a/drivers/net/ethernet/intel/igc/igc_regs.h b/drivers/net/ethernet/intel/igc/igc_regs.h
index 50d7c04dccf5..c82111051898 100644
--- a/drivers/net/ethernet/intel/igc/igc_regs.h
+++ b/drivers/net/ethernet/intel/igc/igc_regs.h
@@ -209,12 +209,48 @@
#define IGC_LENERRS 0x04138 /* Length Errors Count */
#define IGC_HRMPC 0x0A018 /* Header Redirection Missed Packet Count */
+/* Time sync registers */
+#define IGC_TSICR 0x0B66C /* Time Sync Interrupt Cause */
+#define IGC_TSIM 0x0B674 /* Time Sync Interrupt Mask Register */
+#define IGC_TSAUXC 0x0B640 /* Timesync Auxiliary Control register */
+#define IGC_TSYNCRXCTL 0x0B620 /* Rx Time Sync Control register - RW */
+#define IGC_TSYNCTXCTL 0x0B614 /* Tx Time Sync Control register - RW */
+#define IGC_TSYNCRXCFG 0x05F50 /* Time Sync Rx Configuration - RW */
+#define IGC_TSSDP 0x0003C /* Time Sync SDP Configuration Register - RW */
+
+#define IGC_IMIR(_i) (0x05A80 + ((_i) * 4)) /* Immediate Interrupt */
+#define IGC_IMIREXT(_i) (0x05AA0 + ((_i) * 4)) /* Immediate INTR Ext*/
+
+#define IGC_FTQF(_n) (0x059E0 + (4 * (_n))) /* 5-tuple Queue Fltr */
+
+#define IGC_RXPBS 0x02404 /* Rx Packet Buffer Size - RW */
+
+/* System Time Registers */
+#define IGC_SYSTIML 0x0B600 /* System time register Low - RO */
+#define IGC_SYSTIMH 0x0B604 /* System time register High - RO */
+#define IGC_SYSTIMR 0x0B6F8 /* System time register Residue */
+#define IGC_TIMINCA 0x0B608 /* Increment attributes register - RW */
+
+#define IGC_RXSTMPL 0x0B624 /* Rx timestamp Low - RO */
+#define IGC_RXSTMPH 0x0B628 /* Rx timestamp High - RO */
+#define IGC_TXSTMPL 0x0B618 /* Tx timestamp value Low - RO */
+#define IGC_TXSTMPH 0x0B61C /* Tx timestamp value High - RO */
+
/* Management registers */
#define IGC_MANC 0x05820 /* Management Control - RW */
/* Shadow Ram Write Register - RW */
#define IGC_SRWR 0x12018
+/* Wake Up registers */
+#define IGC_WUC 0x05800 /* Wakeup Control - RW */
+#define IGC_WUFC 0x05808 /* Wakeup Filter Control - RW */
+#define IGC_WUS 0x05810 /* Wakeup Status - R/W1C */
+#define IGC_WUPL 0x05900 /* Wakeup Packet Length - RW */
+
+/* Wake Up packet memory */
+#define IGC_WUPM_REG(_i) (0x05A00 + ((_i) * 4))
+
/* forward declaration */
struct igc_hw;
u32 igc_rd32(struct igc_hw *hw, u32 reg);
diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_ethtool.c b/drivers/net/ethernet/intel/ixgb/ixgb_ethtool.c
index c8c93ac436d4..c65eb1afc8fb 100644
--- a/drivers/net/ethernet/intel/ixgb/ixgb_ethtool.c
+++ b/drivers/net/ethernet/intel/ixgb/ixgb_ethtool.c
@@ -19,10 +19,10 @@ struct ixgb_stats {
};
#define IXGB_STAT(m) IXGB_STATS, \
- FIELD_SIZEOF(struct ixgb_adapter, m), \
+ sizeof_field(struct ixgb_adapter, m), \
offsetof(struct ixgb_adapter, m)
#define IXGB_NETDEV_STAT(m) NETDEV_STATS, \
- FIELD_SIZEOF(struct net_device, m), \
+ sizeof_field(struct net_device, m), \
offsetof(struct net_device, m)
static struct ixgb_stats ixgb_gstrings_stats[] = {
diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_main.c b/drivers/net/ethernet/intel/ixgb/ixgb_main.c
index 3d8c051dd327..b64e91ea3465 100644
--- a/drivers/net/ethernet/intel/ixgb/ixgb_main.c
+++ b/drivers/net/ethernet/intel/ixgb/ixgb_main.c
@@ -70,7 +70,7 @@ static int ixgb_clean(struct napi_struct *, int);
static bool ixgb_clean_rx_irq(struct ixgb_adapter *, int *, int);
static void ixgb_alloc_rx_buffers(struct ixgb_adapter *, int);
-static void ixgb_tx_timeout(struct net_device *dev);
+static void ixgb_tx_timeout(struct net_device *dev, unsigned int txqueue);
static void ixgb_tx_timeout_task(struct work_struct *work);
static void ixgb_vlan_strip_enable(struct ixgb_adapter *adapter);
@@ -1538,7 +1538,7 @@ ixgb_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
**/
static void
-ixgb_tx_timeout(struct net_device *netdev)
+ixgb_tx_timeout(struct net_device *netdev, unsigned int txqueue)
{
struct ixgb_adapter *adapter = netdev_priv(netdev);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_debugfs.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_debugfs.c
index 171cdc552961..5b1cf49df3d3 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_debugfs.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_debugfs.c
@@ -166,7 +166,9 @@ static ssize_t ixgbe_dbg_netdev_ops_write(struct file *filp,
ixgbe_dbg_netdev_ops_buf[len] = '\0';
if (strncmp(ixgbe_dbg_netdev_ops_buf, "tx_timeout", 10) == 0) {
- adapter->netdev->netdev_ops->ndo_tx_timeout(adapter->netdev);
+ /* TX Queue number below is wrong, but ixgbe does not use it */
+ adapter->netdev->netdev_ops->ndo_tx_timeout(adapter->netdev,
+ UINT_MAX);
e_dev_info("tx_timeout called\n");
} else {
e_dev_info("Unknown command: %s\n", ixgbe_dbg_netdev_ops_buf);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 25c097cd8100..4c13cca656b2 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -6158,7 +6158,7 @@ static void ixgbe_set_eee_capable(struct ixgbe_adapter *adapter)
* ixgbe_tx_timeout - Respond to a Tx Hang
* @netdev: network interface device structure
**/
-static void ixgbe_tx_timeout(struct net_device *netdev)
+static void ixgbe_tx_timeout(struct net_device *netdev, unsigned int txqueue)
{
struct ixgbe_adapter *adapter = netdev_priv(netdev);
@@ -10261,7 +10261,12 @@ static int ixgbe_xdp_setup(struct net_device *dev, struct bpf_prog *prog)
/* If transitioning XDP modes reconfigure rings */
if (need_reset) {
- int err = ixgbe_setup_tc(dev, adapter->hw_tcs);
+ int err;
+
+ if (!prog)
+ /* Wait until ndo_xsk_wakeup completes. */
+ synchronize_rcu();
+ err = ixgbe_setup_tc(dev, adapter->hw_tcs);
if (err) {
rcu_assign_pointer(adapter->xdp_prog, old_prog);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
index d6feaacfbf89..74b540ebb3dc 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
@@ -277,7 +277,7 @@ static bool ixgbe_alloc_buffer_zc(struct ixgbe_ring *rx_ring,
bi->handle = xsk_umem_adjust_offset(umem, handle, umem->headroom);
- xsk_umem_discard_addr(umem);
+ xsk_umem_release_addr(umem);
return true;
}
@@ -304,7 +304,7 @@ static bool ixgbe_alloc_buffer_slow_zc(struct ixgbe_ring *rx_ring,
bi->handle = xsk_umem_adjust_offset(umem, handle, umem->headroom);
- xsk_umem_discard_addr_rq(umem);
+ xsk_umem_release_addr_rq(umem);
return true;
}
@@ -709,10 +709,14 @@ int ixgbe_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags)
if (qid >= adapter->num_xdp_queues)
return -ENXIO;
- if (!adapter->xdp_ring[qid]->xsk_umem)
+ ring = adapter->xdp_ring[qid];
+
+ if (test_bit(__IXGBE_TX_DISABLED, &ring->state))
+ return -ENETDOWN;
+
+ if (!ring->xsk_umem)
return -ENXIO;
- ring = adapter->xdp_ring[qid];
if (!napi_if_scheduled_mark_missed(&ring->q_vector->napi)) {
u64 eics = BIT_ULL(ring->q_vector->v_idx);
diff --git a/drivers/net/ethernet/intel/ixgbevf/ethtool.c b/drivers/net/ethernet/intel/ixgbevf/ethtool.c
index 54459b69c948..f7f309c96fa8 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ethtool.c
@@ -31,14 +31,14 @@ struct ixgbe_stats {
#define IXGBEVF_STAT(_name, _stat) { \
.stat_string = _name, \
.type = IXGBEVF_STATS, \
- .sizeof_stat = FIELD_SIZEOF(struct ixgbevf_adapter, _stat), \
+ .sizeof_stat = sizeof_field(struct ixgbevf_adapter, _stat), \
.stat_offset = offsetof(struct ixgbevf_adapter, _stat) \
}
#define IXGBEVF_NETDEV_STAT(_net_stat) { \
.stat_string = #_net_stat, \
.type = NETDEV_STATS, \
- .sizeof_stat = FIELD_SIZEOF(struct net_device_stats, _net_stat), \
+ .sizeof_stat = sizeof_field(struct net_device_stats, _net_stat), \
.stat_offset = offsetof(struct net_device_stats, _net_stat) \
}
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index 076f2da36f27..fa286694ac2c 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -250,7 +250,7 @@ static void ixgbevf_tx_timeout_reset(struct ixgbevf_adapter *adapter)
* ixgbevf_tx_timeout - Respond to a Tx Hang
* @netdev: network interface device structure
**/
-static void ixgbevf_tx_timeout(struct net_device *netdev)
+static void ixgbevf_tx_timeout(struct net_device *netdev, unsigned int txqueue)
{
struct ixgbevf_adapter *adapter = netdev_priv(netdev);
diff --git a/drivers/net/ethernet/jme.c b/drivers/net/ethernet/jme.c
index 25aa400e2e3c..2e4975572e9f 100644
--- a/drivers/net/ethernet/jme.c
+++ b/drivers/net/ethernet/jme.c
@@ -2337,7 +2337,7 @@ jme_change_mtu(struct net_device *netdev, int new_mtu)
}
static void
-jme_tx_timeout(struct net_device *netdev)
+jme_tx_timeout(struct net_device *netdev, unsigned int txqueue)
{
struct jme_adapter *jme = netdev_priv(netdev);
diff --git a/drivers/net/ethernet/korina.c b/drivers/net/ethernet/korina.c
index ae195f8adff5..f98d9d627c71 100644
--- a/drivers/net/ethernet/korina.c
+++ b/drivers/net/ethernet/korina.c
@@ -917,7 +917,7 @@ static void korina_restart_task(struct work_struct *work)
enable_irq(lp->rx_irq);
}
-static void korina_tx_timeout(struct net_device *dev)
+static void korina_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct korina_private *lp = netdev_priv(dev);
diff --git a/drivers/net/ethernet/lantiq_etop.c b/drivers/net/ethernet/lantiq_etop.c
index 6e73ffe6f928..028e3e6222e9 100644
--- a/drivers/net/ethernet/lantiq_etop.c
+++ b/drivers/net/ethernet/lantiq_etop.c
@@ -594,7 +594,7 @@ err_hw:
}
static void
-ltq_etop_tx_timeout(struct net_device *dev)
+ltq_etop_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
int err;
diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c
index d5b644131cff..3c8125cbc84d 100644
--- a/drivers/net/ethernet/marvell/mv643xx_eth.c
+++ b/drivers/net/ethernet/marvell/mv643xx_eth.c
@@ -1432,11 +1432,11 @@ struct mv643xx_eth_stats {
};
#define SSTAT(m) \
- { #m, FIELD_SIZEOF(struct net_device_stats, m), \
+ { #m, sizeof_field(struct net_device_stats, m), \
offsetof(struct net_device, stats.m), -1 }
#define MIBSTAT(m) \
- { #m, FIELD_SIZEOF(struct mib_counters, m), \
+ { #m, sizeof_field(struct mib_counters, m), \
-1, offsetof(struct mv643xx_eth_private, mib_counters.m) }
static const struct mv643xx_eth_stats mv643xx_eth_stats[] = {
@@ -2590,7 +2590,7 @@ static void tx_timeout_task(struct work_struct *ugly)
}
}
-static void mv643xx_eth_tx_timeout(struct net_device *dev)
+static void mv643xx_eth_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct mv643xx_eth_private *mp = netdev_priv(dev);
diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
index 62dc2f362a16..72133cbe55d4 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
@@ -1114,7 +1114,7 @@ mvpp2_shared_interrupt_mask_unmask(struct mvpp2_port *port, bool mask)
/* Port configuration routines */
static bool mvpp2_is_xlg(phy_interface_t interface)
{
- return interface == PHY_INTERFACE_MODE_10GKR ||
+ return interface == PHY_INTERFACE_MODE_10GBASER ||
interface == PHY_INTERFACE_MODE_XAUI;
}
@@ -1200,7 +1200,7 @@ static int mvpp22_gop_init(struct mvpp2_port *port)
case PHY_INTERFACE_MODE_2500BASEX:
mvpp22_gop_init_sgmii(port);
break;
- case PHY_INTERFACE_MODE_10GKR:
+ case PHY_INTERFACE_MODE_10GBASER:
if (port->gop_id != 0)
goto invalid_conf;
mvpp22_gop_init_10gkr(port);
@@ -1649,7 +1649,7 @@ static void mvpp22_pcs_reset_deassert(struct mvpp2_port *port)
xpcs = priv->iface_base + MVPP22_XPCS_BASE(port->gop_id);
switch (port->phy_interface) {
- case PHY_INTERFACE_MODE_10GKR:
+ case PHY_INTERFACE_MODE_10GBASER:
val = readl(mpcs + MVPP22_MPCS_CLK_RESET);
val |= MAC_CLK_RESET_MAC | MAC_CLK_RESET_SD_RX |
MAC_CLK_RESET_SD_TX;
@@ -3680,7 +3680,7 @@ static int mvpp2_open(struct net_device *dev)
valid = true;
}
- if (priv->hw_version == MVPP22 && port->link_irq && !port->phylink) {
+ if (priv->hw_version == MVPP22 && port->link_irq) {
err = request_irq(port->link_irq, mvpp2_link_status_isr, 0,
dev->name, port);
if (err) {
@@ -4758,7 +4758,7 @@ static void mvpp2_phylink_validate(struct phylink_config *config,
/* Invalid combinations */
switch (state->interface) {
- case PHY_INTERFACE_MODE_10GKR:
+ case PHY_INTERFACE_MODE_10GBASER:
case PHY_INTERFACE_MODE_XAUI:
if (port->gop_id != 0)
goto empty_set;
@@ -4780,7 +4780,7 @@ static void mvpp2_phylink_validate(struct phylink_config *config,
phylink_set(mask, Asym_Pause);
switch (state->interface) {
- case PHY_INTERFACE_MODE_10GKR:
+ case PHY_INTERFACE_MODE_10GBASER:
case PHY_INTERFACE_MODE_XAUI:
case PHY_INTERFACE_MODE_NA:
if (port->gop_id == 0) {
@@ -4792,6 +4792,8 @@ static void mvpp2_phylink_validate(struct phylink_config *config,
phylink_set(mask, 10000baseER_Full);
phylink_set(mask, 10000baseKR_Full);
}
+ if (state->interface != PHY_INTERFACE_MODE_NA)
+ break;
/* Fall-through */
case PHY_INTERFACE_MODE_RGMII:
case PHY_INTERFACE_MODE_RGMII_ID:
@@ -4802,13 +4804,23 @@ static void mvpp2_phylink_validate(struct phylink_config *config,
phylink_set(mask, 10baseT_Full);
phylink_set(mask, 100baseT_Half);
phylink_set(mask, 100baseT_Full);
+ phylink_set(mask, 1000baseT_Full);
+ phylink_set(mask, 1000baseX_Full);
+ if (state->interface != PHY_INTERFACE_MODE_NA)
+ break;
/* Fall-through */
case PHY_INTERFACE_MODE_1000BASEX:
case PHY_INTERFACE_MODE_2500BASEX:
- phylink_set(mask, 1000baseT_Full);
- phylink_set(mask, 1000baseX_Full);
- phylink_set(mask, 2500baseT_Full);
- phylink_set(mask, 2500baseX_Full);
+ if (port->comphy ||
+ state->interface != PHY_INTERFACE_MODE_2500BASEX) {
+ phylink_set(mask, 1000baseT_Full);
+ phylink_set(mask, 1000baseX_Full);
+ }
+ if (port->comphy ||
+ state->interface == PHY_INTERFACE_MODE_2500BASEX) {
+ phylink_set(mask, 2500baseT_Full);
+ phylink_set(mask, 2500baseX_Full);
+ }
break;
default:
goto empty_set;
@@ -4817,6 +4829,8 @@ static void mvpp2_phylink_validate(struct phylink_config *config,
bitmap_and(supported, supported, mask, __ETHTOOL_LINK_MODE_MASK_NBITS);
bitmap_and(state->advertising, state->advertising, mask,
__ETHTOOL_LINK_MODE_MASK_NBITS);
+
+ phylink_helper_basex_speed(state);
return;
empty_set:
@@ -5233,6 +5247,15 @@ static int mvpp2_port_probe(struct platform_device *pdev,
goto err_free_netdev;
}
+ /*
+ * Rewrite 10GBASE-KR to 10GBASE-R for compatibility with existing DT.
+ * Existing usage of 10GBASE-KR is not correct; no backplane
+ * negotiation is done, and this driver does not actually support
+ * 10GBASE-KR.
+ */
+ if (phy_mode == PHY_INTERFACE_MODE_10GKR)
+ phy_mode = PHY_INTERFACE_MODE_10GBASER;
+
if (port_node) {
comphy = devm_of_phy_get(&pdev->dev, port_node, NULL);
if (IS_ERR(comphy)) {
@@ -5411,6 +5434,16 @@ static int mvpp2_port_probe(struct platform_device *pdev,
port->phylink = NULL;
}
+ /* Cycle the comphy to power it down, saving 270mW per port -
+ * don't worry about an error powering it up. When the comphy
+ * driver does this, we can remove this code.
+ */
+ if (port->comphy) {
+ err = mvpp22_comphy_init(port);
+ if (err == 0)
+ phy_power_off(port->comphy);
+ }
+
err = register_netdev(dev);
if (err < 0) {
dev_err(&pdev->dev, "failed to register netdev\n");
diff --git a/drivers/net/ethernet/marvell/pxa168_eth.c b/drivers/net/ethernet/marvell/pxa168_eth.c
index 3fb7ee3d4d13..1a6877902dd6 100644
--- a/drivers/net/ethernet/marvell/pxa168_eth.c
+++ b/drivers/net/ethernet/marvell/pxa168_eth.c
@@ -742,7 +742,7 @@ txq_reclaim_end:
return released;
}
-static void pxa168_eth_tx_timeout(struct net_device *dev)
+static void pxa168_eth_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct pxa168_eth_private *pep = netdev_priv(dev);
diff --git a/drivers/net/ethernet/marvell/skge.c b/drivers/net/ethernet/marvell/skge.c
index 095f6c71b4fa..8ca15958e752 100644
--- a/drivers/net/ethernet/marvell/skge.c
+++ b/drivers/net/ethernet/marvell/skge.c
@@ -2884,7 +2884,7 @@ static void skge_tx_clean(struct net_device *dev)
skge->tx_ring.to_clean = e;
}
-static void skge_tx_timeout(struct net_device *dev)
+static void skge_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct skge_port *skge = netdev_priv(dev);
diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c
index 5f56ee83e3b1..acd1cba987fb 100644
--- a/drivers/net/ethernet/marvell/sky2.c
+++ b/drivers/net/ethernet/marvell/sky2.c
@@ -2358,7 +2358,7 @@ static void sky2_qlink_intr(struct sky2_hw *hw)
/* Transmit timeout is only called if we are running, carrier is up
* and tx queue is full (stopped).
*/
-static void sky2_tx_timeout(struct net_device *dev)
+static void sky2_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct sky2_port *sky2 = netdev_priv(dev);
struct sky2_hw *hw = sky2->hw;
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 527ad2aadcca..8c6cfd15481c 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -2081,7 +2081,7 @@ static void mtk_dma_free(struct mtk_eth *eth)
kfree(eth->scratch_head);
}
-static void mtk_tx_timeout(struct net_device *dev)
+static void mtk_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct mtk_mac *mac = netdev_priv(dev);
struct mtk_eth *eth = mac->hw;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
index a1202e53710c..8bf1f08fdee2 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
@@ -611,7 +611,7 @@ static u32 ptys_get_active_port(struct mlx4_ptys_reg *ptys_reg)
}
#define MLX4_LINK_MODES_SZ \
- (FIELD_SIZEOF(struct mlx4_ptys_reg, eth_proto_cap) * 8)
+ (sizeof_field(struct mlx4_ptys_reg, eth_proto_cap) * 8)
enum ethtool_report {
SUPPORTED = 0,
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 7af75b63245f..43dcbd8214c6 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -1363,24 +1363,18 @@ static void mlx4_en_delete_rss_steer_rules(struct mlx4_en_priv *priv)
}
}
-static void mlx4_en_tx_timeout(struct net_device *dev)
+static void mlx4_en_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct mlx4_en_priv *priv = netdev_priv(dev);
struct mlx4_en_dev *mdev = priv->mdev;
- int i;
+ struct mlx4_en_tx_ring *tx_ring = priv->tx_ring[TX][txqueue];
if (netif_msg_timer(priv))
en_warn(priv, "Tx timeout called on port:%d\n", priv->port);
- for (i = 0; i < priv->tx_ring_num[TX]; i++) {
- struct mlx4_en_tx_ring *tx_ring = priv->tx_ring[TX][i];
-
- if (!netif_tx_queue_stopped(netdev_get_tx_queue(dev, i)))
- continue;
- en_warn(priv, "TX timeout on queue: %d, QP: 0x%x, CQ: 0x%x, Cons: 0x%x, Prod: 0x%x\n",
- i, tx_ring->qpn, tx_ring->sp_cqn,
- tx_ring->cons, tx_ring->prod);
- }
+ en_warn(priv, "TX timeout on queue: %d, QP: 0x%x, CQ: 0x%x, Cons: 0x%x, Prod: 0x%x\n",
+ txqueue, tx_ring->qpn, tx_ring->sp_cqn,
+ tx_ring->cons, tx_ring->prod);
priv->port_stats.tx_timeout++;
en_dbg(DRV, priv, "Scheduling watchdog\n");
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
index 549f962cd86e..42198e64a7f4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
@@ -71,8 +71,8 @@ static void *mlx5_dma_zalloc_coherent_node(struct mlx5_core_dev *dev,
return cpu_handle;
}
-int mlx5_buf_alloc_node(struct mlx5_core_dev *dev, int size,
- struct mlx5_frag_buf *buf, int node)
+static int mlx5_buf_alloc_node(struct mlx5_core_dev *dev, int size,
+ struct mlx5_frag_buf *buf, int node)
{
dma_addr_t t;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 2c16add0b642..fc80b59db9a8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -135,7 +135,7 @@ struct page_pool;
#define MLX5E_LOG_INDIR_RQT_SIZE 0x7
#define MLX5E_INDIR_RQT_SIZE BIT(MLX5E_LOG_INDIR_RQT_SIZE)
#define MLX5E_MIN_NUM_CHANNELS 0x1
-#define MLX5E_MAX_NUM_CHANNELS (MLX5E_INDIR_RQT_SIZE >> 1)
+#define MLX5E_MAX_NUM_CHANNELS MLX5E_INDIR_RQT_SIZE
#define MLX5E_MAX_NUM_SQS (MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC)
#define MLX5E_TX_CQ_POLL_BUDGET 128
#define MLX5E_TX_XSK_POLL_BUDGET 64
@@ -760,7 +760,7 @@ enum {
MLX5E_STATE_OPENED,
MLX5E_STATE_DESTROYING,
MLX5E_STATE_XDP_TX_ENABLED,
- MLX5E_STATE_XDP_OPEN,
+ MLX5E_STATE_XDP_ACTIVE,
};
struct mlx5e_rqt {
@@ -1175,11 +1175,11 @@ int mlx5e_attach_netdev(struct mlx5e_priv *priv);
void mlx5e_detach_netdev(struct mlx5e_priv *priv);
void mlx5e_destroy_netdev(struct mlx5e_priv *priv);
void mlx5e_set_netdev_mtu_boundaries(struct mlx5e_priv *priv);
-void mlx5e_build_nic_params(struct mlx5_core_dev *mdev,
+void mlx5e_build_nic_params(struct mlx5e_priv *priv,
struct mlx5e_xsk *xsk,
struct mlx5e_rss_params *rss_params,
struct mlx5e_params *params,
- u16 max_channels, u16 mtu);
+ u16 mtu);
void mlx5e_build_rq_params(struct mlx5_core_dev *mdev,
struct mlx5e_params *params);
void mlx5e_build_rss_params(struct mlx5e_rss_params *rss_params,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
index 68d593074f6c..d48292ccda29 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h
@@ -122,6 +122,22 @@ enum {
#endif
};
+#define MLX5E_TTC_NUM_GROUPS 3
+#define MLX5E_TTC_GROUP1_SIZE (BIT(3) + MLX5E_NUM_TUNNEL_TT)
+#define MLX5E_TTC_GROUP2_SIZE BIT(1)
+#define MLX5E_TTC_GROUP3_SIZE BIT(0)
+#define MLX5E_TTC_TABLE_SIZE (MLX5E_TTC_GROUP1_SIZE +\
+ MLX5E_TTC_GROUP2_SIZE +\
+ MLX5E_TTC_GROUP3_SIZE)
+
+#define MLX5E_INNER_TTC_NUM_GROUPS 3
+#define MLX5E_INNER_TTC_GROUP1_SIZE BIT(3)
+#define MLX5E_INNER_TTC_GROUP2_SIZE BIT(1)
+#define MLX5E_INNER_TTC_GROUP3_SIZE BIT(0)
+#define MLX5E_INNER_TTC_TABLE_SIZE (MLX5E_INNER_TTC_GROUP1_SIZE +\
+ MLX5E_INNER_TTC_GROUP2_SIZE +\
+ MLX5E_INNER_TTC_GROUP3_SIZE)
+
#ifdef CONFIG_MLX5_EN_RXNFC
struct mlx5e_ethtool_table {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.c b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c
index 1d6b58860da6..3a975641f902 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c
@@ -197,9 +197,10 @@ int mlx5e_health_report(struct mlx5e_priv *priv,
struct devlink_health_reporter *reporter, char *err_str,
struct mlx5e_err_ctx *err_ctx)
{
- if (!reporter) {
- netdev_err(priv->netdev, err_str);
+ netdev_err(priv->netdev, err_str);
+
+ if (!reporter)
return err_ctx->recover(&err_ctx->ctx);
- }
+
return devlink_health_report(reporter, err_str, err_ctx);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
index 36ac1e3816b9..d7587f40ecae 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
@@ -75,12 +75,18 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
static inline void mlx5e_xdp_tx_enable(struct mlx5e_priv *priv)
{
set_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state);
+
+ if (priv->channels.params.xdp_prog)
+ set_bit(MLX5E_STATE_XDP_ACTIVE, &priv->state);
}
static inline void mlx5e_xdp_tx_disable(struct mlx5e_priv *priv)
{
+ if (priv->channels.params.xdp_prog)
+ clear_bit(MLX5E_STATE_XDP_ACTIVE, &priv->state);
+
clear_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state);
- /* let other device's napi(s) see our new state */
+ /* Let other device's napi(s) and XSK wakeups see our new state. */
synchronize_rcu();
}
@@ -89,19 +95,9 @@ static inline bool mlx5e_xdp_tx_is_enabled(struct mlx5e_priv *priv)
return test_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state);
}
-static inline void mlx5e_xdp_set_open(struct mlx5e_priv *priv)
-{
- set_bit(MLX5E_STATE_XDP_OPEN, &priv->state);
-}
-
-static inline void mlx5e_xdp_set_closed(struct mlx5e_priv *priv)
-{
- clear_bit(MLX5E_STATE_XDP_OPEN, &priv->state);
-}
-
-static inline bool mlx5e_xdp_is_open(struct mlx5e_priv *priv)
+static inline bool mlx5e_xdp_is_active(struct mlx5e_priv *priv)
{
- return test_bit(MLX5E_STATE_XDP_OPEN, &priv->state);
+ return test_bit(MLX5E_STATE_XDP_ACTIVE, &priv->state);
}
static inline void mlx5e_xmit_xdp_doorbell(struct mlx5e_xdpsq *sq)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
index 475b6bd5d29b..62fc8a128a8d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
@@ -35,7 +35,7 @@ int mlx5e_xsk_page_alloc_umem(struct mlx5e_rq *rq,
*/
dma_info->addr = xdp_umem_get_dma(umem, handle);
- xsk_umem_discard_addr_rq(umem);
+ xsk_umem_release_addr_rq(umem);
dma_sync_single_for_device(rq->pdev, dma_info->addr, PAGE_SIZE,
DMA_BIDIRECTIONAL);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
index 631af8dee517..c28cbae42331 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c
@@ -144,6 +144,7 @@ void mlx5e_close_xsk(struct mlx5e_channel *c)
{
clear_bit(MLX5E_CHANNEL_STATE_XSK, c->state);
napi_synchronize(&c->napi);
+ synchronize_rcu(); /* Sync with the XSK wakeup. */
mlx5e_close_rq(&c->xskrq);
mlx5e_close_cq(&c->xskrq.cq);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c
index 87827477d38c..fe2d596cb361 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c
@@ -14,7 +14,7 @@ int mlx5e_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags)
struct mlx5e_channel *c;
u16 ix;
- if (unlikely(!mlx5e_xdp_is_open(priv)))
+ if (unlikely(!mlx5e_xdp_is_active(priv)))
return -ENETDOWN;
if (unlikely(!mlx5e_qid_get_ch_if_in_group(params, qid, MLX5E_RQ_GROUP_XSK, &ix)))
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
index 15b7f0f1427c..73d3dc07331f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
@@ -904,22 +904,6 @@ del_rules:
return err;
}
-#define MLX5E_TTC_NUM_GROUPS 3
-#define MLX5E_TTC_GROUP1_SIZE (BIT(3) + MLX5E_NUM_TUNNEL_TT)
-#define MLX5E_TTC_GROUP2_SIZE BIT(1)
-#define MLX5E_TTC_GROUP3_SIZE BIT(0)
-#define MLX5E_TTC_TABLE_SIZE (MLX5E_TTC_GROUP1_SIZE +\
- MLX5E_TTC_GROUP2_SIZE +\
- MLX5E_TTC_GROUP3_SIZE)
-
-#define MLX5E_INNER_TTC_NUM_GROUPS 3
-#define MLX5E_INNER_TTC_GROUP1_SIZE BIT(3)
-#define MLX5E_INNER_TTC_GROUP2_SIZE BIT(1)
-#define MLX5E_INNER_TTC_GROUP3_SIZE BIT(0)
-#define MLX5E_INNER_TTC_TABLE_SIZE (MLX5E_INNER_TTC_GROUP1_SIZE +\
- MLX5E_INNER_TTC_GROUP2_SIZE +\
- MLX5E_INNER_TTC_GROUP3_SIZE)
-
static int mlx5e_create_ttc_table_groups(struct mlx5e_ttc_table *ttc,
bool use_ipv)
{
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 4980e80a5e85..78737fd42616 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -3000,12 +3000,9 @@ void mlx5e_timestamp_init(struct mlx5e_priv *priv)
int mlx5e_open_locked(struct net_device *netdev)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
- bool is_xdp = priv->channels.params.xdp_prog;
int err;
set_bit(MLX5E_STATE_OPENED, &priv->state);
- if (is_xdp)
- mlx5e_xdp_set_open(priv);
err = mlx5e_open_channels(priv, &priv->channels);
if (err)
@@ -3020,8 +3017,6 @@ int mlx5e_open_locked(struct net_device *netdev)
return 0;
err_clear_state_opened_flag:
- if (is_xdp)
- mlx5e_xdp_set_closed(priv);
clear_bit(MLX5E_STATE_OPENED, &priv->state);
return err;
}
@@ -3053,8 +3048,6 @@ int mlx5e_close_locked(struct net_device *netdev)
if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
return 0;
- if (priv->channels.params.xdp_prog)
- mlx5e_xdp_set_closed(priv);
clear_bit(MLX5E_STATE_OPENED, &priv->state);
netif_carrier_off(priv->netdev);
@@ -4332,7 +4325,7 @@ unlock:
rtnl_unlock();
}
-static void mlx5e_tx_timeout(struct net_device *dev)
+static void mlx5e_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct mlx5e_priv *priv = netdev_priv(dev);
@@ -4371,16 +4364,6 @@ static int mlx5e_xdp_allowed(struct mlx5e_priv *priv, struct bpf_prog *prog)
return 0;
}
-static int mlx5e_xdp_update_state(struct mlx5e_priv *priv)
-{
- if (priv->channels.params.xdp_prog)
- mlx5e_xdp_set_open(priv);
- else
- mlx5e_xdp_set_closed(priv);
-
- return 0;
-}
-
static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog)
{
struct mlx5e_priv *priv = netdev_priv(netdev);
@@ -4415,7 +4398,7 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog)
mlx5e_set_rq_type(priv->mdev, &new_channels.params);
old_prog = priv->channels.params.xdp_prog;
- err = mlx5e_safe_switch_channels(priv, &new_channels, mlx5e_xdp_update_state);
+ err = mlx5e_safe_switch_channels(priv, &new_channels, NULL);
if (err)
goto unlock;
} else {
@@ -4756,17 +4739,19 @@ void mlx5e_build_rss_params(struct mlx5e_rss_params *rss_params,
tirc_default_config[tt].rx_hash_fields;
}
-void mlx5e_build_nic_params(struct mlx5_core_dev *mdev,
+void mlx5e_build_nic_params(struct mlx5e_priv *priv,
struct mlx5e_xsk *xsk,
struct mlx5e_rss_params *rss_params,
struct mlx5e_params *params,
- u16 max_channels, u16 mtu)
+ u16 mtu)
{
+ struct mlx5_core_dev *mdev = priv->mdev;
u8 rx_cq_period_mode;
params->sw_mtu = mtu;
params->hard_mtu = MLX5E_ETH_HARD_MTU;
- params->num_channels = max_channels;
+ params->num_channels = min_t(unsigned int, MLX5E_MAX_NUM_CHANNELS / 2,
+ priv->max_nch);
params->num_tc = 1;
/* SQ */
@@ -5003,8 +4988,8 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev,
if (err)
return err;
- mlx5e_build_nic_params(mdev, &priv->xsk, rss, &priv->channels.params,
- priv->max_nch, netdev->mtu);
+ mlx5e_build_nic_params(priv, &priv->xsk, rss, &priv->channels.params,
+ netdev->mtu);
mlx5e_timestamp_init(priv);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
index 9f09253f9f46..a05158472ed1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
@@ -297,6 +297,9 @@ static void mlx5e_grp_sw_update_stats(struct mlx5e_priv *priv)
s->tx_tls_drop_bypass_req += sq_stats->tls_drop_bypass_req;
#endif
s->tx_cqes += sq_stats->cqes;
+
+ /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92657 */
+ barrier();
}
}
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 9b32a9c0f497..db614bd6bd1f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -592,7 +592,7 @@ static void mlx5e_hairpin_set_ttc_params(struct mlx5e_hairpin *hp,
for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
ttc_params->indir_tirn[tt] = hp->indir_tirn[tt];
- ft_attr->max_fte = MLX5E_NUM_TT;
+ ft_attr->max_fte = MLX5E_TTC_TABLE_SIZE;
ft_attr->level = MLX5E_TC_TTC_FT_LEVEL;
ft_attr->prio = MLX5E_TC_PRIO;
}
@@ -2842,6 +2842,10 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv,
flow_action_for_each(i, act, flow_action) {
switch (act->id) {
+ case FLOW_ACTION_ACCEPT:
+ action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
+ MLX5_FLOW_CONTEXT_ACTION_COUNT;
+ break;
case FLOW_ACTION_DROP:
action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
if (MLX5_CAP_FLOWTABLE(priv->mdev,
@@ -2999,6 +3003,25 @@ static struct ip_tunnel_info *dup_tun_info(const struct ip_tunnel_info *tun_info
return kmemdup(tun_info, tun_size, GFP_KERNEL);
}
+static bool is_duplicated_encap_entry(struct mlx5e_priv *priv,
+ struct mlx5e_tc_flow *flow,
+ int out_index,
+ struct mlx5e_encap_entry *e,
+ struct netlink_ext_ack *extack)
+{
+ int i;
+
+ for (i = 0; i < out_index; i++) {
+ if (flow->encaps[i].e != e)
+ continue;
+ NL_SET_ERR_MSG_MOD(extack, "can't duplicate encap action");
+ netdev_err(priv->netdev, "can't duplicate encap action\n");
+ return true;
+ }
+
+ return false;
+}
+
static int mlx5e_attach_encap(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow,
struct net_device *mirred_dev,
@@ -3034,6 +3057,12 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv,
/* must verify if encap is valid or not */
if (e) {
+ /* Check that entry was not already attached to this flow */
+ if (is_duplicated_encap_entry(priv, flow, out_index, e, extack)) {
+ err = -EOPNOTSUPP;
+ goto out_err;
+ }
+
mutex_unlock(&esw->offloads.encap_tbl_lock);
wait_for_completion(&e->res_ready);
@@ -3220,6 +3249,26 @@ bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv,
same_hw_devs(priv, netdev_priv(out_dev));
}
+static bool is_duplicated_output_device(struct net_device *dev,
+ struct net_device *out_dev,
+ int *ifindexes, int if_count,
+ struct netlink_ext_ack *extack)
+{
+ int i;
+
+ for (i = 0; i < if_count; i++) {
+ if (ifindexes[i] == out_dev->ifindex) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "can't duplicate output to same device");
+ netdev_err(dev, "can't duplicate output to same device: %s\n",
+ out_dev->name);
+ return true;
+ }
+ }
+
+ return false;
+}
+
static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
struct flow_action *flow_action,
struct mlx5e_tc_flow *flow,
@@ -3231,11 +3280,12 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr;
struct mlx5e_rep_priv *rpriv = priv->ppriv;
const struct ip_tunnel_info *info = NULL;
+ int ifindexes[MLX5_MAX_FLOW_FWD_VPORTS];
bool ft_flow = mlx5e_is_ft_flow(flow);
const struct flow_action_entry *act;
+ int err, i, if_count = 0;
bool encap = false;
u32 action = 0;
- int err, i;
if (!flow_action_has_entries(flow_action))
return -EINVAL;
@@ -3312,6 +3362,16 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
struct net_device *uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH);
struct net_device *uplink_upper;
+ if (is_duplicated_output_device(priv->netdev,
+ out_dev,
+ ifindexes,
+ if_count,
+ extack))
+ return -EOPNOTSUPP;
+
+ ifindexes[if_count] = out_dev->ifindex;
+ if_count++;
+
rcu_read_lock();
uplink_upper =
netdev_master_upper_dev_get_rcu(uplink_dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index 580c71cb9dfa..cccea3a8eddd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -156,7 +156,8 @@ static int mlx5_eq_comp_int(struct notifier_block *nb,
cq->comp(cq, eqe);
mlx5_cq_put(cq);
} else {
- mlx5_core_warn(eq->dev, "Completion event for bogus CQ 0x%x\n", cqn);
+ dev_dbg_ratelimited(eq->dev->device,
+ "Completion event for bogus CQ 0x%x\n", cqn);
}
++eq->cons_index;
@@ -563,6 +564,39 @@ static void gather_async_events_mask(struct mlx5_core_dev *dev, u64 mask[4])
gather_user_async_events(dev, mask);
}
+static int
+setup_async_eq(struct mlx5_core_dev *dev, struct mlx5_eq_async *eq,
+ struct mlx5_eq_param *param, const char *name)
+{
+ int err;
+
+ eq->irq_nb.notifier_call = mlx5_eq_async_int;
+
+ err = create_async_eq(dev, &eq->core, param);
+ if (err) {
+ mlx5_core_warn(dev, "failed to create %s EQ %d\n", name, err);
+ return err;
+ }
+ err = mlx5_eq_enable(dev, &eq->core, &eq->irq_nb);
+ if (err) {
+ mlx5_core_warn(dev, "failed to enable %s EQ %d\n", name, err);
+ destroy_async_eq(dev, &eq->core);
+ }
+ return err;
+}
+
+static void cleanup_async_eq(struct mlx5_core_dev *dev,
+ struct mlx5_eq_async *eq, const char *name)
+{
+ int err;
+
+ mlx5_eq_disable(dev, &eq->core, &eq->irq_nb);
+ err = destroy_async_eq(dev, &eq->core);
+ if (err)
+ mlx5_core_err(dev, "failed to destroy %s eq, err(%d)\n",
+ name, err);
+}
+
static int create_async_eqs(struct mlx5_core_dev *dev)
{
struct mlx5_eq_table *table = dev->priv.eq_table;
@@ -572,77 +606,45 @@ static int create_async_eqs(struct mlx5_core_dev *dev)
MLX5_NB_INIT(&table->cq_err_nb, cq_err_event_notifier, CQ_ERROR);
mlx5_eq_notifier_register(dev, &table->cq_err_nb);
- table->cmd_eq.irq_nb.notifier_call = mlx5_eq_async_int;
param = (struct mlx5_eq_param) {
.irq_index = 0,
.nent = MLX5_NUM_CMD_EQE,
+ .mask[0] = 1ull << MLX5_EVENT_TYPE_CMD,
};
-
- param.mask[0] = 1ull << MLX5_EVENT_TYPE_CMD;
- err = create_async_eq(dev, &table->cmd_eq.core, &param);
- if (err) {
- mlx5_core_warn(dev, "failed to create cmd EQ %d\n", err);
- goto err0;
- }
- err = mlx5_eq_enable(dev, &table->cmd_eq.core, &table->cmd_eq.irq_nb);
- if (err) {
- mlx5_core_warn(dev, "failed to enable cmd EQ %d\n", err);
+ err = setup_async_eq(dev, &table->cmd_eq, &param, "cmd");
+ if (err)
goto err1;
- }
+
mlx5_cmd_use_events(dev);
- table->async_eq.irq_nb.notifier_call = mlx5_eq_async_int;
param = (struct mlx5_eq_param) {
.irq_index = 0,
.nent = MLX5_NUM_ASYNC_EQE,
};
gather_async_events_mask(dev, param.mask);
- err = create_async_eq(dev, &table->async_eq.core, &param);
- if (err) {
- mlx5_core_warn(dev, "failed to create async EQ %d\n", err);
+ err = setup_async_eq(dev, &table->async_eq, &param, "async");
+ if (err)
goto err2;
- }
- err = mlx5_eq_enable(dev, &table->async_eq.core,
- &table->async_eq.irq_nb);
- if (err) {
- mlx5_core_warn(dev, "failed to enable async EQ %d\n", err);
- goto err3;
- }
- table->pages_eq.irq_nb.notifier_call = mlx5_eq_async_int;
param = (struct mlx5_eq_param) {
.irq_index = 0,
.nent = /* TODO: sriov max_vf + */ 1,
+ .mask[0] = 1ull << MLX5_EVENT_TYPE_PAGE_REQUEST,
};
- param.mask[0] = 1ull << MLX5_EVENT_TYPE_PAGE_REQUEST;
- err = create_async_eq(dev, &table->pages_eq.core, &param);
- if (err) {
- mlx5_core_warn(dev, "failed to create pages EQ %d\n", err);
- goto err4;
- }
- err = mlx5_eq_enable(dev, &table->pages_eq.core,
- &table->pages_eq.irq_nb);
- if (err) {
- mlx5_core_warn(dev, "failed to enable pages EQ %d\n", err);
- goto err5;
- }
+ err = setup_async_eq(dev, &table->pages_eq, &param, "pages");
+ if (err)
+ goto err3;
- return err;
+ return 0;
-err5:
- destroy_async_eq(dev, &table->pages_eq.core);
-err4:
- mlx5_eq_disable(dev, &table->async_eq.core, &table->async_eq.irq_nb);
err3:
- destroy_async_eq(dev, &table->async_eq.core);
+ cleanup_async_eq(dev, &table->async_eq, "async");
err2:
mlx5_cmd_use_polling(dev);
- mlx5_eq_disable(dev, &table->cmd_eq.core, &table->cmd_eq.irq_nb);
+ cleanup_async_eq(dev, &table->cmd_eq, "cmd");
err1:
- destroy_async_eq(dev, &table->cmd_eq.core);
-err0:
mlx5_eq_notifier_unregister(dev, &table->cq_err_nb);
return err;
}
@@ -650,28 +652,11 @@ err0:
static void destroy_async_eqs(struct mlx5_core_dev *dev)
{
struct mlx5_eq_table *table = dev->priv.eq_table;
- int err;
-
- mlx5_eq_disable(dev, &table->pages_eq.core, &table->pages_eq.irq_nb);
- err = destroy_async_eq(dev, &table->pages_eq.core);
- if (err)
- mlx5_core_err(dev, "failed to destroy pages eq, err(%d)\n",
- err);
-
- mlx5_eq_disable(dev, &table->async_eq.core, &table->async_eq.irq_nb);
- err = destroy_async_eq(dev, &table->async_eq.core);
- if (err)
- mlx5_core_err(dev, "failed to destroy async eq, err(%d)\n",
- err);
+ cleanup_async_eq(dev, &table->pages_eq, "pages");
+ cleanup_async_eq(dev, &table->async_eq, "async");
mlx5_cmd_use_polling(dev);
-
- mlx5_eq_disable(dev, &table->cmd_eq.core, &table->cmd_eq.irq_nb);
- err = destroy_async_eq(dev, &table->cmd_eq.core);
- if (err)
- mlx5_core_err(dev, "failed to destroy command eq, err(%d)\n",
- err);
-
+ cleanup_async_eq(dev, &table->cmd_eq, "cmd");
mlx5_eq_notifier_unregister(dev, &table->cq_err_nb);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
index c76da309506b..e4ec0e03c289 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
@@ -87,10 +87,10 @@ static const struct rhashtable_params rhash_sa = {
* value is not constant during the lifetime
* of the key object.
*/
- .key_len = FIELD_SIZEOF(struct mlx5_fpga_ipsec_sa_ctx, hw_sa) -
- FIELD_SIZEOF(struct mlx5_ifc_fpga_ipsec_sa_v1, cmd),
+ .key_len = sizeof_field(struct mlx5_fpga_ipsec_sa_ctx, hw_sa) -
+ sizeof_field(struct mlx5_ifc_fpga_ipsec_sa_v1, cmd),
.key_offset = offsetof(struct mlx5_fpga_ipsec_sa_ctx, hw_sa) +
- FIELD_SIZEOF(struct mlx5_ifc_fpga_ipsec_sa_v1, cmd),
+ sizeof_field(struct mlx5_ifc_fpga_ipsec_sa_v1, cmd),
.head_offset = offsetof(struct mlx5_fpga_ipsec_sa_ctx, hash),
.automatic_shrinking = true,
.min_size = 1,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index d60577484567..8c5df6c7d7b6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -209,7 +209,7 @@ enum fs_i_lock_class {
};
static const struct rhashtable_params rhash_fte = {
- .key_len = FIELD_SIZEOF(struct fs_fte, val),
+ .key_len = sizeof_field(struct fs_fte, val),
.key_offset = offsetof(struct fs_fte, val),
.head_offset = offsetof(struct fs_fte, hash),
.automatic_shrinking = true,
@@ -217,7 +217,7 @@ static const struct rhashtable_params rhash_fte = {
};
static const struct rhashtable_params rhash_fg = {
- .key_len = FIELD_SIZEOF(struct mlx5_flow_group, mask),
+ .key_len = sizeof_field(struct mlx5_flow_group, mask),
.key_offset = offsetof(struct mlx5_flow_group, mask),
.head_offset = offsetof(struct mlx5_flow_group, hash),
.automatic_shrinking = true,
@@ -531,16 +531,9 @@ static void del_hw_fte(struct fs_node *node)
}
}
-static void del_sw_fte_rcu(struct rcu_head *head)
-{
- struct fs_fte *fte = container_of(head, struct fs_fte, rcu);
- struct mlx5_flow_steering *steering = get_steering(&fte->node);
-
- kmem_cache_free(steering->ftes_cache, fte);
-}
-
static void del_sw_fte(struct fs_node *node)
{
+ struct mlx5_flow_steering *steering = get_steering(node);
struct mlx5_flow_group *fg;
struct fs_fte *fte;
int err;
@@ -553,8 +546,7 @@ static void del_sw_fte(struct fs_node *node)
rhash_fte);
WARN_ON(err);
ida_simple_remove(&fg->fte_allocator, fte->index - fg->start_index);
-
- call_rcu(&fte->rcu, del_sw_fte_rcu);
+ kmem_cache_free(steering->ftes_cache, fte);
}
static void del_hw_flow_group(struct fs_node *node)
@@ -1633,47 +1625,22 @@ static u64 matched_fgs_get_version(struct list_head *match_head)
}
static struct fs_fte *
-lookup_fte_for_write_locked(struct mlx5_flow_group *g, const u32 *match_value)
+lookup_fte_locked(struct mlx5_flow_group *g,
+ const u32 *match_value,
+ bool take_write)
{
struct fs_fte *fte_tmp;
- nested_down_write_ref_node(&g->node, FS_LOCK_PARENT);
-
- fte_tmp = rhashtable_lookup_fast(&g->ftes_hash, match_value, rhash_fte);
- if (!fte_tmp || !tree_get_node(&fte_tmp->node)) {
- fte_tmp = NULL;
- goto out;
- }
-
- if (!fte_tmp->node.active) {
- tree_put_node(&fte_tmp->node, false);
- fte_tmp = NULL;
- goto out;
- }
- nested_down_write_ref_node(&fte_tmp->node, FS_LOCK_CHILD);
-
-out:
- up_write_ref_node(&g->node, false);
- return fte_tmp;
-}
-
-static struct fs_fte *
-lookup_fte_for_read_locked(struct mlx5_flow_group *g, const u32 *match_value)
-{
- struct fs_fte *fte_tmp;
-
- if (!tree_get_node(&g->node))
- return NULL;
-
- rcu_read_lock();
- fte_tmp = rhashtable_lookup(&g->ftes_hash, match_value, rhash_fte);
+ if (take_write)
+ nested_down_write_ref_node(&g->node, FS_LOCK_PARENT);
+ else
+ nested_down_read_ref_node(&g->node, FS_LOCK_PARENT);
+ fte_tmp = rhashtable_lookup_fast(&g->ftes_hash, match_value,
+ rhash_fte);
if (!fte_tmp || !tree_get_node(&fte_tmp->node)) {
- rcu_read_unlock();
fte_tmp = NULL;
goto out;
}
- rcu_read_unlock();
-
if (!fte_tmp->node.active) {
tree_put_node(&fte_tmp->node, false);
fte_tmp = NULL;
@@ -1681,19 +1648,12 @@ lookup_fte_for_read_locked(struct mlx5_flow_group *g, const u32 *match_value)
}
nested_down_write_ref_node(&fte_tmp->node, FS_LOCK_CHILD);
-
out:
- tree_put_node(&g->node, false);
- return fte_tmp;
-}
-
-static struct fs_fte *
-lookup_fte_locked(struct mlx5_flow_group *g, const u32 *match_value, bool write)
-{
- if (write)
- return lookup_fte_for_write_locked(g, match_value);
+ if (take_write)
+ up_write_ref_node(&g->node, false);
else
- return lookup_fte_for_read_locked(g, match_value);
+ up_read_ref_node(&g->node);
+ return fte_tmp;
}
static struct mlx5_flow_handle *
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
index e8cd997f413e..c2621b911563 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
@@ -203,7 +203,6 @@ struct fs_fte {
enum fs_fte_status status;
struct mlx5_fc *counter;
struct rhash_head hash;
- struct rcu_head rcu;
int modify_mask;
};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
index 3ed8ab2d703d..7c87f523e370 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
@@ -87,8 +87,8 @@ int mlx5i_init(struct mlx5_core_dev *mdev,
mlx5e_set_netdev_mtu_boundaries(priv);
netdev->mtu = netdev->max_mtu;
- mlx5e_build_nic_params(mdev, NULL, &priv->rss_params, &priv->channels.params,
- priv->max_nch, netdev->mtu);
+ mlx5e_build_nic_params(priv, NULL, &priv->rss_params, &priv->channels.params,
+ netdev->mtu);
mlx5i_build_nic_params(mdev, &priv->channels.params);
mlx5e_timestamp_init(priv);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c
index b70afa310ad2..416676c35b1f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c
@@ -200,8 +200,6 @@ static void mlx5_lag_fib_update(struct work_struct *work)
rtnl_lock();
switch (fib_work->event) {
case FIB_EVENT_ENTRY_REPLACE: /* fall through */
- case FIB_EVENT_ENTRY_APPEND: /* fall through */
- case FIB_EVENT_ENTRY_ADD: /* fall through */
case FIB_EVENT_ENTRY_DEL:
mlx5_lag_fib_route_event(ldev, fib_work->event,
fib_work->fen_info.fi);
@@ -259,8 +257,6 @@ static int mlx5_lag_fib_event(struct notifier_block *nb,
switch (event) {
case FIB_EVENT_ENTRY_REPLACE: /* fall through */
- case FIB_EVENT_ENTRY_APPEND: /* fall through */
- case FIB_EVENT_ENTRY_ADD: /* fall through */
case FIB_EVENT_ENTRY_DEL:
fen_info = container_of(info, struct fib_entry_notifier_info,
info);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 173e2c12e1c7..cf7b8da0f010 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1193,6 +1193,12 @@ int mlx5_load_one(struct mlx5_core_dev *dev, bool boot)
if (err)
goto err_load;
+ if (boot) {
+ err = mlx5_devlink_register(priv_to_devlink(dev), dev->device);
+ if (err)
+ goto err_devlink_reg;
+ }
+
if (mlx5_device_registered(dev)) {
mlx5_attach_device(dev);
} else {
@@ -1210,6 +1216,9 @@ out:
return err;
err_reg_dev:
+ if (boot)
+ mlx5_devlink_unregister(priv_to_devlink(dev));
+err_devlink_reg:
mlx5_unload(dev);
err_load:
if (boot)
@@ -1347,10 +1356,6 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *id)
request_module_nowait(MLX5_IB_MOD);
- err = mlx5_devlink_register(devlink, &pdev->dev);
- if (err)
- goto clean_load;
-
err = mlx5_crdump_enable(dev);
if (err)
dev_err(&pdev->dev, "mlx5_crdump_enable failed with error code %d\n", err);
@@ -1358,9 +1363,6 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *id)
pci_save_state(pdev);
return 0;
-clean_load:
- mlx5_unload_one(dev, true);
-
err_load_one:
mlx5_pci_close(dev);
pci_init_err:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
index 004c56c2fc0c..9359eed10889 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
@@ -690,9 +690,9 @@ int mlx5dr_actions_build_ste_arr(struct mlx5dr_matcher *matcher,
/* get the relevant addresses */
if (!action->dest_tbl.fw_tbl.rx_icm_addr) {
- ret = mlx5dr_cmd_query_flow_table(action->dest_tbl.fw_tbl.mdev,
- action->dest_tbl.fw_tbl.ft->type,
- action->dest_tbl.fw_tbl.ft->id,
+ ret = mlx5dr_cmd_query_flow_table(dmn->mdev,
+ action->dest_tbl.fw_tbl.type,
+ action->dest_tbl.fw_tbl.id,
&output);
if (!ret) {
action->dest_tbl.fw_tbl.tx_icm_addr =
@@ -982,8 +982,106 @@ dec_ref:
}
struct mlx5dr_action *
-mlx5dr_create_action_dest_flow_fw_table(struct mlx5_flow_table *ft,
- struct mlx5_core_dev *mdev)
+mlx5dr_action_create_mult_dest_tbl(struct mlx5dr_domain *dmn,
+ struct mlx5dr_action_dest *dests,
+ u32 num_of_dests)
+{
+ struct mlx5dr_cmd_flow_destination_hw_info *hw_dests;
+ struct mlx5dr_action **ref_actions;
+ struct mlx5dr_action *action;
+ bool reformat_req = false;
+ u32 num_of_ref = 0;
+ int ret;
+ int i;
+
+ if (dmn->type != MLX5DR_DOMAIN_TYPE_FDB) {
+ mlx5dr_err(dmn, "Multiple destination support is for FDB only\n");
+ return NULL;
+ }
+
+ hw_dests = kzalloc(sizeof(*hw_dests) * num_of_dests, GFP_KERNEL);
+ if (!hw_dests)
+ return NULL;
+
+ ref_actions = kzalloc(sizeof(*ref_actions) * num_of_dests * 2, GFP_KERNEL);
+ if (!ref_actions)
+ goto free_hw_dests;
+
+ for (i = 0; i < num_of_dests; i++) {
+ struct mlx5dr_action *reformat_action = dests[i].reformat;
+ struct mlx5dr_action *dest_action = dests[i].dest;
+
+ ref_actions[num_of_ref++] = dest_action;
+
+ switch (dest_action->action_type) {
+ case DR_ACTION_TYP_VPORT:
+ hw_dests[i].vport.flags = MLX5_FLOW_DEST_VPORT_VHCA_ID;
+ hw_dests[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
+ hw_dests[i].vport.num = dest_action->vport.caps->num;
+ hw_dests[i].vport.vhca_id = dest_action->vport.caps->vhca_gvmi;
+ if (reformat_action) {
+ reformat_req = true;
+ hw_dests[i].vport.reformat_id =
+ reformat_action->reformat.reformat_id;
+ ref_actions[num_of_ref++] = reformat_action;
+ hw_dests[i].vport.flags |= MLX5_FLOW_DEST_VPORT_REFORMAT_ID;
+ }
+ break;
+
+ case DR_ACTION_TYP_FT:
+ hw_dests[i].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ if (dest_action->dest_tbl.is_fw_tbl)
+ hw_dests[i].ft_id = dest_action->dest_tbl.fw_tbl.id;
+ else
+ hw_dests[i].ft_id = dest_action->dest_tbl.tbl->table_id;
+ break;
+
+ default:
+ mlx5dr_dbg(dmn, "Invalid multiple destinations action\n");
+ goto free_ref_actions;
+ }
+ }
+
+ action = dr_action_create_generic(DR_ACTION_TYP_FT);
+ if (!action)
+ goto free_ref_actions;
+
+ ret = mlx5dr_fw_create_md_tbl(dmn,
+ hw_dests,
+ num_of_dests,
+ reformat_req,
+ &action->dest_tbl.fw_tbl.id,
+ &action->dest_tbl.fw_tbl.group_id);
+ if (ret)
+ goto free_action;
+
+ refcount_inc(&dmn->refcount);
+
+ for (i = 0; i < num_of_ref; i++)
+ refcount_inc(&ref_actions[i]->refcount);
+
+ action->dest_tbl.is_fw_tbl = true;
+ action->dest_tbl.fw_tbl.dmn = dmn;
+ action->dest_tbl.fw_tbl.type = FS_FT_FDB;
+ action->dest_tbl.fw_tbl.ref_actions = ref_actions;
+ action->dest_tbl.fw_tbl.num_of_ref_actions = num_of_ref;
+
+ kfree(hw_dests);
+
+ return action;
+
+free_action:
+ kfree(action);
+free_ref_actions:
+ kfree(ref_actions);
+free_hw_dests:
+ kfree(hw_dests);
+ return NULL;
+}
+
+struct mlx5dr_action *
+mlx5dr_action_create_dest_flow_fw_table(struct mlx5dr_domain *dmn,
+ struct mlx5_flow_table *ft)
{
struct mlx5dr_action *action;
@@ -992,8 +1090,11 @@ mlx5dr_create_action_dest_flow_fw_table(struct mlx5_flow_table *ft,
return NULL;
action->dest_tbl.is_fw_tbl = 1;
- action->dest_tbl.fw_tbl.ft = ft;
- action->dest_tbl.fw_tbl.mdev = mdev;
+ action->dest_tbl.fw_tbl.type = ft->type;
+ action->dest_tbl.fw_tbl.id = ft->id;
+ action->dest_tbl.fw_tbl.dmn = dmn;
+
+ refcount_inc(&dmn->refcount);
return action;
}
@@ -1559,8 +1660,26 @@ int mlx5dr_action_destroy(struct mlx5dr_action *action)
switch (action->action_type) {
case DR_ACTION_TYP_FT:
- if (!action->dest_tbl.is_fw_tbl)
+ if (action->dest_tbl.is_fw_tbl)
+ refcount_dec(&action->dest_tbl.fw_tbl.dmn->refcount);
+ else
refcount_dec(&action->dest_tbl.tbl->refcount);
+
+ if (action->dest_tbl.is_fw_tbl &&
+ action->dest_tbl.fw_tbl.num_of_ref_actions) {
+ struct mlx5dr_action **ref_actions;
+ int i;
+
+ ref_actions = action->dest_tbl.fw_tbl.ref_actions;
+ for (i = 0; i < action->dest_tbl.fw_tbl.num_of_ref_actions; i++)
+ refcount_dec(&ref_actions[i]->refcount);
+
+ kfree(ref_actions);
+
+ mlx5dr_fw_destroy_md_tbl(action->dest_tbl.fw_tbl.dmn,
+ action->dest_tbl.fw_tbl.id,
+ action->dest_tbl.fw_tbl.group_id);
+ }
break;
case DR_ACTION_TYP_TNL_L2_TO_L2:
refcount_dec(&action->reformat.dmn->refcount);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
index 41662c4e2664..461b39376daf 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
@@ -320,12 +320,7 @@ int mlx5dr_cmd_destroy_flow_group(struct mlx5_core_dev *mdev,
}
int mlx5dr_cmd_create_flow_table(struct mlx5_core_dev *mdev,
- u32 table_type,
- u64 icm_addr_rx,
- u64 icm_addr_tx,
- u8 level,
- bool sw_owner,
- bool term_tbl,
+ struct mlx5dr_cmd_create_flow_table_attr *attr,
u64 *fdb_rx_icm_addr,
u32 *table_id)
{
@@ -335,37 +330,43 @@ int mlx5dr_cmd_create_flow_table(struct mlx5_core_dev *mdev,
int err;
MLX5_SET(create_flow_table_in, in, opcode, MLX5_CMD_OP_CREATE_FLOW_TABLE);
- MLX5_SET(create_flow_table_in, in, table_type, table_type);
+ MLX5_SET(create_flow_table_in, in, table_type, attr->table_type);
ft_mdev = MLX5_ADDR_OF(create_flow_table_in, in, flow_table_context);
- MLX5_SET(flow_table_context, ft_mdev, termination_table, term_tbl);
- MLX5_SET(flow_table_context, ft_mdev, sw_owner, sw_owner);
- MLX5_SET(flow_table_context, ft_mdev, level, level);
+ MLX5_SET(flow_table_context, ft_mdev, termination_table, attr->term_tbl);
+ MLX5_SET(flow_table_context, ft_mdev, sw_owner, attr->sw_owner);
+ MLX5_SET(flow_table_context, ft_mdev, level, attr->level);
- if (sw_owner) {
+ if (attr->sw_owner) {
/* icm_addr_0 used for FDB RX / NIC TX / NIC_RX
* icm_addr_1 used for FDB TX
*/
- if (table_type == MLX5_FLOW_TABLE_TYPE_NIC_RX) {
+ if (attr->table_type == MLX5_FLOW_TABLE_TYPE_NIC_RX) {
MLX5_SET64(flow_table_context, ft_mdev,
- sw_owner_icm_root_0, icm_addr_rx);
- } else if (table_type == MLX5_FLOW_TABLE_TYPE_NIC_TX) {
+ sw_owner_icm_root_0, attr->icm_addr_rx);
+ } else if (attr->table_type == MLX5_FLOW_TABLE_TYPE_NIC_TX) {
MLX5_SET64(flow_table_context, ft_mdev,
- sw_owner_icm_root_0, icm_addr_tx);
- } else if (table_type == MLX5_FLOW_TABLE_TYPE_FDB) {
+ sw_owner_icm_root_0, attr->icm_addr_tx);
+ } else if (attr->table_type == MLX5_FLOW_TABLE_TYPE_FDB) {
MLX5_SET64(flow_table_context, ft_mdev,
- sw_owner_icm_root_0, icm_addr_rx);
+ sw_owner_icm_root_0, attr->icm_addr_rx);
MLX5_SET64(flow_table_context, ft_mdev,
- sw_owner_icm_root_1, icm_addr_tx);
+ sw_owner_icm_root_1, attr->icm_addr_tx);
}
}
+ MLX5_SET(create_flow_table_in, in, flow_table_context.decap_en,
+ attr->decap_en);
+ MLX5_SET(create_flow_table_in, in, flow_table_context.reformat_en,
+ attr->reformat_en);
+
err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
if (err)
return err;
*table_id = MLX5_GET(create_flow_table_out, out, table_id);
- if (!sw_owner && table_type == MLX5_FLOW_TABLE_TYPE_FDB)
+ if (!attr->sw_owner && attr->table_type == MLX5_FLOW_TABLE_TYPE_FDB &&
+ fdb_rx_icm_addr)
*fdb_rx_icm_addr =
(u64)MLX5_GET(create_flow_table_out, out, icm_address_31_0) |
(u64)MLX5_GET(create_flow_table_out, out, icm_address_39_32) << 32 |
@@ -478,3 +479,208 @@ int mlx5dr_cmd_query_gid(struct mlx5_core_dev *mdev, u8 vhca_port_num,
return 0;
}
+
+static int mlx5dr_cmd_set_extended_dest(struct mlx5_core_dev *dev,
+ struct mlx5dr_cmd_fte_info *fte,
+ bool *extended_dest)
+{
+ int fw_log_max_fdb_encap_uplink = MLX5_CAP_ESW(dev, log_max_fdb_encap_uplink);
+ int num_fwd_destinations = 0;
+ int num_encap = 0;
+ int i;
+
+ *extended_dest = false;
+ if (!(fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST))
+ return 0;
+ for (i = 0; i < fte->dests_size; i++) {
+ if (fte->dest_arr[i].type == MLX5_FLOW_DESTINATION_TYPE_COUNTER)
+ continue;
+ if (fte->dest_arr[i].type == MLX5_FLOW_DESTINATION_TYPE_VPORT &&
+ fte->dest_arr[i].vport.flags & MLX5_FLOW_DEST_VPORT_REFORMAT_ID)
+ num_encap++;
+ num_fwd_destinations++;
+ }
+
+ if (num_fwd_destinations > 1 && num_encap > 0)
+ *extended_dest = true;
+
+ if (*extended_dest && !fw_log_max_fdb_encap_uplink) {
+ mlx5_core_warn(dev, "FW does not support extended destination");
+ return -EOPNOTSUPP;
+ }
+ if (num_encap > (1 << fw_log_max_fdb_encap_uplink)) {
+ mlx5_core_warn(dev, "FW does not support more than %d encaps",
+ 1 << fw_log_max_fdb_encap_uplink);
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
+int mlx5dr_cmd_set_fte(struct mlx5_core_dev *dev,
+ int opmod, int modify_mask,
+ struct mlx5dr_cmd_ft_info *ft,
+ u32 group_id,
+ struct mlx5dr_cmd_fte_info *fte)
+{
+ u32 out[MLX5_ST_SZ_DW(set_fte_out)] = {};
+ void *in_flow_context, *vlan;
+ bool extended_dest = false;
+ void *in_match_value;
+ unsigned int inlen;
+ int dst_cnt_size;
+ void *in_dests;
+ u32 *in;
+ int err;
+ int i;
+
+ if (mlx5dr_cmd_set_extended_dest(dev, fte, &extended_dest))
+ return -EOPNOTSUPP;
+
+ if (!extended_dest)
+ dst_cnt_size = MLX5_ST_SZ_BYTES(dest_format_struct);
+ else
+ dst_cnt_size = MLX5_ST_SZ_BYTES(extended_dest_format);
+
+ inlen = MLX5_ST_SZ_BYTES(set_fte_in) + fte->dests_size * dst_cnt_size;
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ MLX5_SET(set_fte_in, in, opcode, MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY);
+ MLX5_SET(set_fte_in, in, op_mod, opmod);
+ MLX5_SET(set_fte_in, in, modify_enable_mask, modify_mask);
+ MLX5_SET(set_fte_in, in, table_type, ft->type);
+ MLX5_SET(set_fte_in, in, table_id, ft->id);
+ MLX5_SET(set_fte_in, in, flow_index, fte->index);
+ if (ft->vport) {
+ MLX5_SET(set_fte_in, in, vport_number, ft->vport);
+ MLX5_SET(set_fte_in, in, other_vport, 1);
+ }
+
+ in_flow_context = MLX5_ADDR_OF(set_fte_in, in, flow_context);
+ MLX5_SET(flow_context, in_flow_context, group_id, group_id);
+
+ MLX5_SET(flow_context, in_flow_context, flow_tag,
+ fte->flow_context.flow_tag);
+ MLX5_SET(flow_context, in_flow_context, flow_source,
+ fte->flow_context.flow_source);
+
+ MLX5_SET(flow_context, in_flow_context, extended_destination,
+ extended_dest);
+ if (extended_dest) {
+ u32 action;
+
+ action = fte->action.action &
+ ~MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
+ MLX5_SET(flow_context, in_flow_context, action, action);
+ } else {
+ MLX5_SET(flow_context, in_flow_context, action,
+ fte->action.action);
+ if (fte->action.pkt_reformat)
+ MLX5_SET(flow_context, in_flow_context, packet_reformat_id,
+ fte->action.pkt_reformat->id);
+ }
+ if (fte->action.modify_hdr)
+ MLX5_SET(flow_context, in_flow_context, modify_header_id,
+ fte->action.modify_hdr->id);
+
+ vlan = MLX5_ADDR_OF(flow_context, in_flow_context, push_vlan);
+
+ MLX5_SET(vlan, vlan, ethtype, fte->action.vlan[0].ethtype);
+ MLX5_SET(vlan, vlan, vid, fte->action.vlan[0].vid);
+ MLX5_SET(vlan, vlan, prio, fte->action.vlan[0].prio);
+
+ vlan = MLX5_ADDR_OF(flow_context, in_flow_context, push_vlan_2);
+
+ MLX5_SET(vlan, vlan, ethtype, fte->action.vlan[1].ethtype);
+ MLX5_SET(vlan, vlan, vid, fte->action.vlan[1].vid);
+ MLX5_SET(vlan, vlan, prio, fte->action.vlan[1].prio);
+
+ in_match_value = MLX5_ADDR_OF(flow_context, in_flow_context,
+ match_value);
+ memcpy(in_match_value, fte->val, sizeof(u32) * MLX5_ST_SZ_DW_MATCH_PARAM);
+
+ in_dests = MLX5_ADDR_OF(flow_context, in_flow_context, destination);
+ if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
+ int list_size = 0;
+
+ for (i = 0; i < fte->dests_size; i++) {
+ unsigned int id, type = fte->dest_arr[i].type;
+
+ if (type == MLX5_FLOW_DESTINATION_TYPE_COUNTER)
+ continue;
+
+ switch (type) {
+ case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM:
+ id = fte->dest_arr[i].ft_num;
+ type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+ break;
+ case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE:
+ id = fte->dest_arr[i].ft_id;
+ break;
+ case MLX5_FLOW_DESTINATION_TYPE_VPORT:
+ id = fte->dest_arr[i].vport.num;
+ MLX5_SET(dest_format_struct, in_dests,
+ destination_eswitch_owner_vhca_id_valid,
+ !!(fte->dest_arr[i].vport.flags &
+ MLX5_FLOW_DEST_VPORT_VHCA_ID));
+ MLX5_SET(dest_format_struct, in_dests,
+ destination_eswitch_owner_vhca_id,
+ fte->dest_arr[i].vport.vhca_id);
+ if (extended_dest && (fte->dest_arr[i].vport.flags &
+ MLX5_FLOW_DEST_VPORT_REFORMAT_ID)) {
+ MLX5_SET(dest_format_struct, in_dests,
+ packet_reformat,
+ !!(fte->dest_arr[i].vport.flags &
+ MLX5_FLOW_DEST_VPORT_REFORMAT_ID));
+ MLX5_SET(extended_dest_format, in_dests,
+ packet_reformat_id,
+ fte->dest_arr[i].vport.reformat_id);
+ }
+ break;
+ default:
+ id = fte->dest_arr[i].tir_num;
+ }
+
+ MLX5_SET(dest_format_struct, in_dests, destination_type,
+ type);
+ MLX5_SET(dest_format_struct, in_dests, destination_id, id);
+ in_dests += dst_cnt_size;
+ list_size++;
+ }
+
+ MLX5_SET(flow_context, in_flow_context, destination_list_size,
+ list_size);
+ }
+
+ if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
+ int max_list_size = BIT(MLX5_CAP_FLOWTABLE_TYPE(dev,
+ log_max_flow_counter,
+ ft->type));
+ int list_size = 0;
+
+ for (i = 0; i < fte->dests_size; i++) {
+ if (fte->dest_arr[i].type !=
+ MLX5_FLOW_DESTINATION_TYPE_COUNTER)
+ continue;
+
+ MLX5_SET(flow_counter_list, in_dests, flow_counter_id,
+ fte->dest_arr[i].counter_id);
+ in_dests += dst_cnt_size;
+ list_size++;
+ }
+ if (list_size > max_list_size) {
+ err = -EINVAL;
+ goto err_out;
+ }
+
+ MLX5_SET(flow_context, in_flow_context, flow_counter_list_size,
+ list_size);
+ }
+
+ err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+err_out:
+ kvfree(in);
+ return err;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c
index 60ef6e6171e3..1fbcd012bb85 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_fw.c
@@ -7,6 +7,7 @@
struct mlx5dr_fw_recalc_cs_ft *
mlx5dr_fw_create_recalc_cs_ft(struct mlx5dr_domain *dmn, u32 vport_num)
{
+ struct mlx5dr_cmd_create_flow_table_attr ft_attr = {};
struct mlx5dr_fw_recalc_cs_ft *recalc_cs_ft;
u32 table_id, group_id, modify_hdr_id;
u64 rx_icm_addr, modify_ttl_action;
@@ -16,9 +17,14 @@ mlx5dr_fw_create_recalc_cs_ft(struct mlx5dr_domain *dmn, u32 vport_num)
if (!recalc_cs_ft)
return NULL;
- ret = mlx5dr_cmd_create_flow_table(dmn->mdev, MLX5_FLOW_TABLE_TYPE_FDB,
- 0, 0, dmn->info.caps.max_ft_level - 1,
- false, true, &rx_icm_addr, &table_id);
+ ft_attr.table_type = MLX5_FLOW_TABLE_TYPE_FDB;
+ ft_attr.level = dmn->info.caps.max_ft_level - 1;
+ ft_attr.term_tbl = true;
+
+ ret = mlx5dr_cmd_create_flow_table(dmn->mdev,
+ &ft_attr,
+ &rx_icm_addr,
+ &table_id);
if (ret) {
mlx5dr_err(dmn, "Failed creating TTL W/A FW flow table %d\n", ret);
goto free_ttl_tbl;
@@ -91,3 +97,70 @@ void mlx5dr_fw_destroy_recalc_cs_ft(struct mlx5dr_domain *dmn,
kfree(recalc_cs_ft);
}
+
+int mlx5dr_fw_create_md_tbl(struct mlx5dr_domain *dmn,
+ struct mlx5dr_cmd_flow_destination_hw_info *dest,
+ int num_dest,
+ bool reformat_req,
+ u32 *tbl_id,
+ u32 *group_id)
+{
+ struct mlx5dr_cmd_create_flow_table_attr ft_attr = {};
+ struct mlx5dr_cmd_fte_info fte_info = {};
+ u32 val[MLX5_ST_SZ_DW_MATCH_PARAM] = {};
+ struct mlx5dr_cmd_ft_info ft_info = {};
+ int ret;
+
+ ft_attr.table_type = MLX5_FLOW_TABLE_TYPE_FDB;
+ ft_attr.level = dmn->info.caps.max_ft_level - 2;
+ ft_attr.reformat_en = reformat_req;
+ ft_attr.decap_en = reformat_req;
+
+ ret = mlx5dr_cmd_create_flow_table(dmn->mdev, &ft_attr, NULL, tbl_id);
+ if (ret) {
+ mlx5dr_err(dmn, "Failed creating multi dest FW flow table %d\n", ret);
+ return ret;
+ }
+
+ ret = mlx5dr_cmd_create_empty_flow_group(dmn->mdev,
+ MLX5_FLOW_TABLE_TYPE_FDB,
+ *tbl_id, group_id);
+ if (ret) {
+ mlx5dr_err(dmn, "Failed creating multi dest FW flow group %d\n", ret);
+ goto free_flow_table;
+ }
+
+ ft_info.id = *tbl_id;
+ ft_info.type = FS_FT_FDB;
+ fte_info.action.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+ fte_info.dests_size = num_dest;
+ fte_info.val = val;
+ fte_info.dest_arr = dest;
+
+ ret = mlx5dr_cmd_set_fte(dmn->mdev, 0, 0, &ft_info, *group_id, &fte_info);
+ if (ret) {
+ mlx5dr_err(dmn, "Failed setting fte into table %d\n", ret);
+ goto free_flow_group;
+ }
+
+ return 0;
+
+free_flow_group:
+ mlx5dr_cmd_destroy_flow_group(dmn->mdev, MLX5_FLOW_TABLE_TYPE_FDB,
+ *tbl_id, *group_id);
+free_flow_table:
+ mlx5dr_cmd_destroy_flow_table(dmn->mdev, *tbl_id,
+ MLX5_FLOW_TABLE_TYPE_FDB);
+ return ret;
+}
+
+void mlx5dr_fw_destroy_md_tbl(struct mlx5dr_domain *dmn,
+ u32 tbl_id, u32 group_id)
+{
+ mlx5dr_cmd_del_flow_table_entry(dmn->mdev, FS_FT_FDB, tbl_id);
+ mlx5dr_cmd_destroy_flow_group(dmn->mdev,
+ MLX5_FLOW_TABLE_TYPE_FDB,
+ tbl_id, group_id);
+ mlx5dr_cmd_destroy_flow_table(dmn->mdev, tbl_id,
+ MLX5_FLOW_TABLE_TYPE_FDB);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c
index 32e94d2ee5e4..e4cff7abb348 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c
@@ -209,7 +209,7 @@ static void dr_rule_rehash_copy_ste_ctrl(struct mlx5dr_matcher *matcher,
/* We need to copy the refcount since this ste
* may have been traversed several times
*/
- refcount_set(&new_ste->refcount, refcount_read(&cur_ste->refcount));
+ new_ste->refcount = cur_ste->refcount;
/* Link old STEs rule_mem list to the new ste */
mlx5dr_rule_update_rule_member(cur_ste, new_ste);
@@ -638,6 +638,9 @@ static int dr_rule_add_member(struct mlx5dr_rule_rx_tx *nic_rule,
if (!rule_mem)
return -ENOMEM;
+ INIT_LIST_HEAD(&rule_mem->list);
+ INIT_LIST_HEAD(&rule_mem->use_ste_list);
+
rule_mem->ste = ste;
list_add_tail(&rule_mem->list, &nic_rule->rule_members_list);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
index a5a266983dd3..c6c7d1defbd7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
@@ -348,7 +348,7 @@ static void dr_ste_replace(struct mlx5dr_ste *dst, struct mlx5dr_ste *src)
if (dst->next_htbl)
dst->next_htbl->pointing_ste = dst;
- refcount_set(&dst->refcount, refcount_read(&src->refcount));
+ dst->refcount = src->refcount;
INIT_LIST_HEAD(&dst->rule_list);
list_splice_tail_init(&src->rule_list, &dst->rule_list);
@@ -565,7 +565,7 @@ bool mlx5dr_ste_is_not_valid_entry(u8 *p_hw_ste)
bool mlx5dr_ste_not_used_ste(struct mlx5dr_ste *ste)
{
- return !refcount_read(&ste->refcount);
+ return !ste->refcount;
}
/* Init one ste as a pattern for ste data array */
@@ -689,14 +689,14 @@ struct mlx5dr_ste_htbl *mlx5dr_ste_htbl_alloc(struct mlx5dr_icm_pool *pool,
htbl->ste_arr = chunk->ste_arr;
htbl->hw_ste_arr = chunk->hw_ste_arr;
htbl->miss_list = chunk->miss_list;
- refcount_set(&htbl->refcount, 0);
+ htbl->refcount = 0;
for (i = 0; i < chunk->num_of_entries; i++) {
struct mlx5dr_ste *ste = &htbl->ste_arr[i];
ste->hw_ste = htbl->hw_ste_arr + i * DR_STE_SIZE_REDUCED;
ste->htbl = htbl;
- refcount_set(&ste->refcount, 0);
+ ste->refcount = 0;
INIT_LIST_HEAD(&ste->miss_list_node);
INIT_LIST_HEAD(&htbl->miss_list[i]);
INIT_LIST_HEAD(&ste->rule_list);
@@ -713,7 +713,7 @@ out_free_htbl:
int mlx5dr_ste_htbl_free(struct mlx5dr_ste_htbl *htbl)
{
- if (refcount_read(&htbl->refcount))
+ if (htbl->refcount)
return -EBUSY;
mlx5dr_icm_free_chunk(htbl->chunk);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c
index e178d8d3dbc9..14ce2d7dbb66 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c
@@ -211,6 +211,9 @@ static int dr_table_destroy_sw_owned_tbl(struct mlx5dr_table *tbl)
static int dr_table_create_sw_owned_tbl(struct mlx5dr_table *tbl)
{
+ bool en_encap = !!(tbl->flags & MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT);
+ bool en_decap = !!(tbl->flags & MLX5_FLOW_TABLE_TUNNEL_EN_DECAP);
+ struct mlx5dr_cmd_create_flow_table_attr ft_attr = {};
u64 icm_addr_rx = 0;
u64 icm_addr_tx = 0;
int ret;
@@ -221,18 +224,21 @@ static int dr_table_create_sw_owned_tbl(struct mlx5dr_table *tbl)
if (tbl->tx.s_anchor)
icm_addr_tx = tbl->tx.s_anchor->chunk->icm_addr;
- ret = mlx5dr_cmd_create_flow_table(tbl->dmn->mdev,
- tbl->table_type,
- icm_addr_rx,
- icm_addr_tx,
- tbl->dmn->info.caps.max_ft_level - 1,
- true, false, NULL,
- &tbl->table_id);
+ ft_attr.table_type = tbl->table_type;
+ ft_attr.icm_addr_rx = icm_addr_rx;
+ ft_attr.icm_addr_tx = icm_addr_tx;
+ ft_attr.level = tbl->dmn->info.caps.max_ft_level - 1;
+ ft_attr.sw_owner = true;
+ ft_attr.decap_en = en_decap;
+ ft_attr.reformat_en = en_encap;
+
+ ret = mlx5dr_cmd_create_flow_table(tbl->dmn->mdev, &ft_attr,
+ NULL, &tbl->table_id);
return ret;
}
-struct mlx5dr_table *mlx5dr_table_create(struct mlx5dr_domain *dmn, u32 level)
+struct mlx5dr_table *mlx5dr_table_create(struct mlx5dr_domain *dmn, u32 level, u32 flags)
{
struct mlx5dr_table *tbl;
int ret;
@@ -245,6 +251,7 @@ struct mlx5dr_table *mlx5dr_table_create(struct mlx5dr_domain *dmn, u32 level)
tbl->dmn = dmn;
tbl->level = level;
+ tbl->flags = flags;
refcount_set(&tbl->refcount, 1);
ret = dr_table_init(tbl);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
index 290fe61c33d0..dffe35145d19 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
@@ -123,7 +123,7 @@ struct mlx5dr_matcher_rx_tx;
struct mlx5dr_ste {
u8 *hw_ste;
/* refcount: indicates the num of rules that using this ste */
- refcount_t refcount;
+ u32 refcount;
/* attached to the miss_list head at each htbl entry */
struct list_head miss_list_node;
@@ -155,7 +155,7 @@ struct mlx5dr_ste_htbl_ctrl {
struct mlx5dr_ste_htbl {
u8 lu_type;
u16 byte_mask;
- refcount_t refcount;
+ u32 refcount;
struct mlx5dr_icm_chunk *chunk;
struct mlx5dr_ste *ste_arr;
u8 *hw_ste_arr;
@@ -206,13 +206,14 @@ int mlx5dr_ste_htbl_free(struct mlx5dr_ste_htbl *htbl);
static inline void mlx5dr_htbl_put(struct mlx5dr_ste_htbl *htbl)
{
- if (refcount_dec_and_test(&htbl->refcount))
+ htbl->refcount--;
+ if (!htbl->refcount)
mlx5dr_ste_htbl_free(htbl);
}
static inline void mlx5dr_htbl_get(struct mlx5dr_ste_htbl *htbl)
{
- refcount_inc(&htbl->refcount);
+ htbl->refcount++;
}
/* STE utils */
@@ -254,14 +255,15 @@ static inline void mlx5dr_ste_put(struct mlx5dr_ste *ste,
struct mlx5dr_matcher *matcher,
struct mlx5dr_matcher_rx_tx *nic_matcher)
{
- if (refcount_dec_and_test(&ste->refcount))
+ ste->refcount--;
+ if (!ste->refcount)
mlx5dr_ste_free(ste, matcher, nic_matcher);
}
/* initial as 0, increased only when ste appears in a new rule */
static inline void mlx5dr_ste_get(struct mlx5dr_ste *ste)
{
- refcount_inc(&ste->refcount);
+ ste->refcount++;
}
void mlx5dr_ste_set_hit_addr_by_next_htbl(u8 *hw_ste,
@@ -679,6 +681,7 @@ struct mlx5dr_table {
u32 level;
u32 table_type;
u32 table_id;
+ u32 flags;
struct list_head matcher_list;
struct mlx5dr_action *miss_action;
refcount_t refcount;
@@ -742,10 +745,14 @@ struct mlx5dr_action {
union {
struct mlx5dr_table *tbl;
struct {
- struct mlx5_flow_table *ft;
+ struct mlx5dr_domain *dmn;
+ u32 id;
+ u32 group_id;
+ enum fs_flow_table_type type;
u64 rx_icm_addr;
u64 tx_icm_addr;
- struct mlx5_core_dev *mdev;
+ struct mlx5dr_action **ref_actions;
+ u32 num_of_ref_actions;
} fw_tbl;
};
} dest_tbl;
@@ -867,6 +874,17 @@ struct mlx5dr_cmd_query_flow_table_details {
u64 sw_owner_icm_root_0;
};
+struct mlx5dr_cmd_create_flow_table_attr {
+ u32 table_type;
+ u64 icm_addr_rx;
+ u64 icm_addr_tx;
+ u8 level;
+ bool sw_owner;
+ bool term_tbl;
+ bool decap_en;
+ bool reformat_en;
+};
+
/* internal API functions */
int mlx5dr_cmd_query_device(struct mlx5_core_dev *mdev,
struct mlx5dr_cmd_caps *caps);
@@ -904,12 +922,7 @@ int mlx5dr_cmd_destroy_flow_group(struct mlx5_core_dev *mdev,
u32 table_id,
u32 group_id);
int mlx5dr_cmd_create_flow_table(struct mlx5_core_dev *mdev,
- u32 table_type,
- u64 icm_addr_rx,
- u64 icm_addr_tx,
- u8 level,
- bool sw_owner,
- bool term_tbl,
+ struct mlx5dr_cmd_create_flow_table_attr *attr,
u64 *fdb_rx_icm_addr,
u32 *table_id);
int mlx5dr_cmd_destroy_flow_table(struct mlx5_core_dev *mdev,
@@ -1051,6 +1064,43 @@ int mlx5dr_send_postsend_formatted_htbl(struct mlx5dr_domain *dmn,
int mlx5dr_send_postsend_action(struct mlx5dr_domain *dmn,
struct mlx5dr_action *action);
+struct mlx5dr_cmd_ft_info {
+ u32 id;
+ u16 vport;
+ enum fs_flow_table_type type;
+};
+
+struct mlx5dr_cmd_flow_destination_hw_info {
+ enum mlx5_flow_destination_type type;
+ union {
+ u32 tir_num;
+ u32 ft_num;
+ u32 ft_id;
+ u32 counter_id;
+ struct {
+ u16 num;
+ u16 vhca_id;
+ u32 reformat_id;
+ u8 flags;
+ } vport;
+ };
+};
+
+struct mlx5dr_cmd_fte_info {
+ u32 dests_size;
+ u32 index;
+ struct mlx5_flow_context flow_context;
+ u32 *val;
+ struct mlx5_flow_act action;
+ struct mlx5dr_cmd_flow_destination_hw_info *dest_arr;
+};
+
+int mlx5dr_cmd_set_fte(struct mlx5_core_dev *dev,
+ int opmod, int modify_mask,
+ struct mlx5dr_cmd_ft_info *ft,
+ u32 group_id,
+ struct mlx5dr_cmd_fte_info *fte);
+
struct mlx5dr_fw_recalc_cs_ft {
u64 rx_icm_addr;
u32 table_id;
@@ -1065,4 +1115,12 @@ void mlx5dr_fw_destroy_recalc_cs_ft(struct mlx5dr_domain *dmn,
int mlx5dr_domain_cache_get_recalc_cs_ft_addr(struct mlx5dr_domain *dmn,
u32 vport_num,
u64 *rx_icm_addr);
+int mlx5dr_fw_create_md_tbl(struct mlx5dr_domain *dmn,
+ struct mlx5dr_cmd_flow_destination_hw_info *dest,
+ int num_dest,
+ bool reformat_req,
+ u32 *tbl_id,
+ u32 *group_id);
+void mlx5dr_fw_destroy_md_tbl(struct mlx5dr_domain *dmn, u32 tbl_id,
+ u32 group_id);
#endif /* _DR_TYPES_H_ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
index 3d587d0bdbbe..b43275cde8bf 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
@@ -74,7 +74,7 @@ static int mlx5_cmd_dr_create_flow_table(struct mlx5_flow_root_namespace *ns,
next_ft);
tbl = mlx5dr_table_create(ns->fs_dr_domain.dr_domain,
- ft->level);
+ ft->level, ft->flags);
if (!tbl) {
mlx5_core_err(ns->dev, "Failed creating dr flow_table\n");
return -EINVAL;
@@ -184,13 +184,13 @@ static struct mlx5dr_action *create_vport_action(struct mlx5dr_domain *domain,
dest_attr->vport.vhca_id);
}
-static struct mlx5dr_action *create_ft_action(struct mlx5_core_dev *dev,
+static struct mlx5dr_action *create_ft_action(struct mlx5dr_domain *domain,
struct mlx5_flow_rule *dst)
{
struct mlx5_flow_table *dest_ft = dst->dest_attr.ft;
if (mlx5_dr_is_fw_table(dest_ft->flags))
- return mlx5dr_create_action_dest_flow_fw_table(dest_ft, dev);
+ return mlx5dr_action_create_dest_flow_fw_table(domain, dest_ft);
return mlx5dr_action_create_dest_table(dest_ft->fs_dr_table.dr_table);
}
@@ -206,6 +206,12 @@ static struct mlx5dr_action *create_action_push_vlan(struct mlx5dr_domain *domai
return mlx5dr_action_create_push_vlan(domain, htonl(vlan_hdr));
}
+static bool contain_vport_reformat_action(struct mlx5_flow_rule *dst)
+{
+ return dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_VPORT &&
+ dst->dest_attr.vport.flags & MLX5_FLOW_DEST_VPORT_REFORMAT_ID;
+}
+
#define MLX5_FLOW_CONTEXT_ACTION_MAX 20
static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns,
struct mlx5_flow_table *ft,
@@ -213,7 +219,7 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns,
struct fs_fte *fte)
{
struct mlx5dr_domain *domain = ns->fs_dr_domain.dr_domain;
- struct mlx5dr_action *term_action = NULL;
+ struct mlx5dr_action_dest *term_actions;
struct mlx5dr_match_parameters params;
struct mlx5_core_dev *dev = ns->dev;
struct mlx5dr_action **fs_dr_actions;
@@ -223,6 +229,7 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns,
struct mlx5dr_rule *rule;
struct mlx5_flow_rule *dst;
int fs_dr_num_actions = 0;
+ int num_term_actions = 0;
int num_actions = 0;
size_t match_sz;
int err = 0;
@@ -233,18 +240,38 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns,
actions = kcalloc(MLX5_FLOW_CONTEXT_ACTION_MAX, sizeof(*actions),
GFP_KERNEL);
- if (!actions)
- return -ENOMEM;
+ if (!actions) {
+ err = -ENOMEM;
+ goto out_err;
+ }
fs_dr_actions = kcalloc(MLX5_FLOW_CONTEXT_ACTION_MAX,
sizeof(*fs_dr_actions), GFP_KERNEL);
if (!fs_dr_actions) {
- kfree(actions);
- return -ENOMEM;
+ err = -ENOMEM;
+ goto free_actions_alloc;
+ }
+
+ term_actions = kcalloc(MLX5_FLOW_CONTEXT_ACTION_MAX,
+ sizeof(*term_actions), GFP_KERNEL);
+ if (!term_actions) {
+ err = -ENOMEM;
+ goto free_fs_dr_actions_alloc;
}
match_sz = sizeof(fte->val);
+ /* Drop reformat action bit if destination vport set with reformat */
+ if (fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
+ list_for_each_entry(dst, &fte->node.children, node.list) {
+ if (!contain_vport_reformat_action(dst))
+ continue;
+
+ fte->action.action &= ~MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
+ break;
+ }
+ }
+
/* The order of the actions are must to be keep, only the following
* order is supported by SW steering:
* TX: push vlan -> modify header -> encap
@@ -335,7 +362,7 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns,
goto free_actions;
}
fs_dr_actions[fs_dr_num_actions++] = tmp_action;
- term_action = tmp_action;
+ term_actions[num_term_actions++].dest = tmp_action;
}
if (fte->flow_context.flow_tag) {
@@ -354,7 +381,8 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns,
enum mlx5_flow_destination_type type = dst->dest_attr.type;
u32 id;
- if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) {
+ if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX ||
+ num_term_actions >= MLX5_FLOW_CONTEXT_ACTION_MAX) {
err = -ENOSPC;
goto free_actions;
}
@@ -373,13 +401,13 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns,
actions[num_actions++] = tmp_action;
break;
case MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE:
- tmp_action = create_ft_action(dev, dst);
+ tmp_action = create_ft_action(domain, dst);
if (!tmp_action) {
err = -ENOMEM;
goto free_actions;
}
fs_dr_actions[fs_dr_num_actions++] = tmp_action;
- term_action = tmp_action;
+ term_actions[num_term_actions++].dest = tmp_action;
break;
case MLX5_FLOW_DESTINATION_TYPE_VPORT:
tmp_action = create_vport_action(domain, dst);
@@ -388,7 +416,14 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns,
goto free_actions;
}
fs_dr_actions[fs_dr_num_actions++] = tmp_action;
- term_action = tmp_action;
+ term_actions[num_term_actions].dest = tmp_action;
+
+ if (dst->dest_attr.vport.flags &
+ MLX5_FLOW_DEST_VPORT_REFORMAT_ID)
+ term_actions[num_term_actions].reformat =
+ dst->dest_attr.vport.pkt_reformat->action.dr_action;
+
+ num_term_actions++;
break;
default:
err = -EOPNOTSUPP;
@@ -399,9 +434,22 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns,
params.match_sz = match_sz;
params.match_buf = (u64 *)fte->val;
-
- if (term_action)
- actions[num_actions++] = term_action;
+ if (num_term_actions == 1) {
+ if (term_actions->reformat)
+ actions[num_actions++] = term_actions->reformat;
+
+ actions[num_actions++] = term_actions->dest;
+ } else if (num_term_actions > 1) {
+ tmp_action = mlx5dr_action_create_mult_dest_tbl(domain,
+ term_actions,
+ num_term_actions);
+ if (!tmp_action) {
+ err = -EOPNOTSUPP;
+ goto free_actions;
+ }
+ fs_dr_actions[fs_dr_num_actions++] = tmp_action;
+ actions[num_actions++] = tmp_action;
+ }
rule = mlx5dr_rule_create(group->fs_dr_matcher.dr_matcher,
&params,
@@ -412,7 +460,9 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns,
goto free_actions;
}
+ kfree(term_actions);
kfree(actions);
+
fte->fs_dr_rule.dr_rule = rule;
fte->fs_dr_rule.num_actions = fs_dr_num_actions;
fte->fs_dr_rule.dr_actions = fs_dr_actions;
@@ -420,13 +470,18 @@ static int mlx5_cmd_dr_create_fte(struct mlx5_flow_root_namespace *ns,
return 0;
free_actions:
- for (i = 0; i < fs_dr_num_actions; i++)
+ /* Free in reverse order to handle action dependencies */
+ for (i = fs_dr_num_actions - 1; i >= 0; i--)
if (!IS_ERR_OR_NULL(fs_dr_actions[i]))
mlx5dr_action_destroy(fs_dr_actions[i]);
- mlx5_core_err(dev, "Failed to create dr rule err(%d)\n", err);
- kfree(actions);
+ kfree(term_actions);
+free_fs_dr_actions_alloc:
kfree(fs_dr_actions);
+free_actions_alloc:
+ kfree(actions);
+out_err:
+ mlx5_core_err(dev, "Failed to create dr rule err(%d)\n", err);
return err;
}
@@ -533,7 +588,8 @@ static int mlx5_cmd_dr_delete_fte(struct mlx5_flow_root_namespace *ns,
if (err)
return err;
- for (i = 0; i < rule->num_actions; i++)
+ /* Free in reverse order to handle action dependencies */
+ for (i = rule->num_actions - 1; i >= 0; i--)
if (!IS_ERR_OR_NULL(rule->dr_actions[i]))
mlx5dr_action_destroy(rule->dr_actions[i]);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h
index adda9cbfba45..e1edc9c247b7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h
@@ -33,6 +33,11 @@ struct mlx5dr_match_parameters {
u64 *match_buf; /* Device spec format */
};
+struct mlx5dr_action_dest {
+ struct mlx5dr_action *dest;
+ struct mlx5dr_action *reformat;
+};
+
#ifdef CONFIG_MLX5_SW_STEERING
struct mlx5dr_domain *
@@ -46,7 +51,7 @@ void mlx5dr_domain_set_peer(struct mlx5dr_domain *dmn,
struct mlx5dr_domain *peer_dmn);
struct mlx5dr_table *
-mlx5dr_table_create(struct mlx5dr_domain *domain, u32 level);
+mlx5dr_table_create(struct mlx5dr_domain *domain, u32 level, u32 flags);
int mlx5dr_table_destroy(struct mlx5dr_table *table);
@@ -75,14 +80,19 @@ struct mlx5dr_action *
mlx5dr_action_create_dest_table(struct mlx5dr_table *table);
struct mlx5dr_action *
-mlx5dr_create_action_dest_flow_fw_table(struct mlx5_flow_table *ft,
- struct mlx5_core_dev *mdev);
+mlx5dr_action_create_dest_flow_fw_table(struct mlx5dr_domain *domain,
+ struct mlx5_flow_table *ft);
struct mlx5dr_action *
mlx5dr_action_create_dest_vport(struct mlx5dr_domain *domain,
u32 vport, u8 vhca_id_valid,
u16 vhca_id);
+struct mlx5dr_action *
+mlx5dr_action_create_mult_dest_tbl(struct mlx5dr_domain *dmn,
+ struct mlx5dr_action_dest *dests,
+ u32 num_of_dests);
+
struct mlx5dr_action *mlx5dr_action_create_drop(void);
struct mlx5dr_action *mlx5dr_action_create_tag(u32 tag_value);
@@ -131,7 +141,7 @@ mlx5dr_domain_set_peer(struct mlx5dr_domain *dmn,
struct mlx5dr_domain *peer_dmn) { }
static inline struct mlx5dr_table *
-mlx5dr_table_create(struct mlx5dr_domain *domain, u32 level) { return NULL; }
+mlx5dr_table_create(struct mlx5dr_domain *domain, u32 level, u32 flags) { return NULL; }
static inline int
mlx5dr_table_destroy(struct mlx5dr_table *table) { return 0; }
@@ -165,8 +175,8 @@ static inline struct mlx5dr_action *
mlx5dr_action_create_dest_table(struct mlx5dr_table *table) { return NULL; }
static inline struct mlx5dr_action *
-mlx5dr_create_action_dest_flow_fw_table(struct mlx5_flow_table *ft,
- struct mlx5_core_dev *mdev) { return NULL; }
+mlx5dr_action_create_dest_flow_fw_table(struct mlx5dr_domain *domain,
+ struct mlx5_flow_table *ft) { return NULL; }
static inline struct mlx5dr_action *
mlx5dr_action_create_dest_vport(struct mlx5dr_domain *domain,
@@ -174,6 +184,11 @@ mlx5dr_action_create_dest_vport(struct mlx5dr_domain *domain,
u16 vhca_id) { return NULL; }
static inline struct mlx5dr_action *
+mlx5dr_action_create_mult_dest_tbl(struct mlx5dr_domain *dmn,
+ struct mlx5dr_action_dest *dests,
+ u32 num_of_dests) { return NULL; }
+
+static inline struct mlx5dr_action *
mlx5dr_action_create_drop(void) { return NULL; }
static inline struct mlx5dr_action *
diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.c b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.c
index 544344ac4894..79057af4fe99 100644
--- a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.c
+++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.c
@@ -6,6 +6,7 @@
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/netlink.h>
+#include <linux/vmalloc.h>
#include <linux/xz.h>
#include "mlxfw_mfa2.h"
#include "mlxfw_mfa2_file.h"
@@ -548,7 +549,7 @@ mlxfw_mfa2_file_component_get(const struct mlxfw_mfa2_file *mfa2_file,
comp_size = be32_to_cpu(comp->size);
comp_buf_size = comp_size + mlxfw_mfa2_comp_magic_len;
- comp_data = kmalloc(sizeof(*comp_data) + comp_buf_size, GFP_KERNEL);
+ comp_data = vzalloc(sizeof(*comp_data) + comp_buf_size);
if (!comp_data)
return ERR_PTR(-ENOMEM);
comp_data->comp.data_size = comp_size;
@@ -570,7 +571,7 @@ mlxfw_mfa2_file_component_get(const struct mlxfw_mfa2_file *mfa2_file,
comp_data->comp.data = comp_data->buff + mlxfw_mfa2_comp_magic_len;
return &comp_data->comp;
err_out:
- kfree(comp_data);
+ vfree(comp_data);
return ERR_PTR(err);
}
@@ -579,7 +580,7 @@ void mlxfw_mfa2_file_component_put(struct mlxfw_mfa2_component *comp)
const struct mlxfw_mfa2_comp_data *comp_data;
comp_data = container_of(comp, struct mlxfw_mfa2_comp_data, comp);
- kfree(comp_data);
+ vfree(comp_data);
}
void mlxfw_mfa2_file_fini(struct mlxfw_mfa2_file *mfa2_file)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 5294a1622643..fd59280cf979 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -3477,10 +3477,10 @@ MLXSW_REG_DEFINE(qeec, MLXSW_REG_QEEC_ID, MLXSW_REG_QEEC_LEN);
MLXSW_ITEM32(reg, qeec, local_port, 0x00, 16, 8);
enum mlxsw_reg_qeec_hr {
- MLXSW_REG_QEEC_HIERARCY_PORT,
- MLXSW_REG_QEEC_HIERARCY_GROUP,
- MLXSW_REG_QEEC_HIERARCY_SUBGROUP,
- MLXSW_REG_QEEC_HIERARCY_TC,
+ MLXSW_REG_QEEC_HR_PORT,
+ MLXSW_REG_QEEC_HR_GROUP,
+ MLXSW_REG_QEEC_HR_SUBGROUP,
+ MLXSW_REG_QEEC_HR_TC,
};
/* reg_qeec_element_hierarchy
@@ -3618,8 +3618,7 @@ static inline void mlxsw_reg_qeec_ptps_pack(char *payload, u8 local_port,
{
MLXSW_REG_ZERO(qeec, payload);
mlxsw_reg_qeec_local_port_set(payload, local_port);
- mlxsw_reg_qeec_element_hierarchy_set(payload,
- MLXSW_REG_QEEC_HIERARCY_PORT);
+ mlxsw_reg_qeec_element_hierarchy_set(payload, MLXSW_REG_QEEC_HR_PORT);
mlxsw_reg_qeec_ptps_set(payload, ptps);
}
@@ -3749,6 +3748,38 @@ mlxsw_reg_qpdsm_prio_pack(char *payload, unsigned short prio, u8 dscp)
mlxsw_reg_qpdsm_prio_entry_color2_dscp_set(payload, prio, dscp);
}
+/* QPDP - QoS Port DSCP to Priority Mapping Register
+ * -------------------------------------------------
+ * This register controls the port default Switch Priority and Color. The
+ * default Switch Priority and Color are used for frames where the trust state
+ * uses default values. All member ports of a LAG should be configured with the
+ * same default values.
+ */
+#define MLXSW_REG_QPDP_ID 0x4007
+#define MLXSW_REG_QPDP_LEN 0x8
+
+MLXSW_REG_DEFINE(qpdp, MLXSW_REG_QPDP_ID, MLXSW_REG_QPDP_LEN);
+
+/* reg_qpdp_local_port
+ * Local Port. Supported for data packets from CPU port.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, qpdp, local_port, 0x00, 16, 8);
+
+/* reg_qpdp_switch_prio
+ * Default port Switch Priority (default 0)
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, qpdp, switch_prio, 0x04, 0, 4);
+
+static inline void mlxsw_reg_qpdp_pack(char *payload, u8 local_port,
+ u8 switch_prio)
+{
+ MLXSW_REG_ZERO(qpdp, payload);
+ mlxsw_reg_qpdp_local_port_set(payload, local_port);
+ mlxsw_reg_qpdp_switch_prio_set(payload, switch_prio);
+}
+
/* QPDPM - QoS Port DSCP to Priority Mapping Register
* --------------------------------------------------
* This register controls the mapping from DSCP field to
@@ -5472,6 +5503,7 @@ enum mlxsw_reg_htgt_trap_group {
MLXSW_REG_HTGT_TRAP_GROUP_SP_LBERROR,
MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP0,
MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP1,
+ MLXSW_REG_HTGT_TRAP_GROUP_SP_VRRP,
__MLXSW_REG_HTGT_TRAP_GROUP_MAX,
MLXSW_REG_HTGT_TRAP_GROUP_MAX = __MLXSW_REG_HTGT_TRAP_GROUP_MAX - 1
@@ -10580,6 +10612,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = {
MLXSW_REG(qeec),
MLXSW_REG(qrwe),
MLXSW_REG(qpdsm),
+ MLXSW_REG(qpdp),
MLXSW_REG(qpdpm),
MLXSW_REG(qtctm),
MLXSW_REG(qpsc),
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 556dca328bb5..2f0b516ee03f 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -45,11 +45,9 @@
#include "spectrum_ptp.h"
#include "../mlxfw/mlxfw.h"
-#define MLXSW_SP_FWREV_MINOR_TO_BRANCH(minor) ((minor) / 100)
-
#define MLXSW_SP1_FWREV_MAJOR 13
#define MLXSW_SP1_FWREV_MINOR 2000
-#define MLXSW_SP1_FWREV_SUBMINOR 2308
+#define MLXSW_SP1_FWREV_SUBMINOR 2714
#define MLXSW_SP1_FWREV_CAN_RESET_MINOR 1702
static const struct mlxsw_fw_rev mlxsw_sp1_fw_rev = {
@@ -66,7 +64,7 @@ static const struct mlxsw_fw_rev mlxsw_sp1_fw_rev = {
#define MLXSW_SP2_FWREV_MAJOR 29
#define MLXSW_SP2_FWREV_MINOR 2000
-#define MLXSW_SP2_FWREV_SUBMINOR 2308
+#define MLXSW_SP2_FWREV_SUBMINOR 2714
static const struct mlxsw_fw_rev mlxsw_sp2_fw_rev = {
.major = MLXSW_SP2_FWREV_MAJOR,
@@ -423,13 +421,12 @@ static int mlxsw_sp_fw_rev_validate(struct mlxsw_sp *mlxsw_sp)
rev->major, req_rev->major);
return -EINVAL;
}
- if (MLXSW_SP_FWREV_MINOR_TO_BRANCH(rev->minor) ==
- MLXSW_SP_FWREV_MINOR_TO_BRANCH(req_rev->minor) &&
- mlxsw_core_fw_rev_minor_subminor_validate(rev, req_rev))
+ if (mlxsw_core_fw_rev_minor_subminor_validate(rev, req_rev))
return 0;
- dev_info(mlxsw_sp->bus_info->dev, "The firmware version %d.%d.%d is incompatible with the driver\n",
- rev->major, rev->minor, rev->subminor);
+ dev_err(mlxsw_sp->bus_info->dev, "The firmware version %d.%d.%d is incompatible with the driver (required >= %d.%d.%d)\n",
+ rev->major, rev->minor, rev->subminor, req_rev->major,
+ req_rev->minor, req_rev->subminor);
dev_info(mlxsw_sp->bus_info->dev, "Flashing firmware using file %s\n",
fw_filename);
@@ -1796,6 +1793,8 @@ static int mlxsw_sp_setup_tc(struct net_device *dev, enum tc_setup_type type,
return mlxsw_sp_setup_tc_red(mlxsw_sp_port, type_data);
case TC_SETUP_QDISC_PRIO:
return mlxsw_sp_setup_tc_prio(mlxsw_sp_port, type_data);
+ case TC_SETUP_QDISC_ETS:
+ return mlxsw_sp_setup_tc_ets(mlxsw_sp_port, type_data);
default:
return -EOPNOTSUPP;
}
@@ -3602,26 +3601,25 @@ static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port)
* one subgroup, which are all member in the same group.
*/
err = mlxsw_sp_port_ets_set(mlxsw_sp_port,
- MLXSW_REG_QEEC_HIERARCY_GROUP, 0, 0, false,
- 0);
+ MLXSW_REG_QEEC_HR_GROUP, 0, 0, false, 0);
if (err)
return err;
for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
err = mlxsw_sp_port_ets_set(mlxsw_sp_port,
- MLXSW_REG_QEEC_HIERARCY_SUBGROUP, i,
+ MLXSW_REG_QEEC_HR_SUBGROUP, i,
0, false, 0);
if (err)
return err;
}
for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
err = mlxsw_sp_port_ets_set(mlxsw_sp_port,
- MLXSW_REG_QEEC_HIERARCY_TC, i, i,
+ MLXSW_REG_QEEC_HR_TC, i, i,
false, 0);
if (err)
return err;
err = mlxsw_sp_port_ets_set(mlxsw_sp_port,
- MLXSW_REG_QEEC_HIERARCY_TC,
+ MLXSW_REG_QEEC_HR_TC,
i + 8, i,
true, 100);
if (err)
@@ -3633,13 +3631,13 @@ static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port)
* for the initial configuration.
*/
err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port,
- MLXSW_REG_QEEC_HIERARCY_PORT, 0, 0,
+ MLXSW_REG_QEEC_HR_PORT, 0, 0,
MLXSW_REG_QEEC_MAS_DIS);
if (err)
return err;
for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port,
- MLXSW_REG_QEEC_HIERARCY_SUBGROUP,
+ MLXSW_REG_QEEC_HR_SUBGROUP,
i, 0,
MLXSW_REG_QEEC_MAS_DIS);
if (err)
@@ -3647,14 +3645,14 @@ static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port)
}
for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port,
- MLXSW_REG_QEEC_HIERARCY_TC,
+ MLXSW_REG_QEEC_HR_TC,
i, i,
MLXSW_REG_QEEC_MAS_DIS);
if (err)
return err;
err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port,
- MLXSW_REG_QEEC_HIERARCY_TC,
+ MLXSW_REG_QEEC_HR_TC,
i + 8, i,
MLXSW_REG_QEEC_MAS_DIS);
if (err)
@@ -3664,7 +3662,7 @@ static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port)
/* Configure the min shaper for multicast TCs. */
for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
err = mlxsw_sp_port_min_bw_set(mlxsw_sp_port,
- MLXSW_REG_QEEC_HIERARCY_TC,
+ MLXSW_REG_QEEC_HR_TC,
i + 8, i,
MLXSW_REG_QEEC_MIS_MIN);
if (err)
@@ -4542,8 +4540,16 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = {
MLXSW_SP_RXL_MARK(ROUTER_ALERT_IPV6, TRAP_TO_CPU, ROUTER_EXP, false),
MLXSW_SP_RXL_MARK(IPIP_DECAP_ERROR, TRAP_TO_CPU, ROUTER_EXP, false),
MLXSW_SP_RXL_MARK(DECAP_ECN0, TRAP_TO_CPU, ROUTER_EXP, false),
- MLXSW_SP_RXL_MARK(IPV4_VRRP, TRAP_TO_CPU, ROUTER_EXP, false),
- MLXSW_SP_RXL_MARK(IPV6_VRRP, TRAP_TO_CPU, ROUTER_EXP, false),
+ MLXSW_SP_RXL_MARK(IPV4_VRRP, TRAP_TO_CPU, VRRP, false),
+ MLXSW_SP_RXL_MARK(IPV6_VRRP, TRAP_TO_CPU, VRRP, false),
+ MLXSW_SP_RXL_NO_MARK(DISCARD_ING_ROUTER_SIP_CLASS_E, FORWARD,
+ ROUTER_EXP, false),
+ MLXSW_SP_RXL_NO_MARK(DISCARD_ING_ROUTER_MC_DMAC, FORWARD,
+ ROUTER_EXP, false),
+ MLXSW_SP_RXL_NO_MARK(DISCARD_ING_ROUTER_SIP_DIP, FORWARD,
+ ROUTER_EXP, false),
+ MLXSW_SP_RXL_NO_MARK(DISCARD_ING_ROUTER_DIP_LINK_LOCAL, FORWARD,
+ ROUTER_EXP, false),
/* PKT Sample trap */
MLXSW_RXL(mlxsw_sp_rx_listener_sample_func, PKT_SAMPLE, MIRROR_TO_CPU,
false, SP_IP2ME, DISCARD),
@@ -4626,6 +4632,10 @@ static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core)
rate = 19 * 1024;
burst_size = 12;
break;
+ case MLXSW_REG_HTGT_TRAP_GROUP_SP_VRRP:
+ rate = 360;
+ burst_size = 7;
+ break;
default:
continue;
}
@@ -4665,6 +4675,7 @@ static int mlxsw_sp_trap_groups_set(struct mlxsw_core *mlxsw_core)
case MLXSW_REG_HTGT_TRAP_GROUP_SP_OSPF:
case MLXSW_REG_HTGT_TRAP_GROUP_SP_PIM:
case MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP0:
+ case MLXSW_REG_HTGT_TRAP_GROUP_SP_VRRP:
priority = 5;
tc = 5;
break;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index 347bec9d1ecf..948ef4720d40 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -852,6 +852,8 @@ int mlxsw_sp_setup_tc_red(struct mlxsw_sp_port *mlxsw_sp_port,
struct tc_red_qopt_offload *p);
int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port,
struct tc_prio_qopt_offload *p);
+int mlxsw_sp_setup_tc_ets(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct tc_ets_qopt_offload *p);
/* spectrum_fid.c */
bool mlxsw_sp_fid_is_dummy(struct mlxsw_sp *mlxsw_sp, u16 fid_index);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c
index 21296fa7f7fb..db66f2b56a6d 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c
@@ -160,7 +160,7 @@ static int __mlxsw_sp_dcbnl_ieee_setets(struct mlxsw_sp_port *mlxsw_sp_port,
u8 weight = ets->tc_tx_bw[i];
err = mlxsw_sp_port_ets_set(mlxsw_sp_port,
- MLXSW_REG_QEEC_HIERARCY_SUBGROUP, i,
+ MLXSW_REG_QEEC_HR_SUBGROUP, i,
0, dwrr, weight);
if (err) {
netdev_err(dev, "Failed to link subgroup ETS element %d to group\n",
@@ -198,7 +198,7 @@ err_port_ets_set:
u8 weight = my_ets->tc_tx_bw[i];
err = mlxsw_sp_port_ets_set(mlxsw_sp_port,
- MLXSW_REG_QEEC_HIERARCY_SUBGROUP, i,
+ MLXSW_REG_QEEC_HR_SUBGROUP, i,
0, dwrr, weight);
}
return err;
@@ -369,6 +369,17 @@ err_update_qrwe:
}
static int
+mlxsw_sp_port_dcb_app_update_qpdp(struct mlxsw_sp_port *mlxsw_sp_port,
+ u8 default_prio)
+{
+ struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+ char qpdp_pl[MLXSW_REG_QPDP_LEN];
+
+ mlxsw_reg_qpdp_pack(qpdp_pl, mlxsw_sp_port->local_port, default_prio);
+ return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qpdp), qpdp_pl);
+}
+
+static int
mlxsw_sp_port_dcb_app_update_qpdpm(struct mlxsw_sp_port *mlxsw_sp_port,
struct dcb_ieee_app_dscp_map *map)
{
@@ -405,6 +416,12 @@ static int mlxsw_sp_port_dcb_app_update(struct mlxsw_sp_port *mlxsw_sp_port)
int err;
default_prio = mlxsw_sp_port_dcb_app_default_prio(mlxsw_sp_port);
+ err = mlxsw_sp_port_dcb_app_update_qpdp(mlxsw_sp_port, default_prio);
+ if (err) {
+ netdev_err(mlxsw_sp_port->dev, "Couldn't configure port default priority\n");
+ return err;
+ }
+
have_dscp = mlxsw_sp_port_dcb_app_prio_dscp_map(mlxsw_sp_port,
&prio_map);
@@ -507,7 +524,7 @@ static int mlxsw_sp_dcbnl_ieee_setmaxrate(struct net_device *dev,
for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
err = mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port,
- MLXSW_REG_QEEC_HIERARCY_SUBGROUP,
+ MLXSW_REG_QEEC_HR_SUBGROUP,
i, 0,
maxrate->tc_maxrate[i]);
if (err) {
@@ -523,7 +540,7 @@ static int mlxsw_sp_dcbnl_ieee_setmaxrate(struct net_device *dev,
err_port_ets_maxrate_set:
for (i--; i >= 0; i--)
mlxsw_sp_port_ets_maxrate_set(mlxsw_sp_port,
- MLXSW_REG_QEEC_HIERARCY_SUBGROUP,
+ MLXSW_REG_QEEC_HR_SUBGROUP,
i, 0, my_maxrate->tc_maxrate[i]);
return err;
}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c
index ec2ff3d7f41c..4aaaa4937b1a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c
@@ -920,6 +920,7 @@ static int mlxsw_sp_ptp_get_message_types(const struct hwtstamp_config *config,
egr_types = 0xff;
break;
case HWTSTAMP_TX_ONESTEP_SYNC:
+ case HWTSTAMP_TX_ONESTEP_P2P:
return -ERANGE;
}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
index 68cc6737d45c..54807b4930fe 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c
@@ -18,6 +18,7 @@ enum mlxsw_sp_qdisc_type {
MLXSW_SP_QDISC_NO_QDISC,
MLXSW_SP_QDISC_RED,
MLXSW_SP_QDISC_PRIO,
+ MLXSW_SP_QDISC_ETS,
};
struct mlxsw_sp_qdisc_ops {
@@ -471,14 +472,16 @@ int mlxsw_sp_setup_tc_red(struct mlxsw_sp_port *mlxsw_sp_port,
}
static int
-mlxsw_sp_qdisc_prio_destroy(struct mlxsw_sp_port *mlxsw_sp_port,
- struct mlxsw_sp_qdisc *mlxsw_sp_qdisc)
+__mlxsw_sp_qdisc_ets_destroy(struct mlxsw_sp_port *mlxsw_sp_port)
{
int i;
for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
mlxsw_sp_port_prio_tc_set(mlxsw_sp_port, i,
MLXSW_SP_PORT_DEFAULT_TCLASS);
+ mlxsw_sp_port_ets_set(mlxsw_sp_port,
+ MLXSW_REG_QEEC_HR_SUBGROUP,
+ i, 0, false, 0);
mlxsw_sp_qdisc_destroy(mlxsw_sp_port,
&mlxsw_sp_port->tclass_qdiscs[i]);
mlxsw_sp_port->tclass_qdiscs[i].prio_bitmap = 0;
@@ -488,36 +491,58 @@ mlxsw_sp_qdisc_prio_destroy(struct mlxsw_sp_port *mlxsw_sp_port,
}
static int
-mlxsw_sp_qdisc_prio_check_params(struct mlxsw_sp_port *mlxsw_sp_port,
- struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
- void *params)
+mlxsw_sp_qdisc_prio_destroy(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct mlxsw_sp_qdisc *mlxsw_sp_qdisc)
{
- struct tc_prio_qopt_offload_params *p = params;
+ return __mlxsw_sp_qdisc_ets_destroy(mlxsw_sp_port);
+}
- if (p->bands > IEEE_8021QAZ_MAX_TCS)
+static int
+__mlxsw_sp_qdisc_ets_check_params(unsigned int nbands)
+{
+ if (nbands > IEEE_8021QAZ_MAX_TCS)
return -EOPNOTSUPP;
return 0;
}
static int
-mlxsw_sp_qdisc_prio_replace(struct mlxsw_sp_port *mlxsw_sp_port,
- struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
- void *params)
+mlxsw_sp_qdisc_prio_check_params(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
+ void *params)
{
struct tc_prio_qopt_offload_params *p = params;
+
+ return __mlxsw_sp_qdisc_ets_check_params(p->bands);
+}
+
+static int
+__mlxsw_sp_qdisc_ets_replace(struct mlxsw_sp_port *mlxsw_sp_port,
+ unsigned int nbands,
+ const unsigned int *quanta,
+ const unsigned int *weights,
+ const u8 *priomap)
+{
struct mlxsw_sp_qdisc *child_qdisc;
int tclass, i, band, backlog;
u8 old_priomap;
int err;
- for (band = 0; band < p->bands; band++) {
+ for (band = 0; band < nbands; band++) {
tclass = MLXSW_SP_PRIO_BAND_TO_TCLASS(band);
child_qdisc = &mlxsw_sp_port->tclass_qdiscs[tclass];
old_priomap = child_qdisc->prio_bitmap;
child_qdisc->prio_bitmap = 0;
+
+ err = mlxsw_sp_port_ets_set(mlxsw_sp_port,
+ MLXSW_REG_QEEC_HR_SUBGROUP,
+ tclass, 0, !!quanta[band],
+ weights[band]);
+ if (err)
+ return err;
+
for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
- if (p->priomap[i] == band) {
+ if (priomap[i] == band) {
child_qdisc->prio_bitmap |= BIT(i);
if (BIT(i) & old_priomap)
continue;
@@ -540,21 +565,46 @@ mlxsw_sp_qdisc_prio_replace(struct mlxsw_sp_port *mlxsw_sp_port,
child_qdisc = &mlxsw_sp_port->tclass_qdiscs[tclass];
child_qdisc->prio_bitmap = 0;
mlxsw_sp_qdisc_destroy(mlxsw_sp_port, child_qdisc);
+ mlxsw_sp_port_ets_set(mlxsw_sp_port,
+ MLXSW_REG_QEEC_HR_SUBGROUP,
+ tclass, 0, false, 0);
}
return 0;
}
+static int
+mlxsw_sp_qdisc_prio_replace(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
+ void *params)
+{
+ struct tc_prio_qopt_offload_params *p = params;
+ unsigned int zeroes[TCQ_ETS_MAX_BANDS] = {0};
+
+ return __mlxsw_sp_qdisc_ets_replace(mlxsw_sp_port, p->bands,
+ zeroes, zeroes, p->priomap);
+}
+
+static void
+__mlxsw_sp_qdisc_ets_unoffload(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
+ struct gnet_stats_queue *qstats)
+{
+ u64 backlog;
+
+ backlog = mlxsw_sp_cells_bytes(mlxsw_sp_port->mlxsw_sp,
+ mlxsw_sp_qdisc->stats_base.backlog);
+ qstats->backlog -= backlog;
+}
+
static void
mlxsw_sp_qdisc_prio_unoffload(struct mlxsw_sp_port *mlxsw_sp_port,
struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
void *params)
{
struct tc_prio_qopt_offload_params *p = params;
- u64 backlog;
- backlog = mlxsw_sp_cells_bytes(mlxsw_sp_port->mlxsw_sp,
- mlxsw_sp_qdisc->stats_base.backlog);
- p->qstats->backlog -= backlog;
+ __mlxsw_sp_qdisc_ets_unoffload(mlxsw_sp_port, mlxsw_sp_qdisc,
+ p->qstats);
}
static int
@@ -631,31 +681,104 @@ static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_prio = {
.clean_stats = mlxsw_sp_setup_tc_qdisc_prio_clean_stats,
};
-/* Grafting is not supported in mlxsw. It will result in un-offloading of the
- * grafted qdisc as well as the qdisc in the qdisc new location.
- * (However, if the graft is to the location where the qdisc is already at, it
- * will be ignored completely and won't cause un-offloading).
+static int
+mlxsw_sp_qdisc_ets_check_params(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
+ void *params)
+{
+ struct tc_ets_qopt_offload_replace_params *p = params;
+
+ return __mlxsw_sp_qdisc_ets_check_params(p->bands);
+}
+
+static int
+mlxsw_sp_qdisc_ets_replace(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
+ void *params)
+{
+ struct tc_ets_qopt_offload_replace_params *p = params;
+
+ return __mlxsw_sp_qdisc_ets_replace(mlxsw_sp_port, p->bands,
+ p->quanta, p->weights, p->priomap);
+}
+
+static void
+mlxsw_sp_qdisc_ets_unoffload(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
+ void *params)
+{
+ struct tc_ets_qopt_offload_replace_params *p = params;
+
+ __mlxsw_sp_qdisc_ets_unoffload(mlxsw_sp_port, mlxsw_sp_qdisc,
+ p->qstats);
+}
+
+static int
+mlxsw_sp_qdisc_ets_destroy(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct mlxsw_sp_qdisc *mlxsw_sp_qdisc)
+{
+ return __mlxsw_sp_qdisc_ets_destroy(mlxsw_sp_port);
+}
+
+static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_ets = {
+ .type = MLXSW_SP_QDISC_ETS,
+ .check_params = mlxsw_sp_qdisc_ets_check_params,
+ .replace = mlxsw_sp_qdisc_ets_replace,
+ .unoffload = mlxsw_sp_qdisc_ets_unoffload,
+ .destroy = mlxsw_sp_qdisc_ets_destroy,
+ .get_stats = mlxsw_sp_qdisc_get_prio_stats,
+ .clean_stats = mlxsw_sp_setup_tc_qdisc_prio_clean_stats,
+};
+
+/* Linux allows linking of Qdiscs to arbitrary classes (so long as the resulting
+ * graph is free of cycles). These operations do not change the parent handle
+ * though, which means it can be incomplete (if there is more than one class
+ * where the Qdisc in question is grafted) or outright wrong (if the Qdisc was
+ * linked to a different class and then removed from the original class).
+ *
+ * E.g. consider this sequence of operations:
+ *
+ * # tc qdisc add dev swp1 root handle 1: prio
+ * # tc qdisc add dev swp1 parent 1:3 handle 13: red limit 1000000 avpkt 10000
+ * RED: set bandwidth to 10Mbit
+ * # tc qdisc link dev swp1 handle 13: parent 1:2
+ *
+ * At this point, both 1:2 and 1:3 have the same RED Qdisc instance as their
+ * child. But RED will still only claim that 1:3 is its parent. If it's removed
+ * from that band, its only parent will be 1:2, but it will continue to claim
+ * that it is in fact 1:3.
+ *
+ * The notification for child Qdisc replace (e.g. TC_RED_REPLACE) comes before
+ * the notification for parent graft (e.g. TC_PRIO_GRAFT). We take the replace
+ * notification to offload the child Qdisc, based on its parent handle, and use
+ * the graft operation to validate that the class where the child is actually
+ * grafted corresponds to the parent handle. If the two don't match, we
+ * unoffload the child.
*/
static int
-mlxsw_sp_qdisc_prio_graft(struct mlxsw_sp_port *mlxsw_sp_port,
- struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
- struct tc_prio_qopt_offload_graft_params *p)
+__mlxsw_sp_qdisc_ets_graft(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
+ u8 band, u32 child_handle)
{
- int tclass_num = MLXSW_SP_PRIO_BAND_TO_TCLASS(p->band);
+ int tclass_num = MLXSW_SP_PRIO_BAND_TO_TCLASS(band);
struct mlxsw_sp_qdisc *old_qdisc;
- /* Check if the grafted qdisc is already in its "new" location. If so -
- * nothing needs to be done.
- */
- if (p->band < IEEE_8021QAZ_MAX_TCS &&
- mlxsw_sp_port->tclass_qdiscs[tclass_num].handle == p->child_handle)
+ if (band < IEEE_8021QAZ_MAX_TCS &&
+ mlxsw_sp_port->tclass_qdiscs[tclass_num].handle == child_handle)
return 0;
+ if (!child_handle) {
+ /* This is an invisible FIFO replacing the original Qdisc.
+ * Ignore it--the original Qdisc's destroy will follow.
+ */
+ return 0;
+ }
+
/* See if the grafted qdisc is already offloaded on any tclass. If so,
* unoffload it.
*/
old_qdisc = mlxsw_sp_qdisc_find_by_handle(mlxsw_sp_port,
- p->child_handle);
+ child_handle);
if (old_qdisc)
mlxsw_sp_qdisc_destroy(mlxsw_sp_port, old_qdisc);
@@ -664,6 +787,15 @@ mlxsw_sp_qdisc_prio_graft(struct mlxsw_sp_port *mlxsw_sp_port,
return -EOPNOTSUPP;
}
+static int
+mlxsw_sp_qdisc_prio_graft(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct mlxsw_sp_qdisc *mlxsw_sp_qdisc,
+ struct tc_prio_qopt_offload_graft_params *p)
+{
+ return __mlxsw_sp_qdisc_ets_graft(mlxsw_sp_port, mlxsw_sp_qdisc,
+ p->band, p->child_handle);
+}
+
int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port,
struct tc_prio_qopt_offload *p)
{
@@ -697,6 +829,40 @@ int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port,
}
}
+int mlxsw_sp_setup_tc_ets(struct mlxsw_sp_port *mlxsw_sp_port,
+ struct tc_ets_qopt_offload *p)
+{
+ struct mlxsw_sp_qdisc *mlxsw_sp_qdisc;
+
+ mlxsw_sp_qdisc = mlxsw_sp_qdisc_find(mlxsw_sp_port, p->parent, true);
+ if (!mlxsw_sp_qdisc)
+ return -EOPNOTSUPP;
+
+ if (p->command == TC_ETS_REPLACE)
+ return mlxsw_sp_qdisc_replace(mlxsw_sp_port, p->handle,
+ mlxsw_sp_qdisc,
+ &mlxsw_sp_qdisc_ops_ets,
+ &p->replace_params);
+
+ if (!mlxsw_sp_qdisc_compare(mlxsw_sp_qdisc, p->handle,
+ MLXSW_SP_QDISC_ETS))
+ return -EOPNOTSUPP;
+
+ switch (p->command) {
+ case TC_ETS_DESTROY:
+ return mlxsw_sp_qdisc_destroy(mlxsw_sp_port, mlxsw_sp_qdisc);
+ case TC_ETS_STATS:
+ return mlxsw_sp_qdisc_get_stats(mlxsw_sp_port, mlxsw_sp_qdisc,
+ &p->stats);
+ case TC_ETS_GRAFT:
+ return __mlxsw_sp_qdisc_ets_graft(mlxsw_sp_port, mlxsw_sp_qdisc,
+ p->graft_params.band,
+ p->graft_params.child_handle);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
int mlxsw_sp_tc_qdisc_init(struct mlxsw_sp_port *mlxsw_sp_port)
{
struct mlxsw_sp_qdisc *mlxsw_sp_qdisc;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 30bfe3880faf..7f70aa799064 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -382,9 +382,10 @@ enum mlxsw_sp_fib_entry_type {
};
struct mlxsw_sp_nexthop_group;
+struct mlxsw_sp_fib_entry;
struct mlxsw_sp_fib_node {
- struct list_head entry_list;
+ struct mlxsw_sp_fib_entry *fib_entry;
struct list_head list;
struct rhash_head ht_node;
struct mlxsw_sp_fib *fib;
@@ -397,7 +398,6 @@ struct mlxsw_sp_fib_entry_decap {
};
struct mlxsw_sp_fib_entry {
- struct list_head list;
struct mlxsw_sp_fib_node *fib_node;
enum mlxsw_sp_fib_entry_type type;
struct list_head nexthop_group_node;
@@ -1162,7 +1162,6 @@ mlxsw_sp_router_ip2me_fib_entry_find(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
const union mlxsw_sp_l3addr *addr,
enum mlxsw_sp_fib_entry_type type)
{
- struct mlxsw_sp_fib_entry *fib_entry;
struct mlxsw_sp_fib_node *fib_node;
unsigned char addr_prefix_len;
struct mlxsw_sp_fib *fib;
@@ -1191,15 +1190,10 @@ mlxsw_sp_router_ip2me_fib_entry_find(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
fib_node = mlxsw_sp_fib_node_lookup(fib, addrp, addr_len,
addr_prefix_len);
- if (!fib_node || list_empty(&fib_node->entry_list))
- return NULL;
-
- fib_entry = list_first_entry(&fib_node->entry_list,
- struct mlxsw_sp_fib_entry, list);
- if (fib_entry->type != type)
+ if (!fib_node || fib_node->fib_entry->type != type)
return NULL;
- return fib_entry;
+ return fib_node->fib_entry;
}
/* Given an IPIP entry, find the corresponding decap route. */
@@ -1209,7 +1203,6 @@ mlxsw_sp_ipip_entry_find_decap(struct mlxsw_sp *mlxsw_sp,
{
static struct mlxsw_sp_fib_node *fib_node;
const struct mlxsw_sp_ipip_ops *ipip_ops;
- struct mlxsw_sp_fib_entry *fib_entry;
unsigned char saddr_prefix_len;
union mlxsw_sp_l3addr saddr;
struct mlxsw_sp_fib *ul_fib;
@@ -1244,15 +1237,11 @@ mlxsw_sp_ipip_entry_find_decap(struct mlxsw_sp *mlxsw_sp,
fib_node = mlxsw_sp_fib_node_lookup(ul_fib, saddrp, saddr_len,
saddr_prefix_len);
- if (!fib_node || list_empty(&fib_node->entry_list))
- return NULL;
-
- fib_entry = list_first_entry(&fib_node->entry_list,
- struct mlxsw_sp_fib_entry, list);
- if (fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_TRAP)
+ if (!fib_node ||
+ fib_node->fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_TRAP)
return NULL;
- return fib_entry;
+ return fib_node->fib_entry;
}
static struct mlxsw_sp_ipip_entry *
@@ -3231,10 +3220,6 @@ mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp,
return 0;
}
-static bool
-mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node,
- const struct mlxsw_sp_fib_entry *fib_entry);
-
static int
mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_nexthop_group *nh_grp)
@@ -3243,9 +3228,6 @@ mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp,
int err;
list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
- if (!mlxsw_sp_fib_node_entry_is_first(fib_entry->fib_node,
- fib_entry))
- continue;
err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
if (err)
return err;
@@ -3263,12 +3245,8 @@ mlxsw_sp_nexthop_fib_entries_refresh(struct mlxsw_sp_nexthop_group *nh_grp)
enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_WRITE;
struct mlxsw_sp_fib_entry *fib_entry;
- list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
- if (!mlxsw_sp_fib_node_entry_is_first(fib_entry->fib_node,
- fib_entry))
- continue;
+ list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node)
mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0);
- }
}
static void mlxsw_sp_adj_grp_size_round_up(u16 *p_adj_grp_size)
@@ -3845,7 +3823,7 @@ static void mlxsw_sp_nexthop4_event(struct mlxsw_sp *mlxsw_sp,
key.fib_nh = fib_nh;
nh = mlxsw_sp_nexthop_lookup(mlxsw_sp, key);
- if (WARN_ON_ONCE(!nh))
+ if (!nh)
return;
switch (event) {
@@ -4491,6 +4469,19 @@ mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp,
}
}
+static void
+mlxsw_sp_fib4_entry_type_unset(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib_entry *fib_entry)
+{
+ switch (fib_entry->type) {
+ case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
+ mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, fib_entry);
+ break;
+ default:
+ break;
+ }
+}
+
static struct mlxsw_sp_fib4_entry *
mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_fib_node *fib_node,
@@ -4523,6 +4514,7 @@ mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp,
return fib4_entry;
err_nexthop4_group_get:
+ mlxsw_sp_fib4_entry_type_unset(mlxsw_sp, fib_entry);
err_fib4_entry_type_set:
kfree(fib4_entry);
return ERR_PTR(err);
@@ -4532,6 +4524,7 @@ static void mlxsw_sp_fib4_entry_destroy(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_fib4_entry *fib4_entry)
{
mlxsw_sp_nexthop4_group_put(mlxsw_sp, &fib4_entry->common);
+ mlxsw_sp_fib4_entry_type_unset(mlxsw_sp, &fib4_entry->common);
kfree(fib4_entry);
}
@@ -4555,15 +4548,14 @@ mlxsw_sp_fib4_entry_lookup(struct mlxsw_sp *mlxsw_sp,
if (!fib_node)
return NULL;
- list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) {
- if (fib4_entry->tb_id == fen_info->tb_id &&
- fib4_entry->tos == fen_info->tos &&
- fib4_entry->type == fen_info->type &&
- mlxsw_sp_nexthop4_group_fi(fib4_entry->common.nh_group) ==
- fen_info->fi) {
- return fib4_entry;
- }
- }
+ fib4_entry = container_of(fib_node->fib_entry,
+ struct mlxsw_sp_fib4_entry, common);
+ if (fib4_entry->tb_id == fen_info->tb_id &&
+ fib4_entry->tos == fen_info->tos &&
+ fib4_entry->type == fen_info->type &&
+ mlxsw_sp_nexthop4_group_fi(fib4_entry->common.nh_group) ==
+ fen_info->fi)
+ return fib4_entry;
return NULL;
}
@@ -4611,7 +4603,6 @@ mlxsw_sp_fib_node_create(struct mlxsw_sp_fib *fib, const void *addr,
if (!fib_node)
return NULL;
- INIT_LIST_HEAD(&fib_node->entry_list);
list_add(&fib_node->list, &fib->node_list);
memcpy(fib_node->key.addr, addr, addr_len);
fib_node->key.prefix_len = prefix_len;
@@ -4622,18 +4613,9 @@ mlxsw_sp_fib_node_create(struct mlxsw_sp_fib *fib, const void *addr,
static void mlxsw_sp_fib_node_destroy(struct mlxsw_sp_fib_node *fib_node)
{
list_del(&fib_node->list);
- WARN_ON(!list_empty(&fib_node->entry_list));
kfree(fib_node);
}
-static bool
-mlxsw_sp_fib_node_entry_is_first(const struct mlxsw_sp_fib_node *fib_node,
- const struct mlxsw_sp_fib_entry *fib_entry)
-{
- return list_first_entry(&fib_node->entry_list,
- struct mlxsw_sp_fib_entry, list) == fib_entry;
-}
-
static int mlxsw_sp_fib_lpm_tree_link(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_fib_node *fib_node)
{
@@ -4773,200 +4755,48 @@ static void mlxsw_sp_fib_node_put(struct mlxsw_sp *mlxsw_sp,
{
struct mlxsw_sp_vr *vr = fib_node->fib->vr;
- if (!list_empty(&fib_node->entry_list))
+ if (fib_node->fib_entry)
return;
mlxsw_sp_fib_node_fini(mlxsw_sp, fib_node);
mlxsw_sp_fib_node_destroy(fib_node);
mlxsw_sp_vr_put(mlxsw_sp, vr);
}
-static struct mlxsw_sp_fib4_entry *
-mlxsw_sp_fib4_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
- const struct mlxsw_sp_fib4_entry *new4_entry)
-{
- struct mlxsw_sp_fib4_entry *fib4_entry;
-
- list_for_each_entry(fib4_entry, &fib_node->entry_list, common.list) {
- if (fib4_entry->tb_id > new4_entry->tb_id)
- continue;
- if (fib4_entry->tb_id != new4_entry->tb_id)
- break;
- if (fib4_entry->tos > new4_entry->tos)
- continue;
- if (fib4_entry->prio >= new4_entry->prio ||
- fib4_entry->tos < new4_entry->tos)
- return fib4_entry;
- }
-
- return NULL;
-}
-
-static int
-mlxsw_sp_fib4_node_list_append(struct mlxsw_sp_fib4_entry *fib4_entry,
- struct mlxsw_sp_fib4_entry *new4_entry)
-{
- struct mlxsw_sp_fib_node *fib_node;
-
- if (WARN_ON(!fib4_entry))
- return -EINVAL;
-
- fib_node = fib4_entry->common.fib_node;
- list_for_each_entry_from(fib4_entry, &fib_node->entry_list,
- common.list) {
- if (fib4_entry->tb_id != new4_entry->tb_id ||
- fib4_entry->tos != new4_entry->tos ||
- fib4_entry->prio != new4_entry->prio)
- break;
- }
-
- list_add_tail(&new4_entry->common.list, &fib4_entry->common.list);
- return 0;
-}
-
-static int
-mlxsw_sp_fib4_node_list_insert(struct mlxsw_sp_fib4_entry *new4_entry,
- bool replace, bool append)
-{
- struct mlxsw_sp_fib_node *fib_node = new4_entry->common.fib_node;
- struct mlxsw_sp_fib4_entry *fib4_entry;
-
- fib4_entry = mlxsw_sp_fib4_node_entry_find(fib_node, new4_entry);
-
- if (append)
- return mlxsw_sp_fib4_node_list_append(fib4_entry, new4_entry);
- if (replace && WARN_ON(!fib4_entry))
- return -EINVAL;
-
- /* Insert new entry before replaced one, so that we can later
- * remove the second.
- */
- if (fib4_entry) {
- list_add_tail(&new4_entry->common.list,
- &fib4_entry->common.list);
- } else {
- struct mlxsw_sp_fib4_entry *last;
-
- list_for_each_entry(last, &fib_node->entry_list, common.list) {
- if (new4_entry->tb_id > last->tb_id)
- break;
- fib4_entry = last;
- }
-
- if (fib4_entry)
- list_add(&new4_entry->common.list,
- &fib4_entry->common.list);
- else
- list_add(&new4_entry->common.list,
- &fib_node->entry_list);
- }
-
- return 0;
-}
-
-static void
-mlxsw_sp_fib4_node_list_remove(struct mlxsw_sp_fib4_entry *fib4_entry)
-{
- list_del(&fib4_entry->common.list);
-}
-
-static int mlxsw_sp_fib_node_entry_add(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_fib_entry *fib_entry)
-{
- struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
-
- if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
- return 0;
-
- /* To prevent packet loss, overwrite the previously offloaded
- * entry.
- */
- if (!list_is_singular(&fib_node->entry_list)) {
- enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;
- struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);
-
- mlxsw_sp_fib_entry_offload_refresh(n, op, 0);
- }
-
- return mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
-}
-
-static void mlxsw_sp_fib_node_entry_del(struct mlxsw_sp *mlxsw_sp,
+static int mlxsw_sp_fib_node_entry_link(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_fib_entry *fib_entry)
{
struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
-
- if (!mlxsw_sp_fib_node_entry_is_first(fib_node, fib_entry))
- return;
-
- /* Promote the next entry by overwriting the deleted entry */
- if (!list_is_singular(&fib_node->entry_list)) {
- struct mlxsw_sp_fib_entry *n = list_next_entry(fib_entry, list);
- enum mlxsw_reg_ralue_op op = MLXSW_REG_RALUE_OP_WRITE_DELETE;
-
- mlxsw_sp_fib_entry_update(mlxsw_sp, n);
- mlxsw_sp_fib_entry_offload_refresh(fib_entry, op, 0);
- return;
- }
-
- mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry);
-}
-
-static int mlxsw_sp_fib4_node_entry_link(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_fib4_entry *fib4_entry,
- bool replace, bool append)
-{
int err;
- err = mlxsw_sp_fib4_node_list_insert(fib4_entry, replace, append);
- if (err)
- return err;
+ fib_node->fib_entry = fib_entry;
- err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib4_entry->common);
+ err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
if (err)
- goto err_fib_node_entry_add;
+ goto err_fib_entry_update;
return 0;
-err_fib_node_entry_add:
- mlxsw_sp_fib4_node_list_remove(fib4_entry);
+err_fib_entry_update:
+ fib_node->fib_entry = NULL;
return err;
}
static void
-mlxsw_sp_fib4_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_fib4_entry *fib4_entry)
+mlxsw_sp_fib_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
+ struct mlxsw_sp_fib_entry *fib_entry)
{
- mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib4_entry->common);
- mlxsw_sp_fib4_node_list_remove(fib4_entry);
-
- if (fib4_entry->common.type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP)
- mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, &fib4_entry->common);
-}
-
-static void mlxsw_sp_fib4_entry_replace(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_fib4_entry *fib4_entry,
- bool replace)
-{
- struct mlxsw_sp_fib_node *fib_node = fib4_entry->common.fib_node;
- struct mlxsw_sp_fib4_entry *replaced;
-
- if (!replace)
- return;
-
- /* We inserted the new entry before replaced one */
- replaced = list_next_entry(fib4_entry, common.list);
+ struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
- mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, replaced);
- mlxsw_sp_fib4_entry_destroy(mlxsw_sp, replaced);
- mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
+ mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry);
+ fib_node->fib_entry = NULL;
}
static int
-mlxsw_sp_router_fib4_add(struct mlxsw_sp *mlxsw_sp,
- const struct fib_entry_notifier_info *fen_info,
- bool replace, bool append)
+mlxsw_sp_router_fib4_replace(struct mlxsw_sp *mlxsw_sp,
+ const struct fib_entry_notifier_info *fen_info)
{
- struct mlxsw_sp_fib4_entry *fib4_entry;
+ struct mlxsw_sp_fib4_entry *fib4_entry, *fib4_replaced;
+ struct mlxsw_sp_fib_entry *replaced;
struct mlxsw_sp_fib_node *fib_node;
int err;
@@ -4989,18 +4819,26 @@ mlxsw_sp_router_fib4_add(struct mlxsw_sp *mlxsw_sp,
goto err_fib4_entry_create;
}
- err = mlxsw_sp_fib4_node_entry_link(mlxsw_sp, fib4_entry, replace,
- append);
+ replaced = fib_node->fib_entry;
+ err = mlxsw_sp_fib_node_entry_link(mlxsw_sp, &fib4_entry->common);
if (err) {
dev_warn(mlxsw_sp->bus_info->dev, "Failed to link FIB entry to node\n");
- goto err_fib4_node_entry_link;
+ goto err_fib_node_entry_link;
}
- mlxsw_sp_fib4_entry_replace(mlxsw_sp, fib4_entry, replace);
+ /* Nothing to replace */
+ if (!replaced)
+ return 0;
+
+ mlxsw_sp_fib_entry_offload_unset(replaced);
+ fib4_replaced = container_of(replaced, struct mlxsw_sp_fib4_entry,
+ common);
+ mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_replaced);
return 0;
-err_fib4_node_entry_link:
+err_fib_node_entry_link:
+ fib_node->fib_entry = replaced;
mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
err_fib4_entry_create:
mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
@@ -5021,7 +4859,7 @@ static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp,
return;
fib_node = fib4_entry->common.fib_node;
- mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry);
+ mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, &fib4_entry->common);
mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
}
@@ -5083,13 +4921,6 @@ static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
kfree(mlxsw_sp_rt6);
}
-static bool mlxsw_sp_fib6_rt_can_mp(const struct fib6_info *rt)
-{
- /* RTF_CACHE routes are ignored */
- return !(rt->fib6_flags & RTF_ADDRCONF) &&
- rt->fib6_nh->fib_nh_gw_family;
-}
-
static struct fib6_info *
mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry)
{
@@ -5097,37 +4928,6 @@ mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry)
list)->rt;
}
-static struct mlxsw_sp_fib6_entry *
-mlxsw_sp_fib6_node_mp_entry_find(const struct mlxsw_sp_fib_node *fib_node,
- const struct fib6_info *nrt, bool replace)
-{
- struct mlxsw_sp_fib6_entry *fib6_entry;
-
- if (!mlxsw_sp_fib6_rt_can_mp(nrt) || replace)
- return NULL;
-
- list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
- struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
-
- /* RT6_TABLE_LOCAL and RT6_TABLE_MAIN share the same
- * virtual router.
- */
- if (rt->fib6_table->tb6_id > nrt->fib6_table->tb6_id)
- continue;
- if (rt->fib6_table->tb6_id != nrt->fib6_table->tb6_id)
- break;
- if (rt->fib6_metric < nrt->fib6_metric)
- continue;
- if (rt->fib6_metric == nrt->fib6_metric &&
- mlxsw_sp_fib6_rt_can_mp(rt))
- return fib6_entry;
- if (rt->fib6_metric > nrt->fib6_metric)
- break;
- }
-
- return NULL;
-}
-
static struct mlxsw_sp_rt6 *
mlxsw_sp_fib6_entry_rt_find(const struct mlxsw_sp_fib6_entry *fib6_entry,
const struct fib6_info *rt)
@@ -5345,16 +5145,16 @@ mlxsw_sp_nexthop6_group_update(struct mlxsw_sp *mlxsw_sp,
* currently associated with it in the device's table is that
* of the old group. Start using the new one instead.
*/
- err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common);
+ err = mlxsw_sp_fib_entry_update(mlxsw_sp, &fib6_entry->common);
if (err)
- goto err_fib_node_entry_add;
+ goto err_fib_entry_update;
if (list_empty(&old_nh_grp->fib_list))
mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, old_nh_grp);
return 0;
-err_fib_node_entry_add:
+err_fib_entry_update:
mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
err_nexthop6_group_get:
list_add_tail(&fib6_entry->common.nexthop_group_node,
@@ -5519,112 +5319,13 @@ static void mlxsw_sp_fib6_entry_destroy(struct mlxsw_sp *mlxsw_sp,
}
static struct mlxsw_sp_fib6_entry *
-mlxsw_sp_fib6_node_entry_find(const struct mlxsw_sp_fib_node *fib_node,
- const struct fib6_info *nrt, bool replace)
-{
- struct mlxsw_sp_fib6_entry *fib6_entry, *fallback = NULL;
-
- list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
- struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
-
- if (rt->fib6_table->tb6_id > nrt->fib6_table->tb6_id)
- continue;
- if (rt->fib6_table->tb6_id != nrt->fib6_table->tb6_id)
- break;
- if (replace && rt->fib6_metric == nrt->fib6_metric) {
- if (mlxsw_sp_fib6_rt_can_mp(rt) ==
- mlxsw_sp_fib6_rt_can_mp(nrt))
- return fib6_entry;
- if (mlxsw_sp_fib6_rt_can_mp(nrt))
- fallback = fallback ?: fib6_entry;
- }
- if (rt->fib6_metric > nrt->fib6_metric)
- return fallback ?: fib6_entry;
- }
-
- return fallback;
-}
-
-static int
-mlxsw_sp_fib6_node_list_insert(struct mlxsw_sp_fib6_entry *new6_entry,
- bool *p_replace)
-{
- struct mlxsw_sp_fib_node *fib_node = new6_entry->common.fib_node;
- struct fib6_info *nrt = mlxsw_sp_fib6_entry_rt(new6_entry);
- struct mlxsw_sp_fib6_entry *fib6_entry;
-
- fib6_entry = mlxsw_sp_fib6_node_entry_find(fib_node, nrt, *p_replace);
-
- if (*p_replace && !fib6_entry)
- *p_replace = false;
-
- if (fib6_entry) {
- list_add_tail(&new6_entry->common.list,
- &fib6_entry->common.list);
- } else {
- struct mlxsw_sp_fib6_entry *last;
-
- list_for_each_entry(last, &fib_node->entry_list, common.list) {
- struct fib6_info *rt = mlxsw_sp_fib6_entry_rt(last);
-
- if (nrt->fib6_table->tb6_id > rt->fib6_table->tb6_id)
- break;
- fib6_entry = last;
- }
-
- if (fib6_entry)
- list_add(&new6_entry->common.list,
- &fib6_entry->common.list);
- else
- list_add(&new6_entry->common.list,
- &fib_node->entry_list);
- }
-
- return 0;
-}
-
-static void
-mlxsw_sp_fib6_node_list_remove(struct mlxsw_sp_fib6_entry *fib6_entry)
-{
- list_del(&fib6_entry->common.list);
-}
-
-static int mlxsw_sp_fib6_node_entry_link(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_fib6_entry *fib6_entry,
- bool *p_replace)
-{
- int err;
-
- err = mlxsw_sp_fib6_node_list_insert(fib6_entry, p_replace);
- if (err)
- return err;
-
- err = mlxsw_sp_fib_node_entry_add(mlxsw_sp, &fib6_entry->common);
- if (err)
- goto err_fib_node_entry_add;
-
- return 0;
-
-err_fib_node_entry_add:
- mlxsw_sp_fib6_node_list_remove(fib6_entry);
- return err;
-}
-
-static void
-mlxsw_sp_fib6_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_fib6_entry *fib6_entry)
-{
- mlxsw_sp_fib_node_entry_del(mlxsw_sp, &fib6_entry->common);
- mlxsw_sp_fib6_node_list_remove(fib6_entry);
-}
-
-static struct mlxsw_sp_fib6_entry *
mlxsw_sp_fib6_entry_lookup(struct mlxsw_sp *mlxsw_sp,
const struct fib6_info *rt)
{
struct mlxsw_sp_fib6_entry *fib6_entry;
struct mlxsw_sp_fib_node *fib_node;
struct mlxsw_sp_fib *fib;
+ struct fib6_info *cmp_rt;
struct mlxsw_sp_vr *vr;
vr = mlxsw_sp_vr_find(mlxsw_sp, rt->fib6_table->tb6_id);
@@ -5638,40 +5339,23 @@ mlxsw_sp_fib6_entry_lookup(struct mlxsw_sp *mlxsw_sp,
if (!fib_node)
return NULL;
- list_for_each_entry(fib6_entry, &fib_node->entry_list, common.list) {
- struct fib6_info *iter_rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
-
- if (rt->fib6_table->tb6_id == iter_rt->fib6_table->tb6_id &&
- rt->fib6_metric == iter_rt->fib6_metric &&
- mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt))
- return fib6_entry;
- }
+ fib6_entry = container_of(fib_node->fib_entry,
+ struct mlxsw_sp_fib6_entry, common);
+ cmp_rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
+ if (rt->fib6_table->tb6_id == cmp_rt->fib6_table->tb6_id &&
+ rt->fib6_metric == cmp_rt->fib6_metric &&
+ mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt))
+ return fib6_entry;
return NULL;
}
-static void mlxsw_sp_fib6_entry_replace(struct mlxsw_sp *mlxsw_sp,
- struct mlxsw_sp_fib6_entry *fib6_entry,
- bool replace)
+static int mlxsw_sp_router_fib6_replace(struct mlxsw_sp *mlxsw_sp,
+ struct fib6_info **rt_arr,
+ unsigned int nrt6)
{
- struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node;
- struct mlxsw_sp_fib6_entry *replaced;
-
- if (!replace)
- return;
-
- replaced = list_next_entry(fib6_entry, common.list);
-
- mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, replaced);
- mlxsw_sp_fib6_entry_destroy(mlxsw_sp, replaced);
- mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
-}
-
-static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp,
- struct fib6_info **rt_arr,
- unsigned int nrt6, bool replace)
-{
- struct mlxsw_sp_fib6_entry *fib6_entry;
+ struct mlxsw_sp_fib6_entry *fib6_entry, *fib6_replaced;
+ struct mlxsw_sp_fib_entry *replaced;
struct mlxsw_sp_fib_node *fib_node;
struct fib6_info *rt = rt_arr[0];
int err;
@@ -5693,18 +5377,6 @@ static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp,
if (IS_ERR(fib_node))
return PTR_ERR(fib_node);
- /* Before creating a new entry, try to append route to an existing
- * multipath entry.
- */
- fib6_entry = mlxsw_sp_fib6_node_mp_entry_find(fib_node, rt, replace);
- if (fib6_entry) {
- err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry,
- rt_arr, nrt6);
- if (err)
- goto err_fib6_entry_nexthop_add;
- return 0;
- }
-
fib6_entry = mlxsw_sp_fib6_entry_create(mlxsw_sp, fib_node, rt_arr,
nrt6);
if (IS_ERR(fib6_entry)) {
@@ -5712,17 +5384,70 @@ static int mlxsw_sp_router_fib6_add(struct mlxsw_sp *mlxsw_sp,
goto err_fib6_entry_create;
}
- err = mlxsw_sp_fib6_node_entry_link(mlxsw_sp, fib6_entry, &replace);
+ replaced = fib_node->fib_entry;
+ err = mlxsw_sp_fib_node_entry_link(mlxsw_sp, &fib6_entry->common);
if (err)
- goto err_fib6_node_entry_link;
+ goto err_fib_node_entry_link;
+
+ /* Nothing to replace */
+ if (!replaced)
+ return 0;
- mlxsw_sp_fib6_entry_replace(mlxsw_sp, fib6_entry, replace);
+ mlxsw_sp_fib_entry_offload_unset(replaced);
+ fib6_replaced = container_of(replaced, struct mlxsw_sp_fib6_entry,
+ common);
+ mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_replaced);
return 0;
-err_fib6_node_entry_link:
+err_fib_node_entry_link:
+ fib_node->fib_entry = replaced;
mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
err_fib6_entry_create:
+ mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
+ return err;
+}
+
+static int mlxsw_sp_router_fib6_append(struct mlxsw_sp *mlxsw_sp,
+ struct fib6_info **rt_arr,
+ unsigned int nrt6)
+{
+ struct mlxsw_sp_fib6_entry *fib6_entry;
+ struct mlxsw_sp_fib_node *fib_node;
+ struct fib6_info *rt = rt_arr[0];
+ int err;
+
+ if (mlxsw_sp->router->aborted)
+ return 0;
+
+ if (rt->fib6_src.plen)
+ return -EINVAL;
+
+ if (mlxsw_sp_fib6_rt_should_ignore(rt))
+ return 0;
+
+ fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->fib6_table->tb6_id,
+ &rt->fib6_dst.addr,
+ sizeof(rt->fib6_dst.addr),
+ rt->fib6_dst.plen,
+ MLXSW_SP_L3_PROTO_IPV6);
+ if (IS_ERR(fib_node))
+ return PTR_ERR(fib_node);
+
+ if (WARN_ON_ONCE(!fib_node->fib_entry)) {
+ mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
+ return -EINVAL;
+ }
+
+ fib6_entry = container_of(fib_node->fib_entry,
+ struct mlxsw_sp_fib6_entry, common);
+ err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry, rt_arr,
+ nrt6);
+ if (err)
+ goto err_fib6_entry_nexthop_add;
+
+ return 0;
+
err_fib6_entry_nexthop_add:
mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
return err;
@@ -5742,8 +5467,13 @@ static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp,
if (mlxsw_sp_fib6_rt_should_ignore(rt))
return;
+ /* Multipath routes are first added to the FIB trie and only then
+ * notified. If we vetoed the addition, we will get a delete
+ * notification for a route we do not have. Therefore, do not warn if
+ * route was not found.
+ */
fib6_entry = mlxsw_sp_fib6_entry_lookup(mlxsw_sp, rt);
- if (WARN_ON(!fib6_entry))
+ if (!fib6_entry)
return;
/* If not all the nexthops are deleted, then only reduce the nexthop
@@ -5757,7 +5487,7 @@ static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp,
fib_node = fib6_entry->common.fib_node;
- mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry);
+ mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, &fib6_entry->common);
mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
}
@@ -5911,39 +5641,25 @@ static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp)
static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_fib_node *fib_node)
{
- struct mlxsw_sp_fib4_entry *fib4_entry, *tmp;
-
- list_for_each_entry_safe(fib4_entry, tmp, &fib_node->entry_list,
- common.list) {
- bool do_break = &tmp->common.list == &fib_node->entry_list;
+ struct mlxsw_sp_fib4_entry *fib4_entry;
- mlxsw_sp_fib4_node_entry_unlink(mlxsw_sp, fib4_entry);
- mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
- mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
- /* Break when entry list is empty and node was freed.
- * Otherwise, we'll access freed memory in the next
- * iteration.
- */
- if (do_break)
- break;
- }
+ fib4_entry = container_of(fib_node->fib_entry,
+ struct mlxsw_sp_fib4_entry, common);
+ mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, fib_node->fib_entry);
+ mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
+ mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
}
static void mlxsw_sp_fib6_node_flush(struct mlxsw_sp *mlxsw_sp,
struct mlxsw_sp_fib_node *fib_node)
{
- struct mlxsw_sp_fib6_entry *fib6_entry, *tmp;
-
- list_for_each_entry_safe(fib6_entry, tmp, &fib_node->entry_list,
- common.list) {
- bool do_break = &tmp->common.list == &fib_node->entry_list;
+ struct mlxsw_sp_fib6_entry *fib6_entry;
- mlxsw_sp_fib6_node_entry_unlink(mlxsw_sp, fib6_entry);
- mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
- mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
- if (do_break)
- break;
- }
+ fib6_entry = container_of(fib_node->fib_entry,
+ struct mlxsw_sp_fib6_entry, common);
+ mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, fib_node->fib_entry);
+ mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
+ mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
}
static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp,
@@ -6094,7 +5810,6 @@ static void mlxsw_sp_router_fib4_event_work(struct work_struct *work)
struct mlxsw_sp_fib_event_work *fib_work =
container_of(work, struct mlxsw_sp_fib_event_work, work);
struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
- bool replace, append;
int err;
/* Protect internal structures from changes */
@@ -6102,13 +5817,9 @@ static void mlxsw_sp_router_fib4_event_work(struct work_struct *work)
mlxsw_sp_span_respin(mlxsw_sp);
switch (fib_work->event) {
- case FIB_EVENT_ENTRY_REPLACE: /* fall through */
- case FIB_EVENT_ENTRY_APPEND: /* fall through */
- case FIB_EVENT_ENTRY_ADD:
- replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
- append = fib_work->event == FIB_EVENT_ENTRY_APPEND;
- err = mlxsw_sp_router_fib4_add(mlxsw_sp, &fib_work->fen_info,
- replace, append);
+ case FIB_EVENT_ENTRY_REPLACE:
+ err = mlxsw_sp_router_fib4_replace(mlxsw_sp,
+ &fib_work->fen_info);
if (err)
mlxsw_sp_router_fib_abort(mlxsw_sp);
fib_info_put(fib_work->fen_info.fi);
@@ -6133,20 +5844,24 @@ static void mlxsw_sp_router_fib6_event_work(struct work_struct *work)
struct mlxsw_sp_fib_event_work *fib_work =
container_of(work, struct mlxsw_sp_fib_event_work, work);
struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
- bool replace;
int err;
rtnl_lock();
mlxsw_sp_span_respin(mlxsw_sp);
switch (fib_work->event) {
- case FIB_EVENT_ENTRY_REPLACE: /* fall through */
- case FIB_EVENT_ENTRY_ADD:
- replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
- err = mlxsw_sp_router_fib6_add(mlxsw_sp,
- fib_work->fib6_work.rt_arr,
- fib_work->fib6_work.nrt6,
- replace);
+ case FIB_EVENT_ENTRY_REPLACE:
+ err = mlxsw_sp_router_fib6_replace(mlxsw_sp,
+ fib_work->fib6_work.rt_arr,
+ fib_work->fib6_work.nrt6);
+ if (err)
+ mlxsw_sp_router_fib_abort(mlxsw_sp);
+ mlxsw_sp_router_fib6_work_fini(&fib_work->fib6_work);
+ break;
+ case FIB_EVENT_ENTRY_APPEND:
+ err = mlxsw_sp_router_fib6_append(mlxsw_sp,
+ fib_work->fib6_work.rt_arr,
+ fib_work->fib6_work.nrt6);
if (err)
mlxsw_sp_router_fib_abort(mlxsw_sp);
mlxsw_sp_router_fib6_work_fini(&fib_work->fib6_work);
@@ -6211,8 +5926,6 @@ static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work,
switch (fib_work->event) {
case FIB_EVENT_ENTRY_REPLACE: /* fall through */
- case FIB_EVENT_ENTRY_APPEND: /* fall through */
- case FIB_EVENT_ENTRY_ADD: /* fall through */
case FIB_EVENT_ENTRY_DEL:
fen_info = container_of(info, struct fib_entry_notifier_info,
info);
@@ -6240,7 +5953,7 @@ static int mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work,
switch (fib_work->event) {
case FIB_EVENT_ENTRY_REPLACE: /* fall through */
- case FIB_EVENT_ENTRY_ADD: /* fall through */
+ case FIB_EVENT_ENTRY_APPEND: /* fall through */
case FIB_EVENT_ENTRY_DEL:
fen6_info = container_of(info, struct fib6_entry_notifier_info,
info);
@@ -6343,9 +6056,9 @@ static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
err = mlxsw_sp_router_fib_rule_event(event, info,
router->mlxsw_sp);
return notifier_from_errno(err);
- case FIB_EVENT_ENTRY_ADD:
+ case FIB_EVENT_ENTRY_ADD: /* fall through */
case FIB_EVENT_ENTRY_REPLACE: /* fall through */
- case FIB_EVENT_ENTRY_APPEND: /* fall through */
+ case FIB_EVENT_ENTRY_APPEND:
if (router->aborted) {
NL_SET_ERR_MSG_MOD(info->extack, "FIB offload was aborted. Not configuring route");
return notifier_from_errno(-EINVAL);
@@ -7074,6 +6787,9 @@ static int mlxsw_sp_router_port_check_rif_addr(struct mlxsw_sp *mlxsw_sp,
for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) {
rif = mlxsw_sp->router->rifs[i];
+ if (rif && rif->ops &&
+ rif->ops->type == MLXSW_SP_RIF_TYPE_IPIP_LB)
+ continue;
if (rif && rif->dev && rif->dev != dev &&
!ether_addr_equal_masked(rif->dev->dev_addr, dev_addr,
mlxsw_sp->mac_mask)) {
diff --git a/drivers/net/ethernet/mellanox/mlxsw/trap.h b/drivers/net/ethernet/mellanox/mlxsw/trap.h
index 0c1c142bb6b0..3d2331be05d8 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/trap.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/trap.h
@@ -80,10 +80,14 @@ enum {
MLXSW_TRAP_ID_DISCARD_ING_ROUTER_UC_DIP_MC_DMAC = 0x161,
MLXSW_TRAP_ID_DISCARD_ING_ROUTER_DIP_LB = 0x162,
MLXSW_TRAP_ID_DISCARD_ING_ROUTER_SIP_MC = 0x163,
+ MLXSW_TRAP_ID_DISCARD_ING_ROUTER_SIP_CLASS_E = 0x164,
MLXSW_TRAP_ID_DISCARD_ING_ROUTER_SIP_LB = 0x165,
MLXSW_TRAP_ID_DISCARD_ING_ROUTER_CORRUPTED_IP_HDR = 0x167,
+ MLXSW_TRAP_ID_DISCARD_ING_ROUTER_MC_DMAC = 0x168,
+ MLXSW_TRAP_ID_DISCARD_ING_ROUTER_SIP_DIP = 0x169,
MLXSW_TRAP_ID_DISCARD_ING_ROUTER_IPV4_SIP_BC = 0x16A,
MLXSW_TRAP_ID_DISCARD_ING_ROUTER_IPV4_DIP_LOCAL_NET = 0x16B,
+ MLXSW_TRAP_ID_DISCARD_ING_ROUTER_DIP_LINK_LOCAL = 0x16C,
MLXSW_TRAP_ID_DISCARD_ROUTER_LPM4 = 0x17B,
MLXSW_TRAP_ID_DISCARD_ROUTER_LPM6 = 0x17C,
MLXSW_TRAP_ID_DISCARD_IPV6_MC_DIP_RESERVED_SCOPE = 0x1B0,
diff --git a/drivers/net/ethernet/micrel/ks8842.c b/drivers/net/ethernet/micrel/ks8842.c
index da329ca115cc..f3f6dfe3eddc 100644
--- a/drivers/net/ethernet/micrel/ks8842.c
+++ b/drivers/net/ethernet/micrel/ks8842.c
@@ -1103,7 +1103,7 @@ static void ks8842_tx_timeout_work(struct work_struct *work)
__ks8842_start_new_rx_dma(netdev);
}
-static void ks8842_tx_timeout(struct net_device *netdev)
+static void ks8842_tx_timeout(struct net_device *netdev, unsigned int txqueue)
{
struct ks8842_adapter *adapter = netdev_priv(netdev);
diff --git a/drivers/net/ethernet/micrel/ksz884x.c b/drivers/net/ethernet/micrel/ksz884x.c
index e102e1560ac7..d1444ba36e10 100644
--- a/drivers/net/ethernet/micrel/ksz884x.c
+++ b/drivers/net/ethernet/micrel/ksz884x.c
@@ -4896,7 +4896,7 @@ unlock:
* triggered to free up resources so that the transmit routine can continue
* sending out packets. The hardware is reset to correct the problem.
*/
-static void netdev_tx_timeout(struct net_device *dev)
+static void netdev_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
static unsigned long last_reset;
diff --git a/drivers/net/ethernet/microchip/enc28j60.c b/drivers/net/ethernet/microchip/enc28j60.c
index 0567e4f387a5..09cdc2f2e7ff 100644
--- a/drivers/net/ethernet/microchip/enc28j60.c
+++ b/drivers/net/ethernet/microchip/enc28j60.c
@@ -1325,7 +1325,7 @@ static irqreturn_t enc28j60_irq(int irq, void *dev_id)
return IRQ_HANDLED;
}
-static void enc28j60_tx_timeout(struct net_device *ndev)
+static void enc28j60_tx_timeout(struct net_device *ndev, unsigned int txqueue)
{
struct enc28j60_net *priv = netdev_priv(ndev);
diff --git a/drivers/net/ethernet/microchip/encx24j600.c b/drivers/net/ethernet/microchip/encx24j600.c
index 52c41d11f565..39925e4bf2ec 100644
--- a/drivers/net/ethernet/microchip/encx24j600.c
+++ b/drivers/net/ethernet/microchip/encx24j600.c
@@ -892,7 +892,7 @@ static netdev_tx_t encx24j600_tx(struct sk_buff *skb, struct net_device *dev)
}
/* Deal with a transmit timeout */
-static void encx24j600_tx_timeout(struct net_device *dev)
+static void encx24j600_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct encx24j600_priv *priv = netdev_priv(dev);
diff --git a/drivers/net/ethernet/microchip/lan743x_ptp.c b/drivers/net/ethernet/microchip/lan743x_ptp.c
index afe52463dc57..9399f6a98748 100644
--- a/drivers/net/ethernet/microchip/lan743x_ptp.c
+++ b/drivers/net/ethernet/microchip/lan743x_ptp.c
@@ -1265,6 +1265,9 @@ int lan743x_ptp_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
lan743x_ptp_set_sync_ts_insert(adapter, true);
break;
+ case HWTSTAMP_TX_ONESTEP_P2P:
+ ret = -ERANGE;
+ break;
default:
netif_warn(adapter, drv, adapter->netdev,
" tx_type = %d, UNKNOWN\n", config.tx_type);
diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c
index 985b46d7e3d1..86d543ab1ab9 100644
--- a/drivers/net/ethernet/mscc/ocelot.c
+++ b/drivers/net/ethernet/mscc/ocelot.c
@@ -500,13 +500,14 @@ EXPORT_SYMBOL(ocelot_port_enable);
static int ocelot_port_open(struct net_device *dev)
{
struct ocelot_port_private *priv = netdev_priv(dev);
- struct ocelot *ocelot = priv->port.ocelot;
+ struct ocelot_port *ocelot_port = &priv->port;
+ struct ocelot *ocelot = ocelot_port->ocelot;
int port = priv->chip_port;
int err;
if (priv->serdes) {
err = phy_set_mode_ext(priv->serdes, PHY_MODE_ETHERNET,
- priv->phy_mode);
+ ocelot_port->phy_mode);
if (err) {
netdev_err(dev, "Could not set mode of SerDes\n");
return err;
@@ -514,7 +515,7 @@ static int ocelot_port_open(struct net_device *dev)
}
err = phy_connect_direct(dev, priv->phy, &ocelot_port_adjust_link,
- priv->phy_mode);
+ ocelot_port->phy_mode);
if (err) {
netdev_err(dev, "Could not attach to PHY\n");
return err;
diff --git a/drivers/net/ethernet/mscc/ocelot.h b/drivers/net/ethernet/mscc/ocelot.h
index c259114c48fd..04372ba72fec 100644
--- a/drivers/net/ethernet/mscc/ocelot.h
+++ b/drivers/net/ethernet/mscc/ocelot.h
@@ -18,11 +18,11 @@
#include <linux/ptp_clock_kernel.h>
#include <linux/regmap.h>
+#include <soc/mscc/ocelot_qsys.h>
#include <soc/mscc/ocelot_sys.h>
+#include <soc/mscc/ocelot_dev.h>
+#include <soc/mscc/ocelot_ana.h>
#include <soc/mscc/ocelot.h>
-#include "ocelot_ana.h"
-#include "ocelot_dev.h"
-#include "ocelot_qsys.h"
#include "ocelot_rew.h"
#include "ocelot_qs.h"
#include "ocelot_tc.h"
@@ -68,7 +68,6 @@ struct ocelot_port_private {
u8 vlan_aware;
- phy_interface_t phy_mode;
struct phy *serdes;
struct ocelot_port_tc tc;
diff --git a/drivers/net/ethernet/mscc/ocelot_board.c b/drivers/net/ethernet/mscc/ocelot_board.c
index 2da8eee27e98..b38820849faa 100644
--- a/drivers/net/ethernet/mscc/ocelot_board.c
+++ b/drivers/net/ethernet/mscc/ocelot_board.c
@@ -402,9 +402,9 @@ static int mscc_ocelot_probe(struct platform_device *pdev)
of_get_phy_mode(portnp, &phy_mode);
- priv->phy_mode = phy_mode;
+ ocelot_port->phy_mode = phy_mode;
- switch (priv->phy_mode) {
+ switch (ocelot_port->phy_mode) {
case PHY_INTERFACE_MODE_NA:
continue;
case PHY_INTERFACE_MODE_SGMII:
diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
index c979f38a2e0c..2ee0d0be113a 100644
--- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
+++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
@@ -2892,7 +2892,7 @@ drop:
static netdev_tx_t myri10ge_sw_tso(struct sk_buff *skb,
struct net_device *dev)
{
- struct sk_buff *segs, *curr;
+ struct sk_buff *segs, *curr, *next;
struct myri10ge_priv *mgp = netdev_priv(dev);
struct myri10ge_slice_state *ss;
netdev_tx_t status;
@@ -2901,10 +2901,8 @@ static netdev_tx_t myri10ge_sw_tso(struct sk_buff *skb,
if (IS_ERR(segs))
goto drop;
- while (segs) {
- curr = segs;
- segs = segs->next;
- curr->next = NULL;
+ skb_list_walk_safe(segs, curr, next) {
+ skb_mark_not_on_list(curr);
status = myri10ge_xmit(curr, dev);
if (status != 0) {
dev_kfree_skb_any(curr);
diff --git a/drivers/net/ethernet/natsemi/natsemi.c b/drivers/net/ethernet/natsemi/natsemi.c
index 1a2634cbbb69..d21d706b83a7 100644
--- a/drivers/net/ethernet/natsemi/natsemi.c
+++ b/drivers/net/ethernet/natsemi/natsemi.c
@@ -612,7 +612,7 @@ static void undo_cable_magic(struct net_device *dev);
static void check_link(struct net_device *dev);
static void netdev_timer(struct timer_list *t);
static void dump_ring(struct net_device *dev);
-static void ns_tx_timeout(struct net_device *dev);
+static void ns_tx_timeout(struct net_device *dev, unsigned int txqueue);
static int alloc_ring(struct net_device *dev);
static void refill_rx(struct net_device *dev);
static void init_ring(struct net_device *dev);
@@ -1881,7 +1881,7 @@ static void dump_ring(struct net_device *dev)
}
}
-static void ns_tx_timeout(struct net_device *dev)
+static void ns_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct netdev_private *np = netdev_priv(dev);
void __iomem * ioaddr = ns_ioaddr(dev);
diff --git a/drivers/net/ethernet/natsemi/ns83820.c b/drivers/net/ethernet/natsemi/ns83820.c
index 6af9a7eee114..be5f62f06785 100644
--- a/drivers/net/ethernet/natsemi/ns83820.c
+++ b/drivers/net/ethernet/natsemi/ns83820.c
@@ -1549,7 +1549,7 @@ static int ns83820_stop(struct net_device *ndev)
return 0;
}
-static void ns83820_tx_timeout(struct net_device *ndev)
+static void ns83820_tx_timeout(struct net_device *ndev, unsigned int txqueue)
{
struct ns83820 *dev = PRIV(ndev);
u32 tx_done_idx;
@@ -1603,7 +1603,7 @@ static void ns83820_tx_watch(struct timer_list *t)
ndev->name,
dev->tx_done_idx, dev->tx_free_idx,
atomic_read(&dev->nr_tx_skbs));
- ns83820_tx_timeout(ndev);
+ ns83820_tx_timeout(ndev, UINT_MAX);
}
mod_timer(&dev->tx_watchdog, jiffies + 2*HZ);
diff --git a/drivers/net/ethernet/natsemi/sonic.c b/drivers/net/ethernet/natsemi/sonic.c
index b339125b2f09..fdebc8598b22 100644
--- a/drivers/net/ethernet/natsemi/sonic.c
+++ b/drivers/net/ethernet/natsemi/sonic.c
@@ -161,7 +161,7 @@ static int sonic_close(struct net_device *dev)
return 0;
}
-static void sonic_tx_timeout(struct net_device *dev)
+static void sonic_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct sonic_local *lp = netdev_priv(dev);
int i;
diff --git a/drivers/net/ethernet/natsemi/sonic.h b/drivers/net/ethernet/natsemi/sonic.h
index 2b27f7049acb..f1544481aac1 100644
--- a/drivers/net/ethernet/natsemi/sonic.h
+++ b/drivers/net/ethernet/natsemi/sonic.h
@@ -336,7 +336,7 @@ static int sonic_close(struct net_device *dev);
static struct net_device_stats *sonic_get_stats(struct net_device *dev);
static void sonic_multicast_list(struct net_device *dev);
static int sonic_init(struct net_device *dev);
-static void sonic_tx_timeout(struct net_device *dev);
+static void sonic_tx_timeout(struct net_device *dev, unsigned int txqueue);
static void sonic_msg_init(struct net_device *dev);
/* Internal inlines for reading/writing DMA buffers. Note that bus
diff --git a/drivers/net/ethernet/neterion/s2io.c b/drivers/net/ethernet/neterion/s2io.c
index e0b2bf327905..0ec6b8e8b549 100644
--- a/drivers/net/ethernet/neterion/s2io.c
+++ b/drivers/net/ethernet/neterion/s2io.c
@@ -7238,7 +7238,7 @@ out_unlock:
* void
*/
-static void s2io_tx_watchdog(struct net_device *dev)
+static void s2io_tx_watchdog(struct net_device *dev, unsigned int txqueue)
{
struct s2io_nic *sp = netdev_priv(dev);
struct swStat *swstats = &sp->mac_control.stats_info->sw_stat;
diff --git a/drivers/net/ethernet/neterion/s2io.h b/drivers/net/ethernet/neterion/s2io.h
index 0a921f30f98f..6fa3159a977f 100644
--- a/drivers/net/ethernet/neterion/s2io.h
+++ b/drivers/net/ethernet/neterion/s2io.h
@@ -1065,7 +1065,7 @@ static void s2io_txpic_intr_handle(struct s2io_nic *sp);
static void tx_intr_handler(struct fifo_info *fifo_data);
static void s2io_handle_errors(void * dev_id);
-static void s2io_tx_watchdog(struct net_device *dev);
+static void s2io_tx_watchdog(struct net_device *dev, unsigned int txqueue);
static void s2io_set_multicast(struct net_device *dev);
static int rx_osm_handler(struct ring_info *ring_data, struct RxD_t * rxdp);
static void s2io_link(struct s2io_nic * sp, int link);
diff --git a/drivers/net/ethernet/neterion/vxge/vxge-main.c b/drivers/net/ethernet/neterion/vxge/vxge-main.c
index 1d334f2e0a56..9b63574b6202 100644
--- a/drivers/net/ethernet/neterion/vxge/vxge-main.c
+++ b/drivers/net/ethernet/neterion/vxge/vxge-main.c
@@ -3273,7 +3273,7 @@ static int vxge_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
* This function is triggered if the Tx Queue is stopped
* for a pre-defined amount of time when the Interface is still up.
*/
-static void vxge_tx_watchdog(struct net_device *dev)
+static void vxge_tx_watchdog(struct net_device *dev, unsigned int txqueue)
{
struct vxgedev *vdev;
diff --git a/drivers/net/ethernet/netronome/Kconfig b/drivers/net/ethernet/netronome/Kconfig
index bac5be4d4f43..a3f68a718813 100644
--- a/drivers/net/ethernet/netronome/Kconfig
+++ b/drivers/net/ethernet/netronome/Kconfig
@@ -31,6 +31,7 @@ config NFP_APP_FLOWER
bool "NFP4000/NFP6000 TC Flower offload support"
depends on NFP
depends on NET_SWITCHDEV
+ depends on IPV6!=m || NFP=m
default y
---help---
Enable driver support for TC Flower offload on NFP4000 and NFP6000.
diff --git a/drivers/net/ethernet/netronome/nfp/abm/cls.c b/drivers/net/ethernet/netronome/nfp/abm/cls.c
index 9f8a1f69c0c4..23ebddfb9532 100644
--- a/drivers/net/ethernet/netronome/nfp/abm/cls.c
+++ b/drivers/net/ethernet/netronome/nfp/abm/cls.c
@@ -176,10 +176,8 @@ nfp_abm_u32_knode_replace(struct nfp_abm_link *alink,
u8 mask, val;
int err;
- if (!nfp_abm_u32_check_knode(alink->abm, knode, proto, extack)) {
- err = -EOPNOTSUPP;
+ if (!nfp_abm_u32_check_knode(alink->abm, knode, proto, extack))
goto err_delete;
- }
tos_off = proto == htons(ETH_P_IP) ? 16 : 20;
@@ -200,18 +198,14 @@ nfp_abm_u32_knode_replace(struct nfp_abm_link *alink,
if ((iter->val & cmask) == (val & cmask) &&
iter->band != knode->res->classid) {
NL_SET_ERR_MSG_MOD(extack, "conflict with already offloaded filter");
- err = -EOPNOTSUPP;
goto err_delete;
}
}
if (!match) {
match = kzalloc(sizeof(*match), GFP_KERNEL);
- if (!match) {
- err = -ENOMEM;
- goto err_delete;
- }
-
+ if (!match)
+ return -ENOMEM;
list_add(&match->list, &alink->dscp_map);
}
match->handle = knode->handle;
@@ -227,7 +221,7 @@ nfp_abm_u32_knode_replace(struct nfp_abm_link *alink,
err_delete:
nfp_abm_u32_knode_delete(alink, knode);
- return err;
+ return -EOPNOTSUPP;
}
static int nfp_abm_setup_tc_block_cb(enum tc_setup_type type,
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
index c80bb83c8ac9..0a721f6e8676 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
@@ -2652,17 +2652,17 @@ static int mem_ldx_skb(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
switch (meta->insn.off) {
case offsetof(struct __sk_buff, len):
- if (size != FIELD_SIZEOF(struct __sk_buff, len))
+ if (size != sizeof_field(struct __sk_buff, len))
return -EOPNOTSUPP;
wrp_mov(nfp_prog, dst, plen_reg(nfp_prog));
break;
case offsetof(struct __sk_buff, data):
- if (size != FIELD_SIZEOF(struct __sk_buff, data))
+ if (size != sizeof_field(struct __sk_buff, data))
return -EOPNOTSUPP;
wrp_mov(nfp_prog, dst, pptr_reg(nfp_prog));
break;
case offsetof(struct __sk_buff, data_end):
- if (size != FIELD_SIZEOF(struct __sk_buff, data_end))
+ if (size != sizeof_field(struct __sk_buff, data_end))
return -EOPNOTSUPP;
emit_alu(nfp_prog, dst,
plen_reg(nfp_prog), ALU_OP_ADD, pptr_reg(nfp_prog));
@@ -2683,12 +2683,12 @@ static int mem_ldx_xdp(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
switch (meta->insn.off) {
case offsetof(struct xdp_md, data):
- if (size != FIELD_SIZEOF(struct xdp_md, data))
+ if (size != sizeof_field(struct xdp_md, data))
return -EOPNOTSUPP;
wrp_mov(nfp_prog, dst, pptr_reg(nfp_prog));
break;
case offsetof(struct xdp_md, data_end):
- if (size != FIELD_SIZEOF(struct xdp_md, data_end))
+ if (size != sizeof_field(struct xdp_md, data_end))
return -EOPNOTSUPP;
emit_alu(nfp_prog, dst,
plen_reg(nfp_prog), ALU_OP_ADD, pptr_reg(nfp_prog));
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c
index 8f732771d3fa..11c83a99b014 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c
@@ -15,7 +15,7 @@
const struct rhashtable_params nfp_bpf_maps_neutral_params = {
.nelem_hint = 4,
- .key_len = FIELD_SIZEOF(struct bpf_map, id),
+ .key_len = sizeof_field(struct bpf_map, id),
.key_offset = offsetof(struct nfp_bpf_neutral_map, map_id),
.head_offset = offsetof(struct nfp_bpf_neutral_map, l),
.automatic_shrinking = true,
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c
index 95a0d3910e31..ac02369174a9 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c
@@ -374,7 +374,7 @@ nfp_bpf_map_alloc(struct nfp_app_bpf *bpf, struct bpf_offloaded_map *offmap)
}
use_map_size = DIV_ROUND_UP(offmap->map.value_size, 4) *
- FIELD_SIZEOF(struct nfp_bpf_map, use_map[0]);
+ sizeof_field(struct nfp_bpf_map, use_map[0]);
nfp_map = kzalloc(sizeof(*nfp_map) + use_map_size, GFP_USER);
if (!nfp_map)
diff --git a/drivers/net/ethernet/netronome/nfp/ccm.h b/drivers/net/ethernet/netronome/nfp/ccm.h
index a460c75522be..d81d450be50e 100644
--- a/drivers/net/ethernet/netronome/nfp/ccm.h
+++ b/drivers/net/ethernet/netronome/nfp/ccm.h
@@ -26,6 +26,7 @@ enum nfp_ccm_type {
NFP_CCM_TYPE_CRYPTO_ADD = 10,
NFP_CCM_TYPE_CRYPTO_DEL = 11,
NFP_CCM_TYPE_CRYPTO_UPDATE = 12,
+ NFP_CCM_TYPE_CRYPTO_RESYNC = 13,
__NFP_CCM_TYPE_MAX,
};
diff --git a/drivers/net/ethernet/netronome/nfp/crypto/crypto.h b/drivers/net/ethernet/netronome/nfp/crypto/crypto.h
index 60372ddf69f0..bffe58bb2f27 100644
--- a/drivers/net/ethernet/netronome/nfp/crypto/crypto.h
+++ b/drivers/net/ethernet/netronome/nfp/crypto/crypto.h
@@ -4,6 +4,10 @@
#ifndef NFP_CRYPTO_H
#define NFP_CRYPTO_H 1
+struct net_device;
+struct nfp_net;
+struct nfp_net_tls_resync_req;
+
struct nfp_net_tls_offload_ctx {
__be32 fw_handle[2];
@@ -17,11 +21,22 @@ struct nfp_net_tls_offload_ctx {
#ifdef CONFIG_TLS_DEVICE
int nfp_net_tls_init(struct nfp_net *nn);
+int nfp_net_tls_rx_resync_req(struct net_device *netdev,
+ struct nfp_net_tls_resync_req *req,
+ void *pkt, unsigned int pkt_len);
#else
static inline int nfp_net_tls_init(struct nfp_net *nn)
{
return 0;
}
+
+static inline int
+nfp_net_tls_rx_resync_req(struct net_device *netdev,
+ struct nfp_net_tls_resync_req *req,
+ void *pkt, unsigned int pkt_len)
+{
+ return -EOPNOTSUPP;
+}
#endif
#endif
diff --git a/drivers/net/ethernet/netronome/nfp/crypto/fw.h b/drivers/net/ethernet/netronome/nfp/crypto/fw.h
index 67413d946c4a..8d1458896bcb 100644
--- a/drivers/net/ethernet/netronome/nfp/crypto/fw.h
+++ b/drivers/net/ethernet/netronome/nfp/crypto/fw.h
@@ -9,6 +9,14 @@
#define NFP_NET_CRYPTO_OP_TLS_1_2_AES_GCM_128_ENC 0
#define NFP_NET_CRYPTO_OP_TLS_1_2_AES_GCM_128_DEC 1
+struct nfp_net_tls_resync_req {
+ __be32 fw_handle[2];
+ __be32 tcp_seq;
+ u8 l3_offset;
+ u8 l4_offset;
+ u8 resv[2];
+};
+
struct nfp_crypto_reply_simple {
struct nfp_ccm_hdr hdr;
__be32 error;
diff --git a/drivers/net/ethernet/netronome/nfp/crypto/tls.c b/drivers/net/ethernet/netronome/nfp/crypto/tls.c
index 96a96b35c0ca..7c50e3dfb9d5 100644
--- a/drivers/net/ethernet/netronome/nfp/crypto/tls.c
+++ b/drivers/net/ethernet/netronome/nfp/crypto/tls.c
@@ -5,6 +5,7 @@
#include <linux/ipv6.h>
#include <linux/skbuff.h>
#include <linux/string.h>
+#include <net/inet6_hashtables.h>
#include <net/tls.h>
#include "../ccm.h"
@@ -391,8 +392,9 @@ nfp_net_tls_add(struct net_device *netdev, struct sock *sk,
if (direction == TLS_OFFLOAD_CTX_DIR_TX)
return 0;
- tls_offload_rx_resync_set_type(sk,
- TLS_OFFLOAD_SYNC_TYPE_CORE_NEXT_HINT);
+ if (!nn->tlv_caps.tls_resync_ss)
+ tls_offload_rx_resync_set_type(sk, TLS_OFFLOAD_SYNC_TYPE_CORE_NEXT_HINT);
+
return 0;
err_fw_remove:
@@ -424,6 +426,7 @@ nfp_net_tls_resync(struct net_device *netdev, struct sock *sk, u32 seq,
struct nfp_net *nn = netdev_priv(netdev);
struct nfp_net_tls_offload_ctx *ntls;
struct nfp_crypto_req_update *req;
+ enum nfp_ccm_type type;
struct sk_buff *skb;
gfp_t flags;
int err;
@@ -442,15 +445,18 @@ nfp_net_tls_resync(struct net_device *netdev, struct sock *sk, u32 seq,
req->tcp_seq = cpu_to_be32(seq);
memcpy(req->rec_no, rcd_sn, sizeof(req->rec_no));
+ type = NFP_CCM_TYPE_CRYPTO_UPDATE;
if (direction == TLS_OFFLOAD_CTX_DIR_TX) {
- err = nfp_net_tls_communicate_simple(nn, skb, "sync",
- NFP_CCM_TYPE_CRYPTO_UPDATE);
+ err = nfp_net_tls_communicate_simple(nn, skb, "sync", type);
if (err)
return err;
ntls->next_seq = seq;
} else {
- nfp_ccm_mbox_post(nn, skb, NFP_CCM_TYPE_CRYPTO_UPDATE,
+ if (nn->tlv_caps.tls_resync_ss)
+ type = NFP_CCM_TYPE_CRYPTO_RESYNC;
+ nfp_ccm_mbox_post(nn, skb, type,
sizeof(struct nfp_crypto_reply_simple));
+ atomic_inc(&nn->ktls_rx_resync_sent);
}
return 0;
@@ -462,6 +468,79 @@ static const struct tlsdev_ops nfp_net_tls_ops = {
.tls_dev_resync = nfp_net_tls_resync,
};
+int nfp_net_tls_rx_resync_req(struct net_device *netdev,
+ struct nfp_net_tls_resync_req *req,
+ void *pkt, unsigned int pkt_len)
+{
+ struct nfp_net *nn = netdev_priv(netdev);
+ struct nfp_net_tls_offload_ctx *ntls;
+ struct ipv6hdr *ipv6h;
+ struct tcphdr *th;
+ struct iphdr *iph;
+ struct sock *sk;
+ __be32 tcp_seq;
+ int err;
+
+ iph = pkt + req->l3_offset;
+ ipv6h = pkt + req->l3_offset;
+ th = pkt + req->l4_offset;
+
+ if ((u8 *)&th[1] > (u8 *)pkt + pkt_len) {
+ netdev_warn_once(netdev, "invalid TLS RX resync request (l3_off: %hhu l4_off: %hhu pkt_len: %u)\n",
+ req->l3_offset, req->l4_offset, pkt_len);
+ err = -EINVAL;
+ goto err_cnt_ign;
+ }
+
+ switch (iph->version) {
+ case 4:
+ sk = inet_lookup_established(dev_net(netdev), &tcp_hashinfo,
+ iph->saddr, th->source, iph->daddr,
+ th->dest, netdev->ifindex);
+ break;
+#if IS_ENABLED(CONFIG_IPV6)
+ case 6:
+ sk = __inet6_lookup_established(dev_net(netdev), &tcp_hashinfo,
+ &ipv6h->saddr, th->source,
+ &ipv6h->daddr, ntohs(th->dest),
+ netdev->ifindex, 0);
+ break;
+#endif
+ default:
+ netdev_warn_once(netdev, "invalid TLS RX resync request (l3_off: %hhu l4_off: %hhu ipver: %u)\n",
+ req->l3_offset, req->l4_offset, iph->version);
+ err = -EINVAL;
+ goto err_cnt_ign;
+ }
+
+ err = 0;
+ if (!sk)
+ goto err_cnt_ign;
+ if (!tls_is_sk_rx_device_offloaded(sk) ||
+ sk->sk_shutdown & RCV_SHUTDOWN)
+ goto err_put_sock;
+
+ ntls = tls_driver_ctx(sk, TLS_OFFLOAD_CTX_DIR_RX);
+ /* some FW versions can't report the handle and report 0s */
+ if (memchr_inv(&req->fw_handle, 0, sizeof(req->fw_handle)) &&
+ memcmp(&req->fw_handle, &ntls->fw_handle, sizeof(ntls->fw_handle)))
+ goto err_put_sock;
+
+ /* copy to ensure alignment */
+ memcpy(&tcp_seq, &req->tcp_seq, sizeof(tcp_seq));
+ tls_offload_rx_resync_request(sk, tcp_seq);
+ atomic_inc(&nn->ktls_rx_resync_req);
+
+ sock_gen_put(sk);
+ return 0;
+
+err_put_sock:
+ sock_gen_put(sk);
+err_cnt_ign:
+ atomic_inc(&nn->ktls_rx_resync_ign);
+ return err;
+}
+
static int nfp_net_tls_reset(struct nfp_net *nn)
{
struct nfp_crypto_req_reset *req;
diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c
index 1b019fdfcd97..c06600fb47ff 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/action.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/action.c
@@ -22,8 +22,9 @@
#define NFP_FL_TUNNEL_CSUM cpu_to_be16(0x01)
#define NFP_FL_TUNNEL_KEY cpu_to_be16(0x04)
#define NFP_FL_TUNNEL_GENEVE_OPT cpu_to_be16(0x0800)
-#define NFP_FL_SUPPORTED_TUNNEL_INFO_FLAGS IP_TUNNEL_INFO_TX
-#define NFP_FL_SUPPORTED_IPV4_UDP_TUN_FLAGS (NFP_FL_TUNNEL_CSUM | \
+#define NFP_FL_SUPPORTED_TUNNEL_INFO_FLAGS (IP_TUNNEL_INFO_TX | \
+ IP_TUNNEL_INFO_IPV6)
+#define NFP_FL_SUPPORTED_UDP_TUN_FLAGS (NFP_FL_TUNNEL_CSUM | \
NFP_FL_TUNNEL_KEY | \
NFP_FL_TUNNEL_GENEVE_OPT)
@@ -394,19 +395,26 @@ nfp_fl_push_geneve_options(struct nfp_fl_payload *nfp_fl, int *list_len,
}
static int
-nfp_fl_set_ipv4_tun(struct nfp_app *app, struct nfp_fl_set_ipv4_tun *set_tun,
- const struct flow_action_entry *act,
- struct nfp_fl_pre_tunnel *pre_tun,
- enum nfp_flower_tun_type tun_type,
- struct net_device *netdev, struct netlink_ext_ack *extack)
+nfp_fl_set_tun(struct nfp_app *app, struct nfp_fl_set_tun *set_tun,
+ const struct flow_action_entry *act,
+ struct nfp_fl_pre_tunnel *pre_tun,
+ enum nfp_flower_tun_type tun_type,
+ struct net_device *netdev, struct netlink_ext_ack *extack)
{
- size_t act_size = sizeof(struct nfp_fl_set_ipv4_tun);
const struct ip_tunnel_info *ip_tun = act->tunnel;
+ bool ipv6 = ip_tunnel_info_af(ip_tun) == AF_INET6;
+ size_t act_size = sizeof(struct nfp_fl_set_tun);
struct nfp_flower_priv *priv = app->priv;
u32 tmp_set_ip_tun_type_index = 0;
/* Currently support one pre-tunnel so index is always 0. */
int pretun_idx = 0;
+ if (!IS_ENABLED(CONFIG_IPV6) && ipv6)
+ return -EOPNOTSUPP;
+
+ if (ipv6 && !(priv->flower_ext_feats & NFP_FL_FEATS_IPV6_TUN))
+ return -EOPNOTSUPP;
+
BUILD_BUG_ON(NFP_FL_TUNNEL_CSUM != TUNNEL_CSUM ||
NFP_FL_TUNNEL_KEY != TUNNEL_KEY ||
NFP_FL_TUNNEL_GENEVE_OPT != TUNNEL_GENEVE_OPT);
@@ -417,19 +425,35 @@ nfp_fl_set_ipv4_tun(struct nfp_app *app, struct nfp_fl_set_ipv4_tun *set_tun,
return -EOPNOTSUPP;
}
- set_tun->head.jump_id = NFP_FL_ACTION_OPCODE_SET_IPV4_TUNNEL;
+ set_tun->head.jump_id = NFP_FL_ACTION_OPCODE_SET_TUNNEL;
set_tun->head.len_lw = act_size >> NFP_FL_LW_SIZ;
/* Set tunnel type and pre-tunnel index. */
tmp_set_ip_tun_type_index |=
- FIELD_PREP(NFP_FL_IPV4_TUNNEL_TYPE, tun_type) |
- FIELD_PREP(NFP_FL_IPV4_PRE_TUN_INDEX, pretun_idx);
+ FIELD_PREP(NFP_FL_TUNNEL_TYPE, tun_type) |
+ FIELD_PREP(NFP_FL_PRE_TUN_INDEX, pretun_idx);
set_tun->tun_type_index = cpu_to_be32(tmp_set_ip_tun_type_index);
set_tun->tun_id = ip_tun->key.tun_id;
if (ip_tun->key.ttl) {
set_tun->ttl = ip_tun->key.ttl;
+#ifdef CONFIG_IPV6
+ } else if (ipv6) {
+ struct net *net = dev_net(netdev);
+ struct flowi6 flow = {};
+ struct dst_entry *dst;
+
+ flow.daddr = ip_tun->key.u.ipv6.dst;
+ flow.flowi4_proto = IPPROTO_UDP;
+ dst = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &flow, NULL);
+ if (!IS_ERR(dst)) {
+ set_tun->ttl = ip6_dst_hoplimit(dst);
+ dst_release(dst);
+ } else {
+ set_tun->ttl = net->ipv6.devconf_all->hop_limit;
+ }
+#endif
} else {
struct net *net = dev_net(netdev);
struct flowi4 flow = {};
@@ -455,7 +479,7 @@ nfp_fl_set_ipv4_tun(struct nfp_app *app, struct nfp_fl_set_ipv4_tun *set_tun,
set_tun->tos = ip_tun->key.tos;
if (!(ip_tun->key.tun_flags & NFP_FL_TUNNEL_KEY) ||
- ip_tun->key.tun_flags & ~NFP_FL_SUPPORTED_IPV4_UDP_TUN_FLAGS) {
+ ip_tun->key.tun_flags & ~NFP_FL_SUPPORTED_UDP_TUN_FLAGS) {
NL_SET_ERR_MSG_MOD(extack, "unsupported offload: loaded firmware does not support tunnel flag offload");
return -EOPNOTSUPP;
}
@@ -467,7 +491,12 @@ nfp_fl_set_ipv4_tun(struct nfp_app *app, struct nfp_fl_set_ipv4_tun *set_tun,
}
/* Complete pre_tunnel action. */
- pre_tun->ipv4_dst = ip_tun->key.u.ipv4.dst;
+ if (ipv6) {
+ pre_tun->flags |= cpu_to_be16(NFP_FL_PRE_TUN_IPV6);
+ pre_tun->ipv6_dst = ip_tun->key.u.ipv6.dst;
+ } else {
+ pre_tun->ipv4_dst = ip_tun->key.u.ipv4.dst;
+ }
return 0;
}
@@ -956,8 +985,8 @@ nfp_flower_loop_action(struct nfp_app *app, const struct flow_action_entry *act,
struct nfp_flower_pedit_acts *set_act, bool *pkt_host,
struct netlink_ext_ack *extack, int act_idx)
{
- struct nfp_fl_set_ipv4_tun *set_tun;
struct nfp_fl_pre_tunnel *pre_tun;
+ struct nfp_fl_set_tun *set_tun;
struct nfp_fl_push_vlan *psh_v;
struct nfp_fl_push_mpls *psh_m;
struct nfp_fl_pop_vlan *pop_v;
@@ -1032,7 +1061,7 @@ nfp_flower_loop_action(struct nfp_app *app, const struct flow_action_entry *act,
* If none, the packet falls back before applying other actions.
*/
if (*a_len + sizeof(struct nfp_fl_pre_tunnel) +
- sizeof(struct nfp_fl_set_ipv4_tun) > NFP_FL_MAX_A_SIZ) {
+ sizeof(struct nfp_fl_set_tun) > NFP_FL_MAX_A_SIZ) {
NL_SET_ERR_MSG_MOD(extack, "unsupported offload: maximum allowed action list size exceeded at tunnel encap");
return -EOPNOTSUPP;
}
@@ -1046,11 +1075,11 @@ nfp_flower_loop_action(struct nfp_app *app, const struct flow_action_entry *act,
return err;
set_tun = (void *)&nfp_fl->action_data[*a_len];
- err = nfp_fl_set_ipv4_tun(app, set_tun, act, pre_tun,
- *tun_type, netdev, extack);
+ err = nfp_fl_set_tun(app, set_tun, act, pre_tun, *tun_type,
+ netdev, extack);
if (err)
return err;
- *a_len += sizeof(struct nfp_fl_set_ipv4_tun);
+ *a_len += sizeof(struct nfp_fl_set_tun);
}
break;
case FLOW_ACTION_TUNNEL_DECAP:
diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c
index 05981b54eaab..a595ddb92bff 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c
@@ -270,11 +270,17 @@ nfp_flower_cmsg_process_one_rx(struct nfp_app *app, struct sk_buff *skb)
}
goto err_default;
case NFP_FLOWER_CMSG_TYPE_NO_NEIGH:
- nfp_tunnel_request_route(app, skb);
+ nfp_tunnel_request_route_v4(app, skb);
+ break;
+ case NFP_FLOWER_CMSG_TYPE_NO_NEIGH_V6:
+ nfp_tunnel_request_route_v6(app, skb);
break;
case NFP_FLOWER_CMSG_TYPE_ACTIVE_TUNS:
nfp_tunnel_keep_alive(app, skb);
break;
+ case NFP_FLOWER_CMSG_TYPE_ACTIVE_TUNS_V6:
+ nfp_tunnel_keep_alive_v6(app, skb);
+ break;
case NFP_FLOWER_CMSG_TYPE_QOS_STATS:
nfp_flower_stats_rlim_reply(app, skb);
break;
@@ -361,7 +367,8 @@ void nfp_flower_cmsg_rx(struct nfp_app *app, struct sk_buff *skb)
nfp_flower_process_mtu_ack(app, skb)) {
/* Handle MTU acks outside wq to prevent RTNL conflict. */
dev_consume_skb_any(skb);
- } else if (cmsg_hdr->type == NFP_FLOWER_CMSG_TYPE_TUN_NEIGH) {
+ } else if (cmsg_hdr->type == NFP_FLOWER_CMSG_TYPE_TUN_NEIGH ||
+ cmsg_hdr->type == NFP_FLOWER_CMSG_TYPE_TUN_NEIGH_V6) {
/* Acks from the NFP that the route is added - ignore. */
dev_consume_skb_any(skb);
} else if (cmsg_hdr->type == NFP_FLOWER_CMSG_TYPE_PORT_REIFY) {
diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
index 7eb2ec8969c3..9b50d76bbc09 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h
@@ -26,6 +26,7 @@
#define NFP_FLOWER_LAYER2_GRE BIT(0)
#define NFP_FLOWER_LAYER2_GENEVE BIT(5)
#define NFP_FLOWER_LAYER2_GENEVE_OP BIT(6)
+#define NFP_FLOWER_LAYER2_TUN_IPV6 BIT(7)
#define NFP_FLOWER_MASK_VLAN_PRIO GENMASK(15, 13)
#define NFP_FLOWER_MASK_VLAN_PRESENT BIT(12)
@@ -63,6 +64,7 @@
#define NFP_FL_MAX_GENEVE_OPT_ACT 32
#define NFP_FL_MAX_GENEVE_OPT_CNT 64
#define NFP_FL_MAX_GENEVE_OPT_KEY 32
+#define NFP_FL_MAX_GENEVE_OPT_KEY_V6 8
/* Action opcodes */
#define NFP_FL_ACTION_OPCODE_OUTPUT 0
@@ -70,7 +72,7 @@
#define NFP_FL_ACTION_OPCODE_POP_VLAN 2
#define NFP_FL_ACTION_OPCODE_PUSH_MPLS 3
#define NFP_FL_ACTION_OPCODE_POP_MPLS 4
-#define NFP_FL_ACTION_OPCODE_SET_IPV4_TUNNEL 6
+#define NFP_FL_ACTION_OPCODE_SET_TUNNEL 6
#define NFP_FL_ACTION_OPCODE_SET_ETHERNET 7
#define NFP_FL_ACTION_OPCODE_SET_MPLS 8
#define NFP_FL_ACTION_OPCODE_SET_IPV4_ADDRS 9
@@ -99,8 +101,8 @@
/* Tunnel ports */
#define NFP_FL_PORT_TYPE_TUN 0x50000000
-#define NFP_FL_IPV4_TUNNEL_TYPE GENMASK(7, 4)
-#define NFP_FL_IPV4_PRE_TUN_INDEX GENMASK(2, 0)
+#define NFP_FL_TUNNEL_TYPE GENMASK(7, 4)
+#define NFP_FL_PRE_TUN_INDEX GENMASK(2, 0)
#define NFP_FLOWER_WORKQ_MAX_SKBS 30000
@@ -206,13 +208,16 @@ struct nfp_fl_pre_lag {
struct nfp_fl_pre_tunnel {
struct nfp_fl_act_head head;
- __be16 reserved;
- __be32 ipv4_dst;
- /* reserved for use with IPv6 addresses */
- __be32 extra[3];
+ __be16 flags;
+ union {
+ __be32 ipv4_dst;
+ struct in6_addr ipv6_dst;
+ };
};
-struct nfp_fl_set_ipv4_tun {
+#define NFP_FL_PRE_TUN_IPV6 BIT(0)
+
+struct nfp_fl_set_tun {
struct nfp_fl_act_head head;
__be16 reserved;
__be64 tun_id __packed;
@@ -387,6 +392,11 @@ struct nfp_flower_tun_ipv4 {
__be32 dst;
};
+struct nfp_flower_tun_ipv6 {
+ struct in6_addr src;
+ struct in6_addr dst;
+};
+
struct nfp_flower_tun_ip_ext {
u8 tos;
u8 ttl;
@@ -416,6 +426,42 @@ struct nfp_flower_ipv4_udp_tun {
__be32 tun_id;
};
+/* Flow Frame IPv6 UDP TUNNEL --> Tunnel details (11W/44B)
+ * -----------------------------------------------------------------
+ * 3 2 1
+ * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | ipv6_addr_src, 31 - 0 |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | ipv6_addr_src, 63 - 32 |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | ipv6_addr_src, 95 - 64 |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | ipv6_addr_src, 127 - 96 |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | ipv6_addr_dst, 31 - 0 |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | ipv6_addr_dst, 63 - 32 |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | ipv6_addr_dst, 95 - 64 |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | ipv6_addr_dst, 127 - 96 |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Reserved | tos | ttl |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Reserved |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | VNI | Reserved |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+struct nfp_flower_ipv6_udp_tun {
+ struct nfp_flower_tun_ipv6 ipv6;
+ __be16 reserved1;
+ struct nfp_flower_tun_ip_ext ip_ext;
+ __be32 reserved2;
+ __be32 tun_id;
+};
+
/* Flow Frame GRE TUNNEL --> Tunnel details (6W/24B)
* -----------------------------------------------------------------
* 3 2 1
@@ -445,6 +491,46 @@ struct nfp_flower_ipv4_gre_tun {
__be32 reserved2;
};
+/* Flow Frame GRE TUNNEL V6 --> Tunnel details (12W/48B)
+ * -----------------------------------------------------------------
+ * 3 2 1
+ * 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | ipv6_addr_src, 31 - 0 |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | ipv6_addr_src, 63 - 32 |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | ipv6_addr_src, 95 - 64 |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | ipv6_addr_src, 127 - 96 |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | ipv6_addr_dst, 31 - 0 |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | ipv6_addr_dst, 63 - 32 |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | ipv6_addr_dst, 95 - 64 |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | ipv6_addr_dst, 127 - 96 |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | tun_flags | tos | ttl |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Reserved | Ethertype |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Key |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Reserved |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+struct nfp_flower_ipv6_gre_tun {
+ struct nfp_flower_tun_ipv6 ipv6;
+ __be16 tun_flags;
+ struct nfp_flower_tun_ip_ext ip_ext;
+ __be16 reserved1;
+ __be16 ethertype;
+ __be32 tun_key;
+ __be32 reserved2;
+};
+
struct nfp_flower_geneve_options {
u8 data[NFP_FL_MAX_GENEVE_OPT_KEY];
};
@@ -485,6 +571,10 @@ enum nfp_flower_cmsg_type_port {
NFP_FLOWER_CMSG_TYPE_QOS_DEL = 19,
NFP_FLOWER_CMSG_TYPE_QOS_STATS = 20,
NFP_FLOWER_CMSG_TYPE_PRE_TUN_RULE = 21,
+ NFP_FLOWER_CMSG_TYPE_TUN_IPS_V6 = 22,
+ NFP_FLOWER_CMSG_TYPE_NO_NEIGH_V6 = 23,
+ NFP_FLOWER_CMSG_TYPE_TUN_NEIGH_V6 = 24,
+ NFP_FLOWER_CMSG_TYPE_ACTIVE_TUNS_V6 = 25,
NFP_FLOWER_CMSG_TYPE_MAX = 32,
};
diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h
index 31d94592a7c0..d55d0d33bc45 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/main.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/main.h
@@ -24,7 +24,7 @@ struct nfp_app;
#define NFP_FL_STAT_ID_MU_NUM GENMASK(31, 22)
#define NFP_FL_STAT_ID_STAT GENMASK(21, 0)
-#define NFP_FL_STATS_ELEM_RS FIELD_SIZEOF(struct nfp_fl_stats_id, \
+#define NFP_FL_STATS_ELEM_RS sizeof_field(struct nfp_fl_stats_id, \
init_unalloc)
#define NFP_FLOWER_MASK_ENTRY_RS 256
#define NFP_FLOWER_MASK_ELEMENT_RS 1
@@ -43,6 +43,7 @@ struct nfp_app;
#define NFP_FL_FEATS_VF_RLIM BIT(4)
#define NFP_FL_FEATS_FLOW_MOD BIT(5)
#define NFP_FL_FEATS_PRE_TUN_RULES BIT(6)
+#define NFP_FL_FEATS_IPV6_TUN BIT(7)
#define NFP_FL_FEATS_FLOW_MERGE BIT(30)
#define NFP_FL_FEATS_LAG BIT(31)
@@ -62,18 +63,26 @@ struct nfp_fl_stats_id {
* struct nfp_fl_tunnel_offloads - priv data for tunnel offloads
* @offloaded_macs: Hashtable of the offloaded MAC addresses
* @ipv4_off_list: List of IPv4 addresses to offload
- * @neigh_off_list: List of neighbour offloads
+ * @ipv6_off_list: List of IPv6 addresses to offload
+ * @neigh_off_list_v4: List of IPv4 neighbour offloads
+ * @neigh_off_list_v6: List of IPv6 neighbour offloads
* @ipv4_off_lock: Lock for the IPv4 address list
- * @neigh_off_lock: Lock for the neighbour address list
+ * @ipv6_off_lock: Lock for the IPv6 address list
+ * @neigh_off_lock_v4: Lock for the IPv4 neighbour address list
+ * @neigh_off_lock_v6: Lock for the IPv6 neighbour address list
* @mac_off_ids: IDA to manage id assignment for offloaded MACs
* @neigh_nb: Notifier to monitor neighbour state
*/
struct nfp_fl_tunnel_offloads {
struct rhashtable offloaded_macs;
struct list_head ipv4_off_list;
- struct list_head neigh_off_list;
+ struct list_head ipv6_off_list;
+ struct list_head neigh_off_list_v4;
+ struct list_head neigh_off_list_v6;
struct mutex ipv4_off_lock;
- spinlock_t neigh_off_lock;
+ struct mutex ipv6_off_lock;
+ spinlock_t neigh_off_lock_v4;
+ spinlock_t neigh_off_lock_v6;
struct ida mac_off_ids;
struct notifier_block neigh_nb;
};
@@ -273,12 +282,25 @@ struct nfp_fl_stats {
u64 used;
};
+/**
+ * struct nfp_ipv6_addr_entry - cached IPv6 addresses
+ * @ipv6_addr: IP address
+ * @ref_count: number of rules currently using this IP
+ * @list: list pointer
+ */
+struct nfp_ipv6_addr_entry {
+ struct in6_addr ipv6_addr;
+ int ref_count;
+ struct list_head list;
+};
+
struct nfp_fl_payload {
struct nfp_fl_rule_metadata meta;
unsigned long tc_flower_cookie;
struct rhash_head fl_node;
struct rcu_head rcu;
__be32 nfp_tun_ipv4_addr;
+ struct nfp_ipv6_addr_entry *nfp_tun_ipv6;
struct net_device *ingress_dev;
char *unmasked_data;
char *mask_data;
@@ -396,8 +418,14 @@ int nfp_tunnel_mac_event_handler(struct nfp_app *app,
unsigned long event, void *ptr);
void nfp_tunnel_del_ipv4_off(struct nfp_app *app, __be32 ipv4);
void nfp_tunnel_add_ipv4_off(struct nfp_app *app, __be32 ipv4);
-void nfp_tunnel_request_route(struct nfp_app *app, struct sk_buff *skb);
+void
+nfp_tunnel_put_ipv6_off(struct nfp_app *app, struct nfp_ipv6_addr_entry *entry);
+struct nfp_ipv6_addr_entry *
+nfp_tunnel_add_ipv6_off(struct nfp_app *app, struct in6_addr *ipv6);
+void nfp_tunnel_request_route_v4(struct nfp_app *app, struct sk_buff *skb);
+void nfp_tunnel_request_route_v6(struct nfp_app *app, struct sk_buff *skb);
void nfp_tunnel_keep_alive(struct nfp_app *app, struct sk_buff *skb);
+void nfp_tunnel_keep_alive_v6(struct nfp_app *app, struct sk_buff *skb);
void nfp_flower_lag_init(struct nfp_fl_lag *lag);
void nfp_flower_lag_cleanup(struct nfp_fl_lag *lag);
int nfp_flower_lag_reset(struct nfp_fl_lag *lag);
diff --git a/drivers/net/ethernet/netronome/nfp/flower/match.c b/drivers/net/ethernet/netronome/nfp/flower/match.c
index 9cc3ba17ff69..546bc01d507d 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/match.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/match.c
@@ -10,9 +10,8 @@
static void
nfp_flower_compile_meta_tci(struct nfp_flower_meta_tci *ext,
struct nfp_flower_meta_tci *msk,
- struct flow_cls_offload *flow, u8 key_type)
+ struct flow_rule *rule, u8 key_type)
{
- struct flow_rule *rule = flow_cls_offload_flow_rule(flow);
u16 tmp_tci;
memset(ext, 0, sizeof(struct nfp_flower_meta_tci));
@@ -77,11 +76,8 @@ nfp_flower_compile_port(struct nfp_flower_in_port *frame, u32 cmsg_port,
static void
nfp_flower_compile_mac(struct nfp_flower_mac_mpls *ext,
- struct nfp_flower_mac_mpls *msk,
- struct flow_cls_offload *flow)
+ struct nfp_flower_mac_mpls *msk, struct flow_rule *rule)
{
- struct flow_rule *rule = flow_cls_offload_flow_rule(flow);
-
memset(ext, 0, sizeof(struct nfp_flower_mac_mpls));
memset(msk, 0, sizeof(struct nfp_flower_mac_mpls));
@@ -130,10 +126,8 @@ nfp_flower_compile_mac(struct nfp_flower_mac_mpls *ext,
static void
nfp_flower_compile_tport(struct nfp_flower_tp_ports *ext,
struct nfp_flower_tp_ports *msk,
- struct flow_cls_offload *flow)
+ struct flow_rule *rule)
{
- struct flow_rule *rule = flow_cls_offload_flow_rule(flow);
-
memset(ext, 0, sizeof(struct nfp_flower_tp_ports));
memset(msk, 0, sizeof(struct nfp_flower_tp_ports));
@@ -150,11 +144,8 @@ nfp_flower_compile_tport(struct nfp_flower_tp_ports *ext,
static void
nfp_flower_compile_ip_ext(struct nfp_flower_ip_ext *ext,
- struct nfp_flower_ip_ext *msk,
- struct flow_cls_offload *flow)
+ struct nfp_flower_ip_ext *msk, struct flow_rule *rule)
{
- struct flow_rule *rule = flow_cls_offload_flow_rule(flow);
-
if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
struct flow_match_basic match;
@@ -224,10 +215,8 @@ nfp_flower_compile_ip_ext(struct nfp_flower_ip_ext *ext,
static void
nfp_flower_compile_ipv4(struct nfp_flower_ipv4 *ext,
- struct nfp_flower_ipv4 *msk,
- struct flow_cls_offload *flow)
+ struct nfp_flower_ipv4 *msk, struct flow_rule *rule)
{
- struct flow_rule *rule = flow_cls_offload_flow_rule(flow);
struct flow_match_ipv4_addrs match;
memset(ext, 0, sizeof(struct nfp_flower_ipv4));
@@ -241,16 +230,13 @@ nfp_flower_compile_ipv4(struct nfp_flower_ipv4 *ext,
msk->ipv4_dst = match.mask->dst;
}
- nfp_flower_compile_ip_ext(&ext->ip_ext, &msk->ip_ext, flow);
+ nfp_flower_compile_ip_ext(&ext->ip_ext, &msk->ip_ext, rule);
}
static void
nfp_flower_compile_ipv6(struct nfp_flower_ipv6 *ext,
- struct nfp_flower_ipv6 *msk,
- struct flow_cls_offload *flow)
+ struct nfp_flower_ipv6 *msk, struct flow_rule *rule)
{
- struct flow_rule *rule = flow_cls_offload_flow_rule(flow);
-
memset(ext, 0, sizeof(struct nfp_flower_ipv6));
memset(msk, 0, sizeof(struct nfp_flower_ipv6));
@@ -264,16 +250,15 @@ nfp_flower_compile_ipv6(struct nfp_flower_ipv6 *ext,
msk->ipv6_dst = match.mask->dst;
}
- nfp_flower_compile_ip_ext(&ext->ip_ext, &msk->ip_ext, flow);
+ nfp_flower_compile_ip_ext(&ext->ip_ext, &msk->ip_ext, rule);
}
static int
-nfp_flower_compile_geneve_opt(void *ext, void *msk,
- struct flow_cls_offload *flow)
+nfp_flower_compile_geneve_opt(void *ext, void *msk, struct flow_rule *rule)
{
struct flow_match_enc_opts match;
- flow_rule_match_enc_opts(flow->rule, &match);
+ flow_rule_match_enc_opts(rule, &match);
memcpy(ext, match.key->data, match.key->len);
memcpy(msk, match.mask->data, match.mask->len);
@@ -283,10 +268,8 @@ nfp_flower_compile_geneve_opt(void *ext, void *msk,
static void
nfp_flower_compile_tun_ipv4_addrs(struct nfp_flower_tun_ipv4 *ext,
struct nfp_flower_tun_ipv4 *msk,
- struct flow_cls_offload *flow)
+ struct flow_rule *rule)
{
- struct flow_rule *rule = flow_cls_offload_flow_rule(flow);
-
if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) {
struct flow_match_ipv4_addrs match;
@@ -299,12 +282,26 @@ nfp_flower_compile_tun_ipv4_addrs(struct nfp_flower_tun_ipv4 *ext,
}
static void
+nfp_flower_compile_tun_ipv6_addrs(struct nfp_flower_tun_ipv6 *ext,
+ struct nfp_flower_tun_ipv6 *msk,
+ struct flow_rule *rule)
+{
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS)) {
+ struct flow_match_ipv6_addrs match;
+
+ flow_rule_match_enc_ipv6_addrs(rule, &match);
+ ext->src = match.key->src;
+ ext->dst = match.key->dst;
+ msk->src = match.mask->src;
+ msk->dst = match.mask->dst;
+ }
+}
+
+static void
nfp_flower_compile_tun_ip_ext(struct nfp_flower_tun_ip_ext *ext,
struct nfp_flower_tun_ip_ext *msk,
- struct flow_cls_offload *flow)
+ struct flow_rule *rule)
{
- struct flow_rule *rule = flow_cls_offload_flow_rule(flow);
-
if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) {
struct flow_match_ip match;
@@ -317,57 +314,97 @@ nfp_flower_compile_tun_ip_ext(struct nfp_flower_tun_ip_ext *ext,
}
static void
-nfp_flower_compile_ipv4_gre_tun(struct nfp_flower_ipv4_gre_tun *ext,
- struct nfp_flower_ipv4_gre_tun *msk,
- struct flow_cls_offload *flow)
+nfp_flower_compile_tun_udp_key(__be32 *key, __be32 *key_msk,
+ struct flow_rule *rule)
{
- struct flow_rule *rule = flow_cls_offload_flow_rule(flow);
-
- memset(ext, 0, sizeof(struct nfp_flower_ipv4_gre_tun));
- memset(msk, 0, sizeof(struct nfp_flower_ipv4_gre_tun));
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
+ struct flow_match_enc_keyid match;
+ u32 vni;
- /* NVGRE is the only supported GRE tunnel type */
- ext->ethertype = cpu_to_be16(ETH_P_TEB);
- msk->ethertype = cpu_to_be16(~0);
+ flow_rule_match_enc_keyid(rule, &match);
+ vni = be32_to_cpu(match.key->keyid) << NFP_FL_TUN_VNI_OFFSET;
+ *key = cpu_to_be32(vni);
+ vni = be32_to_cpu(match.mask->keyid) << NFP_FL_TUN_VNI_OFFSET;
+ *key_msk = cpu_to_be32(vni);
+ }
+}
+static void
+nfp_flower_compile_tun_gre_key(__be32 *key, __be32 *key_msk, __be16 *flags,
+ __be16 *flags_msk, struct flow_rule *rule)
+{
if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
struct flow_match_enc_keyid match;
flow_rule_match_enc_keyid(rule, &match);
- ext->tun_key = match.key->keyid;
- msk->tun_key = match.mask->keyid;
+ *key = match.key->keyid;
+ *key_msk = match.mask->keyid;
- ext->tun_flags = cpu_to_be16(NFP_FL_GRE_FLAG_KEY);
- msk->tun_flags = cpu_to_be16(NFP_FL_GRE_FLAG_KEY);
+ *flags = cpu_to_be16(NFP_FL_GRE_FLAG_KEY);
+ *flags_msk = cpu_to_be16(NFP_FL_GRE_FLAG_KEY);
}
+}
+
+static void
+nfp_flower_compile_ipv4_gre_tun(struct nfp_flower_ipv4_gre_tun *ext,
+ struct nfp_flower_ipv4_gre_tun *msk,
+ struct flow_rule *rule)
+{
+ memset(ext, 0, sizeof(struct nfp_flower_ipv4_gre_tun));
+ memset(msk, 0, sizeof(struct nfp_flower_ipv4_gre_tun));
+
+ /* NVGRE is the only supported GRE tunnel type */
+ ext->ethertype = cpu_to_be16(ETH_P_TEB);
+ msk->ethertype = cpu_to_be16(~0);
- nfp_flower_compile_tun_ipv4_addrs(&ext->ipv4, &msk->ipv4, flow);
- nfp_flower_compile_tun_ip_ext(&ext->ip_ext, &msk->ip_ext, flow);
+ nfp_flower_compile_tun_ipv4_addrs(&ext->ipv4, &msk->ipv4, rule);
+ nfp_flower_compile_tun_ip_ext(&ext->ip_ext, &msk->ip_ext, rule);
+ nfp_flower_compile_tun_gre_key(&ext->tun_key, &msk->tun_key,
+ &ext->tun_flags, &msk->tun_flags, rule);
}
static void
nfp_flower_compile_ipv4_udp_tun(struct nfp_flower_ipv4_udp_tun *ext,
struct nfp_flower_ipv4_udp_tun *msk,
- struct flow_cls_offload *flow)
+ struct flow_rule *rule)
{
- struct flow_rule *rule = flow_cls_offload_flow_rule(flow);
-
memset(ext, 0, sizeof(struct nfp_flower_ipv4_udp_tun));
memset(msk, 0, sizeof(struct nfp_flower_ipv4_udp_tun));
- if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
- struct flow_match_enc_keyid match;
- u32 temp_vni;
+ nfp_flower_compile_tun_ipv4_addrs(&ext->ipv4, &msk->ipv4, rule);
+ nfp_flower_compile_tun_ip_ext(&ext->ip_ext, &msk->ip_ext, rule);
+ nfp_flower_compile_tun_udp_key(&ext->tun_id, &msk->tun_id, rule);
+}
- flow_rule_match_enc_keyid(rule, &match);
- temp_vni = be32_to_cpu(match.key->keyid) << NFP_FL_TUN_VNI_OFFSET;
- ext->tun_id = cpu_to_be32(temp_vni);
- temp_vni = be32_to_cpu(match.mask->keyid) << NFP_FL_TUN_VNI_OFFSET;
- msk->tun_id = cpu_to_be32(temp_vni);
- }
+static void
+nfp_flower_compile_ipv6_udp_tun(struct nfp_flower_ipv6_udp_tun *ext,
+ struct nfp_flower_ipv6_udp_tun *msk,
+ struct flow_rule *rule)
+{
+ memset(ext, 0, sizeof(struct nfp_flower_ipv6_udp_tun));
+ memset(msk, 0, sizeof(struct nfp_flower_ipv6_udp_tun));
+
+ nfp_flower_compile_tun_ipv6_addrs(&ext->ipv6, &msk->ipv6, rule);
+ nfp_flower_compile_tun_ip_ext(&ext->ip_ext, &msk->ip_ext, rule);
+ nfp_flower_compile_tun_udp_key(&ext->tun_id, &msk->tun_id, rule);
+}
+
+static void
+nfp_flower_compile_ipv6_gre_tun(struct nfp_flower_ipv6_gre_tun *ext,
+ struct nfp_flower_ipv6_gre_tun *msk,
+ struct flow_rule *rule)
+{
+ memset(ext, 0, sizeof(struct nfp_flower_ipv6_gre_tun));
+ memset(msk, 0, sizeof(struct nfp_flower_ipv6_gre_tun));
+
+ /* NVGRE is the only supported GRE tunnel type */
+ ext->ethertype = cpu_to_be16(ETH_P_TEB);
+ msk->ethertype = cpu_to_be16(~0);
- nfp_flower_compile_tun_ipv4_addrs(&ext->ipv4, &msk->ipv4, flow);
- nfp_flower_compile_tun_ip_ext(&ext->ip_ext, &msk->ip_ext, flow);
+ nfp_flower_compile_tun_ipv6_addrs(&ext->ipv6, &msk->ipv6, rule);
+ nfp_flower_compile_tun_ip_ext(&ext->ip_ext, &msk->ip_ext, rule);
+ nfp_flower_compile_tun_gre_key(&ext->tun_key, &msk->tun_key,
+ &ext->tun_flags, &msk->tun_flags, rule);
}
int nfp_flower_compile_flow_match(struct nfp_app *app,
@@ -378,6 +415,7 @@ int nfp_flower_compile_flow_match(struct nfp_app *app,
enum nfp_flower_tun_type tun_type,
struct netlink_ext_ack *extack)
{
+ struct flow_rule *rule = flow_cls_offload_flow_rule(flow);
u32 port_id;
int err;
u8 *ext;
@@ -393,7 +431,7 @@ int nfp_flower_compile_flow_match(struct nfp_app *app,
nfp_flower_compile_meta_tci((struct nfp_flower_meta_tci *)ext,
(struct nfp_flower_meta_tci *)msk,
- flow, key_ls->key_layer);
+ rule, key_ls->key_layer);
ext += sizeof(struct nfp_flower_meta_tci);
msk += sizeof(struct nfp_flower_meta_tci);
@@ -425,7 +463,7 @@ int nfp_flower_compile_flow_match(struct nfp_app *app,
if (NFP_FLOWER_LAYER_MAC & key_ls->key_layer) {
nfp_flower_compile_mac((struct nfp_flower_mac_mpls *)ext,
(struct nfp_flower_mac_mpls *)msk,
- flow);
+ rule);
ext += sizeof(struct nfp_flower_mac_mpls);
msk += sizeof(struct nfp_flower_mac_mpls);
}
@@ -433,7 +471,7 @@ int nfp_flower_compile_flow_match(struct nfp_app *app,
if (NFP_FLOWER_LAYER_TP & key_ls->key_layer) {
nfp_flower_compile_tport((struct nfp_flower_tp_ports *)ext,
(struct nfp_flower_tp_ports *)msk,
- flow);
+ rule);
ext += sizeof(struct nfp_flower_tp_ports);
msk += sizeof(struct nfp_flower_tp_ports);
}
@@ -441,7 +479,7 @@ int nfp_flower_compile_flow_match(struct nfp_app *app,
if (NFP_FLOWER_LAYER_IPV4 & key_ls->key_layer) {
nfp_flower_compile_ipv4((struct nfp_flower_ipv4 *)ext,
(struct nfp_flower_ipv4 *)msk,
- flow);
+ rule);
ext += sizeof(struct nfp_flower_ipv4);
msk += sizeof(struct nfp_flower_ipv4);
}
@@ -449,43 +487,83 @@ int nfp_flower_compile_flow_match(struct nfp_app *app,
if (NFP_FLOWER_LAYER_IPV6 & key_ls->key_layer) {
nfp_flower_compile_ipv6((struct nfp_flower_ipv6 *)ext,
(struct nfp_flower_ipv6 *)msk,
- flow);
+ rule);
ext += sizeof(struct nfp_flower_ipv6);
msk += sizeof(struct nfp_flower_ipv6);
}
if (key_ls->key_layer_two & NFP_FLOWER_LAYER2_GRE) {
- __be32 tun_dst;
-
- nfp_flower_compile_ipv4_gre_tun((void *)ext, (void *)msk, flow);
- tun_dst = ((struct nfp_flower_ipv4_gre_tun *)ext)->ipv4.dst;
- ext += sizeof(struct nfp_flower_ipv4_gre_tun);
- msk += sizeof(struct nfp_flower_ipv4_gre_tun);
-
- /* Store the tunnel destination in the rule data.
- * This must be present and be an exact match.
- */
- nfp_flow->nfp_tun_ipv4_addr = tun_dst;
- nfp_tunnel_add_ipv4_off(app, tun_dst);
+ if (key_ls->key_layer_two & NFP_FLOWER_LAYER2_TUN_IPV6) {
+ struct nfp_flower_ipv6_gre_tun *gre_match;
+ struct nfp_ipv6_addr_entry *entry;
+ struct in6_addr *dst;
+
+ nfp_flower_compile_ipv6_gre_tun((void *)ext,
+ (void *)msk, rule);
+ gre_match = (struct nfp_flower_ipv6_gre_tun *)ext;
+ dst = &gre_match->ipv6.dst;
+ ext += sizeof(struct nfp_flower_ipv6_gre_tun);
+ msk += sizeof(struct nfp_flower_ipv6_gre_tun);
+
+ entry = nfp_tunnel_add_ipv6_off(app, dst);
+ if (!entry)
+ return -EOPNOTSUPP;
+
+ nfp_flow->nfp_tun_ipv6 = entry;
+ } else {
+ __be32 dst;
+
+ nfp_flower_compile_ipv4_gre_tun((void *)ext,
+ (void *)msk, rule);
+ dst = ((struct nfp_flower_ipv4_gre_tun *)ext)->ipv4.dst;
+ ext += sizeof(struct nfp_flower_ipv4_gre_tun);
+ msk += sizeof(struct nfp_flower_ipv4_gre_tun);
+
+ /* Store the tunnel destination in the rule data.
+ * This must be present and be an exact match.
+ */
+ nfp_flow->nfp_tun_ipv4_addr = dst;
+ nfp_tunnel_add_ipv4_off(app, dst);
+ }
}
if (key_ls->key_layer & NFP_FLOWER_LAYER_VXLAN ||
key_ls->key_layer_two & NFP_FLOWER_LAYER2_GENEVE) {
- __be32 tun_dst;
-
- nfp_flower_compile_ipv4_udp_tun((void *)ext, (void *)msk, flow);
- tun_dst = ((struct nfp_flower_ipv4_udp_tun *)ext)->ipv4.dst;
- ext += sizeof(struct nfp_flower_ipv4_udp_tun);
- msk += sizeof(struct nfp_flower_ipv4_udp_tun);
-
- /* Store the tunnel destination in the rule data.
- * This must be present and be an exact match.
- */
- nfp_flow->nfp_tun_ipv4_addr = tun_dst;
- nfp_tunnel_add_ipv4_off(app, tun_dst);
+ if (key_ls->key_layer_two & NFP_FLOWER_LAYER2_TUN_IPV6) {
+ struct nfp_flower_ipv6_udp_tun *udp_match;
+ struct nfp_ipv6_addr_entry *entry;
+ struct in6_addr *dst;
+
+ nfp_flower_compile_ipv6_udp_tun((void *)ext,
+ (void *)msk, rule);
+ udp_match = (struct nfp_flower_ipv6_udp_tun *)ext;
+ dst = &udp_match->ipv6.dst;
+ ext += sizeof(struct nfp_flower_ipv6_udp_tun);
+ msk += sizeof(struct nfp_flower_ipv6_udp_tun);
+
+ entry = nfp_tunnel_add_ipv6_off(app, dst);
+ if (!entry)
+ return -EOPNOTSUPP;
+
+ nfp_flow->nfp_tun_ipv6 = entry;
+ } else {
+ __be32 dst;
+
+ nfp_flower_compile_ipv4_udp_tun((void *)ext,
+ (void *)msk, rule);
+ dst = ((struct nfp_flower_ipv4_udp_tun *)ext)->ipv4.dst;
+ ext += sizeof(struct nfp_flower_ipv4_udp_tun);
+ msk += sizeof(struct nfp_flower_ipv4_udp_tun);
+
+ /* Store the tunnel destination in the rule data.
+ * This must be present and be an exact match.
+ */
+ nfp_flow->nfp_tun_ipv4_addr = dst;
+ nfp_tunnel_add_ipv4_off(app, dst);
+ }
if (key_ls->key_layer_two & NFP_FLOWER_LAYER2_GENEVE_OP) {
- err = nfp_flower_compile_geneve_opt(ext, msk, flow);
+ err = nfp_flower_compile_geneve_opt(ext, msk, rule);
if (err)
return err;
}
diff --git a/drivers/net/ethernet/netronome/nfp/flower/metadata.c b/drivers/net/ethernet/netronome/nfp/flower/metadata.c
index 7c4a15e967df..5defd31d481c 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/metadata.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/metadata.c
@@ -65,17 +65,17 @@ static int nfp_get_stats_entry(struct nfp_app *app, u32 *stats_context_id)
freed_stats_id = priv->stats_ring_size;
/* Check for unallocated entries first. */
if (priv->stats_ids.init_unalloc > 0) {
- if (priv->active_mem_unit == priv->total_mem_units) {
- priv->stats_ids.init_unalloc--;
- priv->active_mem_unit = 0;
- }
-
*stats_context_id =
FIELD_PREP(NFP_FL_STAT_ID_STAT,
priv->stats_ids.init_unalloc - 1) |
FIELD_PREP(NFP_FL_STAT_ID_MU_NUM,
priv->active_mem_unit);
- priv->active_mem_unit++;
+
+ if (++priv->active_mem_unit == priv->total_mem_units) {
+ priv->stats_ids.init_unalloc--;
+ priv->active_mem_unit = 0;
+ }
+
return 0;
}
diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c
index 987ae221f6be..7ca5c1becfcf 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c
@@ -54,6 +54,10 @@
(BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) | \
BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS))
+#define NFP_FLOWER_WHITELIST_TUN_DISSECTOR_V6_R \
+ (BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) | \
+ BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS))
+
#define NFP_FLOWER_MERGE_FIELDS \
(NFP_FLOWER_LAYER_PORT | \
NFP_FLOWER_LAYER_MAC | \
@@ -64,7 +68,8 @@
#define NFP_FLOWER_PRE_TUN_RULE_FIELDS \
(NFP_FLOWER_LAYER_PORT | \
NFP_FLOWER_LAYER_MAC | \
- NFP_FLOWER_LAYER_IPV4)
+ NFP_FLOWER_LAYER_IPV4 | \
+ NFP_FLOWER_LAYER_IPV6)
struct nfp_flower_merge_check {
union {
@@ -146,10 +151,11 @@ static bool nfp_flower_check_higher_than_l3(struct flow_cls_offload *f)
static int
nfp_flower_calc_opt_layer(struct flow_dissector_key_enc_opts *enc_opts,
- u32 *key_layer_two, int *key_size,
+ u32 *key_layer_two, int *key_size, bool ipv6,
struct netlink_ext_ack *extack)
{
- if (enc_opts->len > NFP_FL_MAX_GENEVE_OPT_KEY) {
+ if (enc_opts->len > NFP_FL_MAX_GENEVE_OPT_KEY ||
+ (ipv6 && enc_opts->len > NFP_FL_MAX_GENEVE_OPT_KEY_V6)) {
NL_SET_ERR_MSG_MOD(extack, "unsupported offload: geneve options exceed maximum length");
return -EOPNOTSUPP;
}
@@ -167,7 +173,7 @@ nfp_flower_calc_udp_tun_layer(struct flow_dissector_key_ports *enc_ports,
struct flow_dissector_key_enc_opts *enc_op,
u32 *key_layer_two, u8 *key_layer, int *key_size,
struct nfp_flower_priv *priv,
- enum nfp_flower_tun_type *tun_type,
+ enum nfp_flower_tun_type *tun_type, bool ipv6,
struct netlink_ext_ack *extack)
{
int err;
@@ -176,7 +182,15 @@ nfp_flower_calc_udp_tun_layer(struct flow_dissector_key_ports *enc_ports,
case htons(IANA_VXLAN_UDP_PORT):
*tun_type = NFP_FL_TUNNEL_VXLAN;
*key_layer |= NFP_FLOWER_LAYER_VXLAN;
- *key_size += sizeof(struct nfp_flower_ipv4_udp_tun);
+
+ if (ipv6) {
+ *key_layer |= NFP_FLOWER_LAYER_EXT_META;
+ *key_size += sizeof(struct nfp_flower_ext_meta);
+ *key_layer_two |= NFP_FLOWER_LAYER2_TUN_IPV6;
+ *key_size += sizeof(struct nfp_flower_ipv6_udp_tun);
+ } else {
+ *key_size += sizeof(struct nfp_flower_ipv4_udp_tun);
+ }
if (enc_op) {
NL_SET_ERR_MSG_MOD(extack, "unsupported offload: encap options not supported on vxlan tunnels");
@@ -192,7 +206,13 @@ nfp_flower_calc_udp_tun_layer(struct flow_dissector_key_ports *enc_ports,
*key_layer |= NFP_FLOWER_LAYER_EXT_META;
*key_size += sizeof(struct nfp_flower_ext_meta);
*key_layer_two |= NFP_FLOWER_LAYER2_GENEVE;
- *key_size += sizeof(struct nfp_flower_ipv4_udp_tun);
+
+ if (ipv6) {
+ *key_layer_two |= NFP_FLOWER_LAYER2_TUN_IPV6;
+ *key_size += sizeof(struct nfp_flower_ipv6_udp_tun);
+ } else {
+ *key_size += sizeof(struct nfp_flower_ipv4_udp_tun);
+ }
if (!enc_op)
break;
@@ -200,8 +220,8 @@ nfp_flower_calc_udp_tun_layer(struct flow_dissector_key_ports *enc_ports,
NL_SET_ERR_MSG_MOD(extack, "unsupported offload: loaded firmware does not support geneve option offload");
return -EOPNOTSUPP;
}
- err = nfp_flower_calc_opt_layer(enc_op, key_layer_two,
- key_size, extack);
+ err = nfp_flower_calc_opt_layer(enc_op, key_layer_two, key_size,
+ ipv6, extack);
if (err)
return err;
break;
@@ -237,6 +257,8 @@ nfp_flower_calculate_key_layers(struct nfp_app *app,
/* If any tun dissector is used then the required set must be used. */
if (dissector->used_keys & NFP_FLOWER_WHITELIST_TUN_DISSECTOR &&
+ (dissector->used_keys & NFP_FLOWER_WHITELIST_TUN_DISSECTOR_V6_R)
+ != NFP_FLOWER_WHITELIST_TUN_DISSECTOR_V6_R &&
(dissector->used_keys & NFP_FLOWER_WHITELIST_TUN_DISSECTOR_R)
!= NFP_FLOWER_WHITELIST_TUN_DISSECTOR_R) {
NL_SET_ERR_MSG_MOD(extack, "unsupported offload: tunnel match not supported");
@@ -268,8 +290,10 @@ nfp_flower_calculate_key_layers(struct nfp_app *app,
if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL)) {
struct flow_match_enc_opts enc_op = { NULL, NULL };
struct flow_match_ipv4_addrs ipv4_addrs;
+ struct flow_match_ipv6_addrs ipv6_addrs;
struct flow_match_control enc_ctl;
struct flow_match_ports enc_ports;
+ bool ipv6_tun = false;
flow_rule_match_enc_control(rule, &enc_ctl);
@@ -277,38 +301,62 @@ nfp_flower_calculate_key_layers(struct nfp_app *app,
NL_SET_ERR_MSG_MOD(extack, "unsupported offload: wildcarded protocols on tunnels are not supported");
return -EOPNOTSUPP;
}
- if (enc_ctl.key->addr_type != FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
- NL_SET_ERR_MSG_MOD(extack, "unsupported offload: only IPv4 tunnels are supported");
+
+ ipv6_tun = enc_ctl.key->addr_type ==
+ FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+ if (ipv6_tun &&
+ !(priv->flower_ext_feats & NFP_FL_FEATS_IPV6_TUN)) {
+ NL_SET_ERR_MSG_MOD(extack, "unsupported offload: firmware does not support IPv6 tunnels");
return -EOPNOTSUPP;
}
- /* These fields are already verified as used. */
- flow_rule_match_enc_ipv4_addrs(rule, &ipv4_addrs);
- if (ipv4_addrs.mask->dst != cpu_to_be32(~0)) {
- NL_SET_ERR_MSG_MOD(extack, "unsupported offload: only an exact match IPv4 destination address is supported");
+ if (!ipv6_tun &&
+ enc_ctl.key->addr_type != FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
+ NL_SET_ERR_MSG_MOD(extack, "unsupported offload: tunnel address type not IPv4 or IPv6");
return -EOPNOTSUPP;
}
+ if (ipv6_tun) {
+ flow_rule_match_enc_ipv6_addrs(rule, &ipv6_addrs);
+ if (memchr_inv(&ipv6_addrs.mask->dst, 0xff,
+ sizeof(ipv6_addrs.mask->dst))) {
+ NL_SET_ERR_MSG_MOD(extack, "unsupported offload: only an exact match IPv6 destination address is supported");
+ return -EOPNOTSUPP;
+ }
+ } else {
+ flow_rule_match_enc_ipv4_addrs(rule, &ipv4_addrs);
+ if (ipv4_addrs.mask->dst != cpu_to_be32(~0)) {
+ NL_SET_ERR_MSG_MOD(extack, "unsupported offload: only an exact match IPv4 destination address is supported");
+ return -EOPNOTSUPP;
+ }
+ }
+
if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_OPTS))
flow_rule_match_enc_opts(rule, &enc_op);
-
if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS)) {
/* check if GRE, which has no enc_ports */
- if (netif_is_gretap(netdev)) {
- *tun_type = NFP_FL_TUNNEL_GRE;
- key_layer |= NFP_FLOWER_LAYER_EXT_META;
- key_size += sizeof(struct nfp_flower_ext_meta);
- key_layer_two |= NFP_FLOWER_LAYER2_GRE;
- key_size +=
- sizeof(struct nfp_flower_ipv4_gre_tun);
+ if (!netif_is_gretap(netdev)) {
+ NL_SET_ERR_MSG_MOD(extack, "unsupported offload: an exact match on L4 destination port is required for non-GRE tunnels");
+ return -EOPNOTSUPP;
+ }
- if (enc_op.key) {
- NL_SET_ERR_MSG_MOD(extack, "unsupported offload: encap options not supported on GRE tunnels");
- return -EOPNOTSUPP;
- }
+ *tun_type = NFP_FL_TUNNEL_GRE;
+ key_layer |= NFP_FLOWER_LAYER_EXT_META;
+ key_size += sizeof(struct nfp_flower_ext_meta);
+ key_layer_two |= NFP_FLOWER_LAYER2_GRE;
+
+ if (ipv6_tun) {
+ key_layer_two |= NFP_FLOWER_LAYER2_TUN_IPV6;
+ key_size +=
+ sizeof(struct nfp_flower_ipv6_udp_tun);
} else {
- NL_SET_ERR_MSG_MOD(extack, "unsupported offload: an exact match on L4 destination port is required for non-GRE tunnels");
+ key_size +=
+ sizeof(struct nfp_flower_ipv4_udp_tun);
+ }
+
+ if (enc_op.key) {
+ NL_SET_ERR_MSG_MOD(extack, "unsupported offload: encap options not supported on GRE tunnels");
return -EOPNOTSUPP;
}
} else {
@@ -323,7 +371,8 @@ nfp_flower_calculate_key_layers(struct nfp_app *app,
&key_layer_two,
&key_layer,
&key_size, priv,
- tun_type, extack);
+ tun_type, ipv6_tun,
+ extack);
if (err)
return err;
@@ -491,6 +540,7 @@ nfp_flower_allocate_new(struct nfp_fl_key_ls *key_layer)
goto err_free_mask;
flow_pay->nfp_tun_ipv4_addr = 0;
+ flow_pay->nfp_tun_ipv6 = NULL;
flow_pay->meta.flags = 0;
INIT_LIST_HEAD(&flow_pay->linked_flows);
flow_pay->in_hw = false;
@@ -517,10 +567,12 @@ nfp_flower_update_merge_with_actions(struct nfp_fl_payload *flow,
struct nfp_fl_set_ip4_addrs *ipv4_add;
struct nfp_fl_set_ipv6_addr *ipv6_add;
struct nfp_fl_push_vlan *push_vlan;
+ struct nfp_fl_pre_tunnel *pre_tun;
struct nfp_fl_set_tport *tport;
struct nfp_fl_set_eth *eth;
struct nfp_fl_act_head *a;
unsigned int act_off = 0;
+ bool ipv6_tun = false;
u8 act_id = 0;
u8 *ports;
int i;
@@ -542,14 +594,18 @@ nfp_flower_update_merge_with_actions(struct nfp_fl_payload *flow,
case NFP_FL_ACTION_OPCODE_POP_VLAN:
merge->tci = cpu_to_be16(0);
break;
- case NFP_FL_ACTION_OPCODE_SET_IPV4_TUNNEL:
+ case NFP_FL_ACTION_OPCODE_SET_TUNNEL:
/* New tunnel header means l2 to l4 can be matched. */
eth_broadcast_addr(&merge->l2.mac_dst[0]);
eth_broadcast_addr(&merge->l2.mac_src[0]);
memset(&merge->l4, 0xff,
sizeof(struct nfp_flower_tp_ports));
- memset(&merge->ipv4, 0xff,
- sizeof(struct nfp_flower_ipv4));
+ if (ipv6_tun)
+ memset(&merge->ipv6, 0xff,
+ sizeof(struct nfp_flower_ipv6));
+ else
+ memset(&merge->ipv4, 0xff,
+ sizeof(struct nfp_flower_ipv4));
break;
case NFP_FL_ACTION_OPCODE_SET_ETHERNET:
eth = (struct nfp_fl_set_eth *)a;
@@ -597,6 +653,10 @@ nfp_flower_update_merge_with_actions(struct nfp_fl_payload *flow,
ports[i] |= tport->tp_port_mask[i];
break;
case NFP_FL_ACTION_OPCODE_PRE_TUNNEL:
+ pre_tun = (struct nfp_fl_pre_tunnel *)a;
+ ipv6_tun = be16_to_cpu(pre_tun->flags) &
+ NFP_FL_PRE_TUN_IPV6;
+ break;
case NFP_FL_ACTION_OPCODE_PRE_LAG:
case NFP_FL_ACTION_OPCODE_PUSH_GENEVE:
break;
@@ -765,15 +825,15 @@ nfp_fl_verify_post_tun_acts(char *acts, int len, struct nfp_fl_push_vlan **vlan)
static int
nfp_fl_push_vlan_after_tun(char *acts, int len, struct nfp_fl_push_vlan *vlan)
{
- struct nfp_fl_set_ipv4_tun *tun;
+ struct nfp_fl_set_tun *tun;
struct nfp_fl_act_head *a;
unsigned int act_off = 0;
while (act_off < len) {
a = (struct nfp_fl_act_head *)&acts[act_off];
- if (a->jump_id == NFP_FL_ACTION_OPCODE_SET_IPV4_TUNNEL) {
- tun = (struct nfp_fl_set_ipv4_tun *)a;
+ if (a->jump_id == NFP_FL_ACTION_OPCODE_SET_TUNNEL) {
+ tun = (struct nfp_fl_set_tun *)a;
tun->outer_vlan_tpid = vlan->vlan_tpid;
tun->outer_vlan_tci = vlan->vlan_tci;
@@ -1058,15 +1118,22 @@ nfp_flower_validate_pre_tun_rule(struct nfp_app *app,
return -EOPNOTSUPP;
}
- if (key_layer & NFP_FLOWER_LAYER_IPV4) {
+ if (key_layer & NFP_FLOWER_LAYER_IPV4 ||
+ key_layer & NFP_FLOWER_LAYER_IPV6) {
+ /* Flags and proto fields have same offset in IPv4 and IPv6. */
int ip_flags = offsetof(struct nfp_flower_ipv4, ip_ext.flags);
int ip_proto = offsetof(struct nfp_flower_ipv4, ip_ext.proto);
+ int size;
int i;
+ size = key_layer & NFP_FLOWER_LAYER_IPV4 ?
+ sizeof(struct nfp_flower_ipv4) :
+ sizeof(struct nfp_flower_ipv6);
+
mask += sizeof(struct nfp_flower_mac_mpls);
/* Ensure proto and flags are the only IP layer fields. */
- for (i = 0; i < sizeof(struct nfp_flower_ipv4); i++)
+ for (i = 0; i < size; i++)
if (mask[i] && i != ip_flags && i != ip_proto) {
NL_SET_ERR_MSG_MOD(extack, "unsupported pre-tunnel rule: only flags and proto can be matched in ip header");
return -EOPNOTSUPP;
@@ -1195,6 +1262,8 @@ err_remove_rhash:
err_release_metadata:
nfp_modify_flow_metadata(app, flow_pay);
err_destroy_flow:
+ if (flow_pay->nfp_tun_ipv6)
+ nfp_tunnel_put_ipv6_off(app, flow_pay->nfp_tun_ipv6);
kfree(flow_pay->action_data);
kfree(flow_pay->mask_data);
kfree(flow_pay->unmasked_data);
@@ -1311,6 +1380,9 @@ nfp_flower_del_offload(struct nfp_app *app, struct net_device *netdev,
if (nfp_flow->nfp_tun_ipv4_addr)
nfp_tunnel_del_ipv4_off(app, nfp_flow->nfp_tun_ipv4_addr);
+ if (nfp_flow->nfp_tun_ipv6)
+ nfp_tunnel_put_ipv6_off(app, nfp_flow->nfp_tun_ipv6);
+
if (!nfp_flow->in_hw) {
err = 0;
goto err_free_merge_flow;
diff --git a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c
index 2600ce476d6b..2df3deedf9fd 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c
@@ -55,6 +55,25 @@ struct nfp_tun_active_tuns {
};
/**
+ * struct nfp_tun_active_tuns_v6 - periodic message of active IPv6 tunnels
+ * @seq: sequence number of the message
+ * @count: number of tunnels report in message
+ * @flags: options part of the request
+ * @tun_info.ipv6: dest IPv6 address of active route
+ * @tun_info.egress_port: port the encapsulated packet egressed
+ * @tun_info: tunnels that have sent traffic in reported period
+ */
+struct nfp_tun_active_tuns_v6 {
+ __be32 seq;
+ __be32 count;
+ __be32 flags;
+ struct route_ip_info_v6 {
+ struct in6_addr ipv6;
+ __be32 egress_port;
+ } tun_info[];
+};
+
+/**
* struct nfp_tun_neigh - neighbour/route entry on the NFP
* @dst_ipv4: destination IPv4 address
* @src_ipv4: source IPv4 address
@@ -71,6 +90,22 @@ struct nfp_tun_neigh {
};
/**
+ * struct nfp_tun_neigh_v6 - neighbour/route entry on the NFP
+ * @dst_ipv6: destination IPv6 address
+ * @src_ipv6: source IPv6 address
+ * @dst_addr: destination MAC address
+ * @src_addr: source MAC address
+ * @port_id: NFP port to output packet on - associated with source IPv6
+ */
+struct nfp_tun_neigh_v6 {
+ struct in6_addr dst_ipv6;
+ struct in6_addr src_ipv6;
+ u8 dst_addr[ETH_ALEN];
+ u8 src_addr[ETH_ALEN];
+ __be32 port_id;
+};
+
+/**
* struct nfp_tun_req_route_ipv4 - NFP requests a route/neighbour lookup
* @ingress_port: ingress port of packet that signalled request
* @ipv4_addr: destination ipv4 address for route
@@ -83,13 +118,23 @@ struct nfp_tun_req_route_ipv4 {
};
/**
- * struct nfp_ipv4_route_entry - routes that are offloaded to the NFP
- * @ipv4_addr: destination of route
+ * struct nfp_tun_req_route_ipv6 - NFP requests an IPv6 route/neighbour lookup
+ * @ingress_port: ingress port of packet that signalled request
+ * @ipv6_addr: destination ipv6 address for route
+ */
+struct nfp_tun_req_route_ipv6 {
+ __be32 ingress_port;
+ struct in6_addr ipv6_addr;
+};
+
+/**
+ * struct nfp_offloaded_route - routes that are offloaded to the NFP
* @list: list pointer
+ * @ip_add: destination of route - can be IPv4 or IPv6
*/
-struct nfp_ipv4_route_entry {
- __be32 ipv4_addr;
+struct nfp_offloaded_route {
struct list_head list;
+ u8 ip_add[];
};
#define NFP_FL_IPV4_ADDRS_MAX 32
@@ -116,6 +161,18 @@ struct nfp_ipv4_addr_entry {
struct list_head list;
};
+#define NFP_FL_IPV6_ADDRS_MAX 4
+
+/**
+ * struct nfp_tun_ipv6_addr - set the IP address list on the NFP
+ * @count: number of IPs populated in the array
+ * @ipv6_addr: array of IPV6_ADDRS_MAX 128 bit IPv6 addresses
+ */
+struct nfp_tun_ipv6_addr {
+ __be32 count;
+ struct in6_addr ipv6_addr[NFP_FL_IPV6_ADDRS_MAX];
+};
+
#define NFP_TUN_MAC_OFFLOAD_DEL_FLAG 0x2
/**
@@ -206,6 +263,49 @@ void nfp_tunnel_keep_alive(struct nfp_app *app, struct sk_buff *skb)
rcu_read_unlock();
}
+void nfp_tunnel_keep_alive_v6(struct nfp_app *app, struct sk_buff *skb)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ struct nfp_tun_active_tuns_v6 *payload;
+ struct net_device *netdev;
+ int count, i, pay_len;
+ struct neighbour *n;
+ void *ipv6_add;
+ u32 port;
+
+ payload = nfp_flower_cmsg_get_data(skb);
+ count = be32_to_cpu(payload->count);
+ if (count > NFP_FL_IPV6_ADDRS_MAX) {
+ nfp_flower_cmsg_warn(app, "IPv6 tunnel keep-alive request exceeds max routes.\n");
+ return;
+ }
+
+ pay_len = nfp_flower_cmsg_get_data_len(skb);
+ if (pay_len != struct_size(payload, tun_info, count)) {
+ nfp_flower_cmsg_warn(app, "Corruption in tunnel keep-alive message.\n");
+ return;
+ }
+
+ rcu_read_lock();
+ for (i = 0; i < count; i++) {
+ ipv6_add = &payload->tun_info[i].ipv6;
+ port = be32_to_cpu(payload->tun_info[i].egress_port);
+ netdev = nfp_app_dev_get(app, port, NULL);
+ if (!netdev)
+ continue;
+
+ n = neigh_lookup(&nd_tbl, ipv6_add, netdev);
+ if (!n)
+ continue;
+
+ /* Update the used timestamp of neighbour */
+ neigh_event_send(n, NULL);
+ neigh_release(n);
+ }
+ rcu_read_unlock();
+#endif
+}
+
static int
nfp_flower_xmit_tun_conf(struct nfp_app *app, u8 mtype, u16 plen, void *pdata,
gfp_t flag)
@@ -224,71 +324,126 @@ nfp_flower_xmit_tun_conf(struct nfp_app *app, u8 mtype, u16 plen, void *pdata,
return 0;
}
-static bool nfp_tun_has_route(struct nfp_app *app, __be32 ipv4_addr)
+static bool
+__nfp_tun_has_route(struct list_head *route_list, spinlock_t *list_lock,
+ void *add, int add_len)
{
- struct nfp_flower_priv *priv = app->priv;
- struct nfp_ipv4_route_entry *entry;
- struct list_head *ptr, *storage;
+ struct nfp_offloaded_route *entry;
- spin_lock_bh(&priv->tun.neigh_off_lock);
- list_for_each_safe(ptr, storage, &priv->tun.neigh_off_list) {
- entry = list_entry(ptr, struct nfp_ipv4_route_entry, list);
- if (entry->ipv4_addr == ipv4_addr) {
- spin_unlock_bh(&priv->tun.neigh_off_lock);
+ spin_lock_bh(list_lock);
+ list_for_each_entry(entry, route_list, list)
+ if (!memcmp(entry->ip_add, add, add_len)) {
+ spin_unlock_bh(list_lock);
return true;
}
- }
- spin_unlock_bh(&priv->tun.neigh_off_lock);
+ spin_unlock_bh(list_lock);
return false;
}
-static void nfp_tun_add_route_to_cache(struct nfp_app *app, __be32 ipv4_addr)
+static int
+__nfp_tun_add_route_to_cache(struct list_head *route_list,
+ spinlock_t *list_lock, void *add, int add_len)
{
- struct nfp_flower_priv *priv = app->priv;
- struct nfp_ipv4_route_entry *entry;
- struct list_head *ptr, *storage;
+ struct nfp_offloaded_route *entry;
- spin_lock_bh(&priv->tun.neigh_off_lock);
- list_for_each_safe(ptr, storage, &priv->tun.neigh_off_list) {
- entry = list_entry(ptr, struct nfp_ipv4_route_entry, list);
- if (entry->ipv4_addr == ipv4_addr) {
- spin_unlock_bh(&priv->tun.neigh_off_lock);
- return;
+ spin_lock_bh(list_lock);
+ list_for_each_entry(entry, route_list, list)
+ if (!memcmp(entry->ip_add, add, add_len)) {
+ spin_unlock_bh(list_lock);
+ return 0;
}
- }
- entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
+
+ entry = kmalloc(sizeof(*entry) + add_len, GFP_ATOMIC);
if (!entry) {
- spin_unlock_bh(&priv->tun.neigh_off_lock);
- nfp_flower_cmsg_warn(app, "Mem error when storing new route.\n");
- return;
+ spin_unlock_bh(list_lock);
+ return -ENOMEM;
}
- entry->ipv4_addr = ipv4_addr;
- list_add_tail(&entry->list, &priv->tun.neigh_off_list);
- spin_unlock_bh(&priv->tun.neigh_off_lock);
+ memcpy(entry->ip_add, add, add_len);
+ list_add_tail(&entry->list, route_list);
+ spin_unlock_bh(list_lock);
+
+ return 0;
}
-static void nfp_tun_del_route_from_cache(struct nfp_app *app, __be32 ipv4_addr)
+static void
+__nfp_tun_del_route_from_cache(struct list_head *route_list,
+ spinlock_t *list_lock, void *add, int add_len)
{
- struct nfp_flower_priv *priv = app->priv;
- struct nfp_ipv4_route_entry *entry;
- struct list_head *ptr, *storage;
+ struct nfp_offloaded_route *entry;
- spin_lock_bh(&priv->tun.neigh_off_lock);
- list_for_each_safe(ptr, storage, &priv->tun.neigh_off_list) {
- entry = list_entry(ptr, struct nfp_ipv4_route_entry, list);
- if (entry->ipv4_addr == ipv4_addr) {
+ spin_lock_bh(list_lock);
+ list_for_each_entry(entry, route_list, list)
+ if (!memcmp(entry->ip_add, add, add_len)) {
list_del(&entry->list);
kfree(entry);
break;
}
- }
- spin_unlock_bh(&priv->tun.neigh_off_lock);
+ spin_unlock_bh(list_lock);
+}
+
+static bool nfp_tun_has_route_v4(struct nfp_app *app, __be32 *ipv4_addr)
+{
+ struct nfp_flower_priv *priv = app->priv;
+
+ return __nfp_tun_has_route(&priv->tun.neigh_off_list_v4,
+ &priv->tun.neigh_off_lock_v4, ipv4_addr,
+ sizeof(*ipv4_addr));
+}
+
+static bool
+nfp_tun_has_route_v6(struct nfp_app *app, struct in6_addr *ipv6_addr)
+{
+ struct nfp_flower_priv *priv = app->priv;
+
+ return __nfp_tun_has_route(&priv->tun.neigh_off_list_v6,
+ &priv->tun.neigh_off_lock_v6, ipv6_addr,
+ sizeof(*ipv6_addr));
+}
+
+static void
+nfp_tun_add_route_to_cache_v4(struct nfp_app *app, __be32 *ipv4_addr)
+{
+ struct nfp_flower_priv *priv = app->priv;
+
+ __nfp_tun_add_route_to_cache(&priv->tun.neigh_off_list_v4,
+ &priv->tun.neigh_off_lock_v4, ipv4_addr,
+ sizeof(*ipv4_addr));
+}
+
+static void
+nfp_tun_add_route_to_cache_v6(struct nfp_app *app, struct in6_addr *ipv6_addr)
+{
+ struct nfp_flower_priv *priv = app->priv;
+
+ __nfp_tun_add_route_to_cache(&priv->tun.neigh_off_list_v6,
+ &priv->tun.neigh_off_lock_v6, ipv6_addr,
+ sizeof(*ipv6_addr));
}
static void
-nfp_tun_write_neigh(struct net_device *netdev, struct nfp_app *app,
- struct flowi4 *flow, struct neighbour *neigh, gfp_t flag)
+nfp_tun_del_route_from_cache_v4(struct nfp_app *app, __be32 *ipv4_addr)
+{
+ struct nfp_flower_priv *priv = app->priv;
+
+ __nfp_tun_del_route_from_cache(&priv->tun.neigh_off_list_v4,
+ &priv->tun.neigh_off_lock_v4, ipv4_addr,
+ sizeof(*ipv4_addr));
+}
+
+static void
+nfp_tun_del_route_from_cache_v6(struct nfp_app *app, struct in6_addr *ipv6_addr)
+{
+ struct nfp_flower_priv *priv = app->priv;
+
+ __nfp_tun_del_route_from_cache(&priv->tun.neigh_off_list_v6,
+ &priv->tun.neigh_off_lock_v6, ipv6_addr,
+ sizeof(*ipv6_addr));
+}
+
+static void
+nfp_tun_write_neigh_v4(struct net_device *netdev, struct nfp_app *app,
+ struct flowi4 *flow, struct neighbour *neigh, gfp_t flag)
{
struct nfp_tun_neigh payload;
u32 port_id;
@@ -302,7 +457,7 @@ nfp_tun_write_neigh(struct net_device *netdev, struct nfp_app *app,
/* If entry has expired send dst IP with all other fields 0. */
if (!(neigh->nud_state & NUD_VALID) || neigh->dead) {
- nfp_tun_del_route_from_cache(app, payload.dst_ipv4);
+ nfp_tun_del_route_from_cache_v4(app, &payload.dst_ipv4);
/* Trigger ARP to verify invalid neighbour state. */
neigh_event_send(neigh, NULL);
goto send_msg;
@@ -314,7 +469,7 @@ nfp_tun_write_neigh(struct net_device *netdev, struct nfp_app *app,
neigh_ha_snapshot(payload.dst_addr, neigh, netdev);
payload.port_id = cpu_to_be32(port_id);
/* Add destination of new route to NFP cache. */
- nfp_tun_add_route_to_cache(app, payload.dst_ipv4);
+ nfp_tun_add_route_to_cache_v4(app, &payload.dst_ipv4);
send_msg:
nfp_flower_xmit_tun_conf(app, NFP_FLOWER_CMSG_TYPE_TUN_NEIGH,
@@ -322,16 +477,54 @@ send_msg:
(unsigned char *)&payload, flag);
}
+static void
+nfp_tun_write_neigh_v6(struct net_device *netdev, struct nfp_app *app,
+ struct flowi6 *flow, struct neighbour *neigh, gfp_t flag)
+{
+ struct nfp_tun_neigh_v6 payload;
+ u32 port_id;
+
+ port_id = nfp_flower_get_port_id_from_netdev(app, netdev);
+ if (!port_id)
+ return;
+
+ memset(&payload, 0, sizeof(struct nfp_tun_neigh_v6));
+ payload.dst_ipv6 = flow->daddr;
+
+ /* If entry has expired send dst IP with all other fields 0. */
+ if (!(neigh->nud_state & NUD_VALID) || neigh->dead) {
+ nfp_tun_del_route_from_cache_v6(app, &payload.dst_ipv6);
+ /* Trigger probe to verify invalid neighbour state. */
+ neigh_event_send(neigh, NULL);
+ goto send_msg;
+ }
+
+ /* Have a valid neighbour so populate rest of entry. */
+ payload.src_ipv6 = flow->saddr;
+ ether_addr_copy(payload.src_addr, netdev->dev_addr);
+ neigh_ha_snapshot(payload.dst_addr, neigh, netdev);
+ payload.port_id = cpu_to_be32(port_id);
+ /* Add destination of new route to NFP cache. */
+ nfp_tun_add_route_to_cache_v6(app, &payload.dst_ipv6);
+
+send_msg:
+ nfp_flower_xmit_tun_conf(app, NFP_FLOWER_CMSG_TYPE_TUN_NEIGH_V6,
+ sizeof(struct nfp_tun_neigh_v6),
+ (unsigned char *)&payload, flag);
+}
+
static int
nfp_tun_neigh_event_handler(struct notifier_block *nb, unsigned long event,
void *ptr)
{
struct nfp_flower_priv *app_priv;
struct netevent_redirect *redir;
- struct flowi4 flow = {};
+ struct flowi4 flow4 = {};
+ struct flowi6 flow6 = {};
struct neighbour *n;
struct nfp_app *app;
struct rtable *rt;
+ bool ipv6 = false;
int err;
switch (event) {
@@ -346,7 +539,13 @@ nfp_tun_neigh_event_handler(struct notifier_block *nb, unsigned long event,
return NOTIFY_DONE;
}
- flow.daddr = *(__be32 *)n->primary_key;
+ if (n->tbl->family == AF_INET6)
+ ipv6 = true;
+
+ if (ipv6)
+ flow6.daddr = *(struct in6_addr *)n->primary_key;
+ else
+ flow4.daddr = *(__be32 *)n->primary_key;
app_priv = container_of(nb, struct nfp_flower_priv, tun.neigh_nb);
app = app_priv->app;
@@ -356,28 +555,46 @@ nfp_tun_neigh_event_handler(struct notifier_block *nb, unsigned long event,
return NOTIFY_DONE;
/* Only concerned with changes to routes already added to NFP. */
- if (!nfp_tun_has_route(app, flow.daddr))
+ if ((ipv6 && !nfp_tun_has_route_v6(app, &flow6.daddr)) ||
+ (!ipv6 && !nfp_tun_has_route_v4(app, &flow4.daddr)))
return NOTIFY_DONE;
#if IS_ENABLED(CONFIG_INET)
- /* Do a route lookup to populate flow data. */
- rt = ip_route_output_key(dev_net(n->dev), &flow);
- err = PTR_ERR_OR_ZERO(rt);
- if (err)
+ if (ipv6) {
+#if IS_ENABLED(CONFIG_IPV6)
+ struct dst_entry *dst;
+
+ dst = ipv6_stub->ipv6_dst_lookup_flow(dev_net(n->dev), NULL,
+ &flow6, NULL);
+ if (IS_ERR(dst))
+ return NOTIFY_DONE;
+
+ dst_release(dst);
+ flow6.flowi6_proto = IPPROTO_UDP;
+ nfp_tun_write_neigh_v6(n->dev, app, &flow6, n, GFP_ATOMIC);
+#else
return NOTIFY_DONE;
+#endif /* CONFIG_IPV6 */
+ } else {
+ /* Do a route lookup to populate flow data. */
+ rt = ip_route_output_key(dev_net(n->dev), &flow4);
+ err = PTR_ERR_OR_ZERO(rt);
+ if (err)
+ return NOTIFY_DONE;
- ip_rt_put(rt);
+ ip_rt_put(rt);
+
+ flow4.flowi4_proto = IPPROTO_UDP;
+ nfp_tun_write_neigh_v4(n->dev, app, &flow4, n, GFP_ATOMIC);
+ }
#else
return NOTIFY_DONE;
-#endif
-
- flow.flowi4_proto = IPPROTO_UDP;
- nfp_tun_write_neigh(n->dev, app, &flow, n, GFP_ATOMIC);
+#endif /* CONFIG_INET */
return NOTIFY_OK;
}
-void nfp_tunnel_request_route(struct nfp_app *app, struct sk_buff *skb)
+void nfp_tunnel_request_route_v4(struct nfp_app *app, struct sk_buff *skb)
{
struct nfp_tun_req_route_ipv4 *payload;
struct net_device *netdev;
@@ -411,7 +628,7 @@ void nfp_tunnel_request_route(struct nfp_app *app, struct sk_buff *skb)
ip_rt_put(rt);
if (!n)
goto fail_rcu_unlock;
- nfp_tun_write_neigh(n->dev, app, &flow, n, GFP_ATOMIC);
+ nfp_tun_write_neigh_v4(n->dev, app, &flow, n, GFP_ATOMIC);
neigh_release(n);
rcu_read_unlock();
return;
@@ -421,6 +638,48 @@ fail_rcu_unlock:
nfp_flower_cmsg_warn(app, "Requested route not found.\n");
}
+void nfp_tunnel_request_route_v6(struct nfp_app *app, struct sk_buff *skb)
+{
+ struct nfp_tun_req_route_ipv6 *payload;
+ struct net_device *netdev;
+ struct flowi6 flow = {};
+ struct dst_entry *dst;
+ struct neighbour *n;
+
+ payload = nfp_flower_cmsg_get_data(skb);
+
+ rcu_read_lock();
+ netdev = nfp_app_dev_get(app, be32_to_cpu(payload->ingress_port), NULL);
+ if (!netdev)
+ goto fail_rcu_unlock;
+
+ flow.daddr = payload->ipv6_addr;
+ flow.flowi6_proto = IPPROTO_UDP;
+
+#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
+ dst = ipv6_stub->ipv6_dst_lookup_flow(dev_net(netdev), NULL, &flow,
+ NULL);
+ if (IS_ERR(dst))
+ goto fail_rcu_unlock;
+#else
+ goto fail_rcu_unlock;
+#endif
+
+ n = dst_neigh_lookup(dst, &flow.daddr);
+ dst_release(dst);
+ if (!n)
+ goto fail_rcu_unlock;
+
+ nfp_tun_write_neigh_v6(n->dev, app, &flow, n, GFP_ATOMIC);
+ neigh_release(n);
+ rcu_read_unlock();
+ return;
+
+fail_rcu_unlock:
+ rcu_read_unlock();
+ nfp_flower_cmsg_warn(app, "Requested IPv6 route not found.\n");
+}
+
static void nfp_tun_write_ipv4_list(struct nfp_app *app)
{
struct nfp_flower_priv *priv = app->priv;
@@ -502,6 +761,78 @@ void nfp_tunnel_del_ipv4_off(struct nfp_app *app, __be32 ipv4)
nfp_tun_write_ipv4_list(app);
}
+static void nfp_tun_write_ipv6_list(struct nfp_app *app)
+{
+ struct nfp_flower_priv *priv = app->priv;
+ struct nfp_ipv6_addr_entry *entry;
+ struct nfp_tun_ipv6_addr payload;
+ int count = 0;
+
+ memset(&payload, 0, sizeof(struct nfp_tun_ipv6_addr));
+ mutex_lock(&priv->tun.ipv6_off_lock);
+ list_for_each_entry(entry, &priv->tun.ipv6_off_list, list) {
+ if (count >= NFP_FL_IPV6_ADDRS_MAX) {
+ nfp_flower_cmsg_warn(app, "Too many IPv6 tunnel endpoint addresses, some cannot be offloaded.\n");
+ break;
+ }
+ payload.ipv6_addr[count++] = entry->ipv6_addr;
+ }
+ mutex_unlock(&priv->tun.ipv6_off_lock);
+ payload.count = cpu_to_be32(count);
+
+ nfp_flower_xmit_tun_conf(app, NFP_FLOWER_CMSG_TYPE_TUN_IPS_V6,
+ sizeof(struct nfp_tun_ipv6_addr),
+ &payload, GFP_KERNEL);
+}
+
+struct nfp_ipv6_addr_entry *
+nfp_tunnel_add_ipv6_off(struct nfp_app *app, struct in6_addr *ipv6)
+{
+ struct nfp_flower_priv *priv = app->priv;
+ struct nfp_ipv6_addr_entry *entry;
+
+ mutex_lock(&priv->tun.ipv6_off_lock);
+ list_for_each_entry(entry, &priv->tun.ipv6_off_list, list)
+ if (!memcmp(&entry->ipv6_addr, ipv6, sizeof(*ipv6))) {
+ entry->ref_count++;
+ mutex_unlock(&priv->tun.ipv6_off_lock);
+ return entry;
+ }
+
+ entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry) {
+ mutex_unlock(&priv->tun.ipv6_off_lock);
+ nfp_flower_cmsg_warn(app, "Mem error when offloading IP address.\n");
+ return NULL;
+ }
+ entry->ipv6_addr = *ipv6;
+ entry->ref_count = 1;
+ list_add_tail(&entry->list, &priv->tun.ipv6_off_list);
+ mutex_unlock(&priv->tun.ipv6_off_lock);
+
+ nfp_tun_write_ipv6_list(app);
+
+ return entry;
+}
+
+void
+nfp_tunnel_put_ipv6_off(struct nfp_app *app, struct nfp_ipv6_addr_entry *entry)
+{
+ struct nfp_flower_priv *priv = app->priv;
+ bool freed = false;
+
+ mutex_lock(&priv->tun.ipv6_off_lock);
+ if (!--entry->ref_count) {
+ list_del(&entry->list);
+ kfree(entry);
+ freed = true;
+ }
+ mutex_unlock(&priv->tun.ipv6_off_lock);
+
+ if (freed)
+ nfp_tun_write_ipv6_list(app);
+}
+
static int
__nfp_tunnel_offload_mac(struct nfp_app *app, u8 *mac, u16 idx, bool del)
{
@@ -1013,13 +1344,17 @@ int nfp_tunnel_config_start(struct nfp_app *app)
ida_init(&priv->tun.mac_off_ids);
- /* Initialise priv data for IPv4 offloading. */
+ /* Initialise priv data for IPv4/v6 offloading. */
mutex_init(&priv->tun.ipv4_off_lock);
INIT_LIST_HEAD(&priv->tun.ipv4_off_list);
+ mutex_init(&priv->tun.ipv6_off_lock);
+ INIT_LIST_HEAD(&priv->tun.ipv6_off_list);
/* Initialise priv data for neighbour offloading. */
- spin_lock_init(&priv->tun.neigh_off_lock);
- INIT_LIST_HEAD(&priv->tun.neigh_off_list);
+ spin_lock_init(&priv->tun.neigh_off_lock_v4);
+ INIT_LIST_HEAD(&priv->tun.neigh_off_list_v4);
+ spin_lock_init(&priv->tun.neigh_off_lock_v6);
+ INIT_LIST_HEAD(&priv->tun.neigh_off_list_v6);
priv->tun.neigh_nb.notifier_call = nfp_tun_neigh_event_handler;
err = register_netevent_notifier(&priv->tun.neigh_nb);
@@ -1034,9 +1369,11 @@ int nfp_tunnel_config_start(struct nfp_app *app)
void nfp_tunnel_config_stop(struct nfp_app *app)
{
+ struct nfp_offloaded_route *route_entry, *temp;
struct nfp_flower_priv *priv = app->priv;
- struct nfp_ipv4_route_entry *route_entry;
struct nfp_ipv4_addr_entry *ip_entry;
+ struct nfp_tun_neigh_v6 ipv6_route;
+ struct nfp_tun_neigh ipv4_route;
struct list_head *ptr, *storage;
unregister_netevent_notifier(&priv->tun.neigh_nb);
@@ -1050,12 +1387,35 @@ void nfp_tunnel_config_stop(struct nfp_app *app)
kfree(ip_entry);
}
- /* Free any memory that may be occupied by the route list. */
- list_for_each_safe(ptr, storage, &priv->tun.neigh_off_list) {
- route_entry = list_entry(ptr, struct nfp_ipv4_route_entry,
- list);
+ mutex_destroy(&priv->tun.ipv6_off_lock);
+
+ /* Free memory in the route list and remove entries from fw cache. */
+ list_for_each_entry_safe(route_entry, temp,
+ &priv->tun.neigh_off_list_v4, list) {
+ memset(&ipv4_route, 0, sizeof(ipv4_route));
+ memcpy(&ipv4_route.dst_ipv4, &route_entry->ip_add,
+ sizeof(ipv4_route.dst_ipv4));
list_del(&route_entry->list);
kfree(route_entry);
+
+ nfp_flower_xmit_tun_conf(app, NFP_FLOWER_CMSG_TYPE_TUN_NEIGH,
+ sizeof(struct nfp_tun_neigh),
+ (unsigned char *)&ipv4_route,
+ GFP_KERNEL);
+ }
+
+ list_for_each_entry_safe(route_entry, temp,
+ &priv->tun.neigh_off_list_v6, list) {
+ memset(&ipv6_route, 0, sizeof(ipv6_route));
+ memcpy(&ipv6_route.dst_ipv6, &route_entry->ip_add,
+ sizeof(ipv6_route.dst_ipv6));
+ list_del(&route_entry->list);
+ kfree(route_entry);
+
+ nfp_flower_xmit_tun_conf(app, NFP_FLOWER_CMSG_TYPE_TUN_NEIGH_V6,
+ sizeof(struct nfp_tun_neigh),
+ (unsigned char *)&ipv6_route,
+ GFP_KERNEL);
}
/* Destroy rhash. Entries should be cleaned on netdev notifier unreg. */
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h
index 250f510b1d21..ff4438478ea9 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h
@@ -586,6 +586,9 @@ struct nfp_net_dp {
* @ktls_conn_id_gen: Trivial generator for kTLS connection ids (for TX)
* @ktls_no_space: Counter of firmware rejecting kTLS connection due to
* lack of space
+ * @ktls_rx_resync_req: Counter of TLS RX resync requested
+ * @ktls_rx_resync_ign: Counter of TLS RX resync requests ignored
+ * @ktls_rx_resync_sent: Counter of TLS RX resync completed
* @mbox_cmsg: Common Control Message via vNIC mailbox state
* @mbox_cmsg.queue: CCM mbox queue of pending messages
* @mbox_cmsg.wq: CCM mbox wait queue of waiting processes
@@ -674,6 +677,9 @@ struct nfp_net {
atomic64_t ktls_conn_id_gen;
atomic_t ktls_no_space;
+ atomic_t ktls_rx_resync_req;
+ atomic_t ktls_rx_resync_ign;
+ atomic_t ktls_rx_resync_sent;
struct {
struct sk_buff_head queue;
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index bcdcd6de7dea..9bfb3b077bc1 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -47,6 +47,7 @@
#include "nfp_net_sriov.h"
#include "nfp_port.h"
#include "crypto/crypto.h"
+#include "crypto/fw.h"
/**
* nfp_net_get_fw_version() - Read and parse the FW version
@@ -1321,17 +1322,11 @@ nfp_net_tx_ring_reset(struct nfp_net_dp *dp, struct nfp_net_tx_ring *tx_ring)
netdev_tx_reset_queue(nd_q);
}
-static void nfp_net_tx_timeout(struct net_device *netdev)
+static void nfp_net_tx_timeout(struct net_device *netdev, unsigned int txqueue)
{
struct nfp_net *nn = netdev_priv(netdev);
- int i;
- for (i = 0; i < nn->dp.netdev->real_num_tx_queues; i++) {
- if (!netif_tx_queue_stopped(netdev_get_tx_queue(netdev, i)))
- continue;
- nn_warn(nn, "TX timeout on ring: %d\n", i);
- }
- nn_warn(nn, "TX watchdog timeout\n");
+ nn_warn(nn, "TX watchdog timeout on ring: %u\n", txqueue);
}
/* Receive processing
@@ -1667,9 +1662,9 @@ nfp_net_set_hash_desc(struct net_device *netdev, struct nfp_meta_parsed *meta,
&rx_hash->hash);
}
-static void *
+static bool
nfp_net_parse_meta(struct net_device *netdev, struct nfp_meta_parsed *meta,
- void *data, int meta_len)
+ void *data, void *pkt, unsigned int pkt_len, int meta_len)
{
u32 meta_info;
@@ -1699,14 +1694,20 @@ nfp_net_parse_meta(struct net_device *netdev, struct nfp_meta_parsed *meta,
(__force __wsum)__get_unaligned_cpu32(data);
data += 4;
break;
+ case NFP_NET_META_RESYNC_INFO:
+ if (nfp_net_tls_rx_resync_req(netdev, data, pkt,
+ pkt_len))
+ return NULL;
+ data += sizeof(struct nfp_net_tls_resync_req);
+ break;
default:
- return NULL;
+ return true;
}
meta_info >>= NFP_NET_META_FIELD_SIZE;
}
- return data;
+ return data != pkt;
}
static void
@@ -1891,12 +1892,10 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
nfp_net_set_hash_desc(dp->netdev, &meta,
rxbuf->frag + meta_off, rxd);
} else if (meta_len) {
- void *end;
-
- end = nfp_net_parse_meta(dp->netdev, &meta,
- rxbuf->frag + meta_off,
- meta_len);
- if (unlikely(end != rxbuf->frag + pkt_off)) {
+ if (unlikely(nfp_net_parse_meta(dp->netdev, &meta,
+ rxbuf->frag + meta_off,
+ rxbuf->frag + pkt_off,
+ pkt_len, meta_len))) {
nn_dp_warn(dp, "invalid RX packet metadata\n");
nfp_net_rx_drop(dp, r_vec, rx_ring, rxbuf,
NULL);
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.c
index d835c14b7257..c3a763134e79 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.c
@@ -17,6 +17,30 @@ static void nfp_net_tlv_caps_reset(struct nfp_net_tlv_caps *caps)
caps->mbox_len = NFP_NET_CFG_MBOX_VAL_MAX_SZ;
}
+static bool
+nfp_net_tls_parse_crypto_ops(struct device *dev, struct nfp_net_tlv_caps *caps,
+ u8 __iomem *ctrl_mem, u8 __iomem *data,
+ unsigned int length, unsigned int offset,
+ bool rx_stream_scan)
+{
+ /* Ignore the legacy TLV if new one was already parsed */
+ if (caps->tls_resync_ss && !rx_stream_scan)
+ return true;
+
+ if (length < 32) {
+ dev_err(dev,
+ "CRYPTO OPS TLV should be at least 32B, is %dB offset:%u\n",
+ length, offset);
+ return false;
+ }
+
+ caps->crypto_ops = readl(data);
+ caps->crypto_enable_off = data - ctrl_mem + 16;
+ caps->tls_resync_ss = rx_stream_scan;
+
+ return true;
+}
+
int nfp_net_tlv_caps_parse(struct device *dev, u8 __iomem *ctrl_mem,
struct nfp_net_tlv_caps *caps)
{
@@ -104,15 +128,25 @@ int nfp_net_tlv_caps_parse(struct device *dev, u8 __iomem *ctrl_mem,
caps->mbox_cmsg_types = readl(data);
break;
case NFP_NET_CFG_TLV_TYPE_CRYPTO_OPS:
- if (length < 32) {
- dev_err(dev,
- "CRYPTO OPS TLV should be at least 32B, is %dB offset:%u\n",
- length, offset);
+ if (!nfp_net_tls_parse_crypto_ops(dev, caps, ctrl_mem,
+ data, length, offset,
+ false))
return -EINVAL;
+ break;
+ case NFP_NET_CFG_TLV_TYPE_VNIC_STATS:
+ if ((data - ctrl_mem) % 8) {
+ dev_warn(dev, "VNIC STATS TLV misaligned, ignoring offset:%u len:%u\n",
+ offset, length);
+ break;
}
-
- caps->crypto_ops = readl(data);
- caps->crypto_enable_off = data - ctrl_mem + 16;
+ caps->vnic_stats_off = data - ctrl_mem;
+ caps->vnic_stats_cnt = length / 10;
+ break;
+ case NFP_NET_CFG_TLV_TYPE_CRYPTO_OPS_RX_SCAN:
+ if (!nfp_net_tls_parse_crypto_ops(dev, caps, ctrl_mem,
+ data, length, offset,
+ true))
+ return -EINVAL;
break;
default:
if (!FIELD_GET(NFP_NET_CFG_TLV_HEADER_REQUIRED, hdr))
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
index ee6b24e4eacd..3d61a8cb60b0 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
@@ -45,6 +45,7 @@
#define NFP_NET_META_PORTID 5
#define NFP_NET_META_CSUM 6 /* checksum complete type */
#define NFP_NET_META_CONN_HANDLE 7
+#define NFP_NET_META_RESYNC_INFO 8 /* RX resync info request */
#define NFP_META_PORT_ID_CTRL ~0U
@@ -479,6 +480,22 @@
* 8 words, bitmaps of supported and enabled crypto operations.
* First 16B (4 words) contains a bitmap of supported crypto operations,
* and next 16B contain the enabled operations.
+ * This capability is made obsolete by ones with better sync methods.
+ *
+ * %NFP_NET_CFG_TLV_TYPE_VNIC_STATS:
+ * Variable, per-vNIC statistics, data should be 8B aligned (FW should insert
+ * zero-length RESERVED TLV to pad).
+ * TLV data has two sections. First is an array of statistics' IDs (2B each).
+ * Second 8B statistics themselves. Statistics are 8B aligned, meaning there
+ * may be a padding between sections.
+ * Number of statistics can be determined as floor(tlv.length / (2 + 8)).
+ * This TLV overwrites %NFP_NET_CFG_STATS_* values (statistics in this TLV
+ * duplicate the old ones, so driver should be careful not to unnecessarily
+ * render both).
+ *
+ * %NFP_NET_CFG_TLV_TYPE_CRYPTO_OPS_RX_SCAN:
+ * Same as %NFP_NET_CFG_TLV_TYPE_CRYPTO_OPS, but crypto TLS does stream scan
+ * RX sync, rather than kernel-assisted sync.
*/
#define NFP_NET_CFG_TLV_TYPE_UNKNOWN 0
#define NFP_NET_CFG_TLV_TYPE_RESERVED 1
@@ -490,6 +507,8 @@
#define NFP_NET_CFG_TLV_TYPE_REPR_CAP 7
#define NFP_NET_CFG_TLV_TYPE_MBOX_CMSG_TYPES 10
#define NFP_NET_CFG_TLV_TYPE_CRYPTO_OPS 11 /* see crypto/fw.h */
+#define NFP_NET_CFG_TLV_TYPE_VNIC_STATS 12
+#define NFP_NET_CFG_TLV_TYPE_CRYPTO_OPS_RX_SCAN 13
struct device;
@@ -502,6 +521,9 @@ struct device;
* @mbox_cmsg_types: cmsgs which can be passed through the mailbox
* @crypto_ops: supported crypto operations
* @crypto_enable_off: offset of crypto ops enable region
+ * @vnic_stats_off: offset of vNIC stats area
+ * @vnic_stats_cnt: number of vNIC stats
+ * @tls_resync_ss: TLS resync will be performed via stream scan
*/
struct nfp_net_tlv_caps {
u32 me_freq_mhz;
@@ -511,6 +533,9 @@ struct nfp_net_tlv_caps {
u32 mbox_cmsg_types;
u32 crypto_ops;
unsigned int crypto_enable_off;
+ unsigned int vnic_stats_off;
+ unsigned int vnic_stats_cnt;
+ unsigned int tls_resync_ss:1;
};
int nfp_net_tlv_caps_parse(struct device *dev, u8 __iomem *ctrl_mem,
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
index 1b840ee47339..d648e32c0520 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
@@ -148,11 +148,33 @@ static const struct nfp_et_stat nfp_mac_et_stats[] = {
{ "tx_pause_frames_class7", NFP_MAC_STATS_TX_PAUSE_FRAMES_CLASS7, },
};
+static const char nfp_tlv_stat_names[][ETH_GSTRING_LEN] = {
+ [1] = "dev_rx_discards",
+ [2] = "dev_rx_errors",
+ [3] = "dev_rx_bytes",
+ [4] = "dev_rx_uc_bytes",
+ [5] = "dev_rx_mc_bytes",
+ [6] = "dev_rx_bc_bytes",
+ [7] = "dev_rx_pkts",
+ [8] = "dev_rx_mc_pkts",
+ [9] = "dev_rx_bc_pkts",
+
+ [10] = "dev_tx_discards",
+ [11] = "dev_tx_errors",
+ [12] = "dev_tx_bytes",
+ [13] = "dev_tx_uc_bytes",
+ [14] = "dev_tx_mc_bytes",
+ [15] = "dev_tx_bc_bytes",
+ [16] = "dev_tx_pkts",
+ [17] = "dev_tx_mc_pkts",
+ [18] = "dev_tx_bc_pkts",
+};
+
#define NN_ET_GLOBAL_STATS_LEN ARRAY_SIZE(nfp_net_et_stats)
#define NN_ET_SWITCH_STATS_LEN 9
#define NN_RVEC_GATHER_STATS 13
#define NN_RVEC_PER_Q_STATS 3
-#define NN_CTRL_PATH_STATS 1
+#define NN_CTRL_PATH_STATS 4
#define SFP_SFF_REV_COMPLIANCE 1
@@ -454,6 +476,9 @@ static u8 *nfp_vnic_get_sw_stats_strings(struct net_device *netdev, u8 *data)
data = nfp_pr_et(data, "tx_tls_drop_no_sync_data");
data = nfp_pr_et(data, "hw_tls_no_space");
+ data = nfp_pr_et(data, "rx_tls_resync_req_ok");
+ data = nfp_pr_et(data, "rx_tls_resync_req_ign");
+ data = nfp_pr_et(data, "rx_tls_resync_sent");
return data;
}
@@ -502,6 +527,9 @@ static u64 *nfp_vnic_get_sw_stats(struct net_device *netdev, u64 *data)
*data++ = gathered_stats[j];
*data++ = atomic_read(&nn->ktls_no_space);
+ *data++ = atomic_read(&nn->ktls_rx_resync_req);
+ *data++ = atomic_read(&nn->ktls_rx_resync_ign);
+ *data++ = atomic_read(&nn->ktls_rx_resync_sent);
return data;
}
@@ -560,6 +588,65 @@ nfp_vnic_get_hw_stats(u64 *data, u8 __iomem *mem, unsigned int num_vecs)
return data;
}
+static unsigned int nfp_vnic_get_tlv_stats_count(struct nfp_net *nn)
+{
+ return nn->tlv_caps.vnic_stats_cnt + nn->max_r_vecs * 4;
+}
+
+static u8 *nfp_vnic_get_tlv_stats_strings(struct nfp_net *nn, u8 *data)
+{
+ unsigned int i, id;
+ u8 __iomem *mem;
+ u64 id_word = 0;
+
+ mem = nn->dp.ctrl_bar + nn->tlv_caps.vnic_stats_off;
+ for (i = 0; i < nn->tlv_caps.vnic_stats_cnt; i++) {
+ if (!(i % 4))
+ id_word = readq(mem + i * 2);
+
+ id = (u16)id_word;
+ id_word >>= 16;
+
+ if (id < ARRAY_SIZE(nfp_tlv_stat_names) &&
+ nfp_tlv_stat_names[id][0]) {
+ memcpy(data, nfp_tlv_stat_names[id], ETH_GSTRING_LEN);
+ data += ETH_GSTRING_LEN;
+ } else {
+ data = nfp_pr_et(data, "dev_unknown_stat%u", id);
+ }
+ }
+
+ for (i = 0; i < nn->max_r_vecs; i++) {
+ data = nfp_pr_et(data, "rxq_%u_pkts", i);
+ data = nfp_pr_et(data, "rxq_%u_bytes", i);
+ data = nfp_pr_et(data, "txq_%u_pkts", i);
+ data = nfp_pr_et(data, "txq_%u_bytes", i);
+ }
+
+ return data;
+}
+
+static u64 *nfp_vnic_get_tlv_stats(struct nfp_net *nn, u64 *data)
+{
+ u8 __iomem *mem;
+ unsigned int i;
+
+ mem = nn->dp.ctrl_bar + nn->tlv_caps.vnic_stats_off;
+ mem += roundup(2 * nn->tlv_caps.vnic_stats_cnt, 8);
+ for (i = 0; i < nn->tlv_caps.vnic_stats_cnt; i++)
+ *data++ = readq(mem + i * 8);
+
+ mem = nn->dp.ctrl_bar;
+ for (i = 0; i < nn->max_r_vecs; i++) {
+ *data++ = readq(mem + NFP_NET_CFG_RXR_STATS(i));
+ *data++ = readq(mem + NFP_NET_CFG_RXR_STATS(i) + 8);
+ *data++ = readq(mem + NFP_NET_CFG_TXR_STATS(i));
+ *data++ = readq(mem + NFP_NET_CFG_TXR_STATS(i) + 8);
+ }
+
+ return data;
+}
+
static unsigned int nfp_mac_get_stats_count(struct net_device *netdev)
{
struct nfp_port *port;
@@ -609,8 +696,12 @@ static void nfp_net_get_strings(struct net_device *netdev,
switch (stringset) {
case ETH_SS_STATS:
data = nfp_vnic_get_sw_stats_strings(netdev, data);
- data = nfp_vnic_get_hw_stats_strings(data, nn->max_r_vecs,
- false);
+ if (!nn->tlv_caps.vnic_stats_off)
+ data = nfp_vnic_get_hw_stats_strings(data,
+ nn->max_r_vecs,
+ false);
+ else
+ data = nfp_vnic_get_tlv_stats_strings(nn, data);
data = nfp_mac_get_stats_strings(netdev, data);
data = nfp_app_port_get_stats_strings(nn->port, data);
break;
@@ -624,7 +715,11 @@ nfp_net_get_stats(struct net_device *netdev, struct ethtool_stats *stats,
struct nfp_net *nn = netdev_priv(netdev);
data = nfp_vnic_get_sw_stats(netdev, data);
- data = nfp_vnic_get_hw_stats(data, nn->dp.ctrl_bar, nn->max_r_vecs);
+ if (!nn->tlv_caps.vnic_stats_off)
+ data = nfp_vnic_get_hw_stats(data, nn->dp.ctrl_bar,
+ nn->max_r_vecs);
+ else
+ data = nfp_vnic_get_tlv_stats(nn, data);
data = nfp_mac_get_stats(netdev, data);
data = nfp_app_port_get_stats(nn->port, data);
}
@@ -632,13 +727,18 @@ nfp_net_get_stats(struct net_device *netdev, struct ethtool_stats *stats,
static int nfp_net_get_sset_count(struct net_device *netdev, int sset)
{
struct nfp_net *nn = netdev_priv(netdev);
+ unsigned int cnt;
switch (sset) {
case ETH_SS_STATS:
- return nfp_vnic_get_sw_stats_count(netdev) +
- nfp_vnic_get_hw_stats_count(nn->max_r_vecs) +
- nfp_mac_get_stats_count(netdev) +
- nfp_app_port_get_stats_count(nn->port);
+ cnt = nfp_vnic_get_sw_stats_count(netdev);
+ if (!nn->tlv_caps.vnic_stats_off)
+ cnt += nfp_vnic_get_hw_stats_count(nn->max_r_vecs);
+ else
+ cnt += nfp_vnic_get_tlv_stats_count(nn);
+ cnt += nfp_mac_get_stats_count(netdev);
+ cnt += nfp_app_port_get_stats_count(nn->port);
+ return cnt;
default:
return -EOPNOTSUPP;
}
diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c
index 6b54cb3b681d..2fc10a36afa4 100644
--- a/drivers/net/ethernet/nvidia/forcedeth.c
+++ b/drivers/net/ethernet/nvidia/forcedeth.c
@@ -2739,7 +2739,7 @@ static int nv_tx_done_optimized(struct net_device *dev, int limit)
* nv_tx_timeout: dev->tx_timeout function
* Called with netif_tx_lock held.
*/
-static void nv_tx_timeout(struct net_device *dev)
+static void nv_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct fe_priv *np = netdev_priv(dev);
u8 __iomem *base = get_hwbase(dev);
diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_ethtool.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_ethtool.c
index 1a3008e33182..b36aa5bf3c5f 100644
--- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_ethtool.c
+++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_ethtool.c
@@ -20,7 +20,7 @@ struct pch_gbe_stats {
#define PCH_GBE_STAT(m) \
{ \
.string = #m, \
- .size = FIELD_SIZEOF(struct pch_gbe_hw_stats, m), \
+ .size = sizeof_field(struct pch_gbe_hw_stats, m), \
.offset = offsetof(struct pch_gbe_hw_stats, m), \
}
diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
index 18e6d87c607b..73ec195fbc30 100644
--- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
+++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
@@ -2271,7 +2271,7 @@ static int pch_gbe_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
* pch_gbe_tx_timeout - Respond to a Tx Hang
* @netdev: Network interface device structure
*/
-static void pch_gbe_tx_timeout(struct net_device *netdev)
+static void pch_gbe_tx_timeout(struct net_device *netdev, unsigned int txqueue)
{
struct pch_gbe_adapter *adapter = netdev_priv(netdev);
diff --git a/drivers/net/ethernet/packetengines/hamachi.c b/drivers/net/ethernet/packetengines/hamachi.c
index eee883a2aa8d..70816d2e2990 100644
--- a/drivers/net/ethernet/packetengines/hamachi.c
+++ b/drivers/net/ethernet/packetengines/hamachi.c
@@ -548,7 +548,7 @@ static void mdio_write(struct net_device *dev, int phy_id, int location, int val
static int hamachi_open(struct net_device *dev);
static int netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
static void hamachi_timer(struct timer_list *t);
-static void hamachi_tx_timeout(struct net_device *dev);
+static void hamachi_tx_timeout(struct net_device *dev, unsigned int txqueue);
static void hamachi_init_ring(struct net_device *dev);
static netdev_tx_t hamachi_start_xmit(struct sk_buff *skb,
struct net_device *dev);
@@ -1042,7 +1042,7 @@ static void hamachi_timer(struct timer_list *t)
add_timer(&hmp->timer);
}
-static void hamachi_tx_timeout(struct net_device *dev)
+static void hamachi_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
int i;
struct hamachi_private *hmp = netdev_priv(dev);
diff --git a/drivers/net/ethernet/packetengines/yellowfin.c b/drivers/net/ethernet/packetengines/yellowfin.c
index 5113ee647090..520779f05e1a 100644
--- a/drivers/net/ethernet/packetengines/yellowfin.c
+++ b/drivers/net/ethernet/packetengines/yellowfin.c
@@ -344,7 +344,7 @@ static void mdio_write(void __iomem *ioaddr, int phy_id, int location, int value
static int netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
static int yellowfin_open(struct net_device *dev);
static void yellowfin_timer(struct timer_list *t);
-static void yellowfin_tx_timeout(struct net_device *dev);
+static void yellowfin_tx_timeout(struct net_device *dev, unsigned int txqueue);
static int yellowfin_init_ring(struct net_device *dev);
static netdev_tx_t yellowfin_start_xmit(struct sk_buff *skb,
struct net_device *dev);
@@ -677,7 +677,7 @@ static void yellowfin_timer(struct timer_list *t)
add_timer(&yp->timer);
}
-static void yellowfin_tx_timeout(struct net_device *dev)
+static void yellowfin_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct yellowfin_private *yp = netdev_priv(dev);
void __iomem *ioaddr = yp->base;
diff --git a/drivers/net/ethernet/pensando/ionic/ionic.h b/drivers/net/ethernet/pensando/ionic/ionic.h
index 98e102af7756..bb106a32f416 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic.h
@@ -12,19 +12,27 @@ struct ionic_lif;
#define IONIC_DRV_NAME "ionic"
#define IONIC_DRV_DESCRIPTION "Pensando Ethernet NIC Driver"
-#define IONIC_DRV_VERSION "0.18.0-k"
+#define IONIC_DRV_VERSION "0.20.0-k"
#define PCI_VENDOR_ID_PENSANDO 0x1dd8
#define PCI_DEVICE_ID_PENSANDO_IONIC_ETH_PF 0x1002
#define PCI_DEVICE_ID_PENSANDO_IONIC_ETH_VF 0x1003
-#define IONIC_SUBDEV_ID_NAPLES_25 0x4000
-#define IONIC_SUBDEV_ID_NAPLES_100_4 0x4001
-#define IONIC_SUBDEV_ID_NAPLES_100_8 0x4002
-
#define DEVCMD_TIMEOUT 10
+struct ionic_vf {
+ u16 index;
+ u8 macaddr[6];
+ __le32 maxrate;
+ __le16 vlanid;
+ u8 spoofchk;
+ u8 trusted;
+ u8 linkstate;
+ dma_addr_t stats_pa;
+ struct ionic_lif_stats stats;
+};
+
struct ionic {
struct pci_dev *pdev;
struct device *dev;
@@ -46,6 +54,9 @@ struct ionic {
DECLARE_BITMAP(intrs, IONIC_INTR_CTRL_REGS_MAX);
struct work_struct nb_work;
struct notifier_block nb;
+ struct rw_semaphore vf_op_lock; /* lock for VF operations */
+ struct ionic_vf *vfs;
+ int num_vfs;
struct timer_list watchdog_timer;
int watchdog_period;
};
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c
index 9a9ab8cb2cb3..448d7b23b2f7 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_bus_pci.c
@@ -104,10 +104,112 @@ void ionic_bus_unmap_dbpage(struct ionic *ionic, void __iomem *page)
iounmap(page);
}
+static void ionic_vf_dealloc_locked(struct ionic *ionic)
+{
+ struct ionic_vf *v;
+ dma_addr_t dma = 0;
+ int i;
+
+ if (!ionic->vfs)
+ return;
+
+ for (i = ionic->num_vfs - 1; i >= 0; i--) {
+ v = &ionic->vfs[i];
+
+ if (v->stats_pa) {
+ (void)ionic_set_vf_config(ionic, i,
+ IONIC_VF_ATTR_STATSADDR,
+ (u8 *)&dma);
+ dma_unmap_single(ionic->dev, v->stats_pa,
+ sizeof(v->stats), DMA_FROM_DEVICE);
+ v->stats_pa = 0;
+ }
+ }
+
+ kfree(ionic->vfs);
+ ionic->vfs = NULL;
+ ionic->num_vfs = 0;
+}
+
+static void ionic_vf_dealloc(struct ionic *ionic)
+{
+ down_write(&ionic->vf_op_lock);
+ ionic_vf_dealloc_locked(ionic);
+ up_write(&ionic->vf_op_lock);
+}
+
+static int ionic_vf_alloc(struct ionic *ionic, int num_vfs)
+{
+ struct ionic_vf *v;
+ int err = 0;
+ int i;
+
+ down_write(&ionic->vf_op_lock);
+
+ ionic->vfs = kcalloc(num_vfs, sizeof(struct ionic_vf), GFP_KERNEL);
+ if (!ionic->vfs) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ for (i = 0; i < num_vfs; i++) {
+ v = &ionic->vfs[i];
+ v->stats_pa = dma_map_single(ionic->dev, &v->stats,
+ sizeof(v->stats), DMA_FROM_DEVICE);
+ if (dma_mapping_error(ionic->dev, v->stats_pa)) {
+ v->stats_pa = 0;
+ err = -ENODEV;
+ goto out;
+ }
+
+ /* ignore failures from older FW, we just won't get stats */
+ (void)ionic_set_vf_config(ionic, i, IONIC_VF_ATTR_STATSADDR,
+ (u8 *)&v->stats_pa);
+ ionic->num_vfs++;
+ }
+
+out:
+ if (err)
+ ionic_vf_dealloc_locked(ionic);
+ up_write(&ionic->vf_op_lock);
+ return err;
+}
+
+static int ionic_sriov_configure(struct pci_dev *pdev, int num_vfs)
+{
+ struct ionic *ionic = pci_get_drvdata(pdev);
+ struct device *dev = ionic->dev;
+ int ret = 0;
+
+ if (num_vfs > 0) {
+ ret = pci_enable_sriov(pdev, num_vfs);
+ if (ret) {
+ dev_err(dev, "Cannot enable SRIOV: %d\n", ret);
+ goto out;
+ }
+
+ ret = ionic_vf_alloc(ionic, num_vfs);
+ if (ret) {
+ dev_err(dev, "Cannot alloc VFs: %d\n", ret);
+ pci_disable_sriov(pdev);
+ goto out;
+ }
+
+ ret = num_vfs;
+ } else {
+ pci_disable_sriov(pdev);
+ ionic_vf_dealloc(ionic);
+ }
+
+out:
+ return ret;
+}
+
static int ionic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
{
struct device *dev = &pdev->dev;
struct ionic *ionic;
+ int num_vfs;
int err;
ionic = ionic_devlink_alloc(dev);
@@ -206,6 +308,15 @@ static int ionic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
goto err_out_free_lifs;
}
+ init_rwsem(&ionic->vf_op_lock);
+ num_vfs = pci_num_vf(pdev);
+ if (num_vfs) {
+ dev_info(dev, "%d VFs found already enabled\n", num_vfs);
+ err = ionic_vf_alloc(ionic, num_vfs);
+ if (err)
+ dev_err(dev, "Cannot enable existing VFs: %d\n", err);
+ }
+
err = ionic_lifs_register(ionic);
if (err) {
dev_err(dev, "Cannot register LIFs: %d, aborting\n", err);
@@ -223,6 +334,7 @@ static int ionic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
err_out_deregister_lifs:
ionic_lifs_unregister(ionic);
err_out_deinit_lifs:
+ ionic_vf_dealloc(ionic);
ionic_lifs_deinit(ionic);
err_out_free_lifs:
ionic_lifs_free(ionic);
@@ -279,6 +391,7 @@ static struct pci_driver ionic_driver = {
.id_table = ionic_id_table,
.probe = ionic_probe,
.remove = ionic_remove,
+ .sriov_configure = ionic_sriov_configure,
};
int ionic_bus_register_driver(void)
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.c b/drivers/net/ethernet/pensando/ionic/ionic_dev.c
index 5f9d2ec70446..87f82f36812f 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_dev.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.c
@@ -286,6 +286,64 @@ void ionic_dev_cmd_port_pause(struct ionic_dev *idev, u8 pause_type)
ionic_dev_cmd_go(idev, &cmd);
}
+/* VF commands */
+int ionic_set_vf_config(struct ionic *ionic, int vf, u8 attr, u8 *data)
+{
+ union ionic_dev_cmd cmd = {
+ .vf_setattr.opcode = IONIC_CMD_VF_SETATTR,
+ .vf_setattr.attr = attr,
+ .vf_setattr.vf_index = vf,
+ };
+ int err;
+
+ switch (attr) {
+ case IONIC_VF_ATTR_SPOOFCHK:
+ cmd.vf_setattr.spoofchk = *data;
+ dev_dbg(ionic->dev, "%s: vf %d spoof %d\n",
+ __func__, vf, *data);
+ break;
+ case IONIC_VF_ATTR_TRUST:
+ cmd.vf_setattr.trust = *data;
+ dev_dbg(ionic->dev, "%s: vf %d trust %d\n",
+ __func__, vf, *data);
+ break;
+ case IONIC_VF_ATTR_LINKSTATE:
+ cmd.vf_setattr.linkstate = *data;
+ dev_dbg(ionic->dev, "%s: vf %d linkstate %d\n",
+ __func__, vf, *data);
+ break;
+ case IONIC_VF_ATTR_MAC:
+ ether_addr_copy(cmd.vf_setattr.macaddr, data);
+ dev_dbg(ionic->dev, "%s: vf %d macaddr %pM\n",
+ __func__, vf, data);
+ break;
+ case IONIC_VF_ATTR_VLAN:
+ cmd.vf_setattr.vlanid = cpu_to_le16(*(u16 *)data);
+ dev_dbg(ionic->dev, "%s: vf %d vlan %d\n",
+ __func__, vf, *(u16 *)data);
+ break;
+ case IONIC_VF_ATTR_RATE:
+ cmd.vf_setattr.maxrate = cpu_to_le32(*(u32 *)data);
+ dev_dbg(ionic->dev, "%s: vf %d maxrate %d\n",
+ __func__, vf, *(u32 *)data);
+ break;
+ case IONIC_VF_ATTR_STATSADDR:
+ cmd.vf_setattr.stats_pa = cpu_to_le64(*(u64 *)data);
+ dev_dbg(ionic->dev, "%s: vf %d stats_pa 0x%08llx\n",
+ __func__, vf, *(u64 *)data);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ mutex_lock(&ionic->dev_cmd_lock);
+ ionic_dev_cmd_go(&ionic->idev, &cmd);
+ err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT);
+ mutex_unlock(&ionic->dev_cmd_lock);
+
+ return err;
+}
+
/* LIF commands */
void ionic_dev_cmd_lif_identify(struct ionic_dev *idev, u8 type, u8 ver)
{
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.h b/drivers/net/ethernet/pensando/ionic/ionic_dev.h
index 4665c5dc5324..7838e342c4fd 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_dev.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.h
@@ -113,6 +113,12 @@ static_assert(sizeof(struct ionic_rxq_desc) == 16);
static_assert(sizeof(struct ionic_rxq_sg_desc) == 128);
static_assert(sizeof(struct ionic_rxq_comp) == 16);
+/* SR/IOV */
+static_assert(sizeof(struct ionic_vf_setattr_cmd) == 64);
+static_assert(sizeof(struct ionic_vf_setattr_comp) == 16);
+static_assert(sizeof(struct ionic_vf_getattr_cmd) == 64);
+static_assert(sizeof(struct ionic_vf_getattr_comp) == 16);
+
struct ionic_devinfo {
u8 asic_type;
u8 asic_rev;
@@ -275,6 +281,7 @@ void ionic_dev_cmd_port_autoneg(struct ionic_dev *idev, u8 an_enable);
void ionic_dev_cmd_port_fec(struct ionic_dev *idev, u8 fec_type);
void ionic_dev_cmd_port_pause(struct ionic_dev *idev, u8 pause_type);
+int ionic_set_vf_config(struct ionic *ionic, int vf, u8 attr, u8 *data);
void ionic_dev_cmd_lif_identify(struct ionic_dev *idev, u8 type, u8 ver);
void ionic_dev_cmd_lif_init(struct ionic_dev *idev, u16 lif_index,
dma_addr_t addr);
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_if.h b/drivers/net/ethernet/pensando/ionic/ionic_if.h
index 39317cdfa6cf..f131adad96e3 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_if.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic_if.h
@@ -51,6 +51,10 @@ enum ionic_cmd_opcode {
IONIC_CMD_RDMA_CREATE_CQ = 52,
IONIC_CMD_RDMA_CREATE_ADMINQ = 53,
+ /* SR/IOV commands */
+ IONIC_CMD_VF_GETATTR = 60,
+ IONIC_CMD_VF_SETATTR = 61,
+
/* QoS commands */
IONIC_CMD_QOS_CLASS_IDENTIFY = 240,
IONIC_CMD_QOS_CLASS_INIT = 241,
@@ -1639,6 +1643,93 @@ enum ionic_qos_sched_type {
IONIC_QOS_SCHED_TYPE_DWRR = 1, /* Deficit weighted round-robin */
};
+enum ionic_vf_attr {
+ IONIC_VF_ATTR_SPOOFCHK = 1,
+ IONIC_VF_ATTR_TRUST = 2,
+ IONIC_VF_ATTR_MAC = 3,
+ IONIC_VF_ATTR_LINKSTATE = 4,
+ IONIC_VF_ATTR_VLAN = 5,
+ IONIC_VF_ATTR_RATE = 6,
+ IONIC_VF_ATTR_STATSADDR = 7,
+};
+
+/**
+ * VF link status
+ */
+enum ionic_vf_link_status {
+ IONIC_VF_LINK_STATUS_AUTO = 0, /* link state of the uplink */
+ IONIC_VF_LINK_STATUS_UP = 1, /* link is always up */
+ IONIC_VF_LINK_STATUS_DOWN = 2, /* link is always down */
+};
+
+/**
+ * struct ionic_vf_setattr_cmd - Set VF attributes on the NIC
+ * @opcode: Opcode
+ * @index: VF index
+ * @attr: Attribute type (enum ionic_vf_attr)
+ * macaddr mac address
+ * vlanid vlan ID
+ * maxrate max Tx rate in Mbps
+ * spoofchk enable address spoof checking
+ * trust enable VF trust
+ * linkstate set link up or down
+ * stats_pa set DMA address for VF stats
+ */
+struct ionic_vf_setattr_cmd {
+ u8 opcode;
+ u8 attr;
+ __le16 vf_index;
+ union {
+ u8 macaddr[6];
+ __le16 vlanid;
+ __le32 maxrate;
+ u8 spoofchk;
+ u8 trust;
+ u8 linkstate;
+ __le64 stats_pa;
+ u8 pad[60];
+ };
+};
+
+struct ionic_vf_setattr_comp {
+ u8 status;
+ u8 attr;
+ __le16 vf_index;
+ __le16 comp_index;
+ u8 rsvd[9];
+ u8 color;
+};
+
+/**
+ * struct ionic_vf_getattr_cmd - Get VF attributes from the NIC
+ * @opcode: Opcode
+ * @index: VF index
+ * @attr: Attribute type (enum ionic_vf_attr)
+ */
+struct ionic_vf_getattr_cmd {
+ u8 opcode;
+ u8 attr;
+ __le16 vf_index;
+ u8 rsvd[60];
+};
+
+struct ionic_vf_getattr_comp {
+ u8 status;
+ u8 attr;
+ __le16 vf_index;
+ union {
+ u8 macaddr[6];
+ __le16 vlanid;
+ __le32 maxrate;
+ u8 spoofchk;
+ u8 trust;
+ u8 linkstate;
+ __le64 stats_pa;
+ u8 pad[11];
+ };
+ u8 color;
+};
+
/**
* union ionic_qos_config - Qos configuration structure
* @flags: Configuration flags
@@ -2289,6 +2380,9 @@ union ionic_dev_cmd {
struct ionic_port_getattr_cmd port_getattr;
struct ionic_port_setattr_cmd port_setattr;
+ struct ionic_vf_setattr_cmd vf_setattr;
+ struct ionic_vf_getattr_cmd vf_getattr;
+
struct ionic_lif_identify_cmd lif_identify;
struct ionic_lif_init_cmd lif_init;
struct ionic_lif_reset_cmd lif_reset;
@@ -2318,6 +2412,9 @@ union ionic_dev_cmd_comp {
struct ionic_port_getattr_comp port_getattr;
struct ionic_port_setattr_comp port_setattr;
+ struct ionic_vf_setattr_comp vf_setattr;
+ struct ionic_vf_getattr_comp vf_getattr;
+
struct ionic_lif_identify_comp lif_identify;
struct ionic_lif_init_comp lif_init;
ionic_lif_reset_comp lif_reset;
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
index ef8258713369..191271f6260d 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
@@ -1285,7 +1285,7 @@ static void ionic_tx_timeout_work(struct work_struct *ws)
rtnl_unlock();
}
-static void ionic_tx_timeout(struct net_device *netdev)
+static void ionic_tx_timeout(struct net_device *netdev, unsigned int txqueue)
{
struct ionic_lif *lif = netdev_priv(netdev);
@@ -1619,6 +1619,227 @@ int ionic_stop(struct net_device *netdev)
return err;
}
+static int ionic_get_vf_config(struct net_device *netdev,
+ int vf, struct ifla_vf_info *ivf)
+{
+ struct ionic_lif *lif = netdev_priv(netdev);
+ struct ionic *ionic = lif->ionic;
+ int ret = 0;
+
+ down_read(&ionic->vf_op_lock);
+
+ if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) {
+ ret = -EINVAL;
+ } else {
+ ivf->vf = vf;
+ ivf->vlan = ionic->vfs[vf].vlanid;
+ ivf->qos = 0;
+ ivf->spoofchk = ionic->vfs[vf].spoofchk;
+ ivf->linkstate = ionic->vfs[vf].linkstate;
+ ivf->max_tx_rate = ionic->vfs[vf].maxrate;
+ ivf->trusted = ionic->vfs[vf].trusted;
+ ether_addr_copy(ivf->mac, ionic->vfs[vf].macaddr);
+ }
+
+ up_read(&ionic->vf_op_lock);
+ return ret;
+}
+
+static int ionic_get_vf_stats(struct net_device *netdev, int vf,
+ struct ifla_vf_stats *vf_stats)
+{
+ struct ionic_lif *lif = netdev_priv(netdev);
+ struct ionic *ionic = lif->ionic;
+ struct ionic_lif_stats *vs;
+ int ret = 0;
+
+ down_read(&ionic->vf_op_lock);
+
+ if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) {
+ ret = -EINVAL;
+ } else {
+ memset(vf_stats, 0, sizeof(*vf_stats));
+ vs = &ionic->vfs[vf].stats;
+
+ vf_stats->rx_packets = le64_to_cpu(vs->rx_ucast_packets);
+ vf_stats->tx_packets = le64_to_cpu(vs->tx_ucast_packets);
+ vf_stats->rx_bytes = le64_to_cpu(vs->rx_ucast_bytes);
+ vf_stats->tx_bytes = le64_to_cpu(vs->tx_ucast_bytes);
+ vf_stats->broadcast = le64_to_cpu(vs->rx_bcast_packets);
+ vf_stats->multicast = le64_to_cpu(vs->rx_mcast_packets);
+ vf_stats->rx_dropped = le64_to_cpu(vs->rx_ucast_drop_packets) +
+ le64_to_cpu(vs->rx_mcast_drop_packets) +
+ le64_to_cpu(vs->rx_bcast_drop_packets);
+ vf_stats->tx_dropped = le64_to_cpu(vs->tx_ucast_drop_packets) +
+ le64_to_cpu(vs->tx_mcast_drop_packets) +
+ le64_to_cpu(vs->tx_bcast_drop_packets);
+ }
+
+ up_read(&ionic->vf_op_lock);
+ return ret;
+}
+
+static int ionic_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
+{
+ struct ionic_lif *lif = netdev_priv(netdev);
+ struct ionic *ionic = lif->ionic;
+ int ret;
+
+ if (!(is_zero_ether_addr(mac) || is_valid_ether_addr(mac)))
+ return -EINVAL;
+
+ down_read(&ionic->vf_op_lock);
+
+ if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) {
+ ret = -EINVAL;
+ } else {
+ ret = ionic_set_vf_config(ionic, vf, IONIC_VF_ATTR_MAC, mac);
+ if (!ret)
+ ether_addr_copy(ionic->vfs[vf].macaddr, mac);
+ }
+
+ up_read(&ionic->vf_op_lock);
+ return ret;
+}
+
+static int ionic_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan,
+ u8 qos, __be16 proto)
+{
+ struct ionic_lif *lif = netdev_priv(netdev);
+ struct ionic *ionic = lif->ionic;
+ int ret;
+
+ /* until someday when we support qos */
+ if (qos)
+ return -EINVAL;
+
+ if (vlan > 4095)
+ return -EINVAL;
+
+ if (proto != htons(ETH_P_8021Q))
+ return -EPROTONOSUPPORT;
+
+ down_read(&ionic->vf_op_lock);
+
+ if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) {
+ ret = -EINVAL;
+ } else {
+ ret = ionic_set_vf_config(ionic, vf,
+ IONIC_VF_ATTR_VLAN, (u8 *)&vlan);
+ if (!ret)
+ ionic->vfs[vf].vlanid = vlan;
+ }
+
+ up_read(&ionic->vf_op_lock);
+ return ret;
+}
+
+static int ionic_set_vf_rate(struct net_device *netdev, int vf,
+ int tx_min, int tx_max)
+{
+ struct ionic_lif *lif = netdev_priv(netdev);
+ struct ionic *ionic = lif->ionic;
+ int ret;
+
+ /* setting the min just seems silly */
+ if (tx_min)
+ return -EINVAL;
+
+ down_write(&ionic->vf_op_lock);
+
+ if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) {
+ ret = -EINVAL;
+ } else {
+ ret = ionic_set_vf_config(ionic, vf,
+ IONIC_VF_ATTR_RATE, (u8 *)&tx_max);
+ if (!ret)
+ lif->ionic->vfs[vf].maxrate = tx_max;
+ }
+
+ up_write(&ionic->vf_op_lock);
+ return ret;
+}
+
+static int ionic_set_vf_spoofchk(struct net_device *netdev, int vf, bool set)
+{
+ struct ionic_lif *lif = netdev_priv(netdev);
+ struct ionic *ionic = lif->ionic;
+ u8 data = set; /* convert to u8 for config */
+ int ret;
+
+ down_write(&ionic->vf_op_lock);
+
+ if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) {
+ ret = -EINVAL;
+ } else {
+ ret = ionic_set_vf_config(ionic, vf,
+ IONIC_VF_ATTR_SPOOFCHK, &data);
+ if (!ret)
+ ionic->vfs[vf].spoofchk = data;
+ }
+
+ up_write(&ionic->vf_op_lock);
+ return ret;
+}
+
+static int ionic_set_vf_trust(struct net_device *netdev, int vf, bool set)
+{
+ struct ionic_lif *lif = netdev_priv(netdev);
+ struct ionic *ionic = lif->ionic;
+ u8 data = set; /* convert to u8 for config */
+ int ret;
+
+ down_write(&ionic->vf_op_lock);
+
+ if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) {
+ ret = -EINVAL;
+ } else {
+ ret = ionic_set_vf_config(ionic, vf,
+ IONIC_VF_ATTR_TRUST, &data);
+ if (!ret)
+ ionic->vfs[vf].trusted = data;
+ }
+
+ up_write(&ionic->vf_op_lock);
+ return ret;
+}
+
+static int ionic_set_vf_link_state(struct net_device *netdev, int vf, int set)
+{
+ struct ionic_lif *lif = netdev_priv(netdev);
+ struct ionic *ionic = lif->ionic;
+ u8 data;
+ int ret;
+
+ switch (set) {
+ case IFLA_VF_LINK_STATE_ENABLE:
+ data = IONIC_VF_LINK_STATUS_UP;
+ break;
+ case IFLA_VF_LINK_STATE_DISABLE:
+ data = IONIC_VF_LINK_STATUS_DOWN;
+ break;
+ case IFLA_VF_LINK_STATE_AUTO:
+ data = IONIC_VF_LINK_STATUS_AUTO;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ down_write(&ionic->vf_op_lock);
+
+ if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) {
+ ret = -EINVAL;
+ } else {
+ ret = ionic_set_vf_config(ionic, vf,
+ IONIC_VF_ATTR_LINKSTATE, &data);
+ if (!ret)
+ ionic->vfs[vf].linkstate = set;
+ }
+
+ up_write(&ionic->vf_op_lock);
+ return ret;
+}
+
static const struct net_device_ops ionic_netdev_ops = {
.ndo_open = ionic_open,
.ndo_stop = ionic_stop,
@@ -1632,6 +1853,14 @@ static const struct net_device_ops ionic_netdev_ops = {
.ndo_change_mtu = ionic_change_mtu,
.ndo_vlan_rx_add_vid = ionic_vlan_rx_add_vid,
.ndo_vlan_rx_kill_vid = ionic_vlan_rx_kill_vid,
+ .ndo_set_vf_vlan = ionic_set_vf_vlan,
+ .ndo_set_vf_trust = ionic_set_vf_trust,
+ .ndo_set_vf_mac = ionic_set_vf_mac,
+ .ndo_set_vf_rate = ionic_set_vf_rate,
+ .ndo_set_vf_spoofchk = ionic_set_vf_spoofchk,
+ .ndo_get_vf_config = ionic_get_vf_config,
+ .ndo_set_vf_link_state = ionic_set_vf_link_state,
+ .ndo_get_vf_stats = ionic_get_vf_stats,
};
int ionic_reset_queues(struct ionic_lif *lif)
@@ -1965,18 +2194,22 @@ static int ionic_station_set(struct ionic_lif *lif)
if (err)
return err;
+ if (is_zero_ether_addr(ctx.comp.lif_getattr.mac))
+ return 0;
+
memcpy(addr.sa_data, ctx.comp.lif_getattr.mac, netdev->addr_len);
addr.sa_family = AF_INET;
err = eth_prepare_mac_addr_change(netdev, &addr);
- if (err)
- return err;
-
- if (!is_zero_ether_addr(netdev->dev_addr)) {
- netdev_dbg(lif->netdev, "deleting station MAC addr %pM\n",
- netdev->dev_addr);
- ionic_lif_addr(lif, netdev->dev_addr, false);
+ if (err) {
+ netdev_warn(lif->netdev, "ignoring bad MAC addr from NIC %pM\n",
+ addr.sa_data);
+ return 0;
}
+ netdev_dbg(lif->netdev, "deleting station MAC addr %pM\n",
+ netdev->dev_addr);
+ ionic_lif_addr(lif, netdev->dev_addr, false);
+
eth_commit_mac_addr_change(netdev, &addr);
netdev_dbg(lif->netdev, "adding station MAC addr %pM\n",
netdev->dev_addr);
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.h b/drivers/net/ethernet/pensando/ionic/ionic_lif.h
index a55fd1f8c31b..9c5a7dd45f9d 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.h
@@ -37,6 +37,7 @@ struct ionic_rx_stats {
u64 csum_complete;
u64 csum_error;
u64 buffers_posted;
+ u64 dropped;
};
#define IONIC_QCQ_F_INITED BIT(0)
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_main.c b/drivers/net/ethernet/pensando/ionic/ionic_main.c
index 3590ea7fd88a..a8e3fb73b465 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_main.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_main.c
@@ -165,6 +165,10 @@ static const char *ionic_opcode_to_str(enum ionic_cmd_opcode opcode)
return "IONIC_CMD_FW_DOWNLOAD";
case IONIC_CMD_FW_CONTROL:
return "IONIC_CMD_FW_CONTROL";
+ case IONIC_CMD_VF_GETATTR:
+ return "IONIC_CMD_VF_GETATTR";
+ case IONIC_CMD_VF_SETATTR:
+ return "IONIC_CMD_VF_SETATTR";
default:
return "DEVCMD_UNKNOWN";
}
@@ -326,9 +330,9 @@ int ionic_dev_cmd_wait(struct ionic *ionic, unsigned long max_seconds)
unsigned long max_wait;
unsigned long duration;
int opcode;
+ int hb = 0;
int done;
int err;
- int hb;
WARN_ON(in_interrupt());
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_stats.c b/drivers/net/ethernet/pensando/ionic/ionic_stats.c
index 03916b6d47f2..a1e9796a660a 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_stats.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_stats.c
@@ -39,6 +39,7 @@ static const struct ionic_stat_desc ionic_rx_stats_desc[] = {
IONIC_RX_STAT_DESC(csum_none),
IONIC_RX_STAT_DESC(csum_complete),
IONIC_RX_STAT_DESC(csum_error),
+ IONIC_RX_STAT_DESC(dropped),
};
static const struct ionic_stat_desc ionic_txq_stats_desc[] = {
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
index 97e79949b359..e452f4242ba0 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
@@ -152,12 +152,16 @@ static void ionic_rx_clean(struct ionic_queue *q, struct ionic_desc_info *desc_i
stats = q_to_rx_stats(q);
netdev = q->lif->netdev;
- if (comp->status)
+ if (comp->status) {
+ stats->dropped++;
return;
+ }
/* no packet processing while resetting */
- if (unlikely(test_bit(IONIC_LIF_QUEUE_RESET, q->lif->state)))
+ if (unlikely(test_bit(IONIC_LIF_QUEUE_RESET, q->lif->state))) {
+ stats->dropped++;
return;
+ }
stats->pkts++;
stats->bytes += le16_to_cpu(comp->len);
@@ -167,8 +171,10 @@ static void ionic_rx_clean(struct ionic_queue *q, struct ionic_desc_info *desc_i
else
skb = ionic_rx_frags(q, desc_info, cq_info);
- if (unlikely(!skb))
+ if (unlikely(!skb)) {
+ stats->dropped++;
return;
+ }
skb_record_rx_queue(skb, q->index);
@@ -337,6 +343,8 @@ void ionic_rx_fill(struct ionic_queue *q)
struct ionic_rxq_sg_desc *sg_desc;
struct ionic_rxq_sg_elem *sg_elem;
struct ionic_rxq_desc *desc;
+ unsigned int remain_len;
+ unsigned int seg_len;
unsigned int nfrags;
bool ring_doorbell;
unsigned int i, j;
@@ -346,6 +354,7 @@ void ionic_rx_fill(struct ionic_queue *q)
nfrags = round_up(len, PAGE_SIZE) / PAGE_SIZE;
for (i = ionic_q_space_avail(q); i; i--) {
+ remain_len = len;
desc_info = q->head;
desc = desc_info->desc;
sg_desc = desc_info->sg_desc;
@@ -369,7 +378,9 @@ void ionic_rx_fill(struct ionic_queue *q)
return;
}
desc->addr = cpu_to_le64(page_info->dma_addr);
- desc->len = cpu_to_le16(PAGE_SIZE);
+ seg_len = min_t(unsigned int, PAGE_SIZE, len);
+ desc->len = cpu_to_le16(seg_len);
+ remain_len -= seg_len;
page_info++;
/* fill sg descriptors - pages[1..n] */
@@ -385,7 +396,9 @@ void ionic_rx_fill(struct ionic_queue *q)
return;
}
sg_elem->addr = cpu_to_le64(page_info->dma_addr);
- sg_elem->len = cpu_to_le16(PAGE_SIZE);
+ seg_len = min_t(unsigned int, PAGE_SIZE, remain_len);
+ sg_elem->len = cpu_to_le16(seg_len);
+ remain_len -= seg_len;
page_info++;
}
diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
index c692a41e4548..8067ea04d455 100644
--- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
+++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
@@ -49,7 +49,7 @@ static int netxen_nic_open(struct net_device *netdev);
static int netxen_nic_close(struct net_device *netdev);
static netdev_tx_t netxen_nic_xmit_frame(struct sk_buff *,
struct net_device *);
-static void netxen_tx_timeout(struct net_device *netdev);
+static void netxen_tx_timeout(struct net_device *netdev, unsigned int txqueue);
static void netxen_tx_timeout_task(struct work_struct *work);
static void netxen_fw_poll_work(struct work_struct *work);
static void netxen_schedule_work(struct netxen_adapter *adapter,
@@ -2222,7 +2222,7 @@ static void netxen_nic_handle_phy_intr(struct netxen_adapter *adapter)
netxen_advert_link_change(adapter, linkup);
}
-static void netxen_tx_timeout(struct net_device *netdev)
+static void netxen_tx_timeout(struct net_device *netdev, unsigned int txqueue)
{
struct netxen_adapter *adapter = netdev_priv(netdev);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c
index 7e0b795230b2..900bc603e30a 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c
@@ -331,8 +331,8 @@ int qed_sp_pf_start(struct qed_hwfn *p_hwfn,
u8 sb_index = p_hwfn->p_eq->eq_sb_index;
struct qed_spq_entry *p_ent = NULL;
struct qed_sp_init_data init_data;
- int rc = -EINVAL;
u8 page_cnt, i;
+ int rc;
/* update initial eq producer */
qed_eq_prod_update(p_hwfn,
@@ -447,7 +447,7 @@ int qed_sp_pf_update(struct qed_hwfn *p_hwfn)
{
struct qed_spq_entry *p_ent = NULL;
struct qed_sp_init_data init_data;
- int rc = -EINVAL;
+ int rc;
/* Get SPQ entry */
memset(&init_data, 0, sizeof(init_data));
@@ -471,7 +471,7 @@ int qed_sp_pf_update_ufp(struct qed_hwfn *p_hwfn)
{
struct qed_spq_entry *p_ent = NULL;
struct qed_sp_init_data init_data;
- int rc = -EOPNOTSUPP;
+ int rc;
if (p_hwfn->ufp_info.pri_type == QED_UFP_PRI_UNKNOWN) {
DP_INFO(p_hwfn, "Invalid priority type %d\n",
@@ -509,7 +509,7 @@ int qed_sp_pf_update_tunn_cfg(struct qed_hwfn *p_hwfn,
{
struct qed_spq_entry *p_ent = NULL;
struct qed_sp_init_data init_data;
- int rc = -EINVAL;
+ int rc;
if (IS_VF(p_hwfn->cdev))
return qed_vf_pf_tunnel_param_update(p_hwfn, p_tunn);
@@ -546,7 +546,7 @@ int qed_sp_pf_stop(struct qed_hwfn *p_hwfn)
{
struct qed_spq_entry *p_ent = NULL;
struct qed_sp_init_data init_data;
- int rc = -EINVAL;
+ int rc;
/* Get SPQ entry */
memset(&init_data, 0, sizeof(init_data));
diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h
index c303a92d5b06..e8a1b27db84d 100644
--- a/drivers/net/ethernet/qlogic/qede/qede.h
+++ b/drivers/net/ethernet/qlogic/qede/qede.h
@@ -464,7 +464,7 @@ struct qede_fastpath {
struct qede_tx_queue *txq;
struct qede_tx_queue *xdp_tx;
-#define VEC_NAME_SIZE (FIELD_SIZEOF(struct net_device, name) + 8)
+#define VEC_NAME_SIZE (sizeof_field(struct net_device, name) + 8)
char name[VEC_NAME_SIZE];
};
diff --git a/drivers/net/ethernet/qlogic/qede/qede_filter.c b/drivers/net/ethernet/qlogic/qede/qede_filter.c
index d6cfe4ffbaf3..d1ce4531d01a 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_filter.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_filter.c
@@ -1230,7 +1230,7 @@ qede_configure_mcast_filtering(struct net_device *ndev,
netif_addr_lock_bh(ndev);
mc_count = netdev_mc_count(ndev);
- if (mc_count < 64) {
+ if (mc_count <= 64) {
netdev_for_each_mc_addr(ha, ndev) {
ether_addr_copy(temp, ha->addr);
temp += ETH_ALEN;
diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c
index 481b096e984d..34fa3917eb33 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -1406,6 +1406,7 @@ static int qede_alloc_mem_rxq(struct qede_dev *edev, struct qede_rx_queue *rxq)
rxq->rx_buf_seg_size = roundup_pow_of_two(size);
} else {
rxq->rx_buf_seg_size = PAGE_SIZE;
+ edev->ndev->features &= ~NETIF_F_GRO_HW;
}
/* Allocate the parallel driver ring for Rx buffers */
@@ -1450,6 +1451,7 @@ static int qede_alloc_mem_rxq(struct qede_dev *edev, struct qede_rx_queue *rxq)
}
}
+ edev->gro_disable = !(edev->ndev->features & NETIF_F_GRO_HW);
if (!edev->gro_disable)
qede_set_tpa_param(rxq);
err:
@@ -1702,8 +1704,6 @@ static void qede_init_fp(struct qede_dev *edev)
snprintf(fp->name, sizeof(fp->name), "%s-fp-%d",
edev->ndev->name, queue_id);
}
-
- edev->gro_disable = !(edev->ndev->features & NETIF_F_GRO_HW);
}
static int qede_set_real_num_queues(struct qede_dev *edev)
diff --git a/drivers/net/ethernet/qlogic/qede/qede_ptp.c b/drivers/net/ethernet/qlogic/qede/qede_ptp.c
index f815435cf106..4c7f7a7fc151 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_ptp.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_ptp.c
@@ -247,6 +247,7 @@ static int qede_ptp_cfg_filters(struct qede_dev *edev)
break;
case HWTSTAMP_TX_ONESTEP_SYNC:
+ case HWTSTAMP_TX_ONESTEP_P2P:
DP_ERR(edev, "One-step timestamping is not supported\n");
return -ERANGE;
}
diff --git a/drivers/net/ethernet/qlogic/qla3xxx.c b/drivers/net/ethernet/qlogic/qla3xxx.c
index b4b8ba00ee01..0fade19e00d4 100644
--- a/drivers/net/ethernet/qlogic/qla3xxx.c
+++ b/drivers/net/ethernet/qlogic/qla3xxx.c
@@ -2756,6 +2756,9 @@ static int ql_alloc_large_buffers(struct ql3_adapter *qdev)
int err;
for (i = 0; i < qdev->num_large_buffers; i++) {
+ lrg_buf_cb = &qdev->lrg_buf[i];
+ memset(lrg_buf_cb, 0, sizeof(struct ql_rcv_buf_cb));
+
skb = netdev_alloc_skb(qdev->ndev,
qdev->lrg_buffer_len);
if (unlikely(!skb)) {
@@ -2766,11 +2769,7 @@ static int ql_alloc_large_buffers(struct ql3_adapter *qdev)
ql_free_large_buffers(qdev);
return -ENOMEM;
} else {
-
- lrg_buf_cb = &qdev->lrg_buf[i];
- memset(lrg_buf_cb, 0, sizeof(struct ql_rcv_buf_cb));
lrg_buf_cb->index = i;
- lrg_buf_cb->skb = skb;
/*
* We save some space to copy the ethhdr from first
* buffer
@@ -2792,6 +2791,7 @@ static int ql_alloc_large_buffers(struct ql3_adapter *qdev)
return -ENOMEM;
}
+ lrg_buf_cb->skb = skb;
dma_unmap_addr_set(lrg_buf_cb, mapaddr, map);
dma_unmap_len_set(lrg_buf_cb, maplen,
qdev->lrg_buffer_len -
@@ -3602,7 +3602,7 @@ static int ql3xxx_set_mac_address(struct net_device *ndev, void *p)
return 0;
}
-static void ql3xxx_tx_timeout(struct net_device *ndev)
+static void ql3xxx_tx_timeout(struct net_device *ndev, unsigned int txqueue)
{
struct ql3_adapter *qdev = netdev_priv(ndev);
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c
index a4cd6f2cfb86..75d83c3cbf27 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c
@@ -20,7 +20,7 @@ struct qlcnic_stats {
int stat_offset;
};
-#define QLC_SIZEOF(m) FIELD_SIZEOF(struct qlcnic_adapter, m)
+#define QLC_SIZEOF(m) sizeof_field(struct qlcnic_adapter, m)
#define QLC_OFF(m) offsetof(struct qlcnic_adapter, m)
static const u32 qlcnic_fw_dump_level[] = {
0x3, 0x7, 0xf, 0x1f, 0x3f, 0x7f, 0xff
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
index c07438db30ba..9dd6cb36f366 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
@@ -56,7 +56,7 @@ static int qlcnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent);
static void qlcnic_remove(struct pci_dev *pdev);
static int qlcnic_open(struct net_device *netdev);
static int qlcnic_close(struct net_device *netdev);
-static void qlcnic_tx_timeout(struct net_device *netdev);
+static void qlcnic_tx_timeout(struct net_device *netdev, unsigned int txqueue);
static void qlcnic_attach_work(struct work_struct *work);
static void qlcnic_fwinit_work(struct work_struct *work);
@@ -3068,7 +3068,7 @@ static void qlcnic_dump_rings(struct qlcnic_adapter *adapter)
}
-static void qlcnic_tx_timeout(struct net_device *netdev)
+static void qlcnic_tx_timeout(struct net_device *netdev, unsigned int txqueue)
{
struct qlcnic_adapter *adapter = netdev_priv(netdev);
diff --git a/drivers/net/ethernet/qualcomm/emac/emac.c b/drivers/net/ethernet/qualcomm/emac/emac.c
index 98f92268cbaa..522fad4cb2cd 100644
--- a/drivers/net/ethernet/qualcomm/emac/emac.c
+++ b/drivers/net/ethernet/qualcomm/emac/emac.c
@@ -282,7 +282,7 @@ static int emac_close(struct net_device *netdev)
}
/* Respond to a TX hang */
-static void emac_tx_timeout(struct net_device *netdev)
+static void emac_tx_timeout(struct net_device *netdev, unsigned int txqueue)
{
struct emac_adapter *adpt = netdev_priv(netdev);
diff --git a/drivers/net/ethernet/qualcomm/qca_spi.c b/drivers/net/ethernet/qualcomm/qca_spi.c
index baac016f3ec0..5a3b65a6eb4f 100644
--- a/drivers/net/ethernet/qualcomm/qca_spi.c
+++ b/drivers/net/ethernet/qualcomm/qca_spi.c
@@ -785,7 +785,7 @@ qcaspi_netdev_xmit(struct sk_buff *skb, struct net_device *dev)
}
static void
-qcaspi_netdev_tx_timeout(struct net_device *dev)
+qcaspi_netdev_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct qcaspi *qca = netdev_priv(dev);
diff --git a/drivers/net/ethernet/qualcomm/qca_uart.c b/drivers/net/ethernet/qualcomm/qca_uart.c
index 0981068504fa..375a844cd27c 100644
--- a/drivers/net/ethernet/qualcomm/qca_uart.c
+++ b/drivers/net/ethernet/qualcomm/qca_uart.c
@@ -248,7 +248,7 @@ out:
return NETDEV_TX_OK;
}
-static void qcauart_netdev_tx_timeout(struct net_device *dev)
+static void qcauart_netdev_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct qcauart *qca = netdev_priv(dev);
diff --git a/drivers/net/ethernet/rdc/r6040.c b/drivers/net/ethernet/rdc/r6040.c
index 274e5b4bc4ac..c23cb61bbd30 100644
--- a/drivers/net/ethernet/rdc/r6040.c
+++ b/drivers/net/ethernet/rdc/r6040.c
@@ -410,7 +410,7 @@ static void r6040_init_mac_regs(struct net_device *dev)
iowrite16(TM2TX, ioaddr + MTPR);
}
-static void r6040_tx_timeout(struct net_device *dev)
+static void r6040_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct r6040_private *priv = netdev_priv(dev);
void __iomem *ioaddr = priv->base;
diff --git a/drivers/net/ethernet/realtek/8139cp.c b/drivers/net/ethernet/realtek/8139cp.c
index 4f910c4f67b0..60d342f82fb3 100644
--- a/drivers/net/ethernet/realtek/8139cp.c
+++ b/drivers/net/ethernet/realtek/8139cp.c
@@ -1235,7 +1235,7 @@ static int cp_close (struct net_device *dev)
return 0;
}
-static void cp_tx_timeout(struct net_device *dev)
+static void cp_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct cp_private *cp = netdev_priv(dev);
unsigned long flags;
diff --git a/drivers/net/ethernet/realtek/8139too.c b/drivers/net/ethernet/realtek/8139too.c
index 55d01266e615..5caeb8368eab 100644
--- a/drivers/net/ethernet/realtek/8139too.c
+++ b/drivers/net/ethernet/realtek/8139too.c
@@ -642,7 +642,7 @@ static int mdio_read (struct net_device *dev, int phy_id, int location);
static void mdio_write (struct net_device *dev, int phy_id, int location,
int val);
static void rtl8139_start_thread(struct rtl8139_private *tp);
-static void rtl8139_tx_timeout (struct net_device *dev);
+static void rtl8139_tx_timeout (struct net_device *dev, unsigned int txqueue);
static void rtl8139_init_ring (struct net_device *dev);
static netdev_tx_t rtl8139_start_xmit (struct sk_buff *skb,
struct net_device *dev);
@@ -1700,7 +1700,7 @@ static void rtl8139_tx_timeout_task (struct work_struct *work)
spin_unlock_bh(&tp->rx_lock);
}
-static void rtl8139_tx_timeout (struct net_device *dev)
+static void rtl8139_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct rtl8139_private *tp = netdev_priv(dev);
diff --git a/drivers/net/ethernet/realtek/Makefile b/drivers/net/ethernet/realtek/Makefile
index d5304bad2372..2e1d78b106b0 100644
--- a/drivers/net/ethernet/realtek/Makefile
+++ b/drivers/net/ethernet/realtek/Makefile
@@ -6,5 +6,5 @@
obj-$(CONFIG_8139CP) += 8139cp.o
obj-$(CONFIG_8139TOO) += 8139too.o
obj-$(CONFIG_ATP) += atp.o
-r8169-objs += r8169_main.o r8169_firmware.o
+r8169-objs += r8169_main.o r8169_firmware.o r8169_phy_config.o
obj-$(CONFIG_R8169) += r8169.o
diff --git a/drivers/net/ethernet/realtek/atp.c b/drivers/net/ethernet/realtek/atp.c
index 58e0ca9093d3..9e3b35c97e63 100644
--- a/drivers/net/ethernet/realtek/atp.c
+++ b/drivers/net/ethernet/realtek/atp.c
@@ -204,7 +204,7 @@ static void net_rx(struct net_device *dev);
static void read_block(long ioaddr, int length, unsigned char *buffer, int data_mode);
static int net_close(struct net_device *dev);
static void set_rx_mode(struct net_device *dev);
-static void tx_timeout(struct net_device *dev);
+static void tx_timeout(struct net_device *dev, unsigned int txqueue);
/* A list of all installed ATP devices, for removing the driver module. */
@@ -533,7 +533,7 @@ static void write_packet(long ioaddr, int length, unsigned char *packet, int pad
outb(Ctrl_HNibWrite | Ctrl_SelData | Ctrl_IRQEN, ioaddr + PAR_CONTROL);
}
-static void tx_timeout(struct net_device *dev)
+static void tx_timeout(struct net_device *dev, unsigned int txqueue)
{
long ioaddr = dev->base_addr;
diff --git a/drivers/net/ethernet/realtek/r8169.h b/drivers/net/ethernet/realtek/r8169.h
new file mode 100644
index 000000000000..22a6a057b11e
--- /dev/null
+++ b/drivers/net/ethernet/realtek/r8169.h
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* r8169.h: RealTek 8169/8168/8101 ethernet driver.
+ *
+ * Copyright (c) 2002 ShuChen <[email protected]>
+ * Copyright (c) 2003 - 2007 Francois Romieu <[email protected]>
+ * Copyright (c) a lot of people too. Please respect their work.
+ *
+ * See MAINTAINERS file for support contact information.
+ */
+
+#include <linux/types.h>
+#include <linux/phy.h>
+
+enum mac_version {
+ /* support for ancient RTL_GIGA_MAC_VER_01 has been removed */
+ RTL_GIGA_MAC_VER_02,
+ RTL_GIGA_MAC_VER_03,
+ RTL_GIGA_MAC_VER_04,
+ RTL_GIGA_MAC_VER_05,
+ RTL_GIGA_MAC_VER_06,
+ RTL_GIGA_MAC_VER_07,
+ RTL_GIGA_MAC_VER_08,
+ RTL_GIGA_MAC_VER_09,
+ RTL_GIGA_MAC_VER_10,
+ RTL_GIGA_MAC_VER_11,
+ RTL_GIGA_MAC_VER_12,
+ RTL_GIGA_MAC_VER_13,
+ RTL_GIGA_MAC_VER_14,
+ RTL_GIGA_MAC_VER_15,
+ RTL_GIGA_MAC_VER_16,
+ RTL_GIGA_MAC_VER_17,
+ RTL_GIGA_MAC_VER_18,
+ RTL_GIGA_MAC_VER_19,
+ RTL_GIGA_MAC_VER_20,
+ RTL_GIGA_MAC_VER_21,
+ RTL_GIGA_MAC_VER_22,
+ RTL_GIGA_MAC_VER_23,
+ RTL_GIGA_MAC_VER_24,
+ RTL_GIGA_MAC_VER_25,
+ RTL_GIGA_MAC_VER_26,
+ RTL_GIGA_MAC_VER_27,
+ RTL_GIGA_MAC_VER_28,
+ RTL_GIGA_MAC_VER_29,
+ RTL_GIGA_MAC_VER_30,
+ RTL_GIGA_MAC_VER_31,
+ RTL_GIGA_MAC_VER_32,
+ RTL_GIGA_MAC_VER_33,
+ RTL_GIGA_MAC_VER_34,
+ RTL_GIGA_MAC_VER_35,
+ RTL_GIGA_MAC_VER_36,
+ RTL_GIGA_MAC_VER_37,
+ RTL_GIGA_MAC_VER_38,
+ RTL_GIGA_MAC_VER_39,
+ RTL_GIGA_MAC_VER_40,
+ RTL_GIGA_MAC_VER_41,
+ RTL_GIGA_MAC_VER_42,
+ RTL_GIGA_MAC_VER_43,
+ RTL_GIGA_MAC_VER_44,
+ RTL_GIGA_MAC_VER_45,
+ RTL_GIGA_MAC_VER_46,
+ RTL_GIGA_MAC_VER_47,
+ RTL_GIGA_MAC_VER_48,
+ RTL_GIGA_MAC_VER_49,
+ RTL_GIGA_MAC_VER_50,
+ RTL_GIGA_MAC_VER_51,
+ RTL_GIGA_MAC_VER_52,
+ RTL_GIGA_MAC_VER_60,
+ RTL_GIGA_MAC_VER_61,
+ RTL_GIGA_MAC_NONE
+};
+
+struct rtl8169_private;
+
+void r8169_apply_firmware(struct rtl8169_private *tp);
+u16 rtl8168h_2_get_adc_bias_ioffset(struct rtl8169_private *tp);
+u8 rtl8168d_efuse_read(struct rtl8169_private *tp, int reg_addr);
+void r8169_hw_phy_config(struct rtl8169_private *tp, struct phy_device *phydev,
+ enum mac_version ver);
diff --git a/drivers/net/ethernet/realtek/r8169_firmware.c b/drivers/net/ethernet/realtek/r8169_firmware.c
index 355cc810e322..cbc6b846ded5 100644
--- a/drivers/net/ethernet/realtek/r8169_firmware.c
+++ b/drivers/net/ethernet/realtek/r8169_firmware.c
@@ -37,7 +37,7 @@ struct fw_info {
u8 chksum;
} __packed;
-#define FW_OPCODE_SIZE FIELD_SIZEOF(struct rtl_fw_phy_action, code[0])
+#define FW_OPCODE_SIZE sizeof_field(struct rtl_fw_phy_action, code[0])
static bool rtl_fw_format_ok(struct rtl_fw *rtl_fw)
{
diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 67a4d5d45e3a..6d699df7d27f 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -31,6 +31,7 @@
#include <linux/ipv6.h>
#include <net/ip6_checksum.h>
+#include "r8169.h"
#include "r8169_firmware.h"
#define MODULENAME "r8169"
@@ -84,64 +85,6 @@
#define RTL_R16(tp, reg) readw(tp->mmio_addr + (reg))
#define RTL_R32(tp, reg) readl(tp->mmio_addr + (reg))
-enum mac_version {
- /* support for ancient RTL_GIGA_MAC_VER_01 has been removed */
- RTL_GIGA_MAC_VER_02,
- RTL_GIGA_MAC_VER_03,
- RTL_GIGA_MAC_VER_04,
- RTL_GIGA_MAC_VER_05,
- RTL_GIGA_MAC_VER_06,
- RTL_GIGA_MAC_VER_07,
- RTL_GIGA_MAC_VER_08,
- RTL_GIGA_MAC_VER_09,
- RTL_GIGA_MAC_VER_10,
- RTL_GIGA_MAC_VER_11,
- RTL_GIGA_MAC_VER_12,
- RTL_GIGA_MAC_VER_13,
- RTL_GIGA_MAC_VER_14,
- RTL_GIGA_MAC_VER_15,
- RTL_GIGA_MAC_VER_16,
- RTL_GIGA_MAC_VER_17,
- RTL_GIGA_MAC_VER_18,
- RTL_GIGA_MAC_VER_19,
- RTL_GIGA_MAC_VER_20,
- RTL_GIGA_MAC_VER_21,
- RTL_GIGA_MAC_VER_22,
- RTL_GIGA_MAC_VER_23,
- RTL_GIGA_MAC_VER_24,
- RTL_GIGA_MAC_VER_25,
- RTL_GIGA_MAC_VER_26,
- RTL_GIGA_MAC_VER_27,
- RTL_GIGA_MAC_VER_28,
- RTL_GIGA_MAC_VER_29,
- RTL_GIGA_MAC_VER_30,
- RTL_GIGA_MAC_VER_31,
- RTL_GIGA_MAC_VER_32,
- RTL_GIGA_MAC_VER_33,
- RTL_GIGA_MAC_VER_34,
- RTL_GIGA_MAC_VER_35,
- RTL_GIGA_MAC_VER_36,
- RTL_GIGA_MAC_VER_37,
- RTL_GIGA_MAC_VER_38,
- RTL_GIGA_MAC_VER_39,
- RTL_GIGA_MAC_VER_40,
- RTL_GIGA_MAC_VER_41,
- RTL_GIGA_MAC_VER_42,
- RTL_GIGA_MAC_VER_43,
- RTL_GIGA_MAC_VER_44,
- RTL_GIGA_MAC_VER_45,
- RTL_GIGA_MAC_VER_46,
- RTL_GIGA_MAC_VER_47,
- RTL_GIGA_MAC_VER_48,
- RTL_GIGA_MAC_VER_49,
- RTL_GIGA_MAC_VER_50,
- RTL_GIGA_MAC_VER_51,
- RTL_GIGA_MAC_VER_52,
- RTL_GIGA_MAC_VER_60,
- RTL_GIGA_MAC_VER_61,
- RTL_GIGA_MAC_NONE
-};
-
#define JUMBO_1K ETH_DATA_LEN
#define JUMBO_4K (4*1024 - ETH_HLEN - 2)
#define JUMBO_6K (6*1024 - ETH_HLEN - 2)
@@ -492,6 +435,7 @@ enum rtl_register_content {
/* CPlusCmd p.31 */
EnableBist = (1 << 15), // 8168 8101
Mac_dbgo_oe = (1 << 14), // 8168 8101
+ EnAnaPLL = (1 << 14), // 8169
Normal_mode = (1 << 13), // unused
Force_half_dup = (1 << 12), // 8168 8101
Force_rxflow_en = (1 << 11), // 8168 8101
@@ -1078,52 +1022,6 @@ static int rtl_readphy(struct rtl8169_private *tp, int location)
}
}
-static void rtl_patchphy(struct rtl8169_private *tp, int reg_addr, int value)
-{
- rtl_writephy(tp, reg_addr, rtl_readphy(tp, reg_addr) | value);
-}
-
-static void rtl_w0w1_phy(struct rtl8169_private *tp, int reg_addr, int p, int m)
-{
- int val;
-
- val = rtl_readphy(tp, reg_addr);
- rtl_writephy(tp, reg_addr, (val & ~m) | p);
-}
-
-static void r8168d_modify_extpage(struct phy_device *phydev, int extpage,
- int reg, u16 mask, u16 val)
-{
- int oldpage = phy_select_page(phydev, 0x0007);
-
- __phy_write(phydev, 0x1e, extpage);
- __phy_modify(phydev, reg, mask, val);
-
- phy_restore_page(phydev, oldpage, 0);
-}
-
-static void r8168d_phy_param(struct phy_device *phydev, u16 parm,
- u16 mask, u16 val)
-{
- int oldpage = phy_select_page(phydev, 0x0005);
-
- __phy_write(phydev, 0x05, parm);
- __phy_modify(phydev, 0x06, mask, val);
-
- phy_restore_page(phydev, oldpage, 0);
-}
-
-static void r8168g_phy_param(struct phy_device *phydev, u16 parm,
- u16 mask, u16 val)
-{
- int oldpage = phy_select_page(phydev, 0x0a43);
-
- __phy_write(phydev, 0x13, parm);
- __phy_modify(phydev, 0x14, mask, val);
-
- phy_restore_page(phydev, oldpage, 0);
-}
-
DECLARE_RTL_COND(rtl_ephyar_cond)
{
return RTL_R32(tp, EPHYAR) & EPHYAR_FLAG;
@@ -1372,7 +1270,7 @@ DECLARE_RTL_COND(rtl_efusear_cond)
return RTL_R32(tp, EFUSEAR) & EFUSEAR_FLAG;
}
-static u8 rtl8168d_efuse_read(struct rtl8169_private *tp, int reg_addr)
+u8 rtl8168d_efuse_read(struct rtl8169_private *tp, int reg_addr)
{
RTL_W32(tp, EFUSEAR, (reg_addr & EFUSEAR_REG_MASK) << EFUSEAR_REG_SHIFT);
@@ -2268,22 +2166,6 @@ static void rtl8169_get_mac_version(struct rtl8169_private *tp)
}
}
-struct phy_reg {
- u16 reg;
- u16 val;
-};
-
-static void __rtl_writephy_batch(struct rtl8169_private *tp,
- const struct phy_reg *regs, int len)
-{
- while (len-- > 0) {
- rtl_writephy(tp, regs->reg, regs->val);
- regs++;
- }
-}
-
-#define rtl_writephy_batch(tp, a) __rtl_writephy_batch(tp, a, ARRAY_SIZE(a))
-
static void rtl_release_firmware(struct rtl8169_private *tp)
{
if (tp->rtl_fw) {
@@ -2293,7 +2175,7 @@ static void rtl_release_firmware(struct rtl8169_private *tp)
}
}
-static void rtl_apply_firmware(struct rtl8169_private *tp)
+void r8169_apply_firmware(struct rtl8169_private *tp)
{
/* TODO: release firmware if rtl_fw_write_firmware signals failure. */
if (tp->rtl_fw)
@@ -2315,594 +2197,6 @@ static void rtl8125_config_eee_mac(struct rtl8169_private *tp)
r8168_mac_ocp_modify(tp, 0xeb62, 0, BIT(2) | BIT(1));
}
-static void rtl8168f_config_eee_phy(struct rtl8169_private *tp)
-{
- struct phy_device *phydev = tp->phydev;
-
- r8168d_modify_extpage(phydev, 0x0020, 0x15, 0, BIT(8));
- r8168d_phy_param(phydev, 0x8b85, 0, BIT(13));
-}
-
-static void rtl8168g_config_eee_phy(struct rtl8169_private *tp)
-{
- phy_modify_paged(tp->phydev, 0x0a43, 0x11, 0, BIT(4));
-}
-
-static void rtl8168h_config_eee_phy(struct rtl8169_private *tp)
-{
- struct phy_device *phydev = tp->phydev;
-
- rtl8168g_config_eee_phy(tp);
-
- phy_modify_paged(phydev, 0xa4a, 0x11, 0x0000, 0x0200);
- phy_modify_paged(phydev, 0xa42, 0x14, 0x0000, 0x0080);
-}
-
-static void rtl8125_config_eee_phy(struct rtl8169_private *tp)
-{
- struct phy_device *phydev = tp->phydev;
-
- rtl8168h_config_eee_phy(tp);
-
- phy_modify_paged(phydev, 0xa6d, 0x12, 0x0001, 0x0000);
- phy_modify_paged(phydev, 0xa6d, 0x14, 0x0010, 0x0000);
-}
-
-static void rtl8169s_hw_phy_config(struct rtl8169_private *tp)
-{
- static const struct phy_reg phy_reg_init[] = {
- { 0x1f, 0x0001 },
- { 0x06, 0x006e },
- { 0x08, 0x0708 },
- { 0x15, 0x4000 },
- { 0x18, 0x65c7 },
-
- { 0x1f, 0x0001 },
- { 0x03, 0x00a1 },
- { 0x02, 0x0008 },
- { 0x01, 0x0120 },
- { 0x00, 0x1000 },
- { 0x04, 0x0800 },
- { 0x04, 0x0000 },
-
- { 0x03, 0xff41 },
- { 0x02, 0xdf60 },
- { 0x01, 0x0140 },
- { 0x00, 0x0077 },
- { 0x04, 0x7800 },
- { 0x04, 0x7000 },
-
- { 0x03, 0x802f },
- { 0x02, 0x4f02 },
- { 0x01, 0x0409 },
- { 0x00, 0xf0f9 },
- { 0x04, 0x9800 },
- { 0x04, 0x9000 },
-
- { 0x03, 0xdf01 },
- { 0x02, 0xdf20 },
- { 0x01, 0xff95 },
- { 0x00, 0xba00 },
- { 0x04, 0xa800 },
- { 0x04, 0xa000 },
-
- { 0x03, 0xff41 },
- { 0x02, 0xdf20 },
- { 0x01, 0x0140 },
- { 0x00, 0x00bb },
- { 0x04, 0xb800 },
- { 0x04, 0xb000 },
-
- { 0x03, 0xdf41 },
- { 0x02, 0xdc60 },
- { 0x01, 0x6340 },
- { 0x00, 0x007d },
- { 0x04, 0xd800 },
- { 0x04, 0xd000 },
-
- { 0x03, 0xdf01 },
- { 0x02, 0xdf20 },
- { 0x01, 0x100a },
- { 0x00, 0xa0ff },
- { 0x04, 0xf800 },
- { 0x04, 0xf000 },
-
- { 0x1f, 0x0000 },
- { 0x0b, 0x0000 },
- { 0x00, 0x9200 }
- };
-
- rtl_writephy_batch(tp, phy_reg_init);
-}
-
-static void rtl8169sb_hw_phy_config(struct rtl8169_private *tp)
-{
- phy_write_paged(tp->phydev, 0x0002, 0x01, 0x90d0);
-}
-
-static void rtl8169scd_hw_phy_config_quirk(struct rtl8169_private *tp)
-{
- struct pci_dev *pdev = tp->pci_dev;
-
- if ((pdev->subsystem_vendor != PCI_VENDOR_ID_GIGABYTE) ||
- (pdev->subsystem_device != 0xe000))
- return;
-
- phy_write_paged(tp->phydev, 0x0001, 0x10, 0xf01b);
-}
-
-static void rtl8169scd_hw_phy_config(struct rtl8169_private *tp)
-{
- static const struct phy_reg phy_reg_init[] = {
- { 0x1f, 0x0001 },
- { 0x04, 0x0000 },
- { 0x03, 0x00a1 },
- { 0x02, 0x0008 },
- { 0x01, 0x0120 },
- { 0x00, 0x1000 },
- { 0x04, 0x0800 },
- { 0x04, 0x9000 },
- { 0x03, 0x802f },
- { 0x02, 0x4f02 },
- { 0x01, 0x0409 },
- { 0x00, 0xf099 },
- { 0x04, 0x9800 },
- { 0x04, 0xa000 },
- { 0x03, 0xdf01 },
- { 0x02, 0xdf20 },
- { 0x01, 0xff95 },
- { 0x00, 0xba00 },
- { 0x04, 0xa800 },
- { 0x04, 0xf000 },
- { 0x03, 0xdf01 },
- { 0x02, 0xdf20 },
- { 0x01, 0x101a },
- { 0x00, 0xa0ff },
- { 0x04, 0xf800 },
- { 0x04, 0x0000 },
- { 0x1f, 0x0000 },
-
- { 0x1f, 0x0001 },
- { 0x10, 0xf41b },
- { 0x14, 0xfb54 },
- { 0x18, 0xf5c7 },
- { 0x1f, 0x0000 },
-
- { 0x1f, 0x0001 },
- { 0x17, 0x0cc0 },
- { 0x1f, 0x0000 }
- };
-
- rtl_writephy_batch(tp, phy_reg_init);
-
- rtl8169scd_hw_phy_config_quirk(tp);
-}
-
-static void rtl8169sce_hw_phy_config(struct rtl8169_private *tp)
-{
- static const struct phy_reg phy_reg_init[] = {
- { 0x1f, 0x0001 },
- { 0x04, 0x0000 },
- { 0x03, 0x00a1 },
- { 0x02, 0x0008 },
- { 0x01, 0x0120 },
- { 0x00, 0x1000 },
- { 0x04, 0x0800 },
- { 0x04, 0x9000 },
- { 0x03, 0x802f },
- { 0x02, 0x4f02 },
- { 0x01, 0x0409 },
- { 0x00, 0xf099 },
- { 0x04, 0x9800 },
- { 0x04, 0xa000 },
- { 0x03, 0xdf01 },
- { 0x02, 0xdf20 },
- { 0x01, 0xff95 },
- { 0x00, 0xba00 },
- { 0x04, 0xa800 },
- { 0x04, 0xf000 },
- { 0x03, 0xdf01 },
- { 0x02, 0xdf20 },
- { 0x01, 0x101a },
- { 0x00, 0xa0ff },
- { 0x04, 0xf800 },
- { 0x04, 0x0000 },
- { 0x1f, 0x0000 },
-
- { 0x1f, 0x0001 },
- { 0x0b, 0x8480 },
- { 0x1f, 0x0000 },
-
- { 0x1f, 0x0001 },
- { 0x18, 0x67c7 },
- { 0x04, 0x2000 },
- { 0x03, 0x002f },
- { 0x02, 0x4360 },
- { 0x01, 0x0109 },
- { 0x00, 0x3022 },
- { 0x04, 0x2800 },
- { 0x1f, 0x0000 },
-
- { 0x1f, 0x0001 },
- { 0x17, 0x0cc0 },
- { 0x1f, 0x0000 }
- };
-
- rtl_writephy_batch(tp, phy_reg_init);
-}
-
-static void rtl8168bb_hw_phy_config(struct rtl8169_private *tp)
-{
- rtl_writephy(tp, 0x1f, 0x0001);
- rtl_patchphy(tp, 0x16, 1 << 0);
- rtl_writephy(tp, 0x10, 0xf41b);
- rtl_writephy(tp, 0x1f, 0x0000);
-}
-
-static void rtl8168bef_hw_phy_config(struct rtl8169_private *tp)
-{
- phy_write_paged(tp->phydev, 0x0001, 0x10, 0xf41b);
-}
-
-static void rtl8168cp_1_hw_phy_config(struct rtl8169_private *tp)
-{
- phy_write(tp->phydev, 0x1d, 0x0f00);
- phy_write_paged(tp->phydev, 0x0002, 0x0c, 0x1ec8);
-}
-
-static void rtl8168cp_2_hw_phy_config(struct rtl8169_private *tp)
-{
- phy_set_bits(tp->phydev, 0x14, BIT(5));
- phy_set_bits(tp->phydev, 0x0d, BIT(5));
- phy_write_paged(tp->phydev, 0x0001, 0x1d, 0x3d98);
-}
-
-static void rtl8168c_1_hw_phy_config(struct rtl8169_private *tp)
-{
- static const struct phy_reg phy_reg_init[] = {
- { 0x1f, 0x0001 },
- { 0x12, 0x2300 },
- { 0x1f, 0x0002 },
- { 0x00, 0x88d4 },
- { 0x01, 0x82b1 },
- { 0x03, 0x7002 },
- { 0x08, 0x9e30 },
- { 0x09, 0x01f0 },
- { 0x0a, 0x5500 },
- { 0x0c, 0x00c8 },
- { 0x1f, 0x0003 },
- { 0x12, 0xc096 },
- { 0x16, 0x000a },
- { 0x1f, 0x0000 },
- { 0x1f, 0x0000 },
- { 0x09, 0x2000 },
- { 0x09, 0x0000 }
- };
-
- rtl_writephy_batch(tp, phy_reg_init);
-
- rtl_patchphy(tp, 0x14, 1 << 5);
- rtl_patchphy(tp, 0x0d, 1 << 5);
- rtl_writephy(tp, 0x1f, 0x0000);
-}
-
-static void rtl8168c_2_hw_phy_config(struct rtl8169_private *tp)
-{
- static const struct phy_reg phy_reg_init[] = {
- { 0x1f, 0x0001 },
- { 0x12, 0x2300 },
- { 0x03, 0x802f },
- { 0x02, 0x4f02 },
- { 0x01, 0x0409 },
- { 0x00, 0xf099 },
- { 0x04, 0x9800 },
- { 0x04, 0x9000 },
- { 0x1d, 0x3d98 },
- { 0x1f, 0x0002 },
- { 0x0c, 0x7eb8 },
- { 0x06, 0x0761 },
- { 0x1f, 0x0003 },
- { 0x16, 0x0f0a },
- { 0x1f, 0x0000 }
- };
-
- rtl_writephy_batch(tp, phy_reg_init);
-
- rtl_patchphy(tp, 0x16, 1 << 0);
- rtl_patchphy(tp, 0x14, 1 << 5);
- rtl_patchphy(tp, 0x0d, 1 << 5);
- rtl_writephy(tp, 0x1f, 0x0000);
-}
-
-static void rtl8168c_3_hw_phy_config(struct rtl8169_private *tp)
-{
- static const struct phy_reg phy_reg_init[] = {
- { 0x1f, 0x0001 },
- { 0x12, 0x2300 },
- { 0x1d, 0x3d98 },
- { 0x1f, 0x0002 },
- { 0x0c, 0x7eb8 },
- { 0x06, 0x5461 },
- { 0x1f, 0x0003 },
- { 0x16, 0x0f0a },
- { 0x1f, 0x0000 }
- };
-
- rtl_writephy_batch(tp, phy_reg_init);
-
- rtl_patchphy(tp, 0x16, 1 << 0);
- rtl_patchphy(tp, 0x14, 1 << 5);
- rtl_patchphy(tp, 0x0d, 1 << 5);
- rtl_writephy(tp, 0x1f, 0x0000);
-}
-
-static const struct phy_reg rtl8168d_1_phy_reg_init_0[] = {
- /* Channel Estimation */
- { 0x1f, 0x0001 },
- { 0x06, 0x4064 },
- { 0x07, 0x2863 },
- { 0x08, 0x059c },
- { 0x09, 0x26b4 },
- { 0x0a, 0x6a19 },
- { 0x0b, 0xdcc8 },
- { 0x10, 0xf06d },
- { 0x14, 0x7f68 },
- { 0x18, 0x7fd9 },
- { 0x1c, 0xf0ff },
- { 0x1d, 0x3d9c },
- { 0x1f, 0x0003 },
- { 0x12, 0xf49f },
- { 0x13, 0x070b },
- { 0x1a, 0x05ad },
- { 0x14, 0x94c0 },
-
- /*
- * Tx Error Issue
- * Enhance line driver power
- */
- { 0x1f, 0x0002 },
- { 0x06, 0x5561 },
- { 0x1f, 0x0005 },
- { 0x05, 0x8332 },
- { 0x06, 0x5561 },
-
- /*
- * Can not link to 1Gbps with bad cable
- * Decrease SNR threshold form 21.07dB to 19.04dB
- */
- { 0x1f, 0x0001 },
- { 0x17, 0x0cc0 },
-
- { 0x1f, 0x0000 },
- { 0x0d, 0xf880 }
-};
-
-static const struct phy_reg rtl8168d_1_phy_reg_init_1[] = {
- { 0x1f, 0x0002 },
- { 0x05, 0x669a },
- { 0x1f, 0x0005 },
- { 0x05, 0x8330 },
- { 0x06, 0x669a },
- { 0x1f, 0x0002 }
-};
-
-static void rtl8168d_apply_firmware_cond(struct rtl8169_private *tp, u16 val)
-{
- u16 reg_val;
-
- rtl_writephy(tp, 0x1f, 0x0005);
- rtl_writephy(tp, 0x05, 0x001b);
- reg_val = rtl_readphy(tp, 0x06);
- rtl_writephy(tp, 0x1f, 0x0000);
-
- if (reg_val != val)
- netif_warn(tp, hw, tp->dev, "chipset not ready for firmware\n");
- else
- rtl_apply_firmware(tp);
-}
-
-static void rtl8168d_1_hw_phy_config(struct rtl8169_private *tp)
-{
- rtl_writephy_batch(tp, rtl8168d_1_phy_reg_init_0);
-
- /*
- * Rx Error Issue
- * Fine Tune Switching regulator parameter
- */
- rtl_writephy(tp, 0x1f, 0x0002);
- rtl_w0w1_phy(tp, 0x0b, 0x0010, 0x00ef);
- rtl_w0w1_phy(tp, 0x0c, 0xa200, 0x5d00);
-
- if (rtl8168d_efuse_read(tp, 0x01) == 0xb1) {
- int val;
-
- rtl_writephy_batch(tp, rtl8168d_1_phy_reg_init_1);
-
- val = rtl_readphy(tp, 0x0d);
-
- if ((val & 0x00ff) != 0x006c) {
- static const u32 set[] = {
- 0x0065, 0x0066, 0x0067, 0x0068,
- 0x0069, 0x006a, 0x006b, 0x006c
- };
- int i;
-
- rtl_writephy(tp, 0x1f, 0x0002);
-
- val &= 0xff00;
- for (i = 0; i < ARRAY_SIZE(set); i++)
- rtl_writephy(tp, 0x0d, val | set[i]);
- }
- } else {
- phy_write_paged(tp->phydev, 0x0002, 0x05, 0x6662);
- r8168d_phy_param(tp->phydev, 0x8330, 0xffff, 0x6662);
- }
-
- /* RSET couple improve */
- rtl_writephy(tp, 0x1f, 0x0002);
- rtl_patchphy(tp, 0x0d, 0x0300);
- rtl_patchphy(tp, 0x0f, 0x0010);
-
- /* Fine tune PLL performance */
- rtl_writephy(tp, 0x1f, 0x0002);
- rtl_w0w1_phy(tp, 0x02, 0x0100, 0x0600);
- rtl_w0w1_phy(tp, 0x03, 0x0000, 0xe000);
- rtl_writephy(tp, 0x1f, 0x0000);
-
- rtl8168d_apply_firmware_cond(tp, 0xbf00);
-}
-
-static void rtl8168d_2_hw_phy_config(struct rtl8169_private *tp)
-{
- rtl_writephy_batch(tp, rtl8168d_1_phy_reg_init_0);
-
- if (rtl8168d_efuse_read(tp, 0x01) == 0xb1) {
- int val;
-
- rtl_writephy_batch(tp, rtl8168d_1_phy_reg_init_1);
-
- val = rtl_readphy(tp, 0x0d);
- if ((val & 0x00ff) != 0x006c) {
- static const u32 set[] = {
- 0x0065, 0x0066, 0x0067, 0x0068,
- 0x0069, 0x006a, 0x006b, 0x006c
- };
- int i;
-
- rtl_writephy(tp, 0x1f, 0x0002);
-
- val &= 0xff00;
- for (i = 0; i < ARRAY_SIZE(set); i++)
- rtl_writephy(tp, 0x0d, val | set[i]);
- }
- } else {
- phy_write_paged(tp->phydev, 0x0002, 0x05, 0x2642);
- r8168d_phy_param(tp->phydev, 0x8330, 0xffff, 0x2642);
- }
-
- /* Fine tune PLL performance */
- rtl_writephy(tp, 0x1f, 0x0002);
- rtl_w0w1_phy(tp, 0x02, 0x0100, 0x0600);
- rtl_w0w1_phy(tp, 0x03, 0x0000, 0xe000);
-
- /* Switching regulator Slew rate */
- rtl_writephy(tp, 0x1f, 0x0002);
- rtl_patchphy(tp, 0x0f, 0x0017);
- rtl_writephy(tp, 0x1f, 0x0000);
-
- rtl8168d_apply_firmware_cond(tp, 0xb300);
-}
-
-static void rtl8168d_3_hw_phy_config(struct rtl8169_private *tp)
-{
- static const struct phy_reg phy_reg_init[] = {
- { 0x1f, 0x0002 },
- { 0x10, 0x0008 },
- { 0x0d, 0x006c },
-
- { 0x1f, 0x0000 },
- { 0x0d, 0xf880 },
-
- { 0x1f, 0x0001 },
- { 0x17, 0x0cc0 },
-
- { 0x1f, 0x0001 },
- { 0x0b, 0xa4d8 },
- { 0x09, 0x281c },
- { 0x07, 0x2883 },
- { 0x0a, 0x6b35 },
- { 0x1d, 0x3da4 },
- { 0x1c, 0xeffd },
- { 0x14, 0x7f52 },
- { 0x18, 0x7fc6 },
- { 0x08, 0x0601 },
- { 0x06, 0x4063 },
- { 0x10, 0xf074 },
- { 0x1f, 0x0003 },
- { 0x13, 0x0789 },
- { 0x12, 0xf4bd },
- { 0x1a, 0x04fd },
- { 0x14, 0x84b0 },
- { 0x1f, 0x0000 },
- { 0x00, 0x9200 },
-
- { 0x1f, 0x0005 },
- { 0x01, 0x0340 },
- { 0x1f, 0x0001 },
- { 0x04, 0x4000 },
- { 0x03, 0x1d21 },
- { 0x02, 0x0c32 },
- { 0x01, 0x0200 },
- { 0x00, 0x5554 },
- { 0x04, 0x4800 },
- { 0x04, 0x4000 },
- { 0x04, 0xf000 },
- { 0x03, 0xdf01 },
- { 0x02, 0xdf20 },
- { 0x01, 0x101a },
- { 0x00, 0xa0ff },
- { 0x04, 0xf800 },
- { 0x04, 0xf000 },
- { 0x1f, 0x0000 },
- };
-
- rtl_writephy_batch(tp, phy_reg_init);
-
- r8168d_modify_extpage(tp->phydev, 0x0023, 0x16, 0xffff, 0x0000);
-}
-
-static void rtl8168d_4_hw_phy_config(struct rtl8169_private *tp)
-{
- phy_write_paged(tp->phydev, 0x0001, 0x17, 0x0cc0);
- r8168d_modify_extpage(tp->phydev, 0x002d, 0x18, 0xffff, 0x0040);
- phy_set_bits(tp->phydev, 0x0d, BIT(5));
-}
-
-static void rtl8168e_1_hw_phy_config(struct rtl8169_private *tp)
-{
- static const struct phy_reg phy_reg_init[] = {
- /* Channel estimation fine tune */
- { 0x1f, 0x0001 },
- { 0x0b, 0x6c20 },
- { 0x07, 0x2872 },
- { 0x1c, 0xefff },
- { 0x1f, 0x0003 },
- { 0x14, 0x6420 },
- { 0x1f, 0x0000 },
- };
- struct phy_device *phydev = tp->phydev;
-
- rtl_apply_firmware(tp);
-
- /* Enable Delay cap */
- r8168d_phy_param(phydev, 0x8b80, 0xffff, 0xc896);
-
- rtl_writephy_batch(tp, phy_reg_init);
-
- /* Update PFM & 10M TX idle timer */
- r8168d_modify_extpage(phydev, 0x002f, 0x15, 0xffff, 0x1919);
-
- r8168d_modify_extpage(phydev, 0x00ac, 0x18, 0xffff, 0x0006);
-
- /* DCO enable for 10M IDLE Power */
- r8168d_modify_extpage(phydev, 0x0023, 0x17, 0x0000, 0x0006);
-
- /* For impedance matching */
- phy_modify_paged(phydev, 0x0002, 0x08, 0x7f00, 0x8000);
-
- /* PHY auto speed down */
- r8168d_modify_extpage(phydev, 0x002d, 0x18, 0x0000, 0x0050);
- phy_set_bits(phydev, 0x14, BIT(15));
-
- r8168d_phy_param(phydev, 0x8b86, 0x0000, 0x0001);
- r8168d_phy_param(phydev, 0x8b85, 0x2000, 0x0000);
-
- r8168d_modify_extpage(phydev, 0x0020, 0x15, 0x1100, 0x0000);
- phy_write_paged(phydev, 0x0006, 0x00, 0x5a00);
-
- phy_write_mmd(phydev, MDIO_MMD_AN, MDIO_AN_EEE_ADV, 0x0000);
-}
-
static void rtl_rar_exgmac_set(struct rtl8169_private *tp, u8 *addr)
{
const u16 w[] = {
@@ -2917,698 +2211,20 @@ static void rtl_rar_exgmac_set(struct rtl8169_private *tp, u8 *addr)
rtl_eri_write(tp, 0xf4, ERIAR_MASK_1111, w[1] | (w[2] << 16));
}
-static void rtl8168e_2_hw_phy_config(struct rtl8169_private *tp)
-{
- struct phy_device *phydev = tp->phydev;
-
- rtl_apply_firmware(tp);
-
- /* Enable Delay cap */
- r8168d_modify_extpage(phydev, 0x00ac, 0x18, 0xffff, 0x0006);
-
- /* Channel estimation fine tune */
- phy_write_paged(phydev, 0x0003, 0x09, 0xa20f);
-
- /* Green Setting */
- r8168d_phy_param(phydev, 0x8b5b, 0xffff, 0x9222);
- r8168d_phy_param(phydev, 0x8b6d, 0xffff, 0x8000);
- r8168d_phy_param(phydev, 0x8b76, 0xffff, 0x8000);
-
- /* For 4-corner performance improve */
- rtl_writephy(tp, 0x1f, 0x0005);
- rtl_writephy(tp, 0x05, 0x8b80);
- rtl_w0w1_phy(tp, 0x17, 0x0006, 0x0000);
- rtl_writephy(tp, 0x1f, 0x0000);
-
- /* PHY auto speed down */
- r8168d_modify_extpage(phydev, 0x002d, 0x18, 0x0000, 0x0010);
- phy_set_bits(phydev, 0x14, BIT(15));
-
- /* improve 10M EEE waveform */
- r8168d_phy_param(phydev, 0x8b86, 0x0000, 0x0001);
-
- /* Improve 2-pair detection performance */
- r8168d_phy_param(phydev, 0x8b85, 0x0000, 0x4000);
-
- rtl8168f_config_eee_phy(tp);
- rtl_enable_eee(tp);
-
- /* Green feature */
- rtl_writephy(tp, 0x1f, 0x0003);
- rtl_w0w1_phy(tp, 0x19, 0x0001, 0x0000);
- rtl_w0w1_phy(tp, 0x10, 0x0400, 0x0000);
- rtl_writephy(tp, 0x1f, 0x0000);
- rtl_writephy(tp, 0x1f, 0x0005);
- rtl_w0w1_phy(tp, 0x01, 0x0100, 0x0000);
- rtl_writephy(tp, 0x1f, 0x0000);
-
- /* Broken BIOS workaround: feed GigaMAC registers with MAC address. */
- rtl_rar_exgmac_set(tp, tp->dev->dev_addr);
-}
-
-static void rtl8168f_hw_phy_config(struct rtl8169_private *tp)
-{
- struct phy_device *phydev = tp->phydev;
-
- /* For 4-corner performance improve */
- r8168d_phy_param(phydev, 0x8b80, 0x0000, 0x0006);
-
- /* PHY auto speed down */
- r8168d_modify_extpage(phydev, 0x002d, 0x18, 0x0000, 0x0010);
- phy_set_bits(phydev, 0x14, BIT(15));
-
- /* Improve 10M EEE waveform */
- r8168d_phy_param(phydev, 0x8b86, 0x0000, 0x0001);
-
- rtl8168f_config_eee_phy(tp);
- rtl_enable_eee(tp);
-}
-
-static void rtl8168f_1_hw_phy_config(struct rtl8169_private *tp)
-{
- struct phy_device *phydev = tp->phydev;
-
- rtl_apply_firmware(tp);
-
- /* Channel estimation fine tune */
- phy_write_paged(phydev, 0x0003, 0x09, 0xa20f);
-
- /* Modify green table for giga & fnet */
- r8168d_phy_param(phydev, 0x8b55, 0xffff, 0x0000);
- r8168d_phy_param(phydev, 0x8b5e, 0xffff, 0x0000);
- r8168d_phy_param(phydev, 0x8b67, 0xffff, 0x0000);
- r8168d_phy_param(phydev, 0x8b70, 0xffff, 0x0000);
- r8168d_modify_extpage(phydev, 0x0078, 0x17, 0xffff, 0x0000);
- r8168d_modify_extpage(phydev, 0x0078, 0x19, 0xffff, 0x00fb);
-
- /* Modify green table for 10M */
- r8168d_phy_param(phydev, 0x8b79, 0xffff, 0xaa00);
-
- /* Disable hiimpedance detection (RTCT) */
- phy_write_paged(phydev, 0x0003, 0x01, 0x328a);
-
- rtl8168f_hw_phy_config(tp);
-
- /* Improve 2-pair detection performance */
- r8168d_phy_param(phydev, 0x8b85, 0x0000, 0x4000);
-}
-
-static void rtl8168f_2_hw_phy_config(struct rtl8169_private *tp)
+u16 rtl8168h_2_get_adc_bias_ioffset(struct rtl8169_private *tp)
{
- rtl_apply_firmware(tp);
-
- rtl8168f_hw_phy_config(tp);
-}
-
-static void rtl8411_hw_phy_config(struct rtl8169_private *tp)
-{
- struct phy_device *phydev = tp->phydev;
-
- rtl_apply_firmware(tp);
-
- rtl8168f_hw_phy_config(tp);
-
- /* Improve 2-pair detection performance */
- r8168d_phy_param(phydev, 0x8b85, 0x0000, 0x4000);
-
- /* Channel estimation fine tune */
- phy_write_paged(phydev, 0x0003, 0x09, 0xa20f);
-
- /* Modify green table for giga & fnet */
- r8168d_phy_param(phydev, 0x8b55, 0xffff, 0x0000);
- r8168d_phy_param(phydev, 0x8b5e, 0xffff, 0x0000);
- r8168d_phy_param(phydev, 0x8b67, 0xffff, 0x0000);
- r8168d_phy_param(phydev, 0x8b70, 0xffff, 0x0000);
- r8168d_modify_extpage(phydev, 0x0078, 0x17, 0xffff, 0x0000);
- r8168d_modify_extpage(phydev, 0x0078, 0x19, 0xffff, 0x00aa);
-
- /* Modify green table for 10M */
- r8168d_phy_param(phydev, 0x8b79, 0xffff, 0xaa00);
-
- /* Disable hiimpedance detection (RTCT) */
- phy_write_paged(phydev, 0x0003, 0x01, 0x328a);
-
- /* Modify green table for giga */
- r8168d_phy_param(phydev, 0x8b54, 0x0800, 0x0000);
- r8168d_phy_param(phydev, 0x8b5d, 0x0800, 0x0000);
- r8168d_phy_param(phydev, 0x8a7c, 0x0100, 0x0000);
- r8168d_phy_param(phydev, 0x8a7f, 0x0000, 0x0100);
- r8168d_phy_param(phydev, 0x8a82, 0x0100, 0x0000);
- r8168d_phy_param(phydev, 0x8a85, 0x0100, 0x0000);
- r8168d_phy_param(phydev, 0x8a88, 0x0100, 0x0000);
-
- /* uc same-seed solution */
- r8168d_phy_param(phydev, 0x8b85, 0x0000, 0x8000);
-
- /* Green feature */
- rtl_writephy(tp, 0x1f, 0x0003);
- rtl_w0w1_phy(tp, 0x19, 0x0000, 0x0001);
- rtl_w0w1_phy(tp, 0x10, 0x0000, 0x0400);
- rtl_writephy(tp, 0x1f, 0x0000);
-}
-
-static void rtl8168g_disable_aldps(struct rtl8169_private *tp)
-{
- phy_modify_paged(tp->phydev, 0x0a43, 0x10, BIT(2), 0);
-}
-
-static void rtl8168g_phy_adjust_10m_aldps(struct rtl8169_private *tp)
-{
- struct phy_device *phydev = tp->phydev;
-
- phy_modify_paged(phydev, 0x0bcc, 0x14, BIT(8), 0);
- phy_modify_paged(phydev, 0x0a44, 0x11, 0, BIT(7) | BIT(6));
- r8168g_phy_param(phydev, 0x8084, 0x6000, 0x0000);
- phy_modify_paged(phydev, 0x0a43, 0x10, 0x0000, 0x1003);
-}
-
-static void rtl8168g_1_hw_phy_config(struct rtl8169_private *tp)
-{
- int ret;
-
- rtl_apply_firmware(tp);
-
- ret = phy_read_paged(tp->phydev, 0x0a46, 0x10);
- if (ret & BIT(8))
- phy_modify_paged(tp->phydev, 0x0bcc, 0x12, BIT(15), 0);
- else
- phy_modify_paged(tp->phydev, 0x0bcc, 0x12, 0, BIT(15));
-
- ret = phy_read_paged(tp->phydev, 0x0a46, 0x13);
- if (ret & BIT(8))
- phy_modify_paged(tp->phydev, 0x0c41, 0x15, 0, BIT(1));
- else
- phy_modify_paged(tp->phydev, 0x0c41, 0x15, BIT(1), 0);
-
- /* Enable PHY auto speed down */
- phy_modify_paged(tp->phydev, 0x0a44, 0x11, 0, BIT(3) | BIT(2));
-
- rtl8168g_phy_adjust_10m_aldps(tp);
-
- /* EEE auto-fallback function */
- phy_modify_paged(tp->phydev, 0x0a4b, 0x11, 0, BIT(2));
-
- /* Enable UC LPF tune function */
- r8168g_phy_param(tp->phydev, 0x8012, 0x0000, 0x8000);
-
- phy_modify_paged(tp->phydev, 0x0c42, 0x11, BIT(13), BIT(14));
-
- /* Improve SWR Efficiency */
- rtl_writephy(tp, 0x1f, 0x0bcd);
- rtl_writephy(tp, 0x14, 0x5065);
- rtl_writephy(tp, 0x14, 0xd065);
- rtl_writephy(tp, 0x1f, 0x0bc8);
- rtl_writephy(tp, 0x11, 0x5655);
- rtl_writephy(tp, 0x1f, 0x0bcd);
- rtl_writephy(tp, 0x14, 0x1065);
- rtl_writephy(tp, 0x14, 0x9065);
- rtl_writephy(tp, 0x14, 0x1065);
- rtl_writephy(tp, 0x1f, 0x0000);
-
- rtl8168g_disable_aldps(tp);
- rtl8168g_config_eee_phy(tp);
- rtl_enable_eee(tp);
-}
-
-static void rtl8168g_2_hw_phy_config(struct rtl8169_private *tp)
-{
- rtl_apply_firmware(tp);
- rtl8168g_config_eee_phy(tp);
- rtl_enable_eee(tp);
-}
-
-static void rtl8168h_1_hw_phy_config(struct rtl8169_private *tp)
-{
- struct phy_device *phydev = tp->phydev;
- u16 dout_tapbin;
- u32 data;
-
- rtl_apply_firmware(tp);
-
- /* CHN EST parameters adjust - giga master */
- r8168g_phy_param(phydev, 0x809b, 0xf800, 0x8000);
- r8168g_phy_param(phydev, 0x80a2, 0xff00, 0x8000);
- r8168g_phy_param(phydev, 0x80a4, 0xff00, 0x8500);
- r8168g_phy_param(phydev, 0x809c, 0xff00, 0xbd00);
-
- /* CHN EST parameters adjust - giga slave */
- r8168g_phy_param(phydev, 0x80ad, 0xf800, 0x7000);
- r8168g_phy_param(phydev, 0x80b4, 0xff00, 0x5000);
- r8168g_phy_param(phydev, 0x80ac, 0xff00, 0x4000);
-
- /* CHN EST parameters adjust - fnet */
- r8168g_phy_param(phydev, 0x808e, 0xff00, 0x1200);
- r8168g_phy_param(phydev, 0x8090, 0xff00, 0xe500);
- r8168g_phy_param(phydev, 0x8092, 0xff00, 0x9f00);
-
- /* enable R-tune & PGA-retune function */
- dout_tapbin = 0;
- data = phy_read_paged(phydev, 0x0a46, 0x13);
- data &= 3;
- data <<= 2;
- dout_tapbin |= data;
- data = phy_read_paged(phydev, 0x0a46, 0x12);
- data &= 0xc000;
- data >>= 14;
- dout_tapbin |= data;
- dout_tapbin = ~(dout_tapbin^0x08);
- dout_tapbin <<= 12;
- dout_tapbin &= 0xf000;
-
- r8168g_phy_param(phydev, 0x827a, 0xf000, dout_tapbin);
- r8168g_phy_param(phydev, 0x827b, 0xf000, dout_tapbin);
- r8168g_phy_param(phydev, 0x827c, 0xf000, dout_tapbin);
- r8168g_phy_param(phydev, 0x827d, 0xf000, dout_tapbin);
- r8168g_phy_param(phydev, 0x0811, 0x0000, 0x0800);
- phy_modify_paged(phydev, 0x0a42, 0x16, 0x0000, 0x0002);
-
- /* enable GPHY 10M */
- phy_modify_paged(tp->phydev, 0x0a44, 0x11, 0, BIT(11));
-
- /* SAR ADC performance */
- phy_modify_paged(tp->phydev, 0x0bca, 0x17, BIT(12) | BIT(13), BIT(14));
-
- r8168g_phy_param(phydev, 0x803f, 0x3000, 0x0000);
- r8168g_phy_param(phydev, 0x8047, 0x3000, 0x0000);
- r8168g_phy_param(phydev, 0x804f, 0x3000, 0x0000);
- r8168g_phy_param(phydev, 0x8057, 0x3000, 0x0000);
- r8168g_phy_param(phydev, 0x805f, 0x3000, 0x0000);
- r8168g_phy_param(phydev, 0x8067, 0x3000, 0x0000);
- r8168g_phy_param(phydev, 0x806f, 0x3000, 0x0000);
-
- /* disable phy pfm mode */
- phy_modify_paged(tp->phydev, 0x0a44, 0x11, BIT(7), 0);
-
- rtl8168g_disable_aldps(tp);
- rtl8168h_config_eee_phy(tp);
- rtl_enable_eee(tp);
-}
-
-static void rtl8168h_2_hw_phy_config(struct rtl8169_private *tp)
-{
- u16 ioffset_p3, ioffset_p2, ioffset_p1, ioffset_p0;
- struct phy_device *phydev = tp->phydev;
- u16 rlen;
- u32 data;
-
- rtl_apply_firmware(tp);
-
- /* CHIN EST parameter update */
- r8168g_phy_param(phydev, 0x808a, 0x003f, 0x000a);
-
- /* enable R-tune & PGA-retune function */
- r8168g_phy_param(phydev, 0x0811, 0x0000, 0x0800);
- phy_modify_paged(phydev, 0x0a42, 0x16, 0x0000, 0x0002);
-
- /* enable GPHY 10M */
- phy_modify_paged(tp->phydev, 0x0a44, 0x11, 0, BIT(11));
+ u16 data1, data2, ioffset;
r8168_mac_ocp_write(tp, 0xdd02, 0x807d);
- data = r8168_mac_ocp_read(tp, 0xdd02);
- ioffset_p3 = ((data & 0x80)>>7);
- ioffset_p3 <<= 3;
-
- data = r8168_mac_ocp_read(tp, 0xdd00);
- ioffset_p3 |= ((data & (0xe000))>>13);
- ioffset_p2 = ((data & (0x1e00))>>9);
- ioffset_p1 = ((data & (0x01e0))>>5);
- ioffset_p0 = ((data & 0x0010)>>4);
- ioffset_p0 <<= 3;
- ioffset_p0 |= (data & (0x07));
- data = (ioffset_p3<<12)|(ioffset_p2<<8)|(ioffset_p1<<4)|(ioffset_p0);
-
- if ((ioffset_p3 != 0x0f) || (ioffset_p2 != 0x0f) ||
- (ioffset_p1 != 0x0f) || (ioffset_p0 != 0x0f))
- phy_write_paged(phydev, 0x0bcf, 0x16, data);
-
- /* Modify rlen (TX LPF corner frequency) level */
- data = phy_read_paged(phydev, 0x0bcd, 0x16);
- data &= 0x000f;
- rlen = 0;
- if (data > 3)
- rlen = data - 3;
- data = rlen | (rlen<<4) | (rlen<<8) | (rlen<<12);
- phy_write_paged(phydev, 0x0bcd, 0x17, data);
-
- /* disable phy pfm mode */
- phy_modify_paged(phydev, 0x0a44, 0x11, BIT(7), 0);
-
- rtl8168g_disable_aldps(tp);
- rtl8168g_config_eee_phy(tp);
- rtl_enable_eee(tp);
-}
-
-static void rtl8168ep_1_hw_phy_config(struct rtl8169_private *tp)
-{
- struct phy_device *phydev = tp->phydev;
-
- /* Enable PHY auto speed down */
- phy_modify_paged(phydev, 0x0a44, 0x11, 0, BIT(3) | BIT(2));
-
- rtl8168g_phy_adjust_10m_aldps(tp);
-
- /* Enable EEE auto-fallback function */
- phy_modify_paged(phydev, 0x0a4b, 0x11, 0, BIT(2));
-
- /* Enable UC LPF tune function */
- r8168g_phy_param(phydev, 0x8012, 0x0000, 0x8000);
-
- /* set rg_sel_sdm_rate */
- phy_modify_paged(phydev, 0x0c42, 0x11, BIT(13), BIT(14));
-
- rtl8168g_disable_aldps(tp);
- rtl8168g_config_eee_phy(tp);
- rtl_enable_eee(tp);
-}
-
-static void rtl8168ep_2_hw_phy_config(struct rtl8169_private *tp)
-{
- struct phy_device *phydev = tp->phydev;
-
- rtl8168g_phy_adjust_10m_aldps(tp);
-
- /* Enable UC LPF tune function */
- r8168g_phy_param(phydev, 0x8012, 0x0000, 0x8000);
-
- /* Set rg_sel_sdm_rate */
- phy_modify_paged(tp->phydev, 0x0c42, 0x11, BIT(13), BIT(14));
-
- /* Channel estimation parameters */
- r8168g_phy_param(phydev, 0x80f3, 0xff00, 0x8b00);
- r8168g_phy_param(phydev, 0x80f0, 0xff00, 0x3a00);
- r8168g_phy_param(phydev, 0x80ef, 0xff00, 0x0500);
- r8168g_phy_param(phydev, 0x80f6, 0xff00, 0x6e00);
- r8168g_phy_param(phydev, 0x80ec, 0xff00, 0x6800);
- r8168g_phy_param(phydev, 0x80ed, 0xff00, 0x7c00);
- r8168g_phy_param(phydev, 0x80f2, 0xff00, 0xf400);
- r8168g_phy_param(phydev, 0x80f4, 0xff00, 0x8500);
- r8168g_phy_param(phydev, 0x8110, 0xff00, 0xa800);
- r8168g_phy_param(phydev, 0x810f, 0xff00, 0x1d00);
- r8168g_phy_param(phydev, 0x8111, 0xff00, 0xf500);
- r8168g_phy_param(phydev, 0x8113, 0xff00, 0x6100);
- r8168g_phy_param(phydev, 0x8115, 0xff00, 0x9200);
- r8168g_phy_param(phydev, 0x810e, 0xff00, 0x0400);
- r8168g_phy_param(phydev, 0x810c, 0xff00, 0x7c00);
- r8168g_phy_param(phydev, 0x810b, 0xff00, 0x5a00);
- r8168g_phy_param(phydev, 0x80d1, 0xff00, 0xff00);
- r8168g_phy_param(phydev, 0x80cd, 0xff00, 0x9e00);
- r8168g_phy_param(phydev, 0x80d3, 0xff00, 0x0e00);
- r8168g_phy_param(phydev, 0x80d5, 0xff00, 0xca00);
- r8168g_phy_param(phydev, 0x80d7, 0xff00, 0x8400);
-
- /* Force PWM-mode */
- rtl_writephy(tp, 0x1f, 0x0bcd);
- rtl_writephy(tp, 0x14, 0x5065);
- rtl_writephy(tp, 0x14, 0xd065);
- rtl_writephy(tp, 0x1f, 0x0bc8);
- rtl_writephy(tp, 0x12, 0x00ed);
- rtl_writephy(tp, 0x1f, 0x0bcd);
- rtl_writephy(tp, 0x14, 0x1065);
- rtl_writephy(tp, 0x14, 0x9065);
- rtl_writephy(tp, 0x14, 0x1065);
- rtl_writephy(tp, 0x1f, 0x0000);
-
- rtl8168g_disable_aldps(tp);
- rtl8168g_config_eee_phy(tp);
- rtl_enable_eee(tp);
-}
-
-static void rtl8117_hw_phy_config(struct rtl8169_private *tp)
-{
- struct phy_device *phydev = tp->phydev;
-
- /* CHN EST parameters adjust - fnet */
- r8168g_phy_param(phydev, 0x808e, 0xff00, 0x4800);
- r8168g_phy_param(phydev, 0x8090, 0xff00, 0xcc00);
- r8168g_phy_param(phydev, 0x8092, 0xff00, 0xb000);
-
- r8168g_phy_param(phydev, 0x8088, 0xff00, 0x6000);
- r8168g_phy_param(phydev, 0x808b, 0x3f00, 0x0b00);
- r8168g_phy_param(phydev, 0x808d, 0x1f00, 0x0600);
- r8168g_phy_param(phydev, 0x808c, 0xff00, 0xb000);
- r8168g_phy_param(phydev, 0x80a0, 0xff00, 0x2800);
- r8168g_phy_param(phydev, 0x80a2, 0xff00, 0x5000);
- r8168g_phy_param(phydev, 0x809b, 0xf800, 0xb000);
- r8168g_phy_param(phydev, 0x809a, 0xff00, 0x4b00);
- r8168g_phy_param(phydev, 0x809d, 0x3f00, 0x0800);
- r8168g_phy_param(phydev, 0x80a1, 0xff00, 0x7000);
- r8168g_phy_param(phydev, 0x809f, 0x1f00, 0x0300);
- r8168g_phy_param(phydev, 0x809e, 0xff00, 0x8800);
- r8168g_phy_param(phydev, 0x80b2, 0xff00, 0x2200);
- r8168g_phy_param(phydev, 0x80ad, 0xf800, 0x9800);
- r8168g_phy_param(phydev, 0x80af, 0x3f00, 0x0800);
- r8168g_phy_param(phydev, 0x80b3, 0xff00, 0x6f00);
- r8168g_phy_param(phydev, 0x80b1, 0x1f00, 0x0300);
- r8168g_phy_param(phydev, 0x80b0, 0xff00, 0x9300);
-
- r8168g_phy_param(phydev, 0x8011, 0x0000, 0x0800);
-
- /* enable GPHY 10M */
- phy_modify_paged(tp->phydev, 0x0a44, 0x11, 0, BIT(11));
-
- r8168g_phy_param(phydev, 0x8016, 0x0000, 0x0400);
-
- rtl8168g_disable_aldps(tp);
- rtl8168h_config_eee_phy(tp);
- rtl_enable_eee(tp);
-}
-
-static void rtl8102e_hw_phy_config(struct rtl8169_private *tp)
-{
- static const struct phy_reg phy_reg_init[] = {
- { 0x1f, 0x0003 },
- { 0x08, 0x441d },
- { 0x01, 0x9100 },
- { 0x1f, 0x0000 }
- };
-
- rtl_writephy(tp, 0x1f, 0x0000);
- rtl_patchphy(tp, 0x11, 1 << 12);
- rtl_patchphy(tp, 0x19, 1 << 13);
- rtl_patchphy(tp, 0x10, 1 << 15);
-
- rtl_writephy_batch(tp, phy_reg_init);
-}
-
-static void rtl8105e_hw_phy_config(struct rtl8169_private *tp)
-{
- /* Disable ALDPS before ram code */
- phy_write(tp->phydev, 0x18, 0x0310);
- msleep(100);
-
- rtl_apply_firmware(tp);
-
- phy_write_paged(tp->phydev, 0x0005, 0x1a, 0x0000);
- phy_write_paged(tp->phydev, 0x0004, 0x1c, 0x0000);
- phy_write_paged(tp->phydev, 0x0001, 0x15, 0x7701);
-}
-
-static void rtl8402_hw_phy_config(struct rtl8169_private *tp)
-{
- /* Disable ALDPS before setting firmware */
- phy_write(tp->phydev, 0x18, 0x0310);
- msleep(20);
-
- rtl_apply_firmware(tp);
-
- /* EEE setting */
- rtl_eri_write(tp, 0x1b0, ERIAR_MASK_0011, 0x0000);
- rtl_writephy(tp, 0x1f, 0x0004);
- rtl_writephy(tp, 0x10, 0x401f);
- rtl_writephy(tp, 0x19, 0x7030);
- rtl_writephy(tp, 0x1f, 0x0000);
-}
-
-static void rtl8106e_hw_phy_config(struct rtl8169_private *tp)
-{
- static const struct phy_reg phy_reg_init[] = {
- { 0x1f, 0x0004 },
- { 0x10, 0xc07f },
- { 0x19, 0x7030 },
- { 0x1f, 0x0000 }
- };
+ data1 = r8168_mac_ocp_read(tp, 0xdd02);
+ data2 = r8168_mac_ocp_read(tp, 0xdd00);
- /* Disable ALDPS before ram code */
- phy_write(tp->phydev, 0x18, 0x0310);
- msleep(100);
+ ioffset = (data2 >> 1) & 0x7ff8;
+ ioffset |= data2 & 0x0007;
+ if (data1 & BIT(7))
+ ioffset |= BIT(15);
- rtl_apply_firmware(tp);
-
- rtl_eri_write(tp, 0x1b0, ERIAR_MASK_0011, 0x0000);
- rtl_writephy_batch(tp, phy_reg_init);
-
- rtl_eri_write(tp, 0x1d0, ERIAR_MASK_0011, 0x0000);
-}
-
-static void rtl8125_1_hw_phy_config(struct rtl8169_private *tp)
-{
- struct phy_device *phydev = tp->phydev;
-
- phy_modify_paged(phydev, 0xad4, 0x10, 0x03ff, 0x0084);
- phy_modify_paged(phydev, 0xad4, 0x17, 0x0000, 0x0010);
- phy_modify_paged(phydev, 0xad1, 0x13, 0x03ff, 0x0006);
- phy_modify_paged(phydev, 0xad3, 0x11, 0x003f, 0x0006);
- phy_modify_paged(phydev, 0xac0, 0x14, 0x0000, 0x1100);
- phy_modify_paged(phydev, 0xac8, 0x15, 0xf000, 0x7000);
- phy_modify_paged(phydev, 0xad1, 0x14, 0x0000, 0x0400);
- phy_modify_paged(phydev, 0xad1, 0x15, 0x0000, 0x03ff);
- phy_modify_paged(phydev, 0xad1, 0x16, 0x0000, 0x03ff);
-
- r8168g_phy_param(phydev, 0x80ea, 0xff00, 0xc400);
- r8168g_phy_param(phydev, 0x80eb, 0x0700, 0x0300);
- r8168g_phy_param(phydev, 0x80f8, 0xff00, 0x1c00);
- r8168g_phy_param(phydev, 0x80f1, 0xff00, 0x3000);
- r8168g_phy_param(phydev, 0x80fe, 0xff00, 0xa500);
- r8168g_phy_param(phydev, 0x8102, 0xff00, 0x5000);
- r8168g_phy_param(phydev, 0x8105, 0xff00, 0x3300);
- r8168g_phy_param(phydev, 0x8100, 0xff00, 0x7000);
- r8168g_phy_param(phydev, 0x8104, 0xff00, 0xf000);
- r8168g_phy_param(phydev, 0x8106, 0xff00, 0x6500);
- r8168g_phy_param(phydev, 0x80dc, 0xff00, 0xed00);
- r8168g_phy_param(phydev, 0x80df, 0x0000, 0x0100);
- r8168g_phy_param(phydev, 0x80e1, 0x0100, 0x0000);
-
- phy_modify_paged(phydev, 0xbf0, 0x13, 0x003f, 0x0038);
- r8168g_phy_param(phydev, 0x819f, 0xffff, 0xd0b6);
-
- phy_write_paged(phydev, 0xbc3, 0x12, 0x5555);
- phy_modify_paged(phydev, 0xbf0, 0x15, 0x0e00, 0x0a00);
- phy_modify_paged(phydev, 0xa5c, 0x10, 0x0400, 0x0000);
- phy_modify_paged(phydev, 0xa44, 0x11, 0x0000, 0x0800);
-
- rtl8125_config_eee_phy(tp);
- rtl_enable_eee(tp);
-}
-
-static void rtl8125_2_hw_phy_config(struct rtl8169_private *tp)
-{
- struct phy_device *phydev = tp->phydev;
- int i;
-
- phy_modify_paged(phydev, 0xad4, 0x17, 0x0000, 0x0010);
- phy_modify_paged(phydev, 0xad1, 0x13, 0x03ff, 0x03ff);
- phy_modify_paged(phydev, 0xad3, 0x11, 0x003f, 0x0006);
- phy_modify_paged(phydev, 0xac0, 0x14, 0x1100, 0x0000);
- phy_modify_paged(phydev, 0xacc, 0x10, 0x0003, 0x0002);
- phy_modify_paged(phydev, 0xad4, 0x10, 0x00e7, 0x0044);
- phy_modify_paged(phydev, 0xac1, 0x12, 0x0080, 0x0000);
- phy_modify_paged(phydev, 0xac8, 0x10, 0x0300, 0x0000);
- phy_modify_paged(phydev, 0xac5, 0x17, 0x0007, 0x0002);
- phy_write_paged(phydev, 0xad4, 0x16, 0x00a8);
- phy_write_paged(phydev, 0xac5, 0x16, 0x01ff);
- phy_modify_paged(phydev, 0xac8, 0x15, 0x00f0, 0x0030);
-
- phy_write(phydev, 0x1f, 0x0b87);
- phy_write(phydev, 0x16, 0x80a2);
- phy_write(phydev, 0x17, 0x0153);
- phy_write(phydev, 0x16, 0x809c);
- phy_write(phydev, 0x17, 0x0153);
- phy_write(phydev, 0x1f, 0x0000);
-
- phy_write(phydev, 0x1f, 0x0a43);
- phy_write(phydev, 0x13, 0x81B3);
- phy_write(phydev, 0x14, 0x0043);
- phy_write(phydev, 0x14, 0x00A7);
- phy_write(phydev, 0x14, 0x00D6);
- phy_write(phydev, 0x14, 0x00EC);
- phy_write(phydev, 0x14, 0x00F6);
- phy_write(phydev, 0x14, 0x00FB);
- phy_write(phydev, 0x14, 0x00FD);
- phy_write(phydev, 0x14, 0x00FF);
- phy_write(phydev, 0x14, 0x00BB);
- phy_write(phydev, 0x14, 0x0058);
- phy_write(phydev, 0x14, 0x0029);
- phy_write(phydev, 0x14, 0x0013);
- phy_write(phydev, 0x14, 0x0009);
- phy_write(phydev, 0x14, 0x0004);
- phy_write(phydev, 0x14, 0x0002);
- for (i = 0; i < 25; i++)
- phy_write(phydev, 0x14, 0x0000);
- phy_write(phydev, 0x1f, 0x0000);
-
- r8168g_phy_param(phydev, 0x8257, 0xffff, 0x020F);
- r8168g_phy_param(phydev, 0x80ea, 0xffff, 0x7843);
-
- rtl_apply_firmware(tp);
-
- phy_modify_paged(phydev, 0xd06, 0x14, 0x0000, 0x2000);
-
- r8168g_phy_param(phydev, 0x81a2, 0x0000, 0x0100);
-
- phy_modify_paged(phydev, 0xb54, 0x16, 0xff00, 0xdb00);
- phy_modify_paged(phydev, 0xa45, 0x12, 0x0001, 0x0000);
- phy_modify_paged(phydev, 0xa5d, 0x12, 0x0000, 0x0020);
- phy_modify_paged(phydev, 0xad4, 0x17, 0x0010, 0x0000);
- phy_modify_paged(phydev, 0xa86, 0x15, 0x0001, 0x0000);
- phy_modify_paged(phydev, 0xa44, 0x11, 0x0000, 0x0800);
-
- rtl8125_config_eee_phy(tp);
- rtl_enable_eee(tp);
-}
-
-static void rtl_hw_phy_config(struct net_device *dev)
-{
- static const rtl_generic_fct phy_configs[] = {
- /* PCI devices. */
- [RTL_GIGA_MAC_VER_02] = rtl8169s_hw_phy_config,
- [RTL_GIGA_MAC_VER_03] = rtl8169s_hw_phy_config,
- [RTL_GIGA_MAC_VER_04] = rtl8169sb_hw_phy_config,
- [RTL_GIGA_MAC_VER_05] = rtl8169scd_hw_phy_config,
- [RTL_GIGA_MAC_VER_06] = rtl8169sce_hw_phy_config,
- /* PCI-E devices. */
- [RTL_GIGA_MAC_VER_07] = rtl8102e_hw_phy_config,
- [RTL_GIGA_MAC_VER_08] = rtl8102e_hw_phy_config,
- [RTL_GIGA_MAC_VER_09] = rtl8102e_hw_phy_config,
- [RTL_GIGA_MAC_VER_10] = NULL,
- [RTL_GIGA_MAC_VER_11] = rtl8168bb_hw_phy_config,
- [RTL_GIGA_MAC_VER_12] = rtl8168bef_hw_phy_config,
- [RTL_GIGA_MAC_VER_13] = NULL,
- [RTL_GIGA_MAC_VER_14] = NULL,
- [RTL_GIGA_MAC_VER_15] = NULL,
- [RTL_GIGA_MAC_VER_16] = NULL,
- [RTL_GIGA_MAC_VER_17] = rtl8168bef_hw_phy_config,
- [RTL_GIGA_MAC_VER_18] = rtl8168cp_1_hw_phy_config,
- [RTL_GIGA_MAC_VER_19] = rtl8168c_1_hw_phy_config,
- [RTL_GIGA_MAC_VER_20] = rtl8168c_2_hw_phy_config,
- [RTL_GIGA_MAC_VER_21] = rtl8168c_3_hw_phy_config,
- [RTL_GIGA_MAC_VER_22] = rtl8168c_3_hw_phy_config,
- [RTL_GIGA_MAC_VER_23] = rtl8168cp_2_hw_phy_config,
- [RTL_GIGA_MAC_VER_24] = rtl8168cp_2_hw_phy_config,
- [RTL_GIGA_MAC_VER_25] = rtl8168d_1_hw_phy_config,
- [RTL_GIGA_MAC_VER_26] = rtl8168d_2_hw_phy_config,
- [RTL_GIGA_MAC_VER_27] = rtl8168d_3_hw_phy_config,
- [RTL_GIGA_MAC_VER_28] = rtl8168d_4_hw_phy_config,
- [RTL_GIGA_MAC_VER_29] = rtl8105e_hw_phy_config,
- [RTL_GIGA_MAC_VER_30] = rtl8105e_hw_phy_config,
- [RTL_GIGA_MAC_VER_31] = NULL,
- [RTL_GIGA_MAC_VER_32] = rtl8168e_1_hw_phy_config,
- [RTL_GIGA_MAC_VER_33] = rtl8168e_1_hw_phy_config,
- [RTL_GIGA_MAC_VER_34] = rtl8168e_2_hw_phy_config,
- [RTL_GIGA_MAC_VER_35] = rtl8168f_1_hw_phy_config,
- [RTL_GIGA_MAC_VER_36] = rtl8168f_2_hw_phy_config,
- [RTL_GIGA_MAC_VER_37] = rtl8402_hw_phy_config,
- [RTL_GIGA_MAC_VER_38] = rtl8411_hw_phy_config,
- [RTL_GIGA_MAC_VER_39] = rtl8106e_hw_phy_config,
- [RTL_GIGA_MAC_VER_40] = rtl8168g_1_hw_phy_config,
- [RTL_GIGA_MAC_VER_41] = NULL,
- [RTL_GIGA_MAC_VER_42] = rtl8168g_2_hw_phy_config,
- [RTL_GIGA_MAC_VER_43] = rtl8168g_2_hw_phy_config,
- [RTL_GIGA_MAC_VER_44] = rtl8168g_2_hw_phy_config,
- [RTL_GIGA_MAC_VER_45] = rtl8168h_1_hw_phy_config,
- [RTL_GIGA_MAC_VER_46] = rtl8168h_2_hw_phy_config,
- [RTL_GIGA_MAC_VER_47] = rtl8168h_1_hw_phy_config,
- [RTL_GIGA_MAC_VER_48] = rtl8168h_2_hw_phy_config,
- [RTL_GIGA_MAC_VER_49] = rtl8168ep_1_hw_phy_config,
- [RTL_GIGA_MAC_VER_50] = rtl8168ep_2_hw_phy_config,
- [RTL_GIGA_MAC_VER_51] = rtl8168ep_2_hw_phy_config,
- [RTL_GIGA_MAC_VER_52] = rtl8117_hw_phy_config,
- [RTL_GIGA_MAC_VER_60] = rtl8125_1_hw_phy_config,
- [RTL_GIGA_MAC_VER_61] = rtl8125_2_hw_phy_config,
- };
- struct rtl8169_private *tp = netdev_priv(dev);
-
- if (phy_configs[tp->mac_version])
- phy_configs[tp->mac_version](tp);
+ return ioffset;
}
static void rtl_schedule_task(struct rtl8169_private *tp, enum rtl_flag flag)
@@ -3617,21 +2233,28 @@ static void rtl_schedule_task(struct rtl8169_private *tp, enum rtl_flag flag)
schedule_work(&tp->wk.work);
}
-static void rtl8169_init_phy(struct net_device *dev, struct rtl8169_private *tp)
+static void rtl8169_init_phy(struct rtl8169_private *tp)
{
- rtl_hw_phy_config(dev);
+ r8169_hw_phy_config(tp, tp->phydev, tp->mac_version);
if (tp->mac_version <= RTL_GIGA_MAC_VER_06) {
pci_write_config_byte(tp->pci_dev, PCI_LATENCY_TIMER, 0x40);
pci_write_config_byte(tp->pci_dev, PCI_CACHE_LINE_SIZE, 0x08);
- netif_dbg(tp, drv, dev,
- "Set MAC Reg C+CR Offset 0x82h = 0x01h\n");
+ /* set undocumented MAC Reg C+CR Offset 0x82h */
RTL_W8(tp, 0x82, 0x01);
}
+ if (tp->mac_version == RTL_GIGA_MAC_VER_05 &&
+ tp->pci_dev->subsystem_vendor == PCI_VENDOR_ID_GIGABYTE &&
+ tp->pci_dev->subsystem_device == 0xe000)
+ phy_write_paged(tp->phydev, 0x0001, 0x10, 0xf01b);
+
/* We may have called phy_speed_down before */
phy_speed_up(tp->phydev);
+ if (rtl_supports_eee(tp))
+ rtl_enable_eee(tp);
+
genphy_soft_reset(tp->phydev);
}
@@ -4710,9 +3333,7 @@ static void rtl_hw_start_8168h_1(struct rtl8169_private *tp)
rtl_pcie_state_l2l3_disable(tp);
- rtl_writephy(tp, 0x1f, 0x0c42);
- rg_saw_cnt = (rtl_readphy(tp, 0x13) & 0x3fff);
- rtl_writephy(tp, 0x1f, 0x0000);
+ rg_saw_cnt = phy_read_paged(tp->phydev, 0x0c42, 0x13) & 0x3fff;
if (rg_saw_cnt > 0) {
u16 sw_cnt_1ms_ini;
@@ -4887,7 +3508,7 @@ static void rtl_hw_start_8117(struct rtl8169_private *tp)
r8168_mac_ocp_write(tp, 0xc09e, 0x0000);
/* firmware is for MAC only */
- rtl_apply_firmware(tp);
+ r8169_apply_firmware(tp);
rtl_hw_aspm_clkreq_enable(tp, true);
}
@@ -4991,6 +3612,9 @@ static void rtl_hw_start_8402(struct rtl8169_private *tp)
rtl_eri_write(tp, 0xb8, ERIAR_MASK_0011, 0x0000);
rtl_w0w1_eri(tp, 0x0d4, ERIAR_MASK_0011, 0x0e00, 0xff00);
+ /* disable EEE */
+ rtl_eri_write(tp, 0x1b0, ERIAR_MASK_0011, 0x0000);
+
rtl_pcie_state_l2l3_disable(tp);
}
@@ -5005,6 +3629,11 @@ static void rtl_hw_start_8106(struct rtl8169_private *tp)
RTL_W8(tp, MCU, RTL_R8(tp, MCU) | EN_NDP | EN_OOB_RESET);
RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) & ~PFM_EN);
+ rtl_eri_write(tp, 0x1d0, ERIAR_MASK_0011, 0x0000);
+
+ /* disable EEE */
+ rtl_eri_write(tp, 0x1b0, ERIAR_MASK_0011, 0x0000);
+
rtl_pcie_state_l2l3_disable(tp);
rtl_hw_aspm_clkreq_enable(tp, true);
}
@@ -5222,11 +3851,8 @@ static void rtl_hw_start_8169(struct rtl8169_private *tp)
tp->cp_cmd |= PCIMulRW;
if (tp->mac_version == RTL_GIGA_MAC_VER_02 ||
- tp->mac_version == RTL_GIGA_MAC_VER_03) {
- netif_dbg(tp, drv, tp->dev,
- "Set MAC Reg C+CR Offset 0xe0. Bit 3 and Bit 14 MUST be 1\n");
- tp->cp_cmd |= (1 << 14);
- }
+ tp->mac_version == RTL_GIGA_MAC_VER_03)
+ tp->cp_cmd |= EnAnaPLL;
RTL_W16(tp, CPlusCmd, tp->cp_cmd);
@@ -5435,7 +4061,7 @@ static void rtl_reset_work(struct rtl8169_private *tp)
netif_wake_queue(dev);
}
-static void rtl8169_tx_timeout(struct net_device *dev)
+static void rtl8169_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct rtl8169_private *tp = netdev_priv(dev);
@@ -6240,7 +4866,7 @@ static int rtl_open(struct net_device *dev)
napi_enable(&tp->napi);
- rtl8169_init_phy(dev, tp);
+ rtl8169_init_phy(tp);
rtl_pll_power_up(tp);
@@ -6371,7 +4997,7 @@ static void __rtl8169_resume(struct net_device *dev)
netif_device_attach(dev);
rtl_pll_power_up(tp);
- rtl8169_init_phy(dev, tp);
+ rtl8169_init_phy(tp);
phy_start(tp->phydev);
@@ -6825,6 +5451,15 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
int chipset, region;
int jumbo_max, rc;
+ /* Some tools for creating an initramfs don't consider softdeps, then
+ * r8169.ko may be in initramfs, but realtek.ko not. Then the generic
+ * PHY driver is used that doesn't work with most chip versions.
+ */
+ if (!driver_find("RTL8201CP Ethernet", &mdio_bus_type)) {
+ dev_err(&pdev->dev, "realtek.ko not loaded, maybe it needs to be added to initramfs?\n");
+ return -ENOENT;
+ }
+
dev = devm_alloc_etherdev(&pdev->dev, sizeof (*tp));
if (!dev)
return -ENOMEM;
diff --git a/drivers/net/ethernet/realtek/r8169_phy_config.c b/drivers/net/ethernet/realtek/r8169_phy_config.c
new file mode 100644
index 000000000000..e367e77c773b
--- /dev/null
+++ b/drivers/net/ethernet/realtek/r8169_phy_config.c
@@ -0,0 +1,1307 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * r8169_phy_config.c: RealTek 8169/8168/8101 ethernet driver.
+ *
+ * Copyright (c) 2002 ShuChen <[email protected]>
+ * Copyright (c) 2003 - 2007 Francois Romieu <[email protected]>
+ * Copyright (c) a lot of people too. Please respect their work.
+ *
+ * See MAINTAINERS file for support contact information.
+ */
+
+#include <linux/delay.h>
+#include <linux/phy.h>
+
+#include "r8169.h"
+
+typedef void (*rtl_phy_cfg_fct)(struct rtl8169_private *tp,
+ struct phy_device *phydev);
+
+static void r8168d_modify_extpage(struct phy_device *phydev, int extpage,
+ int reg, u16 mask, u16 val)
+{
+ int oldpage = phy_select_page(phydev, 0x0007);
+
+ __phy_write(phydev, 0x1e, extpage);
+ __phy_modify(phydev, reg, mask, val);
+
+ phy_restore_page(phydev, oldpage, 0);
+}
+
+static void r8168d_phy_param(struct phy_device *phydev, u16 parm,
+ u16 mask, u16 val)
+{
+ int oldpage = phy_select_page(phydev, 0x0005);
+
+ __phy_write(phydev, 0x05, parm);
+ __phy_modify(phydev, 0x06, mask, val);
+
+ phy_restore_page(phydev, oldpage, 0);
+}
+
+static void r8168g_phy_param(struct phy_device *phydev, u16 parm,
+ u16 mask, u16 val)
+{
+ int oldpage = phy_select_page(phydev, 0x0a43);
+
+ __phy_write(phydev, 0x13, parm);
+ __phy_modify(phydev, 0x14, mask, val);
+
+ phy_restore_page(phydev, oldpage, 0);
+}
+
+struct phy_reg {
+ u16 reg;
+ u16 val;
+};
+
+static void __rtl_writephy_batch(struct phy_device *phydev,
+ const struct phy_reg *regs, int len)
+{
+ phy_lock_mdio_bus(phydev);
+
+ while (len-- > 0) {
+ __phy_write(phydev, regs->reg, regs->val);
+ regs++;
+ }
+
+ phy_unlock_mdio_bus(phydev);
+}
+
+#define rtl_writephy_batch(p, a) __rtl_writephy_batch(p, a, ARRAY_SIZE(a))
+
+static void rtl8168f_config_eee_phy(struct phy_device *phydev)
+{
+ r8168d_modify_extpage(phydev, 0x0020, 0x15, 0, BIT(8));
+ r8168d_phy_param(phydev, 0x8b85, 0, BIT(13));
+}
+
+static void rtl8168g_config_eee_phy(struct phy_device *phydev)
+{
+ phy_modify_paged(phydev, 0x0a43, 0x11, 0, BIT(4));
+}
+
+static void rtl8168h_config_eee_phy(struct phy_device *phydev)
+{
+ rtl8168g_config_eee_phy(phydev);
+
+ phy_modify_paged(phydev, 0xa4a, 0x11, 0x0000, 0x0200);
+ phy_modify_paged(phydev, 0xa42, 0x14, 0x0000, 0x0080);
+}
+
+static void rtl8125_config_eee_phy(struct phy_device *phydev)
+{
+ rtl8168h_config_eee_phy(phydev);
+
+ phy_modify_paged(phydev, 0xa6d, 0x12, 0x0001, 0x0000);
+ phy_modify_paged(phydev, 0xa6d, 0x14, 0x0010, 0x0000);
+}
+
+static void rtl8169s_hw_phy_config(struct rtl8169_private *tp,
+ struct phy_device *phydev)
+{
+ static const struct phy_reg phy_reg_init[] = {
+ { 0x1f, 0x0001 },
+ { 0x06, 0x006e },
+ { 0x08, 0x0708 },
+ { 0x15, 0x4000 },
+ { 0x18, 0x65c7 },
+
+ { 0x1f, 0x0001 },
+ { 0x03, 0x00a1 },
+ { 0x02, 0x0008 },
+ { 0x01, 0x0120 },
+ { 0x00, 0x1000 },
+ { 0x04, 0x0800 },
+ { 0x04, 0x0000 },
+
+ { 0x03, 0xff41 },
+ { 0x02, 0xdf60 },
+ { 0x01, 0x0140 },
+ { 0x00, 0x0077 },
+ { 0x04, 0x7800 },
+ { 0x04, 0x7000 },
+
+ { 0x03, 0x802f },
+ { 0x02, 0x4f02 },
+ { 0x01, 0x0409 },
+ { 0x00, 0xf0f9 },
+ { 0x04, 0x9800 },
+ { 0x04, 0x9000 },
+
+ { 0x03, 0xdf01 },
+ { 0x02, 0xdf20 },
+ { 0x01, 0xff95 },
+ { 0x00, 0xba00 },
+ { 0x04, 0xa800 },
+ { 0x04, 0xa000 },
+
+ { 0x03, 0xff41 },
+ { 0x02, 0xdf20 },
+ { 0x01, 0x0140 },
+ { 0x00, 0x00bb },
+ { 0x04, 0xb800 },
+ { 0x04, 0xb000 },
+
+ { 0x03, 0xdf41 },
+ { 0x02, 0xdc60 },
+ { 0x01, 0x6340 },
+ { 0x00, 0x007d },
+ { 0x04, 0xd800 },
+ { 0x04, 0xd000 },
+
+ { 0x03, 0xdf01 },
+ { 0x02, 0xdf20 },
+ { 0x01, 0x100a },
+ { 0x00, 0xa0ff },
+ { 0x04, 0xf800 },
+ { 0x04, 0xf000 },
+
+ { 0x1f, 0x0000 },
+ { 0x0b, 0x0000 },
+ { 0x00, 0x9200 }
+ };
+
+ rtl_writephy_batch(phydev, phy_reg_init);
+}
+
+static void rtl8169sb_hw_phy_config(struct rtl8169_private *tp,
+ struct phy_device *phydev)
+{
+ phy_write_paged(phydev, 0x0002, 0x01, 0x90d0);
+}
+
+static void rtl8169scd_hw_phy_config(struct rtl8169_private *tp,
+ struct phy_device *phydev)
+{
+ static const struct phy_reg phy_reg_init[] = {
+ { 0x1f, 0x0001 },
+ { 0x04, 0x0000 },
+ { 0x03, 0x00a1 },
+ { 0x02, 0x0008 },
+ { 0x01, 0x0120 },
+ { 0x00, 0x1000 },
+ { 0x04, 0x0800 },
+ { 0x04, 0x9000 },
+ { 0x03, 0x802f },
+ { 0x02, 0x4f02 },
+ { 0x01, 0x0409 },
+ { 0x00, 0xf099 },
+ { 0x04, 0x9800 },
+ { 0x04, 0xa000 },
+ { 0x03, 0xdf01 },
+ { 0x02, 0xdf20 },
+ { 0x01, 0xff95 },
+ { 0x00, 0xba00 },
+ { 0x04, 0xa800 },
+ { 0x04, 0xf000 },
+ { 0x03, 0xdf01 },
+ { 0x02, 0xdf20 },
+ { 0x01, 0x101a },
+ { 0x00, 0xa0ff },
+ { 0x04, 0xf800 },
+ { 0x04, 0x0000 },
+ { 0x1f, 0x0000 },
+
+ { 0x1f, 0x0001 },
+ { 0x10, 0xf41b },
+ { 0x14, 0xfb54 },
+ { 0x18, 0xf5c7 },
+ { 0x1f, 0x0000 },
+
+ { 0x1f, 0x0001 },
+ { 0x17, 0x0cc0 },
+ { 0x1f, 0x0000 }
+ };
+
+ rtl_writephy_batch(phydev, phy_reg_init);
+}
+
+static void rtl8169sce_hw_phy_config(struct rtl8169_private *tp,
+ struct phy_device *phydev)
+{
+ static const struct phy_reg phy_reg_init[] = {
+ { 0x1f, 0x0001 },
+ { 0x04, 0x0000 },
+ { 0x03, 0x00a1 },
+ { 0x02, 0x0008 },
+ { 0x01, 0x0120 },
+ { 0x00, 0x1000 },
+ { 0x04, 0x0800 },
+ { 0x04, 0x9000 },
+ { 0x03, 0x802f },
+ { 0x02, 0x4f02 },
+ { 0x01, 0x0409 },
+ { 0x00, 0xf099 },
+ { 0x04, 0x9800 },
+ { 0x04, 0xa000 },
+ { 0x03, 0xdf01 },
+ { 0x02, 0xdf20 },
+ { 0x01, 0xff95 },
+ { 0x00, 0xba00 },
+ { 0x04, 0xa800 },
+ { 0x04, 0xf000 },
+ { 0x03, 0xdf01 },
+ { 0x02, 0xdf20 },
+ { 0x01, 0x101a },
+ { 0x00, 0xa0ff },
+ { 0x04, 0xf800 },
+ { 0x04, 0x0000 },
+ { 0x1f, 0x0000 },
+
+ { 0x1f, 0x0001 },
+ { 0x0b, 0x8480 },
+ { 0x1f, 0x0000 },
+
+ { 0x1f, 0x0001 },
+ { 0x18, 0x67c7 },
+ { 0x04, 0x2000 },
+ { 0x03, 0x002f },
+ { 0x02, 0x4360 },
+ { 0x01, 0x0109 },
+ { 0x00, 0x3022 },
+ { 0x04, 0x2800 },
+ { 0x1f, 0x0000 },
+
+ { 0x1f, 0x0001 },
+ { 0x17, 0x0cc0 },
+ { 0x1f, 0x0000 }
+ };
+
+ rtl_writephy_batch(phydev, phy_reg_init);
+}
+
+static void rtl8168bb_hw_phy_config(struct rtl8169_private *tp,
+ struct phy_device *phydev)
+{
+ phy_write(phydev, 0x1f, 0x0001);
+ phy_set_bits(phydev, 0x16, BIT(0));
+ phy_write(phydev, 0x10, 0xf41b);
+ phy_write(phydev, 0x1f, 0x0000);
+}
+
+static void rtl8168bef_hw_phy_config(struct rtl8169_private *tp,
+ struct phy_device *phydev)
+{
+ phy_write_paged(phydev, 0x0001, 0x10, 0xf41b);
+}
+
+static void rtl8168cp_1_hw_phy_config(struct rtl8169_private *tp,
+ struct phy_device *phydev)
+{
+ phy_write(phydev, 0x1d, 0x0f00);
+ phy_write_paged(phydev, 0x0002, 0x0c, 0x1ec8);
+}
+
+static void rtl8168cp_2_hw_phy_config(struct rtl8169_private *tp,
+ struct phy_device *phydev)
+{
+ phy_set_bits(phydev, 0x14, BIT(5));
+ phy_set_bits(phydev, 0x0d, BIT(5));
+ phy_write_paged(phydev, 0x0001, 0x1d, 0x3d98);
+}
+
+static void rtl8168c_1_hw_phy_config(struct rtl8169_private *tp,
+ struct phy_device *phydev)
+{
+ static const struct phy_reg phy_reg_init[] = {
+ { 0x1f, 0x0001 },
+ { 0x12, 0x2300 },
+ { 0x1f, 0x0002 },
+ { 0x00, 0x88d4 },
+ { 0x01, 0x82b1 },
+ { 0x03, 0x7002 },
+ { 0x08, 0x9e30 },
+ { 0x09, 0x01f0 },
+ { 0x0a, 0x5500 },
+ { 0x0c, 0x00c8 },
+ { 0x1f, 0x0003 },
+ { 0x12, 0xc096 },
+ { 0x16, 0x000a },
+ { 0x1f, 0x0000 },
+ { 0x1f, 0x0000 },
+ { 0x09, 0x2000 },
+ { 0x09, 0x0000 }
+ };
+
+ rtl_writephy_batch(phydev, phy_reg_init);
+
+ phy_set_bits(phydev, 0x14, BIT(5));
+ phy_set_bits(phydev, 0x0d, BIT(5));
+}
+
+static void rtl8168c_2_hw_phy_config(struct rtl8169_private *tp,
+ struct phy_device *phydev)
+{
+ static const struct phy_reg phy_reg_init[] = {
+ { 0x1f, 0x0001 },
+ { 0x12, 0x2300 },
+ { 0x03, 0x802f },
+ { 0x02, 0x4f02 },
+ { 0x01, 0x0409 },
+ { 0x00, 0xf099 },
+ { 0x04, 0x9800 },
+ { 0x04, 0x9000 },
+ { 0x1d, 0x3d98 },
+ { 0x1f, 0x0002 },
+ { 0x0c, 0x7eb8 },
+ { 0x06, 0x0761 },
+ { 0x1f, 0x0003 },
+ { 0x16, 0x0f0a },
+ { 0x1f, 0x0000 }
+ };
+
+ rtl_writephy_batch(phydev, phy_reg_init);
+
+ phy_set_bits(phydev, 0x16, BIT(0));
+ phy_set_bits(phydev, 0x14, BIT(5));
+ phy_set_bits(phydev, 0x0d, BIT(5));
+}
+
+static void rtl8168c_3_hw_phy_config(struct rtl8169_private *tp,
+ struct phy_device *phydev)
+{
+ static const struct phy_reg phy_reg_init[] = {
+ { 0x1f, 0x0001 },
+ { 0x12, 0x2300 },
+ { 0x1d, 0x3d98 },
+ { 0x1f, 0x0002 },
+ { 0x0c, 0x7eb8 },
+ { 0x06, 0x5461 },
+ { 0x1f, 0x0003 },
+ { 0x16, 0x0f0a },
+ { 0x1f, 0x0000 }
+ };
+
+ rtl_writephy_batch(phydev, phy_reg_init);
+
+ phy_set_bits(phydev, 0x16, BIT(0));
+ phy_set_bits(phydev, 0x14, BIT(5));
+ phy_set_bits(phydev, 0x0d, BIT(5));
+}
+
+static const struct phy_reg rtl8168d_1_phy_reg_init_0[] = {
+ /* Channel Estimation */
+ { 0x1f, 0x0001 },
+ { 0x06, 0x4064 },
+ { 0x07, 0x2863 },
+ { 0x08, 0x059c },
+ { 0x09, 0x26b4 },
+ { 0x0a, 0x6a19 },
+ { 0x0b, 0xdcc8 },
+ { 0x10, 0xf06d },
+ { 0x14, 0x7f68 },
+ { 0x18, 0x7fd9 },
+ { 0x1c, 0xf0ff },
+ { 0x1d, 0x3d9c },
+ { 0x1f, 0x0003 },
+ { 0x12, 0xf49f },
+ { 0x13, 0x070b },
+ { 0x1a, 0x05ad },
+ { 0x14, 0x94c0 },
+
+ /*
+ * Tx Error Issue
+ * Enhance line driver power
+ */
+ { 0x1f, 0x0002 },
+ { 0x06, 0x5561 },
+ { 0x1f, 0x0005 },
+ { 0x05, 0x8332 },
+ { 0x06, 0x5561 },
+
+ /*
+ * Can not link to 1Gbps with bad cable
+ * Decrease SNR threshold form 21.07dB to 19.04dB
+ */
+ { 0x1f, 0x0001 },
+ { 0x17, 0x0cc0 },
+
+ { 0x1f, 0x0000 },
+ { 0x0d, 0xf880 }
+};
+
+static const struct phy_reg rtl8168d_1_phy_reg_init_1[] = {
+ { 0x1f, 0x0002 },
+ { 0x05, 0x669a },
+ { 0x1f, 0x0005 },
+ { 0x05, 0x8330 },
+ { 0x06, 0x669a },
+ { 0x1f, 0x0002 }
+};
+
+static void rtl8168d_apply_firmware_cond(struct rtl8169_private *tp,
+ struct phy_device *phydev,
+ u16 val)
+{
+ u16 reg_val;
+
+ phy_write(phydev, 0x1f, 0x0005);
+ phy_write(phydev, 0x05, 0x001b);
+ reg_val = phy_read(phydev, 0x06);
+ phy_write(phydev, 0x1f, 0x0000);
+
+ if (reg_val != val)
+ phydev_warn(phydev, "chipset not ready for firmware\n");
+ else
+ r8169_apply_firmware(tp);
+}
+
+static void rtl8168d_1_hw_phy_config(struct rtl8169_private *tp,
+ struct phy_device *phydev)
+{
+ rtl_writephy_batch(phydev, rtl8168d_1_phy_reg_init_0);
+
+ /*
+ * Rx Error Issue
+ * Fine Tune Switching regulator parameter
+ */
+ phy_write(phydev, 0x1f, 0x0002);
+ phy_modify(phydev, 0x0b, 0x00ef, 0x0010);
+ phy_modify(phydev, 0x0c, 0x5d00, 0xa200);
+
+ if (rtl8168d_efuse_read(tp, 0x01) == 0xb1) {
+ int val;
+
+ rtl_writephy_batch(phydev, rtl8168d_1_phy_reg_init_1);
+
+ val = phy_read(phydev, 0x0d);
+
+ if ((val & 0x00ff) != 0x006c) {
+ static const u32 set[] = {
+ 0x0065, 0x0066, 0x0067, 0x0068,
+ 0x0069, 0x006a, 0x006b, 0x006c
+ };
+ int i;
+
+ phy_write(phydev, 0x1f, 0x0002);
+
+ val &= 0xff00;
+ for (i = 0; i < ARRAY_SIZE(set); i++)
+ phy_write(phydev, 0x0d, val | set[i]);
+ }
+ } else {
+ phy_write_paged(phydev, 0x0002, 0x05, 0x6662);
+ r8168d_phy_param(phydev, 0x8330, 0xffff, 0x6662);
+ }
+
+ /* RSET couple improve */
+ phy_write(phydev, 0x1f, 0x0002);
+ phy_set_bits(phydev, 0x0d, 0x0300);
+ phy_set_bits(phydev, 0x0f, 0x0010);
+
+ /* Fine tune PLL performance */
+ phy_write(phydev, 0x1f, 0x0002);
+ phy_modify(phydev, 0x02, 0x0600, 0x0100);
+ phy_clear_bits(phydev, 0x03, 0xe000);
+ phy_write(phydev, 0x1f, 0x0000);
+
+ rtl8168d_apply_firmware_cond(tp, phydev, 0xbf00);
+}
+
+static void rtl8168d_2_hw_phy_config(struct rtl8169_private *tp,
+ struct phy_device *phydev)
+{
+ rtl_writephy_batch(phydev, rtl8168d_1_phy_reg_init_0);
+
+ if (rtl8168d_efuse_read(tp, 0x01) == 0xb1) {
+ int val;
+
+ rtl_writephy_batch(phydev, rtl8168d_1_phy_reg_init_1);
+
+ val = phy_read(phydev, 0x0d);
+ if ((val & 0x00ff) != 0x006c) {
+ static const u32 set[] = {
+ 0x0065, 0x0066, 0x0067, 0x0068,
+ 0x0069, 0x006a, 0x006b, 0x006c
+ };
+ int i;
+
+ phy_write(phydev, 0x1f, 0x0002);
+
+ val &= 0xff00;
+ for (i = 0; i < ARRAY_SIZE(set); i++)
+ phy_write(phydev, 0x0d, val | set[i]);
+ }
+ } else {
+ phy_write_paged(phydev, 0x0002, 0x05, 0x2642);
+ r8168d_phy_param(phydev, 0x8330, 0xffff, 0x2642);
+ }
+
+ /* Fine tune PLL performance */
+ phy_write(phydev, 0x1f, 0x0002);
+ phy_modify(phydev, 0x02, 0x0600, 0x0100);
+ phy_clear_bits(phydev, 0x03, 0xe000);
+ phy_write(phydev, 0x1f, 0x0000);
+
+ /* Switching regulator Slew rate */
+ phy_modify_paged(phydev, 0x0002, 0x0f, 0x0000, 0x0017);
+
+ rtl8168d_apply_firmware_cond(tp, phydev, 0xb300);
+}
+
+static void rtl8168d_3_hw_phy_config(struct rtl8169_private *tp,
+ struct phy_device *phydev)
+{
+ static const struct phy_reg phy_reg_init[] = {
+ { 0x1f, 0x0002 },
+ { 0x10, 0x0008 },
+ { 0x0d, 0x006c },
+
+ { 0x1f, 0x0000 },
+ { 0x0d, 0xf880 },
+
+ { 0x1f, 0x0001 },
+ { 0x17, 0x0cc0 },
+
+ { 0x1f, 0x0001 },
+ { 0x0b, 0xa4d8 },
+ { 0x09, 0x281c },
+ { 0x07, 0x2883 },
+ { 0x0a, 0x6b35 },
+ { 0x1d, 0x3da4 },
+ { 0x1c, 0xeffd },
+ { 0x14, 0x7f52 },
+ { 0x18, 0x7fc6 },
+ { 0x08, 0x0601 },
+ { 0x06, 0x4063 },
+ { 0x10, 0xf074 },
+ { 0x1f, 0x0003 },
+ { 0x13, 0x0789 },
+ { 0x12, 0xf4bd },
+ { 0x1a, 0x04fd },
+ { 0x14, 0x84b0 },
+ { 0x1f, 0x0000 },
+ { 0x00, 0x9200 },
+
+ { 0x1f, 0x0005 },
+ { 0x01, 0x0340 },
+ { 0x1f, 0x0001 },
+ { 0x04, 0x4000 },
+ { 0x03, 0x1d21 },
+ { 0x02, 0x0c32 },
+ { 0x01, 0x0200 },
+ { 0x00, 0x5554 },
+ { 0x04, 0x4800 },
+ { 0x04, 0x4000 },
+ { 0x04, 0xf000 },
+ { 0x03, 0xdf01 },
+ { 0x02, 0xdf20 },
+ { 0x01, 0x101a },
+ { 0x00, 0xa0ff },
+ { 0x04, 0xf800 },
+ { 0x04, 0xf000 },
+ { 0x1f, 0x0000 },
+ };
+
+ rtl_writephy_batch(phydev, phy_reg_init);
+ r8168d_modify_extpage(phydev, 0x0023, 0x16, 0xffff, 0x0000);
+}
+
+static void rtl8168d_4_hw_phy_config(struct rtl8169_private *tp,
+ struct phy_device *phydev)
+{
+ phy_write_paged(phydev, 0x0001, 0x17, 0x0cc0);
+ r8168d_modify_extpage(phydev, 0x002d, 0x18, 0xffff, 0x0040);
+ phy_set_bits(phydev, 0x0d, BIT(5));
+}
+
+static void rtl8168e_1_hw_phy_config(struct rtl8169_private *tp,
+ struct phy_device *phydev)
+{
+ static const struct phy_reg phy_reg_init[] = {
+ /* Channel estimation fine tune */
+ { 0x1f, 0x0001 },
+ { 0x0b, 0x6c20 },
+ { 0x07, 0x2872 },
+ { 0x1c, 0xefff },
+ { 0x1f, 0x0003 },
+ { 0x14, 0x6420 },
+ { 0x1f, 0x0000 },
+ };
+
+ r8169_apply_firmware(tp);
+
+ /* Enable Delay cap */
+ r8168d_phy_param(phydev, 0x8b80, 0xffff, 0xc896);
+
+ rtl_writephy_batch(phydev, phy_reg_init);
+
+ /* Update PFM & 10M TX idle timer */
+ r8168d_modify_extpage(phydev, 0x002f, 0x15, 0xffff, 0x1919);
+
+ r8168d_modify_extpage(phydev, 0x00ac, 0x18, 0xffff, 0x0006);
+
+ /* DCO enable for 10M IDLE Power */
+ r8168d_modify_extpage(phydev, 0x0023, 0x17, 0x0000, 0x0006);
+
+ /* For impedance matching */
+ phy_modify_paged(phydev, 0x0002, 0x08, 0x7f00, 0x8000);
+
+ /* PHY auto speed down */
+ r8168d_modify_extpage(phydev, 0x002d, 0x18, 0x0000, 0x0050);
+ phy_set_bits(phydev, 0x14, BIT(15));
+
+ r8168d_phy_param(phydev, 0x8b86, 0x0000, 0x0001);
+ r8168d_phy_param(phydev, 0x8b85, 0x2000, 0x0000);
+
+ r8168d_modify_extpage(phydev, 0x0020, 0x15, 0x1100, 0x0000);
+ phy_write_paged(phydev, 0x0006, 0x00, 0x5a00);
+
+ phy_write_mmd(phydev, MDIO_MMD_AN, MDIO_AN_EEE_ADV, 0x0000);
+}
+
+static void rtl8168e_2_hw_phy_config(struct rtl8169_private *tp,
+ struct phy_device *phydev)
+{
+ r8169_apply_firmware(tp);
+
+ /* Enable Delay cap */
+ r8168d_modify_extpage(phydev, 0x00ac, 0x18, 0xffff, 0x0006);
+
+ /* Channel estimation fine tune */
+ phy_write_paged(phydev, 0x0003, 0x09, 0xa20f);
+
+ /* Green Setting */
+ r8168d_phy_param(phydev, 0x8b5b, 0xffff, 0x9222);
+ r8168d_phy_param(phydev, 0x8b6d, 0xffff, 0x8000);
+ r8168d_phy_param(phydev, 0x8b76, 0xffff, 0x8000);
+
+ /* For 4-corner performance improve */
+ phy_write(phydev, 0x1f, 0x0005);
+ phy_write(phydev, 0x05, 0x8b80);
+ phy_set_bits(phydev, 0x17, 0x0006);
+ phy_write(phydev, 0x1f, 0x0000);
+
+ /* PHY auto speed down */
+ r8168d_modify_extpage(phydev, 0x002d, 0x18, 0x0000, 0x0010);
+ phy_set_bits(phydev, 0x14, BIT(15));
+
+ /* improve 10M EEE waveform */
+ r8168d_phy_param(phydev, 0x8b86, 0x0000, 0x0001);
+
+ /* Improve 2-pair detection performance */
+ r8168d_phy_param(phydev, 0x8b85, 0x0000, 0x4000);
+
+ rtl8168f_config_eee_phy(phydev);
+
+ /* Green feature */
+ phy_write(phydev, 0x1f, 0x0003);
+ phy_set_bits(phydev, 0x19, BIT(0));
+ phy_set_bits(phydev, 0x10, BIT(10));
+ phy_write(phydev, 0x1f, 0x0000);
+ phy_modify_paged(phydev, 0x0005, 0x01, 0, BIT(8));
+}
+
+static void rtl8168f_hw_phy_config(struct rtl8169_private *tp,
+ struct phy_device *phydev)
+{
+ /* For 4-corner performance improve */
+ r8168d_phy_param(phydev, 0x8b80, 0x0000, 0x0006);
+
+ /* PHY auto speed down */
+ r8168d_modify_extpage(phydev, 0x002d, 0x18, 0x0000, 0x0010);
+ phy_set_bits(phydev, 0x14, BIT(15));
+
+ /* Improve 10M EEE waveform */
+ r8168d_phy_param(phydev, 0x8b86, 0x0000, 0x0001);
+
+ rtl8168f_config_eee_phy(phydev);
+}
+
+static void rtl8168f_1_hw_phy_config(struct rtl8169_private *tp,
+ struct phy_device *phydev)
+{
+ r8169_apply_firmware(tp);
+
+ /* Channel estimation fine tune */
+ phy_write_paged(phydev, 0x0003, 0x09, 0xa20f);
+
+ /* Modify green table for giga & fnet */
+ r8168d_phy_param(phydev, 0x8b55, 0xffff, 0x0000);
+ r8168d_phy_param(phydev, 0x8b5e, 0xffff, 0x0000);
+ r8168d_phy_param(phydev, 0x8b67, 0xffff, 0x0000);
+ r8168d_phy_param(phydev, 0x8b70, 0xffff, 0x0000);
+ r8168d_modify_extpage(phydev, 0x0078, 0x17, 0xffff, 0x0000);
+ r8168d_modify_extpage(phydev, 0x0078, 0x19, 0xffff, 0x00fb);
+
+ /* Modify green table for 10M */
+ r8168d_phy_param(phydev, 0x8b79, 0xffff, 0xaa00);
+
+ /* Disable hiimpedance detection (RTCT) */
+ phy_write_paged(phydev, 0x0003, 0x01, 0x328a);
+
+ rtl8168f_hw_phy_config(tp, phydev);
+
+ /* Improve 2-pair detection performance */
+ r8168d_phy_param(phydev, 0x8b85, 0x0000, 0x4000);
+}
+
+static void rtl8168f_2_hw_phy_config(struct rtl8169_private *tp,
+ struct phy_device *phydev)
+{
+ r8169_apply_firmware(tp);
+
+ rtl8168f_hw_phy_config(tp, phydev);
+}
+
+static void rtl8411_hw_phy_config(struct rtl8169_private *tp,
+ struct phy_device *phydev)
+{
+ r8169_apply_firmware(tp);
+
+ rtl8168f_hw_phy_config(tp, phydev);
+
+ /* Improve 2-pair detection performance */
+ r8168d_phy_param(phydev, 0x8b85, 0x0000, 0x4000);
+
+ /* Channel estimation fine tune */
+ phy_write_paged(phydev, 0x0003, 0x09, 0xa20f);
+
+ /* Modify green table for giga & fnet */
+ r8168d_phy_param(phydev, 0x8b55, 0xffff, 0x0000);
+ r8168d_phy_param(phydev, 0x8b5e, 0xffff, 0x0000);
+ r8168d_phy_param(phydev, 0x8b67, 0xffff, 0x0000);
+ r8168d_phy_param(phydev, 0x8b70, 0xffff, 0x0000);
+ r8168d_modify_extpage(phydev, 0x0078, 0x17, 0xffff, 0x0000);
+ r8168d_modify_extpage(phydev, 0x0078, 0x19, 0xffff, 0x00aa);
+
+ /* Modify green table for 10M */
+ r8168d_phy_param(phydev, 0x8b79, 0xffff, 0xaa00);
+
+ /* Disable hiimpedance detection (RTCT) */
+ phy_write_paged(phydev, 0x0003, 0x01, 0x328a);
+
+ /* Modify green table for giga */
+ r8168d_phy_param(phydev, 0x8b54, 0x0800, 0x0000);
+ r8168d_phy_param(phydev, 0x8b5d, 0x0800, 0x0000);
+ r8168d_phy_param(phydev, 0x8a7c, 0x0100, 0x0000);
+ r8168d_phy_param(phydev, 0x8a7f, 0x0000, 0x0100);
+ r8168d_phy_param(phydev, 0x8a82, 0x0100, 0x0000);
+ r8168d_phy_param(phydev, 0x8a85, 0x0100, 0x0000);
+ r8168d_phy_param(phydev, 0x8a88, 0x0100, 0x0000);
+
+ /* uc same-seed solution */
+ r8168d_phy_param(phydev, 0x8b85, 0x0000, 0x8000);
+
+ /* Green feature */
+ phy_write(phydev, 0x1f, 0x0003);
+ phy_clear_bits(phydev, 0x19, BIT(0));
+ phy_clear_bits(phydev, 0x10, BIT(10));
+ phy_write(phydev, 0x1f, 0x0000);
+}
+
+static void rtl8168g_disable_aldps(struct phy_device *phydev)
+{
+ phy_modify_paged(phydev, 0x0a43, 0x10, BIT(2), 0);
+}
+
+static void rtl8168g_phy_adjust_10m_aldps(struct phy_device *phydev)
+{
+ phy_modify_paged(phydev, 0x0bcc, 0x14, BIT(8), 0);
+ phy_modify_paged(phydev, 0x0a44, 0x11, 0, BIT(7) | BIT(6));
+ r8168g_phy_param(phydev, 0x8084, 0x6000, 0x0000);
+ phy_modify_paged(phydev, 0x0a43, 0x10, 0x0000, 0x1003);
+}
+
+static void rtl8168g_1_hw_phy_config(struct rtl8169_private *tp,
+ struct phy_device *phydev)
+{
+ int ret;
+
+ r8169_apply_firmware(tp);
+
+ ret = phy_read_paged(phydev, 0x0a46, 0x10);
+ if (ret & BIT(8))
+ phy_modify_paged(phydev, 0x0bcc, 0x12, BIT(15), 0);
+ else
+ phy_modify_paged(phydev, 0x0bcc, 0x12, 0, BIT(15));
+
+ ret = phy_read_paged(phydev, 0x0a46, 0x13);
+ if (ret & BIT(8))
+ phy_modify_paged(phydev, 0x0c41, 0x15, 0, BIT(1));
+ else
+ phy_modify_paged(phydev, 0x0c41, 0x15, BIT(1), 0);
+
+ /* Enable PHY auto speed down */
+ phy_modify_paged(phydev, 0x0a44, 0x11, 0, BIT(3) | BIT(2));
+
+ rtl8168g_phy_adjust_10m_aldps(phydev);
+
+ /* EEE auto-fallback function */
+ phy_modify_paged(phydev, 0x0a4b, 0x11, 0, BIT(2));
+
+ /* Enable UC LPF tune function */
+ r8168g_phy_param(phydev, 0x8012, 0x0000, 0x8000);
+
+ phy_modify_paged(phydev, 0x0c42, 0x11, BIT(13), BIT(14));
+
+ /* Improve SWR Efficiency */
+ phy_write(phydev, 0x1f, 0x0bcd);
+ phy_write(phydev, 0x14, 0x5065);
+ phy_write(phydev, 0x14, 0xd065);
+ phy_write(phydev, 0x1f, 0x0bc8);
+ phy_write(phydev, 0x11, 0x5655);
+ phy_write(phydev, 0x1f, 0x0bcd);
+ phy_write(phydev, 0x14, 0x1065);
+ phy_write(phydev, 0x14, 0x9065);
+ phy_write(phydev, 0x14, 0x1065);
+ phy_write(phydev, 0x1f, 0x0000);
+
+ rtl8168g_disable_aldps(phydev);
+ rtl8168g_config_eee_phy(phydev);
+}
+
+static void rtl8168g_2_hw_phy_config(struct rtl8169_private *tp,
+ struct phy_device *phydev)
+{
+ r8169_apply_firmware(tp);
+ rtl8168g_config_eee_phy(phydev);
+}
+
+static void rtl8168h_1_hw_phy_config(struct rtl8169_private *tp,
+ struct phy_device *phydev)
+{
+ u16 dout_tapbin;
+ u32 data;
+
+ r8169_apply_firmware(tp);
+
+ /* CHN EST parameters adjust - giga master */
+ r8168g_phy_param(phydev, 0x809b, 0xf800, 0x8000);
+ r8168g_phy_param(phydev, 0x80a2, 0xff00, 0x8000);
+ r8168g_phy_param(phydev, 0x80a4, 0xff00, 0x8500);
+ r8168g_phy_param(phydev, 0x809c, 0xff00, 0xbd00);
+
+ /* CHN EST parameters adjust - giga slave */
+ r8168g_phy_param(phydev, 0x80ad, 0xf800, 0x7000);
+ r8168g_phy_param(phydev, 0x80b4, 0xff00, 0x5000);
+ r8168g_phy_param(phydev, 0x80ac, 0xff00, 0x4000);
+
+ /* CHN EST parameters adjust - fnet */
+ r8168g_phy_param(phydev, 0x808e, 0xff00, 0x1200);
+ r8168g_phy_param(phydev, 0x8090, 0xff00, 0xe500);
+ r8168g_phy_param(phydev, 0x8092, 0xff00, 0x9f00);
+
+ /* enable R-tune & PGA-retune function */
+ dout_tapbin = 0;
+ data = phy_read_paged(phydev, 0x0a46, 0x13);
+ data &= 3;
+ data <<= 2;
+ dout_tapbin |= data;
+ data = phy_read_paged(phydev, 0x0a46, 0x12);
+ data &= 0xc000;
+ data >>= 14;
+ dout_tapbin |= data;
+ dout_tapbin = ~(dout_tapbin ^ 0x08);
+ dout_tapbin <<= 12;
+ dout_tapbin &= 0xf000;
+
+ r8168g_phy_param(phydev, 0x827a, 0xf000, dout_tapbin);
+ r8168g_phy_param(phydev, 0x827b, 0xf000, dout_tapbin);
+ r8168g_phy_param(phydev, 0x827c, 0xf000, dout_tapbin);
+ r8168g_phy_param(phydev, 0x827d, 0xf000, dout_tapbin);
+ r8168g_phy_param(phydev, 0x0811, 0x0000, 0x0800);
+ phy_modify_paged(phydev, 0x0a42, 0x16, 0x0000, 0x0002);
+
+ /* enable GPHY 10M */
+ phy_modify_paged(phydev, 0x0a44, 0x11, 0, BIT(11));
+
+ /* SAR ADC performance */
+ phy_modify_paged(phydev, 0x0bca, 0x17, BIT(12) | BIT(13), BIT(14));
+
+ r8168g_phy_param(phydev, 0x803f, 0x3000, 0x0000);
+ r8168g_phy_param(phydev, 0x8047, 0x3000, 0x0000);
+ r8168g_phy_param(phydev, 0x804f, 0x3000, 0x0000);
+ r8168g_phy_param(phydev, 0x8057, 0x3000, 0x0000);
+ r8168g_phy_param(phydev, 0x805f, 0x3000, 0x0000);
+ r8168g_phy_param(phydev, 0x8067, 0x3000, 0x0000);
+ r8168g_phy_param(phydev, 0x806f, 0x3000, 0x0000);
+
+ /* disable phy pfm mode */
+ phy_modify_paged(phydev, 0x0a44, 0x11, BIT(7), 0);
+
+ rtl8168g_disable_aldps(phydev);
+ rtl8168h_config_eee_phy(phydev);
+}
+
+static void rtl8168h_2_hw_phy_config(struct rtl8169_private *tp,
+ struct phy_device *phydev)
+{
+ u16 ioffset, rlen;
+ u32 data;
+
+ r8169_apply_firmware(tp);
+
+ /* CHIN EST parameter update */
+ r8168g_phy_param(phydev, 0x808a, 0x003f, 0x000a);
+
+ /* enable R-tune & PGA-retune function */
+ r8168g_phy_param(phydev, 0x0811, 0x0000, 0x0800);
+ phy_modify_paged(phydev, 0x0a42, 0x16, 0x0000, 0x0002);
+
+ /* enable GPHY 10M */
+ phy_modify_paged(phydev, 0x0a44, 0x11, 0, BIT(11));
+
+ ioffset = rtl8168h_2_get_adc_bias_ioffset(tp);
+ if (ioffset != 0xffff)
+ phy_write_paged(phydev, 0x0bcf, 0x16, ioffset);
+
+ /* Modify rlen (TX LPF corner frequency) level */
+ data = phy_read_paged(phydev, 0x0bcd, 0x16);
+ data &= 0x000f;
+ rlen = 0;
+ if (data > 3)
+ rlen = data - 3;
+ data = rlen | (rlen << 4) | (rlen << 8) | (rlen << 12);
+ phy_write_paged(phydev, 0x0bcd, 0x17, data);
+
+ /* disable phy pfm mode */
+ phy_modify_paged(phydev, 0x0a44, 0x11, BIT(7), 0);
+
+ rtl8168g_disable_aldps(phydev);
+ rtl8168g_config_eee_phy(phydev);
+}
+
+static void rtl8168ep_1_hw_phy_config(struct rtl8169_private *tp,
+ struct phy_device *phydev)
+{
+ /* Enable PHY auto speed down */
+ phy_modify_paged(phydev, 0x0a44, 0x11, 0, BIT(3) | BIT(2));
+
+ rtl8168g_phy_adjust_10m_aldps(phydev);
+
+ /* Enable EEE auto-fallback function */
+ phy_modify_paged(phydev, 0x0a4b, 0x11, 0, BIT(2));
+
+ /* Enable UC LPF tune function */
+ r8168g_phy_param(phydev, 0x8012, 0x0000, 0x8000);
+
+ /* set rg_sel_sdm_rate */
+ phy_modify_paged(phydev, 0x0c42, 0x11, BIT(13), BIT(14));
+
+ rtl8168g_disable_aldps(phydev);
+ rtl8168g_config_eee_phy(phydev);
+}
+
+static void rtl8168ep_2_hw_phy_config(struct rtl8169_private *tp,
+ struct phy_device *phydev)
+{
+ rtl8168g_phy_adjust_10m_aldps(phydev);
+
+ /* Enable UC LPF tune function */
+ r8168g_phy_param(phydev, 0x8012, 0x0000, 0x8000);
+
+ /* Set rg_sel_sdm_rate */
+ phy_modify_paged(phydev, 0x0c42, 0x11, BIT(13), BIT(14));
+
+ /* Channel estimation parameters */
+ r8168g_phy_param(phydev, 0x80f3, 0xff00, 0x8b00);
+ r8168g_phy_param(phydev, 0x80f0, 0xff00, 0x3a00);
+ r8168g_phy_param(phydev, 0x80ef, 0xff00, 0x0500);
+ r8168g_phy_param(phydev, 0x80f6, 0xff00, 0x6e00);
+ r8168g_phy_param(phydev, 0x80ec, 0xff00, 0x6800);
+ r8168g_phy_param(phydev, 0x80ed, 0xff00, 0x7c00);
+ r8168g_phy_param(phydev, 0x80f2, 0xff00, 0xf400);
+ r8168g_phy_param(phydev, 0x80f4, 0xff00, 0x8500);
+ r8168g_phy_param(phydev, 0x8110, 0xff00, 0xa800);
+ r8168g_phy_param(phydev, 0x810f, 0xff00, 0x1d00);
+ r8168g_phy_param(phydev, 0x8111, 0xff00, 0xf500);
+ r8168g_phy_param(phydev, 0x8113, 0xff00, 0x6100);
+ r8168g_phy_param(phydev, 0x8115, 0xff00, 0x9200);
+ r8168g_phy_param(phydev, 0x810e, 0xff00, 0x0400);
+ r8168g_phy_param(phydev, 0x810c, 0xff00, 0x7c00);
+ r8168g_phy_param(phydev, 0x810b, 0xff00, 0x5a00);
+ r8168g_phy_param(phydev, 0x80d1, 0xff00, 0xff00);
+ r8168g_phy_param(phydev, 0x80cd, 0xff00, 0x9e00);
+ r8168g_phy_param(phydev, 0x80d3, 0xff00, 0x0e00);
+ r8168g_phy_param(phydev, 0x80d5, 0xff00, 0xca00);
+ r8168g_phy_param(phydev, 0x80d7, 0xff00, 0x8400);
+
+ /* Force PWM-mode */
+ phy_write(phydev, 0x1f, 0x0bcd);
+ phy_write(phydev, 0x14, 0x5065);
+ phy_write(phydev, 0x14, 0xd065);
+ phy_write(phydev, 0x1f, 0x0bc8);
+ phy_write(phydev, 0x12, 0x00ed);
+ phy_write(phydev, 0x1f, 0x0bcd);
+ phy_write(phydev, 0x14, 0x1065);
+ phy_write(phydev, 0x14, 0x9065);
+ phy_write(phydev, 0x14, 0x1065);
+ phy_write(phydev, 0x1f, 0x0000);
+
+ rtl8168g_disable_aldps(phydev);
+ rtl8168g_config_eee_phy(phydev);
+}
+
+static void rtl8117_hw_phy_config(struct rtl8169_private *tp,
+ struct phy_device *phydev)
+{
+ /* CHN EST parameters adjust - fnet */
+ r8168g_phy_param(phydev, 0x808e, 0xff00, 0x4800);
+ r8168g_phy_param(phydev, 0x8090, 0xff00, 0xcc00);
+ r8168g_phy_param(phydev, 0x8092, 0xff00, 0xb000);
+
+ r8168g_phy_param(phydev, 0x8088, 0xff00, 0x6000);
+ r8168g_phy_param(phydev, 0x808b, 0x3f00, 0x0b00);
+ r8168g_phy_param(phydev, 0x808d, 0x1f00, 0x0600);
+ r8168g_phy_param(phydev, 0x808c, 0xff00, 0xb000);
+ r8168g_phy_param(phydev, 0x80a0, 0xff00, 0x2800);
+ r8168g_phy_param(phydev, 0x80a2, 0xff00, 0x5000);
+ r8168g_phy_param(phydev, 0x809b, 0xf800, 0xb000);
+ r8168g_phy_param(phydev, 0x809a, 0xff00, 0x4b00);
+ r8168g_phy_param(phydev, 0x809d, 0x3f00, 0x0800);
+ r8168g_phy_param(phydev, 0x80a1, 0xff00, 0x7000);
+ r8168g_phy_param(phydev, 0x809f, 0x1f00, 0x0300);
+ r8168g_phy_param(phydev, 0x809e, 0xff00, 0x8800);
+ r8168g_phy_param(phydev, 0x80b2, 0xff00, 0x2200);
+ r8168g_phy_param(phydev, 0x80ad, 0xf800, 0x9800);
+ r8168g_phy_param(phydev, 0x80af, 0x3f00, 0x0800);
+ r8168g_phy_param(phydev, 0x80b3, 0xff00, 0x6f00);
+ r8168g_phy_param(phydev, 0x80b1, 0x1f00, 0x0300);
+ r8168g_phy_param(phydev, 0x80b0, 0xff00, 0x9300);
+
+ r8168g_phy_param(phydev, 0x8011, 0x0000, 0x0800);
+
+ /* enable GPHY 10M */
+ phy_modify_paged(phydev, 0x0a44, 0x11, 0, BIT(11));
+
+ r8168g_phy_param(phydev, 0x8016, 0x0000, 0x0400);
+
+ rtl8168g_disable_aldps(phydev);
+ rtl8168h_config_eee_phy(phydev);
+}
+
+static void rtl8102e_hw_phy_config(struct rtl8169_private *tp,
+ struct phy_device *phydev)
+{
+ static const struct phy_reg phy_reg_init[] = {
+ { 0x1f, 0x0003 },
+ { 0x08, 0x441d },
+ { 0x01, 0x9100 },
+ { 0x1f, 0x0000 }
+ };
+
+ phy_set_bits(phydev, 0x11, BIT(12));
+ phy_set_bits(phydev, 0x19, BIT(13));
+ phy_set_bits(phydev, 0x10, BIT(15));
+
+ rtl_writephy_batch(phydev, phy_reg_init);
+}
+
+static void rtl8105e_hw_phy_config(struct rtl8169_private *tp,
+ struct phy_device *phydev)
+{
+ /* Disable ALDPS before ram code */
+ phy_write(phydev, 0x18, 0x0310);
+ msleep(100);
+
+ r8169_apply_firmware(tp);
+
+ phy_write_paged(phydev, 0x0005, 0x1a, 0x0000);
+ phy_write_paged(phydev, 0x0004, 0x1c, 0x0000);
+ phy_write_paged(phydev, 0x0001, 0x15, 0x7701);
+}
+
+static void rtl8402_hw_phy_config(struct rtl8169_private *tp,
+ struct phy_device *phydev)
+{
+ /* Disable ALDPS before setting firmware */
+ phy_write(phydev, 0x18, 0x0310);
+ msleep(20);
+
+ r8169_apply_firmware(tp);
+
+ /* EEE setting */
+ phy_write(phydev, 0x1f, 0x0004);
+ phy_write(phydev, 0x10, 0x401f);
+ phy_write(phydev, 0x19, 0x7030);
+ phy_write(phydev, 0x1f, 0x0000);
+}
+
+static void rtl8106e_hw_phy_config(struct rtl8169_private *tp,
+ struct phy_device *phydev)
+{
+ static const struct phy_reg phy_reg_init[] = {
+ { 0x1f, 0x0004 },
+ { 0x10, 0xc07f },
+ { 0x19, 0x7030 },
+ { 0x1f, 0x0000 }
+ };
+
+ /* Disable ALDPS before ram code */
+ phy_write(phydev, 0x18, 0x0310);
+ msleep(100);
+
+ r8169_apply_firmware(tp);
+
+ rtl_writephy_batch(phydev, phy_reg_init);
+}
+
+static void rtl8125_1_hw_phy_config(struct rtl8169_private *tp,
+ struct phy_device *phydev)
+{
+ phy_modify_paged(phydev, 0xad4, 0x10, 0x03ff, 0x0084);
+ phy_modify_paged(phydev, 0xad4, 0x17, 0x0000, 0x0010);
+ phy_modify_paged(phydev, 0xad1, 0x13, 0x03ff, 0x0006);
+ phy_modify_paged(phydev, 0xad3, 0x11, 0x003f, 0x0006);
+ phy_modify_paged(phydev, 0xac0, 0x14, 0x0000, 0x1100);
+ phy_modify_paged(phydev, 0xac8, 0x15, 0xf000, 0x7000);
+ phy_modify_paged(phydev, 0xad1, 0x14, 0x0000, 0x0400);
+ phy_modify_paged(phydev, 0xad1, 0x15, 0x0000, 0x03ff);
+ phy_modify_paged(phydev, 0xad1, 0x16, 0x0000, 0x03ff);
+
+ r8168g_phy_param(phydev, 0x80ea, 0xff00, 0xc400);
+ r8168g_phy_param(phydev, 0x80eb, 0x0700, 0x0300);
+ r8168g_phy_param(phydev, 0x80f8, 0xff00, 0x1c00);
+ r8168g_phy_param(phydev, 0x80f1, 0xff00, 0x3000);
+ r8168g_phy_param(phydev, 0x80fe, 0xff00, 0xa500);
+ r8168g_phy_param(phydev, 0x8102, 0xff00, 0x5000);
+ r8168g_phy_param(phydev, 0x8105, 0xff00, 0x3300);
+ r8168g_phy_param(phydev, 0x8100, 0xff00, 0x7000);
+ r8168g_phy_param(phydev, 0x8104, 0xff00, 0xf000);
+ r8168g_phy_param(phydev, 0x8106, 0xff00, 0x6500);
+ r8168g_phy_param(phydev, 0x80dc, 0xff00, 0xed00);
+ r8168g_phy_param(phydev, 0x80df, 0x0000, 0x0100);
+ r8168g_phy_param(phydev, 0x80e1, 0x0100, 0x0000);
+
+ phy_modify_paged(phydev, 0xbf0, 0x13, 0x003f, 0x0038);
+ r8168g_phy_param(phydev, 0x819f, 0xffff, 0xd0b6);
+
+ phy_write_paged(phydev, 0xbc3, 0x12, 0x5555);
+ phy_modify_paged(phydev, 0xbf0, 0x15, 0x0e00, 0x0a00);
+ phy_modify_paged(phydev, 0xa5c, 0x10, 0x0400, 0x0000);
+ phy_modify_paged(phydev, 0xa44, 0x11, 0x0000, 0x0800);
+
+ rtl8125_config_eee_phy(phydev);
+}
+
+static void rtl8125_2_hw_phy_config(struct rtl8169_private *tp,
+ struct phy_device *phydev)
+{
+ int i;
+
+ phy_modify_paged(phydev, 0xad4, 0x17, 0x0000, 0x0010);
+ phy_modify_paged(phydev, 0xad1, 0x13, 0x03ff, 0x03ff);
+ phy_modify_paged(phydev, 0xad3, 0x11, 0x003f, 0x0006);
+ phy_modify_paged(phydev, 0xac0, 0x14, 0x1100, 0x0000);
+ phy_modify_paged(phydev, 0xacc, 0x10, 0x0003, 0x0002);
+ phy_modify_paged(phydev, 0xad4, 0x10, 0x00e7, 0x0044);
+ phy_modify_paged(phydev, 0xac1, 0x12, 0x0080, 0x0000);
+ phy_modify_paged(phydev, 0xac8, 0x10, 0x0300, 0x0000);
+ phy_modify_paged(phydev, 0xac5, 0x17, 0x0007, 0x0002);
+ phy_write_paged(phydev, 0xad4, 0x16, 0x00a8);
+ phy_write_paged(phydev, 0xac5, 0x16, 0x01ff);
+ phy_modify_paged(phydev, 0xac8, 0x15, 0x00f0, 0x0030);
+
+ phy_write(phydev, 0x1f, 0x0b87);
+ phy_write(phydev, 0x16, 0x80a2);
+ phy_write(phydev, 0x17, 0x0153);
+ phy_write(phydev, 0x16, 0x809c);
+ phy_write(phydev, 0x17, 0x0153);
+ phy_write(phydev, 0x1f, 0x0000);
+
+ phy_write(phydev, 0x1f, 0x0a43);
+ phy_write(phydev, 0x13, 0x81B3);
+ phy_write(phydev, 0x14, 0x0043);
+ phy_write(phydev, 0x14, 0x00A7);
+ phy_write(phydev, 0x14, 0x00D6);
+ phy_write(phydev, 0x14, 0x00EC);
+ phy_write(phydev, 0x14, 0x00F6);
+ phy_write(phydev, 0x14, 0x00FB);
+ phy_write(phydev, 0x14, 0x00FD);
+ phy_write(phydev, 0x14, 0x00FF);
+ phy_write(phydev, 0x14, 0x00BB);
+ phy_write(phydev, 0x14, 0x0058);
+ phy_write(phydev, 0x14, 0x0029);
+ phy_write(phydev, 0x14, 0x0013);
+ phy_write(phydev, 0x14, 0x0009);
+ phy_write(phydev, 0x14, 0x0004);
+ phy_write(phydev, 0x14, 0x0002);
+ for (i = 0; i < 25; i++)
+ phy_write(phydev, 0x14, 0x0000);
+ phy_write(phydev, 0x1f, 0x0000);
+
+ r8168g_phy_param(phydev, 0x8257, 0xffff, 0x020F);
+ r8168g_phy_param(phydev, 0x80ea, 0xffff, 0x7843);
+
+ r8169_apply_firmware(tp);
+
+ phy_modify_paged(phydev, 0xd06, 0x14, 0x0000, 0x2000);
+
+ r8168g_phy_param(phydev, 0x81a2, 0x0000, 0x0100);
+
+ phy_modify_paged(phydev, 0xb54, 0x16, 0xff00, 0xdb00);
+ phy_modify_paged(phydev, 0xa45, 0x12, 0x0001, 0x0000);
+ phy_modify_paged(phydev, 0xa5d, 0x12, 0x0000, 0x0020);
+ phy_modify_paged(phydev, 0xad4, 0x17, 0x0010, 0x0000);
+ phy_modify_paged(phydev, 0xa86, 0x15, 0x0001, 0x0000);
+ phy_modify_paged(phydev, 0xa44, 0x11, 0x0000, 0x0800);
+
+ rtl8125_config_eee_phy(phydev);
+}
+
+void r8169_hw_phy_config(struct rtl8169_private *tp, struct phy_device *phydev,
+ enum mac_version ver)
+{
+ static const rtl_phy_cfg_fct phy_configs[] = {
+ /* PCI devices. */
+ [RTL_GIGA_MAC_VER_02] = rtl8169s_hw_phy_config,
+ [RTL_GIGA_MAC_VER_03] = rtl8169s_hw_phy_config,
+ [RTL_GIGA_MAC_VER_04] = rtl8169sb_hw_phy_config,
+ [RTL_GIGA_MAC_VER_05] = rtl8169scd_hw_phy_config,
+ [RTL_GIGA_MAC_VER_06] = rtl8169sce_hw_phy_config,
+ /* PCI-E devices. */
+ [RTL_GIGA_MAC_VER_07] = rtl8102e_hw_phy_config,
+ [RTL_GIGA_MAC_VER_08] = rtl8102e_hw_phy_config,
+ [RTL_GIGA_MAC_VER_09] = rtl8102e_hw_phy_config,
+ [RTL_GIGA_MAC_VER_10] = NULL,
+ [RTL_GIGA_MAC_VER_11] = rtl8168bb_hw_phy_config,
+ [RTL_GIGA_MAC_VER_12] = rtl8168bef_hw_phy_config,
+ [RTL_GIGA_MAC_VER_13] = NULL,
+ [RTL_GIGA_MAC_VER_14] = NULL,
+ [RTL_GIGA_MAC_VER_15] = NULL,
+ [RTL_GIGA_MAC_VER_16] = NULL,
+ [RTL_GIGA_MAC_VER_17] = rtl8168bef_hw_phy_config,
+ [RTL_GIGA_MAC_VER_18] = rtl8168cp_1_hw_phy_config,
+ [RTL_GIGA_MAC_VER_19] = rtl8168c_1_hw_phy_config,
+ [RTL_GIGA_MAC_VER_20] = rtl8168c_2_hw_phy_config,
+ [RTL_GIGA_MAC_VER_21] = rtl8168c_3_hw_phy_config,
+ [RTL_GIGA_MAC_VER_22] = rtl8168c_3_hw_phy_config,
+ [RTL_GIGA_MAC_VER_23] = rtl8168cp_2_hw_phy_config,
+ [RTL_GIGA_MAC_VER_24] = rtl8168cp_2_hw_phy_config,
+ [RTL_GIGA_MAC_VER_25] = rtl8168d_1_hw_phy_config,
+ [RTL_GIGA_MAC_VER_26] = rtl8168d_2_hw_phy_config,
+ [RTL_GIGA_MAC_VER_27] = rtl8168d_3_hw_phy_config,
+ [RTL_GIGA_MAC_VER_28] = rtl8168d_4_hw_phy_config,
+ [RTL_GIGA_MAC_VER_29] = rtl8105e_hw_phy_config,
+ [RTL_GIGA_MAC_VER_30] = rtl8105e_hw_phy_config,
+ [RTL_GIGA_MAC_VER_31] = NULL,
+ [RTL_GIGA_MAC_VER_32] = rtl8168e_1_hw_phy_config,
+ [RTL_GIGA_MAC_VER_33] = rtl8168e_1_hw_phy_config,
+ [RTL_GIGA_MAC_VER_34] = rtl8168e_2_hw_phy_config,
+ [RTL_GIGA_MAC_VER_35] = rtl8168f_1_hw_phy_config,
+ [RTL_GIGA_MAC_VER_36] = rtl8168f_2_hw_phy_config,
+ [RTL_GIGA_MAC_VER_37] = rtl8402_hw_phy_config,
+ [RTL_GIGA_MAC_VER_38] = rtl8411_hw_phy_config,
+ [RTL_GIGA_MAC_VER_39] = rtl8106e_hw_phy_config,
+ [RTL_GIGA_MAC_VER_40] = rtl8168g_1_hw_phy_config,
+ [RTL_GIGA_MAC_VER_41] = NULL,
+ [RTL_GIGA_MAC_VER_42] = rtl8168g_2_hw_phy_config,
+ [RTL_GIGA_MAC_VER_43] = rtl8168g_2_hw_phy_config,
+ [RTL_GIGA_MAC_VER_44] = rtl8168g_2_hw_phy_config,
+ [RTL_GIGA_MAC_VER_45] = rtl8168h_1_hw_phy_config,
+ [RTL_GIGA_MAC_VER_46] = rtl8168h_2_hw_phy_config,
+ [RTL_GIGA_MAC_VER_47] = rtl8168h_1_hw_phy_config,
+ [RTL_GIGA_MAC_VER_48] = rtl8168h_2_hw_phy_config,
+ [RTL_GIGA_MAC_VER_49] = rtl8168ep_1_hw_phy_config,
+ [RTL_GIGA_MAC_VER_50] = rtl8168ep_2_hw_phy_config,
+ [RTL_GIGA_MAC_VER_51] = rtl8168ep_2_hw_phy_config,
+ [RTL_GIGA_MAC_VER_52] = rtl8117_hw_phy_config,
+ [RTL_GIGA_MAC_VER_60] = rtl8125_1_hw_phy_config,
+ [RTL_GIGA_MAC_VER_61] = rtl8125_2_hw_phy_config,
+ };
+
+ if (phy_configs[ver])
+ phy_configs[ver](tp, phydev);
+}
diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
index 4b13a184bfc7..067ad25553b9 100644
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -1425,7 +1425,7 @@ out_napi_off:
}
/* Timeout function for Ethernet AVB */
-static void ravb_tx_timeout(struct net_device *ndev)
+static void ravb_tx_timeout(struct net_device *ndev, unsigned int txqueue)
{
struct ravb_private *priv = netdev_priv(ndev);
diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index e19b49c4013e..cdd8ab2eb910 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -2478,7 +2478,7 @@ out_napi_off:
}
/* Timeout function */
-static void sh_eth_tx_timeout(struct net_device *ndev)
+static void sh_eth_tx_timeout(struct net_device *ndev, unsigned int txqueue)
{
struct sh_eth_private *mdp = netdev_priv(ndev);
struct sh_eth_rxdesc *rxdesc;
diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c
index bc4f951315da..7585cd2270ba 100644
--- a/drivers/net/ethernet/rocker/rocker_main.c
+++ b/drivers/net/ethernet/rocker/rocker_main.c
@@ -2159,7 +2159,7 @@ static void rocker_router_fib_event_work(struct work_struct *work)
/* Protect internal structures from changes */
rtnl_lock();
switch (fib_work->event) {
- case FIB_EVENT_ENTRY_ADD:
+ case FIB_EVENT_ENTRY_REPLACE:
err = rocker_world_fib4_add(rocker, &fib_work->fen_info);
if (err)
rocker_world_fib4_abort(rocker);
@@ -2201,7 +2201,7 @@ static int rocker_router_fib_event(struct notifier_block *nb,
fib_work->event = event;
switch (event) {
- case FIB_EVENT_ENTRY_ADD: /* fall through */
+ case FIB_EVENT_ENTRY_REPLACE: /* fall through */
case FIB_EVENT_ENTRY_DEL:
if (info->family == AF_INET) {
struct fib_entry_notifier_info *fen_info = ptr;
diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_ethtool.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_ethtool.c
index 0775b9464b4e..466483c4ac67 100644
--- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_ethtool.c
+++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_ethtool.c
@@ -30,7 +30,7 @@ struct sxgbe_stats {
#define SXGBE_STAT(m) \
{ \
#m, \
- FIELD_SIZEOF(struct sxgbe_extra_stats, m), \
+ sizeof_field(struct sxgbe_extra_stats, m), \
offsetof(struct sxgbe_priv_data, xstats.m) \
}
diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c
index c56fcbb37066..7d3a1c0df09c 100644
--- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c
+++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c
@@ -1572,7 +1572,7 @@ static int sxgbe_poll(struct napi_struct *napi, int budget)
* netdev structure and arrange for the device to be reset to a sane state
* in order to transmit a new packet.
*/
-static void sxgbe_tx_timeout(struct net_device *dev)
+static void sxgbe_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct sxgbe_priv_data *priv = netdev_priv(dev);
@@ -2296,7 +2296,7 @@ __setup("sxgbeeth=", sxgbe_cmdline_opt);
-MODULE_DESCRIPTION("SAMSUNG 10G/2.5G/1G Ethernet PLATFORM driver");
+MODULE_DESCRIPTION("Samsung 10G/2.5G/1G Ethernet PLATFORM driver");
MODULE_PARM_DESC(debug, "Message Level (-1: default, 0: no output, 16: all)");
MODULE_PARM_DESC(eee_timer, "EEE-LPI Default LS timer value");
diff --git a/drivers/net/ethernet/seeq/ether3.c b/drivers/net/ethernet/seeq/ether3.c
index 632a7c85964d..128ee7cda1ed 100644
--- a/drivers/net/ethernet/seeq/ether3.c
+++ b/drivers/net/ethernet/seeq/ether3.c
@@ -79,7 +79,7 @@ static netdev_tx_t ether3_sendpacket(struct sk_buff *skb,
static irqreturn_t ether3_interrupt (int irq, void *dev_id);
static int ether3_close (struct net_device *dev);
static void ether3_setmulticastlist (struct net_device *dev);
-static void ether3_timeout(struct net_device *dev);
+static void ether3_timeout(struct net_device *dev, unsigned int txqueue);
#define BUS_16 2
#define BUS_8 1
@@ -450,7 +450,7 @@ static void ether3_setmulticastlist(struct net_device *dev)
ether3_outw(priv(dev)->regs.config1 | CFG1_LOCBUFMEM, REG_CONFIG1);
}
-static void ether3_timeout(struct net_device *dev)
+static void ether3_timeout(struct net_device *dev, unsigned int txqueue)
{
unsigned long flags;
diff --git a/drivers/net/ethernet/seeq/sgiseeq.c b/drivers/net/ethernet/seeq/sgiseeq.c
index 276c7cae7cee..8507ff242014 100644
--- a/drivers/net/ethernet/seeq/sgiseeq.c
+++ b/drivers/net/ethernet/seeq/sgiseeq.c
@@ -645,7 +645,7 @@ sgiseeq_start_xmit(struct sk_buff *skb, struct net_device *dev)
return NETDEV_TX_OK;
}
-static void timeout(struct net_device *dev)
+static void timeout(struct net_device *dev, unsigned int txqueue)
{
printk(KERN_NOTICE "%s: transmit timed out, resetting\n", dev->name);
sgiseeq_reset(dev);
diff --git a/drivers/net/ethernet/sfc/Kconfig b/drivers/net/ethernet/sfc/Kconfig
index 5f36774bf4b8..ea5a9220196c 100644
--- a/drivers/net/ethernet/sfc/Kconfig
+++ b/drivers/net/ethernet/sfc/Kconfig
@@ -21,8 +21,6 @@ config SFC
depends on PCI
select MDIO
select CRC32
- select I2C
- select I2C_ALGOBIT
imply PTP_1588_CLOCK
---help---
This driver supports 10/40-gigabit Ethernet cards based on
diff --git a/drivers/net/ethernet/sfc/Makefile b/drivers/net/ethernet/sfc/Makefile
index c5c297e78d06..890fd65caa2d 100644
--- a/drivers/net/ethernet/sfc/Makefile
+++ b/drivers/net/ethernet/sfc/Makefile
@@ -1,7 +1,10 @@
# SPDX-License-Identifier: GPL-2.0
-sfc-y += efx.o nic.o farch.o siena.o ef10.o tx.o rx.o \
- selftest.o ethtool.o ptp.o tx_tso.o \
- mcdi.o mcdi_port.o mcdi_mon.o
+sfc-y += efx.o efx_common.o efx_channels.o nic.o \
+ farch.o siena.o ef10.o \
+ tx.o tx_common.o tx_tso.o rx.o rx_common.o \
+ selftest.o ethtool.o ethtool_common.o ptp.o \
+ mcdi.o mcdi_port.o mcdi_port_common.o \
+ mcdi_functions.o mcdi_mon.o
sfc-$(CONFIG_SFC_MTD) += mtd.o
sfc-$(CONFIG_SFC_SRIOV) += sriov.o siena_sriov.o ef10_sriov.o
diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c
index 4d9bbccc6f89..4997f61de3d6 100644
--- a/drivers/net/ethernet/sfc/ef10.c
+++ b/drivers/net/ethernet/sfc/ef10.c
@@ -5,10 +5,13 @@
*/
#include "net_driver.h"
+#include "rx_common.h"
#include "ef10_regs.h"
#include "io.h"
#include "mcdi.h"
#include "mcdi_pcol.h"
+#include "mcdi_port_common.h"
+#include "mcdi_functions.h"
#include "nic.h"
#include "workarounds.h"
#include "selftest.h"
@@ -185,24 +188,6 @@ static bool efx_ef10_is_vf(struct efx_nic *efx)
return efx->type->is_vf;
}
-static int efx_ef10_get_pf_index(struct efx_nic *efx)
-{
- MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_FUNCTION_INFO_OUT_LEN);
- struct efx_ef10_nic_data *nic_data = efx->nic_data;
- size_t outlen;
- int rc;
-
- rc = efx_mcdi_rpc(efx, MC_CMD_GET_FUNCTION_INFO, NULL, 0, outbuf,
- sizeof(outbuf), &outlen);
- if (rc)
- return rc;
- if (outlen < sizeof(outbuf))
- return -EIO;
-
- nic_data->pf_index = MCDI_DWORD(outbuf, GET_FUNCTION_INFO_OUT_PF);
- return 0;
-}
-
#ifdef CONFIG_SFC_SRIOV
static int efx_ef10_get_vf_index(struct efx_nic *efx)
{
@@ -273,24 +258,9 @@ static int efx_ef10_init_datapath_caps(struct efx_nic *efx)
u8 vi_window_mode = MCDI_BYTE(outbuf,
GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE);
- switch (vi_window_mode) {
- case MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_8K:
- efx->vi_stride = 8192;
- break;
- case MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_16K:
- efx->vi_stride = 16384;
- break;
- case MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_64K:
- efx->vi_stride = 65536;
- break;
- default:
- netif_err(efx, probe, efx->net_dev,
- "Unrecognised VI window mode %d\n",
- vi_window_mode);
- return -EIO;
- }
- netif_dbg(efx, probe, efx->net_dev, "vi_stride = %u\n",
- efx->vi_stride);
+ rc = efx_mcdi_window_mode_to_stride(efx, vi_window_mode);
+ if (rc)
+ return rc;
} else {
/* keep default VI stride */
netif_dbg(efx, probe, efx->net_dev,
@@ -689,7 +659,7 @@ static int efx_ef10_probe(struct efx_nic *efx)
}
nic_data->warm_boot_count = rc;
- efx->rss_context.context_id = EFX_EF10_RSS_CONTEXT_INVALID;
+ efx->rss_context.context_id = EFX_MCDI_RSS_CONTEXT_INVALID;
nic_data->vport_id = EVB_PORT_ID_ASSIGNED;
@@ -725,7 +695,7 @@ static int efx_ef10_probe(struct efx_nic *efx)
if (rc)
goto fail4;
- rc = efx_ef10_get_pf_index(efx);
+ rc = efx_get_pf_index(efx, &nic_data->pf_index);
if (rc)
goto fail5;
@@ -831,22 +801,6 @@ fail1:
return rc;
}
-static int efx_ef10_free_vis(struct efx_nic *efx)
-{
- MCDI_DECLARE_BUF_ERR(outbuf);
- size_t outlen;
- int rc = efx_mcdi_rpc_quiet(efx, MC_CMD_FREE_VIS, NULL, 0,
- outbuf, sizeof(outbuf), &outlen);
-
- /* -EALREADY means nothing to free, so ignore */
- if (rc == -EALREADY)
- rc = 0;
- if (rc)
- efx_mcdi_display_error(efx, MC_CMD_FREE_VIS, 0, outbuf, outlen,
- rc);
- return rc;
-}
-
#ifdef EFX_USE_PIO
static void efx_ef10_free_piobufs(struct efx_nic *efx)
@@ -1089,7 +1043,7 @@ static void efx_ef10_remove(struct efx_nic *efx)
if (nic_data->wc_membase)
iounmap(nic_data->wc_membase);
- rc = efx_ef10_free_vis(efx);
+ rc = efx_mcdi_free_vis(efx);
WARN_ON(rc != 0);
if (!nic_data->must_restore_piobufs)
@@ -1260,28 +1214,10 @@ static int efx_ef10_probe_vf(struct efx_nic *efx __attribute__ ((unused)))
static int efx_ef10_alloc_vis(struct efx_nic *efx,
unsigned int min_vis, unsigned int max_vis)
{
- MCDI_DECLARE_BUF(inbuf, MC_CMD_ALLOC_VIS_IN_LEN);
- MCDI_DECLARE_BUF(outbuf, MC_CMD_ALLOC_VIS_OUT_LEN);
struct efx_ef10_nic_data *nic_data = efx->nic_data;
- size_t outlen;
- int rc;
-
- MCDI_SET_DWORD(inbuf, ALLOC_VIS_IN_MIN_VI_COUNT, min_vis);
- MCDI_SET_DWORD(inbuf, ALLOC_VIS_IN_MAX_VI_COUNT, max_vis);
- rc = efx_mcdi_rpc(efx, MC_CMD_ALLOC_VIS, inbuf, sizeof(inbuf),
- outbuf, sizeof(outbuf), &outlen);
- if (rc != 0)
- return rc;
-
- if (outlen < MC_CMD_ALLOC_VIS_OUT_LEN)
- return -EIO;
- netif_dbg(efx, drv, efx->net_dev, "base VI is A0x%03x\n",
- MCDI_DWORD(outbuf, ALLOC_VIS_OUT_VI_BASE));
-
- nic_data->vi_base = MCDI_DWORD(outbuf, ALLOC_VIS_OUT_VI_BASE);
- nic_data->n_allocated_vis = MCDI_DWORD(outbuf, ALLOC_VIS_OUT_VI_COUNT);
- return 0;
+ return efx_mcdi_alloc_vis(efx, min_vis, max_vis, &nic_data->vi_base,
+ &nic_data->n_allocated_vis);
}
/* Note that the failure path of this function does not free
@@ -1363,7 +1299,7 @@ static int efx_ef10_dimension_resources(struct efx_nic *efx)
}
/* In case the last attached driver failed to free VIs, do it now */
- rc = efx_ef10_free_vis(efx);
+ rc = efx_mcdi_free_vis(efx);
if (rc != 0)
return rc;
@@ -1384,7 +1320,7 @@ static int efx_ef10_dimension_resources(struct efx_nic *efx)
efx->max_tx_channels =
nic_data->n_allocated_vis / EFX_TXQ_TYPES;
- efx_ef10_free_vis(efx);
+ efx_mcdi_free_vis(efx);
return -EAGAIN;
}
@@ -1503,7 +1439,7 @@ static void efx_ef10_reset_mc_allocations(struct efx_nic *efx)
nic_data->must_restore_filters = true;
nic_data->must_restore_piobufs = true;
efx_ef10_forget_old_piobufs(efx);
- efx->rss_context.context_id = EFX_EF10_RSS_CONTEXT_INVALID;
+ efx->rss_context.context_id = EFX_MCDI_RSS_CONTEXT_INVALID;
/* Driver-created vswitches and vports must be re-created */
nic_data->must_probe_vswitching = true;
@@ -2408,20 +2344,15 @@ static u32 efx_ef10_tso_versions(struct efx_nic *efx)
static void efx_ef10_tx_init(struct efx_tx_queue *tx_queue)
{
- MCDI_DECLARE_BUF(inbuf, MC_CMD_INIT_TXQ_IN_LEN(EFX_MAX_DMAQ_SIZE * 8 /
- EFX_BUF_SIZE));
bool csum_offload = tx_queue->queue & EFX_TXQ_TYPE_OFFLOAD;
- size_t entries = tx_queue->txd.buf.len / EFX_BUF_SIZE;
struct efx_channel *channel = tx_queue->channel;
struct efx_nic *efx = tx_queue->efx;
- struct efx_ef10_nic_data *nic_data = efx->nic_data;
+ struct efx_ef10_nic_data *nic_data;
bool tso_v2 = false;
- size_t inlen;
- dma_addr_t dma_addr;
efx_qword_t *txd;
int rc;
- int i;
- BUILD_BUG_ON(MC_CMD_INIT_TXQ_OUT_LEN != 0);
+
+ nic_data = efx->nic_data;
/* Only attempt to enable TX timestamping if we have the license for it,
* otherwise TXQ init will fail
@@ -2448,51 +2379,9 @@ static void efx_ef10_tx_init(struct efx_tx_queue *tx_queue)
channel->channel);
}
- MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_SIZE, tx_queue->ptr_mask + 1);
- MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_TARGET_EVQ, channel->channel);
- MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_LABEL, tx_queue->queue);
- MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_INSTANCE, tx_queue->queue);
- MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_OWNER_ID, 0);
- MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_PORT_ID, nic_data->vport_id);
-
- dma_addr = tx_queue->txd.buf.dma_addr;
-
- netif_dbg(efx, hw, efx->net_dev, "pushing TXQ %d. %zu entries (%llx)\n",
- tx_queue->queue, entries, (u64)dma_addr);
-
- for (i = 0; i < entries; ++i) {
- MCDI_SET_ARRAY_QWORD(inbuf, INIT_TXQ_IN_DMA_ADDR, i, dma_addr);
- dma_addr += EFX_BUF_SIZE;
- }
-
- inlen = MC_CMD_INIT_TXQ_IN_LEN(entries);
-
- do {
- MCDI_POPULATE_DWORD_4(inbuf, INIT_TXQ_IN_FLAGS,
- /* This flag was removed from mcdi_pcol.h for
- * the non-_EXT version of INIT_TXQ. However,
- * firmware still honours it.
- */
- INIT_TXQ_EXT_IN_FLAG_TSOV2_EN, tso_v2,
- INIT_TXQ_IN_FLAG_IP_CSUM_DIS, !csum_offload,
- INIT_TXQ_IN_FLAG_TCP_CSUM_DIS, !csum_offload,
- INIT_TXQ_EXT_IN_FLAG_TIMESTAMP,
- tx_queue->timestamping);
-
- rc = efx_mcdi_rpc_quiet(efx, MC_CMD_INIT_TXQ, inbuf, inlen,
- NULL, 0, NULL);
- if (rc == -ENOSPC && tso_v2) {
- /* Retry without TSOv2 if we're short on contexts. */
- tso_v2 = false;
- netif_warn(efx, probe, efx->net_dev,
- "TSOv2 context not available to segment in hardware. TCP performance may be reduced.\n");
- } else if (rc) {
- efx_mcdi_display_error(efx, MC_CMD_INIT_TXQ,
- MC_CMD_INIT_TXQ_EXT_IN_LEN,
- NULL, 0, rc);
- goto fail;
- }
- } while (rc);
+ rc = efx_mcdi_tx_init(tx_queue, tso_v2);
+ if (rc)
+ goto fail;
/* A previous user of this TX queue might have set us up the
* bomb by writing a descriptor to the TX push collector but
@@ -2530,35 +2419,6 @@ fail:
tx_queue->queue);
}
-static void efx_ef10_tx_fini(struct efx_tx_queue *tx_queue)
-{
- MCDI_DECLARE_BUF(inbuf, MC_CMD_FINI_TXQ_IN_LEN);
- MCDI_DECLARE_BUF_ERR(outbuf);
- struct efx_nic *efx = tx_queue->efx;
- size_t outlen;
- int rc;
-
- MCDI_SET_DWORD(inbuf, FINI_TXQ_IN_INSTANCE,
- tx_queue->queue);
-
- rc = efx_mcdi_rpc_quiet(efx, MC_CMD_FINI_TXQ, inbuf, sizeof(inbuf),
- outbuf, sizeof(outbuf), &outlen);
-
- if (rc && rc != -EALREADY)
- goto fail;
-
- return;
-
-fail:
- efx_mcdi_display_error(efx, MC_CMD_FINI_TXQ, MC_CMD_FINI_TXQ_IN_LEN,
- outbuf, outlen, rc);
-}
-
-static void efx_ef10_tx_remove(struct efx_tx_queue *tx_queue)
-{
- efx_nic_free_buffer(tx_queue->efx, &tx_queue->txd.buf);
-}
-
/* This writes to the TX_DESC_WPTR; write pointer for TX descriptor ring */
static inline void efx_ef10_notify_tx_desc(struct efx_tx_queue *tx_queue)
{
@@ -2737,7 +2597,7 @@ static int efx_ef10_alloc_rss_context(struct efx_nic *efx, bool exclusive,
EFX_EF10_MAX_SHARED_RSS_CONTEXT_SIZE);
if (!exclusive && rss_spread == 1) {
- ctx->context_id = EFX_EF10_RSS_CONTEXT_INVALID;
+ ctx->context_id = EFX_MCDI_RSS_CONTEXT_INVALID;
if (context_size)
*context_size = 1;
return 0;
@@ -2824,11 +2684,11 @@ static void efx_ef10_rx_free_indir_table(struct efx_nic *efx)
{
int rc;
- if (efx->rss_context.context_id != EFX_EF10_RSS_CONTEXT_INVALID) {
+ if (efx->rss_context.context_id != EFX_MCDI_RSS_CONTEXT_INVALID) {
rc = efx_ef10_free_rss_context(efx, efx->rss_context.context_id);
WARN_ON(rc != 0);
}
- efx->rss_context.context_id = EFX_EF10_RSS_CONTEXT_INVALID;
+ efx->rss_context.context_id = EFX_MCDI_RSS_CONTEXT_INVALID;
}
static int efx_ef10_rx_push_shared_rss_config(struct efx_nic *efx,
@@ -2854,7 +2714,7 @@ static int efx_ef10_rx_push_exclusive_rss_config(struct efx_nic *efx,
struct efx_ef10_nic_data *nic_data = efx->nic_data;
int rc;
- if (efx->rss_context.context_id == EFX_EF10_RSS_CONTEXT_INVALID ||
+ if (efx->rss_context.context_id == EFX_MCDI_RSS_CONTEXT_INVALID ||
!nic_data->rx_rss_context_exclusive) {
rc = efx_ef10_alloc_rss_context(efx, true, &efx->rss_context,
NULL);
@@ -2870,7 +2730,7 @@ static int efx_ef10_rx_push_exclusive_rss_config(struct efx_nic *efx,
goto fail2;
if (efx->rss_context.context_id != old_rx_rss_context &&
- old_rx_rss_context != EFX_EF10_RSS_CONTEXT_INVALID)
+ old_rx_rss_context != EFX_MCDI_RSS_CONTEXT_INVALID)
WARN_ON(efx_ef10_free_rss_context(efx, old_rx_rss_context) != 0);
nic_data->rx_rss_context_exclusive = true;
if (rx_indir_table != efx->rss_context.rx_indir_table)
@@ -2901,7 +2761,7 @@ static int efx_ef10_rx_push_rss_context_config(struct efx_nic *efx,
WARN_ON(!mutex_is_locked(&efx->rss_lock));
- if (ctx->context_id == EFX_EF10_RSS_CONTEXT_INVALID) {
+ if (ctx->context_id == EFX_MCDI_RSS_CONTEXT_INVALID) {
rc = efx_ef10_alloc_rss_context(efx, true, ctx, NULL);
if (rc)
return rc;
@@ -2936,7 +2796,7 @@ static int efx_ef10_rx_pull_rss_context_config(struct efx_nic *efx,
BUILD_BUG_ON(MC_CMD_RSS_CONTEXT_GET_TABLE_IN_LEN !=
MC_CMD_RSS_CONTEXT_GET_KEY_IN_LEN);
- if (ctx->context_id == EFX_EF10_RSS_CONTEXT_INVALID)
+ if (ctx->context_id == EFX_MCDI_RSS_CONTEXT_INVALID)
return -ENOENT;
MCDI_SET_DWORD(inbuf, RSS_CONTEXT_GET_TABLE_IN_RSS_CONTEXT_ID,
@@ -2997,7 +2857,7 @@ static void efx_ef10_rx_restore_rss_contexts(struct efx_nic *efx)
list_for_each_entry(ctx, &efx->rss_context.list, list) {
/* previous NIC RSS context is gone */
- ctx->context_id = EFX_EF10_RSS_CONTEXT_INVALID;
+ ctx->context_id = EFX_MCDI_RSS_CONTEXT_INVALID;
/* so try to allocate a new one */
rc = efx_ef10_rx_push_rss_context_config(efx, ctx,
ctx->rx_indir_table,
@@ -3068,96 +2928,11 @@ static int efx_ef10_vf_rx_push_rss_config(struct efx_nic *efx, bool user,
{
if (user)
return -EOPNOTSUPP;
- if (efx->rss_context.context_id != EFX_EF10_RSS_CONTEXT_INVALID)
+ if (efx->rss_context.context_id != EFX_MCDI_RSS_CONTEXT_INVALID)
return 0;
return efx_ef10_rx_push_shared_rss_config(efx, NULL);
}
-static int efx_ef10_rx_probe(struct efx_rx_queue *rx_queue)
-{
- return efx_nic_alloc_buffer(rx_queue->efx, &rx_queue->rxd.buf,
- (rx_queue->ptr_mask + 1) *
- sizeof(efx_qword_t),
- GFP_KERNEL);
-}
-
-static void efx_ef10_rx_init(struct efx_rx_queue *rx_queue)
-{
- MCDI_DECLARE_BUF(inbuf,
- MC_CMD_INIT_RXQ_IN_LEN(EFX_MAX_DMAQ_SIZE * 8 /
- EFX_BUF_SIZE));
- struct efx_channel *channel = efx_rx_queue_channel(rx_queue);
- size_t entries = rx_queue->rxd.buf.len / EFX_BUF_SIZE;
- struct efx_nic *efx = rx_queue->efx;
- struct efx_ef10_nic_data *nic_data = efx->nic_data;
- size_t inlen;
- dma_addr_t dma_addr;
- int rc;
- int i;
- BUILD_BUG_ON(MC_CMD_INIT_RXQ_OUT_LEN != 0);
-
- rx_queue->scatter_n = 0;
- rx_queue->scatter_len = 0;
-
- MCDI_SET_DWORD(inbuf, INIT_RXQ_IN_SIZE, rx_queue->ptr_mask + 1);
- MCDI_SET_DWORD(inbuf, INIT_RXQ_IN_TARGET_EVQ, channel->channel);
- MCDI_SET_DWORD(inbuf, INIT_RXQ_IN_LABEL, efx_rx_queue_index(rx_queue));
- MCDI_SET_DWORD(inbuf, INIT_RXQ_IN_INSTANCE,
- efx_rx_queue_index(rx_queue));
- MCDI_POPULATE_DWORD_2(inbuf, INIT_RXQ_IN_FLAGS,
- INIT_RXQ_IN_FLAG_PREFIX, 1,
- INIT_RXQ_IN_FLAG_TIMESTAMP, 1);
- MCDI_SET_DWORD(inbuf, INIT_RXQ_IN_OWNER_ID, 0);
- MCDI_SET_DWORD(inbuf, INIT_RXQ_IN_PORT_ID, nic_data->vport_id);
-
- dma_addr = rx_queue->rxd.buf.dma_addr;
-
- netif_dbg(efx, hw, efx->net_dev, "pushing RXQ %d. %zu entries (%llx)\n",
- efx_rx_queue_index(rx_queue), entries, (u64)dma_addr);
-
- for (i = 0; i < entries; ++i) {
- MCDI_SET_ARRAY_QWORD(inbuf, INIT_RXQ_IN_DMA_ADDR, i, dma_addr);
- dma_addr += EFX_BUF_SIZE;
- }
-
- inlen = MC_CMD_INIT_RXQ_IN_LEN(entries);
-
- rc = efx_mcdi_rpc(efx, MC_CMD_INIT_RXQ, inbuf, inlen,
- NULL, 0, NULL);
- if (rc)
- netdev_WARN(efx->net_dev, "failed to initialise RXQ %d\n",
- efx_rx_queue_index(rx_queue));
-}
-
-static void efx_ef10_rx_fini(struct efx_rx_queue *rx_queue)
-{
- MCDI_DECLARE_BUF(inbuf, MC_CMD_FINI_RXQ_IN_LEN);
- MCDI_DECLARE_BUF_ERR(outbuf);
- struct efx_nic *efx = rx_queue->efx;
- size_t outlen;
- int rc;
-
- MCDI_SET_DWORD(inbuf, FINI_RXQ_IN_INSTANCE,
- efx_rx_queue_index(rx_queue));
-
- rc = efx_mcdi_rpc_quiet(efx, MC_CMD_FINI_RXQ, inbuf, sizeof(inbuf),
- outbuf, sizeof(outbuf), &outlen);
-
- if (rc && rc != -EALREADY)
- goto fail;
-
- return;
-
-fail:
- efx_mcdi_display_error(efx, MC_CMD_FINI_RXQ, MC_CMD_FINI_RXQ_IN_LEN,
- outbuf, outlen, rc);
-}
-
-static void efx_ef10_rx_remove(struct efx_rx_queue *rx_queue)
-{
- efx_nic_free_buffer(rx_queue->efx, &rx_queue->rxd.buf);
-}
-
/* This creates an entry in the RX descriptor queue */
static inline void
efx_ef10_build_rx_desc(struct efx_rx_queue *rx_queue, unsigned int index)
@@ -3229,106 +3004,20 @@ efx_ef10_rx_defer_refill_complete(struct efx_nic *efx, unsigned long cookie,
/* nothing to do */
}
-static int efx_ef10_ev_probe(struct efx_channel *channel)
-{
- return efx_nic_alloc_buffer(channel->efx, &channel->eventq.buf,
- (channel->eventq_mask + 1) *
- sizeof(efx_qword_t),
- GFP_KERNEL);
-}
-
-static void efx_ef10_ev_fini(struct efx_channel *channel)
-{
- MCDI_DECLARE_BUF(inbuf, MC_CMD_FINI_EVQ_IN_LEN);
- MCDI_DECLARE_BUF_ERR(outbuf);
- struct efx_nic *efx = channel->efx;
- size_t outlen;
- int rc;
-
- MCDI_SET_DWORD(inbuf, FINI_EVQ_IN_INSTANCE, channel->channel);
-
- rc = efx_mcdi_rpc_quiet(efx, MC_CMD_FINI_EVQ, inbuf, sizeof(inbuf),
- outbuf, sizeof(outbuf), &outlen);
-
- if (rc && rc != -EALREADY)
- goto fail;
-
- return;
-
-fail:
- efx_mcdi_display_error(efx, MC_CMD_FINI_EVQ, MC_CMD_FINI_EVQ_IN_LEN,
- outbuf, outlen, rc);
-}
-
static int efx_ef10_ev_init(struct efx_channel *channel)
{
- MCDI_DECLARE_BUF(inbuf,
- MC_CMD_INIT_EVQ_V2_IN_LEN(EFX_MAX_EVQ_SIZE * 8 /
- EFX_BUF_SIZE));
- MCDI_DECLARE_BUF(outbuf, MC_CMD_INIT_EVQ_V2_OUT_LEN);
- size_t entries = channel->eventq.buf.len / EFX_BUF_SIZE;
struct efx_nic *efx = channel->efx;
struct efx_ef10_nic_data *nic_data;
- size_t inlen, outlen;
unsigned int enabled, implemented;
- dma_addr_t dma_addr;
+ bool use_v2, cut_thru;
int rc;
- int i;
nic_data = efx->nic_data;
-
- /* Fill event queue with all ones (i.e. empty events) */
- memset(channel->eventq.buf.addr, 0xff, channel->eventq.buf.len);
-
- MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_SIZE, channel->eventq_mask + 1);
- MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_INSTANCE, channel->channel);
- /* INIT_EVQ expects index in vector table, not absolute */
- MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_IRQ_NUM, channel->channel);
- MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_TMR_MODE,
- MC_CMD_INIT_EVQ_IN_TMR_MODE_DIS);
- MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_TMR_LOAD, 0);
- MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_TMR_RELOAD, 0);
- MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_COUNT_MODE,
- MC_CMD_INIT_EVQ_IN_COUNT_MODE_DIS);
- MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_COUNT_THRSHLD, 0);
-
- if (nic_data->datapath_caps2 &
- 1 << MC_CMD_GET_CAPABILITIES_V2_OUT_INIT_EVQ_V2_LBN) {
- /* Use the new generic approach to specifying event queue
- * configuration, requesting lower latency or higher throughput.
- * The options that actually get used appear in the output.
- */
- MCDI_POPULATE_DWORD_2(inbuf, INIT_EVQ_V2_IN_FLAGS,
- INIT_EVQ_V2_IN_FLAG_INTERRUPTING, 1,
- INIT_EVQ_V2_IN_FLAG_TYPE,
- MC_CMD_INIT_EVQ_V2_IN_FLAG_TYPE_AUTO);
- } else {
- bool cut_thru = !(nic_data->datapath_caps &
- 1 << MC_CMD_GET_CAPABILITIES_OUT_RX_BATCHING_LBN);
-
- MCDI_POPULATE_DWORD_4(inbuf, INIT_EVQ_IN_FLAGS,
- INIT_EVQ_IN_FLAG_INTERRUPTING, 1,
- INIT_EVQ_IN_FLAG_RX_MERGE, 1,
- INIT_EVQ_IN_FLAG_TX_MERGE, 1,
- INIT_EVQ_IN_FLAG_CUT_THRU, cut_thru);
- }
-
- dma_addr = channel->eventq.buf.dma_addr;
- for (i = 0; i < entries; ++i) {
- MCDI_SET_ARRAY_QWORD(inbuf, INIT_EVQ_IN_DMA_ADDR, i, dma_addr);
- dma_addr += EFX_BUF_SIZE;
- }
-
- inlen = MC_CMD_INIT_EVQ_IN_LEN(entries);
-
- rc = efx_mcdi_rpc(efx, MC_CMD_INIT_EVQ, inbuf, inlen,
- outbuf, sizeof(outbuf), &outlen);
-
- if (outlen >= MC_CMD_INIT_EVQ_V2_OUT_LEN)
- netif_dbg(efx, drv, efx->net_dev,
- "Channel %d using event queue flags %08x\n",
- channel->channel,
- MCDI_DWORD(outbuf, INIT_EVQ_V2_OUT_FLAGS));
+ use_v2 = nic_data->datapath_caps2 &
+ 1 << MC_CMD_GET_CAPABILITIES_V2_OUT_INIT_EVQ_V2_LBN;
+ cut_thru = !(nic_data->datapath_caps &
+ 1 << MC_CMD_GET_CAPABILITIES_OUT_RX_BATCHING_LBN);
+ rc = efx_mcdi_ev_init(channel, cut_thru, use_v2);
/* IRQ return is ignored */
if (channel->channel || rc)
@@ -3386,15 +3075,10 @@ static int efx_ef10_ev_init(struct efx_channel *channel)
return 0;
fail:
- efx_ef10_ev_fini(channel);
+ efx_mcdi_ev_fini(channel);
return rc;
}
-static void efx_ef10_ev_remove(struct efx_channel *channel)
-{
- efx_nic_free_buffer(channel->efx, &channel->eventq.buf);
-}
-
static void efx_ef10_handle_rx_wrong_queue(struct efx_rx_queue *rx_queue,
unsigned int rx_queue_label)
{
@@ -3976,9 +3660,9 @@ static int efx_ef10_fini_dmaq(struct efx_nic *efx)
if (efx->state != STATE_RECOVERY) {
efx_for_each_channel(channel, efx) {
efx_for_each_channel_rx_queue(rx_queue, channel)
- efx_ef10_rx_fini(rx_queue);
+ efx_mcdi_rx_fini(rx_queue);
efx_for_each_channel_tx_queue(tx_queue, channel)
- efx_ef10_tx_fini(tx_queue);
+ efx_mcdi_tx_fini(tx_queue);
}
wait_event_timeout(efx->flush_wq,
@@ -4165,7 +3849,7 @@ static void efx_ef10_filter_push_prep(struct efx_nic *efx,
*/
if (WARN_ON_ONCE(!ctx))
flags &= ~EFX_FILTER_FLAG_RX_RSS;
- else if (WARN_ON_ONCE(ctx->context_id == EFX_EF10_RSS_CONTEXT_INVALID))
+ else if (WARN_ON_ONCE(ctx->context_id == EFX_MCDI_RSS_CONTEXT_INVALID))
flags &= ~EFX_FILTER_FLAG_RX_RSS;
}
@@ -4344,7 +4028,7 @@ static s32 efx_ef10_filter_insert_locked(struct efx_nic *efx,
rc = -ENOENT;
goto out_unlock;
}
- if (ctx->context_id == EFX_EF10_RSS_CONTEXT_INVALID) {
+ if (ctx->context_id == EFX_MCDI_RSS_CONTEXT_INVALID) {
rc = -EOPNOTSUPP;
goto out_unlock;
}
@@ -5085,7 +4769,7 @@ static void efx_ef10_filter_table_restore(struct efx_nic *efx)
invalid_filters++;
goto not_restored;
}
- if (ctx->context_id == EFX_EF10_RSS_CONTEXT_INVALID) {
+ if (ctx->context_id == EFX_MCDI_RSS_CONTEXT_INVALID) {
netif_warn(efx, drv, efx->net_dev,
"Warning: unable to restore a filter with RSS context %u as it was not created.\n",
spec->rss_context);
@@ -6650,20 +6334,20 @@ const struct efx_nic_type efx_hunt_a0_vf_nic_type = {
.irq_handle_legacy = efx_ef10_legacy_interrupt,
.tx_probe = efx_ef10_tx_probe,
.tx_init = efx_ef10_tx_init,
- .tx_remove = efx_ef10_tx_remove,
+ .tx_remove = efx_mcdi_tx_remove,
.tx_write = efx_ef10_tx_write,
.tx_limit_len = efx_ef10_tx_limit_len,
.rx_push_rss_config = efx_ef10_vf_rx_push_rss_config,
.rx_pull_rss_config = efx_ef10_rx_pull_rss_config,
- .rx_probe = efx_ef10_rx_probe,
- .rx_init = efx_ef10_rx_init,
- .rx_remove = efx_ef10_rx_remove,
+ .rx_probe = efx_mcdi_rx_probe,
+ .rx_init = efx_mcdi_rx_init,
+ .rx_remove = efx_mcdi_rx_remove,
.rx_write = efx_ef10_rx_write,
.rx_defer_refill = efx_ef10_rx_defer_refill,
- .ev_probe = efx_ef10_ev_probe,
+ .ev_probe = efx_mcdi_ev_probe,
.ev_init = efx_ef10_ev_init,
- .ev_fini = efx_ef10_ev_fini,
- .ev_remove = efx_ef10_ev_remove,
+ .ev_fini = efx_mcdi_ev_fini,
+ .ev_remove = efx_mcdi_ev_remove,
.ev_process = efx_ef10_ev_process,
.ev_read_ack = efx_ef10_ev_read_ack,
.ev_test_generate = efx_ef10_ev_test_generate,
@@ -6759,7 +6443,7 @@ const struct efx_nic_type efx_hunt_a0_nic_type = {
.irq_handle_legacy = efx_ef10_legacy_interrupt,
.tx_probe = efx_ef10_tx_probe,
.tx_init = efx_ef10_tx_init,
- .tx_remove = efx_ef10_tx_remove,
+ .tx_remove = efx_mcdi_tx_remove,
.tx_write = efx_ef10_tx_write,
.tx_limit_len = efx_ef10_tx_limit_len,
.rx_push_rss_config = efx_ef10_pf_rx_push_rss_config,
@@ -6767,15 +6451,15 @@ const struct efx_nic_type efx_hunt_a0_nic_type = {
.rx_push_rss_context_config = efx_ef10_rx_push_rss_context_config,
.rx_pull_rss_context_config = efx_ef10_rx_pull_rss_context_config,
.rx_restore_rss_contexts = efx_ef10_rx_restore_rss_contexts,
- .rx_probe = efx_ef10_rx_probe,
- .rx_init = efx_ef10_rx_init,
- .rx_remove = efx_ef10_rx_remove,
+ .rx_probe = efx_mcdi_rx_probe,
+ .rx_init = efx_mcdi_rx_init,
+ .rx_remove = efx_mcdi_rx_remove,
.rx_write = efx_ef10_rx_write,
.rx_defer_refill = efx_ef10_rx_defer_refill,
- .ev_probe = efx_ef10_ev_probe,
+ .ev_probe = efx_mcdi_ev_probe,
.ev_init = efx_ef10_ev_init,
- .ev_fini = efx_ef10_ev_fini,
- .ev_remove = efx_ef10_ev_remove,
+ .ev_fini = efx_mcdi_ev_fini,
+ .ev_remove = efx_mcdi_ev_remove,
.ev_process = efx_ef10_ev_process,
.ev_read_ack = efx_ef10_ev_read_ack,
.ev_test_generate = efx_ef10_ev_test_generate,
diff --git a/drivers/net/ethernet/sfc/ef10_sriov.c b/drivers/net/ethernet/sfc/ef10_sriov.c
index 52bd43f45761..14393767ef9f 100644
--- a/drivers/net/ethernet/sfc/ef10_sriov.c
+++ b/drivers/net/ethernet/sfc/ef10_sriov.c
@@ -522,10 +522,9 @@ int efx_ef10_sriov_set_vf_mac(struct efx_nic *efx, int vf_i, u8 *mac)
if (!is_zero_ether_addr(mac)) {
rc = efx_ef10_vport_add_mac(efx, vf->vport_id, mac);
- if (rc) {
- eth_zero_addr(vf->mac);
+ if (rc)
goto fail;
- }
+
if (vf->efx)
ether_addr_copy(vf->efx->net_dev->dev_addr, mac);
}
diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
index 992c773620ec..110e485e6624 100644
--- a/drivers/net/ethernet/sfc/efx.c
+++ b/drivers/net/ethernet/sfc/efx.c
@@ -23,6 +23,11 @@
#include <net/gre.h>
#include <net/udp_tunnel.h>
#include "efx.h"
+#include "efx_common.h"
+#include "efx_channels.h"
+#include "rx_common.h"
+#include "tx_common.h"
+#include "rx_common.h"
#include "nic.h"
#include "io.h"
#include "selftest.h"
@@ -39,56 +44,6 @@
**************************************************************************
*/
-/* Loopback mode names (see LOOPBACK_MODE()) */
-const unsigned int efx_loopback_mode_max = LOOPBACK_MAX;
-const char *const efx_loopback_mode_names[] = {
- [LOOPBACK_NONE] = "NONE",
- [LOOPBACK_DATA] = "DATAPATH",
- [LOOPBACK_GMAC] = "GMAC",
- [LOOPBACK_XGMII] = "XGMII",
- [LOOPBACK_XGXS] = "XGXS",
- [LOOPBACK_XAUI] = "XAUI",
- [LOOPBACK_GMII] = "GMII",
- [LOOPBACK_SGMII] = "SGMII",
- [LOOPBACK_XGBR] = "XGBR",
- [LOOPBACK_XFI] = "XFI",
- [LOOPBACK_XAUI_FAR] = "XAUI_FAR",
- [LOOPBACK_GMII_FAR] = "GMII_FAR",
- [LOOPBACK_SGMII_FAR] = "SGMII_FAR",
- [LOOPBACK_XFI_FAR] = "XFI_FAR",
- [LOOPBACK_GPHY] = "GPHY",
- [LOOPBACK_PHYXS] = "PHYXS",
- [LOOPBACK_PCS] = "PCS",
- [LOOPBACK_PMAPMD] = "PMA/PMD",
- [LOOPBACK_XPORT] = "XPORT",
- [LOOPBACK_XGMII_WS] = "XGMII_WS",
- [LOOPBACK_XAUI_WS] = "XAUI_WS",
- [LOOPBACK_XAUI_WS_FAR] = "XAUI_WS_FAR",
- [LOOPBACK_XAUI_WS_NEAR] = "XAUI_WS_NEAR",
- [LOOPBACK_GMII_WS] = "GMII_WS",
- [LOOPBACK_XFI_WS] = "XFI_WS",
- [LOOPBACK_XFI_WS_FAR] = "XFI_WS_FAR",
- [LOOPBACK_PHYXS_WS] = "PHYXS_WS",
-};
-
-const unsigned int efx_reset_type_max = RESET_TYPE_MAX;
-const char *const efx_reset_type_names[] = {
- [RESET_TYPE_INVISIBLE] = "INVISIBLE",
- [RESET_TYPE_ALL] = "ALL",
- [RESET_TYPE_RECOVER_OR_ALL] = "RECOVER_OR_ALL",
- [RESET_TYPE_WORLD] = "WORLD",
- [RESET_TYPE_RECOVER_OR_DISABLE] = "RECOVER_OR_DISABLE",
- [RESET_TYPE_DATAPATH] = "DATAPATH",
- [RESET_TYPE_MC_BIST] = "MC_BIST",
- [RESET_TYPE_DISABLE] = "DISABLE",
- [RESET_TYPE_TX_WATCHDOG] = "TX_WATCHDOG",
- [RESET_TYPE_INT_ERROR] = "INT_ERROR",
- [RESET_TYPE_DMA_ERROR] = "DMA_ERROR",
- [RESET_TYPE_TX_SKIP] = "TX_SKIP",
- [RESET_TYPE_MC_FAILURE] = "MC_FAILURE",
- [RESET_TYPE_MCDI_TIMEOUT] = "MCDI_TIMEOUT (FLR)",
-};
-
/* UDP tunnel type names */
static const char *const efx_udp_tunnel_type_names[] = {
[TUNNEL_ENCAP_UDP_PORT_ENTRY_VXLAN] = "vxlan",
@@ -104,18 +59,6 @@ void efx_get_udp_tunnel_type_name(u16 type, char *buf, size_t buflen)
snprintf(buf, buflen, "type %d", type);
}
-/* Reset workqueue. If any NIC has a hardware failure then a reset will be
- * queued onto this work queue. This is not a per-nic work queue, because
- * efx_reset_work() acquires the rtnl lock, so resets are naturally serialised.
- */
-static struct workqueue_struct *reset_workqueue;
-
-/* How often and how many times to poll for a reset while waiting for a
- * BIST that another function started to complete.
- */
-#define BIST_WAIT_DELAY_MS 100
-#define BIST_WAIT_DELAY_COUNT 100
-
/**************************************************************************
*
* Configurable values
@@ -135,21 +78,6 @@ module_param(efx_separate_tx_channels, bool, 0444);
MODULE_PARM_DESC(efx_separate_tx_channels,
"Use separate channels for TX and RX");
-/* This is the weight assigned to each of the (per-channel) virtual
- * NAPI devices.
- */
-static int napi_weight = 64;
-
-/* This is the time (in jiffies) between invocations of the hardware
- * monitor.
- * On Falcon-based NICs, this will:
- * - Check the on-board hardware monitor;
- * - Poll the link state and reconfigure the hardware as necessary.
- * On Siena-based NICs for power systems with EEH support, this will give EEH a
- * chance to start.
- */
-static unsigned int efx_monitor_interval = 1 * HZ;
-
/* Initial interrupt moderation settings. They can be modified after
* module load with ethtool.
*
@@ -169,38 +97,10 @@ static unsigned int rx_irq_mod_usec = 60;
*/
static unsigned int tx_irq_mod_usec = 150;
-/* This is the first interrupt mode to try out of:
- * 0 => MSI-X
- * 1 => MSI
- * 2 => legacy
- */
-static unsigned int interrupt_mode;
-
-/* This is the requested number of CPUs to use for Receive-Side Scaling (RSS),
- * i.e. the number of CPUs among which we may distribute simultaneous
- * interrupt handling.
- *
- * Cards without MSI-X will only target one CPU via legacy or MSI interrupt.
- * The default (0) means to assign an interrupt to each core.
- */
-static unsigned int rss_cpus;
-module_param(rss_cpus, uint, 0444);
-MODULE_PARM_DESC(rss_cpus, "Number of CPUs to use for Receive-Side Scaling");
-
static bool phy_flash_cfg;
module_param(phy_flash_cfg, bool, 0644);
MODULE_PARM_DESC(phy_flash_cfg, "Set PHYs into reflash mode initially");
-static unsigned irq_adapt_low_thresh = 8000;
-module_param(irq_adapt_low_thresh, uint, 0644);
-MODULE_PARM_DESC(irq_adapt_low_thresh,
- "Threshold score for reducing IRQ moderation");
-
-static unsigned irq_adapt_high_thresh = 16000;
-module_param(irq_adapt_high_thresh, uint, 0644);
-MODULE_PARM_DESC(irq_adapt_high_thresh,
- "Threshold score for increasing IRQ moderation");
-
static unsigned debug = (NETIF_MSG_DRV | NETIF_MSG_PROBE |
NETIF_MSG_LINK | NETIF_MSG_IFDOWN |
NETIF_MSG_IFUP | NETIF_MSG_RX_ERR |
@@ -214,18 +114,8 @@ MODULE_PARM_DESC(debug, "Bitmapped debugging message enable value");
*
*************************************************************************/
-static int efx_soft_enable_interrupts(struct efx_nic *efx);
-static void efx_soft_disable_interrupts(struct efx_nic *efx);
-static void efx_remove_channel(struct efx_channel *channel);
-static void efx_remove_channels(struct efx_nic *efx);
static const struct efx_channel_type efx_default_channel_type;
static void efx_remove_port(struct efx_nic *efx);
-static void efx_init_napi_channel(struct efx_channel *channel);
-static void efx_fini_napi(struct efx_nic *efx);
-static void efx_fini_napi_channel(struct efx_channel *channel);
-static void efx_fini_struct(struct efx_nic *efx);
-static void efx_start_all(struct efx_nic *efx);
-static void efx_stop_all(struct efx_nic *efx);
static int efx_xdp_setup_prog(struct efx_nic *efx, struct bpf_prog *prog);
static int efx_xdp(struct net_device *dev, struct netdev_bpf *xdp);
static int efx_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **xdpfs,
@@ -239,776 +129,12 @@ static int efx_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **xdpfs,
ASSERT_RTNL(); \
} while (0)
-static int efx_check_disabled(struct efx_nic *efx)
-{
- if (efx->state == STATE_DISABLED || efx->state == STATE_RECOVERY) {
- netif_err(efx, drv, efx->net_dev,
- "device is disabled due to earlier errors\n");
- return -EIO;
- }
- return 0;
-}
-
-/**************************************************************************
- *
- * Event queue processing
- *
- *************************************************************************/
-
-/* Process channel's event queue
- *
- * This function is responsible for processing the event queue of a
- * single channel. The caller must guarantee that this function will
- * never be concurrently called more than once on the same channel,
- * though different channels may be being processed concurrently.
- */
-static int efx_process_channel(struct efx_channel *channel, int budget)
-{
- struct efx_tx_queue *tx_queue;
- struct list_head rx_list;
- int spent;
-
- if (unlikely(!channel->enabled))
- return 0;
-
- /* Prepare the batch receive list */
- EFX_WARN_ON_PARANOID(channel->rx_list != NULL);
- INIT_LIST_HEAD(&rx_list);
- channel->rx_list = &rx_list;
-
- efx_for_each_channel_tx_queue(tx_queue, channel) {
- tx_queue->pkts_compl = 0;
- tx_queue->bytes_compl = 0;
- }
-
- spent = efx_nic_process_eventq(channel, budget);
- if (spent && efx_channel_has_rx_queue(channel)) {
- struct efx_rx_queue *rx_queue =
- efx_channel_get_rx_queue(channel);
-
- efx_rx_flush_packet(channel);
- efx_fast_push_rx_descriptors(rx_queue, true);
- }
-
- /* Update BQL */
- efx_for_each_channel_tx_queue(tx_queue, channel) {
- if (tx_queue->bytes_compl) {
- netdev_tx_completed_queue(tx_queue->core_txq,
- tx_queue->pkts_compl, tx_queue->bytes_compl);
- }
- }
-
- /* Receive any packets we queued up */
- netif_receive_skb_list(channel->rx_list);
- channel->rx_list = NULL;
-
- return spent;
-}
-
-/* NAPI poll handler
- *
- * NAPI guarantees serialisation of polls of the same device, which
- * provides the guarantee required by efx_process_channel().
- */
-static void efx_update_irq_mod(struct efx_nic *efx, struct efx_channel *channel)
-{
- int step = efx->irq_mod_step_us;
-
- if (channel->irq_mod_score < irq_adapt_low_thresh) {
- if (channel->irq_moderation_us > step) {
- channel->irq_moderation_us -= step;
- efx->type->push_irq_moderation(channel);
- }
- } else if (channel->irq_mod_score > irq_adapt_high_thresh) {
- if (channel->irq_moderation_us <
- efx->irq_rx_moderation_us) {
- channel->irq_moderation_us += step;
- efx->type->push_irq_moderation(channel);
- }
- }
-
- channel->irq_count = 0;
- channel->irq_mod_score = 0;
-}
-
-static int efx_poll(struct napi_struct *napi, int budget)
-{
- struct efx_channel *channel =
- container_of(napi, struct efx_channel, napi_str);
- struct efx_nic *efx = channel->efx;
- int spent;
-
- netif_vdbg(efx, intr, efx->net_dev,
- "channel %d NAPI poll executing on CPU %d\n",
- channel->channel, raw_smp_processor_id());
-
- spent = efx_process_channel(channel, budget);
-
- xdp_do_flush_map();
-
- if (spent < budget) {
- if (efx_channel_has_rx_queue(channel) &&
- efx->irq_rx_adaptive &&
- unlikely(++channel->irq_count == 1000)) {
- efx_update_irq_mod(efx, channel);
- }
-
-#ifdef CONFIG_RFS_ACCEL
- /* Perhaps expire some ARFS filters */
- mod_delayed_work(system_wq, &channel->filter_work, 0);
-#endif
-
- /* There is no race here; although napi_disable() will
- * only wait for napi_complete(), this isn't a problem
- * since efx_nic_eventq_read_ack() will have no effect if
- * interrupts have already been disabled.
- */
- if (napi_complete_done(napi, spent))
- efx_nic_eventq_read_ack(channel);
- }
-
- return spent;
-}
-
-/* Create event queue
- * Event queue memory allocations are done only once. If the channel
- * is reset, the memory buffer will be reused; this guards against
- * errors during channel reset and also simplifies interrupt handling.
- */
-static int efx_probe_eventq(struct efx_channel *channel)
-{
- struct efx_nic *efx = channel->efx;
- unsigned long entries;
-
- netif_dbg(efx, probe, efx->net_dev,
- "chan %d create event queue\n", channel->channel);
-
- /* Build an event queue with room for one event per tx and rx buffer,
- * plus some extra for link state events and MCDI completions. */
- entries = roundup_pow_of_two(efx->rxq_entries + efx->txq_entries + 128);
- EFX_WARN_ON_PARANOID(entries > EFX_MAX_EVQ_SIZE);
- channel->eventq_mask = max(entries, EFX_MIN_EVQ_SIZE) - 1;
-
- return efx_nic_probe_eventq(channel);
-}
-
-/* Prepare channel's event queue */
-static int efx_init_eventq(struct efx_channel *channel)
-{
- struct efx_nic *efx = channel->efx;
- int rc;
-
- EFX_WARN_ON_PARANOID(channel->eventq_init);
-
- netif_dbg(efx, drv, efx->net_dev,
- "chan %d init event queue\n", channel->channel);
-
- rc = efx_nic_init_eventq(channel);
- if (rc == 0) {
- efx->type->push_irq_moderation(channel);
- channel->eventq_read_ptr = 0;
- channel->eventq_init = true;
- }
- return rc;
-}
-
-/* Enable event queue processing and NAPI */
-void efx_start_eventq(struct efx_channel *channel)
-{
- netif_dbg(channel->efx, ifup, channel->efx->net_dev,
- "chan %d start event queue\n", channel->channel);
-
- /* Make sure the NAPI handler sees the enabled flag set */
- channel->enabled = true;
- smp_wmb();
-
- napi_enable(&channel->napi_str);
- efx_nic_eventq_read_ack(channel);
-}
-
-/* Disable event queue processing and NAPI */
-void efx_stop_eventq(struct efx_channel *channel)
-{
- if (!channel->enabled)
- return;
-
- napi_disable(&channel->napi_str);
- channel->enabled = false;
-}
-
-static void efx_fini_eventq(struct efx_channel *channel)
-{
- if (!channel->eventq_init)
- return;
-
- netif_dbg(channel->efx, drv, channel->efx->net_dev,
- "chan %d fini event queue\n", channel->channel);
-
- efx_nic_fini_eventq(channel);
- channel->eventq_init = false;
-}
-
-static void efx_remove_eventq(struct efx_channel *channel)
-{
- netif_dbg(channel->efx, drv, channel->efx->net_dev,
- "chan %d remove event queue\n", channel->channel);
-
- efx_nic_remove_eventq(channel);
-}
-
-/**************************************************************************
- *
- * Channel handling
- *
- *************************************************************************/
-
-/* Allocate and initialise a channel structure. */
-static struct efx_channel *
-efx_alloc_channel(struct efx_nic *efx, int i, struct efx_channel *old_channel)
-{
- struct efx_channel *channel;
- struct efx_rx_queue *rx_queue;
- struct efx_tx_queue *tx_queue;
- int j;
-
- channel = kzalloc(sizeof(*channel), GFP_KERNEL);
- if (!channel)
- return NULL;
-
- channel->efx = efx;
- channel->channel = i;
- channel->type = &efx_default_channel_type;
-
- for (j = 0; j < EFX_TXQ_TYPES; j++) {
- tx_queue = &channel->tx_queue[j];
- tx_queue->efx = efx;
- tx_queue->queue = i * EFX_TXQ_TYPES + j;
- tx_queue->channel = channel;
- }
-
-#ifdef CONFIG_RFS_ACCEL
- INIT_DELAYED_WORK(&channel->filter_work, efx_filter_rfs_expire);
-#endif
-
- rx_queue = &channel->rx_queue;
- rx_queue->efx = efx;
- timer_setup(&rx_queue->slow_fill, efx_rx_slow_fill, 0);
-
- return channel;
-}
-
-/* Allocate and initialise a channel structure, copying parameters
- * (but not resources) from an old channel structure.
- */
-static struct efx_channel *
-efx_copy_channel(const struct efx_channel *old_channel)
-{
- struct efx_channel *channel;
- struct efx_rx_queue *rx_queue;
- struct efx_tx_queue *tx_queue;
- int j;
-
- channel = kmalloc(sizeof(*channel), GFP_KERNEL);
- if (!channel)
- return NULL;
-
- *channel = *old_channel;
-
- channel->napi_dev = NULL;
- INIT_HLIST_NODE(&channel->napi_str.napi_hash_node);
- channel->napi_str.napi_id = 0;
- channel->napi_str.state = 0;
- memset(&channel->eventq, 0, sizeof(channel->eventq));
-
- for (j = 0; j < EFX_TXQ_TYPES; j++) {
- tx_queue = &channel->tx_queue[j];
- if (tx_queue->channel)
- tx_queue->channel = channel;
- tx_queue->buffer = NULL;
- memset(&tx_queue->txd, 0, sizeof(tx_queue->txd));
- }
-
- rx_queue = &channel->rx_queue;
- rx_queue->buffer = NULL;
- memset(&rx_queue->rxd, 0, sizeof(rx_queue->rxd));
- timer_setup(&rx_queue->slow_fill, efx_rx_slow_fill, 0);
-#ifdef CONFIG_RFS_ACCEL
- INIT_DELAYED_WORK(&channel->filter_work, efx_filter_rfs_expire);
-#endif
-
- return channel;
-}
-
-static int efx_probe_channel(struct efx_channel *channel)
-{
- struct efx_tx_queue *tx_queue;
- struct efx_rx_queue *rx_queue;
- int rc;
-
- netif_dbg(channel->efx, probe, channel->efx->net_dev,
- "creating channel %d\n", channel->channel);
-
- rc = channel->type->pre_probe(channel);
- if (rc)
- goto fail;
-
- rc = efx_probe_eventq(channel);
- if (rc)
- goto fail;
-
- efx_for_each_channel_tx_queue(tx_queue, channel) {
- rc = efx_probe_tx_queue(tx_queue);
- if (rc)
- goto fail;
- }
-
- efx_for_each_channel_rx_queue(rx_queue, channel) {
- rc = efx_probe_rx_queue(rx_queue);
- if (rc)
- goto fail;
- }
-
- channel->rx_list = NULL;
-
- return 0;
-
-fail:
- efx_remove_channel(channel);
- return rc;
-}
-
-static void
-efx_get_channel_name(struct efx_channel *channel, char *buf, size_t len)
-{
- struct efx_nic *efx = channel->efx;
- const char *type;
- int number;
-
- number = channel->channel;
-
- if (number >= efx->xdp_channel_offset &&
- !WARN_ON_ONCE(!efx->n_xdp_channels)) {
- type = "-xdp";
- number -= efx->xdp_channel_offset;
- } else if (efx->tx_channel_offset == 0) {
- type = "";
- } else if (number < efx->tx_channel_offset) {
- type = "-rx";
- } else {
- type = "-tx";
- number -= efx->tx_channel_offset;
- }
- snprintf(buf, len, "%s%s-%d", efx->name, type, number);
-}
-
-static void efx_set_channel_names(struct efx_nic *efx)
-{
- struct efx_channel *channel;
-
- efx_for_each_channel(channel, efx)
- channel->type->get_name(channel,
- efx->msi_context[channel->channel].name,
- sizeof(efx->msi_context[0].name));
-}
-
-static int efx_probe_channels(struct efx_nic *efx)
-{
- struct efx_channel *channel;
- int rc;
-
- /* Restart special buffer allocation */
- efx->next_buffer_table = 0;
-
- /* Probe channels in reverse, so that any 'extra' channels
- * use the start of the buffer table. This allows the traffic
- * channels to be resized without moving them or wasting the
- * entries before them.
- */
- efx_for_each_channel_rev(channel, efx) {
- rc = efx_probe_channel(channel);
- if (rc) {
- netif_err(efx, probe, efx->net_dev,
- "failed to create channel %d\n",
- channel->channel);
- goto fail;
- }
- }
- efx_set_channel_names(efx);
-
- return 0;
-
-fail:
- efx_remove_channels(efx);
- return rc;
-}
-
-/* Channels are shutdown and reinitialised whilst the NIC is running
- * to propagate configuration changes (mtu, checksum offload), or
- * to clear hardware error conditions
- */
-static void efx_start_datapath(struct efx_nic *efx)
-{
- netdev_features_t old_features = efx->net_dev->features;
- bool old_rx_scatter = efx->rx_scatter;
- struct efx_tx_queue *tx_queue;
- struct efx_rx_queue *rx_queue;
- struct efx_channel *channel;
- size_t rx_buf_len;
-
- /* Calculate the rx buffer allocation parameters required to
- * support the current MTU, including padding for header
- * alignment and overruns.
- */
- efx->rx_dma_len = (efx->rx_prefix_size +
- EFX_MAX_FRAME_LEN(efx->net_dev->mtu) +
- efx->type->rx_buffer_padding);
- rx_buf_len = (sizeof(struct efx_rx_page_state) + XDP_PACKET_HEADROOM +
- efx->rx_ip_align + efx->rx_dma_len);
- if (rx_buf_len <= PAGE_SIZE) {
- efx->rx_scatter = efx->type->always_rx_scatter;
- efx->rx_buffer_order = 0;
- } else if (efx->type->can_rx_scatter) {
- BUILD_BUG_ON(EFX_RX_USR_BUF_SIZE % L1_CACHE_BYTES);
- BUILD_BUG_ON(sizeof(struct efx_rx_page_state) +
- 2 * ALIGN(NET_IP_ALIGN + EFX_RX_USR_BUF_SIZE,
- EFX_RX_BUF_ALIGNMENT) >
- PAGE_SIZE);
- efx->rx_scatter = true;
- efx->rx_dma_len = EFX_RX_USR_BUF_SIZE;
- efx->rx_buffer_order = 0;
- } else {
- efx->rx_scatter = false;
- efx->rx_buffer_order = get_order(rx_buf_len);
- }
-
- efx_rx_config_page_split(efx);
- if (efx->rx_buffer_order)
- netif_dbg(efx, drv, efx->net_dev,
- "RX buf len=%u; page order=%u batch=%u\n",
- efx->rx_dma_len, efx->rx_buffer_order,
- efx->rx_pages_per_batch);
- else
- netif_dbg(efx, drv, efx->net_dev,
- "RX buf len=%u step=%u bpp=%u; page batch=%u\n",
- efx->rx_dma_len, efx->rx_page_buf_step,
- efx->rx_bufs_per_page, efx->rx_pages_per_batch);
-
- /* Restore previously fixed features in hw_features and remove
- * features which are fixed now
- */
- efx->net_dev->hw_features |= efx->net_dev->features;
- efx->net_dev->hw_features &= ~efx->fixed_features;
- efx->net_dev->features |= efx->fixed_features;
- if (efx->net_dev->features != old_features)
- netdev_features_change(efx->net_dev);
-
- /* RX filters may also have scatter-enabled flags */
- if (efx->rx_scatter != old_rx_scatter)
- efx->type->filter_update_rx_scatter(efx);
-
- /* We must keep at least one descriptor in a TX ring empty.
- * We could avoid this when the queue size does not exactly
- * match the hardware ring size, but it's not that important.
- * Therefore we stop the queue when one more skb might fill
- * the ring completely. We wake it when half way back to
- * empty.
- */
- efx->txq_stop_thresh = efx->txq_entries - efx_tx_max_skb_descs(efx);
- efx->txq_wake_thresh = efx->txq_stop_thresh / 2;
-
- /* Initialise the channels */
- efx_for_each_channel(channel, efx) {
- efx_for_each_channel_tx_queue(tx_queue, channel) {
- efx_init_tx_queue(tx_queue);
- atomic_inc(&efx->active_queues);
- }
-
- efx_for_each_channel_rx_queue(rx_queue, channel) {
- efx_init_rx_queue(rx_queue);
- atomic_inc(&efx->active_queues);
- efx_stop_eventq(channel);
- efx_fast_push_rx_descriptors(rx_queue, false);
- efx_start_eventq(channel);
- }
-
- WARN_ON(channel->rx_pkt_n_frags);
- }
-
- efx_ptp_start_datapath(efx);
-
- if (netif_device_present(efx->net_dev))
- netif_tx_wake_all_queues(efx->net_dev);
-}
-
-static void efx_stop_datapath(struct efx_nic *efx)
-{
- struct efx_channel *channel;
- struct efx_tx_queue *tx_queue;
- struct efx_rx_queue *rx_queue;
- int rc;
-
- EFX_ASSERT_RESET_SERIALISED(efx);
- BUG_ON(efx->port_enabled);
-
- efx_ptp_stop_datapath(efx);
-
- /* Stop RX refill */
- efx_for_each_channel(channel, efx) {
- efx_for_each_channel_rx_queue(rx_queue, channel)
- rx_queue->refill_enabled = false;
- }
-
- efx_for_each_channel(channel, efx) {
- /* RX packet processing is pipelined, so wait for the
- * NAPI handler to complete. At least event queue 0
- * might be kept active by non-data events, so don't
- * use napi_synchronize() but actually disable NAPI
- * temporarily.
- */
- if (efx_channel_has_rx_queue(channel)) {
- efx_stop_eventq(channel);
- efx_start_eventq(channel);
- }
- }
-
- rc = efx->type->fini_dmaq(efx);
- if (rc) {
- netif_err(efx, drv, efx->net_dev, "failed to flush queues\n");
- } else {
- netif_dbg(efx, drv, efx->net_dev,
- "successfully flushed all queues\n");
- }
-
- efx_for_each_channel(channel, efx) {
- efx_for_each_channel_rx_queue(rx_queue, channel)
- efx_fini_rx_queue(rx_queue);
- efx_for_each_possible_channel_tx_queue(tx_queue, channel)
- efx_fini_tx_queue(tx_queue);
- }
- efx->xdp_rxq_info_failed = false;
-}
-
-static void efx_remove_channel(struct efx_channel *channel)
-{
- struct efx_tx_queue *tx_queue;
- struct efx_rx_queue *rx_queue;
-
- netif_dbg(channel->efx, drv, channel->efx->net_dev,
- "destroy chan %d\n", channel->channel);
-
- efx_for_each_channel_rx_queue(rx_queue, channel)
- efx_remove_rx_queue(rx_queue);
- efx_for_each_possible_channel_tx_queue(tx_queue, channel)
- efx_remove_tx_queue(tx_queue);
- efx_remove_eventq(channel);
- channel->type->post_remove(channel);
-}
-
-static void efx_remove_channels(struct efx_nic *efx)
-{
- struct efx_channel *channel;
-
- efx_for_each_channel(channel, efx)
- efx_remove_channel(channel);
-
- kfree(efx->xdp_tx_queues);
-}
-
-int
-efx_realloc_channels(struct efx_nic *efx, u32 rxq_entries, u32 txq_entries)
-{
- struct efx_channel *other_channel[EFX_MAX_CHANNELS], *channel;
- u32 old_rxq_entries, old_txq_entries;
- unsigned i, next_buffer_table = 0;
- int rc, rc2;
-
- rc = efx_check_disabled(efx);
- if (rc)
- return rc;
-
- /* Not all channels should be reallocated. We must avoid
- * reallocating their buffer table entries.
- */
- efx_for_each_channel(channel, efx) {
- struct efx_rx_queue *rx_queue;
- struct efx_tx_queue *tx_queue;
-
- if (channel->type->copy)
- continue;
- next_buffer_table = max(next_buffer_table,
- channel->eventq.index +
- channel->eventq.entries);
- efx_for_each_channel_rx_queue(rx_queue, channel)
- next_buffer_table = max(next_buffer_table,
- rx_queue->rxd.index +
- rx_queue->rxd.entries);
- efx_for_each_channel_tx_queue(tx_queue, channel)
- next_buffer_table = max(next_buffer_table,
- tx_queue->txd.index +
- tx_queue->txd.entries);
- }
-
- efx_device_detach_sync(efx);
- efx_stop_all(efx);
- efx_soft_disable_interrupts(efx);
-
- /* Clone channels (where possible) */
- memset(other_channel, 0, sizeof(other_channel));
- for (i = 0; i < efx->n_channels; i++) {
- channel = efx->channel[i];
- if (channel->type->copy)
- channel = channel->type->copy(channel);
- if (!channel) {
- rc = -ENOMEM;
- goto out;
- }
- other_channel[i] = channel;
- }
-
- /* Swap entry counts and channel pointers */
- old_rxq_entries = efx->rxq_entries;
- old_txq_entries = efx->txq_entries;
- efx->rxq_entries = rxq_entries;
- efx->txq_entries = txq_entries;
- for (i = 0; i < efx->n_channels; i++) {
- channel = efx->channel[i];
- efx->channel[i] = other_channel[i];
- other_channel[i] = channel;
- }
-
- /* Restart buffer table allocation */
- efx->next_buffer_table = next_buffer_table;
-
- for (i = 0; i < efx->n_channels; i++) {
- channel = efx->channel[i];
- if (!channel->type->copy)
- continue;
- rc = efx_probe_channel(channel);
- if (rc)
- goto rollback;
- efx_init_napi_channel(efx->channel[i]);
- }
-
-out:
- /* Destroy unused channel structures */
- for (i = 0; i < efx->n_channels; i++) {
- channel = other_channel[i];
- if (channel && channel->type->copy) {
- efx_fini_napi_channel(channel);
- efx_remove_channel(channel);
- kfree(channel);
- }
- }
-
- rc2 = efx_soft_enable_interrupts(efx);
- if (rc2) {
- rc = rc ? rc : rc2;
- netif_err(efx, drv, efx->net_dev,
- "unable to restart interrupts on channel reallocation\n");
- efx_schedule_reset(efx, RESET_TYPE_DISABLE);
- } else {
- efx_start_all(efx);
- efx_device_attach_if_not_resetting(efx);
- }
- return rc;
-
-rollback:
- /* Swap back */
- efx->rxq_entries = old_rxq_entries;
- efx->txq_entries = old_txq_entries;
- for (i = 0; i < efx->n_channels; i++) {
- channel = efx->channel[i];
- efx->channel[i] = other_channel[i];
- other_channel[i] = channel;
- }
- goto out;
-}
-
-void efx_schedule_slow_fill(struct efx_rx_queue *rx_queue)
-{
- mod_timer(&rx_queue->slow_fill, jiffies + msecs_to_jiffies(10));
-}
-
-static bool efx_default_channel_want_txqs(struct efx_channel *channel)
-{
- return channel->channel - channel->efx->tx_channel_offset <
- channel->efx->n_tx_channels;
-}
-
-static const struct efx_channel_type efx_default_channel_type = {
- .pre_probe = efx_channel_dummy_op_int,
- .post_remove = efx_channel_dummy_op_void,
- .get_name = efx_get_channel_name,
- .copy = efx_copy_channel,
- .want_txqs = efx_default_channel_want_txqs,
- .keep_eventq = false,
- .want_pio = true,
-};
-
-int efx_channel_dummy_op_int(struct efx_channel *channel)
-{
- return 0;
-}
-
-void efx_channel_dummy_op_void(struct efx_channel *channel)
-{
-}
-
/**************************************************************************
*
* Port handling
*
**************************************************************************/
-/* This ensures that the kernel is kept informed (via
- * netif_carrier_on/off) of the link status, and also maintains the
- * link status's stop on the port's TX queue.
- */
-void efx_link_status_changed(struct efx_nic *efx)
-{
- struct efx_link_state *link_state = &efx->link_state;
-
- /* SFC Bug 5356: A net_dev notifier is registered, so we must ensure
- * that no events are triggered between unregister_netdev() and the
- * driver unloading. A more general condition is that NETDEV_CHANGE
- * can only be generated between NETDEV_UP and NETDEV_DOWN */
- if (!netif_running(efx->net_dev))
- return;
-
- if (link_state->up != netif_carrier_ok(efx->net_dev)) {
- efx->n_link_state_changes++;
-
- if (link_state->up)
- netif_carrier_on(efx->net_dev);
- else
- netif_carrier_off(efx->net_dev);
- }
-
- /* Status message for kernel log */
- if (link_state->up)
- netif_info(efx, link, efx->net_dev,
- "link up at %uMbps %s-duplex (MTU %d)\n",
- link_state->speed, link_state->fd ? "full" : "half",
- efx->net_dev->mtu);
- else
- netif_info(efx, link, efx->net_dev, "link down\n");
-}
-
-void efx_link_set_advertising(struct efx_nic *efx,
- const unsigned long *advertising)
-{
- memcpy(efx->link_advertising, advertising,
- sizeof(__ETHTOOL_DECLARE_LINK_MODE_MASK()));
-
- efx->link_advertising[0] |= ADVERTISED_Autoneg;
- if (advertising[0] & ADVERTISED_Pause)
- efx->wanted_fc |= (EFX_FC_TX | EFX_FC_RX);
- else
- efx->wanted_fc &= ~(EFX_FC_TX | EFX_FC_RX);
- if (advertising[0] & ADVERTISED_Asym_Pause)
- efx->wanted_fc ^= EFX_FC_TX;
-}
-
/* Equivalent to efx_link_set_advertising with all-zeroes, except does not
* force the Autoneg bit on.
*/
@@ -1035,73 +161,6 @@ void efx_link_set_wanted_fc(struct efx_nic *efx, u8 wanted_fc)
static void efx_fini_port(struct efx_nic *efx);
-/* We assume that efx->type->reconfigure_mac will always try to sync RX
- * filters and therefore needs to read-lock the filter table against freeing
- */
-void efx_mac_reconfigure(struct efx_nic *efx)
-{
- down_read(&efx->filter_sem);
- efx->type->reconfigure_mac(efx);
- up_read(&efx->filter_sem);
-}
-
-/* Push loopback/power/transmit disable settings to the PHY, and reconfigure
- * the MAC appropriately. All other PHY configuration changes are pushed
- * through phy_op->set_settings(), and pushed asynchronously to the MAC
- * through efx_monitor().
- *
- * Callers must hold the mac_lock
- */
-int __efx_reconfigure_port(struct efx_nic *efx)
-{
- enum efx_phy_mode phy_mode;
- int rc;
-
- WARN_ON(!mutex_is_locked(&efx->mac_lock));
-
- /* Disable PHY transmit in mac level loopbacks */
- phy_mode = efx->phy_mode;
- if (LOOPBACK_INTERNAL(efx))
- efx->phy_mode |= PHY_MODE_TX_DISABLED;
- else
- efx->phy_mode &= ~PHY_MODE_TX_DISABLED;
-
- rc = efx->type->reconfigure_port(efx);
-
- if (rc)
- efx->phy_mode = phy_mode;
-
- return rc;
-}
-
-/* Reinitialise the MAC to pick up new PHY settings, even if the port is
- * disabled. */
-int efx_reconfigure_port(struct efx_nic *efx)
-{
- int rc;
-
- EFX_ASSERT_RESET_SERIALISED(efx);
-
- mutex_lock(&efx->mac_lock);
- rc = __efx_reconfigure_port(efx);
- mutex_unlock(&efx->mac_lock);
-
- return rc;
-}
-
-/* Asynchronous work item for changing MAC promiscuity and multicast
- * hash. Avoid a drain/rx_ingress enable by reconfiguring the current
- * MAC directly. */
-static void efx_mac_work(struct work_struct *data)
-{
- struct efx_nic *efx = container_of(data, struct efx_nic, mac_work);
-
- mutex_lock(&efx->mac_lock);
- if (efx->port_enabled)
- efx_mac_reconfigure(efx);
- mutex_unlock(&efx->mac_lock);
-}
-
static int efx_probe_port(struct efx_nic *efx)
{
int rc;
@@ -1155,44 +214,6 @@ fail1:
return rc;
}
-static void efx_start_port(struct efx_nic *efx)
-{
- netif_dbg(efx, ifup, efx->net_dev, "start port\n");
- BUG_ON(efx->port_enabled);
-
- mutex_lock(&efx->mac_lock);
- efx->port_enabled = true;
-
- /* Ensure MAC ingress/egress is enabled */
- efx_mac_reconfigure(efx);
-
- mutex_unlock(&efx->mac_lock);
-}
-
-/* Cancel work for MAC reconfiguration, periodic hardware monitoring
- * and the async self-test, wait for them to finish and prevent them
- * being scheduled again. This doesn't cover online resets, which
- * should only be cancelled when removing the device.
- */
-static void efx_stop_port(struct efx_nic *efx)
-{
- netif_dbg(efx, ifdown, efx->net_dev, "stop port\n");
-
- EFX_ASSERT_RESET_SERIALISED(efx);
-
- mutex_lock(&efx->mac_lock);
- efx->port_enabled = false;
- mutex_unlock(&efx->mac_lock);
-
- /* Serialise against efx_set_multicast_list() */
- netif_addr_lock_bh(efx->net_dev);
- netif_addr_unlock_bh(efx->net_dev);
-
- cancel_delayed_work_sync(&efx->monitor_work);
- efx_selftest_async_cancel(efx);
- cancel_work_sync(&efx->mac_work);
-}
-
static void efx_fini_port(struct efx_nic *efx)
{
netif_dbg(efx, drv, efx->net_dev, "shut down port\n");
@@ -1291,583 +312,6 @@ static void efx_dissociate(struct efx_nic *efx)
}
}
-/* This configures the PCI device to enable I/O and DMA. */
-static int efx_init_io(struct efx_nic *efx)
-{
- struct pci_dev *pci_dev = efx->pci_dev;
- dma_addr_t dma_mask = efx->type->max_dma_mask;
- unsigned int mem_map_size = efx->type->mem_map_size(efx);
- int rc, bar;
-
- netif_dbg(efx, probe, efx->net_dev, "initialising I/O\n");
-
- bar = efx->type->mem_bar(efx);
-
- rc = pci_enable_device(pci_dev);
- if (rc) {
- netif_err(efx, probe, efx->net_dev,
- "failed to enable PCI device\n");
- goto fail1;
- }
-
- pci_set_master(pci_dev);
-
- /* Set the PCI DMA mask. Try all possibilities from our genuine mask
- * down to 32 bits, because some architectures will allow 40 bit
- * masks event though they reject 46 bit masks.
- */
- while (dma_mask > 0x7fffffffUL) {
- rc = dma_set_mask_and_coherent(&pci_dev->dev, dma_mask);
- if (rc == 0)
- break;
- dma_mask >>= 1;
- }
- if (rc) {
- netif_err(efx, probe, efx->net_dev,
- "could not find a suitable DMA mask\n");
- goto fail2;
- }
- netif_dbg(efx, probe, efx->net_dev,
- "using DMA mask %llx\n", (unsigned long long) dma_mask);
-
- efx->membase_phys = pci_resource_start(efx->pci_dev, bar);
- rc = pci_request_region(pci_dev, bar, "sfc");
- if (rc) {
- netif_err(efx, probe, efx->net_dev,
- "request for memory BAR failed\n");
- rc = -EIO;
- goto fail3;
- }
- efx->membase = ioremap_nocache(efx->membase_phys, mem_map_size);
- if (!efx->membase) {
- netif_err(efx, probe, efx->net_dev,
- "could not map memory BAR at %llx+%x\n",
- (unsigned long long)efx->membase_phys, mem_map_size);
- rc = -ENOMEM;
- goto fail4;
- }
- netif_dbg(efx, probe, efx->net_dev,
- "memory BAR at %llx+%x (virtual %p)\n",
- (unsigned long long)efx->membase_phys, mem_map_size,
- efx->membase);
-
- return 0;
-
- fail4:
- pci_release_region(efx->pci_dev, bar);
- fail3:
- efx->membase_phys = 0;
- fail2:
- pci_disable_device(efx->pci_dev);
- fail1:
- return rc;
-}
-
-static void efx_fini_io(struct efx_nic *efx)
-{
- int bar;
-
- netif_dbg(efx, drv, efx->net_dev, "shutting down I/O\n");
-
- if (efx->membase) {
- iounmap(efx->membase);
- efx->membase = NULL;
- }
-
- if (efx->membase_phys) {
- bar = efx->type->mem_bar(efx);
- pci_release_region(efx->pci_dev, bar);
- efx->membase_phys = 0;
- }
-
- /* Don't disable bus-mastering if VFs are assigned */
- if (!pci_vfs_assigned(efx->pci_dev))
- pci_disable_device(efx->pci_dev);
-}
-
-void efx_set_default_rx_indir_table(struct efx_nic *efx,
- struct efx_rss_context *ctx)
-{
- size_t i;
-
- for (i = 0; i < ARRAY_SIZE(ctx->rx_indir_table); i++)
- ctx->rx_indir_table[i] =
- ethtool_rxfh_indir_default(i, efx->rss_spread);
-}
-
-static unsigned int efx_wanted_parallelism(struct efx_nic *efx)
-{
- cpumask_var_t thread_mask;
- unsigned int count;
- int cpu;
-
- if (rss_cpus) {
- count = rss_cpus;
- } else {
- if (unlikely(!zalloc_cpumask_var(&thread_mask, GFP_KERNEL))) {
- netif_warn(efx, probe, efx->net_dev,
- "RSS disabled due to allocation failure\n");
- return 1;
- }
-
- count = 0;
- for_each_online_cpu(cpu) {
- if (!cpumask_test_cpu(cpu, thread_mask)) {
- ++count;
- cpumask_or(thread_mask, thread_mask,
- topology_sibling_cpumask(cpu));
- }
- }
-
- free_cpumask_var(thread_mask);
- }
-
- if (count > EFX_MAX_RX_QUEUES) {
- netif_cond_dbg(efx, probe, efx->net_dev, !rss_cpus, warn,
- "Reducing number of rx queues from %u to %u.\n",
- count, EFX_MAX_RX_QUEUES);
- count = EFX_MAX_RX_QUEUES;
- }
-
- /* If RSS is requested for the PF *and* VFs then we can't write RSS
- * table entries that are inaccessible to VFs
- */
-#ifdef CONFIG_SFC_SRIOV
- if (efx->type->sriov_wanted) {
- if (efx->type->sriov_wanted(efx) && efx_vf_size(efx) > 1 &&
- count > efx_vf_size(efx)) {
- netif_warn(efx, probe, efx->net_dev,
- "Reducing number of RSS channels from %u to %u for "
- "VF support. Increase vf-msix-limit to use more "
- "channels on the PF.\n",
- count, efx_vf_size(efx));
- count = efx_vf_size(efx);
- }
- }
-#endif
-
- return count;
-}
-
-static int efx_allocate_msix_channels(struct efx_nic *efx,
- unsigned int max_channels,
- unsigned int extra_channels,
- unsigned int parallelism)
-{
- unsigned int n_channels = parallelism;
- int vec_count;
- int n_xdp_tx;
- int n_xdp_ev;
-
- if (efx_separate_tx_channels)
- n_channels *= 2;
- n_channels += extra_channels;
-
- /* To allow XDP transmit to happen from arbitrary NAPI contexts
- * we allocate a TX queue per CPU. We share event queues across
- * multiple tx queues, assuming tx and ev queues are both
- * maximum size.
- */
-
- n_xdp_tx = num_possible_cpus();
- n_xdp_ev = DIV_ROUND_UP(n_xdp_tx, EFX_TXQ_TYPES);
-
- /* Check resources.
- * We need a channel per event queue, plus a VI per tx queue.
- * This may be more pessimistic than it needs to be.
- */
- if (n_channels + n_xdp_ev > max_channels) {
- netif_err(efx, drv, efx->net_dev,
- "Insufficient resources for %d XDP event queues (%d other channels, max %d)\n",
- n_xdp_ev, n_channels, max_channels);
- efx->n_xdp_channels = 0;
- efx->xdp_tx_per_channel = 0;
- efx->xdp_tx_queue_count = 0;
- } else {
- efx->n_xdp_channels = n_xdp_ev;
- efx->xdp_tx_per_channel = EFX_TXQ_TYPES;
- efx->xdp_tx_queue_count = n_xdp_tx;
- n_channels += n_xdp_ev;
- netif_dbg(efx, drv, efx->net_dev,
- "Allocating %d TX and %d event queues for XDP\n",
- n_xdp_tx, n_xdp_ev);
- }
-
- n_channels = min(n_channels, max_channels);
-
- vec_count = pci_msix_vec_count(efx->pci_dev);
- if (vec_count < 0)
- return vec_count;
- if (vec_count < n_channels) {
- netif_err(efx, drv, efx->net_dev,
- "WARNING: Insufficient MSI-X vectors available (%d < %u).\n",
- vec_count, n_channels);
- netif_err(efx, drv, efx->net_dev,
- "WARNING: Performance may be reduced.\n");
- n_channels = vec_count;
- }
-
- efx->n_channels = n_channels;
-
- /* Do not create the PTP TX queue(s) if PTP uses the MC directly. */
- if (extra_channels && !efx_ptp_use_mac_tx_timestamps(efx))
- n_channels--;
-
- /* Ignore XDP tx channels when creating rx channels. */
- n_channels -= efx->n_xdp_channels;
-
- if (efx_separate_tx_channels) {
- efx->n_tx_channels =
- min(max(n_channels / 2, 1U),
- efx->max_tx_channels);
- efx->tx_channel_offset =
- n_channels - efx->n_tx_channels;
- efx->n_rx_channels =
- max(n_channels -
- efx->n_tx_channels, 1U);
- } else {
- efx->n_tx_channels = min(n_channels, efx->max_tx_channels);
- efx->tx_channel_offset = 0;
- efx->n_rx_channels = n_channels;
- }
-
- if (efx->n_xdp_channels)
- efx->xdp_channel_offset = efx->tx_channel_offset +
- efx->n_tx_channels;
- else
- efx->xdp_channel_offset = efx->n_channels;
-
- netif_dbg(efx, drv, efx->net_dev,
- "Allocating %u RX channels\n",
- efx->n_rx_channels);
-
- return efx->n_channels;
-}
-
-/* Probe the number and type of interrupts we are able to obtain, and
- * the resulting numbers of channels and RX queues.
- */
-static int efx_probe_interrupts(struct efx_nic *efx)
-{
- unsigned int extra_channels = 0;
- unsigned int i, j;
- int rc;
-
- for (i = 0; i < EFX_MAX_EXTRA_CHANNELS; i++)
- if (efx->extra_channel_type[i])
- ++extra_channels;
-
- if (efx->interrupt_mode == EFX_INT_MODE_MSIX) {
- unsigned int parallelism = efx_wanted_parallelism(efx);
- struct msix_entry xentries[EFX_MAX_CHANNELS];
- unsigned int n_channels;
-
- rc = efx_allocate_msix_channels(efx, efx->max_channels,
- extra_channels, parallelism);
- if (rc >= 0) {
- n_channels = rc;
- for (i = 0; i < n_channels; i++)
- xentries[i].entry = i;
- rc = pci_enable_msix_range(efx->pci_dev, xentries, 1,
- n_channels);
- }
- if (rc < 0) {
- /* Fall back to single channel MSI */
- netif_err(efx, drv, efx->net_dev,
- "could not enable MSI-X\n");
- if (efx->type->min_interrupt_mode >= EFX_INT_MODE_MSI)
- efx->interrupt_mode = EFX_INT_MODE_MSI;
- else
- return rc;
- } else if (rc < n_channels) {
- netif_err(efx, drv, efx->net_dev,
- "WARNING: Insufficient MSI-X vectors"
- " available (%d < %u).\n", rc, n_channels);
- netif_err(efx, drv, efx->net_dev,
- "WARNING: Performance may be reduced.\n");
- n_channels = rc;
- }
-
- if (rc > 0) {
- for (i = 0; i < efx->n_channels; i++)
- efx_get_channel(efx, i)->irq =
- xentries[i].vector;
- }
- }
-
- /* Try single interrupt MSI */
- if (efx->interrupt_mode == EFX_INT_MODE_MSI) {
- efx->n_channels = 1;
- efx->n_rx_channels = 1;
- efx->n_tx_channels = 1;
- efx->n_xdp_channels = 0;
- efx->xdp_channel_offset = efx->n_channels;
- rc = pci_enable_msi(efx->pci_dev);
- if (rc == 0) {
- efx_get_channel(efx, 0)->irq = efx->pci_dev->irq;
- } else {
- netif_err(efx, drv, efx->net_dev,
- "could not enable MSI\n");
- if (efx->type->min_interrupt_mode >= EFX_INT_MODE_LEGACY)
- efx->interrupt_mode = EFX_INT_MODE_LEGACY;
- else
- return rc;
- }
- }
-
- /* Assume legacy interrupts */
- if (efx->interrupt_mode == EFX_INT_MODE_LEGACY) {
- efx->n_channels = 1 + (efx_separate_tx_channels ? 1 : 0);
- efx->n_rx_channels = 1;
- efx->n_tx_channels = 1;
- efx->n_xdp_channels = 0;
- efx->xdp_channel_offset = efx->n_channels;
- efx->legacy_irq = efx->pci_dev->irq;
- }
-
- /* Assign extra channels if possible, before XDP channels */
- efx->n_extra_tx_channels = 0;
- j = efx->xdp_channel_offset;
- for (i = 0; i < EFX_MAX_EXTRA_CHANNELS; i++) {
- if (!efx->extra_channel_type[i])
- continue;
- if (efx->interrupt_mode != EFX_INT_MODE_MSIX ||
- efx->n_channels <= extra_channels) {
- efx->extra_channel_type[i]->handle_no_channel(efx);
- } else {
- --j;
- efx_get_channel(efx, j)->type =
- efx->extra_channel_type[i];
- if (efx_channel_has_tx_queues(efx_get_channel(efx, j)))
- efx->n_extra_tx_channels++;
- }
- }
-
- /* RSS might be usable on VFs even if it is disabled on the PF */
-#ifdef CONFIG_SFC_SRIOV
- if (efx->type->sriov_wanted) {
- efx->rss_spread = ((efx->n_rx_channels > 1 ||
- !efx->type->sriov_wanted(efx)) ?
- efx->n_rx_channels : efx_vf_size(efx));
- return 0;
- }
-#endif
- efx->rss_spread = efx->n_rx_channels;
-
- return 0;
-}
-
-#if defined(CONFIG_SMP)
-static void efx_set_interrupt_affinity(struct efx_nic *efx)
-{
- struct efx_channel *channel;
- unsigned int cpu;
-
- efx_for_each_channel(channel, efx) {
- cpu = cpumask_local_spread(channel->channel,
- pcibus_to_node(efx->pci_dev->bus));
- irq_set_affinity_hint(channel->irq, cpumask_of(cpu));
- }
-}
-
-static void efx_clear_interrupt_affinity(struct efx_nic *efx)
-{
- struct efx_channel *channel;
-
- efx_for_each_channel(channel, efx)
- irq_set_affinity_hint(channel->irq, NULL);
-}
-#else
-static void
-efx_set_interrupt_affinity(struct efx_nic *efx __attribute__ ((unused)))
-{
-}
-
-static void
-efx_clear_interrupt_affinity(struct efx_nic *efx __attribute__ ((unused)))
-{
-}
-#endif /* CONFIG_SMP */
-
-static int efx_soft_enable_interrupts(struct efx_nic *efx)
-{
- struct efx_channel *channel, *end_channel;
- int rc;
-
- BUG_ON(efx->state == STATE_DISABLED);
-
- efx->irq_soft_enabled = true;
- smp_wmb();
-
- efx_for_each_channel(channel, efx) {
- if (!channel->type->keep_eventq) {
- rc = efx_init_eventq(channel);
- if (rc)
- goto fail;
- }
- efx_start_eventq(channel);
- }
-
- efx_mcdi_mode_event(efx);
-
- return 0;
-fail:
- end_channel = channel;
- efx_for_each_channel(channel, efx) {
- if (channel == end_channel)
- break;
- efx_stop_eventq(channel);
- if (!channel->type->keep_eventq)
- efx_fini_eventq(channel);
- }
-
- return rc;
-}
-
-static void efx_soft_disable_interrupts(struct efx_nic *efx)
-{
- struct efx_channel *channel;
-
- if (efx->state == STATE_DISABLED)
- return;
-
- efx_mcdi_mode_poll(efx);
-
- efx->irq_soft_enabled = false;
- smp_wmb();
-
- if (efx->legacy_irq)
- synchronize_irq(efx->legacy_irq);
-
- efx_for_each_channel(channel, efx) {
- if (channel->irq)
- synchronize_irq(channel->irq);
-
- efx_stop_eventq(channel);
- if (!channel->type->keep_eventq)
- efx_fini_eventq(channel);
- }
-
- /* Flush the asynchronous MCDI request queue */
- efx_mcdi_flush_async(efx);
-}
-
-static int efx_enable_interrupts(struct efx_nic *efx)
-{
- struct efx_channel *channel, *end_channel;
- int rc;
-
- BUG_ON(efx->state == STATE_DISABLED);
-
- if (efx->eeh_disabled_legacy_irq) {
- enable_irq(efx->legacy_irq);
- efx->eeh_disabled_legacy_irq = false;
- }
-
- efx->type->irq_enable_master(efx);
-
- efx_for_each_channel(channel, efx) {
- if (channel->type->keep_eventq) {
- rc = efx_init_eventq(channel);
- if (rc)
- goto fail;
- }
- }
-
- rc = efx_soft_enable_interrupts(efx);
- if (rc)
- goto fail;
-
- return 0;
-
-fail:
- end_channel = channel;
- efx_for_each_channel(channel, efx) {
- if (channel == end_channel)
- break;
- if (channel->type->keep_eventq)
- efx_fini_eventq(channel);
- }
-
- efx->type->irq_disable_non_ev(efx);
-
- return rc;
-}
-
-static void efx_disable_interrupts(struct efx_nic *efx)
-{
- struct efx_channel *channel;
-
- efx_soft_disable_interrupts(efx);
-
- efx_for_each_channel(channel, efx) {
- if (channel->type->keep_eventq)
- efx_fini_eventq(channel);
- }
-
- efx->type->irq_disable_non_ev(efx);
-}
-
-static void efx_remove_interrupts(struct efx_nic *efx)
-{
- struct efx_channel *channel;
-
- /* Remove MSI/MSI-X interrupts */
- efx_for_each_channel(channel, efx)
- channel->irq = 0;
- pci_disable_msi(efx->pci_dev);
- pci_disable_msix(efx->pci_dev);
-
- /* Remove legacy interrupt */
- efx->legacy_irq = 0;
-}
-
-static int efx_set_channels(struct efx_nic *efx)
-{
- struct efx_channel *channel;
- struct efx_tx_queue *tx_queue;
- int xdp_queue_number;
-
- efx->tx_channel_offset =
- efx_separate_tx_channels ?
- efx->n_channels - efx->n_tx_channels : 0;
-
- if (efx->xdp_tx_queue_count) {
- EFX_WARN_ON_PARANOID(efx->xdp_tx_queues);
-
- /* Allocate array for XDP TX queue lookup. */
- efx->xdp_tx_queues = kcalloc(efx->xdp_tx_queue_count,
- sizeof(*efx->xdp_tx_queues),
- GFP_KERNEL);
- if (!efx->xdp_tx_queues)
- return -ENOMEM;
- }
-
- /* We need to mark which channels really have RX and TX
- * queues, and adjust the TX queue numbers if we have separate
- * RX-only and TX-only channels.
- */
- xdp_queue_number = 0;
- efx_for_each_channel(channel, efx) {
- if (channel->channel < efx->n_rx_channels)
- channel->rx_queue.core_index = channel->channel;
- else
- channel->rx_queue.core_index = -1;
-
- efx_for_each_channel_tx_queue(tx_queue, channel) {
- tx_queue->queue -= (efx->tx_channel_offset *
- EFX_TXQ_TYPES);
-
- if (efx_channel_is_xdp_tx(channel) &&
- xdp_queue_number < efx->xdp_tx_queue_count) {
- efx->xdp_tx_queues[xdp_queue_number] = tx_queue;
- xdp_queue_number++;
- }
- }
- }
- return 0;
-}
-
static int efx_probe_nic(struct efx_nic *efx)
{
int rc;
@@ -1940,70 +384,6 @@ static void efx_remove_nic(struct efx_nic *efx)
efx->type->remove(efx);
}
-static int efx_probe_filters(struct efx_nic *efx)
-{
- int rc;
-
- init_rwsem(&efx->filter_sem);
- mutex_lock(&efx->mac_lock);
- down_write(&efx->filter_sem);
- rc = efx->type->filter_table_probe(efx);
- if (rc)
- goto out_unlock;
-
-#ifdef CONFIG_RFS_ACCEL
- if (efx->type->offload_features & NETIF_F_NTUPLE) {
- struct efx_channel *channel;
- int i, success = 1;
-
- efx_for_each_channel(channel, efx) {
- channel->rps_flow_id =
- kcalloc(efx->type->max_rx_ip_filters,
- sizeof(*channel->rps_flow_id),
- GFP_KERNEL);
- if (!channel->rps_flow_id)
- success = 0;
- else
- for (i = 0;
- i < efx->type->max_rx_ip_filters;
- ++i)
- channel->rps_flow_id[i] =
- RPS_FLOW_ID_INVALID;
- channel->rfs_expire_index = 0;
- channel->rfs_filter_count = 0;
- }
-
- if (!success) {
- efx_for_each_channel(channel, efx)
- kfree(channel->rps_flow_id);
- efx->type->filter_table_remove(efx);
- rc = -ENOMEM;
- goto out_unlock;
- }
- }
-#endif
-out_unlock:
- up_write(&efx->filter_sem);
- mutex_unlock(&efx->mac_lock);
- return rc;
-}
-
-static void efx_remove_filters(struct efx_nic *efx)
-{
-#ifdef CONFIG_RFS_ACCEL
- struct efx_channel *channel;
-
- efx_for_each_channel(channel, efx) {
- cancel_delayed_work_sync(&channel->filter_work);
- kfree(channel->rps_flow_id);
- }
-#endif
- down_write(&efx->filter_sem);
- efx->type->filter_table_remove(efx);
- up_write(&efx->filter_sem);
-}
-
-
/**************************************************************************
*
* NIC startup/shutdown
@@ -2068,81 +448,6 @@ static int efx_probe_all(struct efx_nic *efx)
return rc;
}
-/* If the interface is supposed to be running but is not, start
- * the hardware and software data path, regular activity for the port
- * (MAC statistics, link polling, etc.) and schedule the port to be
- * reconfigured. Interrupts must already be enabled. This function
- * is safe to call multiple times, so long as the NIC is not disabled.
- * Requires the RTNL lock.
- */
-static void efx_start_all(struct efx_nic *efx)
-{
- EFX_ASSERT_RESET_SERIALISED(efx);
- BUG_ON(efx->state == STATE_DISABLED);
-
- /* Check that it is appropriate to restart the interface. All
- * of these flags are safe to read under just the rtnl lock */
- if (efx->port_enabled || !netif_running(efx->net_dev) ||
- efx->reset_pending)
- return;
-
- efx_start_port(efx);
- efx_start_datapath(efx);
-
- /* Start the hardware monitor if there is one */
- if (efx->type->monitor != NULL)
- queue_delayed_work(efx->workqueue, &efx->monitor_work,
- efx_monitor_interval);
-
- /* Link state detection is normally event-driven; we have
- * to poll now because we could have missed a change
- */
- mutex_lock(&efx->mac_lock);
- if (efx->phy_op->poll(efx))
- efx_link_status_changed(efx);
- mutex_unlock(&efx->mac_lock);
-
- efx->type->start_stats(efx);
- efx->type->pull_stats(efx);
- spin_lock_bh(&efx->stats_lock);
- efx->type->update_stats(efx, NULL, NULL);
- spin_unlock_bh(&efx->stats_lock);
-}
-
-/* Quiesce the hardware and software data path, and regular activity
- * for the port without bringing the link down. Safe to call multiple
- * times with the NIC in almost any state, but interrupts should be
- * enabled. Requires the RTNL lock.
- */
-static void efx_stop_all(struct efx_nic *efx)
-{
- EFX_ASSERT_RESET_SERIALISED(efx);
-
- /* port_enabled can be read safely under the rtnl lock */
- if (!efx->port_enabled)
- return;
-
- /* update stats before we go down so we can accurately count
- * rx_nodesc_drops
- */
- efx->type->pull_stats(efx);
- spin_lock_bh(&efx->stats_lock);
- efx->type->update_stats(efx, NULL, NULL);
- spin_unlock_bh(&efx->stats_lock);
- efx->type->stop_stats(efx);
- efx_stop_port(efx);
-
- /* Stop the kernel transmit interface. This is only valid if
- * the device is stopped or detached; otherwise the watchdog
- * may fire immediately.
- */
- WARN_ON(netif_running(efx->net_dev) &&
- netif_device_present(efx->net_dev));
- netif_tx_disable(efx->net_dev);
-
- efx_stop_datapath(efx);
-}
-
static void efx_remove_all(struct efx_nic *efx)
{
rtnl_lock();
@@ -2238,36 +543,6 @@ void efx_get_irq_moderation(struct efx_nic *efx, unsigned int *tx_usecs,
/**************************************************************************
*
- * Hardware monitor
- *
- **************************************************************************/
-
-/* Run periodically off the general workqueue */
-static void efx_monitor(struct work_struct *data)
-{
- struct efx_nic *efx = container_of(data, struct efx_nic,
- monitor_work.work);
-
- netif_vdbg(efx, timer, efx->net_dev,
- "hardware monitor executing on CPU %d\n",
- raw_smp_processor_id());
- BUG_ON(efx->type->monitor == NULL);
-
- /* If the mac_lock is already held then it is likely a port
- * reconfiguration is already in place, which will likely do
- * most of the work of monitor() anyway. */
- if (mutex_trylock(&efx->mac_lock)) {
- if (efx->port_enabled)
- efx->type->monitor(efx);
- mutex_unlock(&efx->mac_lock);
- }
-
- queue_delayed_work(efx->workqueue, &efx->monitor_work,
- efx_monitor_interval);
-}
-
-/**************************************************************************
- *
* ioctls
*
*************************************************************************/
@@ -2295,45 +570,6 @@ static int efx_ioctl(struct net_device *net_dev, struct ifreq *ifr, int cmd)
/**************************************************************************
*
- * NAPI interface
- *
- **************************************************************************/
-
-static void efx_init_napi_channel(struct efx_channel *channel)
-{
- struct efx_nic *efx = channel->efx;
-
- channel->napi_dev = efx->net_dev;
- netif_napi_add(channel->napi_dev, &channel->napi_str,
- efx_poll, napi_weight);
-}
-
-static void efx_init_napi(struct efx_nic *efx)
-{
- struct efx_channel *channel;
-
- efx_for_each_channel(channel, efx)
- efx_init_napi_channel(channel);
-}
-
-static void efx_fini_napi_channel(struct efx_channel *channel)
-{
- if (channel->napi_dev)
- netif_napi_del(&channel->napi_str);
-
- channel->napi_dev = NULL;
-}
-
-static void efx_fini_napi(struct efx_nic *efx)
-{
- struct efx_channel *channel;
-
- efx_for_each_channel(channel, efx)
- efx_fini_napi_channel(channel);
-}
-
-/**************************************************************************
- *
* Kernel net device interface
*
*************************************************************************/
@@ -2383,19 +619,8 @@ int efx_net_stop(struct net_device *net_dev)
return 0;
}
-/* Context: process, dev_base_lock or RTNL held, non-blocking. */
-static void efx_net_stats(struct net_device *net_dev,
- struct rtnl_link_stats64 *stats)
-{
- struct efx_nic *efx = netdev_priv(net_dev);
-
- spin_lock_bh(&efx->stats_lock);
- efx->type->update_stats(efx, NULL, stats);
- spin_unlock_bh(&efx->stats_lock);
-}
-
/* Context: netif_tx_lock held, BHs disabled. */
-static void efx_watchdog(struct net_device *net_dev)
+static void efx_watchdog(struct net_device *net_dev, unsigned int txqueue)
{
struct efx_nic *efx = netdev_priv(net_dev);
@@ -2406,51 +631,6 @@ static void efx_watchdog(struct net_device *net_dev)
efx_schedule_reset(efx, RESET_TYPE_TX_WATCHDOG);
}
-static unsigned int efx_xdp_max_mtu(struct efx_nic *efx)
-{
- /* The maximum MTU that we can fit in a single page, allowing for
- * framing, overhead and XDP headroom.
- */
- int overhead = EFX_MAX_FRAME_LEN(0) + sizeof(struct efx_rx_page_state) +
- efx->rx_prefix_size + efx->type->rx_buffer_padding +
- efx->rx_ip_align + XDP_PACKET_HEADROOM;
-
- return PAGE_SIZE - overhead;
-}
-
-/* Context: process, rtnl_lock() held. */
-static int efx_change_mtu(struct net_device *net_dev, int new_mtu)
-{
- struct efx_nic *efx = netdev_priv(net_dev);
- int rc;
-
- rc = efx_check_disabled(efx);
- if (rc)
- return rc;
-
- if (rtnl_dereference(efx->xdp_prog) &&
- new_mtu > efx_xdp_max_mtu(efx)) {
- netif_err(efx, drv, efx->net_dev,
- "Requested MTU of %d too big for XDP (max: %d)\n",
- new_mtu, efx_xdp_max_mtu(efx));
- return -EINVAL;
- }
-
- netif_dbg(efx, drv, efx->net_dev, "changing MTU to %d\n", new_mtu);
-
- efx_device_detach_sync(efx);
- efx_stop_all(efx);
-
- mutex_lock(&efx->mac_lock);
- net_dev->mtu = new_mtu;
- efx_mac_reconfigure(efx);
- mutex_unlock(&efx->mac_lock);
-
- efx_start_all(efx);
- efx_device_attach_if_not_resetting(efx);
- return 0;
-}
-
static int efx_set_mac_address(struct net_device *net_dev, void *data)
{
struct efx_nic *efx = netdev_priv(net_dev);
@@ -2727,28 +907,6 @@ show_phy_type(struct device *dev, struct device_attribute *attr, char *buf)
}
static DEVICE_ATTR(phy_type, 0444, show_phy_type, NULL);
-#ifdef CONFIG_SFC_MCDI_LOGGING
-static ssize_t show_mcdi_log(struct device *dev, struct device_attribute *attr,
- char *buf)
-{
- struct efx_nic *efx = dev_get_drvdata(dev);
- struct efx_mcdi_iface *mcdi = efx_mcdi(efx);
-
- return scnprintf(buf, PAGE_SIZE, "%d\n", mcdi->logging_enabled);
-}
-static ssize_t set_mcdi_log(struct device *dev, struct device_attribute *attr,
- const char *buf, size_t count)
-{
- struct efx_nic *efx = dev_get_drvdata(dev);
- struct efx_mcdi_iface *mcdi = efx_mcdi(efx);
- bool enable = count > 0 && *buf != '0';
-
- mcdi->logging_enabled = enable;
- return count;
-}
-static DEVICE_ATTR(mcdi_logging, 0644, show_mcdi_log, set_mcdi_log);
-#endif
-
static int efx_register_netdev(struct efx_nic *efx)
{
struct net_device *net_dev = efx->net_dev;
@@ -2808,21 +966,11 @@ static int efx_register_netdev(struct efx_nic *efx)
"failed to init net dev attributes\n");
goto fail_registered;
}
-#ifdef CONFIG_SFC_MCDI_LOGGING
- rc = device_create_file(&efx->pci_dev->dev, &dev_attr_mcdi_logging);
- if (rc) {
- netif_err(efx, drv, efx->net_dev,
- "failed to init net dev attributes\n");
- goto fail_attr_mcdi_logging;
- }
-#endif
+
+ efx_init_mcdi_logging(efx);
return 0;
-#ifdef CONFIG_SFC_MCDI_LOGGING
-fail_attr_mcdi_logging:
- device_remove_file(&efx->pci_dev->dev, &dev_attr_phy_type);
-#endif
fail_registered:
rtnl_lock();
efx_dissociate(efx);
@@ -2843,9 +991,7 @@ static void efx_unregister_netdev(struct efx_nic *efx)
if (efx_dev_registered(efx)) {
strlcpy(efx->name, pci_name(efx->pci_dev), sizeof(efx->name));
-#ifdef CONFIG_SFC_MCDI_LOGGING
- device_remove_file(&efx->pci_dev->dev, &dev_attr_mcdi_logging);
-#endif
+ efx_fini_mcdi_logging(efx);
device_remove_file(&efx->pci_dev->dev, &dev_attr_phy_type);
unregister_netdev(efx->net_dev);
}
@@ -2853,292 +999,6 @@ static void efx_unregister_netdev(struct efx_nic *efx)
/**************************************************************************
*
- * Device reset and suspend
- *
- **************************************************************************/
-
-/* Tears down the entire software state and most of the hardware state
- * before reset. */
-void efx_reset_down(struct efx_nic *efx, enum reset_type method)
-{
- EFX_ASSERT_RESET_SERIALISED(efx);
-
- if (method == RESET_TYPE_MCDI_TIMEOUT)
- efx->type->prepare_flr(efx);
-
- efx_stop_all(efx);
- efx_disable_interrupts(efx);
-
- mutex_lock(&efx->mac_lock);
- down_write(&efx->filter_sem);
- mutex_lock(&efx->rss_lock);
- if (efx->port_initialized && method != RESET_TYPE_INVISIBLE &&
- method != RESET_TYPE_DATAPATH)
- efx->phy_op->fini(efx);
- efx->type->fini(efx);
-}
-
-/* This function will always ensure that the locks acquired in
- * efx_reset_down() are released. A failure return code indicates
- * that we were unable to reinitialise the hardware, and the
- * driver should be disabled. If ok is false, then the rx and tx
- * engines are not restarted, pending a RESET_DISABLE. */
-int efx_reset_up(struct efx_nic *efx, enum reset_type method, bool ok)
-{
- int rc;
-
- EFX_ASSERT_RESET_SERIALISED(efx);
-
- if (method == RESET_TYPE_MCDI_TIMEOUT)
- efx->type->finish_flr(efx);
-
- /* Ensure that SRAM is initialised even if we're disabling the device */
- rc = efx->type->init(efx);
- if (rc) {
- netif_err(efx, drv, efx->net_dev, "failed to initialise NIC\n");
- goto fail;
- }
-
- if (!ok)
- goto fail;
-
- if (efx->port_initialized && method != RESET_TYPE_INVISIBLE &&
- method != RESET_TYPE_DATAPATH) {
- rc = efx->phy_op->init(efx);
- if (rc)
- goto fail;
- rc = efx->phy_op->reconfigure(efx);
- if (rc && rc != -EPERM)
- netif_err(efx, drv, efx->net_dev,
- "could not restore PHY settings\n");
- }
-
- rc = efx_enable_interrupts(efx);
- if (rc)
- goto fail;
-
-#ifdef CONFIG_SFC_SRIOV
- rc = efx->type->vswitching_restore(efx);
- if (rc) /* not fatal; the PF will still work fine */
- netif_warn(efx, probe, efx->net_dev,
- "failed to restore vswitching rc=%d;"
- " VFs may not function\n", rc);
-#endif
-
- if (efx->type->rx_restore_rss_contexts)
- efx->type->rx_restore_rss_contexts(efx);
- mutex_unlock(&efx->rss_lock);
- efx->type->filter_table_restore(efx);
- up_write(&efx->filter_sem);
- if (efx->type->sriov_reset)
- efx->type->sriov_reset(efx);
-
- mutex_unlock(&efx->mac_lock);
-
- efx_start_all(efx);
-
- if (efx->type->udp_tnl_push_ports)
- efx->type->udp_tnl_push_ports(efx);
-
- return 0;
-
-fail:
- efx->port_initialized = false;
-
- mutex_unlock(&efx->rss_lock);
- up_write(&efx->filter_sem);
- mutex_unlock(&efx->mac_lock);
-
- return rc;
-}
-
-/* Reset the NIC using the specified method. Note that the reset may
- * fail, in which case the card will be left in an unusable state.
- *
- * Caller must hold the rtnl_lock.
- */
-int efx_reset(struct efx_nic *efx, enum reset_type method)
-{
- int rc, rc2;
- bool disabled;
-
- netif_info(efx, drv, efx->net_dev, "resetting (%s)\n",
- RESET_TYPE(method));
-
- efx_device_detach_sync(efx);
- efx_reset_down(efx, method);
-
- rc = efx->type->reset(efx, method);
- if (rc) {
- netif_err(efx, drv, efx->net_dev, "failed to reset hardware\n");
- goto out;
- }
-
- /* Clear flags for the scopes we covered. We assume the NIC and
- * driver are now quiescent so that there is no race here.
- */
- if (method < RESET_TYPE_MAX_METHOD)
- efx->reset_pending &= -(1 << (method + 1));
- else /* it doesn't fit into the well-ordered scope hierarchy */
- __clear_bit(method, &efx->reset_pending);
-
- /* Reinitialise bus-mastering, which may have been turned off before
- * the reset was scheduled. This is still appropriate, even in the
- * RESET_TYPE_DISABLE since this driver generally assumes the hardware
- * can respond to requests. */
- pci_set_master(efx->pci_dev);
-
-out:
- /* Leave device stopped if necessary */
- disabled = rc ||
- method == RESET_TYPE_DISABLE ||
- method == RESET_TYPE_RECOVER_OR_DISABLE;
- rc2 = efx_reset_up(efx, method, !disabled);
- if (rc2) {
- disabled = true;
- if (!rc)
- rc = rc2;
- }
-
- if (disabled) {
- dev_close(efx->net_dev);
- netif_err(efx, drv, efx->net_dev, "has been disabled\n");
- efx->state = STATE_DISABLED;
- } else {
- netif_dbg(efx, drv, efx->net_dev, "reset complete\n");
- efx_device_attach_if_not_resetting(efx);
- }
- return rc;
-}
-
-/* Try recovery mechanisms.
- * For now only EEH is supported.
- * Returns 0 if the recovery mechanisms are unsuccessful.
- * Returns a non-zero value otherwise.
- */
-int efx_try_recovery(struct efx_nic *efx)
-{
-#ifdef CONFIG_EEH
- /* A PCI error can occur and not be seen by EEH because nothing
- * happens on the PCI bus. In this case the driver may fail and
- * schedule a 'recover or reset', leading to this recovery handler.
- * Manually call the eeh failure check function.
- */
- struct eeh_dev *eehdev = pci_dev_to_eeh_dev(efx->pci_dev);
- if (eeh_dev_check_failure(eehdev)) {
- /* The EEH mechanisms will handle the error and reset the
- * device if necessary.
- */
- return 1;
- }
-#endif
- return 0;
-}
-
-static void efx_wait_for_bist_end(struct efx_nic *efx)
-{
- int i;
-
- for (i = 0; i < BIST_WAIT_DELAY_COUNT; ++i) {
- if (efx_mcdi_poll_reboot(efx))
- goto out;
- msleep(BIST_WAIT_DELAY_MS);
- }
-
- netif_err(efx, drv, efx->net_dev, "Warning: No MC reboot after BIST mode\n");
-out:
- /* Either way unset the BIST flag. If we found no reboot we probably
- * won't recover, but we should try.
- */
- efx->mc_bist_for_other_fn = false;
-}
-
-/* The worker thread exists so that code that cannot sleep can
- * schedule a reset for later.
- */
-static void efx_reset_work(struct work_struct *data)
-{
- struct efx_nic *efx = container_of(data, struct efx_nic, reset_work);
- unsigned long pending;
- enum reset_type method;
-
- pending = READ_ONCE(efx->reset_pending);
- method = fls(pending) - 1;
-
- if (method == RESET_TYPE_MC_BIST)
- efx_wait_for_bist_end(efx);
-
- if ((method == RESET_TYPE_RECOVER_OR_DISABLE ||
- method == RESET_TYPE_RECOVER_OR_ALL) &&
- efx_try_recovery(efx))
- return;
-
- if (!pending)
- return;
-
- rtnl_lock();
-
- /* We checked the state in efx_schedule_reset() but it may
- * have changed by now. Now that we have the RTNL lock,
- * it cannot change again.
- */
- if (efx->state == STATE_READY)
- (void)efx_reset(efx, method);
-
- rtnl_unlock();
-}
-
-void efx_schedule_reset(struct efx_nic *efx, enum reset_type type)
-{
- enum reset_type method;
-
- if (efx->state == STATE_RECOVERY) {
- netif_dbg(efx, drv, efx->net_dev,
- "recovering: skip scheduling %s reset\n",
- RESET_TYPE(type));
- return;
- }
-
- switch (type) {
- case RESET_TYPE_INVISIBLE:
- case RESET_TYPE_ALL:
- case RESET_TYPE_RECOVER_OR_ALL:
- case RESET_TYPE_WORLD:
- case RESET_TYPE_DISABLE:
- case RESET_TYPE_RECOVER_OR_DISABLE:
- case RESET_TYPE_DATAPATH:
- case RESET_TYPE_MC_BIST:
- case RESET_TYPE_MCDI_TIMEOUT:
- method = type;
- netif_dbg(efx, drv, efx->net_dev, "scheduling %s reset\n",
- RESET_TYPE(method));
- break;
- default:
- method = efx->type->map_reset_reason(type);
- netif_dbg(efx, drv, efx->net_dev,
- "scheduling %s reset for %s\n",
- RESET_TYPE(method), RESET_TYPE(type));
- break;
- }
-
- set_bit(method, &efx->reset_pending);
- smp_mb(); /* ensure we change reset_pending before checking state */
-
- /* If we're not READY then just leave the flags set as the cue
- * to abort probing or reschedule the reset later.
- */
- if (READ_ONCE(efx->state) != STATE_READY)
- return;
-
- /* efx_process_channel() will no longer read events once a
- * reset is scheduled. So switch back to poll'd MCDI completions. */
- efx_mcdi_mode_poll(efx);
-
- queue_work(reset_workqueue, &efx->reset_work);
-}
-
-/**************************************************************************
- *
* List of NICs we support
*
**************************************************************************/
@@ -3170,139 +1030,10 @@ static const struct pci_device_id efx_pci_table[] = {
/**************************************************************************
*
- * Dummy PHY/MAC operations
- *
- * Can be used for some unimplemented operations
- * Needed so all function pointers are valid and do not have to be tested
- * before use
- *
- **************************************************************************/
-int efx_port_dummy_op_int(struct efx_nic *efx)
-{
- return 0;
-}
-void efx_port_dummy_op_void(struct efx_nic *efx) {}
-
-static bool efx_port_dummy_op_poll(struct efx_nic *efx)
-{
- return false;
-}
-
-static const struct efx_phy_operations efx_dummy_phy_operations = {
- .init = efx_port_dummy_op_int,
- .reconfigure = efx_port_dummy_op_int,
- .poll = efx_port_dummy_op_poll,
- .fini = efx_port_dummy_op_void,
-};
-
-/**************************************************************************
- *
* Data housekeeping
*
**************************************************************************/
-/* This zeroes out and then fills in the invariants in a struct
- * efx_nic (including all sub-structures).
- */
-static int efx_init_struct(struct efx_nic *efx,
- struct pci_dev *pci_dev, struct net_device *net_dev)
-{
- int rc = -ENOMEM, i;
-
- /* Initialise common structures */
- INIT_LIST_HEAD(&efx->node);
- INIT_LIST_HEAD(&efx->secondary_list);
- spin_lock_init(&efx->biu_lock);
-#ifdef CONFIG_SFC_MTD
- INIT_LIST_HEAD(&efx->mtd_list);
-#endif
- INIT_WORK(&efx->reset_work, efx_reset_work);
- INIT_DELAYED_WORK(&efx->monitor_work, efx_monitor);
- INIT_DELAYED_WORK(&efx->selftest_work, efx_selftest_async_work);
- efx->pci_dev = pci_dev;
- efx->msg_enable = debug;
- efx->state = STATE_UNINIT;
- strlcpy(efx->name, pci_name(pci_dev), sizeof(efx->name));
-
- efx->net_dev = net_dev;
- efx->rx_prefix_size = efx->type->rx_prefix_size;
- efx->rx_ip_align =
- NET_IP_ALIGN ? (efx->rx_prefix_size + NET_IP_ALIGN) % 4 : 0;
- efx->rx_packet_hash_offset =
- efx->type->rx_hash_offset - efx->type->rx_prefix_size;
- efx->rx_packet_ts_offset =
- efx->type->rx_ts_offset - efx->type->rx_prefix_size;
- INIT_LIST_HEAD(&efx->rss_context.list);
- mutex_init(&efx->rss_lock);
- spin_lock_init(&efx->stats_lock);
- efx->vi_stride = EFX_DEFAULT_VI_STRIDE;
- efx->num_mac_stats = MC_CMD_MAC_NSTATS;
- BUILD_BUG_ON(MC_CMD_MAC_NSTATS - 1 != MC_CMD_MAC_GENERATION_END);
- mutex_init(&efx->mac_lock);
-#ifdef CONFIG_RFS_ACCEL
- mutex_init(&efx->rps_mutex);
- spin_lock_init(&efx->rps_hash_lock);
- /* Failure to allocate is not fatal, but may degrade ARFS performance */
- efx->rps_hash_table = kcalloc(EFX_ARFS_HASH_TABLE_SIZE,
- sizeof(*efx->rps_hash_table), GFP_KERNEL);
-#endif
- efx->phy_op = &efx_dummy_phy_operations;
- efx->mdio.dev = net_dev;
- INIT_WORK(&efx->mac_work, efx_mac_work);
- init_waitqueue_head(&efx->flush_wq);
-
- for (i = 0; i < EFX_MAX_CHANNELS; i++) {
- efx->channel[i] = efx_alloc_channel(efx, i, NULL);
- if (!efx->channel[i])
- goto fail;
- efx->msi_context[i].efx = efx;
- efx->msi_context[i].index = i;
- }
-
- /* Higher numbered interrupt modes are less capable! */
- if (WARN_ON_ONCE(efx->type->max_interrupt_mode >
- efx->type->min_interrupt_mode)) {
- rc = -EIO;
- goto fail;
- }
- efx->interrupt_mode = max(efx->type->max_interrupt_mode,
- interrupt_mode);
- efx->interrupt_mode = min(efx->type->min_interrupt_mode,
- interrupt_mode);
-
- /* Would be good to use the net_dev name, but we're too early */
- snprintf(efx->workqueue_name, sizeof(efx->workqueue_name), "sfc%s",
- pci_name(pci_dev));
- efx->workqueue = create_singlethread_workqueue(efx->workqueue_name);
- if (!efx->workqueue)
- goto fail;
-
- return 0;
-
-fail:
- efx_fini_struct(efx);
- return rc;
-}
-
-static void efx_fini_struct(struct efx_nic *efx)
-{
- int i;
-
-#ifdef CONFIG_RFS_ACCEL
- kfree(efx->rps_hash_table);
-#endif
-
- for (i = 0; i < EFX_MAX_CHANNELS; i++)
- kfree(efx->channel[i]);
-
- kfree(efx->vpd_sn);
-
- if (efx->workqueue) {
- destroy_workqueue(efx->workqueue);
- efx->workqueue = NULL;
- }
-}
-
void efx_update_sw_stats(struct efx_nic *efx, u64 *stats)
{
u64 n_rx_nodesc_trunc = 0;
@@ -3314,197 +1045,6 @@ void efx_update_sw_stats(struct efx_nic *efx, u64 *stats)
stats[GENERIC_STAT_rx_noskb_drops] = atomic_read(&efx->n_rx_noskb_drops);
}
-bool efx_filter_spec_equal(const struct efx_filter_spec *left,
- const struct efx_filter_spec *right)
-{
- if ((left->match_flags ^ right->match_flags) |
- ((left->flags ^ right->flags) &
- (EFX_FILTER_FLAG_RX | EFX_FILTER_FLAG_TX)))
- return false;
-
- return memcmp(&left->outer_vid, &right->outer_vid,
- sizeof(struct efx_filter_spec) -
- offsetof(struct efx_filter_spec, outer_vid)) == 0;
-}
-
-u32 efx_filter_spec_hash(const struct efx_filter_spec *spec)
-{
- BUILD_BUG_ON(offsetof(struct efx_filter_spec, outer_vid) & 3);
- return jhash2((const u32 *)&spec->outer_vid,
- (sizeof(struct efx_filter_spec) -
- offsetof(struct efx_filter_spec, outer_vid)) / 4,
- 0);
-}
-
-#ifdef CONFIG_RFS_ACCEL
-bool efx_rps_check_rule(struct efx_arfs_rule *rule, unsigned int filter_idx,
- bool *force)
-{
- if (rule->filter_id == EFX_ARFS_FILTER_ID_PENDING) {
- /* ARFS is currently updating this entry, leave it */
- return false;
- }
- if (rule->filter_id == EFX_ARFS_FILTER_ID_ERROR) {
- /* ARFS tried and failed to update this, so it's probably out
- * of date. Remove the filter and the ARFS rule entry.
- */
- rule->filter_id = EFX_ARFS_FILTER_ID_REMOVING;
- *force = true;
- return true;
- } else if (WARN_ON(rule->filter_id != filter_idx)) { /* can't happen */
- /* ARFS has moved on, so old filter is not needed. Since we did
- * not mark the rule with EFX_ARFS_FILTER_ID_REMOVING, it will
- * not be removed by efx_rps_hash_del() subsequently.
- */
- *force = true;
- return true;
- }
- /* Remove it iff ARFS wants to. */
- return true;
-}
-
-static
-struct hlist_head *efx_rps_hash_bucket(struct efx_nic *efx,
- const struct efx_filter_spec *spec)
-{
- u32 hash = efx_filter_spec_hash(spec);
-
- lockdep_assert_held(&efx->rps_hash_lock);
- if (!efx->rps_hash_table)
- return NULL;
- return &efx->rps_hash_table[hash % EFX_ARFS_HASH_TABLE_SIZE];
-}
-
-struct efx_arfs_rule *efx_rps_hash_find(struct efx_nic *efx,
- const struct efx_filter_spec *spec)
-{
- struct efx_arfs_rule *rule;
- struct hlist_head *head;
- struct hlist_node *node;
-
- head = efx_rps_hash_bucket(efx, spec);
- if (!head)
- return NULL;
- hlist_for_each(node, head) {
- rule = container_of(node, struct efx_arfs_rule, node);
- if (efx_filter_spec_equal(spec, &rule->spec))
- return rule;
- }
- return NULL;
-}
-
-struct efx_arfs_rule *efx_rps_hash_add(struct efx_nic *efx,
- const struct efx_filter_spec *spec,
- bool *new)
-{
- struct efx_arfs_rule *rule;
- struct hlist_head *head;
- struct hlist_node *node;
-
- head = efx_rps_hash_bucket(efx, spec);
- if (!head)
- return NULL;
- hlist_for_each(node, head) {
- rule = container_of(node, struct efx_arfs_rule, node);
- if (efx_filter_spec_equal(spec, &rule->spec)) {
- *new = false;
- return rule;
- }
- }
- rule = kmalloc(sizeof(*rule), GFP_ATOMIC);
- *new = true;
- if (rule) {
- memcpy(&rule->spec, spec, sizeof(rule->spec));
- hlist_add_head(&rule->node, head);
- }
- return rule;
-}
-
-void efx_rps_hash_del(struct efx_nic *efx, const struct efx_filter_spec *spec)
-{
- struct efx_arfs_rule *rule;
- struct hlist_head *head;
- struct hlist_node *node;
-
- head = efx_rps_hash_bucket(efx, spec);
- if (WARN_ON(!head))
- return;
- hlist_for_each(node, head) {
- rule = container_of(node, struct efx_arfs_rule, node);
- if (efx_filter_spec_equal(spec, &rule->spec)) {
- /* Someone already reused the entry. We know that if
- * this check doesn't fire (i.e. filter_id == REMOVING)
- * then the REMOVING mark was put there by our caller,
- * because caller is holding a lock on filter table and
- * only holders of that lock set REMOVING.
- */
- if (rule->filter_id != EFX_ARFS_FILTER_ID_REMOVING)
- return;
- hlist_del(node);
- kfree(rule);
- return;
- }
- }
- /* We didn't find it. */
- WARN_ON(1);
-}
-#endif
-
-/* RSS contexts. We're using linked lists and crappy O(n) algorithms, because
- * (a) this is an infrequent control-plane operation and (b) n is small (max 64)
- */
-struct efx_rss_context *efx_alloc_rss_context_entry(struct efx_nic *efx)
-{
- struct list_head *head = &efx->rss_context.list;
- struct efx_rss_context *ctx, *new;
- u32 id = 1; /* Don't use zero, that refers to the master RSS context */
-
- WARN_ON(!mutex_is_locked(&efx->rss_lock));
-
- /* Search for first gap in the numbering */
- list_for_each_entry(ctx, head, list) {
- if (ctx->user_id != id)
- break;
- id++;
- /* Check for wrap. If this happens, we have nearly 2^32
- * allocated RSS contexts, which seems unlikely.
- */
- if (WARN_ON_ONCE(!id))
- return NULL;
- }
-
- /* Create the new entry */
- new = kmalloc(sizeof(struct efx_rss_context), GFP_KERNEL);
- if (!new)
- return NULL;
- new->context_id = EFX_EF10_RSS_CONTEXT_INVALID;
- new->rx_hash_udp_4tuple = false;
-
- /* Insert the new entry into the gap */
- new->user_id = id;
- list_add_tail(&new->list, &ctx->list);
- return new;
-}
-
-struct efx_rss_context *efx_find_rss_context_entry(struct efx_nic *efx, u32 id)
-{
- struct list_head *head = &efx->rss_context.list;
- struct efx_rss_context *ctx;
-
- WARN_ON(!mutex_is_locked(&efx->rss_lock));
-
- list_for_each_entry(ctx, head, list)
- if (ctx->user_id == id)
- return ctx;
- return NULL;
-}
-
-void efx_free_rss_context_entry(struct efx_rss_context *ctx)
-{
- list_del(&ctx->list);
- kfree(ctx);
-}
-
/**************************************************************************
*
* PCI interface
@@ -3520,7 +1060,7 @@ static void efx_pci_remove_main(struct efx_nic *efx)
* are not READY.
*/
BUG_ON(efx->state == STATE_READY);
- cancel_work_sync(&efx->reset_work);
+ efx_flush_reset_workqueue(efx);
efx_disable_interrupts(efx);
efx_clear_interrupt_affinity(efx);
@@ -3560,7 +1100,7 @@ static void efx_pci_remove(struct pci_dev *pci_dev)
efx_pci_remove_main(efx);
- efx_fini_io(efx);
+ efx_fini_io(efx, efx->type->mem_bar(efx));
netif_dbg(efx, drv, efx->net_dev, "shutdown successful\n");
efx_fini_struct(efx);
@@ -3783,7 +1323,8 @@ static int efx_pci_probe(struct pci_dev *pci_dev,
efx_probe_vpd_strings(efx);
/* Set up basic I/O (BAR mappings etc) */
- rc = efx_init_io(efx);
+ rc = efx_init_io(efx, efx->type->mem_bar(efx), efx->type->max_dma_mask,
+ efx->type->mem_map_size(efx));
if (rc)
goto fail2;
@@ -3827,7 +1368,7 @@ static int efx_pci_probe(struct pci_dev *pci_dev,
return 0;
fail3:
- efx_fini_io(efx);
+ efx_fini_io(efx, efx->type->mem_bar(efx));
fail2:
efx_fini_struct(efx);
fail1:
@@ -3905,7 +1446,7 @@ static int efx_pm_thaw(struct device *dev)
rtnl_unlock();
/* Reschedule any quenched resets scheduled during efx_pm_freeze() */
- queue_work(reset_workqueue, &efx->reset_work);
+ efx_queue_reset_work(efx);
return 0;
@@ -4084,10 +1625,6 @@ static struct pci_driver efx_pci_driver = {
*
*************************************************************************/
-module_param(interrupt_mode, uint, 0444);
-MODULE_PARM_DESC(interrupt_mode,
- "Interrupt mode (0=>MSIX 1=>MSI 2=>legacy)");
-
static int __init efx_init_module(void)
{
int rc;
@@ -4104,11 +1641,9 @@ static int __init efx_init_module(void)
goto err_sriov;
#endif
- reset_workqueue = create_singlethread_workqueue("sfc_reset");
- if (!reset_workqueue) {
- rc = -ENOMEM;
+ rc = efx_create_reset_workqueue();
+ if (rc)
goto err_reset;
- }
rc = pci_register_driver(&efx_pci_driver);
if (rc < 0)
@@ -4117,7 +1652,7 @@ static int __init efx_init_module(void)
return 0;
err_pci:
- destroy_workqueue(reset_workqueue);
+ efx_destroy_reset_workqueue();
err_reset:
#ifdef CONFIG_SFC_SRIOV
efx_fini_sriov();
@@ -4133,7 +1668,7 @@ static void __exit efx_exit_module(void)
printk(KERN_INFO "Solarflare NET driver unloading\n");
pci_unregister_driver(&efx_pci_driver);
- destroy_workqueue(reset_workqueue);
+ efx_destroy_reset_workqueue();
#ifdef CONFIG_SFC_SRIOV
efx_fini_sriov();
#endif
diff --git a/drivers/net/ethernet/sfc/efx.h b/drivers/net/ethernet/sfc/efx.h
index 2dd8d5002315..f1bdb04efbe4 100644
--- a/drivers/net/ethernet/sfc/efx.h
+++ b/drivers/net/ethernet/sfc/efx.h
@@ -15,31 +15,17 @@ int efx_net_open(struct net_device *net_dev);
int efx_net_stop(struct net_device *net_dev);
/* TX */
-int efx_probe_tx_queue(struct efx_tx_queue *tx_queue);
-void efx_remove_tx_queue(struct efx_tx_queue *tx_queue);
-void efx_init_tx_queue(struct efx_tx_queue *tx_queue);
void efx_init_tx_queue_core_txq(struct efx_tx_queue *tx_queue);
-void efx_fini_tx_queue(struct efx_tx_queue *tx_queue);
netdev_tx_t efx_hard_start_xmit(struct sk_buff *skb,
struct net_device *net_dev);
netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb);
void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index);
int efx_setup_tc(struct net_device *net_dev, enum tc_setup_type type,
void *type_data);
-unsigned int efx_tx_max_skb_descs(struct efx_nic *efx);
extern unsigned int efx_piobuf_size;
extern bool efx_separate_tx_channels;
/* RX */
-void efx_set_default_rx_indir_table(struct efx_nic *efx,
- struct efx_rss_context *ctx);
-void efx_rx_config_page_split(struct efx_nic *efx);
-int efx_probe_rx_queue(struct efx_rx_queue *rx_queue);
-void efx_remove_rx_queue(struct efx_rx_queue *rx_queue);
-void efx_init_rx_queue(struct efx_rx_queue *rx_queue);
-void efx_fini_rx_queue(struct efx_rx_queue *rx_queue);
-void efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue, bool atomic);
-void efx_rx_slow_fill(struct timer_list *t);
void __efx_rx_packet(struct efx_channel *channel);
void efx_rx_packet(struct efx_rx_queue *rx_queue, unsigned int index,
unsigned int n_frags, unsigned int len, u16 flags);
@@ -48,7 +34,6 @@ static inline void efx_rx_flush_packet(struct efx_channel *channel)
if (channel->rx_pkt_n_frags)
__efx_rx_packet(channel);
}
-void efx_schedule_slow_fill(struct efx_rx_queue *rx_queue);
#define EFX_MAX_DMAQ_SIZE 4096UL
#define EFX_DEFAULT_DMAQ_SIZE 1024UL
@@ -80,8 +65,6 @@ static inline bool efx_rss_enabled(struct efx_nic *efx)
/* Filters */
-void efx_mac_reconfigure(struct efx_nic *efx);
-
/**
* efx_filter_insert_filter - add or replace a filter
* @efx: NIC in which to insert the filter
@@ -186,58 +169,17 @@ static inline void efx_filter_rfs_expire(struct work_struct *data)
static inline void efx_filter_rfs_expire(struct work_struct *data) {}
#define efx_filter_rfs_enabled() 0
#endif
-bool efx_filter_is_mc_recipient(const struct efx_filter_spec *spec);
-
-bool efx_filter_spec_equal(const struct efx_filter_spec *left,
- const struct efx_filter_spec *right);
-u32 efx_filter_spec_hash(const struct efx_filter_spec *spec);
-
-#ifdef CONFIG_RFS_ACCEL
-bool efx_rps_check_rule(struct efx_arfs_rule *rule, unsigned int filter_idx,
- bool *force);
-
-struct efx_arfs_rule *efx_rps_hash_find(struct efx_nic *efx,
- const struct efx_filter_spec *spec);
-
-/* @new is written to indicate if entry was newly added (true) or if an old
- * entry was found and returned (false).
- */
-struct efx_arfs_rule *efx_rps_hash_add(struct efx_nic *efx,
- const struct efx_filter_spec *spec,
- bool *new);
-
-void efx_rps_hash_del(struct efx_nic *efx, const struct efx_filter_spec *spec);
-#endif
/* RSS contexts */
-struct efx_rss_context *efx_alloc_rss_context_entry(struct efx_nic *efx);
-struct efx_rss_context *efx_find_rss_context_entry(struct efx_nic *efx, u32 id);
-void efx_free_rss_context_entry(struct efx_rss_context *ctx);
static inline bool efx_rss_active(struct efx_rss_context *ctx)
{
- return ctx->context_id != EFX_EF10_RSS_CONTEXT_INVALID;
+ return ctx->context_id != EFX_MCDI_RSS_CONTEXT_INVALID;
}
-/* Channels */
-int efx_channel_dummy_op_int(struct efx_channel *channel);
-void efx_channel_dummy_op_void(struct efx_channel *channel);
-int efx_realloc_channels(struct efx_nic *efx, u32 rxq_entries, u32 txq_entries);
-
-/* Ports */
-int efx_reconfigure_port(struct efx_nic *efx);
-int __efx_reconfigure_port(struct efx_nic *efx);
-
/* Ethtool support */
extern const struct ethtool_ops efx_ethtool_ops;
-/* Reset handling */
-int efx_reset(struct efx_nic *efx, enum reset_type method);
-void efx_reset_down(struct efx_nic *efx, enum reset_type method);
-int efx_reset_up(struct efx_nic *efx, enum reset_type method, bool ok);
-int efx_try_recovery(struct efx_nic *efx);
-
/* Global */
-void efx_schedule_reset(struct efx_nic *efx, enum reset_type type);
unsigned int efx_usecs_to_ticks(struct efx_nic *efx, unsigned int usecs);
unsigned int efx_ticks_to_usecs(struct efx_nic *efx, unsigned int ticks);
int efx_init_irq_moderation(struct efx_nic *efx, unsigned int tx_usecs,
@@ -245,8 +187,6 @@ int efx_init_irq_moderation(struct efx_nic *efx, unsigned int tx_usecs,
bool rx_may_override_tx);
void efx_get_irq_moderation(struct efx_nic *efx, unsigned int *tx_usecs,
unsigned int *rx_usecs, bool *rx_adaptive);
-void efx_stop_eventq(struct efx_channel *channel);
-void efx_start_eventq(struct efx_channel *channel);
/* Dummy PHY ops for PHY drivers */
int efx_port_dummy_op_int(struct efx_nic *efx);
@@ -293,9 +233,6 @@ static inline void efx_schedule_channel_irq(struct efx_channel *channel)
efx_schedule_channel(channel);
}
-void efx_link_status_changed(struct efx_nic *efx);
-void efx_link_set_advertising(struct efx_nic *efx,
- const unsigned long *advertising);
void efx_link_clear_advertising(struct efx_nic *efx);
void efx_link_set_wanted_fc(struct efx_nic *efx, u8);
diff --git a/drivers/net/ethernet/sfc/efx_channels.c b/drivers/net/ethernet/sfc/efx_channels.c
new file mode 100644
index 000000000000..aeb5e8aa2f2a
--- /dev/null
+++ b/drivers/net/ethernet/sfc/efx_channels.c
@@ -0,0 +1,1234 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2018 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#include "net_driver.h"
+#include <linux/module.h>
+#include "efx_channels.h"
+#include "efx.h"
+#include "efx_common.h"
+#include "tx_common.h"
+#include "rx_common.h"
+#include "nic.h"
+#include "sriov.h"
+
+/* This is the first interrupt mode to try out of:
+ * 0 => MSI-X
+ * 1 => MSI
+ * 2 => legacy
+ */
+static unsigned int interrupt_mode;
+module_param(interrupt_mode, uint, 0444);
+MODULE_PARM_DESC(interrupt_mode,
+ "Interrupt mode (0=>MSIX 1=>MSI 2=>legacy)");
+
+/* This is the requested number of CPUs to use for Receive-Side Scaling (RSS),
+ * i.e. the number of CPUs among which we may distribute simultaneous
+ * interrupt handling.
+ *
+ * Cards without MSI-X will only target one CPU via legacy or MSI interrupt.
+ * The default (0) means to assign an interrupt to each core.
+ */
+static unsigned int rss_cpus;
+module_param(rss_cpus, uint, 0444);
+MODULE_PARM_DESC(rss_cpus, "Number of CPUs to use for Receive-Side Scaling");
+
+static unsigned int irq_adapt_low_thresh = 8000;
+module_param(irq_adapt_low_thresh, uint, 0644);
+MODULE_PARM_DESC(irq_adapt_low_thresh,
+ "Threshold score for reducing IRQ moderation");
+
+static unsigned int irq_adapt_high_thresh = 16000;
+module_param(irq_adapt_high_thresh, uint, 0644);
+MODULE_PARM_DESC(irq_adapt_high_thresh,
+ "Threshold score for increasing IRQ moderation");
+
+/* This is the weight assigned to each of the (per-channel) virtual
+ * NAPI devices.
+ */
+static int napi_weight = 64;
+
+/***************
+ * Housekeeping
+ ***************/
+
+int efx_channel_dummy_op_int(struct efx_channel *channel)
+{
+ return 0;
+}
+
+void efx_channel_dummy_op_void(struct efx_channel *channel)
+{
+}
+
+static const struct efx_channel_type efx_default_channel_type = {
+ .pre_probe = efx_channel_dummy_op_int,
+ .post_remove = efx_channel_dummy_op_void,
+ .get_name = efx_get_channel_name,
+ .copy = efx_copy_channel,
+ .want_txqs = efx_default_channel_want_txqs,
+ .keep_eventq = false,
+ .want_pio = true,
+};
+
+/*************
+ * INTERRUPTS
+ *************/
+
+static unsigned int efx_wanted_parallelism(struct efx_nic *efx)
+{
+ cpumask_var_t thread_mask;
+ unsigned int count;
+ int cpu;
+
+ if (rss_cpus) {
+ count = rss_cpus;
+ } else {
+ if (unlikely(!zalloc_cpumask_var(&thread_mask, GFP_KERNEL))) {
+ netif_warn(efx, probe, efx->net_dev,
+ "RSS disabled due to allocation failure\n");
+ return 1;
+ }
+
+ count = 0;
+ for_each_online_cpu(cpu) {
+ if (!cpumask_test_cpu(cpu, thread_mask)) {
+ ++count;
+ cpumask_or(thread_mask, thread_mask,
+ topology_sibling_cpumask(cpu));
+ }
+ }
+
+ free_cpumask_var(thread_mask);
+ }
+
+ if (count > EFX_MAX_RX_QUEUES) {
+ netif_cond_dbg(efx, probe, efx->net_dev, !rss_cpus, warn,
+ "Reducing number of rx queues from %u to %u.\n",
+ count, EFX_MAX_RX_QUEUES);
+ count = EFX_MAX_RX_QUEUES;
+ }
+
+ /* If RSS is requested for the PF *and* VFs then we can't write RSS
+ * table entries that are inaccessible to VFs
+ */
+#ifdef CONFIG_SFC_SRIOV
+ if (efx->type->sriov_wanted) {
+ if (efx->type->sriov_wanted(efx) && efx_vf_size(efx) > 1 &&
+ count > efx_vf_size(efx)) {
+ netif_warn(efx, probe, efx->net_dev,
+ "Reducing number of RSS channels from %u to %u for "
+ "VF support. Increase vf-msix-limit to use more "
+ "channels on the PF.\n",
+ count, efx_vf_size(efx));
+ count = efx_vf_size(efx);
+ }
+ }
+#endif
+
+ return count;
+}
+
+static int efx_allocate_msix_channels(struct efx_nic *efx,
+ unsigned int max_channels,
+ unsigned int extra_channels,
+ unsigned int parallelism)
+{
+ unsigned int n_channels = parallelism;
+ int vec_count;
+ int n_xdp_tx;
+ int n_xdp_ev;
+
+ if (efx_separate_tx_channels)
+ n_channels *= 2;
+ n_channels += extra_channels;
+
+ /* To allow XDP transmit to happen from arbitrary NAPI contexts
+ * we allocate a TX queue per CPU. We share event queues across
+ * multiple tx queues, assuming tx and ev queues are both
+ * maximum size.
+ */
+
+ n_xdp_tx = num_possible_cpus();
+ n_xdp_ev = DIV_ROUND_UP(n_xdp_tx, EFX_TXQ_TYPES);
+
+ vec_count = pci_msix_vec_count(efx->pci_dev);
+ if (vec_count < 0)
+ return vec_count;
+
+ max_channels = min_t(unsigned int, vec_count, max_channels);
+
+ /* Check resources.
+ * We need a channel per event queue, plus a VI per tx queue.
+ * This may be more pessimistic than it needs to be.
+ */
+ if (n_channels + n_xdp_ev > max_channels) {
+ netif_err(efx, drv, efx->net_dev,
+ "Insufficient resources for %d XDP event queues (%d other channels, max %d)\n",
+ n_xdp_ev, n_channels, max_channels);
+ efx->n_xdp_channels = 0;
+ efx->xdp_tx_per_channel = 0;
+ efx->xdp_tx_queue_count = 0;
+ } else {
+ efx->n_xdp_channels = n_xdp_ev;
+ efx->xdp_tx_per_channel = EFX_TXQ_TYPES;
+ efx->xdp_tx_queue_count = n_xdp_tx;
+ n_channels += n_xdp_ev;
+ netif_dbg(efx, drv, efx->net_dev,
+ "Allocating %d TX and %d event queues for XDP\n",
+ n_xdp_tx, n_xdp_ev);
+ }
+
+ if (vec_count < n_channels) {
+ netif_err(efx, drv, efx->net_dev,
+ "WARNING: Insufficient MSI-X vectors available (%d < %u).\n",
+ vec_count, n_channels);
+ netif_err(efx, drv, efx->net_dev,
+ "WARNING: Performance may be reduced.\n");
+ n_channels = vec_count;
+ }
+
+ n_channels = min(n_channels, max_channels);
+
+ efx->n_channels = n_channels;
+
+ /* Ignore XDP tx channels when creating rx channels. */
+ n_channels -= efx->n_xdp_channels;
+
+ if (efx_separate_tx_channels) {
+ efx->n_tx_channels =
+ min(max(n_channels / 2, 1U),
+ efx->max_tx_channels);
+ efx->tx_channel_offset =
+ n_channels - efx->n_tx_channels;
+ efx->n_rx_channels =
+ max(n_channels -
+ efx->n_tx_channels, 1U);
+ } else {
+ efx->n_tx_channels = min(n_channels, efx->max_tx_channels);
+ efx->tx_channel_offset = 0;
+ efx->n_rx_channels = n_channels;
+ }
+
+ efx->n_rx_channels = min(efx->n_rx_channels, parallelism);
+ efx->n_tx_channels = min(efx->n_tx_channels, parallelism);
+
+ efx->xdp_channel_offset = n_channels;
+
+ netif_dbg(efx, drv, efx->net_dev,
+ "Allocating %u RX channels\n",
+ efx->n_rx_channels);
+
+ return efx->n_channels;
+}
+
+/* Probe the number and type of interrupts we are able to obtain, and
+ * the resulting numbers of channels and RX queues.
+ */
+int efx_probe_interrupts(struct efx_nic *efx)
+{
+ unsigned int extra_channels = 0;
+ unsigned int rss_spread;
+ unsigned int i, j;
+ int rc;
+
+ for (i = 0; i < EFX_MAX_EXTRA_CHANNELS; i++)
+ if (efx->extra_channel_type[i])
+ ++extra_channels;
+
+ if (efx->interrupt_mode == EFX_INT_MODE_MSIX) {
+ unsigned int parallelism = efx_wanted_parallelism(efx);
+ struct msix_entry xentries[EFX_MAX_CHANNELS];
+ unsigned int n_channels;
+
+ rc = efx_allocate_msix_channels(efx, efx->max_channels,
+ extra_channels, parallelism);
+ if (rc >= 0) {
+ n_channels = rc;
+ for (i = 0; i < n_channels; i++)
+ xentries[i].entry = i;
+ rc = pci_enable_msix_range(efx->pci_dev, xentries, 1,
+ n_channels);
+ }
+ if (rc < 0) {
+ /* Fall back to single channel MSI */
+ netif_err(efx, drv, efx->net_dev,
+ "could not enable MSI-X\n");
+ if (efx->type->min_interrupt_mode >= EFX_INT_MODE_MSI)
+ efx->interrupt_mode = EFX_INT_MODE_MSI;
+ else
+ return rc;
+ } else if (rc < n_channels) {
+ netif_err(efx, drv, efx->net_dev,
+ "WARNING: Insufficient MSI-X vectors"
+ " available (%d < %u).\n", rc, n_channels);
+ netif_err(efx, drv, efx->net_dev,
+ "WARNING: Performance may be reduced.\n");
+ n_channels = rc;
+ }
+
+ if (rc > 0) {
+ for (i = 0; i < efx->n_channels; i++)
+ efx_get_channel(efx, i)->irq =
+ xentries[i].vector;
+ }
+ }
+
+ /* Try single interrupt MSI */
+ if (efx->interrupt_mode == EFX_INT_MODE_MSI) {
+ efx->n_channels = 1;
+ efx->n_rx_channels = 1;
+ efx->n_tx_channels = 1;
+ efx->n_xdp_channels = 0;
+ efx->xdp_channel_offset = efx->n_channels;
+ rc = pci_enable_msi(efx->pci_dev);
+ if (rc == 0) {
+ efx_get_channel(efx, 0)->irq = efx->pci_dev->irq;
+ } else {
+ netif_err(efx, drv, efx->net_dev,
+ "could not enable MSI\n");
+ if (efx->type->min_interrupt_mode >= EFX_INT_MODE_LEGACY)
+ efx->interrupt_mode = EFX_INT_MODE_LEGACY;
+ else
+ return rc;
+ }
+ }
+
+ /* Assume legacy interrupts */
+ if (efx->interrupt_mode == EFX_INT_MODE_LEGACY) {
+ efx->n_channels = 1 + (efx_separate_tx_channels ? 1 : 0);
+ efx->n_rx_channels = 1;
+ efx->n_tx_channels = 1;
+ efx->n_xdp_channels = 0;
+ efx->xdp_channel_offset = efx->n_channels;
+ efx->legacy_irq = efx->pci_dev->irq;
+ }
+
+ /* Assign extra channels if possible, before XDP channels */
+ efx->n_extra_tx_channels = 0;
+ j = efx->xdp_channel_offset;
+ for (i = 0; i < EFX_MAX_EXTRA_CHANNELS; i++) {
+ if (!efx->extra_channel_type[i])
+ continue;
+ if (j <= efx->tx_channel_offset + efx->n_tx_channels) {
+ efx->extra_channel_type[i]->handle_no_channel(efx);
+ } else {
+ --j;
+ efx_get_channel(efx, j)->type =
+ efx->extra_channel_type[i];
+ if (efx_channel_has_tx_queues(efx_get_channel(efx, j)))
+ efx->n_extra_tx_channels++;
+ }
+ }
+
+ rss_spread = efx->n_rx_channels;
+ /* RSS might be usable on VFs even if it is disabled on the PF */
+#ifdef CONFIG_SFC_SRIOV
+ if (efx->type->sriov_wanted) {
+ efx->rss_spread = ((rss_spread > 1 ||
+ !efx->type->sriov_wanted(efx)) ?
+ rss_spread : efx_vf_size(efx));
+ return 0;
+ }
+#endif
+ efx->rss_spread = rss_spread;
+
+ return 0;
+}
+
+#if defined(CONFIG_SMP)
+void efx_set_interrupt_affinity(struct efx_nic *efx)
+{
+ struct efx_channel *channel;
+ unsigned int cpu;
+
+ efx_for_each_channel(channel, efx) {
+ cpu = cpumask_local_spread(channel->channel,
+ pcibus_to_node(efx->pci_dev->bus));
+ irq_set_affinity_hint(channel->irq, cpumask_of(cpu));
+ }
+}
+
+void efx_clear_interrupt_affinity(struct efx_nic *efx)
+{
+ struct efx_channel *channel;
+
+ efx_for_each_channel(channel, efx)
+ irq_set_affinity_hint(channel->irq, NULL);
+}
+#else
+void
+efx_set_interrupt_affinity(struct efx_nic *efx __attribute__ ((unused)))
+{
+}
+
+void
+efx_clear_interrupt_affinity(struct efx_nic *efx __attribute__ ((unused)))
+{
+}
+#endif /* CONFIG_SMP */
+
+void efx_remove_interrupts(struct efx_nic *efx)
+{
+ struct efx_channel *channel;
+
+ /* Remove MSI/MSI-X interrupts */
+ efx_for_each_channel(channel, efx)
+ channel->irq = 0;
+ pci_disable_msi(efx->pci_dev);
+ pci_disable_msix(efx->pci_dev);
+
+ /* Remove legacy interrupt */
+ efx->legacy_irq = 0;
+}
+
+/***************
+ * EVENT QUEUES
+ ***************/
+
+/* Create event queue
+ * Event queue memory allocations are done only once. If the channel
+ * is reset, the memory buffer will be reused; this guards against
+ * errors during channel reset and also simplifies interrupt handling.
+ */
+int efx_probe_eventq(struct efx_channel *channel)
+{
+ struct efx_nic *efx = channel->efx;
+ unsigned long entries;
+
+ netif_dbg(efx, probe, efx->net_dev,
+ "chan %d create event queue\n", channel->channel);
+
+ /* Build an event queue with room for one event per tx and rx buffer,
+ * plus some extra for link state events and MCDI completions.
+ */
+ entries = roundup_pow_of_two(efx->rxq_entries + efx->txq_entries + 128);
+ EFX_WARN_ON_PARANOID(entries > EFX_MAX_EVQ_SIZE);
+ channel->eventq_mask = max(entries, EFX_MIN_EVQ_SIZE) - 1;
+
+ return efx_nic_probe_eventq(channel);
+}
+
+/* Prepare channel's event queue */
+int efx_init_eventq(struct efx_channel *channel)
+{
+ struct efx_nic *efx = channel->efx;
+ int rc;
+
+ EFX_WARN_ON_PARANOID(channel->eventq_init);
+
+ netif_dbg(efx, drv, efx->net_dev,
+ "chan %d init event queue\n", channel->channel);
+
+ rc = efx_nic_init_eventq(channel);
+ if (rc == 0) {
+ efx->type->push_irq_moderation(channel);
+ channel->eventq_read_ptr = 0;
+ channel->eventq_init = true;
+ }
+ return rc;
+}
+
+/* Enable event queue processing and NAPI */
+void efx_start_eventq(struct efx_channel *channel)
+{
+ netif_dbg(channel->efx, ifup, channel->efx->net_dev,
+ "chan %d start event queue\n", channel->channel);
+
+ /* Make sure the NAPI handler sees the enabled flag set */
+ channel->enabled = true;
+ smp_wmb();
+
+ napi_enable(&channel->napi_str);
+ efx_nic_eventq_read_ack(channel);
+}
+
+/* Disable event queue processing and NAPI */
+void efx_stop_eventq(struct efx_channel *channel)
+{
+ if (!channel->enabled)
+ return;
+
+ napi_disable(&channel->napi_str);
+ channel->enabled = false;
+}
+
+void efx_fini_eventq(struct efx_channel *channel)
+{
+ if (!channel->eventq_init)
+ return;
+
+ netif_dbg(channel->efx, drv, channel->efx->net_dev,
+ "chan %d fini event queue\n", channel->channel);
+
+ efx_nic_fini_eventq(channel);
+ channel->eventq_init = false;
+}
+
+void efx_remove_eventq(struct efx_channel *channel)
+{
+ netif_dbg(channel->efx, drv, channel->efx->net_dev,
+ "chan %d remove event queue\n", channel->channel);
+
+ efx_nic_remove_eventq(channel);
+}
+
+/**************************************************************************
+ *
+ * Channel handling
+ *
+ *************************************************************************/
+
+/* Allocate and initialise a channel structure. */
+struct efx_channel *
+efx_alloc_channel(struct efx_nic *efx, int i, struct efx_channel *old_channel)
+{
+ struct efx_rx_queue *rx_queue;
+ struct efx_tx_queue *tx_queue;
+ struct efx_channel *channel;
+ int j;
+
+ channel = kzalloc(sizeof(*channel), GFP_KERNEL);
+ if (!channel)
+ return NULL;
+
+ channel->efx = efx;
+ channel->channel = i;
+ channel->type = &efx_default_channel_type;
+
+ for (j = 0; j < EFX_TXQ_TYPES; j++) {
+ tx_queue = &channel->tx_queue[j];
+ tx_queue->efx = efx;
+ tx_queue->queue = i * EFX_TXQ_TYPES + j;
+ tx_queue->channel = channel;
+ }
+
+#ifdef CONFIG_RFS_ACCEL
+ INIT_DELAYED_WORK(&channel->filter_work, efx_filter_rfs_expire);
+#endif
+
+ rx_queue = &channel->rx_queue;
+ rx_queue->efx = efx;
+ timer_setup(&rx_queue->slow_fill, efx_rx_slow_fill, 0);
+
+ return channel;
+}
+
+int efx_init_channels(struct efx_nic *efx)
+{
+ unsigned int i;
+
+ for (i = 0; i < EFX_MAX_CHANNELS; i++) {
+ efx->channel[i] = efx_alloc_channel(efx, i, NULL);
+ if (!efx->channel[i])
+ return -ENOMEM;
+ efx->msi_context[i].efx = efx;
+ efx->msi_context[i].index = i;
+ }
+
+ /* Higher numbered interrupt modes are less capable! */
+ if (WARN_ON_ONCE(efx->type->max_interrupt_mode >
+ efx->type->min_interrupt_mode)) {
+ return -EIO;
+ }
+ efx->interrupt_mode = max(efx->type->max_interrupt_mode,
+ interrupt_mode);
+ efx->interrupt_mode = min(efx->type->min_interrupt_mode,
+ interrupt_mode);
+
+ return 0;
+}
+
+void efx_fini_channels(struct efx_nic *efx)
+{
+ unsigned int i;
+
+ for (i = 0; i < EFX_MAX_CHANNELS; i++)
+ if (efx->channel[i]) {
+ kfree(efx->channel[i]);
+ efx->channel[i] = NULL;
+ }
+}
+
+/* Allocate and initialise a channel structure, copying parameters
+ * (but not resources) from an old channel structure.
+ */
+struct efx_channel *efx_copy_channel(const struct efx_channel *old_channel)
+{
+ struct efx_rx_queue *rx_queue;
+ struct efx_tx_queue *tx_queue;
+ struct efx_channel *channel;
+ int j;
+
+ channel = kmalloc(sizeof(*channel), GFP_KERNEL);
+ if (!channel)
+ return NULL;
+
+ *channel = *old_channel;
+
+ channel->napi_dev = NULL;
+ INIT_HLIST_NODE(&channel->napi_str.napi_hash_node);
+ channel->napi_str.napi_id = 0;
+ channel->napi_str.state = 0;
+ memset(&channel->eventq, 0, sizeof(channel->eventq));
+
+ for (j = 0; j < EFX_TXQ_TYPES; j++) {
+ tx_queue = &channel->tx_queue[j];
+ if (tx_queue->channel)
+ tx_queue->channel = channel;
+ tx_queue->buffer = NULL;
+ memset(&tx_queue->txd, 0, sizeof(tx_queue->txd));
+ }
+
+ rx_queue = &channel->rx_queue;
+ rx_queue->buffer = NULL;
+ memset(&rx_queue->rxd, 0, sizeof(rx_queue->rxd));
+ timer_setup(&rx_queue->slow_fill, efx_rx_slow_fill, 0);
+#ifdef CONFIG_RFS_ACCEL
+ INIT_DELAYED_WORK(&channel->filter_work, efx_filter_rfs_expire);
+#endif
+
+ return channel;
+}
+
+static int efx_probe_channel(struct efx_channel *channel)
+{
+ struct efx_tx_queue *tx_queue;
+ struct efx_rx_queue *rx_queue;
+ int rc;
+
+ netif_dbg(channel->efx, probe, channel->efx->net_dev,
+ "creating channel %d\n", channel->channel);
+
+ rc = channel->type->pre_probe(channel);
+ if (rc)
+ goto fail;
+
+ rc = efx_probe_eventq(channel);
+ if (rc)
+ goto fail;
+
+ efx_for_each_channel_tx_queue(tx_queue, channel) {
+ rc = efx_probe_tx_queue(tx_queue);
+ if (rc)
+ goto fail;
+ }
+
+ efx_for_each_channel_rx_queue(rx_queue, channel) {
+ rc = efx_probe_rx_queue(rx_queue);
+ if (rc)
+ goto fail;
+ }
+
+ channel->rx_list = NULL;
+
+ return 0;
+
+fail:
+ efx_remove_channel(channel);
+ return rc;
+}
+
+void efx_get_channel_name(struct efx_channel *channel, char *buf, size_t len)
+{
+ struct efx_nic *efx = channel->efx;
+ const char *type;
+ int number;
+
+ number = channel->channel;
+
+ if (number >= efx->xdp_channel_offset &&
+ !WARN_ON_ONCE(!efx->n_xdp_channels)) {
+ type = "-xdp";
+ number -= efx->xdp_channel_offset;
+ } else if (efx->tx_channel_offset == 0) {
+ type = "";
+ } else if (number < efx->tx_channel_offset) {
+ type = "-rx";
+ } else {
+ type = "-tx";
+ number -= efx->tx_channel_offset;
+ }
+ snprintf(buf, len, "%s%s-%d", efx->name, type, number);
+}
+
+void efx_set_channel_names(struct efx_nic *efx)
+{
+ struct efx_channel *channel;
+
+ efx_for_each_channel(channel, efx)
+ channel->type->get_name(channel,
+ efx->msi_context[channel->channel].name,
+ sizeof(efx->msi_context[0].name));
+}
+
+int efx_probe_channels(struct efx_nic *efx)
+{
+ struct efx_channel *channel;
+ int rc;
+
+ /* Restart special buffer allocation */
+ efx->next_buffer_table = 0;
+
+ /* Probe channels in reverse, so that any 'extra' channels
+ * use the start of the buffer table. This allows the traffic
+ * channels to be resized without moving them or wasting the
+ * entries before them.
+ */
+ efx_for_each_channel_rev(channel, efx) {
+ rc = efx_probe_channel(channel);
+ if (rc) {
+ netif_err(efx, probe, efx->net_dev,
+ "failed to create channel %d\n",
+ channel->channel);
+ goto fail;
+ }
+ }
+ efx_set_channel_names(efx);
+
+ return 0;
+
+fail:
+ efx_remove_channels(efx);
+ return rc;
+}
+
+void efx_remove_channel(struct efx_channel *channel)
+{
+ struct efx_tx_queue *tx_queue;
+ struct efx_rx_queue *rx_queue;
+
+ netif_dbg(channel->efx, drv, channel->efx->net_dev,
+ "destroy chan %d\n", channel->channel);
+
+ efx_for_each_channel_rx_queue(rx_queue, channel)
+ efx_remove_rx_queue(rx_queue);
+ efx_for_each_possible_channel_tx_queue(tx_queue, channel)
+ efx_remove_tx_queue(tx_queue);
+ efx_remove_eventq(channel);
+ channel->type->post_remove(channel);
+}
+
+void efx_remove_channels(struct efx_nic *efx)
+{
+ struct efx_channel *channel;
+
+ efx_for_each_channel(channel, efx)
+ efx_remove_channel(channel);
+
+ kfree(efx->xdp_tx_queues);
+}
+
+int efx_realloc_channels(struct efx_nic *efx, u32 rxq_entries, u32 txq_entries)
+{
+ struct efx_channel *other_channel[EFX_MAX_CHANNELS], *channel;
+ unsigned int i, next_buffer_table = 0;
+ u32 old_rxq_entries, old_txq_entries;
+ int rc, rc2;
+
+ rc = efx_check_disabled(efx);
+ if (rc)
+ return rc;
+
+ /* Not all channels should be reallocated. We must avoid
+ * reallocating their buffer table entries.
+ */
+ efx_for_each_channel(channel, efx) {
+ struct efx_rx_queue *rx_queue;
+ struct efx_tx_queue *tx_queue;
+
+ if (channel->type->copy)
+ continue;
+ next_buffer_table = max(next_buffer_table,
+ channel->eventq.index +
+ channel->eventq.entries);
+ efx_for_each_channel_rx_queue(rx_queue, channel)
+ next_buffer_table = max(next_buffer_table,
+ rx_queue->rxd.index +
+ rx_queue->rxd.entries);
+ efx_for_each_channel_tx_queue(tx_queue, channel)
+ next_buffer_table = max(next_buffer_table,
+ tx_queue->txd.index +
+ tx_queue->txd.entries);
+ }
+
+ efx_device_detach_sync(efx);
+ efx_stop_all(efx);
+ efx_soft_disable_interrupts(efx);
+
+ /* Clone channels (where possible) */
+ memset(other_channel, 0, sizeof(other_channel));
+ for (i = 0; i < efx->n_channels; i++) {
+ channel = efx->channel[i];
+ if (channel->type->copy)
+ channel = channel->type->copy(channel);
+ if (!channel) {
+ rc = -ENOMEM;
+ goto out;
+ }
+ other_channel[i] = channel;
+ }
+
+ /* Swap entry counts and channel pointers */
+ old_rxq_entries = efx->rxq_entries;
+ old_txq_entries = efx->txq_entries;
+ efx->rxq_entries = rxq_entries;
+ efx->txq_entries = txq_entries;
+ for (i = 0; i < efx->n_channels; i++) {
+ channel = efx->channel[i];
+ efx->channel[i] = other_channel[i];
+ other_channel[i] = channel;
+ }
+
+ /* Restart buffer table allocation */
+ efx->next_buffer_table = next_buffer_table;
+
+ for (i = 0; i < efx->n_channels; i++) {
+ channel = efx->channel[i];
+ if (!channel->type->copy)
+ continue;
+ rc = efx_probe_channel(channel);
+ if (rc)
+ goto rollback;
+ efx_init_napi_channel(efx->channel[i]);
+ }
+
+out:
+ /* Destroy unused channel structures */
+ for (i = 0; i < efx->n_channels; i++) {
+ channel = other_channel[i];
+ if (channel && channel->type->copy) {
+ efx_fini_napi_channel(channel);
+ efx_remove_channel(channel);
+ kfree(channel);
+ }
+ }
+
+ rc2 = efx_soft_enable_interrupts(efx);
+ if (rc2) {
+ rc = rc ? rc : rc2;
+ netif_err(efx, drv, efx->net_dev,
+ "unable to restart interrupts on channel reallocation\n");
+ efx_schedule_reset(efx, RESET_TYPE_DISABLE);
+ } else {
+ efx_start_all(efx);
+ efx_device_attach_if_not_resetting(efx);
+ }
+ return rc;
+
+rollback:
+ /* Swap back */
+ efx->rxq_entries = old_rxq_entries;
+ efx->txq_entries = old_txq_entries;
+ for (i = 0; i < efx->n_channels; i++) {
+ channel = efx->channel[i];
+ efx->channel[i] = other_channel[i];
+ other_channel[i] = channel;
+ }
+ goto out;
+}
+
+int efx_set_channels(struct efx_nic *efx)
+{
+ struct efx_channel *channel;
+ struct efx_tx_queue *tx_queue;
+ int xdp_queue_number;
+
+ efx->tx_channel_offset =
+ efx_separate_tx_channels ?
+ efx->n_channels - efx->n_tx_channels : 0;
+
+ if (efx->xdp_tx_queue_count) {
+ EFX_WARN_ON_PARANOID(efx->xdp_tx_queues);
+
+ /* Allocate array for XDP TX queue lookup. */
+ efx->xdp_tx_queues = kcalloc(efx->xdp_tx_queue_count,
+ sizeof(*efx->xdp_tx_queues),
+ GFP_KERNEL);
+ if (!efx->xdp_tx_queues)
+ return -ENOMEM;
+ }
+
+ /* We need to mark which channels really have RX and TX
+ * queues, and adjust the TX queue numbers if we have separate
+ * RX-only and TX-only channels.
+ */
+ xdp_queue_number = 0;
+ efx_for_each_channel(channel, efx) {
+ if (channel->channel < efx->n_rx_channels)
+ channel->rx_queue.core_index = channel->channel;
+ else
+ channel->rx_queue.core_index = -1;
+
+ efx_for_each_channel_tx_queue(tx_queue, channel) {
+ tx_queue->queue -= (efx->tx_channel_offset *
+ EFX_TXQ_TYPES);
+
+ if (efx_channel_is_xdp_tx(channel) &&
+ xdp_queue_number < efx->xdp_tx_queue_count) {
+ efx->xdp_tx_queues[xdp_queue_number] = tx_queue;
+ xdp_queue_number++;
+ }
+ }
+ }
+ return 0;
+}
+
+bool efx_default_channel_want_txqs(struct efx_channel *channel)
+{
+ return channel->channel - channel->efx->tx_channel_offset <
+ channel->efx->n_tx_channels;
+}
+
+/*************
+ * START/STOP
+ *************/
+
+int efx_soft_enable_interrupts(struct efx_nic *efx)
+{
+ struct efx_channel *channel, *end_channel;
+ int rc;
+
+ BUG_ON(efx->state == STATE_DISABLED);
+
+ efx->irq_soft_enabled = true;
+ smp_wmb();
+
+ efx_for_each_channel(channel, efx) {
+ if (!channel->type->keep_eventq) {
+ rc = efx_init_eventq(channel);
+ if (rc)
+ goto fail;
+ }
+ efx_start_eventq(channel);
+ }
+
+ efx_mcdi_mode_event(efx);
+
+ return 0;
+fail:
+ end_channel = channel;
+ efx_for_each_channel(channel, efx) {
+ if (channel == end_channel)
+ break;
+ efx_stop_eventq(channel);
+ if (!channel->type->keep_eventq)
+ efx_fini_eventq(channel);
+ }
+
+ return rc;
+}
+
+void efx_soft_disable_interrupts(struct efx_nic *efx)
+{
+ struct efx_channel *channel;
+
+ if (efx->state == STATE_DISABLED)
+ return;
+
+ efx_mcdi_mode_poll(efx);
+
+ efx->irq_soft_enabled = false;
+ smp_wmb();
+
+ if (efx->legacy_irq)
+ synchronize_irq(efx->legacy_irq);
+
+ efx_for_each_channel(channel, efx) {
+ if (channel->irq)
+ synchronize_irq(channel->irq);
+
+ efx_stop_eventq(channel);
+ if (!channel->type->keep_eventq)
+ efx_fini_eventq(channel);
+ }
+
+ /* Flush the asynchronous MCDI request queue */
+ efx_mcdi_flush_async(efx);
+}
+
+int efx_enable_interrupts(struct efx_nic *efx)
+{
+ struct efx_channel *channel, *end_channel;
+ int rc;
+
+ /* TODO: Is this really a bug? */
+ BUG_ON(efx->state == STATE_DISABLED);
+
+ if (efx->eeh_disabled_legacy_irq) {
+ enable_irq(efx->legacy_irq);
+ efx->eeh_disabled_legacy_irq = false;
+ }
+
+ efx->type->irq_enable_master(efx);
+
+ efx_for_each_channel(channel, efx) {
+ if (channel->type->keep_eventq) {
+ rc = efx_init_eventq(channel);
+ if (rc)
+ goto fail;
+ }
+ }
+
+ rc = efx_soft_enable_interrupts(efx);
+ if (rc)
+ goto fail;
+
+ return 0;
+
+fail:
+ end_channel = channel;
+ efx_for_each_channel(channel, efx) {
+ if (channel == end_channel)
+ break;
+ if (channel->type->keep_eventq)
+ efx_fini_eventq(channel);
+ }
+
+ efx->type->irq_disable_non_ev(efx);
+
+ return rc;
+}
+
+void efx_disable_interrupts(struct efx_nic *efx)
+{
+ struct efx_channel *channel;
+
+ efx_soft_disable_interrupts(efx);
+
+ efx_for_each_channel(channel, efx) {
+ if (channel->type->keep_eventq)
+ efx_fini_eventq(channel);
+ }
+
+ efx->type->irq_disable_non_ev(efx);
+}
+
+void efx_start_channels(struct efx_nic *efx)
+{
+ struct efx_tx_queue *tx_queue;
+ struct efx_rx_queue *rx_queue;
+ struct efx_channel *channel;
+
+ efx_for_each_channel(channel, efx) {
+ efx_for_each_channel_tx_queue(tx_queue, channel) {
+ efx_init_tx_queue(tx_queue);
+ atomic_inc(&efx->active_queues);
+ }
+
+ efx_for_each_channel_rx_queue(rx_queue, channel) {
+ efx_init_rx_queue(rx_queue);
+ atomic_inc(&efx->active_queues);
+ efx_stop_eventq(channel);
+ efx_fast_push_rx_descriptors(rx_queue, false);
+ efx_start_eventq(channel);
+ }
+
+ WARN_ON(channel->rx_pkt_n_frags);
+ }
+}
+
+void efx_stop_channels(struct efx_nic *efx)
+{
+ struct efx_tx_queue *tx_queue;
+ struct efx_rx_queue *rx_queue;
+ struct efx_channel *channel;
+ int rc = 0;
+
+ /* Stop RX refill */
+ efx_for_each_channel(channel, efx) {
+ efx_for_each_channel_rx_queue(rx_queue, channel)
+ rx_queue->refill_enabled = false;
+ }
+
+ efx_for_each_channel(channel, efx) {
+ /* RX packet processing is pipelined, so wait for the
+ * NAPI handler to complete. At least event queue 0
+ * might be kept active by non-data events, so don't
+ * use napi_synchronize() but actually disable NAPI
+ * temporarily.
+ */
+ if (efx_channel_has_rx_queue(channel)) {
+ efx_stop_eventq(channel);
+ efx_start_eventq(channel);
+ }
+ }
+
+ if (efx->type->fini_dmaq)
+ rc = efx->type->fini_dmaq(efx);
+
+ if (rc) {
+ netif_err(efx, drv, efx->net_dev, "failed to flush queues\n");
+ } else {
+ netif_dbg(efx, drv, efx->net_dev,
+ "successfully flushed all queues\n");
+ }
+
+ efx_for_each_channel(channel, efx) {
+ efx_for_each_channel_rx_queue(rx_queue, channel)
+ efx_fini_rx_queue(rx_queue);
+ efx_for_each_possible_channel_tx_queue(tx_queue, channel)
+ efx_fini_tx_queue(tx_queue);
+ }
+}
+
+/**************************************************************************
+ *
+ * NAPI interface
+ *
+ *************************************************************************/
+
+/* Process channel's event queue
+ *
+ * This function is responsible for processing the event queue of a
+ * single channel. The caller must guarantee that this function will
+ * never be concurrently called more than once on the same channel,
+ * though different channels may be being processed concurrently.
+ */
+static int efx_process_channel(struct efx_channel *channel, int budget)
+{
+ struct efx_tx_queue *tx_queue;
+ struct list_head rx_list;
+ int spent;
+
+ if (unlikely(!channel->enabled))
+ return 0;
+
+ /* Prepare the batch receive list */
+ EFX_WARN_ON_PARANOID(channel->rx_list != NULL);
+ INIT_LIST_HEAD(&rx_list);
+ channel->rx_list = &rx_list;
+
+ efx_for_each_channel_tx_queue(tx_queue, channel) {
+ tx_queue->pkts_compl = 0;
+ tx_queue->bytes_compl = 0;
+ }
+
+ spent = efx_nic_process_eventq(channel, budget);
+ if (spent && efx_channel_has_rx_queue(channel)) {
+ struct efx_rx_queue *rx_queue =
+ efx_channel_get_rx_queue(channel);
+
+ efx_rx_flush_packet(channel);
+ efx_fast_push_rx_descriptors(rx_queue, true);
+ }
+
+ /* Update BQL */
+ efx_for_each_channel_tx_queue(tx_queue, channel) {
+ if (tx_queue->bytes_compl) {
+ netdev_tx_completed_queue(tx_queue->core_txq,
+ tx_queue->pkts_compl,
+ tx_queue->bytes_compl);
+ }
+ }
+
+ /* Receive any packets we queued up */
+ netif_receive_skb_list(channel->rx_list);
+ channel->rx_list = NULL;
+
+ return spent;
+}
+
+static void efx_update_irq_mod(struct efx_nic *efx, struct efx_channel *channel)
+{
+ int step = efx->irq_mod_step_us;
+
+ if (channel->irq_mod_score < irq_adapt_low_thresh) {
+ if (channel->irq_moderation_us > step) {
+ channel->irq_moderation_us -= step;
+ efx->type->push_irq_moderation(channel);
+ }
+ } else if (channel->irq_mod_score > irq_adapt_high_thresh) {
+ if (channel->irq_moderation_us <
+ efx->irq_rx_moderation_us) {
+ channel->irq_moderation_us += step;
+ efx->type->push_irq_moderation(channel);
+ }
+ }
+
+ channel->irq_count = 0;
+ channel->irq_mod_score = 0;
+}
+
+/* NAPI poll handler
+ *
+ * NAPI guarantees serialisation of polls of the same device, which
+ * provides the guarantee required by efx_process_channel().
+ */
+static int efx_poll(struct napi_struct *napi, int budget)
+{
+ struct efx_channel *channel =
+ container_of(napi, struct efx_channel, napi_str);
+ struct efx_nic *efx = channel->efx;
+ int spent;
+
+ netif_vdbg(efx, intr, efx->net_dev,
+ "channel %d NAPI poll executing on CPU %d\n",
+ channel->channel, raw_smp_processor_id());
+
+ spent = efx_process_channel(channel, budget);
+
+ xdp_do_flush_map();
+
+ if (spent < budget) {
+ if (efx_channel_has_rx_queue(channel) &&
+ efx->irq_rx_adaptive &&
+ unlikely(++channel->irq_count == 1000)) {
+ efx_update_irq_mod(efx, channel);
+ }
+
+#ifdef CONFIG_RFS_ACCEL
+ /* Perhaps expire some ARFS filters */
+ mod_delayed_work(system_wq, &channel->filter_work, 0);
+#endif
+
+ /* There is no race here; although napi_disable() will
+ * only wait for napi_complete(), this isn't a problem
+ * since efx_nic_eventq_read_ack() will have no effect if
+ * interrupts have already been disabled.
+ */
+ if (napi_complete_done(napi, spent))
+ efx_nic_eventq_read_ack(channel);
+ }
+
+ return spent;
+}
+
+void efx_init_napi_channel(struct efx_channel *channel)
+{
+ struct efx_nic *efx = channel->efx;
+
+ channel->napi_dev = efx->net_dev;
+ netif_napi_add(channel->napi_dev, &channel->napi_str,
+ efx_poll, napi_weight);
+}
+
+void efx_init_napi(struct efx_nic *efx)
+{
+ struct efx_channel *channel;
+
+ efx_for_each_channel(channel, efx)
+ efx_init_napi_channel(channel);
+}
+
+void efx_fini_napi_channel(struct efx_channel *channel)
+{
+ if (channel->napi_dev)
+ netif_napi_del(&channel->napi_str);
+
+ channel->napi_dev = NULL;
+}
+
+void efx_fini_napi(struct efx_nic *efx)
+{
+ struct efx_channel *channel;
+
+ efx_for_each_channel(channel, efx)
+ efx_fini_napi_channel(channel);
+}
diff --git a/drivers/net/ethernet/sfc/efx_channels.h b/drivers/net/ethernet/sfc/efx_channels.h
new file mode 100644
index 000000000000..8d7b8c4142d7
--- /dev/null
+++ b/drivers/net/ethernet/sfc/efx_channels.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2018 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#ifndef EFX_CHANNELS_H
+#define EFX_CHANNELS_H
+
+int efx_probe_interrupts(struct efx_nic *efx);
+void efx_remove_interrupts(struct efx_nic *efx);
+int efx_soft_enable_interrupts(struct efx_nic *efx);
+void efx_soft_disable_interrupts(struct efx_nic *efx);
+int efx_enable_interrupts(struct efx_nic *efx);
+void efx_disable_interrupts(struct efx_nic *efx);
+
+void efx_set_interrupt_affinity(struct efx_nic *efx);
+void efx_clear_interrupt_affinity(struct efx_nic *efx);
+
+int efx_probe_eventq(struct efx_channel *channel);
+int efx_init_eventq(struct efx_channel *channel);
+void efx_start_eventq(struct efx_channel *channel);
+void efx_stop_eventq(struct efx_channel *channel);
+void efx_fini_eventq(struct efx_channel *channel);
+void efx_remove_eventq(struct efx_channel *channel);
+
+struct efx_channel *
+efx_alloc_channel(struct efx_nic *efx, int i, struct efx_channel *old_channel);
+int efx_realloc_channels(struct efx_nic *efx, u32 rxq_entries, u32 txq_entries);
+void efx_get_channel_name(struct efx_channel *channel, char *buf, size_t len);
+void efx_set_channel_names(struct efx_nic *efx);
+int efx_init_channels(struct efx_nic *efx);
+int efx_probe_channels(struct efx_nic *efx);
+int efx_set_channels(struct efx_nic *efx);
+bool efx_default_channel_want_txqs(struct efx_channel *channel);
+void efx_remove_channel(struct efx_channel *channel);
+void efx_remove_channels(struct efx_nic *efx);
+void efx_fini_channels(struct efx_nic *efx);
+struct efx_channel *efx_copy_channel(const struct efx_channel *old_channel);
+void efx_start_channels(struct efx_nic *efx);
+void efx_stop_channels(struct efx_nic *efx);
+
+void efx_init_napi_channel(struct efx_channel *channel);
+void efx_init_napi(struct efx_nic *efx);
+void efx_fini_napi_channel(struct efx_channel *channel);
+void efx_fini_napi(struct efx_nic *efx);
+
+int efx_channel_dummy_op_int(struct efx_channel *channel);
+void efx_channel_dummy_op_void(struct efx_channel *channel);
+
+#endif
diff --git a/drivers/net/ethernet/sfc/efx_common.c b/drivers/net/ethernet/sfc/efx_common.c
new file mode 100644
index 000000000000..ab0ce62f81c1
--- /dev/null
+++ b/drivers/net/ethernet/sfc/efx_common.c
@@ -0,0 +1,1102 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2018 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#include "net_driver.h"
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include "efx_common.h"
+#include "efx_channels.h"
+#include "efx.h"
+#include "mcdi.h"
+#include "selftest.h"
+#include "rx_common.h"
+#include "tx_common.h"
+#include "nic.h"
+#include "io.h"
+#include "mcdi_pcol.h"
+
+static unsigned int debug = (NETIF_MSG_DRV | NETIF_MSG_PROBE |
+ NETIF_MSG_LINK | NETIF_MSG_IFDOWN |
+ NETIF_MSG_IFUP | NETIF_MSG_RX_ERR |
+ NETIF_MSG_TX_ERR | NETIF_MSG_HW);
+module_param(debug, uint, 0);
+MODULE_PARM_DESC(debug, "Bitmapped debugging message enable value");
+
+/* This is the time (in jiffies) between invocations of the hardware
+ * monitor.
+ * On Falcon-based NICs, this will:
+ * - Check the on-board hardware monitor;
+ * - Poll the link state and reconfigure the hardware as necessary.
+ * On Siena-based NICs for power systems with EEH support, this will give EEH a
+ * chance to start.
+ */
+static unsigned int efx_monitor_interval = 1 * HZ;
+
+/* How often and how many times to poll for a reset while waiting for a
+ * BIST that another function started to complete.
+ */
+#define BIST_WAIT_DELAY_MS 100
+#define BIST_WAIT_DELAY_COUNT 100
+
+/* Default stats update time */
+#define STATS_PERIOD_MS_DEFAULT 1000
+
+const unsigned int efx_reset_type_max = RESET_TYPE_MAX;
+const char *const efx_reset_type_names[] = {
+ [RESET_TYPE_INVISIBLE] = "INVISIBLE",
+ [RESET_TYPE_ALL] = "ALL",
+ [RESET_TYPE_RECOVER_OR_ALL] = "RECOVER_OR_ALL",
+ [RESET_TYPE_WORLD] = "WORLD",
+ [RESET_TYPE_RECOVER_OR_DISABLE] = "RECOVER_OR_DISABLE",
+ [RESET_TYPE_DATAPATH] = "DATAPATH",
+ [RESET_TYPE_MC_BIST] = "MC_BIST",
+ [RESET_TYPE_DISABLE] = "DISABLE",
+ [RESET_TYPE_TX_WATCHDOG] = "TX_WATCHDOG",
+ [RESET_TYPE_INT_ERROR] = "INT_ERROR",
+ [RESET_TYPE_DMA_ERROR] = "DMA_ERROR",
+ [RESET_TYPE_TX_SKIP] = "TX_SKIP",
+ [RESET_TYPE_MC_FAILURE] = "MC_FAILURE",
+ [RESET_TYPE_MCDI_TIMEOUT] = "MCDI_TIMEOUT (FLR)",
+};
+
+#define RESET_TYPE(type) \
+ STRING_TABLE_LOOKUP(type, efx_reset_type)
+
+/* Loopback mode names (see LOOPBACK_MODE()) */
+const unsigned int efx_loopback_mode_max = LOOPBACK_MAX;
+const char *const efx_loopback_mode_names[] = {
+ [LOOPBACK_NONE] = "NONE",
+ [LOOPBACK_DATA] = "DATAPATH",
+ [LOOPBACK_GMAC] = "GMAC",
+ [LOOPBACK_XGMII] = "XGMII",
+ [LOOPBACK_XGXS] = "XGXS",
+ [LOOPBACK_XAUI] = "XAUI",
+ [LOOPBACK_GMII] = "GMII",
+ [LOOPBACK_SGMII] = "SGMII",
+ [LOOPBACK_XGBR] = "XGBR",
+ [LOOPBACK_XFI] = "XFI",
+ [LOOPBACK_XAUI_FAR] = "XAUI_FAR",
+ [LOOPBACK_GMII_FAR] = "GMII_FAR",
+ [LOOPBACK_SGMII_FAR] = "SGMII_FAR",
+ [LOOPBACK_XFI_FAR] = "XFI_FAR",
+ [LOOPBACK_GPHY] = "GPHY",
+ [LOOPBACK_PHYXS] = "PHYXS",
+ [LOOPBACK_PCS] = "PCS",
+ [LOOPBACK_PMAPMD] = "PMA/PMD",
+ [LOOPBACK_XPORT] = "XPORT",
+ [LOOPBACK_XGMII_WS] = "XGMII_WS",
+ [LOOPBACK_XAUI_WS] = "XAUI_WS",
+ [LOOPBACK_XAUI_WS_FAR] = "XAUI_WS_FAR",
+ [LOOPBACK_XAUI_WS_NEAR] = "XAUI_WS_NEAR",
+ [LOOPBACK_GMII_WS] = "GMII_WS",
+ [LOOPBACK_XFI_WS] = "XFI_WS",
+ [LOOPBACK_XFI_WS_FAR] = "XFI_WS_FAR",
+ [LOOPBACK_PHYXS_WS] = "PHYXS_WS",
+};
+
+/* Reset workqueue. If any NIC has a hardware failure then a reset will be
+ * queued onto this work queue. This is not a per-nic work queue, because
+ * efx_reset_work() acquires the rtnl lock, so resets are naturally serialised.
+ */
+static struct workqueue_struct *reset_workqueue;
+
+int efx_create_reset_workqueue(void)
+{
+ reset_workqueue = create_singlethread_workqueue("sfc_reset");
+ if (!reset_workqueue) {
+ printk(KERN_ERR "Failed to create reset workqueue\n");
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+void efx_queue_reset_work(struct efx_nic *efx)
+{
+ queue_work(reset_workqueue, &efx->reset_work);
+}
+
+void efx_flush_reset_workqueue(struct efx_nic *efx)
+{
+ cancel_work_sync(&efx->reset_work);
+}
+
+void efx_destroy_reset_workqueue(void)
+{
+ if (reset_workqueue) {
+ destroy_workqueue(reset_workqueue);
+ reset_workqueue = NULL;
+ }
+}
+
+/* We assume that efx->type->reconfigure_mac will always try to sync RX
+ * filters and therefore needs to read-lock the filter table against freeing
+ */
+void efx_mac_reconfigure(struct efx_nic *efx)
+{
+ if (efx->type->reconfigure_mac) {
+ down_read(&efx->filter_sem);
+ efx->type->reconfigure_mac(efx);
+ up_read(&efx->filter_sem);
+ }
+}
+
+/* Asynchronous work item for changing MAC promiscuity and multicast
+ * hash. Avoid a drain/rx_ingress enable by reconfiguring the current
+ * MAC directly.
+ */
+static void efx_mac_work(struct work_struct *data)
+{
+ struct efx_nic *efx = container_of(data, struct efx_nic, mac_work);
+
+ mutex_lock(&efx->mac_lock);
+ if (efx->port_enabled)
+ efx_mac_reconfigure(efx);
+ mutex_unlock(&efx->mac_lock);
+}
+
+/* This ensures that the kernel is kept informed (via
+ * netif_carrier_on/off) of the link status, and also maintains the
+ * link status's stop on the port's TX queue.
+ */
+void efx_link_status_changed(struct efx_nic *efx)
+{
+ struct efx_link_state *link_state = &efx->link_state;
+
+ /* SFC Bug 5356: A net_dev notifier is registered, so we must ensure
+ * that no events are triggered between unregister_netdev() and the
+ * driver unloading. A more general condition is that NETDEV_CHANGE
+ * can only be generated between NETDEV_UP and NETDEV_DOWN
+ */
+ if (!netif_running(efx->net_dev))
+ return;
+
+ if (link_state->up != netif_carrier_ok(efx->net_dev)) {
+ efx->n_link_state_changes++;
+
+ if (link_state->up)
+ netif_carrier_on(efx->net_dev);
+ else
+ netif_carrier_off(efx->net_dev);
+ }
+
+ /* Status message for kernel log */
+ if (link_state->up)
+ netif_info(efx, link, efx->net_dev,
+ "link up at %uMbps %s-duplex (MTU %d)\n",
+ link_state->speed, link_state->fd ? "full" : "half",
+ efx->net_dev->mtu);
+ else
+ netif_info(efx, link, efx->net_dev, "link down\n");
+}
+
+unsigned int efx_xdp_max_mtu(struct efx_nic *efx)
+{
+ /* The maximum MTU that we can fit in a single page, allowing for
+ * framing, overhead and XDP headroom.
+ */
+ int overhead = EFX_MAX_FRAME_LEN(0) + sizeof(struct efx_rx_page_state) +
+ efx->rx_prefix_size + efx->type->rx_buffer_padding +
+ efx->rx_ip_align + XDP_PACKET_HEADROOM;
+
+ return PAGE_SIZE - overhead;
+}
+
+/* Context: process, rtnl_lock() held. */
+int efx_change_mtu(struct net_device *net_dev, int new_mtu)
+{
+ struct efx_nic *efx = netdev_priv(net_dev);
+ int rc;
+
+ rc = efx_check_disabled(efx);
+ if (rc)
+ return rc;
+
+ if (rtnl_dereference(efx->xdp_prog) &&
+ new_mtu > efx_xdp_max_mtu(efx)) {
+ netif_err(efx, drv, efx->net_dev,
+ "Requested MTU of %d too big for XDP (max: %d)\n",
+ new_mtu, efx_xdp_max_mtu(efx));
+ return -EINVAL;
+ }
+
+ netif_dbg(efx, drv, efx->net_dev, "changing MTU to %d\n", new_mtu);
+
+ efx_device_detach_sync(efx);
+ efx_stop_all(efx);
+
+ mutex_lock(&efx->mac_lock);
+ net_dev->mtu = new_mtu;
+ efx_mac_reconfigure(efx);
+ mutex_unlock(&efx->mac_lock);
+
+ efx_start_all(efx);
+ efx_device_attach_if_not_resetting(efx);
+ return 0;
+}
+
+/**************************************************************************
+ *
+ * Hardware monitor
+ *
+ **************************************************************************/
+
+/* Run periodically off the general workqueue */
+static void efx_monitor(struct work_struct *data)
+{
+ struct efx_nic *efx = container_of(data, struct efx_nic,
+ monitor_work.work);
+
+ netif_vdbg(efx, timer, efx->net_dev,
+ "hardware monitor executing on CPU %d\n",
+ raw_smp_processor_id());
+ BUG_ON(efx->type->monitor == NULL);
+
+ /* If the mac_lock is already held then it is likely a port
+ * reconfiguration is already in place, which will likely do
+ * most of the work of monitor() anyway.
+ */
+ if (mutex_trylock(&efx->mac_lock)) {
+ if (efx->port_enabled && efx->type->monitor)
+ efx->type->monitor(efx);
+ mutex_unlock(&efx->mac_lock);
+ }
+
+ efx_start_monitor(efx);
+}
+
+void efx_start_monitor(struct efx_nic *efx)
+{
+ if (efx->type->monitor)
+ queue_delayed_work(efx->workqueue, &efx->monitor_work,
+ efx_monitor_interval);
+}
+
+/**************************************************************************
+ *
+ * Event queue processing
+ *
+ *************************************************************************/
+
+/* Channels are shutdown and reinitialised whilst the NIC is running
+ * to propagate configuration changes (mtu, checksum offload), or
+ * to clear hardware error conditions
+ */
+static void efx_start_datapath(struct efx_nic *efx)
+{
+ netdev_features_t old_features = efx->net_dev->features;
+ bool old_rx_scatter = efx->rx_scatter;
+ size_t rx_buf_len;
+
+ /* Calculate the rx buffer allocation parameters required to
+ * support the current MTU, including padding for header
+ * alignment and overruns.
+ */
+ efx->rx_dma_len = (efx->rx_prefix_size +
+ EFX_MAX_FRAME_LEN(efx->net_dev->mtu) +
+ efx->type->rx_buffer_padding);
+ rx_buf_len = (sizeof(struct efx_rx_page_state) + XDP_PACKET_HEADROOM +
+ efx->rx_ip_align + efx->rx_dma_len);
+ if (rx_buf_len <= PAGE_SIZE) {
+ efx->rx_scatter = efx->type->always_rx_scatter;
+ efx->rx_buffer_order = 0;
+ } else if (efx->type->can_rx_scatter) {
+ BUILD_BUG_ON(EFX_RX_USR_BUF_SIZE % L1_CACHE_BYTES);
+ BUILD_BUG_ON(sizeof(struct efx_rx_page_state) +
+ 2 * ALIGN(NET_IP_ALIGN + EFX_RX_USR_BUF_SIZE,
+ EFX_RX_BUF_ALIGNMENT) >
+ PAGE_SIZE);
+ efx->rx_scatter = true;
+ efx->rx_dma_len = EFX_RX_USR_BUF_SIZE;
+ efx->rx_buffer_order = 0;
+ } else {
+ efx->rx_scatter = false;
+ efx->rx_buffer_order = get_order(rx_buf_len);
+ }
+
+ efx_rx_config_page_split(efx);
+ if (efx->rx_buffer_order)
+ netif_dbg(efx, drv, efx->net_dev,
+ "RX buf len=%u; page order=%u batch=%u\n",
+ efx->rx_dma_len, efx->rx_buffer_order,
+ efx->rx_pages_per_batch);
+ else
+ netif_dbg(efx, drv, efx->net_dev,
+ "RX buf len=%u step=%u bpp=%u; page batch=%u\n",
+ efx->rx_dma_len, efx->rx_page_buf_step,
+ efx->rx_bufs_per_page, efx->rx_pages_per_batch);
+
+ /* Restore previously fixed features in hw_features and remove
+ * features which are fixed now
+ */
+ efx->net_dev->hw_features |= efx->net_dev->features;
+ efx->net_dev->hw_features &= ~efx->fixed_features;
+ efx->net_dev->features |= efx->fixed_features;
+ if (efx->net_dev->features != old_features)
+ netdev_features_change(efx->net_dev);
+
+ /* RX filters may also have scatter-enabled flags */
+ if ((efx->rx_scatter != old_rx_scatter) &&
+ efx->type->filter_update_rx_scatter)
+ efx->type->filter_update_rx_scatter(efx);
+
+ /* We must keep at least one descriptor in a TX ring empty.
+ * We could avoid this when the queue size does not exactly
+ * match the hardware ring size, but it's not that important.
+ * Therefore we stop the queue when one more skb might fill
+ * the ring completely. We wake it when half way back to
+ * empty.
+ */
+ efx->txq_stop_thresh = efx->txq_entries - efx_tx_max_skb_descs(efx);
+ efx->txq_wake_thresh = efx->txq_stop_thresh / 2;
+
+ /* Initialise the channels */
+ efx_start_channels(efx);
+
+ efx_ptp_start_datapath(efx);
+
+ if (netif_device_present(efx->net_dev))
+ netif_tx_wake_all_queues(efx->net_dev);
+}
+
+static void efx_stop_datapath(struct efx_nic *efx)
+{
+ EFX_ASSERT_RESET_SERIALISED(efx);
+ BUG_ON(efx->port_enabled);
+
+ efx_ptp_stop_datapath(efx);
+
+ efx_stop_channels(efx);
+}
+
+/**************************************************************************
+ *
+ * Port handling
+ *
+ **************************************************************************/
+
+static void efx_start_port(struct efx_nic *efx)
+{
+ netif_dbg(efx, ifup, efx->net_dev, "start port\n");
+ BUG_ON(efx->port_enabled);
+
+ mutex_lock(&efx->mac_lock);
+ efx->port_enabled = true;
+
+ /* Ensure MAC ingress/egress is enabled */
+ efx_mac_reconfigure(efx);
+
+ mutex_unlock(&efx->mac_lock);
+}
+
+/* Cancel work for MAC reconfiguration, periodic hardware monitoring
+ * and the async self-test, wait for them to finish and prevent them
+ * being scheduled again. This doesn't cover online resets, which
+ * should only be cancelled when removing the device.
+ */
+static void efx_stop_port(struct efx_nic *efx)
+{
+ netif_dbg(efx, ifdown, efx->net_dev, "stop port\n");
+
+ EFX_ASSERT_RESET_SERIALISED(efx);
+
+ mutex_lock(&efx->mac_lock);
+ efx->port_enabled = false;
+ mutex_unlock(&efx->mac_lock);
+
+ /* Serialise against efx_set_multicast_list() */
+ netif_addr_lock_bh(efx->net_dev);
+ netif_addr_unlock_bh(efx->net_dev);
+
+ cancel_delayed_work_sync(&efx->monitor_work);
+ efx_selftest_async_cancel(efx);
+ cancel_work_sync(&efx->mac_work);
+}
+
+/* If the interface is supposed to be running but is not, start
+ * the hardware and software data path, regular activity for the port
+ * (MAC statistics, link polling, etc.) and schedule the port to be
+ * reconfigured. Interrupts must already be enabled. This function
+ * is safe to call multiple times, so long as the NIC is not disabled.
+ * Requires the RTNL lock.
+ */
+void efx_start_all(struct efx_nic *efx)
+{
+ EFX_ASSERT_RESET_SERIALISED(efx);
+ BUG_ON(efx->state == STATE_DISABLED);
+
+ /* Check that it is appropriate to restart the interface. All
+ * of these flags are safe to read under just the rtnl lock
+ */
+ if (efx->port_enabled || !netif_running(efx->net_dev) ||
+ efx->reset_pending)
+ return;
+
+ efx_start_port(efx);
+ efx_start_datapath(efx);
+
+ /* Start the hardware monitor if there is one */
+ efx_start_monitor(efx);
+
+ /* Link state detection is normally event-driven; we have
+ * to poll now because we could have missed a change
+ */
+ mutex_lock(&efx->mac_lock);
+ if (efx->phy_op->poll(efx))
+ efx_link_status_changed(efx);
+ mutex_unlock(&efx->mac_lock);
+
+ if (efx->type->start_stats) {
+ efx->type->start_stats(efx);
+ efx->type->pull_stats(efx);
+ spin_lock_bh(&efx->stats_lock);
+ efx->type->update_stats(efx, NULL, NULL);
+ spin_unlock_bh(&efx->stats_lock);
+ }
+}
+
+/* Quiesce the hardware and software data path, and regular activity
+ * for the port without bringing the link down. Safe to call multiple
+ * times with the NIC in almost any state, but interrupts should be
+ * enabled. Requires the RTNL lock.
+ */
+void efx_stop_all(struct efx_nic *efx)
+{
+ EFX_ASSERT_RESET_SERIALISED(efx);
+
+ /* port_enabled can be read safely under the rtnl lock */
+ if (!efx->port_enabled)
+ return;
+
+ if (efx->type->update_stats) {
+ /* update stats before we go down so we can accurately count
+ * rx_nodesc_drops
+ */
+ efx->type->pull_stats(efx);
+ spin_lock_bh(&efx->stats_lock);
+ efx->type->update_stats(efx, NULL, NULL);
+ spin_unlock_bh(&efx->stats_lock);
+ efx->type->stop_stats(efx);
+ }
+
+ efx_stop_port(efx);
+
+ /* Stop the kernel transmit interface. This is only valid if
+ * the device is stopped or detached; otherwise the watchdog
+ * may fire immediately.
+ */
+ WARN_ON(netif_running(efx->net_dev) &&
+ netif_device_present(efx->net_dev));
+ netif_tx_disable(efx->net_dev);
+
+ efx_stop_datapath(efx);
+}
+
+/* Context: process, dev_base_lock or RTNL held, non-blocking. */
+void efx_net_stats(struct net_device *net_dev, struct rtnl_link_stats64 *stats)
+{
+ struct efx_nic *efx = netdev_priv(net_dev);
+
+ spin_lock_bh(&efx->stats_lock);
+ efx->type->update_stats(efx, NULL, stats);
+ spin_unlock_bh(&efx->stats_lock);
+}
+
+/* Push loopback/power/transmit disable settings to the PHY, and reconfigure
+ * the MAC appropriately. All other PHY configuration changes are pushed
+ * through phy_op->set_settings(), and pushed asynchronously to the MAC
+ * through efx_monitor().
+ *
+ * Callers must hold the mac_lock
+ */
+int __efx_reconfigure_port(struct efx_nic *efx)
+{
+ enum efx_phy_mode phy_mode;
+ int rc = 0;
+
+ WARN_ON(!mutex_is_locked(&efx->mac_lock));
+
+ /* Disable PHY transmit in mac level loopbacks */
+ phy_mode = efx->phy_mode;
+ if (LOOPBACK_INTERNAL(efx))
+ efx->phy_mode |= PHY_MODE_TX_DISABLED;
+ else
+ efx->phy_mode &= ~PHY_MODE_TX_DISABLED;
+
+ if (efx->type->reconfigure_port)
+ rc = efx->type->reconfigure_port(efx);
+
+ if (rc)
+ efx->phy_mode = phy_mode;
+
+ return rc;
+}
+
+/* Reinitialise the MAC to pick up new PHY settings, even if the port is
+ * disabled.
+ */
+int efx_reconfigure_port(struct efx_nic *efx)
+{
+ int rc;
+
+ EFX_ASSERT_RESET_SERIALISED(efx);
+
+ mutex_lock(&efx->mac_lock);
+ rc = __efx_reconfigure_port(efx);
+ mutex_unlock(&efx->mac_lock);
+
+ return rc;
+}
+
+/**************************************************************************
+ *
+ * Device reset and suspend
+ *
+ **************************************************************************/
+
+static void efx_wait_for_bist_end(struct efx_nic *efx)
+{
+ int i;
+
+ for (i = 0; i < BIST_WAIT_DELAY_COUNT; ++i) {
+ if (efx_mcdi_poll_reboot(efx))
+ goto out;
+ msleep(BIST_WAIT_DELAY_MS);
+ }
+
+ netif_err(efx, drv, efx->net_dev, "Warning: No MC reboot after BIST mode\n");
+out:
+ /* Either way unset the BIST flag. If we found no reboot we probably
+ * won't recover, but we should try.
+ */
+ efx->mc_bist_for_other_fn = false;
+}
+
+/* Try recovery mechanisms.
+ * For now only EEH is supported.
+ * Returns 0 if the recovery mechanisms are unsuccessful.
+ * Returns a non-zero value otherwise.
+ */
+int efx_try_recovery(struct efx_nic *efx)
+{
+#ifdef CONFIG_EEH
+ /* A PCI error can occur and not be seen by EEH because nothing
+ * happens on the PCI bus. In this case the driver may fail and
+ * schedule a 'recover or reset', leading to this recovery handler.
+ * Manually call the eeh failure check function.
+ */
+ struct eeh_dev *eehdev = pci_dev_to_eeh_dev(efx->pci_dev);
+ if (eeh_dev_check_failure(eehdev)) {
+ /* The EEH mechanisms will handle the error and reset the
+ * device if necessary.
+ */
+ return 1;
+ }
+#endif
+ return 0;
+}
+
+/* Tears down the entire software state and most of the hardware state
+ * before reset.
+ */
+void efx_reset_down(struct efx_nic *efx, enum reset_type method)
+{
+ EFX_ASSERT_RESET_SERIALISED(efx);
+
+ if (method == RESET_TYPE_MCDI_TIMEOUT)
+ efx->type->prepare_flr(efx);
+
+ efx_stop_all(efx);
+ efx_disable_interrupts(efx);
+
+ mutex_lock(&efx->mac_lock);
+ down_write(&efx->filter_sem);
+ mutex_lock(&efx->rss_lock);
+ if (efx->port_initialized && method != RESET_TYPE_INVISIBLE &&
+ method != RESET_TYPE_DATAPATH)
+ efx->phy_op->fini(efx);
+ efx->type->fini(efx);
+}
+
+/* This function will always ensure that the locks acquired in
+ * efx_reset_down() are released. A failure return code indicates
+ * that we were unable to reinitialise the hardware, and the
+ * driver should be disabled. If ok is false, then the rx and tx
+ * engines are not restarted, pending a RESET_DISABLE.
+ */
+int efx_reset_up(struct efx_nic *efx, enum reset_type method, bool ok)
+{
+ int rc;
+
+ EFX_ASSERT_RESET_SERIALISED(efx);
+
+ if (method == RESET_TYPE_MCDI_TIMEOUT)
+ efx->type->finish_flr(efx);
+
+ /* Ensure that SRAM is initialised even if we're disabling the device */
+ rc = efx->type->init(efx);
+ if (rc) {
+ netif_err(efx, drv, efx->net_dev, "failed to initialise NIC\n");
+ goto fail;
+ }
+
+ if (!ok)
+ goto fail;
+
+ if (efx->port_initialized && method != RESET_TYPE_INVISIBLE &&
+ method != RESET_TYPE_DATAPATH) {
+ rc = efx->phy_op->init(efx);
+ if (rc)
+ goto fail;
+ rc = efx->phy_op->reconfigure(efx);
+ if (rc && rc != -EPERM)
+ netif_err(efx, drv, efx->net_dev,
+ "could not restore PHY settings\n");
+ }
+
+ rc = efx_enable_interrupts(efx);
+ if (rc)
+ goto fail;
+
+#ifdef CONFIG_SFC_SRIOV
+ rc = efx->type->vswitching_restore(efx);
+ if (rc) /* not fatal; the PF will still work fine */
+ netif_warn(efx, probe, efx->net_dev,
+ "failed to restore vswitching rc=%d;"
+ " VFs may not function\n", rc);
+#endif
+
+ if (efx->type->rx_restore_rss_contexts)
+ efx->type->rx_restore_rss_contexts(efx);
+ mutex_unlock(&efx->rss_lock);
+ efx->type->filter_table_restore(efx);
+ up_write(&efx->filter_sem);
+ if (efx->type->sriov_reset)
+ efx->type->sriov_reset(efx);
+
+ mutex_unlock(&efx->mac_lock);
+
+ efx_start_all(efx);
+
+ if (efx->type->udp_tnl_push_ports)
+ efx->type->udp_tnl_push_ports(efx);
+
+ return 0;
+
+fail:
+ efx->port_initialized = false;
+
+ mutex_unlock(&efx->rss_lock);
+ up_write(&efx->filter_sem);
+ mutex_unlock(&efx->mac_lock);
+
+ return rc;
+}
+
+/* Reset the NIC using the specified method. Note that the reset may
+ * fail, in which case the card will be left in an unusable state.
+ *
+ * Caller must hold the rtnl_lock.
+ */
+int efx_reset(struct efx_nic *efx, enum reset_type method)
+{
+ bool disabled;
+ int rc, rc2;
+
+ netif_info(efx, drv, efx->net_dev, "resetting (%s)\n",
+ RESET_TYPE(method));
+
+ efx_device_detach_sync(efx);
+ efx_reset_down(efx, method);
+
+ rc = efx->type->reset(efx, method);
+ if (rc) {
+ netif_err(efx, drv, efx->net_dev, "failed to reset hardware\n");
+ goto out;
+ }
+
+ /* Clear flags for the scopes we covered. We assume the NIC and
+ * driver are now quiescent so that there is no race here.
+ */
+ if (method < RESET_TYPE_MAX_METHOD)
+ efx->reset_pending &= -(1 << (method + 1));
+ else /* it doesn't fit into the well-ordered scope hierarchy */
+ __clear_bit(method, &efx->reset_pending);
+
+ /* Reinitialise bus-mastering, which may have been turned off before
+ * the reset was scheduled. This is still appropriate, even in the
+ * RESET_TYPE_DISABLE since this driver generally assumes the hardware
+ * can respond to requests.
+ */
+ pci_set_master(efx->pci_dev);
+
+out:
+ /* Leave device stopped if necessary */
+ disabled = rc ||
+ method == RESET_TYPE_DISABLE ||
+ method == RESET_TYPE_RECOVER_OR_DISABLE;
+ rc2 = efx_reset_up(efx, method, !disabled);
+ if (rc2) {
+ disabled = true;
+ if (!rc)
+ rc = rc2;
+ }
+
+ if (disabled) {
+ dev_close(efx->net_dev);
+ netif_err(efx, drv, efx->net_dev, "has been disabled\n");
+ efx->state = STATE_DISABLED;
+ } else {
+ netif_dbg(efx, drv, efx->net_dev, "reset complete\n");
+ efx_device_attach_if_not_resetting(efx);
+ }
+ return rc;
+}
+
+/* The worker thread exists so that code that cannot sleep can
+ * schedule a reset for later.
+ */
+static void efx_reset_work(struct work_struct *data)
+{
+ struct efx_nic *efx = container_of(data, struct efx_nic, reset_work);
+ unsigned long pending;
+ enum reset_type method;
+
+ pending = READ_ONCE(efx->reset_pending);
+ method = fls(pending) - 1;
+
+ if (method == RESET_TYPE_MC_BIST)
+ efx_wait_for_bist_end(efx);
+
+ if ((method == RESET_TYPE_RECOVER_OR_DISABLE ||
+ method == RESET_TYPE_RECOVER_OR_ALL) &&
+ efx_try_recovery(efx))
+ return;
+
+ if (!pending)
+ return;
+
+ rtnl_lock();
+
+ /* We checked the state in efx_schedule_reset() but it may
+ * have changed by now. Now that we have the RTNL lock,
+ * it cannot change again.
+ */
+ if (efx->state == STATE_READY)
+ (void)efx_reset(efx, method);
+
+ rtnl_unlock();
+}
+
+void efx_schedule_reset(struct efx_nic *efx, enum reset_type type)
+{
+ enum reset_type method;
+
+ if (efx->state == STATE_RECOVERY) {
+ netif_dbg(efx, drv, efx->net_dev,
+ "recovering: skip scheduling %s reset\n",
+ RESET_TYPE(type));
+ return;
+ }
+
+ switch (type) {
+ case RESET_TYPE_INVISIBLE:
+ case RESET_TYPE_ALL:
+ case RESET_TYPE_RECOVER_OR_ALL:
+ case RESET_TYPE_WORLD:
+ case RESET_TYPE_DISABLE:
+ case RESET_TYPE_RECOVER_OR_DISABLE:
+ case RESET_TYPE_DATAPATH:
+ case RESET_TYPE_MC_BIST:
+ case RESET_TYPE_MCDI_TIMEOUT:
+ method = type;
+ netif_dbg(efx, drv, efx->net_dev, "scheduling %s reset\n",
+ RESET_TYPE(method));
+ break;
+ default:
+ method = efx->type->map_reset_reason(type);
+ netif_dbg(efx, drv, efx->net_dev,
+ "scheduling %s reset for %s\n",
+ RESET_TYPE(method), RESET_TYPE(type));
+ break;
+ }
+
+ set_bit(method, &efx->reset_pending);
+ smp_mb(); /* ensure we change reset_pending before checking state */
+
+ /* If we're not READY then just leave the flags set as the cue
+ * to abort probing or reschedule the reset later.
+ */
+ if (READ_ONCE(efx->state) != STATE_READY)
+ return;
+
+ /* efx_process_channel() will no longer read events once a
+ * reset is scheduled. So switch back to poll'd MCDI completions.
+ */
+ efx_mcdi_mode_poll(efx);
+
+ efx_queue_reset_work(efx);
+}
+
+/**************************************************************************
+ *
+ * Dummy PHY/MAC operations
+ *
+ * Can be used for some unimplemented operations
+ * Needed so all function pointers are valid and do not have to be tested
+ * before use
+ *
+ **************************************************************************/
+int efx_port_dummy_op_int(struct efx_nic *efx)
+{
+ return 0;
+}
+void efx_port_dummy_op_void(struct efx_nic *efx) {}
+
+static bool efx_port_dummy_op_poll(struct efx_nic *efx)
+{
+ return false;
+}
+
+static const struct efx_phy_operations efx_dummy_phy_operations = {
+ .init = efx_port_dummy_op_int,
+ .reconfigure = efx_port_dummy_op_int,
+ .poll = efx_port_dummy_op_poll,
+ .fini = efx_port_dummy_op_void,
+};
+
+/**************************************************************************
+ *
+ * Data housekeeping
+ *
+ **************************************************************************/
+
+/* This zeroes out and then fills in the invariants in a struct
+ * efx_nic (including all sub-structures).
+ */
+int efx_init_struct(struct efx_nic *efx,
+ struct pci_dev *pci_dev, struct net_device *net_dev)
+{
+ int rc = -ENOMEM;
+
+ /* Initialise common structures */
+ INIT_LIST_HEAD(&efx->node);
+ INIT_LIST_HEAD(&efx->secondary_list);
+ spin_lock_init(&efx->biu_lock);
+#ifdef CONFIG_SFC_MTD
+ INIT_LIST_HEAD(&efx->mtd_list);
+#endif
+ INIT_WORK(&efx->reset_work, efx_reset_work);
+ INIT_DELAYED_WORK(&efx->monitor_work, efx_monitor);
+ efx_selftest_async_init(efx);
+ efx->pci_dev = pci_dev;
+ efx->msg_enable = debug;
+ efx->state = STATE_UNINIT;
+ strlcpy(efx->name, pci_name(pci_dev), sizeof(efx->name));
+
+ efx->net_dev = net_dev;
+ efx->rx_prefix_size = efx->type->rx_prefix_size;
+ efx->rx_ip_align =
+ NET_IP_ALIGN ? (efx->rx_prefix_size + NET_IP_ALIGN) % 4 : 0;
+ efx->rx_packet_hash_offset =
+ efx->type->rx_hash_offset - efx->type->rx_prefix_size;
+ efx->rx_packet_ts_offset =
+ efx->type->rx_ts_offset - efx->type->rx_prefix_size;
+ INIT_LIST_HEAD(&efx->rss_context.list);
+ mutex_init(&efx->rss_lock);
+ spin_lock_init(&efx->stats_lock);
+ efx->vi_stride = EFX_DEFAULT_VI_STRIDE;
+ efx->num_mac_stats = MC_CMD_MAC_NSTATS;
+ BUILD_BUG_ON(MC_CMD_MAC_NSTATS - 1 != MC_CMD_MAC_GENERATION_END);
+ mutex_init(&efx->mac_lock);
+#ifdef CONFIG_RFS_ACCEL
+ mutex_init(&efx->rps_mutex);
+ spin_lock_init(&efx->rps_hash_lock);
+ /* Failure to allocate is not fatal, but may degrade ARFS performance */
+ efx->rps_hash_table = kcalloc(EFX_ARFS_HASH_TABLE_SIZE,
+ sizeof(*efx->rps_hash_table), GFP_KERNEL);
+#endif
+ efx->phy_op = &efx_dummy_phy_operations;
+ efx->mdio.dev = net_dev;
+ INIT_WORK(&efx->mac_work, efx_mac_work);
+ init_waitqueue_head(&efx->flush_wq);
+
+ rc = efx_init_channels(efx);
+ if (rc)
+ goto fail;
+
+ /* Would be good to use the net_dev name, but we're too early */
+ snprintf(efx->workqueue_name, sizeof(efx->workqueue_name), "sfc%s",
+ pci_name(pci_dev));
+ efx->workqueue = create_singlethread_workqueue(efx->workqueue_name);
+ if (!efx->workqueue) {
+ rc = -ENOMEM;
+ goto fail;
+ }
+
+ return 0;
+
+fail:
+ efx_fini_struct(efx);
+ return rc;
+}
+
+void efx_fini_struct(struct efx_nic *efx)
+{
+#ifdef CONFIG_RFS_ACCEL
+ kfree(efx->rps_hash_table);
+#endif
+
+ efx_fini_channels(efx);
+
+ kfree(efx->vpd_sn);
+
+ if (efx->workqueue) {
+ destroy_workqueue(efx->workqueue);
+ efx->workqueue = NULL;
+ }
+}
+
+/* This configures the PCI device to enable I/O and DMA. */
+int efx_init_io(struct efx_nic *efx, int bar, dma_addr_t dma_mask,
+ unsigned int mem_map_size)
+{
+ struct pci_dev *pci_dev = efx->pci_dev;
+ int rc;
+
+ netif_dbg(efx, probe, efx->net_dev, "initialising I/O\n");
+
+ rc = pci_enable_device(pci_dev);
+ if (rc) {
+ netif_err(efx, probe, efx->net_dev,
+ "failed to enable PCI device\n");
+ goto fail1;
+ }
+
+ pci_set_master(pci_dev);
+
+ /* Set the PCI DMA mask. Try all possibilities from our
+ * genuine mask down to 32 bits, because some architectures
+ * (e.g. x86_64 with iommu_sac_force set) will allow 40 bit
+ * masks event though they reject 46 bit masks.
+ */
+ while (dma_mask > 0x7fffffffUL) {
+ rc = dma_set_mask_and_coherent(&pci_dev->dev, dma_mask);
+ if (rc == 0)
+ break;
+ dma_mask >>= 1;
+ }
+ if (rc) {
+ netif_err(efx, probe, efx->net_dev,
+ "could not find a suitable DMA mask\n");
+ goto fail2;
+ }
+ netif_dbg(efx, probe, efx->net_dev,
+ "using DMA mask %llx\n", (unsigned long long)dma_mask);
+
+ efx->membase_phys = pci_resource_start(efx->pci_dev, bar);
+ if (!efx->membase_phys) {
+ netif_err(efx, probe, efx->net_dev,
+ "ERROR: No BAR%d mapping from the BIOS. "
+ "Try pci=realloc on the kernel command line\n", bar);
+ rc = -ENODEV;
+ goto fail3;
+ }
+
+ rc = pci_request_region(pci_dev, bar, "sfc");
+ if (rc) {
+ netif_err(efx, probe, efx->net_dev,
+ "request for memory BAR failed\n");
+ rc = -EIO;
+ goto fail3;
+ }
+
+ efx->membase = ioremap_nocache(efx->membase_phys, mem_map_size);
+ if (!efx->membase) {
+ netif_err(efx, probe, efx->net_dev,
+ "could not map memory BAR at %llx+%x\n",
+ (unsigned long long)efx->membase_phys, mem_map_size);
+ rc = -ENOMEM;
+ goto fail4;
+ }
+ netif_dbg(efx, probe, efx->net_dev,
+ "memory BAR at %llx+%x (virtual %p)\n",
+ (unsigned long long)efx->membase_phys, mem_map_size,
+ efx->membase);
+
+ return 0;
+
+fail4:
+ pci_release_region(efx->pci_dev, bar);
+fail3:
+ efx->membase_phys = 0;
+fail2:
+ pci_disable_device(efx->pci_dev);
+fail1:
+ return rc;
+}
+
+void efx_fini_io(struct efx_nic *efx, int bar)
+{
+ netif_dbg(efx, drv, efx->net_dev, "shutting down I/O\n");
+
+ if (efx->membase) {
+ iounmap(efx->membase);
+ efx->membase = NULL;
+ }
+
+ if (efx->membase_phys) {
+ pci_release_region(efx->pci_dev, bar);
+ efx->membase_phys = 0;
+ }
+
+ /* Don't disable bus-mastering if VFs are assigned */
+ if (!pci_vfs_assigned(efx->pci_dev))
+ pci_disable_device(efx->pci_dev);
+}
+
+#ifdef CONFIG_SFC_MCDI_LOGGING
+static ssize_t show_mcdi_log(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct efx_nic *efx = dev_get_drvdata(dev);
+ struct efx_mcdi_iface *mcdi = efx_mcdi(efx);
+
+ return scnprintf(buf, PAGE_SIZE, "%d\n", mcdi->logging_enabled);
+}
+
+static ssize_t set_mcdi_log(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct efx_nic *efx = dev_get_drvdata(dev);
+ struct efx_mcdi_iface *mcdi = efx_mcdi(efx);
+ bool enable = count > 0 && *buf != '0';
+
+ mcdi->logging_enabled = enable;
+ return count;
+}
+
+static DEVICE_ATTR(mcdi_logging, 0644, show_mcdi_log, set_mcdi_log);
+
+void efx_init_mcdi_logging(struct efx_nic *efx)
+{
+ int rc = device_create_file(&efx->pci_dev->dev, &dev_attr_mcdi_logging);
+
+ if (rc) {
+ netif_warn(efx, drv, efx->net_dev,
+ "failed to init net dev attributes\n");
+ }
+}
+
+void efx_fini_mcdi_logging(struct efx_nic *efx)
+{
+ device_remove_file(&efx->pci_dev->dev, &dev_attr_mcdi_logging);
+}
+#endif
diff --git a/drivers/net/ethernet/sfc/efx_common.h b/drivers/net/ethernet/sfc/efx_common.h
new file mode 100644
index 000000000000..fa2fc681e7f9
--- /dev/null
+++ b/drivers/net/ethernet/sfc/efx_common.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2018 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#ifndef EFX_COMMON_H
+#define EFX_COMMON_H
+
+int efx_init_io(struct efx_nic *efx, int bar, dma_addr_t dma_mask,
+ unsigned int mem_map_size);
+void efx_fini_io(struct efx_nic *efx, int bar);
+int efx_init_struct(struct efx_nic *efx, struct pci_dev *pci_dev,
+ struct net_device *net_dev);
+void efx_fini_struct(struct efx_nic *efx);
+
+void efx_start_all(struct efx_nic *efx);
+void efx_stop_all(struct efx_nic *efx);
+
+void efx_net_stats(struct net_device *net_dev, struct rtnl_link_stats64 *stats);
+
+int efx_create_reset_workqueue(void);
+void efx_queue_reset_work(struct efx_nic *efx);
+void efx_flush_reset_workqueue(struct efx_nic *efx);
+void efx_destroy_reset_workqueue(void);
+
+void efx_start_monitor(struct efx_nic *efx);
+
+int __efx_reconfigure_port(struct efx_nic *efx);
+int efx_reconfigure_port(struct efx_nic *efx);
+
+#define EFX_ASSERT_RESET_SERIALISED(efx) \
+ do { \
+ if ((efx->state == STATE_READY) || \
+ (efx->state == STATE_RECOVERY) || \
+ (efx->state == STATE_DISABLED)) \
+ ASSERT_RTNL(); \
+ } while (0)
+
+int efx_try_recovery(struct efx_nic *efx);
+void efx_reset_down(struct efx_nic *efx, enum reset_type method);
+int efx_reset_up(struct efx_nic *efx, enum reset_type method, bool ok);
+int efx_reset(struct efx_nic *efx, enum reset_type method);
+void efx_schedule_reset(struct efx_nic *efx, enum reset_type type);
+
+static inline int efx_check_disabled(struct efx_nic *efx)
+{
+ if (efx->state == STATE_DISABLED || efx->state == STATE_RECOVERY) {
+ netif_err(efx, drv, efx->net_dev,
+ "device is disabled due to earlier errors\n");
+ return -EIO;
+ }
+ return 0;
+}
+
+#ifdef CONFIG_SFC_MCDI_LOGGING
+void efx_init_mcdi_logging(struct efx_nic *efx);
+void efx_fini_mcdi_logging(struct efx_nic *efx);
+#else
+static inline void efx_init_mcdi_logging(struct efx_nic *efx) {}
+static inline void efx_fini_mcdi_logging(struct efx_nic *efx) {}
+#endif
+
+void efx_mac_reconfigure(struct efx_nic *efx);
+void efx_link_status_changed(struct efx_nic *efx);
+unsigned int efx_xdp_max_mtu(struct efx_nic *efx);
+int efx_change_mtu(struct net_device *net_dev, int new_mtu);
+
+#endif
diff --git a/drivers/net/ethernet/sfc/ethtool.c b/drivers/net/ethernet/sfc/ethtool.c
index b31032da4bcb..993b5769525b 100644
--- a/drivers/net/ethernet/sfc/ethtool.c
+++ b/drivers/net/ethernet/sfc/ethtool.c
@@ -13,92 +13,13 @@
#include "workarounds.h"
#include "selftest.h"
#include "efx.h"
+#include "efx_channels.h"
+#include "rx_common.h"
+#include "tx_common.h"
+#include "ethtool_common.h"
#include "filter.h"
#include "nic.h"
-struct efx_sw_stat_desc {
- const char *name;
- enum {
- EFX_ETHTOOL_STAT_SOURCE_nic,
- EFX_ETHTOOL_STAT_SOURCE_channel,
- EFX_ETHTOOL_STAT_SOURCE_tx_queue
- } source;
- unsigned offset;
- u64(*get_stat) (void *field); /* Reader function */
-};
-
-/* Initialiser for a struct efx_sw_stat_desc with type-checking */
-#define EFX_ETHTOOL_STAT(stat_name, source_name, field, field_type, \
- get_stat_function) { \
- .name = #stat_name, \
- .source = EFX_ETHTOOL_STAT_SOURCE_##source_name, \
- .offset = ((((field_type *) 0) == \
- &((struct efx_##source_name *)0)->field) ? \
- offsetof(struct efx_##source_name, field) : \
- offsetof(struct efx_##source_name, field)), \
- .get_stat = get_stat_function, \
-}
-
-static u64 efx_get_uint_stat(void *field)
-{
- return *(unsigned int *)field;
-}
-
-static u64 efx_get_atomic_stat(void *field)
-{
- return atomic_read((atomic_t *) field);
-}
-
-#define EFX_ETHTOOL_ATOMIC_NIC_ERROR_STAT(field) \
- EFX_ETHTOOL_STAT(field, nic, field, \
- atomic_t, efx_get_atomic_stat)
-
-#define EFX_ETHTOOL_UINT_CHANNEL_STAT(field) \
- EFX_ETHTOOL_STAT(field, channel, n_##field, \
- unsigned int, efx_get_uint_stat)
-#define EFX_ETHTOOL_UINT_CHANNEL_STAT_NO_N(field) \
- EFX_ETHTOOL_STAT(field, channel, field, \
- unsigned int, efx_get_uint_stat)
-
-#define EFX_ETHTOOL_UINT_TXQ_STAT(field) \
- EFX_ETHTOOL_STAT(tx_##field, tx_queue, field, \
- unsigned int, efx_get_uint_stat)
-
-static const struct efx_sw_stat_desc efx_sw_stat_desc[] = {
- EFX_ETHTOOL_UINT_TXQ_STAT(merge_events),
- EFX_ETHTOOL_UINT_TXQ_STAT(tso_bursts),
- EFX_ETHTOOL_UINT_TXQ_STAT(tso_long_headers),
- EFX_ETHTOOL_UINT_TXQ_STAT(tso_packets),
- EFX_ETHTOOL_UINT_TXQ_STAT(tso_fallbacks),
- EFX_ETHTOOL_UINT_TXQ_STAT(pushes),
- EFX_ETHTOOL_UINT_TXQ_STAT(pio_packets),
- EFX_ETHTOOL_UINT_TXQ_STAT(cb_packets),
- EFX_ETHTOOL_ATOMIC_NIC_ERROR_STAT(rx_reset),
- EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_tobe_disc),
- EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_ip_hdr_chksum_err),
- EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_tcp_udp_chksum_err),
- EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_inner_ip_hdr_chksum_err),
- EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_inner_tcp_udp_chksum_err),
- EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_outer_ip_hdr_chksum_err),
- EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_outer_tcp_udp_chksum_err),
- EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_eth_crc_err),
- EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_mcast_mismatch),
- EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_frm_trunc),
- EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_merge_events),
- EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_merge_packets),
- EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_xdp_drops),
- EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_xdp_bad_drops),
- EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_xdp_tx),
- EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_xdp_redirect),
-#ifdef CONFIG_RFS_ACCEL
- EFX_ETHTOOL_UINT_CHANNEL_STAT_NO_N(rfs_filter_count),
- EFX_ETHTOOL_UINT_CHANNEL_STAT(rfs_succeeded),
- EFX_ETHTOOL_UINT_CHANNEL_STAT(rfs_failed),
-#endif
-};
-
-#define EFX_ETHTOOL_SW_STAT_COUNT ARRAY_SIZE(efx_sw_stat_desc)
-
#define EFX_ETHTOOL_EEPROM_MAGIC 0xEFAB
/**************************************************************************
@@ -185,18 +106,6 @@ efx_ethtool_set_link_ksettings(struct net_device *net_dev,
return rc;
}
-static void efx_ethtool_get_drvinfo(struct net_device *net_dev,
- struct ethtool_drvinfo *info)
-{
- struct efx_nic *efx = netdev_priv(net_dev);
-
- strlcpy(info->driver, KBUILD_MODNAME, sizeof(info->driver));
- strlcpy(info->version, EFX_DRIVER_VERSION, sizeof(info->version));
- efx_mcdi_print_fwver(efx, info->fw_version,
- sizeof(info->fw_version));
- strlcpy(info->bus_info, pci_name(efx->pci_dev), sizeof(info->bus_info));
-}
-
static int efx_ethtool_get_regs_len(struct net_device *net_dev)
{
return efx_nic_get_regs_len(netdev_priv(net_dev));
@@ -211,341 +120,6 @@ static void efx_ethtool_get_regs(struct net_device *net_dev,
efx_nic_get_regs(efx, buf);
}
-static u32 efx_ethtool_get_msglevel(struct net_device *net_dev)
-{
- struct efx_nic *efx = netdev_priv(net_dev);
- return efx->msg_enable;
-}
-
-static void efx_ethtool_set_msglevel(struct net_device *net_dev, u32 msg_enable)
-{
- struct efx_nic *efx = netdev_priv(net_dev);
- efx->msg_enable = msg_enable;
-}
-
-/**
- * efx_fill_test - fill in an individual self-test entry
- * @test_index: Index of the test
- * @strings: Ethtool strings, or %NULL
- * @data: Ethtool test results, or %NULL
- * @test: Pointer to test result (used only if data != %NULL)
- * @unit_format: Unit name format (e.g. "chan\%d")
- * @unit_id: Unit id (e.g. 0 for "chan0")
- * @test_format: Test name format (e.g. "loopback.\%s.tx.sent")
- * @test_id: Test id (e.g. "PHYXS" for "loopback.PHYXS.tx_sent")
- *
- * Fill in an individual self-test entry.
- */
-static void efx_fill_test(unsigned int test_index, u8 *strings, u64 *data,
- int *test, const char *unit_format, int unit_id,
- const char *test_format, const char *test_id)
-{
- char unit_str[ETH_GSTRING_LEN], test_str[ETH_GSTRING_LEN];
-
- /* Fill data value, if applicable */
- if (data)
- data[test_index] = *test;
-
- /* Fill string, if applicable */
- if (strings) {
- if (strchr(unit_format, '%'))
- snprintf(unit_str, sizeof(unit_str),
- unit_format, unit_id);
- else
- strcpy(unit_str, unit_format);
- snprintf(test_str, sizeof(test_str), test_format, test_id);
- snprintf(strings + test_index * ETH_GSTRING_LEN,
- ETH_GSTRING_LEN,
- "%-6s %-24s", unit_str, test_str);
- }
-}
-
-#define EFX_CHANNEL_NAME(_channel) "chan%d", _channel->channel
-#define EFX_TX_QUEUE_NAME(_tx_queue) "txq%d", _tx_queue->queue
-#define EFX_RX_QUEUE_NAME(_rx_queue) "rxq%d", _rx_queue->queue
-#define EFX_LOOPBACK_NAME(_mode, _counter) \
- "loopback.%s." _counter, STRING_TABLE_LOOKUP(_mode, efx_loopback_mode)
-
-/**
- * efx_fill_loopback_test - fill in a block of loopback self-test entries
- * @efx: Efx NIC
- * @lb_tests: Efx loopback self-test results structure
- * @mode: Loopback test mode
- * @test_index: Starting index of the test
- * @strings: Ethtool strings, or %NULL
- * @data: Ethtool test results, or %NULL
- *
- * Fill in a block of loopback self-test entries. Return new test
- * index.
- */
-static int efx_fill_loopback_test(struct efx_nic *efx,
- struct efx_loopback_self_tests *lb_tests,
- enum efx_loopback_mode mode,
- unsigned int test_index,
- u8 *strings, u64 *data)
-{
- struct efx_channel *channel =
- efx_get_channel(efx, efx->tx_channel_offset);
- struct efx_tx_queue *tx_queue;
-
- efx_for_each_channel_tx_queue(tx_queue, channel) {
- efx_fill_test(test_index++, strings, data,
- &lb_tests->tx_sent[tx_queue->queue],
- EFX_TX_QUEUE_NAME(tx_queue),
- EFX_LOOPBACK_NAME(mode, "tx_sent"));
- efx_fill_test(test_index++, strings, data,
- &lb_tests->tx_done[tx_queue->queue],
- EFX_TX_QUEUE_NAME(tx_queue),
- EFX_LOOPBACK_NAME(mode, "tx_done"));
- }
- efx_fill_test(test_index++, strings, data,
- &lb_tests->rx_good,
- "rx", 0,
- EFX_LOOPBACK_NAME(mode, "rx_good"));
- efx_fill_test(test_index++, strings, data,
- &lb_tests->rx_bad,
- "rx", 0,
- EFX_LOOPBACK_NAME(mode, "rx_bad"));
-
- return test_index;
-}
-
-/**
- * efx_ethtool_fill_self_tests - get self-test details
- * @efx: Efx NIC
- * @tests: Efx self-test results structure, or %NULL
- * @strings: Ethtool strings, or %NULL
- * @data: Ethtool test results, or %NULL
- *
- * Get self-test number of strings, strings, and/or test results.
- * Return number of strings (== number of test results).
- *
- * The reason for merging these three functions is to make sure that
- * they can never be inconsistent.
- */
-static int efx_ethtool_fill_self_tests(struct efx_nic *efx,
- struct efx_self_tests *tests,
- u8 *strings, u64 *data)
-{
- struct efx_channel *channel;
- unsigned int n = 0, i;
- enum efx_loopback_mode mode;
-
- efx_fill_test(n++, strings, data, &tests->phy_alive,
- "phy", 0, "alive", NULL);
- efx_fill_test(n++, strings, data, &tests->nvram,
- "core", 0, "nvram", NULL);
- efx_fill_test(n++, strings, data, &tests->interrupt,
- "core", 0, "interrupt", NULL);
-
- /* Event queues */
- efx_for_each_channel(channel, efx) {
- efx_fill_test(n++, strings, data,
- &tests->eventq_dma[channel->channel],
- EFX_CHANNEL_NAME(channel),
- "eventq.dma", NULL);
- efx_fill_test(n++, strings, data,
- &tests->eventq_int[channel->channel],
- EFX_CHANNEL_NAME(channel),
- "eventq.int", NULL);
- }
-
- efx_fill_test(n++, strings, data, &tests->memory,
- "core", 0, "memory", NULL);
- efx_fill_test(n++, strings, data, &tests->registers,
- "core", 0, "registers", NULL);
-
- if (efx->phy_op->run_tests != NULL) {
- EFX_WARN_ON_PARANOID(efx->phy_op->test_name == NULL);
-
- for (i = 0; true; ++i) {
- const char *name;
-
- EFX_WARN_ON_PARANOID(i >= EFX_MAX_PHY_TESTS);
- name = efx->phy_op->test_name(efx, i);
- if (name == NULL)
- break;
-
- efx_fill_test(n++, strings, data, &tests->phy_ext[i],
- "phy", 0, name, NULL);
- }
- }
-
- /* Loopback tests */
- for (mode = LOOPBACK_NONE; mode <= LOOPBACK_TEST_MAX; mode++) {
- if (!(efx->loopback_modes & (1 << mode)))
- continue;
- n = efx_fill_loopback_test(efx,
- &tests->loopback[mode], mode, n,
- strings, data);
- }
-
- return n;
-}
-
-static size_t efx_describe_per_queue_stats(struct efx_nic *efx, u8 *strings)
-{
- size_t n_stats = 0;
- struct efx_channel *channel;
-
- efx_for_each_channel(channel, efx) {
- if (efx_channel_has_tx_queues(channel)) {
- n_stats++;
- if (strings != NULL) {
- snprintf(strings, ETH_GSTRING_LEN,
- "tx-%u.tx_packets",
- channel->tx_queue[0].queue /
- EFX_TXQ_TYPES);
-
- strings += ETH_GSTRING_LEN;
- }
- }
- }
- efx_for_each_channel(channel, efx) {
- if (efx_channel_has_rx_queue(channel)) {
- n_stats++;
- if (strings != NULL) {
- snprintf(strings, ETH_GSTRING_LEN,
- "rx-%d.rx_packets", channel->channel);
- strings += ETH_GSTRING_LEN;
- }
- }
- }
- if (efx->xdp_tx_queue_count && efx->xdp_tx_queues) {
- unsigned short xdp;
-
- for (xdp = 0; xdp < efx->xdp_tx_queue_count; xdp++) {
- n_stats++;
- if (strings) {
- snprintf(strings, ETH_GSTRING_LEN,
- "tx-xdp-cpu-%hu.tx_packets", xdp);
- strings += ETH_GSTRING_LEN;
- }
- }
- }
-
- return n_stats;
-}
-
-static int efx_ethtool_get_sset_count(struct net_device *net_dev,
- int string_set)
-{
- struct efx_nic *efx = netdev_priv(net_dev);
-
- switch (string_set) {
- case ETH_SS_STATS:
- return efx->type->describe_stats(efx, NULL) +
- EFX_ETHTOOL_SW_STAT_COUNT +
- efx_describe_per_queue_stats(efx, NULL) +
- efx_ptp_describe_stats(efx, NULL);
- case ETH_SS_TEST:
- return efx_ethtool_fill_self_tests(efx, NULL, NULL, NULL);
- default:
- return -EINVAL;
- }
-}
-
-static void efx_ethtool_get_strings(struct net_device *net_dev,
- u32 string_set, u8 *strings)
-{
- struct efx_nic *efx = netdev_priv(net_dev);
- int i;
-
- switch (string_set) {
- case ETH_SS_STATS:
- strings += (efx->type->describe_stats(efx, strings) *
- ETH_GSTRING_LEN);
- for (i = 0; i < EFX_ETHTOOL_SW_STAT_COUNT; i++)
- strlcpy(strings + i * ETH_GSTRING_LEN,
- efx_sw_stat_desc[i].name, ETH_GSTRING_LEN);
- strings += EFX_ETHTOOL_SW_STAT_COUNT * ETH_GSTRING_LEN;
- strings += (efx_describe_per_queue_stats(efx, strings) *
- ETH_GSTRING_LEN);
- efx_ptp_describe_stats(efx, strings);
- break;
- case ETH_SS_TEST:
- efx_ethtool_fill_self_tests(efx, NULL, strings, NULL);
- break;
- default:
- /* No other string sets */
- break;
- }
-}
-
-static void efx_ethtool_get_stats(struct net_device *net_dev,
- struct ethtool_stats *stats,
- u64 *data)
-{
- struct efx_nic *efx = netdev_priv(net_dev);
- const struct efx_sw_stat_desc *stat;
- struct efx_channel *channel;
- struct efx_tx_queue *tx_queue;
- struct efx_rx_queue *rx_queue;
- int i;
-
- spin_lock_bh(&efx->stats_lock);
-
- /* Get NIC statistics */
- data += efx->type->update_stats(efx, data, NULL);
-
- /* Get software statistics */
- for (i = 0; i < EFX_ETHTOOL_SW_STAT_COUNT; i++) {
- stat = &efx_sw_stat_desc[i];
- switch (stat->source) {
- case EFX_ETHTOOL_STAT_SOURCE_nic:
- data[i] = stat->get_stat((void *)efx + stat->offset);
- break;
- case EFX_ETHTOOL_STAT_SOURCE_channel:
- data[i] = 0;
- efx_for_each_channel(channel, efx)
- data[i] += stat->get_stat((void *)channel +
- stat->offset);
- break;
- case EFX_ETHTOOL_STAT_SOURCE_tx_queue:
- data[i] = 0;
- efx_for_each_channel(channel, efx) {
- efx_for_each_channel_tx_queue(tx_queue, channel)
- data[i] +=
- stat->get_stat((void *)tx_queue
- + stat->offset);
- }
- break;
- }
- }
- data += EFX_ETHTOOL_SW_STAT_COUNT;
-
- spin_unlock_bh(&efx->stats_lock);
-
- efx_for_each_channel(channel, efx) {
- if (efx_channel_has_tx_queues(channel)) {
- *data = 0;
- efx_for_each_channel_tx_queue(tx_queue, channel) {
- *data += tx_queue->tx_packets;
- }
- data++;
- }
- }
- efx_for_each_channel(channel, efx) {
- if (efx_channel_has_rx_queue(channel)) {
- *data = 0;
- efx_for_each_channel_rx_queue(rx_queue, channel) {
- *data += rx_queue->rx_packets;
- }
- data++;
- }
- }
- if (efx->xdp_tx_queue_count && efx->xdp_tx_queues) {
- int xdp;
-
- for (xdp = 0; xdp < efx->xdp_tx_queue_count; xdp++) {
- data[0] = efx->xdp_tx_queues[xdp]->tx_packets;
- data++;
- }
- }
-
- efx_ptp_update_stats(efx, data);
-}
-
static void efx_ethtool_self_test(struct net_device *net_dev,
struct ethtool_test *test, u64 *data)
{
@@ -787,16 +361,6 @@ out:
return rc;
}
-static void efx_ethtool_get_pauseparam(struct net_device *net_dev,
- struct ethtool_pauseparam *pause)
-{
- struct efx_nic *efx = netdev_priv(net_dev);
-
- pause->rx_pause = !!(efx->wanted_fc & EFX_FC_RX);
- pause->tx_pause = !!(efx->wanted_fc & EFX_FC_TX);
- pause->autoneg = !!(efx->wanted_fc & EFX_FC_AUTO);
-}
-
static void efx_ethtool_get_wol(struct net_device *net_dev,
struct ethtool_wolinfo *wol)
{
@@ -1456,7 +1020,7 @@ static int efx_ethtool_set_rxfh_context(struct net_device *net_dev,
rc = -ENOMEM;
goto out_unlock;
}
- ctx->context_id = EFX_EF10_RSS_CONTEXT_INVALID;
+ ctx->context_id = EFX_MCDI_RSS_CONTEXT_INVALID;
/* Initialise indir table and key to defaults */
efx_set_default_rx_indir_table(efx, ctx);
netdev_rss_key_fill(ctx->rx_hash_key, sizeof(ctx->rx_hash_key));
diff --git a/drivers/net/ethernet/sfc/ethtool_common.c b/drivers/net/ethernet/sfc/ethtool_common.c
new file mode 100644
index 000000000000..3d7f75cc5cf0
--- /dev/null
+++ b/drivers/net/ethernet/sfc/ethtool_common.c
@@ -0,0 +1,456 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2019 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include "net_driver.h"
+#include "mcdi.h"
+#include "nic.h"
+#include "selftest.h"
+#include "ethtool_common.h"
+
+struct efx_sw_stat_desc {
+ const char *name;
+ enum {
+ EFX_ETHTOOL_STAT_SOURCE_nic,
+ EFX_ETHTOOL_STAT_SOURCE_channel,
+ EFX_ETHTOOL_STAT_SOURCE_tx_queue
+ } source;
+ unsigned int offset;
+ u64 (*get_stat)(void *field); /* Reader function */
+};
+
+/* Initialiser for a struct efx_sw_stat_desc with type-checking */
+#define EFX_ETHTOOL_STAT(stat_name, source_name, field, field_type, \
+ get_stat_function) { \
+ .name = #stat_name, \
+ .source = EFX_ETHTOOL_STAT_SOURCE_##source_name, \
+ .offset = ((((field_type *) 0) == \
+ &((struct efx_##source_name *)0)->field) ? \
+ offsetof(struct efx_##source_name, field) : \
+ offsetof(struct efx_##source_name, field)), \
+ .get_stat = get_stat_function, \
+}
+
+static u64 efx_get_uint_stat(void *field)
+{
+ return *(unsigned int *)field;
+}
+
+static u64 efx_get_atomic_stat(void *field)
+{
+ return atomic_read((atomic_t *) field);
+}
+
+#define EFX_ETHTOOL_ATOMIC_NIC_ERROR_STAT(field) \
+ EFX_ETHTOOL_STAT(field, nic, field, \
+ atomic_t, efx_get_atomic_stat)
+
+#define EFX_ETHTOOL_UINT_CHANNEL_STAT(field) \
+ EFX_ETHTOOL_STAT(field, channel, n_##field, \
+ unsigned int, efx_get_uint_stat)
+#define EFX_ETHTOOL_UINT_CHANNEL_STAT_NO_N(field) \
+ EFX_ETHTOOL_STAT(field, channel, field, \
+ unsigned int, efx_get_uint_stat)
+
+#define EFX_ETHTOOL_UINT_TXQ_STAT(field) \
+ EFX_ETHTOOL_STAT(tx_##field, tx_queue, field, \
+ unsigned int, efx_get_uint_stat)
+
+static const struct efx_sw_stat_desc efx_sw_stat_desc[] = {
+ EFX_ETHTOOL_UINT_TXQ_STAT(merge_events),
+ EFX_ETHTOOL_UINT_TXQ_STAT(tso_bursts),
+ EFX_ETHTOOL_UINT_TXQ_STAT(tso_long_headers),
+ EFX_ETHTOOL_UINT_TXQ_STAT(tso_packets),
+ EFX_ETHTOOL_UINT_TXQ_STAT(tso_fallbacks),
+ EFX_ETHTOOL_UINT_TXQ_STAT(pushes),
+ EFX_ETHTOOL_UINT_TXQ_STAT(pio_packets),
+ EFX_ETHTOOL_UINT_TXQ_STAT(cb_packets),
+ EFX_ETHTOOL_ATOMIC_NIC_ERROR_STAT(rx_reset),
+ EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_tobe_disc),
+ EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_ip_hdr_chksum_err),
+ EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_tcp_udp_chksum_err),
+ EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_inner_ip_hdr_chksum_err),
+ EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_inner_tcp_udp_chksum_err),
+ EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_outer_ip_hdr_chksum_err),
+ EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_outer_tcp_udp_chksum_err),
+ EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_eth_crc_err),
+ EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_mcast_mismatch),
+ EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_frm_trunc),
+ EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_merge_events),
+ EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_merge_packets),
+ EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_xdp_drops),
+ EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_xdp_bad_drops),
+ EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_xdp_tx),
+ EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_xdp_redirect),
+#ifdef CONFIG_RFS_ACCEL
+ EFX_ETHTOOL_UINT_CHANNEL_STAT_NO_N(rfs_filter_count),
+ EFX_ETHTOOL_UINT_CHANNEL_STAT(rfs_succeeded),
+ EFX_ETHTOOL_UINT_CHANNEL_STAT(rfs_failed),
+#endif
+};
+
+#define EFX_ETHTOOL_SW_STAT_COUNT ARRAY_SIZE(efx_sw_stat_desc)
+
+void efx_ethtool_get_drvinfo(struct net_device *net_dev,
+ struct ethtool_drvinfo *info)
+{
+ struct efx_nic *efx = netdev_priv(net_dev);
+
+ strlcpy(info->driver, KBUILD_MODNAME, sizeof(info->driver));
+ strlcpy(info->version, EFX_DRIVER_VERSION, sizeof(info->version));
+ efx_mcdi_print_fwver(efx, info->fw_version,
+ sizeof(info->fw_version));
+ strlcpy(info->bus_info, pci_name(efx->pci_dev), sizeof(info->bus_info));
+}
+
+u32 efx_ethtool_get_msglevel(struct net_device *net_dev)
+{
+ struct efx_nic *efx = netdev_priv(net_dev);
+
+ return efx->msg_enable;
+}
+
+void efx_ethtool_set_msglevel(struct net_device *net_dev, u32 msg_enable)
+{
+ struct efx_nic *efx = netdev_priv(net_dev);
+
+ efx->msg_enable = msg_enable;
+}
+
+void efx_ethtool_get_pauseparam(struct net_device *net_dev,
+ struct ethtool_pauseparam *pause)
+{
+ struct efx_nic *efx = netdev_priv(net_dev);
+
+ pause->rx_pause = !!(efx->wanted_fc & EFX_FC_RX);
+ pause->tx_pause = !!(efx->wanted_fc & EFX_FC_TX);
+ pause->autoneg = !!(efx->wanted_fc & EFX_FC_AUTO);
+}
+
+/**
+ * efx_fill_test - fill in an individual self-test entry
+ * @test_index: Index of the test
+ * @strings: Ethtool strings, or %NULL
+ * @data: Ethtool test results, or %NULL
+ * @test: Pointer to test result (used only if data != %NULL)
+ * @unit_format: Unit name format (e.g. "chan\%d")
+ * @unit_id: Unit id (e.g. 0 for "chan0")
+ * @test_format: Test name format (e.g. "loopback.\%s.tx.sent")
+ * @test_id: Test id (e.g. "PHYXS" for "loopback.PHYXS.tx_sent")
+ *
+ * Fill in an individual self-test entry.
+ */
+void efx_fill_test(unsigned int test_index, u8 *strings, u64 *data,
+ int *test, const char *unit_format, int unit_id,
+ const char *test_format, const char *test_id)
+{
+ char unit_str[ETH_GSTRING_LEN], test_str[ETH_GSTRING_LEN];
+
+ /* Fill data value, if applicable */
+ if (data)
+ data[test_index] = *test;
+
+ /* Fill string, if applicable */
+ if (strings) {
+ if (strchr(unit_format, '%'))
+ snprintf(unit_str, sizeof(unit_str),
+ unit_format, unit_id);
+ else
+ strcpy(unit_str, unit_format);
+ snprintf(test_str, sizeof(test_str), test_format, test_id);
+ snprintf(strings + test_index * ETH_GSTRING_LEN,
+ ETH_GSTRING_LEN,
+ "%-6s %-24s", unit_str, test_str);
+ }
+}
+
+#define EFX_CHANNEL_NAME(_channel) "chan%d", _channel->channel
+#define EFX_TX_QUEUE_NAME(_tx_queue) "txq%d", _tx_queue->queue
+#define EFX_RX_QUEUE_NAME(_rx_queue) "rxq%d", _rx_queue->queue
+#define EFX_LOOPBACK_NAME(_mode, _counter) \
+ "loopback.%s." _counter, STRING_TABLE_LOOKUP(_mode, efx_loopback_mode)
+
+/**
+ * efx_fill_loopback_test - fill in a block of loopback self-test entries
+ * @efx: Efx NIC
+ * @lb_tests: Efx loopback self-test results structure
+ * @mode: Loopback test mode
+ * @test_index: Starting index of the test
+ * @strings: Ethtool strings, or %NULL
+ * @data: Ethtool test results, or %NULL
+ *
+ * Fill in a block of loopback self-test entries. Return new test
+ * index.
+ */
+int efx_fill_loopback_test(struct efx_nic *efx,
+ struct efx_loopback_self_tests *lb_tests,
+ enum efx_loopback_mode mode,
+ unsigned int test_index, u8 *strings, u64 *data)
+{
+ struct efx_channel *channel =
+ efx_get_channel(efx, efx->tx_channel_offset);
+ struct efx_tx_queue *tx_queue;
+
+ efx_for_each_channel_tx_queue(tx_queue, channel) {
+ efx_fill_test(test_index++, strings, data,
+ &lb_tests->tx_sent[tx_queue->queue],
+ EFX_TX_QUEUE_NAME(tx_queue),
+ EFX_LOOPBACK_NAME(mode, "tx_sent"));
+ efx_fill_test(test_index++, strings, data,
+ &lb_tests->tx_done[tx_queue->queue],
+ EFX_TX_QUEUE_NAME(tx_queue),
+ EFX_LOOPBACK_NAME(mode, "tx_done"));
+ }
+ efx_fill_test(test_index++, strings, data,
+ &lb_tests->rx_good,
+ "rx", 0,
+ EFX_LOOPBACK_NAME(mode, "rx_good"));
+ efx_fill_test(test_index++, strings, data,
+ &lb_tests->rx_bad,
+ "rx", 0,
+ EFX_LOOPBACK_NAME(mode, "rx_bad"));
+
+ return test_index;
+}
+
+/**
+ * efx_ethtool_fill_self_tests - get self-test details
+ * @efx: Efx NIC
+ * @tests: Efx self-test results structure, or %NULL
+ * @strings: Ethtool strings, or %NULL
+ * @data: Ethtool test results, or %NULL
+ *
+ * Get self-test number of strings, strings, and/or test results.
+ * Return number of strings (== number of test results).
+ *
+ * The reason for merging these three functions is to make sure that
+ * they can never be inconsistent.
+ */
+int efx_ethtool_fill_self_tests(struct efx_nic *efx,
+ struct efx_self_tests *tests,
+ u8 *strings, u64 *data)
+{
+ struct efx_channel *channel;
+ unsigned int n = 0, i;
+ enum efx_loopback_mode mode;
+
+ efx_fill_test(n++, strings, data, &tests->phy_alive,
+ "phy", 0, "alive", NULL);
+ efx_fill_test(n++, strings, data, &tests->nvram,
+ "core", 0, "nvram", NULL);
+ efx_fill_test(n++, strings, data, &tests->interrupt,
+ "core", 0, "interrupt", NULL);
+
+ /* Event queues */
+ efx_for_each_channel(channel, efx) {
+ efx_fill_test(n++, strings, data,
+ &tests->eventq_dma[channel->channel],
+ EFX_CHANNEL_NAME(channel),
+ "eventq.dma", NULL);
+ efx_fill_test(n++, strings, data,
+ &tests->eventq_int[channel->channel],
+ EFX_CHANNEL_NAME(channel),
+ "eventq.int", NULL);
+ }
+
+ efx_fill_test(n++, strings, data, &tests->memory,
+ "core", 0, "memory", NULL);
+ efx_fill_test(n++, strings, data, &tests->registers,
+ "core", 0, "registers", NULL);
+
+ if (efx->phy_op->run_tests != NULL) {
+ EFX_WARN_ON_PARANOID(efx->phy_op->test_name == NULL);
+
+ for (i = 0; true; ++i) {
+ const char *name;
+
+ EFX_WARN_ON_PARANOID(i >= EFX_MAX_PHY_TESTS);
+ name = efx->phy_op->test_name(efx, i);
+ if (name == NULL)
+ break;
+
+ efx_fill_test(n++, strings, data, &tests->phy_ext[i],
+ "phy", 0, name, NULL);
+ }
+ }
+
+ /* Loopback tests */
+ for (mode = LOOPBACK_NONE; mode <= LOOPBACK_TEST_MAX; mode++) {
+ if (!(efx->loopback_modes & (1 << mode)))
+ continue;
+ n = efx_fill_loopback_test(efx,
+ &tests->loopback[mode], mode, n,
+ strings, data);
+ }
+
+ return n;
+}
+
+size_t efx_describe_per_queue_stats(struct efx_nic *efx, u8 *strings)
+{
+ size_t n_stats = 0;
+ struct efx_channel *channel;
+
+ efx_for_each_channel(channel, efx) {
+ if (efx_channel_has_tx_queues(channel)) {
+ n_stats++;
+ if (strings != NULL) {
+ snprintf(strings, ETH_GSTRING_LEN,
+ "tx-%u.tx_packets",
+ channel->tx_queue[0].queue /
+ EFX_TXQ_TYPES);
+
+ strings += ETH_GSTRING_LEN;
+ }
+ }
+ }
+ efx_for_each_channel(channel, efx) {
+ if (efx_channel_has_rx_queue(channel)) {
+ n_stats++;
+ if (strings != NULL) {
+ snprintf(strings, ETH_GSTRING_LEN,
+ "rx-%d.rx_packets", channel->channel);
+ strings += ETH_GSTRING_LEN;
+ }
+ }
+ }
+ if (efx->xdp_tx_queue_count && efx->xdp_tx_queues) {
+ unsigned short xdp;
+
+ for (xdp = 0; xdp < efx->xdp_tx_queue_count; xdp++) {
+ n_stats++;
+ if (strings) {
+ snprintf(strings, ETH_GSTRING_LEN,
+ "tx-xdp-cpu-%hu.tx_packets", xdp);
+ strings += ETH_GSTRING_LEN;
+ }
+ }
+ }
+
+ return n_stats;
+}
+
+int efx_ethtool_get_sset_count(struct net_device *net_dev, int string_set)
+{
+ struct efx_nic *efx = netdev_priv(net_dev);
+
+ switch (string_set) {
+ case ETH_SS_STATS:
+ return efx->type->describe_stats(efx, NULL) +
+ EFX_ETHTOOL_SW_STAT_COUNT +
+ efx_describe_per_queue_stats(efx, NULL) +
+ efx_ptp_describe_stats(efx, NULL);
+ case ETH_SS_TEST:
+ return efx_ethtool_fill_self_tests(efx, NULL, NULL, NULL);
+ default:
+ return -EINVAL;
+ }
+}
+
+void efx_ethtool_get_strings(struct net_device *net_dev,
+ u32 string_set, u8 *strings)
+{
+ struct efx_nic *efx = netdev_priv(net_dev);
+ int i;
+
+ switch (string_set) {
+ case ETH_SS_STATS:
+ strings += (efx->type->describe_stats(efx, strings) *
+ ETH_GSTRING_LEN);
+ for (i = 0; i < EFX_ETHTOOL_SW_STAT_COUNT; i++)
+ strlcpy(strings + i * ETH_GSTRING_LEN,
+ efx_sw_stat_desc[i].name, ETH_GSTRING_LEN);
+ strings += EFX_ETHTOOL_SW_STAT_COUNT * ETH_GSTRING_LEN;
+ strings += (efx_describe_per_queue_stats(efx, strings) *
+ ETH_GSTRING_LEN);
+ efx_ptp_describe_stats(efx, strings);
+ break;
+ case ETH_SS_TEST:
+ efx_ethtool_fill_self_tests(efx, NULL, strings, NULL);
+ break;
+ default:
+ /* No other string sets */
+ break;
+ }
+}
+
+void efx_ethtool_get_stats(struct net_device *net_dev,
+ struct ethtool_stats *stats,
+ u64 *data)
+{
+ struct efx_nic *efx = netdev_priv(net_dev);
+ const struct efx_sw_stat_desc *stat;
+ struct efx_channel *channel;
+ struct efx_tx_queue *tx_queue;
+ struct efx_rx_queue *rx_queue;
+ int i;
+
+ spin_lock_bh(&efx->stats_lock);
+
+ /* Get NIC statistics */
+ data += efx->type->update_stats(efx, data, NULL);
+
+ /* Get software statistics */
+ for (i = 0; i < EFX_ETHTOOL_SW_STAT_COUNT; i++) {
+ stat = &efx_sw_stat_desc[i];
+ switch (stat->source) {
+ case EFX_ETHTOOL_STAT_SOURCE_nic:
+ data[i] = stat->get_stat((void *)efx + stat->offset);
+ break;
+ case EFX_ETHTOOL_STAT_SOURCE_channel:
+ data[i] = 0;
+ efx_for_each_channel(channel, efx)
+ data[i] += stat->get_stat((void *)channel +
+ stat->offset);
+ break;
+ case EFX_ETHTOOL_STAT_SOURCE_tx_queue:
+ data[i] = 0;
+ efx_for_each_channel(channel, efx) {
+ efx_for_each_channel_tx_queue(tx_queue, channel)
+ data[i] +=
+ stat->get_stat((void *)tx_queue
+ + stat->offset);
+ }
+ break;
+ }
+ }
+ data += EFX_ETHTOOL_SW_STAT_COUNT;
+
+ spin_unlock_bh(&efx->stats_lock);
+
+ efx_for_each_channel(channel, efx) {
+ if (efx_channel_has_tx_queues(channel)) {
+ *data = 0;
+ efx_for_each_channel_tx_queue(tx_queue, channel) {
+ *data += tx_queue->tx_packets;
+ }
+ data++;
+ }
+ }
+ efx_for_each_channel(channel, efx) {
+ if (efx_channel_has_rx_queue(channel)) {
+ *data = 0;
+ efx_for_each_channel_rx_queue(rx_queue, channel) {
+ *data += rx_queue->rx_packets;
+ }
+ data++;
+ }
+ }
+ if (efx->xdp_tx_queue_count && efx->xdp_tx_queues) {
+ int xdp;
+
+ for (xdp = 0; xdp < efx->xdp_tx_queue_count; xdp++) {
+ data[0] = efx->xdp_tx_queues[xdp]->tx_packets;
+ data++;
+ }
+ }
+
+ efx_ptp_update_stats(efx, data);
+}
diff --git a/drivers/net/ethernet/sfc/ethtool_common.h b/drivers/net/ethernet/sfc/ethtool_common.h
new file mode 100644
index 000000000000..fa624313f330
--- /dev/null
+++ b/drivers/net/ethernet/sfc/ethtool_common.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2019 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#ifndef EFX_ETHTOOL_COMMON_H
+#define EFX_ETHTOOL_COMMON_H
+
+void efx_ethtool_get_drvinfo(struct net_device *net_dev,
+ struct ethtool_drvinfo *info);
+u32 efx_ethtool_get_msglevel(struct net_device *net_dev);
+void efx_ethtool_set_msglevel(struct net_device *net_dev, u32 msg_enable);
+void efx_ethtool_get_pauseparam(struct net_device *net_dev,
+ struct ethtool_pauseparam *pause);
+int efx_ethtool_fill_self_tests(struct efx_nic *efx,
+ struct efx_self_tests *tests,
+ u8 *strings, u64 *data);
+int efx_ethtool_get_sset_count(struct net_device *net_dev, int string_set);
+void efx_ethtool_get_strings(struct net_device *net_dev, u32 string_set,
+ u8 *strings);
+void efx_ethtool_get_stats(struct net_device *net_dev,
+ struct ethtool_stats *stats __attribute__ ((unused)),
+ u64 *data);
+
+#endif
diff --git a/drivers/net/ethernet/sfc/falcon/efx.c b/drivers/net/ethernet/sfc/falcon/efx.c
index eecc348b1c32..bee4cd9d7135 100644
--- a/drivers/net/ethernet/sfc/falcon/efx.c
+++ b/drivers/net/ethernet/sfc/falcon/efx.c
@@ -2108,7 +2108,7 @@ static void ef4_net_stats(struct net_device *net_dev,
}
/* Context: netif_tx_lock held, BHs disabled. */
-static void ef4_watchdog(struct net_device *net_dev)
+static void ef4_watchdog(struct net_device *net_dev, unsigned int txqueue)
{
struct ef4_nic *efx = netdev_priv(net_dev);
diff --git a/drivers/net/ethernet/sfc/farch.c b/drivers/net/ethernet/sfc/farch.c
index eedd32e2bfcb..dbbb898adddb 100644
--- a/drivers/net/ethernet/sfc/farch.c
+++ b/drivers/net/ethernet/sfc/farch.c
@@ -15,6 +15,7 @@
#include "net_driver.h"
#include "bitfield.h"
#include "efx.h"
+#include "rx_common.h"
#include "nic.h"
#include "farch_regs.h"
#include "sriov.h"
diff --git a/drivers/net/ethernet/sfc/mcdi.h b/drivers/net/ethernet/sfc/mcdi.h
index 9081f84a2604..54a45010b576 100644
--- a/drivers/net/ethernet/sfc/mcdi.h
+++ b/drivers/net/ethernet/sfc/mcdi.h
@@ -346,11 +346,8 @@ int efx_mcdi_flush_rxqs(struct efx_nic *efx);
int efx_mcdi_port_probe(struct efx_nic *efx);
void efx_mcdi_port_remove(struct efx_nic *efx);
int efx_mcdi_port_reconfigure(struct efx_nic *efx);
-int efx_mcdi_port_get_number(struct efx_nic *efx);
u32 efx_mcdi_phy_get_caps(struct efx_nic *efx);
void efx_mcdi_process_link_change(struct efx_nic *efx, efx_qword_t *ev);
-int efx_mcdi_set_mac(struct efx_nic *efx);
-#define EFX_MC_STATS_GENERATION_INVALID ((__force __le64)(-1))
void efx_mcdi_mac_start_stats(struct efx_nic *efx);
void efx_mcdi_mac_stop_stats(struct efx_nic *efx);
void efx_mcdi_mac_pull_stats(struct efx_nic *efx);
diff --git a/drivers/net/ethernet/sfc/mcdi_functions.c b/drivers/net/ethernet/sfc/mcdi_functions.c
new file mode 100644
index 000000000000..dcfe78b0fa5a
--- /dev/null
+++ b/drivers/net/ethernet/sfc/mcdi_functions.c
@@ -0,0 +1,386 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2019 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#include "net_driver.h"
+#include "efx.h"
+#include "nic.h"
+#include "mcdi_functions.h"
+#include "mcdi.h"
+#include "mcdi_pcol.h"
+
+int efx_mcdi_free_vis(struct efx_nic *efx)
+{
+ MCDI_DECLARE_BUF_ERR(outbuf);
+ size_t outlen;
+ int rc = efx_mcdi_rpc_quiet(efx, MC_CMD_FREE_VIS, NULL, 0,
+ outbuf, sizeof(outbuf), &outlen);
+
+ /* -EALREADY means nothing to free, so ignore */
+ if (rc == -EALREADY)
+ rc = 0;
+ if (rc)
+ efx_mcdi_display_error(efx, MC_CMD_FREE_VIS, 0, outbuf, outlen,
+ rc);
+ return rc;
+}
+
+int efx_mcdi_alloc_vis(struct efx_nic *efx, unsigned int min_vis,
+ unsigned int max_vis, unsigned int *vi_base,
+ unsigned int *allocated_vis)
+{
+ MCDI_DECLARE_BUF(outbuf, MC_CMD_ALLOC_VIS_OUT_LEN);
+ MCDI_DECLARE_BUF(inbuf, MC_CMD_ALLOC_VIS_IN_LEN);
+ size_t outlen;
+ int rc;
+
+ MCDI_SET_DWORD(inbuf, ALLOC_VIS_IN_MIN_VI_COUNT, min_vis);
+ MCDI_SET_DWORD(inbuf, ALLOC_VIS_IN_MAX_VI_COUNT, max_vis);
+ rc = efx_mcdi_rpc(efx, MC_CMD_ALLOC_VIS, inbuf, sizeof(inbuf),
+ outbuf, sizeof(outbuf), &outlen);
+ if (rc != 0)
+ return rc;
+
+ if (outlen < MC_CMD_ALLOC_VIS_OUT_LEN)
+ return -EIO;
+
+ netif_dbg(efx, drv, efx->net_dev, "base VI is A0x%03x\n",
+ MCDI_DWORD(outbuf, ALLOC_VIS_OUT_VI_BASE));
+
+ if (vi_base)
+ *vi_base = MCDI_DWORD(outbuf, ALLOC_VIS_OUT_VI_BASE);
+ if (allocated_vis)
+ *allocated_vis = MCDI_DWORD(outbuf, ALLOC_VIS_OUT_VI_COUNT);
+ return 0;
+}
+
+int efx_mcdi_ev_probe(struct efx_channel *channel)
+{
+ return efx_nic_alloc_buffer(channel->efx, &channel->eventq.buf,
+ (channel->eventq_mask + 1) *
+ sizeof(efx_qword_t),
+ GFP_KERNEL);
+}
+
+int efx_mcdi_ev_init(struct efx_channel *channel, bool v1_cut_thru, bool v2)
+{
+ MCDI_DECLARE_BUF(inbuf,
+ MC_CMD_INIT_EVQ_V2_IN_LEN(EFX_MAX_EVQ_SIZE * 8 /
+ EFX_BUF_SIZE));
+ MCDI_DECLARE_BUF(outbuf, MC_CMD_INIT_EVQ_V2_OUT_LEN);
+ size_t entries = channel->eventq.buf.len / EFX_BUF_SIZE;
+ struct efx_nic *efx = channel->efx;
+ size_t inlen, outlen;
+ dma_addr_t dma_addr;
+ int rc, i;
+
+ /* Fill event queue with all ones (i.e. empty events) */
+ memset(channel->eventq.buf.addr, 0xff, channel->eventq.buf.len);
+
+ MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_SIZE, channel->eventq_mask + 1);
+ MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_INSTANCE, channel->channel);
+ /* INIT_EVQ expects index in vector table, not absolute */
+ MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_IRQ_NUM, channel->channel);
+ MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_TMR_MODE,
+ MC_CMD_INIT_EVQ_IN_TMR_MODE_DIS);
+ MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_TMR_LOAD, 0);
+ MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_TMR_RELOAD, 0);
+ MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_COUNT_MODE,
+ MC_CMD_INIT_EVQ_IN_COUNT_MODE_DIS);
+ MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_COUNT_THRSHLD, 0);
+
+ if (v2) {
+ /* Use the new generic approach to specifying event queue
+ * configuration, requesting lower latency or higher throughput.
+ * The options that actually get used appear in the output.
+ */
+ MCDI_POPULATE_DWORD_2(inbuf, INIT_EVQ_V2_IN_FLAGS,
+ INIT_EVQ_V2_IN_FLAG_INTERRUPTING, 1,
+ INIT_EVQ_V2_IN_FLAG_TYPE,
+ MC_CMD_INIT_EVQ_V2_IN_FLAG_TYPE_AUTO);
+ } else {
+ MCDI_POPULATE_DWORD_4(inbuf, INIT_EVQ_IN_FLAGS,
+ INIT_EVQ_IN_FLAG_INTERRUPTING, 1,
+ INIT_EVQ_IN_FLAG_RX_MERGE, 1,
+ INIT_EVQ_IN_FLAG_TX_MERGE, 1,
+ INIT_EVQ_IN_FLAG_CUT_THRU, v1_cut_thru);
+ }
+
+ dma_addr = channel->eventq.buf.dma_addr;
+ for (i = 0; i < entries; ++i) {
+ MCDI_SET_ARRAY_QWORD(inbuf, INIT_EVQ_IN_DMA_ADDR, i, dma_addr);
+ dma_addr += EFX_BUF_SIZE;
+ }
+
+ inlen = MC_CMD_INIT_EVQ_IN_LEN(entries);
+
+ rc = efx_mcdi_rpc(efx, MC_CMD_INIT_EVQ, inbuf, inlen,
+ outbuf, sizeof(outbuf), &outlen);
+
+ if (outlen >= MC_CMD_INIT_EVQ_V2_OUT_LEN)
+ netif_dbg(efx, drv, efx->net_dev,
+ "Channel %d using event queue flags %08x\n",
+ channel->channel,
+ MCDI_DWORD(outbuf, INIT_EVQ_V2_OUT_FLAGS));
+
+ return rc;
+}
+
+void efx_mcdi_ev_remove(struct efx_channel *channel)
+{
+ efx_nic_free_buffer(channel->efx, &channel->eventq.buf);
+}
+
+void efx_mcdi_ev_fini(struct efx_channel *channel)
+{
+ MCDI_DECLARE_BUF(inbuf, MC_CMD_FINI_EVQ_IN_LEN);
+ MCDI_DECLARE_BUF_ERR(outbuf);
+ struct efx_nic *efx = channel->efx;
+ size_t outlen;
+ int rc;
+
+ MCDI_SET_DWORD(inbuf, FINI_EVQ_IN_INSTANCE, channel->channel);
+
+ rc = efx_mcdi_rpc_quiet(efx, MC_CMD_FINI_EVQ, inbuf, sizeof(inbuf),
+ outbuf, sizeof(outbuf), &outlen);
+
+ if (rc && rc != -EALREADY)
+ goto fail;
+
+ return;
+
+fail:
+ efx_mcdi_display_error(efx, MC_CMD_FINI_EVQ, MC_CMD_FINI_EVQ_IN_LEN,
+ outbuf, outlen, rc);
+}
+
+int efx_mcdi_tx_init(struct efx_tx_queue *tx_queue, bool tso_v2)
+{
+ MCDI_DECLARE_BUF(inbuf, MC_CMD_INIT_TXQ_IN_LEN(EFX_MAX_DMAQ_SIZE * 8 /
+ EFX_BUF_SIZE));
+ bool csum_offload = tx_queue->queue & EFX_TXQ_TYPE_OFFLOAD;
+ size_t entries = tx_queue->txd.buf.len / EFX_BUF_SIZE;
+ struct efx_channel *channel = tx_queue->channel;
+ struct efx_nic *efx = tx_queue->efx;
+ struct efx_ef10_nic_data *nic_data;
+ dma_addr_t dma_addr;
+ size_t inlen;
+ int rc, i;
+
+ BUILD_BUG_ON(MC_CMD_INIT_TXQ_OUT_LEN != 0);
+
+ nic_data = efx->nic_data;
+
+ MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_SIZE, tx_queue->ptr_mask + 1);
+ MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_TARGET_EVQ, channel->channel);
+ MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_LABEL, tx_queue->queue);
+ MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_INSTANCE, tx_queue->queue);
+ MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_OWNER_ID, 0);
+ MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_PORT_ID, nic_data->vport_id);
+
+ dma_addr = tx_queue->txd.buf.dma_addr;
+
+ netif_dbg(efx, hw, efx->net_dev, "pushing TXQ %d. %zu entries (%llx)\n",
+ tx_queue->queue, entries, (u64)dma_addr);
+
+ for (i = 0; i < entries; ++i) {
+ MCDI_SET_ARRAY_QWORD(inbuf, INIT_TXQ_IN_DMA_ADDR, i, dma_addr);
+ dma_addr += EFX_BUF_SIZE;
+ }
+
+ inlen = MC_CMD_INIT_TXQ_IN_LEN(entries);
+
+ do {
+ MCDI_POPULATE_DWORD_4(inbuf, INIT_TXQ_IN_FLAGS,
+ /* This flag was removed from mcdi_pcol.h for
+ * the non-_EXT version of INIT_TXQ. However,
+ * firmware still honours it.
+ */
+ INIT_TXQ_EXT_IN_FLAG_TSOV2_EN, tso_v2,
+ INIT_TXQ_IN_FLAG_IP_CSUM_DIS, !csum_offload,
+ INIT_TXQ_IN_FLAG_TCP_CSUM_DIS, !csum_offload,
+ INIT_TXQ_EXT_IN_FLAG_TIMESTAMP,
+ tx_queue->timestamping);
+
+ rc = efx_mcdi_rpc_quiet(efx, MC_CMD_INIT_TXQ, inbuf, inlen,
+ NULL, 0, NULL);
+ if (rc == -ENOSPC && tso_v2) {
+ /* Retry without TSOv2 if we're short on contexts. */
+ tso_v2 = false;
+ netif_warn(efx, probe, efx->net_dev,
+ "TSOv2 context not available to segment in "
+ "hardware. TCP performance may be reduced.\n"
+ );
+ } else if (rc) {
+ efx_mcdi_display_error(efx, MC_CMD_INIT_TXQ,
+ MC_CMD_INIT_TXQ_EXT_IN_LEN,
+ NULL, 0, rc);
+ goto fail;
+ }
+ } while (rc);
+
+ return 0;
+
+fail:
+ return rc;
+}
+
+void efx_mcdi_tx_remove(struct efx_tx_queue *tx_queue)
+{
+ efx_nic_free_buffer(tx_queue->efx, &tx_queue->txd.buf);
+}
+
+void efx_mcdi_tx_fini(struct efx_tx_queue *tx_queue)
+{
+ MCDI_DECLARE_BUF(inbuf, MC_CMD_FINI_TXQ_IN_LEN);
+ MCDI_DECLARE_BUF_ERR(outbuf);
+ struct efx_nic *efx = tx_queue->efx;
+ size_t outlen;
+ int rc;
+
+ MCDI_SET_DWORD(inbuf, FINI_TXQ_IN_INSTANCE,
+ tx_queue->queue);
+
+ rc = efx_mcdi_rpc_quiet(efx, MC_CMD_FINI_TXQ, inbuf, sizeof(inbuf),
+ outbuf, sizeof(outbuf), &outlen);
+
+ if (rc && rc != -EALREADY)
+ goto fail;
+
+ return;
+
+fail:
+ efx_mcdi_display_error(efx, MC_CMD_FINI_TXQ, MC_CMD_FINI_TXQ_IN_LEN,
+ outbuf, outlen, rc);
+}
+
+int efx_mcdi_rx_probe(struct efx_rx_queue *rx_queue)
+{
+ return efx_nic_alloc_buffer(rx_queue->efx, &rx_queue->rxd.buf,
+ (rx_queue->ptr_mask + 1) *
+ sizeof(efx_qword_t),
+ GFP_KERNEL);
+}
+
+void efx_mcdi_rx_init(struct efx_rx_queue *rx_queue)
+{
+ MCDI_DECLARE_BUF(inbuf,
+ MC_CMD_INIT_RXQ_IN_LEN(EFX_MAX_DMAQ_SIZE * 8 /
+ EFX_BUF_SIZE));
+ struct efx_channel *channel = efx_rx_queue_channel(rx_queue);
+ size_t entries = rx_queue->rxd.buf.len / EFX_BUF_SIZE;
+ struct efx_nic *efx = rx_queue->efx;
+ struct efx_ef10_nic_data *nic_data = efx->nic_data;
+ dma_addr_t dma_addr;
+ size_t inlen;
+ int rc;
+ int i;
+ BUILD_BUG_ON(MC_CMD_INIT_RXQ_OUT_LEN != 0);
+
+ rx_queue->scatter_n = 0;
+ rx_queue->scatter_len = 0;
+
+ MCDI_SET_DWORD(inbuf, INIT_RXQ_IN_SIZE, rx_queue->ptr_mask + 1);
+ MCDI_SET_DWORD(inbuf, INIT_RXQ_IN_TARGET_EVQ, channel->channel);
+ MCDI_SET_DWORD(inbuf, INIT_RXQ_IN_LABEL, efx_rx_queue_index(rx_queue));
+ MCDI_SET_DWORD(inbuf, INIT_RXQ_IN_INSTANCE,
+ efx_rx_queue_index(rx_queue));
+ MCDI_POPULATE_DWORD_2(inbuf, INIT_RXQ_IN_FLAGS,
+ INIT_RXQ_IN_FLAG_PREFIX, 1,
+ INIT_RXQ_IN_FLAG_TIMESTAMP, 1);
+ MCDI_SET_DWORD(inbuf, INIT_RXQ_IN_OWNER_ID, 0);
+ MCDI_SET_DWORD(inbuf, INIT_RXQ_IN_PORT_ID, nic_data->vport_id);
+
+ dma_addr = rx_queue->rxd.buf.dma_addr;
+
+ netif_dbg(efx, hw, efx->net_dev, "pushing RXQ %d. %zu entries (%llx)\n",
+ efx_rx_queue_index(rx_queue), entries, (u64)dma_addr);
+
+ for (i = 0; i < entries; ++i) {
+ MCDI_SET_ARRAY_QWORD(inbuf, INIT_RXQ_IN_DMA_ADDR, i, dma_addr);
+ dma_addr += EFX_BUF_SIZE;
+ }
+
+ inlen = MC_CMD_INIT_RXQ_IN_LEN(entries);
+
+ rc = efx_mcdi_rpc(efx, MC_CMD_INIT_RXQ, inbuf, inlen,
+ NULL, 0, NULL);
+ if (rc)
+ netdev_WARN(efx->net_dev, "failed to initialise RXQ %d\n",
+ efx_rx_queue_index(rx_queue));
+}
+
+void efx_mcdi_rx_remove(struct efx_rx_queue *rx_queue)
+{
+ efx_nic_free_buffer(rx_queue->efx, &rx_queue->rxd.buf);
+}
+
+void efx_mcdi_rx_fini(struct efx_rx_queue *rx_queue)
+{
+ MCDI_DECLARE_BUF(inbuf, MC_CMD_FINI_RXQ_IN_LEN);
+ MCDI_DECLARE_BUF_ERR(outbuf);
+ struct efx_nic *efx = rx_queue->efx;
+ size_t outlen;
+ int rc;
+
+ MCDI_SET_DWORD(inbuf, FINI_RXQ_IN_INSTANCE,
+ efx_rx_queue_index(rx_queue));
+
+ rc = efx_mcdi_rpc_quiet(efx, MC_CMD_FINI_RXQ, inbuf, sizeof(inbuf),
+ outbuf, sizeof(outbuf), &outlen);
+
+ if (rc && rc != -EALREADY)
+ goto fail;
+
+ return;
+
+fail:
+ efx_mcdi_display_error(efx, MC_CMD_FINI_RXQ, MC_CMD_FINI_RXQ_IN_LEN,
+ outbuf, outlen, rc);
+}
+
+int efx_mcdi_window_mode_to_stride(struct efx_nic *efx, u8 vi_window_mode)
+{
+ switch (vi_window_mode) {
+ case MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_8K:
+ efx->vi_stride = 8192;
+ break;
+ case MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_16K:
+ efx->vi_stride = 16384;
+ break;
+ case MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_64K:
+ efx->vi_stride = 65536;
+ break;
+ default:
+ netif_err(efx, probe, efx->net_dev,
+ "Unrecognised VI window mode %d\n",
+ vi_window_mode);
+ return -EIO;
+ }
+ netif_dbg(efx, probe, efx->net_dev, "vi_stride = %u\n",
+ efx->vi_stride);
+ return 0;
+}
+
+int efx_get_pf_index(struct efx_nic *efx, unsigned int *pf_index)
+{
+ MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_FUNCTION_INFO_OUT_LEN);
+ size_t outlen;
+ int rc;
+
+ rc = efx_mcdi_rpc(efx, MC_CMD_GET_FUNCTION_INFO, NULL, 0, outbuf,
+ sizeof(outbuf), &outlen);
+ if (rc)
+ return rc;
+ if (outlen < sizeof(outbuf))
+ return -EIO;
+
+ *pf_index = MCDI_DWORD(outbuf, GET_FUNCTION_INFO_OUT_PF);
+ return 0;
+}
diff --git a/drivers/net/ethernet/sfc/mcdi_functions.h b/drivers/net/ethernet/sfc/mcdi_functions.h
new file mode 100644
index 000000000000..ca4a5ac1a66b
--- /dev/null
+++ b/drivers/net/ethernet/sfc/mcdi_functions.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2018 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+#ifndef EFX_MCDI_FUNCTIONS_H
+#define EFX_MCDI_FUNCTIONS_H
+
+int efx_mcdi_alloc_vis(struct efx_nic *efx, unsigned int min_vis,
+ unsigned int max_vis, unsigned int *vi_base,
+ unsigned int *allocated_vis);
+int efx_mcdi_free_vis(struct efx_nic *efx);
+
+int efx_mcdi_ev_probe(struct efx_channel *channel);
+int efx_mcdi_ev_init(struct efx_channel *channel, bool v1_cut_thru, bool v2);
+void efx_mcdi_ev_remove(struct efx_channel *channel);
+void efx_mcdi_ev_fini(struct efx_channel *channel);
+int efx_mcdi_tx_init(struct efx_tx_queue *tx_queue, bool tso_v2);
+void efx_mcdi_tx_remove(struct efx_tx_queue *tx_queue);
+void efx_mcdi_tx_fini(struct efx_tx_queue *tx_queue);
+int efx_mcdi_rx_probe(struct efx_rx_queue *rx_queue);
+void efx_mcdi_rx_init(struct efx_rx_queue *rx_queue);
+void efx_mcdi_rx_remove(struct efx_rx_queue *rx_queue);
+void efx_mcdi_rx_fini(struct efx_rx_queue *rx_queue);
+int efx_mcdi_window_mode_to_stride(struct efx_nic *efx, u8 vi_window_mode);
+int efx_get_pf_index(struct efx_nic *efx, unsigned int *pf_index);
+
+#endif
diff --git a/drivers/net/ethernet/sfc/mcdi_port.c b/drivers/net/ethernet/sfc/mcdi_port.c
index fb7cde4980ed..ab5227b13ae6 100644
--- a/drivers/net/ethernet/sfc/mcdi_port.c
+++ b/drivers/net/ethernet/sfc/mcdi_port.c
@@ -14,106 +14,7 @@
#include "mcdi_pcol.h"
#include "nic.h"
#include "selftest.h"
-
-struct efx_mcdi_phy_data {
- u32 flags;
- u32 type;
- u32 supported_cap;
- u32 channel;
- u32 port;
- u32 stats_mask;
- u8 name[20];
- u32 media;
- u32 mmd_mask;
- u8 revision[20];
- u32 forced_cap;
-};
-
-static int
-efx_mcdi_get_phy_cfg(struct efx_nic *efx, struct efx_mcdi_phy_data *cfg)
-{
- MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_PHY_CFG_OUT_LEN);
- size_t outlen;
- int rc;
-
- BUILD_BUG_ON(MC_CMD_GET_PHY_CFG_IN_LEN != 0);
- BUILD_BUG_ON(MC_CMD_GET_PHY_CFG_OUT_NAME_LEN != sizeof(cfg->name));
-
- rc = efx_mcdi_rpc(efx, MC_CMD_GET_PHY_CFG, NULL, 0,
- outbuf, sizeof(outbuf), &outlen);
- if (rc)
- goto fail;
-
- if (outlen < MC_CMD_GET_PHY_CFG_OUT_LEN) {
- rc = -EIO;
- goto fail;
- }
-
- cfg->flags = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_FLAGS);
- cfg->type = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_TYPE);
- cfg->supported_cap =
- MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_SUPPORTED_CAP);
- cfg->channel = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_CHANNEL);
- cfg->port = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_PRT);
- cfg->stats_mask = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_STATS_MASK);
- memcpy(cfg->name, MCDI_PTR(outbuf, GET_PHY_CFG_OUT_NAME),
- sizeof(cfg->name));
- cfg->media = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_MEDIA_TYPE);
- cfg->mmd_mask = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_MMD_MASK);
- memcpy(cfg->revision, MCDI_PTR(outbuf, GET_PHY_CFG_OUT_REVISION),
- sizeof(cfg->revision));
-
- return 0;
-
-fail:
- netif_err(efx, hw, efx->net_dev, "%s: failed rc=%d\n", __func__, rc);
- return rc;
-}
-
-static int efx_mcdi_set_link(struct efx_nic *efx, u32 capabilities,
- u32 flags, u32 loopback_mode,
- u32 loopback_speed)
-{
- MCDI_DECLARE_BUF(inbuf, MC_CMD_SET_LINK_IN_LEN);
- int rc;
-
- BUILD_BUG_ON(MC_CMD_SET_LINK_OUT_LEN != 0);
-
- MCDI_SET_DWORD(inbuf, SET_LINK_IN_CAP, capabilities);
- MCDI_SET_DWORD(inbuf, SET_LINK_IN_FLAGS, flags);
- MCDI_SET_DWORD(inbuf, SET_LINK_IN_LOOPBACK_MODE, loopback_mode);
- MCDI_SET_DWORD(inbuf, SET_LINK_IN_LOOPBACK_SPEED, loopback_speed);
-
- rc = efx_mcdi_rpc(efx, MC_CMD_SET_LINK, inbuf, sizeof(inbuf),
- NULL, 0, NULL);
- return rc;
-}
-
-static int efx_mcdi_loopback_modes(struct efx_nic *efx, u64 *loopback_modes)
-{
- MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_LOOPBACK_MODES_OUT_LEN);
- size_t outlen;
- int rc;
-
- rc = efx_mcdi_rpc(efx, MC_CMD_GET_LOOPBACK_MODES, NULL, 0,
- outbuf, sizeof(outbuf), &outlen);
- if (rc)
- goto fail;
-
- if (outlen < (MC_CMD_GET_LOOPBACK_MODES_OUT_SUGGESTED_OFST +
- MC_CMD_GET_LOOPBACK_MODES_OUT_SUGGESTED_LEN)) {
- rc = -EIO;
- goto fail;
- }
-
- *loopback_modes = MCDI_QWORD(outbuf, GET_LOOPBACK_MODES_OUT_SUGGESTED);
-
- return 0;
-
-fail:
- netif_err(efx, hw, efx->net_dev, "%s: failed rc=%d\n", __func__, rc);
- return rc;
-}
+#include "mcdi_port_common.h"
static int efx_mcdi_mdio_read(struct net_device *net_dev,
int prtad, int devad, u16 addr)
@@ -168,246 +69,6 @@ static int efx_mcdi_mdio_write(struct net_device *net_dev,
return 0;
}
-static void mcdi_to_ethtool_linkset(u32 media, u32 cap, unsigned long *linkset)
-{
- #define SET_BIT(name) __set_bit(ETHTOOL_LINK_MODE_ ## name ## _BIT, \
- linkset)
-
- bitmap_zero(linkset, __ETHTOOL_LINK_MODE_MASK_NBITS);
- switch (media) {
- case MC_CMD_MEDIA_KX4:
- SET_BIT(Backplane);
- if (cap & (1 << MC_CMD_PHY_CAP_1000FDX_LBN))
- SET_BIT(1000baseKX_Full);
- if (cap & (1 << MC_CMD_PHY_CAP_10000FDX_LBN))
- SET_BIT(10000baseKX4_Full);
- if (cap & (1 << MC_CMD_PHY_CAP_40000FDX_LBN))
- SET_BIT(40000baseKR4_Full);
- break;
-
- case MC_CMD_MEDIA_XFP:
- case MC_CMD_MEDIA_SFP_PLUS:
- case MC_CMD_MEDIA_QSFP_PLUS:
- SET_BIT(FIBRE);
- if (cap & (1 << MC_CMD_PHY_CAP_1000FDX_LBN))
- SET_BIT(1000baseT_Full);
- if (cap & (1 << MC_CMD_PHY_CAP_10000FDX_LBN))
- SET_BIT(10000baseT_Full);
- if (cap & (1 << MC_CMD_PHY_CAP_40000FDX_LBN))
- SET_BIT(40000baseCR4_Full);
- if (cap & (1 << MC_CMD_PHY_CAP_100000FDX_LBN))
- SET_BIT(100000baseCR4_Full);
- if (cap & (1 << MC_CMD_PHY_CAP_25000FDX_LBN))
- SET_BIT(25000baseCR_Full);
- if (cap & (1 << MC_CMD_PHY_CAP_50000FDX_LBN))
- SET_BIT(50000baseCR2_Full);
- break;
-
- case MC_CMD_MEDIA_BASE_T:
- SET_BIT(TP);
- if (cap & (1 << MC_CMD_PHY_CAP_10HDX_LBN))
- SET_BIT(10baseT_Half);
- if (cap & (1 << MC_CMD_PHY_CAP_10FDX_LBN))
- SET_BIT(10baseT_Full);
- if (cap & (1 << MC_CMD_PHY_CAP_100HDX_LBN))
- SET_BIT(100baseT_Half);
- if (cap & (1 << MC_CMD_PHY_CAP_100FDX_LBN))
- SET_BIT(100baseT_Full);
- if (cap & (1 << MC_CMD_PHY_CAP_1000HDX_LBN))
- SET_BIT(1000baseT_Half);
- if (cap & (1 << MC_CMD_PHY_CAP_1000FDX_LBN))
- SET_BIT(1000baseT_Full);
- if (cap & (1 << MC_CMD_PHY_CAP_10000FDX_LBN))
- SET_BIT(10000baseT_Full);
- break;
- }
-
- if (cap & (1 << MC_CMD_PHY_CAP_PAUSE_LBN))
- SET_BIT(Pause);
- if (cap & (1 << MC_CMD_PHY_CAP_ASYM_LBN))
- SET_BIT(Asym_Pause);
- if (cap & (1 << MC_CMD_PHY_CAP_AN_LBN))
- SET_BIT(Autoneg);
-
- #undef SET_BIT
-}
-
-static u32 ethtool_linkset_to_mcdi_cap(const unsigned long *linkset)
-{
- u32 result = 0;
-
- #define TEST_BIT(name) test_bit(ETHTOOL_LINK_MODE_ ## name ## _BIT, \
- linkset)
-
- if (TEST_BIT(10baseT_Half))
- result |= (1 << MC_CMD_PHY_CAP_10HDX_LBN);
- if (TEST_BIT(10baseT_Full))
- result |= (1 << MC_CMD_PHY_CAP_10FDX_LBN);
- if (TEST_BIT(100baseT_Half))
- result |= (1 << MC_CMD_PHY_CAP_100HDX_LBN);
- if (TEST_BIT(100baseT_Full))
- result |= (1 << MC_CMD_PHY_CAP_100FDX_LBN);
- if (TEST_BIT(1000baseT_Half))
- result |= (1 << MC_CMD_PHY_CAP_1000HDX_LBN);
- if (TEST_BIT(1000baseT_Full) || TEST_BIT(1000baseKX_Full))
- result |= (1 << MC_CMD_PHY_CAP_1000FDX_LBN);
- if (TEST_BIT(10000baseT_Full) || TEST_BIT(10000baseKX4_Full))
- result |= (1 << MC_CMD_PHY_CAP_10000FDX_LBN);
- if (TEST_BIT(40000baseCR4_Full) || TEST_BIT(40000baseKR4_Full))
- result |= (1 << MC_CMD_PHY_CAP_40000FDX_LBN);
- if (TEST_BIT(100000baseCR4_Full))
- result |= (1 << MC_CMD_PHY_CAP_100000FDX_LBN);
- if (TEST_BIT(25000baseCR_Full))
- result |= (1 << MC_CMD_PHY_CAP_25000FDX_LBN);
- if (TEST_BIT(50000baseCR2_Full))
- result |= (1 << MC_CMD_PHY_CAP_50000FDX_LBN);
- if (TEST_BIT(Pause))
- result |= (1 << MC_CMD_PHY_CAP_PAUSE_LBN);
- if (TEST_BIT(Asym_Pause))
- result |= (1 << MC_CMD_PHY_CAP_ASYM_LBN);
- if (TEST_BIT(Autoneg))
- result |= (1 << MC_CMD_PHY_CAP_AN_LBN);
-
- #undef TEST_BIT
-
- return result;
-}
-
-static u32 efx_get_mcdi_phy_flags(struct efx_nic *efx)
-{
- struct efx_mcdi_phy_data *phy_cfg = efx->phy_data;
- enum efx_phy_mode mode, supported;
- u32 flags;
-
- /* TODO: Advertise the capabilities supported by this PHY */
- supported = 0;
- if (phy_cfg->flags & (1 << MC_CMD_GET_PHY_CFG_OUT_TXDIS_LBN))
- supported |= PHY_MODE_TX_DISABLED;
- if (phy_cfg->flags & (1 << MC_CMD_GET_PHY_CFG_OUT_LOWPOWER_LBN))
- supported |= PHY_MODE_LOW_POWER;
- if (phy_cfg->flags & (1 << MC_CMD_GET_PHY_CFG_OUT_POWEROFF_LBN))
- supported |= PHY_MODE_OFF;
-
- mode = efx->phy_mode & supported;
-
- flags = 0;
- if (mode & PHY_MODE_TX_DISABLED)
- flags |= (1 << MC_CMD_SET_LINK_IN_TXDIS_LBN);
- if (mode & PHY_MODE_LOW_POWER)
- flags |= (1 << MC_CMD_SET_LINK_IN_LOWPOWER_LBN);
- if (mode & PHY_MODE_OFF)
- flags |= (1 << MC_CMD_SET_LINK_IN_POWEROFF_LBN);
-
- return flags;
-}
-
-static u8 mcdi_to_ethtool_media(u32 media)
-{
- switch (media) {
- case MC_CMD_MEDIA_XAUI:
- case MC_CMD_MEDIA_CX4:
- case MC_CMD_MEDIA_KX4:
- return PORT_OTHER;
-
- case MC_CMD_MEDIA_XFP:
- case MC_CMD_MEDIA_SFP_PLUS:
- case MC_CMD_MEDIA_QSFP_PLUS:
- return PORT_FIBRE;
-
- case MC_CMD_MEDIA_BASE_T:
- return PORT_TP;
-
- default:
- return PORT_OTHER;
- }
-}
-
-static void efx_mcdi_phy_decode_link(struct efx_nic *efx,
- struct efx_link_state *link_state,
- u32 speed, u32 flags, u32 fcntl)
-{
- switch (fcntl) {
- case MC_CMD_FCNTL_AUTO:
- WARN_ON(1); /* This is not a link mode */
- link_state->fc = EFX_FC_AUTO | EFX_FC_TX | EFX_FC_RX;
- break;
- case MC_CMD_FCNTL_BIDIR:
- link_state->fc = EFX_FC_TX | EFX_FC_RX;
- break;
- case MC_CMD_FCNTL_RESPOND:
- link_state->fc = EFX_FC_RX;
- break;
- default:
- WARN_ON(1);
- /* Fall through */
- case MC_CMD_FCNTL_OFF:
- link_state->fc = 0;
- break;
- }
-
- link_state->up = !!(flags & (1 << MC_CMD_GET_LINK_OUT_LINK_UP_LBN));
- link_state->fd = !!(flags & (1 << MC_CMD_GET_LINK_OUT_FULL_DUPLEX_LBN));
- link_state->speed = speed;
-}
-
-/* The semantics of the ethtool FEC mode bitmask are not well defined,
- * particularly the meaning of combinations of bits. Which means we get to
- * define our own semantics, as follows:
- * OFF overrides any other bits, and means "disable all FEC" (with the
- * exception of 25G KR4/CR4, where it is not possible to reject it if AN
- * partner requests it).
- * AUTO on its own means use cable requirements and link partner autoneg with
- * fw-default preferences for the cable type.
- * AUTO and either RS or BASER means use the specified FEC type if cable and
- * link partner support it, otherwise autoneg/fw-default.
- * RS or BASER alone means use the specified FEC type if cable and link partner
- * support it and either requests it, otherwise no FEC.
- * Both RS and BASER (whether AUTO or not) means use FEC if cable and link
- * partner support it, preferring RS to BASER.
- */
-static u32 ethtool_fec_caps_to_mcdi(u32 ethtool_cap)
-{
- u32 ret = 0;
-
- if (ethtool_cap & ETHTOOL_FEC_OFF)
- return 0;
-
- if (ethtool_cap & ETHTOOL_FEC_AUTO)
- ret |= (1 << MC_CMD_PHY_CAP_BASER_FEC_LBN) |
- (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_LBN) |
- (1 << MC_CMD_PHY_CAP_RS_FEC_LBN);
- if (ethtool_cap & ETHTOOL_FEC_RS)
- ret |= (1 << MC_CMD_PHY_CAP_RS_FEC_LBN) |
- (1 << MC_CMD_PHY_CAP_RS_FEC_REQUESTED_LBN);
- if (ethtool_cap & ETHTOOL_FEC_BASER)
- ret |= (1 << MC_CMD_PHY_CAP_BASER_FEC_LBN) |
- (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_LBN) |
- (1 << MC_CMD_PHY_CAP_BASER_FEC_REQUESTED_LBN) |
- (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_REQUESTED_LBN);
- return ret;
-}
-
-/* Invert ethtool_fec_caps_to_mcdi. There are two combinations that function
- * can never produce, (baser xor rs) and neither req; the implementation below
- * maps both of those to AUTO. This should never matter, and it's not clear
- * what a better mapping would be anyway.
- */
-static u32 mcdi_fec_caps_to_ethtool(u32 caps, bool is_25g)
-{
- bool rs = caps & (1 << MC_CMD_PHY_CAP_RS_FEC_LBN),
- rs_req = caps & (1 << MC_CMD_PHY_CAP_RS_FEC_REQUESTED_LBN),
- baser = is_25g ? caps & (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_LBN)
- : caps & (1 << MC_CMD_PHY_CAP_BASER_FEC_LBN),
- baser_req = is_25g ? caps & (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_REQUESTED_LBN)
- : caps & (1 << MC_CMD_PHY_CAP_BASER_FEC_REQUESTED_LBN);
-
- if (!baser && !rs)
- return ETHTOOL_FEC_OFF;
- return (rs_req ? ETHTOOL_FEC_RS : 0) |
- (baser_req ? ETHTOOL_FEC_BASER : 0) |
- (baser == baser_req && rs == rs_req ? 0 : ETHTOOL_FEC_AUTO);
-}
-
static int efx_mcdi_phy_probe(struct efx_nic *efx)
{
struct efx_mcdi_phy_data *phy_data;
@@ -527,58 +188,6 @@ int efx_mcdi_port_reconfigure(struct efx_nic *efx)
efx->loopback_mode, 0);
}
-/* Verify that the forced flow control settings (!EFX_FC_AUTO) are
- * supported by the link partner. Warn the user if this isn't the case
- */
-static void efx_mcdi_phy_check_fcntl(struct efx_nic *efx, u32 lpa)
-{
- struct efx_mcdi_phy_data *phy_cfg = efx->phy_data;
- u32 rmtadv;
-
- /* The link partner capabilities are only relevant if the
- * link supports flow control autonegotiation */
- if (~phy_cfg->supported_cap & (1 << MC_CMD_PHY_CAP_AN_LBN))
- return;
-
- /* If flow control autoneg is supported and enabled, then fine */
- if (efx->wanted_fc & EFX_FC_AUTO)
- return;
-
- rmtadv = 0;
- if (lpa & (1 << MC_CMD_PHY_CAP_PAUSE_LBN))
- rmtadv |= ADVERTISED_Pause;
- if (lpa & (1 << MC_CMD_PHY_CAP_ASYM_LBN))
- rmtadv |= ADVERTISED_Asym_Pause;
-
- if ((efx->wanted_fc & EFX_FC_TX) && rmtadv == ADVERTISED_Asym_Pause)
- netif_err(efx, link, efx->net_dev,
- "warning: link partner doesn't support pause frames");
-}
-
-static bool efx_mcdi_phy_poll(struct efx_nic *efx)
-{
- struct efx_link_state old_state = efx->link_state;
- MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_LINK_OUT_LEN);
- int rc;
-
- WARN_ON(!mutex_is_locked(&efx->mac_lock));
-
- BUILD_BUG_ON(MC_CMD_GET_LINK_IN_LEN != 0);
-
- rc = efx_mcdi_rpc(efx, MC_CMD_GET_LINK, NULL, 0,
- outbuf, sizeof(outbuf), NULL);
- if (rc)
- efx->link_state.up = false;
- else
- efx_mcdi_phy_decode_link(
- efx, &efx->link_state,
- MCDI_DWORD(outbuf, GET_LINK_OUT_LINK_SPEED),
- MCDI_DWORD(outbuf, GET_LINK_OUT_FLAGS),
- MCDI_DWORD(outbuf, GET_LINK_OUT_FCNTL));
-
- return !efx_link_state_equal(&efx->link_state, &old_state);
-}
-
static void efx_mcdi_phy_remove(struct efx_nic *efx)
{
struct efx_mcdi_phy_data *phy_data = efx->phy_data;
@@ -666,58 +275,6 @@ efx_mcdi_phy_set_link_ksettings(struct efx_nic *efx,
return 0;
}
-static int efx_mcdi_phy_get_fecparam(struct efx_nic *efx,
- struct ethtool_fecparam *fec)
-{
- MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_LINK_OUT_V2_LEN);
- u32 caps, active, speed; /* MCDI format */
- bool is_25g = false;
- size_t outlen;
- int rc;
-
- BUILD_BUG_ON(MC_CMD_GET_LINK_IN_LEN != 0);
- rc = efx_mcdi_rpc(efx, MC_CMD_GET_LINK, NULL, 0,
- outbuf, sizeof(outbuf), &outlen);
- if (rc)
- return rc;
- if (outlen < MC_CMD_GET_LINK_OUT_V2_LEN)
- return -EOPNOTSUPP;
-
- /* behaviour for 25G/50G links depends on 25G BASER bit */
- speed = MCDI_DWORD(outbuf, GET_LINK_OUT_V2_LINK_SPEED);
- is_25g = speed == 25000 || speed == 50000;
-
- caps = MCDI_DWORD(outbuf, GET_LINK_OUT_V2_CAP);
- fec->fec = mcdi_fec_caps_to_ethtool(caps, is_25g);
- /* BASER is never supported on 100G */
- if (speed == 100000)
- fec->fec &= ~ETHTOOL_FEC_BASER;
-
- active = MCDI_DWORD(outbuf, GET_LINK_OUT_V2_FEC_TYPE);
- switch (active) {
- case MC_CMD_FEC_NONE:
- fec->active_fec = ETHTOOL_FEC_OFF;
- break;
- case MC_CMD_FEC_BASER:
- fec->active_fec = ETHTOOL_FEC_BASER;
- break;
- case MC_CMD_FEC_RS:
- fec->active_fec = ETHTOOL_FEC_RS;
- break;
- default:
- netif_warn(efx, hw, efx->net_dev,
- "Firmware reports unrecognised FEC_TYPE %u\n",
- active);
- /* We don't know what firmware has picked. AUTO is as good a
- * "can't happen" value as any other.
- */
- fec->active_fec = ETHTOOL_FEC_AUTO;
- break;
- }
-
- return 0;
-}
-
static int efx_mcdi_phy_set_fecparam(struct efx_nic *efx,
const struct ethtool_fecparam *fec)
{
@@ -745,27 +302,6 @@ static int efx_mcdi_phy_set_fecparam(struct efx_nic *efx,
return 0;
}
-static int efx_mcdi_phy_test_alive(struct efx_nic *efx)
-{
- MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_PHY_STATE_OUT_LEN);
- size_t outlen;
- int rc;
-
- BUILD_BUG_ON(MC_CMD_GET_PHY_STATE_IN_LEN != 0);
-
- rc = efx_mcdi_rpc(efx, MC_CMD_GET_PHY_STATE, NULL, 0,
- outbuf, sizeof(outbuf), &outlen);
- if (rc)
- return rc;
-
- if (outlen < MC_CMD_GET_PHY_STATE_OUT_LEN)
- return -EIO;
- if (MCDI_DWORD(outbuf, GET_PHY_STATE_OUT_STATE) != MC_CMD_PHY_STATE_OK)
- return -EINVAL;
-
- return 0;
-}
-
static const char *const mcdi_sft9001_cable_diag_names[] = {
"cable.pairA.length",
"cable.pairB.length",
@@ -1139,84 +675,6 @@ u32 efx_mcdi_phy_get_caps(struct efx_nic *efx)
return phy_data->supported_cap;
}
-static unsigned int efx_mcdi_event_link_speed[] = {
- [MCDI_EVENT_LINKCHANGE_SPEED_100M] = 100,
- [MCDI_EVENT_LINKCHANGE_SPEED_1G] = 1000,
- [MCDI_EVENT_LINKCHANGE_SPEED_10G] = 10000,
- [MCDI_EVENT_LINKCHANGE_SPEED_40G] = 40000,
- [MCDI_EVENT_LINKCHANGE_SPEED_25G] = 25000,
- [MCDI_EVENT_LINKCHANGE_SPEED_50G] = 50000,
- [MCDI_EVENT_LINKCHANGE_SPEED_100G] = 100000,
-};
-
-void efx_mcdi_process_link_change(struct efx_nic *efx, efx_qword_t *ev)
-{
- u32 flags, fcntl, speed, lpa;
-
- speed = EFX_QWORD_FIELD(*ev, MCDI_EVENT_LINKCHANGE_SPEED);
- EFX_WARN_ON_PARANOID(speed >= ARRAY_SIZE(efx_mcdi_event_link_speed));
- speed = efx_mcdi_event_link_speed[speed];
-
- flags = EFX_QWORD_FIELD(*ev, MCDI_EVENT_LINKCHANGE_LINK_FLAGS);
- fcntl = EFX_QWORD_FIELD(*ev, MCDI_EVENT_LINKCHANGE_FCNTL);
- lpa = EFX_QWORD_FIELD(*ev, MCDI_EVENT_LINKCHANGE_LP_CAP);
-
- /* efx->link_state is only modified by efx_mcdi_phy_get_link(),
- * which is only run after flushing the event queues. Therefore, it
- * is safe to modify the link state outside of the mac_lock here.
- */
- efx_mcdi_phy_decode_link(efx, &efx->link_state, speed, flags, fcntl);
-
- efx_mcdi_phy_check_fcntl(efx, lpa);
-
- efx_link_status_changed(efx);
-}
-
-int efx_mcdi_set_mac(struct efx_nic *efx)
-{
- u32 fcntl;
- MCDI_DECLARE_BUF(cmdbytes, MC_CMD_SET_MAC_IN_LEN);
-
- BUILD_BUG_ON(MC_CMD_SET_MAC_OUT_LEN != 0);
-
- /* This has no effect on EF10 */
- ether_addr_copy(MCDI_PTR(cmdbytes, SET_MAC_IN_ADDR),
- efx->net_dev->dev_addr);
-
- MCDI_SET_DWORD(cmdbytes, SET_MAC_IN_MTU,
- EFX_MAX_FRAME_LEN(efx->net_dev->mtu));
- MCDI_SET_DWORD(cmdbytes, SET_MAC_IN_DRAIN, 0);
-
- /* Set simple MAC filter for Siena */
- MCDI_POPULATE_DWORD_1(cmdbytes, SET_MAC_IN_REJECT,
- SET_MAC_IN_REJECT_UNCST, efx->unicast_filter);
-
- MCDI_POPULATE_DWORD_1(cmdbytes, SET_MAC_IN_FLAGS,
- SET_MAC_IN_FLAG_INCLUDE_FCS,
- !!(efx->net_dev->features & NETIF_F_RXFCS));
-
- switch (efx->wanted_fc) {
- case EFX_FC_RX | EFX_FC_TX:
- fcntl = MC_CMD_FCNTL_BIDIR;
- break;
- case EFX_FC_RX:
- fcntl = MC_CMD_FCNTL_RESPOND;
- break;
- default:
- fcntl = MC_CMD_FCNTL_OFF;
- break;
- }
- if (efx->wanted_fc & EFX_FC_AUTO)
- fcntl = MC_CMD_FCNTL_AUTO;
- if (efx->fc_disable)
- fcntl = MC_CMD_FCNTL_OFF;
-
- MCDI_SET_DWORD(cmdbytes, SET_MAC_IN_FCNTL, fcntl);
-
- return efx_mcdi_rpc(efx, MC_CMD_SET_MAC, cmdbytes, sizeof(cmdbytes),
- NULL, 0, NULL);
-}
-
bool efx_mcdi_mac_check_fault(struct efx_nic *efx)
{
MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_LINK_OUT_LEN);
@@ -1348,17 +806,3 @@ void efx_mcdi_port_remove(struct efx_nic *efx)
efx->phy_op->remove(efx);
efx_nic_free_buffer(efx, &efx->stats_buffer);
}
-
-/* Get physical port number (EF10 only; on Siena it is same as PF number) */
-int efx_mcdi_port_get_number(struct efx_nic *efx)
-{
- MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_PORT_ASSIGNMENT_OUT_LEN);
- int rc;
-
- rc = efx_mcdi_rpc(efx, MC_CMD_GET_PORT_ASSIGNMENT, NULL, 0,
- outbuf, sizeof(outbuf), NULL);
- if (rc)
- return rc;
-
- return MCDI_DWORD(outbuf, GET_PORT_ASSIGNMENT_OUT_PORT);
-}
diff --git a/drivers/net/ethernet/sfc/mcdi_port_common.c b/drivers/net/ethernet/sfc/mcdi_port_common.c
new file mode 100644
index 000000000000..a6a072ba46d3
--- /dev/null
+++ b/drivers/net/ethernet/sfc/mcdi_port_common.c
@@ -0,0 +1,568 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2018 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#include "mcdi_port_common.h"
+#include "efx_common.h"
+
+int efx_mcdi_get_phy_cfg(struct efx_nic *efx, struct efx_mcdi_phy_data *cfg)
+{
+ MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_PHY_CFG_OUT_LEN);
+ size_t outlen;
+ int rc;
+
+ BUILD_BUG_ON(MC_CMD_GET_PHY_CFG_IN_LEN != 0);
+ BUILD_BUG_ON(MC_CMD_GET_PHY_CFG_OUT_NAME_LEN != sizeof(cfg->name));
+
+ rc = efx_mcdi_rpc(efx, MC_CMD_GET_PHY_CFG, NULL, 0,
+ outbuf, sizeof(outbuf), &outlen);
+ if (rc)
+ goto fail;
+
+ if (outlen < MC_CMD_GET_PHY_CFG_OUT_LEN) {
+ rc = -EIO;
+ goto fail;
+ }
+
+ cfg->flags = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_FLAGS);
+ cfg->type = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_TYPE);
+ cfg->supported_cap =
+ MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_SUPPORTED_CAP);
+ cfg->channel = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_CHANNEL);
+ cfg->port = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_PRT);
+ cfg->stats_mask = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_STATS_MASK);
+ memcpy(cfg->name, MCDI_PTR(outbuf, GET_PHY_CFG_OUT_NAME),
+ sizeof(cfg->name));
+ cfg->media = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_MEDIA_TYPE);
+ cfg->mmd_mask = MCDI_DWORD(outbuf, GET_PHY_CFG_OUT_MMD_MASK);
+ memcpy(cfg->revision, MCDI_PTR(outbuf, GET_PHY_CFG_OUT_REVISION),
+ sizeof(cfg->revision));
+
+ return 0;
+
+fail:
+ netif_err(efx, hw, efx->net_dev, "%s: failed rc=%d\n", __func__, rc);
+ return rc;
+}
+
+void efx_link_set_advertising(struct efx_nic *efx,
+ const unsigned long *advertising)
+{
+ memcpy(efx->link_advertising, advertising,
+ sizeof(__ETHTOOL_DECLARE_LINK_MODE_MASK()));
+
+ efx->link_advertising[0] |= ADVERTISED_Autoneg;
+ if (advertising[0] & ADVERTISED_Pause)
+ efx->wanted_fc |= (EFX_FC_TX | EFX_FC_RX);
+ else
+ efx->wanted_fc &= ~(EFX_FC_TX | EFX_FC_RX);
+ if (advertising[0] & ADVERTISED_Asym_Pause)
+ efx->wanted_fc ^= EFX_FC_TX;
+}
+
+int efx_mcdi_set_link(struct efx_nic *efx, u32 capabilities,
+ u32 flags, u32 loopback_mode, u32 loopback_speed)
+{
+ MCDI_DECLARE_BUF(inbuf, MC_CMD_SET_LINK_IN_LEN);
+ int rc;
+
+ BUILD_BUG_ON(MC_CMD_SET_LINK_OUT_LEN != 0);
+
+ MCDI_SET_DWORD(inbuf, SET_LINK_IN_CAP, capabilities);
+ MCDI_SET_DWORD(inbuf, SET_LINK_IN_FLAGS, flags);
+ MCDI_SET_DWORD(inbuf, SET_LINK_IN_LOOPBACK_MODE, loopback_mode);
+ MCDI_SET_DWORD(inbuf, SET_LINK_IN_LOOPBACK_SPEED, loopback_speed);
+
+ rc = efx_mcdi_rpc(efx, MC_CMD_SET_LINK, inbuf, sizeof(inbuf),
+ NULL, 0, NULL);
+ return rc;
+}
+
+int efx_mcdi_loopback_modes(struct efx_nic *efx, u64 *loopback_modes)
+{
+ MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_LOOPBACK_MODES_OUT_LEN);
+ size_t outlen;
+ int rc;
+
+ rc = efx_mcdi_rpc(efx, MC_CMD_GET_LOOPBACK_MODES, NULL, 0,
+ outbuf, sizeof(outbuf), &outlen);
+ if (rc)
+ goto fail;
+
+ if (outlen < (MC_CMD_GET_LOOPBACK_MODES_OUT_SUGGESTED_OFST +
+ MC_CMD_GET_LOOPBACK_MODES_OUT_SUGGESTED_LEN)) {
+ rc = -EIO;
+ goto fail;
+ }
+
+ *loopback_modes = MCDI_QWORD(outbuf, GET_LOOPBACK_MODES_OUT_SUGGESTED);
+
+ return 0;
+
+fail:
+ netif_err(efx, hw, efx->net_dev, "%s: failed rc=%d\n", __func__, rc);
+ return rc;
+}
+
+void mcdi_to_ethtool_linkset(u32 media, u32 cap, unsigned long *linkset)
+{
+ #define SET_BIT(name) __set_bit(ETHTOOL_LINK_MODE_ ## name ## _BIT, \
+ linkset)
+
+ bitmap_zero(linkset, __ETHTOOL_LINK_MODE_MASK_NBITS);
+ switch (media) {
+ case MC_CMD_MEDIA_KX4:
+ SET_BIT(Backplane);
+ if (cap & (1 << MC_CMD_PHY_CAP_1000FDX_LBN))
+ SET_BIT(1000baseKX_Full);
+ if (cap & (1 << MC_CMD_PHY_CAP_10000FDX_LBN))
+ SET_BIT(10000baseKX4_Full);
+ if (cap & (1 << MC_CMD_PHY_CAP_40000FDX_LBN))
+ SET_BIT(40000baseKR4_Full);
+ break;
+
+ case MC_CMD_MEDIA_XFP:
+ case MC_CMD_MEDIA_SFP_PLUS:
+ case MC_CMD_MEDIA_QSFP_PLUS:
+ SET_BIT(FIBRE);
+ if (cap & (1 << MC_CMD_PHY_CAP_1000FDX_LBN))
+ SET_BIT(1000baseT_Full);
+ if (cap & (1 << MC_CMD_PHY_CAP_10000FDX_LBN))
+ SET_BIT(10000baseT_Full);
+ if (cap & (1 << MC_CMD_PHY_CAP_40000FDX_LBN))
+ SET_BIT(40000baseCR4_Full);
+ if (cap & (1 << MC_CMD_PHY_CAP_100000FDX_LBN))
+ SET_BIT(100000baseCR4_Full);
+ if (cap & (1 << MC_CMD_PHY_CAP_25000FDX_LBN))
+ SET_BIT(25000baseCR_Full);
+ if (cap & (1 << MC_CMD_PHY_CAP_50000FDX_LBN))
+ SET_BIT(50000baseCR2_Full);
+ break;
+
+ case MC_CMD_MEDIA_BASE_T:
+ SET_BIT(TP);
+ if (cap & (1 << MC_CMD_PHY_CAP_10HDX_LBN))
+ SET_BIT(10baseT_Half);
+ if (cap & (1 << MC_CMD_PHY_CAP_10FDX_LBN))
+ SET_BIT(10baseT_Full);
+ if (cap & (1 << MC_CMD_PHY_CAP_100HDX_LBN))
+ SET_BIT(100baseT_Half);
+ if (cap & (1 << MC_CMD_PHY_CAP_100FDX_LBN))
+ SET_BIT(100baseT_Full);
+ if (cap & (1 << MC_CMD_PHY_CAP_1000HDX_LBN))
+ SET_BIT(1000baseT_Half);
+ if (cap & (1 << MC_CMD_PHY_CAP_1000FDX_LBN))
+ SET_BIT(1000baseT_Full);
+ if (cap & (1 << MC_CMD_PHY_CAP_10000FDX_LBN))
+ SET_BIT(10000baseT_Full);
+ break;
+ }
+
+ if (cap & (1 << MC_CMD_PHY_CAP_PAUSE_LBN))
+ SET_BIT(Pause);
+ if (cap & (1 << MC_CMD_PHY_CAP_ASYM_LBN))
+ SET_BIT(Asym_Pause);
+ if (cap & (1 << MC_CMD_PHY_CAP_AN_LBN))
+ SET_BIT(Autoneg);
+
+ #undef SET_BIT
+}
+
+u32 ethtool_linkset_to_mcdi_cap(const unsigned long *linkset)
+{
+ u32 result = 0;
+
+ #define TEST_BIT(name) test_bit(ETHTOOL_LINK_MODE_ ## name ## _BIT, \
+ linkset)
+
+ if (TEST_BIT(10baseT_Half))
+ result |= (1 << MC_CMD_PHY_CAP_10HDX_LBN);
+ if (TEST_BIT(10baseT_Full))
+ result |= (1 << MC_CMD_PHY_CAP_10FDX_LBN);
+ if (TEST_BIT(100baseT_Half))
+ result |= (1 << MC_CMD_PHY_CAP_100HDX_LBN);
+ if (TEST_BIT(100baseT_Full))
+ result |= (1 << MC_CMD_PHY_CAP_100FDX_LBN);
+ if (TEST_BIT(1000baseT_Half))
+ result |= (1 << MC_CMD_PHY_CAP_1000HDX_LBN);
+ if (TEST_BIT(1000baseT_Full) || TEST_BIT(1000baseKX_Full))
+ result |= (1 << MC_CMD_PHY_CAP_1000FDX_LBN);
+ if (TEST_BIT(10000baseT_Full) || TEST_BIT(10000baseKX4_Full))
+ result |= (1 << MC_CMD_PHY_CAP_10000FDX_LBN);
+ if (TEST_BIT(40000baseCR4_Full) || TEST_BIT(40000baseKR4_Full))
+ result |= (1 << MC_CMD_PHY_CAP_40000FDX_LBN);
+ if (TEST_BIT(100000baseCR4_Full))
+ result |= (1 << MC_CMD_PHY_CAP_100000FDX_LBN);
+ if (TEST_BIT(25000baseCR_Full))
+ result |= (1 << MC_CMD_PHY_CAP_25000FDX_LBN);
+ if (TEST_BIT(50000baseCR2_Full))
+ result |= (1 << MC_CMD_PHY_CAP_50000FDX_LBN);
+ if (TEST_BIT(Pause))
+ result |= (1 << MC_CMD_PHY_CAP_PAUSE_LBN);
+ if (TEST_BIT(Asym_Pause))
+ result |= (1 << MC_CMD_PHY_CAP_ASYM_LBN);
+ if (TEST_BIT(Autoneg))
+ result |= (1 << MC_CMD_PHY_CAP_AN_LBN);
+
+ #undef TEST_BIT
+
+ return result;
+}
+
+u32 efx_get_mcdi_phy_flags(struct efx_nic *efx)
+{
+ struct efx_mcdi_phy_data *phy_cfg = efx->phy_data;
+ enum efx_phy_mode mode, supported;
+ u32 flags;
+
+ /* TODO: Advertise the capabilities supported by this PHY */
+ supported = 0;
+ if (phy_cfg->flags & (1 << MC_CMD_GET_PHY_CFG_OUT_TXDIS_LBN))
+ supported |= PHY_MODE_TX_DISABLED;
+ if (phy_cfg->flags & (1 << MC_CMD_GET_PHY_CFG_OUT_LOWPOWER_LBN))
+ supported |= PHY_MODE_LOW_POWER;
+ if (phy_cfg->flags & (1 << MC_CMD_GET_PHY_CFG_OUT_POWEROFF_LBN))
+ supported |= PHY_MODE_OFF;
+
+ mode = efx->phy_mode & supported;
+
+ flags = 0;
+ if (mode & PHY_MODE_TX_DISABLED)
+ flags |= (1 << MC_CMD_SET_LINK_IN_TXDIS_LBN);
+ if (mode & PHY_MODE_LOW_POWER)
+ flags |= (1 << MC_CMD_SET_LINK_IN_LOWPOWER_LBN);
+ if (mode & PHY_MODE_OFF)
+ flags |= (1 << MC_CMD_SET_LINK_IN_POWEROFF_LBN);
+
+ return flags;
+}
+
+u8 mcdi_to_ethtool_media(u32 media)
+{
+ switch (media) {
+ case MC_CMD_MEDIA_XAUI:
+ case MC_CMD_MEDIA_CX4:
+ case MC_CMD_MEDIA_KX4:
+ return PORT_OTHER;
+
+ case MC_CMD_MEDIA_XFP:
+ case MC_CMD_MEDIA_SFP_PLUS:
+ case MC_CMD_MEDIA_QSFP_PLUS:
+ return PORT_FIBRE;
+
+ case MC_CMD_MEDIA_BASE_T:
+ return PORT_TP;
+
+ default:
+ return PORT_OTHER;
+ }
+}
+
+void efx_mcdi_phy_decode_link(struct efx_nic *efx,
+ struct efx_link_state *link_state,
+ u32 speed, u32 flags, u32 fcntl)
+{
+ switch (fcntl) {
+ case MC_CMD_FCNTL_AUTO:
+ WARN_ON(1); /* This is not a link mode */
+ link_state->fc = EFX_FC_AUTO | EFX_FC_TX | EFX_FC_RX;
+ break;
+ case MC_CMD_FCNTL_BIDIR:
+ link_state->fc = EFX_FC_TX | EFX_FC_RX;
+ break;
+ case MC_CMD_FCNTL_RESPOND:
+ link_state->fc = EFX_FC_RX;
+ break;
+ default:
+ WARN_ON(1);
+ /* Fall through */
+ case MC_CMD_FCNTL_OFF:
+ link_state->fc = 0;
+ break;
+ }
+
+ link_state->up = !!(flags & (1 << MC_CMD_GET_LINK_OUT_LINK_UP_LBN));
+ link_state->fd = !!(flags & (1 << MC_CMD_GET_LINK_OUT_FULL_DUPLEX_LBN));
+ link_state->speed = speed;
+}
+
+/* The semantics of the ethtool FEC mode bitmask are not well defined,
+ * particularly the meaning of combinations of bits. Which means we get to
+ * define our own semantics, as follows:
+ * OFF overrides any other bits, and means "disable all FEC" (with the
+ * exception of 25G KR4/CR4, where it is not possible to reject it if AN
+ * partner requests it).
+ * AUTO on its own means use cable requirements and link partner autoneg with
+ * fw-default preferences for the cable type.
+ * AUTO and either RS or BASER means use the specified FEC type if cable and
+ * link partner support it, otherwise autoneg/fw-default.
+ * RS or BASER alone means use the specified FEC type if cable and link partner
+ * support it and either requests it, otherwise no FEC.
+ * Both RS and BASER (whether AUTO or not) means use FEC if cable and link
+ * partner support it, preferring RS to BASER.
+ */
+u32 ethtool_fec_caps_to_mcdi(u32 ethtool_cap)
+{
+ u32 ret = 0;
+
+ if (ethtool_cap & ETHTOOL_FEC_OFF)
+ return 0;
+
+ if (ethtool_cap & ETHTOOL_FEC_AUTO)
+ ret |= (1 << MC_CMD_PHY_CAP_BASER_FEC_LBN) |
+ (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_LBN) |
+ (1 << MC_CMD_PHY_CAP_RS_FEC_LBN);
+ if (ethtool_cap & ETHTOOL_FEC_RS)
+ ret |= (1 << MC_CMD_PHY_CAP_RS_FEC_LBN) |
+ (1 << MC_CMD_PHY_CAP_RS_FEC_REQUESTED_LBN);
+ if (ethtool_cap & ETHTOOL_FEC_BASER)
+ ret |= (1 << MC_CMD_PHY_CAP_BASER_FEC_LBN) |
+ (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_LBN) |
+ (1 << MC_CMD_PHY_CAP_BASER_FEC_REQUESTED_LBN) |
+ (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_REQUESTED_LBN);
+ return ret;
+}
+
+/* Invert ethtool_fec_caps_to_mcdi. There are two combinations that function
+ * can never produce, (baser xor rs) and neither req; the implementation below
+ * maps both of those to AUTO. This should never matter, and it's not clear
+ * what a better mapping would be anyway.
+ */
+u32 mcdi_fec_caps_to_ethtool(u32 caps, bool is_25g)
+{
+ bool rs = caps & (1 << MC_CMD_PHY_CAP_RS_FEC_LBN),
+ rs_req = caps & (1 << MC_CMD_PHY_CAP_RS_FEC_REQUESTED_LBN),
+ baser = is_25g ? caps & (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_LBN)
+ : caps & (1 << MC_CMD_PHY_CAP_BASER_FEC_LBN),
+ baser_req = is_25g ? caps & (1 << MC_CMD_PHY_CAP_25G_BASER_FEC_REQUESTED_LBN)
+ : caps & (1 << MC_CMD_PHY_CAP_BASER_FEC_REQUESTED_LBN);
+
+ if (!baser && !rs)
+ return ETHTOOL_FEC_OFF;
+ return (rs_req ? ETHTOOL_FEC_RS : 0) |
+ (baser_req ? ETHTOOL_FEC_BASER : 0) |
+ (baser == baser_req && rs == rs_req ? 0 : ETHTOOL_FEC_AUTO);
+}
+
+/* Verify that the forced flow control settings (!EFX_FC_AUTO) are
+ * supported by the link partner. Warn the user if this isn't the case
+ */
+void efx_mcdi_phy_check_fcntl(struct efx_nic *efx, u32 lpa)
+{
+ struct efx_mcdi_phy_data *phy_cfg = efx->phy_data;
+ u32 rmtadv;
+
+ /* The link partner capabilities are only relevant if the
+ * link supports flow control autonegotiation
+ */
+ if (~phy_cfg->supported_cap & (1 << MC_CMD_PHY_CAP_AN_LBN))
+ return;
+
+ /* If flow control autoneg is supported and enabled, then fine */
+ if (efx->wanted_fc & EFX_FC_AUTO)
+ return;
+
+ rmtadv = 0;
+ if (lpa & (1 << MC_CMD_PHY_CAP_PAUSE_LBN))
+ rmtadv |= ADVERTISED_Pause;
+ if (lpa & (1 << MC_CMD_PHY_CAP_ASYM_LBN))
+ rmtadv |= ADVERTISED_Asym_Pause;
+
+ if ((efx->wanted_fc & EFX_FC_TX) && rmtadv == ADVERTISED_Asym_Pause)
+ netif_err(efx, link, efx->net_dev,
+ "warning: link partner doesn't support pause frames");
+}
+
+bool efx_mcdi_phy_poll(struct efx_nic *efx)
+{
+ struct efx_link_state old_state = efx->link_state;
+ MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_LINK_OUT_LEN);
+ int rc;
+
+ WARN_ON(!mutex_is_locked(&efx->mac_lock));
+
+ BUILD_BUG_ON(MC_CMD_GET_LINK_IN_LEN != 0);
+
+ rc = efx_mcdi_rpc(efx, MC_CMD_GET_LINK, NULL, 0,
+ outbuf, sizeof(outbuf), NULL);
+ if (rc)
+ efx->link_state.up = false;
+ else
+ efx_mcdi_phy_decode_link(
+ efx, &efx->link_state,
+ MCDI_DWORD(outbuf, GET_LINK_OUT_LINK_SPEED),
+ MCDI_DWORD(outbuf, GET_LINK_OUT_FLAGS),
+ MCDI_DWORD(outbuf, GET_LINK_OUT_FCNTL));
+
+ return !efx_link_state_equal(&efx->link_state, &old_state);
+}
+
+int efx_mcdi_phy_get_fecparam(struct efx_nic *efx, struct ethtool_fecparam *fec)
+{
+ MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_LINK_OUT_V2_LEN);
+ u32 caps, active, speed; /* MCDI format */
+ bool is_25g = false;
+ size_t outlen;
+ int rc;
+
+ BUILD_BUG_ON(MC_CMD_GET_LINK_IN_LEN != 0);
+ rc = efx_mcdi_rpc(efx, MC_CMD_GET_LINK, NULL, 0,
+ outbuf, sizeof(outbuf), &outlen);
+ if (rc)
+ return rc;
+ if (outlen < MC_CMD_GET_LINK_OUT_V2_LEN)
+ return -EOPNOTSUPP;
+
+ /* behaviour for 25G/50G links depends on 25G BASER bit */
+ speed = MCDI_DWORD(outbuf, GET_LINK_OUT_V2_LINK_SPEED);
+ is_25g = speed == 25000 || speed == 50000;
+
+ caps = MCDI_DWORD(outbuf, GET_LINK_OUT_V2_CAP);
+ fec->fec = mcdi_fec_caps_to_ethtool(caps, is_25g);
+ /* BASER is never supported on 100G */
+ if (speed == 100000)
+ fec->fec &= ~ETHTOOL_FEC_BASER;
+
+ active = MCDI_DWORD(outbuf, GET_LINK_OUT_V2_FEC_TYPE);
+ switch (active) {
+ case MC_CMD_FEC_NONE:
+ fec->active_fec = ETHTOOL_FEC_OFF;
+ break;
+ case MC_CMD_FEC_BASER:
+ fec->active_fec = ETHTOOL_FEC_BASER;
+ break;
+ case MC_CMD_FEC_RS:
+ fec->active_fec = ETHTOOL_FEC_RS;
+ break;
+ default:
+ netif_warn(efx, hw, efx->net_dev,
+ "Firmware reports unrecognised FEC_TYPE %u\n",
+ active);
+ /* We don't know what firmware has picked. AUTO is as good a
+ * "can't happen" value as any other.
+ */
+ fec->active_fec = ETHTOOL_FEC_AUTO;
+ break;
+ }
+
+ return 0;
+}
+
+int efx_mcdi_phy_test_alive(struct efx_nic *efx)
+{
+ MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_PHY_STATE_OUT_LEN);
+ size_t outlen;
+ int rc;
+
+ BUILD_BUG_ON(MC_CMD_GET_PHY_STATE_IN_LEN != 0);
+
+ rc = efx_mcdi_rpc(efx, MC_CMD_GET_PHY_STATE, NULL, 0,
+ outbuf, sizeof(outbuf), &outlen);
+ if (rc)
+ return rc;
+
+ if (outlen < MC_CMD_GET_PHY_STATE_OUT_LEN)
+ return -EIO;
+ if (MCDI_DWORD(outbuf, GET_PHY_STATE_OUT_STATE) != MC_CMD_PHY_STATE_OK)
+ return -EINVAL;
+
+ return 0;
+}
+
+int efx_mcdi_set_mac(struct efx_nic *efx)
+{
+ u32 fcntl;
+ MCDI_DECLARE_BUF(cmdbytes, MC_CMD_SET_MAC_IN_LEN);
+
+ BUILD_BUG_ON(MC_CMD_SET_MAC_OUT_LEN != 0);
+
+ /* This has no effect on EF10 */
+ ether_addr_copy(MCDI_PTR(cmdbytes, SET_MAC_IN_ADDR),
+ efx->net_dev->dev_addr);
+
+ MCDI_SET_DWORD(cmdbytes, SET_MAC_IN_MTU,
+ EFX_MAX_FRAME_LEN(efx->net_dev->mtu));
+ MCDI_SET_DWORD(cmdbytes, SET_MAC_IN_DRAIN, 0);
+
+ /* Set simple MAC filter for Siena */
+ MCDI_POPULATE_DWORD_1(cmdbytes, SET_MAC_IN_REJECT,
+ SET_MAC_IN_REJECT_UNCST, efx->unicast_filter);
+
+ MCDI_POPULATE_DWORD_1(cmdbytes, SET_MAC_IN_FLAGS,
+ SET_MAC_IN_FLAG_INCLUDE_FCS,
+ !!(efx->net_dev->features & NETIF_F_RXFCS));
+
+ switch (efx->wanted_fc) {
+ case EFX_FC_RX | EFX_FC_TX:
+ fcntl = MC_CMD_FCNTL_BIDIR;
+ break;
+ case EFX_FC_RX:
+ fcntl = MC_CMD_FCNTL_RESPOND;
+ break;
+ default:
+ fcntl = MC_CMD_FCNTL_OFF;
+ break;
+ }
+ if (efx->wanted_fc & EFX_FC_AUTO)
+ fcntl = MC_CMD_FCNTL_AUTO;
+ if (efx->fc_disable)
+ fcntl = MC_CMD_FCNTL_OFF;
+
+ MCDI_SET_DWORD(cmdbytes, SET_MAC_IN_FCNTL, fcntl);
+
+ return efx_mcdi_rpc(efx, MC_CMD_SET_MAC, cmdbytes, sizeof(cmdbytes),
+ NULL, 0, NULL);
+}
+
+/* Get physical port number (EF10 only; on Siena it is same as PF number) */
+int efx_mcdi_port_get_number(struct efx_nic *efx)
+{
+ MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_PORT_ASSIGNMENT_OUT_LEN);
+ int rc;
+
+ rc = efx_mcdi_rpc(efx, MC_CMD_GET_PORT_ASSIGNMENT, NULL, 0,
+ outbuf, sizeof(outbuf), NULL);
+ if (rc)
+ return rc;
+
+ return MCDI_DWORD(outbuf, GET_PORT_ASSIGNMENT_OUT_PORT);
+}
+
+static unsigned int efx_mcdi_event_link_speed[] = {
+ [MCDI_EVENT_LINKCHANGE_SPEED_100M] = 100,
+ [MCDI_EVENT_LINKCHANGE_SPEED_1G] = 1000,
+ [MCDI_EVENT_LINKCHANGE_SPEED_10G] = 10000,
+ [MCDI_EVENT_LINKCHANGE_SPEED_40G] = 40000,
+ [MCDI_EVENT_LINKCHANGE_SPEED_25G] = 25000,
+ [MCDI_EVENT_LINKCHANGE_SPEED_50G] = 50000,
+ [MCDI_EVENT_LINKCHANGE_SPEED_100G] = 100000,
+};
+
+void efx_mcdi_process_link_change(struct efx_nic *efx, efx_qword_t *ev)
+{
+ u32 flags, fcntl, speed, lpa;
+
+ speed = EFX_QWORD_FIELD(*ev, MCDI_EVENT_LINKCHANGE_SPEED);
+ EFX_WARN_ON_PARANOID(speed >= ARRAY_SIZE(efx_mcdi_event_link_speed));
+ speed = efx_mcdi_event_link_speed[speed];
+
+ flags = EFX_QWORD_FIELD(*ev, MCDI_EVENT_LINKCHANGE_LINK_FLAGS);
+ fcntl = EFX_QWORD_FIELD(*ev, MCDI_EVENT_LINKCHANGE_FCNTL);
+ lpa = EFX_QWORD_FIELD(*ev, MCDI_EVENT_LINKCHANGE_LP_CAP);
+
+ /* efx->link_state is only modified by efx_mcdi_phy_get_link(),
+ * which is only run after flushing the event queues. Therefore, it
+ * is safe to modify the link state outside of the mac_lock here.
+ */
+ efx_mcdi_phy_decode_link(efx, &efx->link_state, speed, flags, fcntl);
+
+ efx_mcdi_phy_check_fcntl(efx, lpa);
+
+ efx_link_status_changed(efx);
+}
diff --git a/drivers/net/ethernet/sfc/mcdi_port_common.h b/drivers/net/ethernet/sfc/mcdi_port_common.h
new file mode 100644
index 000000000000..b16f11265269
--- /dev/null
+++ b/drivers/net/ethernet/sfc/mcdi_port_common.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2018 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+#ifndef EFX_MCDI_PORT_COMMON_H
+#define EFX_MCDI_PORT_COMMON_H
+
+#include "net_driver.h"
+#include "mcdi.h"
+#include "mcdi_pcol.h"
+
+struct efx_mcdi_phy_data {
+ u32 flags;
+ u32 type;
+ u32 supported_cap;
+ u32 channel;
+ u32 port;
+ u32 stats_mask;
+ u8 name[20];
+ u32 media;
+ u32 mmd_mask;
+ u8 revision[20];
+ u32 forced_cap;
+};
+
+#define EFX_MC_STATS_GENERATION_INVALID ((__force __le64)(-1))
+
+int efx_mcdi_get_phy_cfg(struct efx_nic *efx, struct efx_mcdi_phy_data *cfg);
+void efx_link_set_advertising(struct efx_nic *efx,
+ const unsigned long *advertising);
+int efx_mcdi_set_link(struct efx_nic *efx, u32 capabilities,
+ u32 flags, u32 loopback_mode, u32 loopback_speed);
+int efx_mcdi_loopback_modes(struct efx_nic *efx, u64 *loopback_modes);
+void mcdi_to_ethtool_linkset(u32 media, u32 cap, unsigned long *linkset);
+u32 ethtool_linkset_to_mcdi_cap(const unsigned long *linkset);
+u32 efx_get_mcdi_phy_flags(struct efx_nic *efx);
+u8 mcdi_to_ethtool_media(u32 media);
+void efx_mcdi_phy_decode_link(struct efx_nic *efx,
+ struct efx_link_state *link_state,
+ u32 speed, u32 flags, u32 fcntl);
+u32 ethtool_fec_caps_to_mcdi(u32 ethtool_cap);
+u32 mcdi_fec_caps_to_ethtool(u32 caps, bool is_25g);
+void efx_mcdi_phy_check_fcntl(struct efx_nic *efx, u32 lpa);
+bool efx_mcdi_phy_poll(struct efx_nic *efx);
+int efx_mcdi_phy_get_fecparam(struct efx_nic *efx,
+ struct ethtool_fecparam *fec);
+int efx_mcdi_phy_test_alive(struct efx_nic *efx);
+int efx_mcdi_set_mac(struct efx_nic *efx);
+int efx_mcdi_port_get_number(struct efx_nic *efx);
+void efx_mcdi_process_link_change(struct efx_nic *efx, efx_qword_t *ev);
+
+#endif
diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h
index 1f88212be085..9f9886f222c8 100644
--- a/drivers/net/ethernet/sfc/net_driver.h
+++ b/drivers/net/ethernet/sfc/net_driver.h
@@ -24,7 +24,6 @@
#include <linux/mutex.h>
#include <linux/rwsem.h>
#include <linux/vmalloc.h>
-#include <linux/i2c.h>
#include <linux/mtd/mtd.h>
#include <net/busy_poll.h>
#include <net/xdp.h>
@@ -139,6 +138,8 @@ struct efx_special_buffer {
* freed when descriptor completes
* @xdpf: When @flags & %EFX_TX_BUF_XDP, the XDP frame information; its @data
* member is the associated buffer to drop a page reference on.
+ * @option: When @flags & %EFX_TX_BUF_OPTION, an EF10-specific option
+ * descriptor.
* @dma_addr: DMA address of the fragment.
* @flags: Flags for allocation and DMA mapping type
* @len: Length of this fragment.
@@ -153,7 +154,7 @@ struct efx_tx_buffer {
struct xdp_frame *xdpf;
};
union {
- efx_qword_t option;
+ efx_qword_t option; /* EF10 */
dma_addr_t dma_addr;
};
unsigned short flags;
@@ -743,13 +744,13 @@ union efx_multicast_hash {
struct vfdi_status;
/* The reserved RSS context value */
-#define EFX_EF10_RSS_CONTEXT_INVALID 0xffffffff
+#define EFX_MCDI_RSS_CONTEXT_INVALID 0xffffffff
/**
* struct efx_rss_context - A user-defined RSS context for filtering
* @list: node of linked list on which this struct is stored
* @context_id: the RSS_CONTEXT_ID returned by MC firmware, or
- * %EFX_EF10_RSS_CONTEXT_INVALID if this context is not present on the NIC.
- * For Siena, 0 if RSS is active, else %EFX_EF10_RSS_CONTEXT_INVALID.
+ * %EFX_MCDI_RSS_CONTEXT_INVALID if this context is not present on the NIC.
+ * For Siena, 0 if RSS is active, else %EFX_MCDI_RSS_CONTEXT_INVALID.
* @user_id: the rss_context ID exposed to userspace over ethtool.
* @rx_hash_udp_4tuple: UDP 4-tuple hashing enabled
* @rx_hash_key: Toeplitz hash key for this RSS context
@@ -1533,9 +1534,7 @@ static inline bool efx_channel_is_xdp_tx(struct efx_channel *channel)
static inline bool efx_channel_has_tx_queues(struct efx_channel *channel)
{
- return efx_channel_is_xdp_tx(channel) ||
- (channel->type && channel->type->want_txqs &&
- channel->type->want_txqs(channel));
+ return true;
}
static inline struct efx_tx_queue *
@@ -1613,6 +1612,15 @@ static inline struct efx_rx_buffer *efx_rx_buffer(struct efx_rx_queue *rx_queue,
return &rx_queue->buffer[index];
}
+static inline struct efx_rx_buffer *
+efx_rx_buf_next(struct efx_rx_queue *rx_queue, struct efx_rx_buffer *rx_buf)
+{
+ if (unlikely(rx_buf == efx_rx_buffer(rx_queue, rx_queue->ptr_mask)))
+ return efx_rx_buffer(rx_queue, 0);
+ else
+ return rx_buf + 1;
+}
+
/**
* EFX_MAX_FRAME_LEN - calculate maximum frame length
*
diff --git a/drivers/net/ethernet/sfc/nic.h b/drivers/net/ethernet/sfc/nic.h
index 1f7c5717de75..6670fda8f35a 100644
--- a/drivers/net/ethernet/sfc/nic.h
+++ b/drivers/net/ethernet/sfc/nic.h
@@ -9,9 +9,9 @@
#define EFX_NIC_H
#include <linux/net_tstamp.h>
-#include <linux/i2c-algo-bit.h>
#include "net_driver.h"
#include "efx.h"
+#include "efx_common.h"
#include "mcdi.h"
enum {
@@ -506,6 +506,9 @@ static inline void efx_nic_push_buffers(struct efx_tx_queue *tx_queue)
tx_queue->efx->type->tx_write(tx_queue);
}
+int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue, struct sk_buff *skb,
+ bool *data_mapped);
+
/* RX data path */
static inline int efx_nic_probe_rx(struct efx_rx_queue *rx_queue)
{
@@ -554,6 +557,7 @@ static inline void efx_nic_eventq_read_ack(struct efx_channel *channel)
{
channel->efx->type->ev_read_ack(channel);
}
+
void efx_nic_event_test_start(struct efx_channel *channel);
/* Falcon/Siena queue operations */
@@ -671,6 +675,7 @@ struct efx_farch_register_test {
unsigned address;
efx_oword_t mask;
};
+
int efx_farch_test_registers(struct efx_nic *efx,
const struct efx_farch_register_test *regs,
size_t n_regs);
diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c
index ef52b24ad9e7..a2042f16babc 100644
--- a/drivers/net/ethernet/sfc/rx.c
+++ b/drivers/net/ethernet/sfc/rx.c
@@ -21,6 +21,7 @@
#include <linux/bpf_trace.h>
#include "net_driver.h"
#include "efx.h"
+#include "rx_common.h"
#include "filter.h"
#include "nic.h"
#include "selftest.h"
@@ -32,60 +33,13 @@
/* Maximum rx prefix used by any architecture. */
#define EFX_MAX_RX_PREFIX_SIZE 16
-/* Number of RX buffers to recycle pages for. When creating the RX page recycle
- * ring, this number is divided by the number of buffers per page to calculate
- * the number of pages to store in the RX page recycle ring.
- */
-#define EFX_RECYCLE_RING_SIZE_IOMMU 4096
-#define EFX_RECYCLE_RING_SIZE_NOIOMMU (2 * EFX_RX_PREFERRED_BATCH)
-
/* Size of buffer allocated for skb header area. */
#define EFX_SKB_HEADERS 128u
-/* This is the percentage fill level below which new RX descriptors
- * will be added to the RX descriptor ring.
- */
-static unsigned int rx_refill_threshold;
-
/* Each packet can consume up to ceil(max_frame_len / buffer_size) buffers */
#define EFX_RX_MAX_FRAGS DIV_ROUND_UP(EFX_MAX_FRAME_LEN(EFX_MAX_MTU), \
EFX_RX_USR_BUF_SIZE)
-/*
- * RX maximum head room required.
- *
- * This must be at least 1 to prevent overflow, plus one packet-worth
- * to allow pipelined receives.
- */
-#define EFX_RXD_HEAD_ROOM (1 + EFX_RX_MAX_FRAGS)
-
-static inline u8 *efx_rx_buf_va(struct efx_rx_buffer *buf)
-{
- return page_address(buf->page) + buf->page_offset;
-}
-
-static inline u32 efx_rx_buf_hash(struct efx_nic *efx, const u8 *eh)
-{
-#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
- return __le32_to_cpup((const __le32 *)(eh + efx->rx_packet_hash_offset));
-#else
- const u8 *data = eh + efx->rx_packet_hash_offset;
- return (u32)data[0] |
- (u32)data[1] << 8 |
- (u32)data[2] << 16 |
- (u32)data[3] << 24;
-#endif
-}
-
-static inline struct efx_rx_buffer *
-efx_rx_buf_next(struct efx_rx_queue *rx_queue, struct efx_rx_buffer *rx_buf)
-{
- if (unlikely(rx_buf == efx_rx_buffer(rx_queue, rx_queue->ptr_mask)))
- return efx_rx_buffer(rx_queue, 0);
- else
- return rx_buf + 1;
-}
-
static inline void efx_sync_rx_buffer(struct efx_nic *efx,
struct efx_rx_buffer *rx_buf,
unsigned int len)
@@ -94,301 +48,6 @@ static inline void efx_sync_rx_buffer(struct efx_nic *efx,
DMA_FROM_DEVICE);
}
-void efx_rx_config_page_split(struct efx_nic *efx)
-{
- efx->rx_page_buf_step = ALIGN(efx->rx_dma_len + efx->rx_ip_align,
- EFX_RX_BUF_ALIGNMENT);
- efx->rx_bufs_per_page = efx->rx_buffer_order ? 1 :
- ((PAGE_SIZE - sizeof(struct efx_rx_page_state)) /
- (efx->rx_page_buf_step + XDP_PACKET_HEADROOM));
- efx->rx_buffer_truesize = (PAGE_SIZE << efx->rx_buffer_order) /
- efx->rx_bufs_per_page;
- efx->rx_pages_per_batch = DIV_ROUND_UP(EFX_RX_PREFERRED_BATCH,
- efx->rx_bufs_per_page);
-}
-
-/* Check the RX page recycle ring for a page that can be reused. */
-static struct page *efx_reuse_page(struct efx_rx_queue *rx_queue)
-{
- struct efx_nic *efx = rx_queue->efx;
- struct page *page;
- struct efx_rx_page_state *state;
- unsigned index;
-
- index = rx_queue->page_remove & rx_queue->page_ptr_mask;
- page = rx_queue->page_ring[index];
- if (page == NULL)
- return NULL;
-
- rx_queue->page_ring[index] = NULL;
- /* page_remove cannot exceed page_add. */
- if (rx_queue->page_remove != rx_queue->page_add)
- ++rx_queue->page_remove;
-
- /* If page_count is 1 then we hold the only reference to this page. */
- if (page_count(page) == 1) {
- ++rx_queue->page_recycle_count;
- return page;
- } else {
- state = page_address(page);
- dma_unmap_page(&efx->pci_dev->dev, state->dma_addr,
- PAGE_SIZE << efx->rx_buffer_order,
- DMA_FROM_DEVICE);
- put_page(page);
- ++rx_queue->page_recycle_failed;
- }
-
- return NULL;
-}
-
-/**
- * efx_init_rx_buffers - create EFX_RX_BATCH page-based RX buffers
- *
- * @rx_queue: Efx RX queue
- *
- * This allocates a batch of pages, maps them for DMA, and populates
- * struct efx_rx_buffers for each one. Return a negative error code or
- * 0 on success. If a single page can be used for multiple buffers,
- * then the page will either be inserted fully, or not at all.
- */
-static int efx_init_rx_buffers(struct efx_rx_queue *rx_queue, bool atomic)
-{
- struct efx_nic *efx = rx_queue->efx;
- struct efx_rx_buffer *rx_buf;
- struct page *page;
- unsigned int page_offset;
- struct efx_rx_page_state *state;
- dma_addr_t dma_addr;
- unsigned index, count;
-
- count = 0;
- do {
- page = efx_reuse_page(rx_queue);
- if (page == NULL) {
- page = alloc_pages(__GFP_COMP |
- (atomic ? GFP_ATOMIC : GFP_KERNEL),
- efx->rx_buffer_order);
- if (unlikely(page == NULL))
- return -ENOMEM;
- dma_addr =
- dma_map_page(&efx->pci_dev->dev, page, 0,
- PAGE_SIZE << efx->rx_buffer_order,
- DMA_FROM_DEVICE);
- if (unlikely(dma_mapping_error(&efx->pci_dev->dev,
- dma_addr))) {
- __free_pages(page, efx->rx_buffer_order);
- return -EIO;
- }
- state = page_address(page);
- state->dma_addr = dma_addr;
- } else {
- state = page_address(page);
- dma_addr = state->dma_addr;
- }
-
- dma_addr += sizeof(struct efx_rx_page_state);
- page_offset = sizeof(struct efx_rx_page_state);
-
- do {
- page_offset += XDP_PACKET_HEADROOM;
- dma_addr += XDP_PACKET_HEADROOM;
-
- index = rx_queue->added_count & rx_queue->ptr_mask;
- rx_buf = efx_rx_buffer(rx_queue, index);
- rx_buf->dma_addr = dma_addr + efx->rx_ip_align;
- rx_buf->page = page;
- rx_buf->page_offset = page_offset + efx->rx_ip_align;
- rx_buf->len = efx->rx_dma_len;
- rx_buf->flags = 0;
- ++rx_queue->added_count;
- get_page(page);
- dma_addr += efx->rx_page_buf_step;
- page_offset += efx->rx_page_buf_step;
- } while (page_offset + efx->rx_page_buf_step <= PAGE_SIZE);
-
- rx_buf->flags = EFX_RX_BUF_LAST_IN_PAGE;
- } while (++count < efx->rx_pages_per_batch);
-
- return 0;
-}
-
-/* Unmap a DMA-mapped page. This function is only called for the final RX
- * buffer in a page.
- */
-static void efx_unmap_rx_buffer(struct efx_nic *efx,
- struct efx_rx_buffer *rx_buf)
-{
- struct page *page = rx_buf->page;
-
- if (page) {
- struct efx_rx_page_state *state = page_address(page);
- dma_unmap_page(&efx->pci_dev->dev,
- state->dma_addr,
- PAGE_SIZE << efx->rx_buffer_order,
- DMA_FROM_DEVICE);
- }
-}
-
-static void efx_free_rx_buffers(struct efx_rx_queue *rx_queue,
- struct efx_rx_buffer *rx_buf,
- unsigned int num_bufs)
-{
- do {
- if (rx_buf->page) {
- put_page(rx_buf->page);
- rx_buf->page = NULL;
- }
- rx_buf = efx_rx_buf_next(rx_queue, rx_buf);
- } while (--num_bufs);
-}
-
-/* Attempt to recycle the page if there is an RX recycle ring; the page can
- * only be added if this is the final RX buffer, to prevent pages being used in
- * the descriptor ring and appearing in the recycle ring simultaneously.
- */
-static void efx_recycle_rx_page(struct efx_channel *channel,
- struct efx_rx_buffer *rx_buf)
-{
- struct page *page = rx_buf->page;
- struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel);
- struct efx_nic *efx = rx_queue->efx;
- unsigned index;
-
- /* Only recycle the page after processing the final buffer. */
- if (!(rx_buf->flags & EFX_RX_BUF_LAST_IN_PAGE))
- return;
-
- index = rx_queue->page_add & rx_queue->page_ptr_mask;
- if (rx_queue->page_ring[index] == NULL) {
- unsigned read_index = rx_queue->page_remove &
- rx_queue->page_ptr_mask;
-
- /* The next slot in the recycle ring is available, but
- * increment page_remove if the read pointer currently
- * points here.
- */
- if (read_index == index)
- ++rx_queue->page_remove;
- rx_queue->page_ring[index] = page;
- ++rx_queue->page_add;
- return;
- }
- ++rx_queue->page_recycle_full;
- efx_unmap_rx_buffer(efx, rx_buf);
- put_page(rx_buf->page);
-}
-
-static void efx_fini_rx_buffer(struct efx_rx_queue *rx_queue,
- struct efx_rx_buffer *rx_buf)
-{
- /* Release the page reference we hold for the buffer. */
- if (rx_buf->page)
- put_page(rx_buf->page);
-
- /* If this is the last buffer in a page, unmap and free it. */
- if (rx_buf->flags & EFX_RX_BUF_LAST_IN_PAGE) {
- efx_unmap_rx_buffer(rx_queue->efx, rx_buf);
- efx_free_rx_buffers(rx_queue, rx_buf, 1);
- }
- rx_buf->page = NULL;
-}
-
-/* Recycle the pages that are used by buffers that have just been received. */
-static void efx_recycle_rx_pages(struct efx_channel *channel,
- struct efx_rx_buffer *rx_buf,
- unsigned int n_frags)
-{
- struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel);
-
- do {
- efx_recycle_rx_page(channel, rx_buf);
- rx_buf = efx_rx_buf_next(rx_queue, rx_buf);
- } while (--n_frags);
-}
-
-static void efx_discard_rx_packet(struct efx_channel *channel,
- struct efx_rx_buffer *rx_buf,
- unsigned int n_frags)
-{
- struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel);
-
- efx_recycle_rx_pages(channel, rx_buf, n_frags);
-
- efx_free_rx_buffers(rx_queue, rx_buf, n_frags);
-}
-
-/**
- * efx_fast_push_rx_descriptors - push new RX descriptors quickly
- * @rx_queue: RX descriptor queue
- *
- * This will aim to fill the RX descriptor queue up to
- * @rx_queue->@max_fill. If there is insufficient atomic
- * memory to do so, a slow fill will be scheduled.
- *
- * The caller must provide serialisation (none is used here). In practise,
- * this means this function must run from the NAPI handler, or be called
- * when NAPI is disabled.
- */
-void efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue, bool atomic)
-{
- struct efx_nic *efx = rx_queue->efx;
- unsigned int fill_level, batch_size;
- int space, rc = 0;
-
- if (!rx_queue->refill_enabled)
- return;
-
- /* Calculate current fill level, and exit if we don't need to fill */
- fill_level = (rx_queue->added_count - rx_queue->removed_count);
- EFX_WARN_ON_ONCE_PARANOID(fill_level > rx_queue->efx->rxq_entries);
- if (fill_level >= rx_queue->fast_fill_trigger)
- goto out;
-
- /* Record minimum fill level */
- if (unlikely(fill_level < rx_queue->min_fill)) {
- if (fill_level)
- rx_queue->min_fill = fill_level;
- }
-
- batch_size = efx->rx_pages_per_batch * efx->rx_bufs_per_page;
- space = rx_queue->max_fill - fill_level;
- EFX_WARN_ON_ONCE_PARANOID(space < batch_size);
-
- netif_vdbg(rx_queue->efx, rx_status, rx_queue->efx->net_dev,
- "RX queue %d fast-filling descriptor ring from"
- " level %d to level %d\n",
- efx_rx_queue_index(rx_queue), fill_level,
- rx_queue->max_fill);
-
-
- do {
- rc = efx_init_rx_buffers(rx_queue, atomic);
- if (unlikely(rc)) {
- /* Ensure that we don't leave the rx queue empty */
- efx_schedule_slow_fill(rx_queue);
- goto out;
- }
- } while ((space -= batch_size) >= batch_size);
-
- netif_vdbg(rx_queue->efx, rx_status, rx_queue->efx->net_dev,
- "RX queue %d fast-filled descriptor ring "
- "to level %d\n", efx_rx_queue_index(rx_queue),
- rx_queue->added_count - rx_queue->removed_count);
-
- out:
- if (rx_queue->notified_count != rx_queue->added_count)
- efx_nic_notify_rx_desc(rx_queue);
-}
-
-void efx_rx_slow_fill(struct timer_list *t)
-{
- struct efx_rx_queue *rx_queue = from_timer(rx_queue, t, slow_fill);
-
- /* Post an event to cause NAPI to run and refill the queue */
- efx_nic_generate_fill_event(rx_queue);
- ++rx_queue->slow_fill_count;
-}
-
static void efx_rx_packet__check_len(struct efx_rx_queue *rx_queue,
struct efx_rx_buffer *rx_buf,
int len)
@@ -412,53 +71,6 @@ static void efx_rx_packet__check_len(struct efx_rx_queue *rx_queue,
efx_rx_queue_channel(rx_queue)->n_rx_overlength++;
}
-/* Pass a received packet up through GRO. GRO can handle pages
- * regardless of checksum state and skbs with a good checksum.
- */
-static void
-efx_rx_packet_gro(struct efx_channel *channel, struct efx_rx_buffer *rx_buf,
- unsigned int n_frags, u8 *eh)
-{
- struct napi_struct *napi = &channel->napi_str;
- struct efx_nic *efx = channel->efx;
- struct sk_buff *skb;
-
- skb = napi_get_frags(napi);
- if (unlikely(!skb)) {
- struct efx_rx_queue *rx_queue;
-
- rx_queue = efx_channel_get_rx_queue(channel);
- efx_free_rx_buffers(rx_queue, rx_buf, n_frags);
- return;
- }
-
- if (efx->net_dev->features & NETIF_F_RXHASH)
- skb_set_hash(skb, efx_rx_buf_hash(efx, eh),
- PKT_HASH_TYPE_L3);
- skb->ip_summed = ((rx_buf->flags & EFX_RX_PKT_CSUMMED) ?
- CHECKSUM_UNNECESSARY : CHECKSUM_NONE);
- skb->csum_level = !!(rx_buf->flags & EFX_RX_PKT_CSUM_LEVEL);
-
- for (;;) {
- skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
- rx_buf->page, rx_buf->page_offset,
- rx_buf->len);
- rx_buf->page = NULL;
- skb->len += rx_buf->len;
- if (skb_shinfo(skb)->nr_frags == n_frags)
- break;
-
- rx_buf = efx_rx_buf_next(&channel->rx_queue, rx_buf);
- }
-
- skb->data_len = skb->len;
- skb->truesize += n_frags * efx->rx_buffer_truesize;
-
- skb_record_rx_queue(skb, channel->rx_queue.core_index);
-
- napi_gro_frags(napi);
-}
-
/* Allocate and construct an SKB around page fragments */
static struct sk_buff *efx_rx_mk_skb(struct efx_channel *channel,
struct efx_rx_buffer *rx_buf,
@@ -805,174 +417,6 @@ out:
channel->rx_pkt_n_frags = 0;
}
-int efx_probe_rx_queue(struct efx_rx_queue *rx_queue)
-{
- struct efx_nic *efx = rx_queue->efx;
- unsigned int entries;
- int rc;
-
- /* Create the smallest power-of-two aligned ring */
- entries = max(roundup_pow_of_two(efx->rxq_entries), EFX_MIN_DMAQ_SIZE);
- EFX_WARN_ON_PARANOID(entries > EFX_MAX_DMAQ_SIZE);
- rx_queue->ptr_mask = entries - 1;
-
- netif_dbg(efx, probe, efx->net_dev,
- "creating RX queue %d size %#x mask %#x\n",
- efx_rx_queue_index(rx_queue), efx->rxq_entries,
- rx_queue->ptr_mask);
-
- /* Allocate RX buffers */
- rx_queue->buffer = kcalloc(entries, sizeof(*rx_queue->buffer),
- GFP_KERNEL);
- if (!rx_queue->buffer)
- return -ENOMEM;
-
- rc = efx_nic_probe_rx(rx_queue);
- if (rc) {
- kfree(rx_queue->buffer);
- rx_queue->buffer = NULL;
- }
-
- return rc;
-}
-
-static void efx_init_rx_recycle_ring(struct efx_nic *efx,
- struct efx_rx_queue *rx_queue)
-{
- unsigned int bufs_in_recycle_ring, page_ring_size;
-
- /* Set the RX recycle ring size */
-#ifdef CONFIG_PPC64
- bufs_in_recycle_ring = EFX_RECYCLE_RING_SIZE_IOMMU;
-#else
- if (iommu_present(&pci_bus_type))
- bufs_in_recycle_ring = EFX_RECYCLE_RING_SIZE_IOMMU;
- else
- bufs_in_recycle_ring = EFX_RECYCLE_RING_SIZE_NOIOMMU;
-#endif /* CONFIG_PPC64 */
-
- page_ring_size = roundup_pow_of_two(bufs_in_recycle_ring /
- efx->rx_bufs_per_page);
- rx_queue->page_ring = kcalloc(page_ring_size,
- sizeof(*rx_queue->page_ring), GFP_KERNEL);
- rx_queue->page_ptr_mask = page_ring_size - 1;
-}
-
-void efx_init_rx_queue(struct efx_rx_queue *rx_queue)
-{
- struct efx_nic *efx = rx_queue->efx;
- unsigned int max_fill, trigger, max_trigger;
- int rc = 0;
-
- netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev,
- "initialising RX queue %d\n", efx_rx_queue_index(rx_queue));
-
- /* Initialise ptr fields */
- rx_queue->added_count = 0;
- rx_queue->notified_count = 0;
- rx_queue->removed_count = 0;
- rx_queue->min_fill = -1U;
- efx_init_rx_recycle_ring(efx, rx_queue);
-
- rx_queue->page_remove = 0;
- rx_queue->page_add = rx_queue->page_ptr_mask + 1;
- rx_queue->page_recycle_count = 0;
- rx_queue->page_recycle_failed = 0;
- rx_queue->page_recycle_full = 0;
-
- /* Initialise limit fields */
- max_fill = efx->rxq_entries - EFX_RXD_HEAD_ROOM;
- max_trigger =
- max_fill - efx->rx_pages_per_batch * efx->rx_bufs_per_page;
- if (rx_refill_threshold != 0) {
- trigger = max_fill * min(rx_refill_threshold, 100U) / 100U;
- if (trigger > max_trigger)
- trigger = max_trigger;
- } else {
- trigger = max_trigger;
- }
-
- rx_queue->max_fill = max_fill;
- rx_queue->fast_fill_trigger = trigger;
- rx_queue->refill_enabled = true;
-
- /* Initialise XDP queue information */
- rc = xdp_rxq_info_reg(&rx_queue->xdp_rxq_info, efx->net_dev,
- rx_queue->core_index);
-
- if (rc) {
- netif_err(efx, rx_err, efx->net_dev,
- "Failure to initialise XDP queue information rc=%d\n",
- rc);
- efx->xdp_rxq_info_failed = true;
- } else {
- rx_queue->xdp_rxq_info_valid = true;
- }
-
- /* Set up RX descriptor ring */
- efx_nic_init_rx(rx_queue);
-}
-
-void efx_fini_rx_queue(struct efx_rx_queue *rx_queue)
-{
- int i;
- struct efx_nic *efx = rx_queue->efx;
- struct efx_rx_buffer *rx_buf;
-
- netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev,
- "shutting down RX queue %d\n", efx_rx_queue_index(rx_queue));
-
- del_timer_sync(&rx_queue->slow_fill);
-
- /* Release RX buffers from the current read ptr to the write ptr */
- if (rx_queue->buffer) {
- for (i = rx_queue->removed_count; i < rx_queue->added_count;
- i++) {
- unsigned index = i & rx_queue->ptr_mask;
- rx_buf = efx_rx_buffer(rx_queue, index);
- efx_fini_rx_buffer(rx_queue, rx_buf);
- }
- }
-
- /* Unmap and release the pages in the recycle ring. Remove the ring. */
- for (i = 0; i <= rx_queue->page_ptr_mask; i++) {
- struct page *page = rx_queue->page_ring[i];
- struct efx_rx_page_state *state;
-
- if (page == NULL)
- continue;
-
- state = page_address(page);
- dma_unmap_page(&efx->pci_dev->dev, state->dma_addr,
- PAGE_SIZE << efx->rx_buffer_order,
- DMA_FROM_DEVICE);
- put_page(page);
- }
- kfree(rx_queue->page_ring);
- rx_queue->page_ring = NULL;
-
- if (rx_queue->xdp_rxq_info_valid)
- xdp_rxq_info_unreg(&rx_queue->xdp_rxq_info);
-
- rx_queue->xdp_rxq_info_valid = false;
-}
-
-void efx_remove_rx_queue(struct efx_rx_queue *rx_queue)
-{
- netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev,
- "destroying RX queue %d\n", efx_rx_queue_index(rx_queue));
-
- efx_nic_remove_rx(rx_queue);
-
- kfree(rx_queue->buffer);
- rx_queue->buffer = NULL;
-}
-
-
-module_param(rx_refill_threshold, uint, 0444);
-MODULE_PARM_DESC(rx_refill_threshold,
- "RX descriptor ring refill threshold (%)");
-
#ifdef CONFIG_RFS_ACCEL
static void efx_filter_rfs_work(struct work_struct *data)
@@ -1206,37 +650,3 @@ bool __efx_filter_rfs_expire(struct efx_channel *channel, unsigned int quota)
}
#endif /* CONFIG_RFS_ACCEL */
-
-/**
- * efx_filter_is_mc_recipient - test whether spec is a multicast recipient
- * @spec: Specification to test
- *
- * Return: %true if the specification is a non-drop RX filter that
- * matches a local MAC address I/G bit value of 1 or matches a local
- * IPv4 or IPv6 address value in the respective multicast address
- * range. Otherwise %false.
- */
-bool efx_filter_is_mc_recipient(const struct efx_filter_spec *spec)
-{
- if (!(spec->flags & EFX_FILTER_FLAG_RX) ||
- spec->dmaq_id == EFX_FILTER_RX_DMAQ_ID_DROP)
- return false;
-
- if (spec->match_flags &
- (EFX_FILTER_MATCH_LOC_MAC | EFX_FILTER_MATCH_LOC_MAC_IG) &&
- is_multicast_ether_addr(spec->loc_mac))
- return true;
-
- if ((spec->match_flags &
- (EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_LOC_HOST)) ==
- (EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_LOC_HOST)) {
- if (spec->ether_type == htons(ETH_P_IP) &&
- ipv4_is_multicast(spec->loc_host[0]))
- return true;
- if (spec->ether_type == htons(ETH_P_IPV6) &&
- ((const u8 *)spec->loc_host)[0] == 0xff)
- return true;
- }
-
- return false;
-}
diff --git a/drivers/net/ethernet/sfc/rx_common.c b/drivers/net/ethernet/sfc/rx_common.c
new file mode 100644
index 000000000000..ee8beb87bdc1
--- /dev/null
+++ b/drivers/net/ethernet/sfc/rx_common.c
@@ -0,0 +1,851 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2018 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#include "net_driver.h"
+#include <linux/module.h>
+#include <linux/iommu.h>
+#include "efx.h"
+#include "nic.h"
+#include "rx_common.h"
+
+/* This is the percentage fill level below which new RX descriptors
+ * will be added to the RX descriptor ring.
+ */
+static unsigned int rx_refill_threshold;
+module_param(rx_refill_threshold, uint, 0444);
+MODULE_PARM_DESC(rx_refill_threshold,
+ "RX descriptor ring refill threshold (%)");
+
+/* Number of RX buffers to recycle pages for. When creating the RX page recycle
+ * ring, this number is divided by the number of buffers per page to calculate
+ * the number of pages to store in the RX page recycle ring.
+ */
+#define EFX_RECYCLE_RING_SIZE_IOMMU 4096
+#define EFX_RECYCLE_RING_SIZE_NOIOMMU (2 * EFX_RX_PREFERRED_BATCH)
+
+/* RX maximum head room required.
+ *
+ * This must be at least 1 to prevent overflow, plus one packet-worth
+ * to allow pipelined receives.
+ */
+#define EFX_RXD_HEAD_ROOM (1 + EFX_RX_MAX_FRAGS)
+
+/* Check the RX page recycle ring for a page that can be reused. */
+static struct page *efx_reuse_page(struct efx_rx_queue *rx_queue)
+{
+ struct efx_nic *efx = rx_queue->efx;
+ struct efx_rx_page_state *state;
+ unsigned int index;
+ struct page *page;
+
+ index = rx_queue->page_remove & rx_queue->page_ptr_mask;
+ page = rx_queue->page_ring[index];
+ if (page == NULL)
+ return NULL;
+
+ rx_queue->page_ring[index] = NULL;
+ /* page_remove cannot exceed page_add. */
+ if (rx_queue->page_remove != rx_queue->page_add)
+ ++rx_queue->page_remove;
+
+ /* If page_count is 1 then we hold the only reference to this page. */
+ if (page_count(page) == 1) {
+ ++rx_queue->page_recycle_count;
+ return page;
+ } else {
+ state = page_address(page);
+ dma_unmap_page(&efx->pci_dev->dev, state->dma_addr,
+ PAGE_SIZE << efx->rx_buffer_order,
+ DMA_FROM_DEVICE);
+ put_page(page);
+ ++rx_queue->page_recycle_failed;
+ }
+
+ return NULL;
+}
+
+/* Attempt to recycle the page if there is an RX recycle ring; the page can
+ * only be added if this is the final RX buffer, to prevent pages being used in
+ * the descriptor ring and appearing in the recycle ring simultaneously.
+ */
+static void efx_recycle_rx_page(struct efx_channel *channel,
+ struct efx_rx_buffer *rx_buf)
+{
+ struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel);
+ struct efx_nic *efx = rx_queue->efx;
+ struct page *page = rx_buf->page;
+ unsigned int index;
+
+ /* Only recycle the page after processing the final buffer. */
+ if (!(rx_buf->flags & EFX_RX_BUF_LAST_IN_PAGE))
+ return;
+
+ index = rx_queue->page_add & rx_queue->page_ptr_mask;
+ if (rx_queue->page_ring[index] == NULL) {
+ unsigned int read_index = rx_queue->page_remove &
+ rx_queue->page_ptr_mask;
+
+ /* The next slot in the recycle ring is available, but
+ * increment page_remove if the read pointer currently
+ * points here.
+ */
+ if (read_index == index)
+ ++rx_queue->page_remove;
+ rx_queue->page_ring[index] = page;
+ ++rx_queue->page_add;
+ return;
+ }
+ ++rx_queue->page_recycle_full;
+ efx_unmap_rx_buffer(efx, rx_buf);
+ put_page(rx_buf->page);
+}
+
+/* Recycle the pages that are used by buffers that have just been received. */
+void efx_recycle_rx_pages(struct efx_channel *channel,
+ struct efx_rx_buffer *rx_buf,
+ unsigned int n_frags)
+{
+ struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel);
+
+ do {
+ efx_recycle_rx_page(channel, rx_buf);
+ rx_buf = efx_rx_buf_next(rx_queue, rx_buf);
+ } while (--n_frags);
+}
+
+void efx_discard_rx_packet(struct efx_channel *channel,
+ struct efx_rx_buffer *rx_buf,
+ unsigned int n_frags)
+{
+ struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel);
+
+ efx_recycle_rx_pages(channel, rx_buf, n_frags);
+
+ efx_free_rx_buffers(rx_queue, rx_buf, n_frags);
+}
+
+static void efx_init_rx_recycle_ring(struct efx_rx_queue *rx_queue)
+{
+ unsigned int bufs_in_recycle_ring, page_ring_size;
+ struct efx_nic *efx = rx_queue->efx;
+
+ /* Set the RX recycle ring size */
+#ifdef CONFIG_PPC64
+ bufs_in_recycle_ring = EFX_RECYCLE_RING_SIZE_IOMMU;
+#else
+ if (iommu_present(&pci_bus_type))
+ bufs_in_recycle_ring = EFX_RECYCLE_RING_SIZE_IOMMU;
+ else
+ bufs_in_recycle_ring = EFX_RECYCLE_RING_SIZE_NOIOMMU;
+#endif /* CONFIG_PPC64 */
+
+ page_ring_size = roundup_pow_of_two(bufs_in_recycle_ring /
+ efx->rx_bufs_per_page);
+ rx_queue->page_ring = kcalloc(page_ring_size,
+ sizeof(*rx_queue->page_ring), GFP_KERNEL);
+ rx_queue->page_ptr_mask = page_ring_size - 1;
+}
+
+static void efx_fini_rx_recycle_ring(struct efx_rx_queue *rx_queue)
+{
+ struct efx_nic *efx = rx_queue->efx;
+ int i;
+
+ /* Unmap and release the pages in the recycle ring. Remove the ring. */
+ for (i = 0; i <= rx_queue->page_ptr_mask; i++) {
+ struct page *page = rx_queue->page_ring[i];
+ struct efx_rx_page_state *state;
+
+ if (page == NULL)
+ continue;
+
+ state = page_address(page);
+ dma_unmap_page(&efx->pci_dev->dev, state->dma_addr,
+ PAGE_SIZE << efx->rx_buffer_order,
+ DMA_FROM_DEVICE);
+ put_page(page);
+ }
+ kfree(rx_queue->page_ring);
+ rx_queue->page_ring = NULL;
+}
+
+static void efx_fini_rx_buffer(struct efx_rx_queue *rx_queue,
+ struct efx_rx_buffer *rx_buf)
+{
+ /* Release the page reference we hold for the buffer. */
+ if (rx_buf->page)
+ put_page(rx_buf->page);
+
+ /* If this is the last buffer in a page, unmap and free it. */
+ if (rx_buf->flags & EFX_RX_BUF_LAST_IN_PAGE) {
+ efx_unmap_rx_buffer(rx_queue->efx, rx_buf);
+ efx_free_rx_buffers(rx_queue, rx_buf, 1);
+ }
+ rx_buf->page = NULL;
+}
+
+int efx_probe_rx_queue(struct efx_rx_queue *rx_queue)
+{
+ struct efx_nic *efx = rx_queue->efx;
+ unsigned int entries;
+ int rc;
+
+ /* Create the smallest power-of-two aligned ring */
+ entries = max(roundup_pow_of_two(efx->rxq_entries), EFX_MIN_DMAQ_SIZE);
+ EFX_WARN_ON_PARANOID(entries > EFX_MAX_DMAQ_SIZE);
+ rx_queue->ptr_mask = entries - 1;
+
+ netif_dbg(efx, probe, efx->net_dev,
+ "creating RX queue %d size %#x mask %#x\n",
+ efx_rx_queue_index(rx_queue), efx->rxq_entries,
+ rx_queue->ptr_mask);
+
+ /* Allocate RX buffers */
+ rx_queue->buffer = kcalloc(entries, sizeof(*rx_queue->buffer),
+ GFP_KERNEL);
+ if (!rx_queue->buffer)
+ return -ENOMEM;
+
+ rc = efx_nic_probe_rx(rx_queue);
+ if (rc) {
+ kfree(rx_queue->buffer);
+ rx_queue->buffer = NULL;
+ }
+
+ return rc;
+}
+
+void efx_init_rx_queue(struct efx_rx_queue *rx_queue)
+{
+ unsigned int max_fill, trigger, max_trigger;
+ struct efx_nic *efx = rx_queue->efx;
+ int rc = 0;
+
+ netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev,
+ "initialising RX queue %d\n", efx_rx_queue_index(rx_queue));
+
+ /* Initialise ptr fields */
+ rx_queue->added_count = 0;
+ rx_queue->notified_count = 0;
+ rx_queue->removed_count = 0;
+ rx_queue->min_fill = -1U;
+ efx_init_rx_recycle_ring(rx_queue);
+
+ rx_queue->page_remove = 0;
+ rx_queue->page_add = rx_queue->page_ptr_mask + 1;
+ rx_queue->page_recycle_count = 0;
+ rx_queue->page_recycle_failed = 0;
+ rx_queue->page_recycle_full = 0;
+
+ /* Initialise limit fields */
+ max_fill = efx->rxq_entries - EFX_RXD_HEAD_ROOM;
+ max_trigger =
+ max_fill - efx->rx_pages_per_batch * efx->rx_bufs_per_page;
+ if (rx_refill_threshold != 0) {
+ trigger = max_fill * min(rx_refill_threshold, 100U) / 100U;
+ if (trigger > max_trigger)
+ trigger = max_trigger;
+ } else {
+ trigger = max_trigger;
+ }
+
+ rx_queue->max_fill = max_fill;
+ rx_queue->fast_fill_trigger = trigger;
+ rx_queue->refill_enabled = true;
+
+ /* Initialise XDP queue information */
+ rc = xdp_rxq_info_reg(&rx_queue->xdp_rxq_info, efx->net_dev,
+ rx_queue->core_index);
+
+ if (rc) {
+ netif_err(efx, rx_err, efx->net_dev,
+ "Failure to initialise XDP queue information rc=%d\n",
+ rc);
+ efx->xdp_rxq_info_failed = true;
+ } else {
+ rx_queue->xdp_rxq_info_valid = true;
+ }
+
+ /* Set up RX descriptor ring */
+ efx_nic_init_rx(rx_queue);
+}
+
+void efx_fini_rx_queue(struct efx_rx_queue *rx_queue)
+{
+ struct efx_rx_buffer *rx_buf;
+ int i;
+
+ netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev,
+ "shutting down RX queue %d\n", efx_rx_queue_index(rx_queue));
+
+ del_timer_sync(&rx_queue->slow_fill);
+
+ /* Release RX buffers from the current read ptr to the write ptr */
+ if (rx_queue->buffer) {
+ for (i = rx_queue->removed_count; i < rx_queue->added_count;
+ i++) {
+ unsigned int index = i & rx_queue->ptr_mask;
+
+ rx_buf = efx_rx_buffer(rx_queue, index);
+ efx_fini_rx_buffer(rx_queue, rx_buf);
+ }
+ }
+
+ efx_fini_rx_recycle_ring(rx_queue);
+
+ if (rx_queue->xdp_rxq_info_valid)
+ xdp_rxq_info_unreg(&rx_queue->xdp_rxq_info);
+
+ rx_queue->xdp_rxq_info_valid = false;
+}
+
+void efx_remove_rx_queue(struct efx_rx_queue *rx_queue)
+{
+ netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev,
+ "destroying RX queue %d\n", efx_rx_queue_index(rx_queue));
+
+ efx_nic_remove_rx(rx_queue);
+
+ kfree(rx_queue->buffer);
+ rx_queue->buffer = NULL;
+}
+
+/* Unmap a DMA-mapped page. This function is only called for the final RX
+ * buffer in a page.
+ */
+void efx_unmap_rx_buffer(struct efx_nic *efx,
+ struct efx_rx_buffer *rx_buf)
+{
+ struct page *page = rx_buf->page;
+
+ if (page) {
+ struct efx_rx_page_state *state = page_address(page);
+
+ dma_unmap_page(&efx->pci_dev->dev,
+ state->dma_addr,
+ PAGE_SIZE << efx->rx_buffer_order,
+ DMA_FROM_DEVICE);
+ }
+}
+
+void efx_free_rx_buffers(struct efx_rx_queue *rx_queue,
+ struct efx_rx_buffer *rx_buf,
+ unsigned int num_bufs)
+{
+ do {
+ if (rx_buf->page) {
+ put_page(rx_buf->page);
+ rx_buf->page = NULL;
+ }
+ rx_buf = efx_rx_buf_next(rx_queue, rx_buf);
+ } while (--num_bufs);
+}
+
+void efx_rx_slow_fill(struct timer_list *t)
+{
+ struct efx_rx_queue *rx_queue = from_timer(rx_queue, t, slow_fill);
+
+ /* Post an event to cause NAPI to run and refill the queue */
+ efx_nic_generate_fill_event(rx_queue);
+ ++rx_queue->slow_fill_count;
+}
+
+void efx_schedule_slow_fill(struct efx_rx_queue *rx_queue)
+{
+ mod_timer(&rx_queue->slow_fill, jiffies + msecs_to_jiffies(10));
+}
+
+/* efx_init_rx_buffers - create EFX_RX_BATCH page-based RX buffers
+ *
+ * @rx_queue: Efx RX queue
+ *
+ * This allocates a batch of pages, maps them for DMA, and populates
+ * struct efx_rx_buffers for each one. Return a negative error code or
+ * 0 on success. If a single page can be used for multiple buffers,
+ * then the page will either be inserted fully, or not at all.
+ */
+static int efx_init_rx_buffers(struct efx_rx_queue *rx_queue, bool atomic)
+{
+ unsigned int page_offset, index, count;
+ struct efx_nic *efx = rx_queue->efx;
+ struct efx_rx_page_state *state;
+ struct efx_rx_buffer *rx_buf;
+ dma_addr_t dma_addr;
+ struct page *page;
+
+ count = 0;
+ do {
+ page = efx_reuse_page(rx_queue);
+ if (page == NULL) {
+ page = alloc_pages(__GFP_COMP |
+ (atomic ? GFP_ATOMIC : GFP_KERNEL),
+ efx->rx_buffer_order);
+ if (unlikely(page == NULL))
+ return -ENOMEM;
+ dma_addr =
+ dma_map_page(&efx->pci_dev->dev, page, 0,
+ PAGE_SIZE << efx->rx_buffer_order,
+ DMA_FROM_DEVICE);
+ if (unlikely(dma_mapping_error(&efx->pci_dev->dev,
+ dma_addr))) {
+ __free_pages(page, efx->rx_buffer_order);
+ return -EIO;
+ }
+ state = page_address(page);
+ state->dma_addr = dma_addr;
+ } else {
+ state = page_address(page);
+ dma_addr = state->dma_addr;
+ }
+
+ dma_addr += sizeof(struct efx_rx_page_state);
+ page_offset = sizeof(struct efx_rx_page_state);
+
+ do {
+ index = rx_queue->added_count & rx_queue->ptr_mask;
+ rx_buf = efx_rx_buffer(rx_queue, index);
+ rx_buf->dma_addr = dma_addr + efx->rx_ip_align +
+ XDP_PACKET_HEADROOM;
+ rx_buf->page = page;
+ rx_buf->page_offset = page_offset + efx->rx_ip_align +
+ XDP_PACKET_HEADROOM;
+ rx_buf->len = efx->rx_dma_len;
+ rx_buf->flags = 0;
+ ++rx_queue->added_count;
+ get_page(page);
+ dma_addr += efx->rx_page_buf_step;
+ page_offset += efx->rx_page_buf_step;
+ } while (page_offset + efx->rx_page_buf_step <= PAGE_SIZE);
+
+ rx_buf->flags = EFX_RX_BUF_LAST_IN_PAGE;
+ } while (++count < efx->rx_pages_per_batch);
+
+ return 0;
+}
+
+void efx_rx_config_page_split(struct efx_nic *efx)
+{
+ efx->rx_page_buf_step = ALIGN(efx->rx_dma_len + efx->rx_ip_align +
+ XDP_PACKET_HEADROOM,
+ EFX_RX_BUF_ALIGNMENT);
+ efx->rx_bufs_per_page = efx->rx_buffer_order ? 1 :
+ ((PAGE_SIZE - sizeof(struct efx_rx_page_state)) /
+ efx->rx_page_buf_step);
+ efx->rx_buffer_truesize = (PAGE_SIZE << efx->rx_buffer_order) /
+ efx->rx_bufs_per_page;
+ efx->rx_pages_per_batch = DIV_ROUND_UP(EFX_RX_PREFERRED_BATCH,
+ efx->rx_bufs_per_page);
+}
+
+/* efx_fast_push_rx_descriptors - push new RX descriptors quickly
+ * @rx_queue: RX descriptor queue
+ *
+ * This will aim to fill the RX descriptor queue up to
+ * @rx_queue->@max_fill. If there is insufficient atomic
+ * memory to do so, a slow fill will be scheduled.
+ *
+ * The caller must provide serialisation (none is used here). In practise,
+ * this means this function must run from the NAPI handler, or be called
+ * when NAPI is disabled.
+ */
+void efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue, bool atomic)
+{
+ struct efx_nic *efx = rx_queue->efx;
+ unsigned int fill_level, batch_size;
+ int space, rc = 0;
+
+ if (!rx_queue->refill_enabled)
+ return;
+
+ /* Calculate current fill level, and exit if we don't need to fill */
+ fill_level = (rx_queue->added_count - rx_queue->removed_count);
+ EFX_WARN_ON_ONCE_PARANOID(fill_level > rx_queue->efx->rxq_entries);
+ if (fill_level >= rx_queue->fast_fill_trigger)
+ goto out;
+
+ /* Record minimum fill level */
+ if (unlikely(fill_level < rx_queue->min_fill)) {
+ if (fill_level)
+ rx_queue->min_fill = fill_level;
+ }
+
+ batch_size = efx->rx_pages_per_batch * efx->rx_bufs_per_page;
+ space = rx_queue->max_fill - fill_level;
+ EFX_WARN_ON_ONCE_PARANOID(space < batch_size);
+
+ netif_vdbg(rx_queue->efx, rx_status, rx_queue->efx->net_dev,
+ "RX queue %d fast-filling descriptor ring from"
+ " level %d to level %d\n",
+ efx_rx_queue_index(rx_queue), fill_level,
+ rx_queue->max_fill);
+
+ do {
+ rc = efx_init_rx_buffers(rx_queue, atomic);
+ if (unlikely(rc)) {
+ /* Ensure that we don't leave the rx queue empty */
+ efx_schedule_slow_fill(rx_queue);
+ goto out;
+ }
+ } while ((space -= batch_size) >= batch_size);
+
+ netif_vdbg(rx_queue->efx, rx_status, rx_queue->efx->net_dev,
+ "RX queue %d fast-filled descriptor ring "
+ "to level %d\n", efx_rx_queue_index(rx_queue),
+ rx_queue->added_count - rx_queue->removed_count);
+
+ out:
+ if (rx_queue->notified_count != rx_queue->added_count)
+ efx_nic_notify_rx_desc(rx_queue);
+}
+
+/* Pass a received packet up through GRO. GRO can handle pages
+ * regardless of checksum state and skbs with a good checksum.
+ */
+void
+efx_rx_packet_gro(struct efx_channel *channel, struct efx_rx_buffer *rx_buf,
+ unsigned int n_frags, u8 *eh)
+{
+ struct napi_struct *napi = &channel->napi_str;
+ struct efx_nic *efx = channel->efx;
+ struct sk_buff *skb;
+
+ skb = napi_get_frags(napi);
+ if (unlikely(!skb)) {
+ struct efx_rx_queue *rx_queue;
+
+ rx_queue = efx_channel_get_rx_queue(channel);
+ efx_free_rx_buffers(rx_queue, rx_buf, n_frags);
+ return;
+ }
+
+ if (efx->net_dev->features & NETIF_F_RXHASH)
+ skb_set_hash(skb, efx_rx_buf_hash(efx, eh),
+ PKT_HASH_TYPE_L3);
+ skb->ip_summed = ((rx_buf->flags & EFX_RX_PKT_CSUMMED) ?
+ CHECKSUM_UNNECESSARY : CHECKSUM_NONE);
+ skb->csum_level = !!(rx_buf->flags & EFX_RX_PKT_CSUM_LEVEL);
+
+ for (;;) {
+ skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
+ rx_buf->page, rx_buf->page_offset,
+ rx_buf->len);
+ rx_buf->page = NULL;
+ skb->len += rx_buf->len;
+ if (skb_shinfo(skb)->nr_frags == n_frags)
+ break;
+
+ rx_buf = efx_rx_buf_next(&channel->rx_queue, rx_buf);
+ }
+
+ skb->data_len = skb->len;
+ skb->truesize += n_frags * efx->rx_buffer_truesize;
+
+ skb_record_rx_queue(skb, channel->rx_queue.core_index);
+
+ napi_gro_frags(napi);
+}
+
+/* RSS contexts. We're using linked lists and crappy O(n) algorithms, because
+ * (a) this is an infrequent control-plane operation and (b) n is small (max 64)
+ */
+struct efx_rss_context *efx_alloc_rss_context_entry(struct efx_nic *efx)
+{
+ struct list_head *head = &efx->rss_context.list;
+ struct efx_rss_context *ctx, *new;
+ u32 id = 1; /* Don't use zero, that refers to the master RSS context */
+
+ WARN_ON(!mutex_is_locked(&efx->rss_lock));
+
+ /* Search for first gap in the numbering */
+ list_for_each_entry(ctx, head, list) {
+ if (ctx->user_id != id)
+ break;
+ id++;
+ /* Check for wrap. If this happens, we have nearly 2^32
+ * allocated RSS contexts, which seems unlikely.
+ */
+ if (WARN_ON_ONCE(!id))
+ return NULL;
+ }
+
+ /* Create the new entry */
+ new = kmalloc(sizeof(*new), GFP_KERNEL);
+ if (!new)
+ return NULL;
+ new->context_id = EFX_MCDI_RSS_CONTEXT_INVALID;
+ new->rx_hash_udp_4tuple = false;
+
+ /* Insert the new entry into the gap */
+ new->user_id = id;
+ list_add_tail(&new->list, &ctx->list);
+ return new;
+}
+
+struct efx_rss_context *efx_find_rss_context_entry(struct efx_nic *efx, u32 id)
+{
+ struct list_head *head = &efx->rss_context.list;
+ struct efx_rss_context *ctx;
+
+ WARN_ON(!mutex_is_locked(&efx->rss_lock));
+
+ list_for_each_entry(ctx, head, list)
+ if (ctx->user_id == id)
+ return ctx;
+ return NULL;
+}
+
+void efx_free_rss_context_entry(struct efx_rss_context *ctx)
+{
+ list_del(&ctx->list);
+ kfree(ctx);
+}
+
+void efx_set_default_rx_indir_table(struct efx_nic *efx,
+ struct efx_rss_context *ctx)
+{
+ size_t i;
+
+ for (i = 0; i < ARRAY_SIZE(ctx->rx_indir_table); i++)
+ ctx->rx_indir_table[i] =
+ ethtool_rxfh_indir_default(i, efx->rss_spread);
+}
+
+/**
+ * efx_filter_is_mc_recipient - test whether spec is a multicast recipient
+ * @spec: Specification to test
+ *
+ * Return: %true if the specification is a non-drop RX filter that
+ * matches a local MAC address I/G bit value of 1 or matches a local
+ * IPv4 or IPv6 address value in the respective multicast address
+ * range. Otherwise %false.
+ */
+bool efx_filter_is_mc_recipient(const struct efx_filter_spec *spec)
+{
+ if (!(spec->flags & EFX_FILTER_FLAG_RX) ||
+ spec->dmaq_id == EFX_FILTER_RX_DMAQ_ID_DROP)
+ return false;
+
+ if (spec->match_flags &
+ (EFX_FILTER_MATCH_LOC_MAC | EFX_FILTER_MATCH_LOC_MAC_IG) &&
+ is_multicast_ether_addr(spec->loc_mac))
+ return true;
+
+ if ((spec->match_flags &
+ (EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_LOC_HOST)) ==
+ (EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_LOC_HOST)) {
+ if (spec->ether_type == htons(ETH_P_IP) &&
+ ipv4_is_multicast(spec->loc_host[0]))
+ return true;
+ if (spec->ether_type == htons(ETH_P_IPV6) &&
+ ((const u8 *)spec->loc_host)[0] == 0xff)
+ return true;
+ }
+
+ return false;
+}
+
+bool efx_filter_spec_equal(const struct efx_filter_spec *left,
+ const struct efx_filter_spec *right)
+{
+ if ((left->match_flags ^ right->match_flags) |
+ ((left->flags ^ right->flags) &
+ (EFX_FILTER_FLAG_RX | EFX_FILTER_FLAG_TX)))
+ return false;
+
+ return memcmp(&left->outer_vid, &right->outer_vid,
+ sizeof(struct efx_filter_spec) -
+ offsetof(struct efx_filter_spec, outer_vid)) == 0;
+}
+
+u32 efx_filter_spec_hash(const struct efx_filter_spec *spec)
+{
+ BUILD_BUG_ON(offsetof(struct efx_filter_spec, outer_vid) & 3);
+ return jhash2((const u32 *)&spec->outer_vid,
+ (sizeof(struct efx_filter_spec) -
+ offsetof(struct efx_filter_spec, outer_vid)) / 4,
+ 0);
+}
+
+#ifdef CONFIG_RFS_ACCEL
+bool efx_rps_check_rule(struct efx_arfs_rule *rule, unsigned int filter_idx,
+ bool *force)
+{
+ if (rule->filter_id == EFX_ARFS_FILTER_ID_PENDING) {
+ /* ARFS is currently updating this entry, leave it */
+ return false;
+ }
+ if (rule->filter_id == EFX_ARFS_FILTER_ID_ERROR) {
+ /* ARFS tried and failed to update this, so it's probably out
+ * of date. Remove the filter and the ARFS rule entry.
+ */
+ rule->filter_id = EFX_ARFS_FILTER_ID_REMOVING;
+ *force = true;
+ return true;
+ } else if (WARN_ON(rule->filter_id != filter_idx)) { /* can't happen */
+ /* ARFS has moved on, so old filter is not needed. Since we did
+ * not mark the rule with EFX_ARFS_FILTER_ID_REMOVING, it will
+ * not be removed by efx_rps_hash_del() subsequently.
+ */
+ *force = true;
+ return true;
+ }
+ /* Remove it iff ARFS wants to. */
+ return true;
+}
+
+static
+struct hlist_head *efx_rps_hash_bucket(struct efx_nic *efx,
+ const struct efx_filter_spec *spec)
+{
+ u32 hash = efx_filter_spec_hash(spec);
+
+ lockdep_assert_held(&efx->rps_hash_lock);
+ if (!efx->rps_hash_table)
+ return NULL;
+ return &efx->rps_hash_table[hash % EFX_ARFS_HASH_TABLE_SIZE];
+}
+
+struct efx_arfs_rule *efx_rps_hash_find(struct efx_nic *efx,
+ const struct efx_filter_spec *spec)
+{
+ struct efx_arfs_rule *rule;
+ struct hlist_head *head;
+ struct hlist_node *node;
+
+ head = efx_rps_hash_bucket(efx, spec);
+ if (!head)
+ return NULL;
+ hlist_for_each(node, head) {
+ rule = container_of(node, struct efx_arfs_rule, node);
+ if (efx_filter_spec_equal(spec, &rule->spec))
+ return rule;
+ }
+ return NULL;
+}
+
+struct efx_arfs_rule *efx_rps_hash_add(struct efx_nic *efx,
+ const struct efx_filter_spec *spec,
+ bool *new)
+{
+ struct efx_arfs_rule *rule;
+ struct hlist_head *head;
+ struct hlist_node *node;
+
+ head = efx_rps_hash_bucket(efx, spec);
+ if (!head)
+ return NULL;
+ hlist_for_each(node, head) {
+ rule = container_of(node, struct efx_arfs_rule, node);
+ if (efx_filter_spec_equal(spec, &rule->spec)) {
+ *new = false;
+ return rule;
+ }
+ }
+ rule = kmalloc(sizeof(*rule), GFP_ATOMIC);
+ *new = true;
+ if (rule) {
+ memcpy(&rule->spec, spec, sizeof(rule->spec));
+ hlist_add_head(&rule->node, head);
+ }
+ return rule;
+}
+
+void efx_rps_hash_del(struct efx_nic *efx, const struct efx_filter_spec *spec)
+{
+ struct efx_arfs_rule *rule;
+ struct hlist_head *head;
+ struct hlist_node *node;
+
+ head = efx_rps_hash_bucket(efx, spec);
+ if (WARN_ON(!head))
+ return;
+ hlist_for_each(node, head) {
+ rule = container_of(node, struct efx_arfs_rule, node);
+ if (efx_filter_spec_equal(spec, &rule->spec)) {
+ /* Someone already reused the entry. We know that if
+ * this check doesn't fire (i.e. filter_id == REMOVING)
+ * then the REMOVING mark was put there by our caller,
+ * because caller is holding a lock on filter table and
+ * only holders of that lock set REMOVING.
+ */
+ if (rule->filter_id != EFX_ARFS_FILTER_ID_REMOVING)
+ return;
+ hlist_del(node);
+ kfree(rule);
+ return;
+ }
+ }
+ /* We didn't find it. */
+ WARN_ON(1);
+}
+#endif
+
+int efx_probe_filters(struct efx_nic *efx)
+{
+ int rc;
+
+ init_rwsem(&efx->filter_sem);
+ mutex_lock(&efx->mac_lock);
+ down_write(&efx->filter_sem);
+ rc = efx->type->filter_table_probe(efx);
+ if (rc)
+ goto out_unlock;
+
+#ifdef CONFIG_RFS_ACCEL
+ if (efx->type->offload_features & NETIF_F_NTUPLE) {
+ struct efx_channel *channel;
+ int i, success = 1;
+
+ efx_for_each_channel(channel, efx) {
+ channel->rps_flow_id =
+ kcalloc(efx->type->max_rx_ip_filters,
+ sizeof(*channel->rps_flow_id),
+ GFP_KERNEL);
+ if (!channel->rps_flow_id)
+ success = 0;
+ else
+ for (i = 0;
+ i < efx->type->max_rx_ip_filters;
+ ++i)
+ channel->rps_flow_id[i] =
+ RPS_FLOW_ID_INVALID;
+ channel->rfs_expire_index = 0;
+ channel->rfs_filter_count = 0;
+ }
+
+ if (!success) {
+ efx_for_each_channel(channel, efx)
+ kfree(channel->rps_flow_id);
+ efx->type->filter_table_remove(efx);
+ rc = -ENOMEM;
+ goto out_unlock;
+ }
+ }
+#endif
+out_unlock:
+ up_write(&efx->filter_sem);
+ mutex_unlock(&efx->mac_lock);
+ return rc;
+}
+
+void efx_remove_filters(struct efx_nic *efx)
+{
+#ifdef CONFIG_RFS_ACCEL
+ struct efx_channel *channel;
+
+ efx_for_each_channel(channel, efx) {
+ cancel_delayed_work_sync(&channel->filter_work);
+ kfree(channel->rps_flow_id);
+ }
+#endif
+ down_write(&efx->filter_sem);
+ efx->type->filter_table_remove(efx);
+ up_write(&efx->filter_sem);
+}
diff --git a/drivers/net/ethernet/sfc/rx_common.h b/drivers/net/ethernet/sfc/rx_common.h
new file mode 100644
index 000000000000..c41f12a89477
--- /dev/null
+++ b/drivers/net/ethernet/sfc/rx_common.h
@@ -0,0 +1,97 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2018 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#ifndef EFX_RX_COMMON_H
+#define EFX_RX_COMMON_H
+
+/* Preferred number of descriptors to fill at once */
+#define EFX_RX_PREFERRED_BATCH 8U
+
+/* Each packet can consume up to ceil(max_frame_len / buffer_size) buffers */
+#define EFX_RX_MAX_FRAGS DIV_ROUND_UP(EFX_MAX_FRAME_LEN(EFX_MAX_MTU), \
+ EFX_RX_USR_BUF_SIZE)
+
+static inline u8 *efx_rx_buf_va(struct efx_rx_buffer *buf)
+{
+ return page_address(buf->page) + buf->page_offset;
+}
+
+static inline u32 efx_rx_buf_hash(struct efx_nic *efx, const u8 *eh)
+{
+#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
+ return __le32_to_cpup((const __le32 *)(eh + efx->rx_packet_hash_offset));
+#else
+ const u8 *data = eh + efx->rx_packet_hash_offset;
+
+ return (u32)data[0] |
+ (u32)data[1] << 8 |
+ (u32)data[2] << 16 |
+ (u32)data[3] << 24;
+#endif
+}
+
+void efx_rx_slow_fill(struct timer_list *t);
+
+void efx_recycle_rx_pages(struct efx_channel *channel,
+ struct efx_rx_buffer *rx_buf,
+ unsigned int n_frags);
+void efx_discard_rx_packet(struct efx_channel *channel,
+ struct efx_rx_buffer *rx_buf,
+ unsigned int n_frags);
+
+int efx_probe_rx_queue(struct efx_rx_queue *rx_queue);
+void efx_init_rx_queue(struct efx_rx_queue *rx_queue);
+void efx_fini_rx_queue(struct efx_rx_queue *rx_queue);
+void efx_remove_rx_queue(struct efx_rx_queue *rx_queue);
+void efx_destroy_rx_queue(struct efx_rx_queue *rx_queue);
+
+void efx_init_rx_buffer(struct efx_rx_queue *rx_queue,
+ struct page *page,
+ unsigned int page_offset,
+ u16 flags);
+void efx_unmap_rx_buffer(struct efx_nic *efx, struct efx_rx_buffer *rx_buf);
+void efx_free_rx_buffers(struct efx_rx_queue *rx_queue,
+ struct efx_rx_buffer *rx_buf,
+ unsigned int num_bufs);
+
+void efx_schedule_slow_fill(struct efx_rx_queue *rx_queue);
+void efx_rx_config_page_split(struct efx_nic *efx);
+void efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue, bool atomic);
+
+void
+efx_rx_packet_gro(struct efx_channel *channel, struct efx_rx_buffer *rx_buf,
+ unsigned int n_frags, u8 *eh);
+
+struct efx_rss_context *efx_alloc_rss_context_entry(struct efx_nic *efx);
+struct efx_rss_context *efx_find_rss_context_entry(struct efx_nic *efx, u32 id);
+void efx_free_rss_context_entry(struct efx_rss_context *ctx);
+void efx_set_default_rx_indir_table(struct efx_nic *efx,
+ struct efx_rss_context *ctx);
+
+bool efx_filter_is_mc_recipient(const struct efx_filter_spec *spec);
+bool efx_filter_spec_equal(const struct efx_filter_spec *left,
+ const struct efx_filter_spec *right);
+u32 efx_filter_spec_hash(const struct efx_filter_spec *spec);
+
+#ifdef CONFIG_RFS_ACCEL
+bool efx_rps_check_rule(struct efx_arfs_rule *rule, unsigned int filter_idx,
+ bool *force);
+struct efx_arfs_rule *efx_rps_hash_find(struct efx_nic *efx,
+ const struct efx_filter_spec *spec);
+struct efx_arfs_rule *efx_rps_hash_add(struct efx_nic *efx,
+ const struct efx_filter_spec *spec,
+ bool *new);
+void efx_rps_hash_del(struct efx_nic *efx, const struct efx_filter_spec *spec);
+#endif
+
+int efx_probe_filters(struct efx_nic *efx);
+void efx_remove_filters(struct efx_nic *efx);
+
+#endif
diff --git a/drivers/net/ethernet/sfc/selftest.c b/drivers/net/ethernet/sfc/selftest.c
index 8474cf8ea7d3..1ae369022d7d 100644
--- a/drivers/net/ethernet/sfc/selftest.c
+++ b/drivers/net/ethernet/sfc/selftest.c
@@ -18,6 +18,8 @@
#include <linux/slab.h>
#include "net_driver.h"
#include "efx.h"
+#include "efx_common.h"
+#include "efx_channels.h"
#include "nic.h"
#include "selftest.h"
#include "workarounds.h"
@@ -783,7 +785,7 @@ void efx_selftest_async_cancel(struct efx_nic *efx)
cancel_delayed_work_sync(&efx->selftest_work);
}
-void efx_selftest_async_work(struct work_struct *data)
+static void efx_selftest_async_work(struct work_struct *data)
{
struct efx_nic *efx = container_of(data, struct efx_nic,
selftest_work.work);
@@ -802,3 +804,8 @@ void efx_selftest_async_work(struct work_struct *data)
channel->channel, cpu);
}
}
+
+void efx_selftest_async_init(struct efx_nic *efx)
+{
+ INIT_DELAYED_WORK(&efx->selftest_work, efx_selftest_async_work);
+}
diff --git a/drivers/net/ethernet/sfc/selftest.h b/drivers/net/ethernet/sfc/selftest.h
index a3553816d92c..ca88ebb4f6b1 100644
--- a/drivers/net/ethernet/sfc/selftest.h
+++ b/drivers/net/ethernet/sfc/selftest.h
@@ -45,8 +45,8 @@ void efx_loopback_rx_packet(struct efx_nic *efx, const char *buf_ptr,
int pkt_len);
int efx_selftest(struct efx_nic *efx, struct efx_self_tests *tests,
unsigned flags);
+void efx_selftest_async_init(struct efx_nic *efx);
void efx_selftest_async_start(struct efx_nic *efx);
void efx_selftest_async_cancel(struct efx_nic *efx);
-void efx_selftest_async_work(struct work_struct *data);
#endif /* EFX_SELFTEST_H */
diff --git a/drivers/net/ethernet/sfc/siena.c b/drivers/net/ethernet/sfc/siena.c
index 81499244a4b4..baa464161626 100644
--- a/drivers/net/ethernet/sfc/siena.c
+++ b/drivers/net/ethernet/sfc/siena.c
@@ -14,12 +14,14 @@
#include "net_driver.h"
#include "bitfield.h"
#include "efx.h"
+#include "efx_common.h"
#include "nic.h"
#include "farch_regs.h"
#include "io.h"
#include "workarounds.h"
#include "mcdi.h"
#include "mcdi_pcol.h"
+#include "mcdi_port_common.h"
#include "selftest.h"
#include "siena_sriov.h"
diff --git a/drivers/net/ethernet/sfc/siena_sriov.c b/drivers/net/ethernet/sfc/siena_sriov.c
index dfbdf05dcf79..83dcfcae3d4b 100644
--- a/drivers/net/ethernet/sfc/siena_sriov.c
+++ b/drivers/net/ethernet/sfc/siena_sriov.c
@@ -7,6 +7,7 @@
#include <linux/module.h>
#include "net_driver.h"
#include "efx.h"
+#include "efx_channels.h"
#include "nic.h"
#include "io.h"
#include "mcdi.h"
diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c
index 00c1c4402451..04d7f41d7ed9 100644
--- a/drivers/net/ethernet/sfc/tx.c
+++ b/drivers/net/ethernet/sfc/tx.c
@@ -20,6 +20,7 @@
#include "io.h"
#include "nic.h"
#include "tx.h"
+#include "tx_common.h"
#include "workarounds.h"
#include "ef10_regs.h"
@@ -56,72 +57,6 @@ u8 *efx_tx_get_copy_buffer_limited(struct efx_tx_queue *tx_queue,
return efx_tx_get_copy_buffer(tx_queue, buffer);
}
-static void efx_dequeue_buffer(struct efx_tx_queue *tx_queue,
- struct efx_tx_buffer *buffer,
- unsigned int *pkts_compl,
- unsigned int *bytes_compl)
-{
- if (buffer->unmap_len) {
- struct device *dma_dev = &tx_queue->efx->pci_dev->dev;
- dma_addr_t unmap_addr = buffer->dma_addr - buffer->dma_offset;
- if (buffer->flags & EFX_TX_BUF_MAP_SINGLE)
- dma_unmap_single(dma_dev, unmap_addr, buffer->unmap_len,
- DMA_TO_DEVICE);
- else
- dma_unmap_page(dma_dev, unmap_addr, buffer->unmap_len,
- DMA_TO_DEVICE);
- buffer->unmap_len = 0;
- }
-
- if (buffer->flags & EFX_TX_BUF_SKB) {
- struct sk_buff *skb = (struct sk_buff *)buffer->skb;
-
- EFX_WARN_ON_PARANOID(!pkts_compl || !bytes_compl);
- (*pkts_compl)++;
- (*bytes_compl) += skb->len;
- if (tx_queue->timestamping &&
- (tx_queue->completed_timestamp_major ||
- tx_queue->completed_timestamp_minor)) {
- struct skb_shared_hwtstamps hwtstamp;
-
- hwtstamp.hwtstamp =
- efx_ptp_nic_to_kernel_time(tx_queue);
- skb_tstamp_tx(skb, &hwtstamp);
-
- tx_queue->completed_timestamp_major = 0;
- tx_queue->completed_timestamp_minor = 0;
- }
- dev_consume_skb_any((struct sk_buff *)buffer->skb);
- netif_vdbg(tx_queue->efx, tx_done, tx_queue->efx->net_dev,
- "TX queue %d transmission id %x complete\n",
- tx_queue->queue, tx_queue->read_count);
- } else if (buffer->flags & EFX_TX_BUF_XDP) {
- xdp_return_frame_rx_napi(buffer->xdpf);
- }
-
- buffer->len = 0;
- buffer->flags = 0;
-}
-
-unsigned int efx_tx_max_skb_descs(struct efx_nic *efx)
-{
- /* Header and payload descriptor for each output segment, plus
- * one for every input fragment boundary within a segment
- */
- unsigned int max_descs = EFX_TSO_MAX_SEGS * 2 + MAX_SKB_FRAGS;
-
- /* Possibly one more per segment for option descriptors */
- if (efx_nic_rev(efx) >= EFX_REV_HUNT_A0)
- max_descs += EFX_TSO_MAX_SEGS;
-
- /* Possibly more for PCIe page boundaries within input fragments */
- if (PAGE_SIZE > EFX_PAGE_SIZE)
- max_descs += max_t(unsigned int, MAX_SKB_FRAGS,
- DIV_ROUND_UP(GSO_MAX_SIZE, EFX_PAGE_SIZE));
-
- return max_descs;
-}
-
static void efx_tx_maybe_stop_queue(struct efx_tx_queue *txq1)
{
/* We need to consider both queues that the net core sees as one */
@@ -333,125 +268,6 @@ static int efx_enqueue_skb_pio(struct efx_tx_queue *tx_queue,
}
#endif /* EFX_USE_PIO */
-static struct efx_tx_buffer *efx_tx_map_chunk(struct efx_tx_queue *tx_queue,
- dma_addr_t dma_addr,
- size_t len)
-{
- const struct efx_nic_type *nic_type = tx_queue->efx->type;
- struct efx_tx_buffer *buffer;
- unsigned int dma_len;
-
- /* Map the fragment taking account of NIC-dependent DMA limits. */
- do {
- buffer = efx_tx_queue_get_insert_buffer(tx_queue);
- dma_len = nic_type->tx_limit_len(tx_queue, dma_addr, len);
-
- buffer->len = dma_len;
- buffer->dma_addr = dma_addr;
- buffer->flags = EFX_TX_BUF_CONT;
- len -= dma_len;
- dma_addr += dma_len;
- ++tx_queue->insert_count;
- } while (len);
-
- return buffer;
-}
-
-/* Map all data from an SKB for DMA and create descriptors on the queue.
- */
-static int efx_tx_map_data(struct efx_tx_queue *tx_queue, struct sk_buff *skb,
- unsigned int segment_count)
-{
- struct efx_nic *efx = tx_queue->efx;
- struct device *dma_dev = &efx->pci_dev->dev;
- unsigned int frag_index, nr_frags;
- dma_addr_t dma_addr, unmap_addr;
- unsigned short dma_flags;
- size_t len, unmap_len;
-
- nr_frags = skb_shinfo(skb)->nr_frags;
- frag_index = 0;
-
- /* Map header data. */
- len = skb_headlen(skb);
- dma_addr = dma_map_single(dma_dev, skb->data, len, DMA_TO_DEVICE);
- dma_flags = EFX_TX_BUF_MAP_SINGLE;
- unmap_len = len;
- unmap_addr = dma_addr;
-
- if (unlikely(dma_mapping_error(dma_dev, dma_addr)))
- return -EIO;
-
- if (segment_count) {
- /* For TSO we need to put the header in to a separate
- * descriptor. Map this separately if necessary.
- */
- size_t header_len = skb_transport_header(skb) - skb->data +
- (tcp_hdr(skb)->doff << 2u);
-
- if (header_len != len) {
- tx_queue->tso_long_headers++;
- efx_tx_map_chunk(tx_queue, dma_addr, header_len);
- len -= header_len;
- dma_addr += header_len;
- }
- }
-
- /* Add descriptors for each fragment. */
- do {
- struct efx_tx_buffer *buffer;
- skb_frag_t *fragment;
-
- buffer = efx_tx_map_chunk(tx_queue, dma_addr, len);
-
- /* The final descriptor for a fragment is responsible for
- * unmapping the whole fragment.
- */
- buffer->flags = EFX_TX_BUF_CONT | dma_flags;
- buffer->unmap_len = unmap_len;
- buffer->dma_offset = buffer->dma_addr - unmap_addr;
-
- if (frag_index >= nr_frags) {
- /* Store SKB details with the final buffer for
- * the completion.
- */
- buffer->skb = skb;
- buffer->flags = EFX_TX_BUF_SKB | dma_flags;
- return 0;
- }
-
- /* Move on to the next fragment. */
- fragment = &skb_shinfo(skb)->frags[frag_index++];
- len = skb_frag_size(fragment);
- dma_addr = skb_frag_dma_map(dma_dev, fragment,
- 0, len, DMA_TO_DEVICE);
- dma_flags = 0;
- unmap_len = len;
- unmap_addr = dma_addr;
-
- if (unlikely(dma_mapping_error(dma_dev, dma_addr)))
- return -EIO;
- } while (1);
-}
-
-/* Remove buffers put into a tx_queue for the current packet.
- * None of the buffers must have an skb attached.
- */
-static void efx_enqueue_unwind(struct efx_tx_queue *tx_queue,
- unsigned int insert_count)
-{
- struct efx_tx_buffer *buffer;
- unsigned int bytes_compl = 0;
- unsigned int pkts_compl = 0;
-
- /* Work backwards until we hit the original insert pointer value */
- while (tx_queue->insert_count != insert_count) {
- --tx_queue->insert_count;
- buffer = __efx_tx_queue_get_insert_buffer(tx_queue);
- efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl);
- }
-}
-
/*
* Fallback to software TSO.
*
@@ -473,12 +289,9 @@ static int efx_tx_tso_fallback(struct efx_tx_queue *tx_queue,
dev_consume_skb_any(skb);
skb = segments;
- while (skb) {
- next = skb->next;
- skb->next = NULL;
-
+ skb_list_walk_safe(skb, skb, next) {
+ skb_mark_not_on_list(skb);
efx_enqueue_skb(tx_queue, skb);
- skb = next;
}
return 0;
@@ -687,41 +500,6 @@ int efx_xdp_tx_buffers(struct efx_nic *efx, int n, struct xdp_frame **xdpfs,
return i;
}
-/* Remove packets from the TX queue
- *
- * This removes packets from the TX queue, up to and including the
- * specified index.
- */
-static void efx_dequeue_buffers(struct efx_tx_queue *tx_queue,
- unsigned int index,
- unsigned int *pkts_compl,
- unsigned int *bytes_compl)
-{
- struct efx_nic *efx = tx_queue->efx;
- unsigned int stop_index, read_ptr;
-
- stop_index = (index + 1) & tx_queue->ptr_mask;
- read_ptr = tx_queue->read_count & tx_queue->ptr_mask;
-
- while (read_ptr != stop_index) {
- struct efx_tx_buffer *buffer = &tx_queue->buffer[read_ptr];
-
- if (!(buffer->flags & EFX_TX_BUF_OPTION) &&
- unlikely(buffer->len == 0)) {
- netif_err(efx, tx_err, efx->net_dev,
- "TX queue %d spurious TX completion id %x\n",
- tx_queue->queue, read_ptr);
- efx_schedule_reset(efx, RESET_TYPE_TX_SKIP);
- return;
- }
-
- efx_dequeue_buffer(tx_queue, buffer, pkts_compl, bytes_compl);
-
- ++tx_queue->read_count;
- read_ptr = tx_queue->read_count & tx_queue->ptr_mask;
- }
-}
-
/* Initiate a packet transmission. We use one channel per CPU
* (sharing when we have more CPUs than channels). On Falcon, the TX
* completion events will be directed back to the CPU that transmitted
@@ -834,173 +612,3 @@ int efx_setup_tc(struct net_device *net_dev, enum tc_setup_type type,
net_dev->num_tc = num_tc;
return 0;
}
-
-void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index)
-{
- unsigned fill_level;
- struct efx_nic *efx = tx_queue->efx;
- struct efx_tx_queue *txq2;
- unsigned int pkts_compl = 0, bytes_compl = 0;
-
- EFX_WARN_ON_ONCE_PARANOID(index > tx_queue->ptr_mask);
-
- efx_dequeue_buffers(tx_queue, index, &pkts_compl, &bytes_compl);
- tx_queue->pkts_compl += pkts_compl;
- tx_queue->bytes_compl += bytes_compl;
-
- if (pkts_compl > 1)
- ++tx_queue->merge_events;
-
- /* See if we need to restart the netif queue. This memory
- * barrier ensures that we write read_count (inside
- * efx_dequeue_buffers()) before reading the queue status.
- */
- smp_mb();
- if (unlikely(netif_tx_queue_stopped(tx_queue->core_txq)) &&
- likely(efx->port_enabled) &&
- likely(netif_device_present(efx->net_dev))) {
- txq2 = efx_tx_queue_partner(tx_queue);
- fill_level = max(tx_queue->insert_count - tx_queue->read_count,
- txq2->insert_count - txq2->read_count);
- if (fill_level <= efx->txq_wake_thresh)
- netif_tx_wake_queue(tx_queue->core_txq);
- }
-
- /* Check whether the hardware queue is now empty */
- if ((int)(tx_queue->read_count - tx_queue->old_write_count) >= 0) {
- tx_queue->old_write_count = READ_ONCE(tx_queue->write_count);
- if (tx_queue->read_count == tx_queue->old_write_count) {
- smp_mb();
- tx_queue->empty_read_count =
- tx_queue->read_count | EFX_EMPTY_COUNT_VALID;
- }
- }
-}
-
-static unsigned int efx_tx_cb_page_count(struct efx_tx_queue *tx_queue)
-{
- return DIV_ROUND_UP(tx_queue->ptr_mask + 1, PAGE_SIZE >> EFX_TX_CB_ORDER);
-}
-
-int efx_probe_tx_queue(struct efx_tx_queue *tx_queue)
-{
- struct efx_nic *efx = tx_queue->efx;
- unsigned int entries;
- int rc;
-
- /* Create the smallest power-of-two aligned ring */
- entries = max(roundup_pow_of_two(efx->txq_entries), EFX_MIN_DMAQ_SIZE);
- EFX_WARN_ON_PARANOID(entries > EFX_MAX_DMAQ_SIZE);
- tx_queue->ptr_mask = entries - 1;
-
- netif_dbg(efx, probe, efx->net_dev,
- "creating TX queue %d size %#x mask %#x\n",
- tx_queue->queue, efx->txq_entries, tx_queue->ptr_mask);
-
- /* Allocate software ring */
- tx_queue->buffer = kcalloc(entries, sizeof(*tx_queue->buffer),
- GFP_KERNEL);
- if (!tx_queue->buffer)
- return -ENOMEM;
-
- tx_queue->cb_page = kcalloc(efx_tx_cb_page_count(tx_queue),
- sizeof(tx_queue->cb_page[0]), GFP_KERNEL);
- if (!tx_queue->cb_page) {
- rc = -ENOMEM;
- goto fail1;
- }
-
- /* Allocate hardware ring */
- rc = efx_nic_probe_tx(tx_queue);
- if (rc)
- goto fail2;
-
- return 0;
-
-fail2:
- kfree(tx_queue->cb_page);
- tx_queue->cb_page = NULL;
-fail1:
- kfree(tx_queue->buffer);
- tx_queue->buffer = NULL;
- return rc;
-}
-
-void efx_init_tx_queue(struct efx_tx_queue *tx_queue)
-{
- struct efx_nic *efx = tx_queue->efx;
-
- netif_dbg(efx, drv, efx->net_dev,
- "initialising TX queue %d\n", tx_queue->queue);
-
- tx_queue->insert_count = 0;
- tx_queue->write_count = 0;
- tx_queue->packet_write_count = 0;
- tx_queue->old_write_count = 0;
- tx_queue->read_count = 0;
- tx_queue->old_read_count = 0;
- tx_queue->empty_read_count = 0 | EFX_EMPTY_COUNT_VALID;
- tx_queue->xmit_more_available = false;
- tx_queue->timestamping = (efx_ptp_use_mac_tx_timestamps(efx) &&
- tx_queue->channel == efx_ptp_channel(efx));
- tx_queue->completed_desc_ptr = tx_queue->ptr_mask;
- tx_queue->completed_timestamp_major = 0;
- tx_queue->completed_timestamp_minor = 0;
-
- tx_queue->xdp_tx = efx_channel_is_xdp_tx(tx_queue->channel);
-
- /* Set up default function pointers. These may get replaced by
- * efx_nic_init_tx() based off NIC/queue capabilities.
- */
- tx_queue->handle_tso = efx_enqueue_skb_tso;
-
- /* Set up TX descriptor ring */
- efx_nic_init_tx(tx_queue);
-
- tx_queue->initialised = true;
-}
-
-void efx_fini_tx_queue(struct efx_tx_queue *tx_queue)
-{
- struct efx_tx_buffer *buffer;
-
- netif_dbg(tx_queue->efx, drv, tx_queue->efx->net_dev,
- "shutting down TX queue %d\n", tx_queue->queue);
-
- if (!tx_queue->buffer)
- return;
-
- /* Free any buffers left in the ring */
- while (tx_queue->read_count != tx_queue->write_count) {
- unsigned int pkts_compl = 0, bytes_compl = 0;
- buffer = &tx_queue->buffer[tx_queue->read_count & tx_queue->ptr_mask];
- efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl);
-
- ++tx_queue->read_count;
- }
- tx_queue->xmit_more_available = false;
- netdev_tx_reset_queue(tx_queue->core_txq);
-}
-
-void efx_remove_tx_queue(struct efx_tx_queue *tx_queue)
-{
- int i;
-
- if (!tx_queue->buffer)
- return;
-
- netif_dbg(tx_queue->efx, drv, tx_queue->efx->net_dev,
- "destroying TX queue %d\n", tx_queue->queue);
- efx_nic_remove_tx(tx_queue);
-
- if (tx_queue->cb_page) {
- for (i = 0; i < efx_tx_cb_page_count(tx_queue); i++)
- efx_nic_free_buffer(tx_queue->efx,
- &tx_queue->cb_page[i]);
- kfree(tx_queue->cb_page);
- tx_queue->cb_page = NULL;
- }
-
- kfree(tx_queue->buffer);
- tx_queue->buffer = NULL;
-}
diff --git a/drivers/net/ethernet/sfc/tx_common.c b/drivers/net/ethernet/sfc/tx_common.c
new file mode 100644
index 000000000000..b1571e9789d0
--- /dev/null
+++ b/drivers/net/ethernet/sfc/tx_common.c
@@ -0,0 +1,404 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2018 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#include "net_driver.h"
+#include "efx.h"
+#include "nic.h"
+#include "tx_common.h"
+
+static unsigned int efx_tx_cb_page_count(struct efx_tx_queue *tx_queue)
+{
+ return DIV_ROUND_UP(tx_queue->ptr_mask + 1,
+ PAGE_SIZE >> EFX_TX_CB_ORDER);
+}
+
+int efx_probe_tx_queue(struct efx_tx_queue *tx_queue)
+{
+ struct efx_nic *efx = tx_queue->efx;
+ unsigned int entries;
+ int rc;
+
+ /* Create the smallest power-of-two aligned ring */
+ entries = max(roundup_pow_of_two(efx->txq_entries), EFX_MIN_DMAQ_SIZE);
+ EFX_WARN_ON_PARANOID(entries > EFX_MAX_DMAQ_SIZE);
+ tx_queue->ptr_mask = entries - 1;
+
+ netif_dbg(efx, probe, efx->net_dev,
+ "creating TX queue %d size %#x mask %#x\n",
+ tx_queue->queue, efx->txq_entries, tx_queue->ptr_mask);
+
+ /* Allocate software ring */
+ tx_queue->buffer = kcalloc(entries, sizeof(*tx_queue->buffer),
+ GFP_KERNEL);
+ if (!tx_queue->buffer)
+ return -ENOMEM;
+
+ tx_queue->cb_page = kcalloc(efx_tx_cb_page_count(tx_queue),
+ sizeof(tx_queue->cb_page[0]), GFP_KERNEL);
+ if (!tx_queue->cb_page) {
+ rc = -ENOMEM;
+ goto fail1;
+ }
+
+ /* Allocate hardware ring */
+ rc = efx_nic_probe_tx(tx_queue);
+ if (rc)
+ goto fail2;
+
+ return 0;
+
+fail2:
+ kfree(tx_queue->cb_page);
+ tx_queue->cb_page = NULL;
+fail1:
+ kfree(tx_queue->buffer);
+ tx_queue->buffer = NULL;
+ return rc;
+}
+
+void efx_init_tx_queue(struct efx_tx_queue *tx_queue)
+{
+ struct efx_nic *efx = tx_queue->efx;
+
+ netif_dbg(efx, drv, efx->net_dev,
+ "initialising TX queue %d\n", tx_queue->queue);
+
+ tx_queue->insert_count = 0;
+ tx_queue->write_count = 0;
+ tx_queue->packet_write_count = 0;
+ tx_queue->old_write_count = 0;
+ tx_queue->read_count = 0;
+ tx_queue->old_read_count = 0;
+ tx_queue->empty_read_count = 0 | EFX_EMPTY_COUNT_VALID;
+ tx_queue->xmit_more_available = false;
+ tx_queue->timestamping = (efx_ptp_use_mac_tx_timestamps(efx) &&
+ tx_queue->channel == efx_ptp_channel(efx));
+ tx_queue->completed_desc_ptr = tx_queue->ptr_mask;
+ tx_queue->completed_timestamp_major = 0;
+ tx_queue->completed_timestamp_minor = 0;
+
+ tx_queue->xdp_tx = efx_channel_is_xdp_tx(tx_queue->channel);
+
+ /* Set up default function pointers. These may get replaced by
+ * efx_nic_init_tx() based off NIC/queue capabilities.
+ */
+ tx_queue->handle_tso = efx_enqueue_skb_tso;
+
+ /* Set up TX descriptor ring */
+ efx_nic_init_tx(tx_queue);
+
+ tx_queue->initialised = true;
+}
+
+void efx_fini_tx_queue(struct efx_tx_queue *tx_queue)
+{
+ struct efx_tx_buffer *buffer;
+
+ netif_dbg(tx_queue->efx, drv, tx_queue->efx->net_dev,
+ "shutting down TX queue %d\n", tx_queue->queue);
+
+ if (!tx_queue->buffer)
+ return;
+
+ /* Free any buffers left in the ring */
+ while (tx_queue->read_count != tx_queue->write_count) {
+ unsigned int pkts_compl = 0, bytes_compl = 0;
+
+ buffer = &tx_queue->buffer[tx_queue->read_count & tx_queue->ptr_mask];
+ efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl);
+
+ ++tx_queue->read_count;
+ }
+ tx_queue->xmit_more_available = false;
+ netdev_tx_reset_queue(tx_queue->core_txq);
+}
+
+void efx_remove_tx_queue(struct efx_tx_queue *tx_queue)
+{
+ int i;
+
+ if (!tx_queue->buffer)
+ return;
+
+ netif_dbg(tx_queue->efx, drv, tx_queue->efx->net_dev,
+ "destroying TX queue %d\n", tx_queue->queue);
+ efx_nic_remove_tx(tx_queue);
+
+ if (tx_queue->cb_page) {
+ for (i = 0; i < efx_tx_cb_page_count(tx_queue); i++)
+ efx_nic_free_buffer(tx_queue->efx,
+ &tx_queue->cb_page[i]);
+ kfree(tx_queue->cb_page);
+ tx_queue->cb_page = NULL;
+ }
+
+ kfree(tx_queue->buffer);
+ tx_queue->buffer = NULL;
+}
+
+void efx_dequeue_buffer(struct efx_tx_queue *tx_queue,
+ struct efx_tx_buffer *buffer,
+ unsigned int *pkts_compl,
+ unsigned int *bytes_compl)
+{
+ if (buffer->unmap_len) {
+ struct device *dma_dev = &tx_queue->efx->pci_dev->dev;
+ dma_addr_t unmap_addr = buffer->dma_addr - buffer->dma_offset;
+
+ if (buffer->flags & EFX_TX_BUF_MAP_SINGLE)
+ dma_unmap_single(dma_dev, unmap_addr, buffer->unmap_len,
+ DMA_TO_DEVICE);
+ else
+ dma_unmap_page(dma_dev, unmap_addr, buffer->unmap_len,
+ DMA_TO_DEVICE);
+ buffer->unmap_len = 0;
+ }
+
+ if (buffer->flags & EFX_TX_BUF_SKB) {
+ struct sk_buff *skb = (struct sk_buff *)buffer->skb;
+
+ EFX_WARN_ON_PARANOID(!pkts_compl || !bytes_compl);
+ (*pkts_compl)++;
+ (*bytes_compl) += skb->len;
+ if (tx_queue->timestamping &&
+ (tx_queue->completed_timestamp_major ||
+ tx_queue->completed_timestamp_minor)) {
+ struct skb_shared_hwtstamps hwtstamp;
+
+ hwtstamp.hwtstamp =
+ efx_ptp_nic_to_kernel_time(tx_queue);
+ skb_tstamp_tx(skb, &hwtstamp);
+
+ tx_queue->completed_timestamp_major = 0;
+ tx_queue->completed_timestamp_minor = 0;
+ }
+ dev_consume_skb_any((struct sk_buff *)buffer->skb);
+ netif_vdbg(tx_queue->efx, tx_done, tx_queue->efx->net_dev,
+ "TX queue %d transmission id %x complete\n",
+ tx_queue->queue, tx_queue->read_count);
+ } else if (buffer->flags & EFX_TX_BUF_XDP) {
+ xdp_return_frame_rx_napi(buffer->xdpf);
+ }
+
+ buffer->len = 0;
+ buffer->flags = 0;
+}
+
+/* Remove packets from the TX queue
+ *
+ * This removes packets from the TX queue, up to and including the
+ * specified index.
+ */
+static void efx_dequeue_buffers(struct efx_tx_queue *tx_queue,
+ unsigned int index,
+ unsigned int *pkts_compl,
+ unsigned int *bytes_compl)
+{
+ struct efx_nic *efx = tx_queue->efx;
+ unsigned int stop_index, read_ptr;
+
+ stop_index = (index + 1) & tx_queue->ptr_mask;
+ read_ptr = tx_queue->read_count & tx_queue->ptr_mask;
+
+ while (read_ptr != stop_index) {
+ struct efx_tx_buffer *buffer = &tx_queue->buffer[read_ptr];
+
+ if (!(buffer->flags & EFX_TX_BUF_OPTION) &&
+ unlikely(buffer->len == 0)) {
+ netif_err(efx, tx_err, efx->net_dev,
+ "TX queue %d spurious TX completion id %x\n",
+ tx_queue->queue, read_ptr);
+ efx_schedule_reset(efx, RESET_TYPE_TX_SKIP);
+ return;
+ }
+
+ efx_dequeue_buffer(tx_queue, buffer, pkts_compl, bytes_compl);
+
+ ++tx_queue->read_count;
+ read_ptr = tx_queue->read_count & tx_queue->ptr_mask;
+ }
+}
+
+void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index)
+{
+ unsigned int fill_level, pkts_compl = 0, bytes_compl = 0;
+ struct efx_nic *efx = tx_queue->efx;
+ struct efx_tx_queue *txq2;
+
+ EFX_WARN_ON_ONCE_PARANOID(index > tx_queue->ptr_mask);
+
+ efx_dequeue_buffers(tx_queue, index, &pkts_compl, &bytes_compl);
+ tx_queue->pkts_compl += pkts_compl;
+ tx_queue->bytes_compl += bytes_compl;
+
+ if (pkts_compl > 1)
+ ++tx_queue->merge_events;
+
+ /* See if we need to restart the netif queue. This memory
+ * barrier ensures that we write read_count (inside
+ * efx_dequeue_buffers()) before reading the queue status.
+ */
+ smp_mb();
+ if (unlikely(netif_tx_queue_stopped(tx_queue->core_txq)) &&
+ likely(efx->port_enabled) &&
+ likely(netif_device_present(efx->net_dev))) {
+ txq2 = efx_tx_queue_partner(tx_queue);
+ fill_level = max(tx_queue->insert_count - tx_queue->read_count,
+ txq2->insert_count - txq2->read_count);
+ if (fill_level <= efx->txq_wake_thresh)
+ netif_tx_wake_queue(tx_queue->core_txq);
+ }
+
+ /* Check whether the hardware queue is now empty */
+ if ((int)(tx_queue->read_count - tx_queue->old_write_count) >= 0) {
+ tx_queue->old_write_count = READ_ONCE(tx_queue->write_count);
+ if (tx_queue->read_count == tx_queue->old_write_count) {
+ smp_mb();
+ tx_queue->empty_read_count =
+ tx_queue->read_count | EFX_EMPTY_COUNT_VALID;
+ }
+ }
+}
+
+/* Remove buffers put into a tx_queue for the current packet.
+ * None of the buffers must have an skb attached.
+ */
+void efx_enqueue_unwind(struct efx_tx_queue *tx_queue,
+ unsigned int insert_count)
+{
+ struct efx_tx_buffer *buffer;
+ unsigned int bytes_compl = 0;
+ unsigned int pkts_compl = 0;
+
+ /* Work backwards until we hit the original insert pointer value */
+ while (tx_queue->insert_count != insert_count) {
+ --tx_queue->insert_count;
+ buffer = __efx_tx_queue_get_insert_buffer(tx_queue);
+ efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl);
+ }
+}
+
+struct efx_tx_buffer *efx_tx_map_chunk(struct efx_tx_queue *tx_queue,
+ dma_addr_t dma_addr, size_t len)
+{
+ const struct efx_nic_type *nic_type = tx_queue->efx->type;
+ struct efx_tx_buffer *buffer;
+ unsigned int dma_len;
+
+ /* Map the fragment taking account of NIC-dependent DMA limits. */
+ do {
+ buffer = efx_tx_queue_get_insert_buffer(tx_queue);
+ dma_len = nic_type->tx_limit_len(tx_queue, dma_addr, len);
+
+ buffer->len = dma_len;
+ buffer->dma_addr = dma_addr;
+ buffer->flags = EFX_TX_BUF_CONT;
+ len -= dma_len;
+ dma_addr += dma_len;
+ ++tx_queue->insert_count;
+ } while (len);
+
+ return buffer;
+}
+
+/* Map all data from an SKB for DMA and create descriptors on the queue. */
+int efx_tx_map_data(struct efx_tx_queue *tx_queue, struct sk_buff *skb,
+ unsigned int segment_count)
+{
+ struct efx_nic *efx = tx_queue->efx;
+ struct device *dma_dev = &efx->pci_dev->dev;
+ unsigned int frag_index, nr_frags;
+ dma_addr_t dma_addr, unmap_addr;
+ unsigned short dma_flags;
+ size_t len, unmap_len;
+
+ nr_frags = skb_shinfo(skb)->nr_frags;
+ frag_index = 0;
+
+ /* Map header data. */
+ len = skb_headlen(skb);
+ dma_addr = dma_map_single(dma_dev, skb->data, len, DMA_TO_DEVICE);
+ dma_flags = EFX_TX_BUF_MAP_SINGLE;
+ unmap_len = len;
+ unmap_addr = dma_addr;
+
+ if (unlikely(dma_mapping_error(dma_dev, dma_addr)))
+ return -EIO;
+
+ if (segment_count) {
+ /* For TSO we need to put the header in to a separate
+ * descriptor. Map this separately if necessary.
+ */
+ size_t header_len = skb_transport_header(skb) - skb->data +
+ (tcp_hdr(skb)->doff << 2u);
+
+ if (header_len != len) {
+ tx_queue->tso_long_headers++;
+ efx_tx_map_chunk(tx_queue, dma_addr, header_len);
+ len -= header_len;
+ dma_addr += header_len;
+ }
+ }
+
+ /* Add descriptors for each fragment. */
+ do {
+ struct efx_tx_buffer *buffer;
+ skb_frag_t *fragment;
+
+ buffer = efx_tx_map_chunk(tx_queue, dma_addr, len);
+
+ /* The final descriptor for a fragment is responsible for
+ * unmapping the whole fragment.
+ */
+ buffer->flags = EFX_TX_BUF_CONT | dma_flags;
+ buffer->unmap_len = unmap_len;
+ buffer->dma_offset = buffer->dma_addr - unmap_addr;
+
+ if (frag_index >= nr_frags) {
+ /* Store SKB details with the final buffer for
+ * the completion.
+ */
+ buffer->skb = skb;
+ buffer->flags = EFX_TX_BUF_SKB | dma_flags;
+ return 0;
+ }
+
+ /* Move on to the next fragment. */
+ fragment = &skb_shinfo(skb)->frags[frag_index++];
+ len = skb_frag_size(fragment);
+ dma_addr = skb_frag_dma_map(dma_dev, fragment, 0, len,
+ DMA_TO_DEVICE);
+ dma_flags = 0;
+ unmap_len = len;
+ unmap_addr = dma_addr;
+
+ if (unlikely(dma_mapping_error(dma_dev, dma_addr)))
+ return -EIO;
+ } while (1);
+}
+
+unsigned int efx_tx_max_skb_descs(struct efx_nic *efx)
+{
+ /* Header and payload descriptor for each output segment, plus
+ * one for every input fragment boundary within a segment
+ */
+ unsigned int max_descs = EFX_TSO_MAX_SEGS * 2 + MAX_SKB_FRAGS;
+
+ /* Possibly one more per segment for option descriptors */
+ if (efx_nic_rev(efx) >= EFX_REV_HUNT_A0)
+ max_descs += EFX_TSO_MAX_SEGS;
+
+ /* Possibly more for PCIe page boundaries within input fragments */
+ if (PAGE_SIZE > EFX_PAGE_SIZE)
+ max_descs += max_t(unsigned int, MAX_SKB_FRAGS,
+ DIV_ROUND_UP(GSO_MAX_SIZE, EFX_PAGE_SIZE));
+
+ return max_descs;
+}
diff --git a/drivers/net/ethernet/sfc/tx_common.h b/drivers/net/ethernet/sfc/tx_common.h
new file mode 100644
index 000000000000..f92f1fe3a87f
--- /dev/null
+++ b/drivers/net/ethernet/sfc/tx_common.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2018 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#ifndef EFX_TX_COMMON_H
+#define EFX_TX_COMMON_H
+
+int efx_probe_tx_queue(struct efx_tx_queue *tx_queue);
+void efx_init_tx_queue(struct efx_tx_queue *tx_queue);
+void efx_fini_tx_queue(struct efx_tx_queue *tx_queue);
+void efx_remove_tx_queue(struct efx_tx_queue *tx_queue);
+
+void efx_dequeue_buffer(struct efx_tx_queue *tx_queue,
+ struct efx_tx_buffer *buffer,
+ unsigned int *pkts_compl,
+ unsigned int *bytes_compl);
+
+void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index);
+
+void efx_enqueue_unwind(struct efx_tx_queue *tx_queue,
+ unsigned int insert_count);
+
+struct efx_tx_buffer *efx_tx_map_chunk(struct efx_tx_queue *tx_queue,
+ dma_addr_t dma_addr, size_t len);
+int efx_tx_map_data(struct efx_tx_queue *tx_queue, struct sk_buff *skb,
+ unsigned int segment_count);
+
+unsigned int efx_tx_max_skb_descs(struct efx_nic *efx);
+
+#endif
diff --git a/drivers/net/ethernet/sgi/ioc3-eth.c b/drivers/net/ethernet/sgi/ioc3-eth.c
index d242906ae233..06637b03deed 100644
--- a/drivers/net/ethernet/sgi/ioc3-eth.c
+++ b/drivers/net/ethernet/sgi/ioc3-eth.c
@@ -114,7 +114,7 @@ struct ioc3_private {
static int ioc3_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
static void ioc3_set_multicast_list(struct net_device *dev);
static netdev_tx_t ioc3_start_xmit(struct sk_buff *skb, struct net_device *dev);
-static void ioc3_timeout(struct net_device *dev);
+static void ioc3_timeout(struct net_device *dev, unsigned int txqueue);
static inline unsigned int ioc3_hash(const unsigned char *addr);
static void ioc3_start(struct ioc3_private *ip);
static inline void ioc3_stop(struct ioc3_private *ip);
@@ -1479,7 +1479,7 @@ drop_packet:
return NETDEV_TX_OK;
}
-static void ioc3_timeout(struct net_device *dev)
+static void ioc3_timeout(struct net_device *dev, unsigned int txqueue)
{
struct ioc3_private *ip = netdev_priv(dev);
diff --git a/drivers/net/ethernet/sgi/meth.c b/drivers/net/ethernet/sgi/meth.c
index 539bc5db989c..0c396ecd3389 100644
--- a/drivers/net/ethernet/sgi/meth.c
+++ b/drivers/net/ethernet/sgi/meth.c
@@ -90,7 +90,7 @@ struct meth_private {
spinlock_t meth_lock;
};
-static void meth_tx_timeout(struct net_device *dev);
+static void meth_tx_timeout(struct net_device *dev, unsigned int txqueue);
static irqreturn_t meth_interrupt(int irq, void *dev_id);
/* global, initialized in ip32-setup.c */
@@ -727,7 +727,7 @@ static netdev_tx_t meth_tx(struct sk_buff *skb, struct net_device *dev)
/*
* Deal with a transmit timeout.
*/
-static void meth_tx_timeout(struct net_device *dev)
+static void meth_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct meth_private *priv = netdev_priv(dev);
unsigned long flags;
diff --git a/drivers/net/ethernet/silan/sc92031.c b/drivers/net/ethernet/silan/sc92031.c
index c7641a236eb8..cb043eb1bdc1 100644
--- a/drivers/net/ethernet/silan/sc92031.c
+++ b/drivers/net/ethernet/silan/sc92031.c
@@ -1078,7 +1078,7 @@ static void sc92031_set_multicast_list(struct net_device *dev)
spin_unlock_bh(&priv->lock);
}
-static void sc92031_tx_timeout(struct net_device *dev)
+static void sc92031_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct sc92031_priv *priv = netdev_priv(dev);
diff --git a/drivers/net/ethernet/sis/sis190.c b/drivers/net/ethernet/sis/sis190.c
index 5b351beb78cb..5a4b6e3ab38f 100644
--- a/drivers/net/ethernet/sis/sis190.c
+++ b/drivers/net/ethernet/sis/sis190.c
@@ -1538,7 +1538,7 @@ err_out_0:
goto out;
}
-static void sis190_tx_timeout(struct net_device *dev)
+static void sis190_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct sis190_private *tp = netdev_priv(dev);
void __iomem *ioaddr = tp->mmio_addr;
diff --git a/drivers/net/ethernet/sis/sis900.c b/drivers/net/ethernet/sis/sis900.c
index 85eaccbbbac1..81ed7589e33c 100644
--- a/drivers/net/ethernet/sis/sis900.c
+++ b/drivers/net/ethernet/sis/sis900.c
@@ -222,7 +222,7 @@ static int mdio_read(struct net_device *net_dev, int phy_id, int location);
static void mdio_write(struct net_device *net_dev, int phy_id, int location, int val);
static void sis900_timer(struct timer_list *t);
static void sis900_check_mode (struct net_device *net_dev, struct mii_phy *mii_phy);
-static void sis900_tx_timeout(struct net_device *net_dev);
+static void sis900_tx_timeout(struct net_device *net_dev, unsigned int txqueue);
static void sis900_init_tx_ring(struct net_device *net_dev);
static void sis900_init_rx_ring(struct net_device *net_dev);
static netdev_tx_t sis900_start_xmit(struct sk_buff *skb,
@@ -1537,7 +1537,7 @@ static void sis900_read_mode(struct net_device *net_dev, int *speed, int *duplex
* disable interrupts and do some tasks
*/
-static void sis900_tx_timeout(struct net_device *net_dev)
+static void sis900_tx_timeout(struct net_device *net_dev, unsigned int txqueue)
{
struct sis900_private *sis_priv = netdev_priv(net_dev);
void __iomem *ioaddr = sis_priv->ioaddr;
diff --git a/drivers/net/ethernet/smsc/epic100.c b/drivers/net/ethernet/smsc/epic100.c
index be47d864f8b9..61ddee0c2a2e 100644
--- a/drivers/net/ethernet/smsc/epic100.c
+++ b/drivers/net/ethernet/smsc/epic100.c
@@ -280,6 +280,7 @@ struct epic_private {
signed char phys[4]; /* MII device addresses. */
u16 advertising; /* NWay media advertisement */
int mii_phy_cnt;
+ u32 ethtool_ops_nesting;
struct mii_if_info mii;
unsigned int tx_full:1; /* The Tx queue is full. */
unsigned int default_port:4; /* Last dev->if_port value. */
@@ -291,7 +292,7 @@ static int mdio_read(struct net_device *dev, int phy_id, int location);
static void mdio_write(struct net_device *dev, int phy_id, int loc, int val);
static void epic_restart(struct net_device *dev);
static void epic_timer(struct timer_list *t);
-static void epic_tx_timeout(struct net_device *dev);
+static void epic_tx_timeout(struct net_device *dev, unsigned int txqueue);
static void epic_init_ring(struct net_device *dev);
static netdev_tx_t epic_start_xmit(struct sk_buff *skb,
struct net_device *dev);
@@ -861,7 +862,7 @@ static void epic_timer(struct timer_list *t)
add_timer(&ep->timer);
}
-static void epic_tx_timeout(struct net_device *dev)
+static void epic_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct epic_private *ep = netdev_priv(dev);
void __iomem *ioaddr = ep->ioaddr;
@@ -1435,8 +1436,10 @@ static int ethtool_begin(struct net_device *dev)
struct epic_private *ep = netdev_priv(dev);
void __iomem *ioaddr = ep->ioaddr;
+ if (ep->ethtool_ops_nesting == U32_MAX)
+ return -EBUSY;
/* power-up, if interface is down */
- if (!netif_running(dev)) {
+ if (!ep->ethtool_ops_nesting++ && !netif_running(dev)) {
ew32(GENCTL, 0x0200);
ew32(NVCTL, (er32(NVCTL) & ~0x003c) | 0x4800);
}
@@ -1449,7 +1452,7 @@ static void ethtool_complete(struct net_device *dev)
void __iomem *ioaddr = ep->ioaddr;
/* power-down, if interface is down */
- if (!netif_running(dev)) {
+ if (!--ep->ethtool_ops_nesting && !netif_running(dev)) {
ew32(GENCTL, 0x0008);
ew32(NVCTL, (er32(NVCTL) & ~0x483c) | 0x0000);
}
diff --git a/drivers/net/ethernet/smsc/smc911x.c b/drivers/net/ethernet/smsc/smc911x.c
index 8d88e4083456..186c0bddbe5f 100644
--- a/drivers/net/ethernet/smsc/smc911x.c
+++ b/drivers/net/ethernet/smsc/smc911x.c
@@ -936,7 +936,7 @@ static void smc911x_phy_configure(struct work_struct *work)
if (lp->ctl_rspeed != 100)
my_ad_caps &= ~(ADVERTISE_100BASE4|ADVERTISE_100FULL|ADVERTISE_100HALF);
- if (!lp->ctl_rfduplx)
+ if (!lp->ctl_rfduplx)
my_ad_caps &= ~(ADVERTISE_100FULL|ADVERTISE_10FULL);
/* Update our Auto-Neg Advertisement Register */
@@ -1245,7 +1245,7 @@ static void smc911x_poll_controller(struct net_device *dev)
#endif
/* Our watchdog timed out. Called by the networking layer */
-static void smc911x_timeout(struct net_device *dev)
+static void smc911x_timeout(struct net_device *dev, unsigned int txqueue)
{
struct smc911x_local *lp = netdev_priv(dev);
int status, mask;
diff --git a/drivers/net/ethernet/smsc/smc9194.c b/drivers/net/ethernet/smsc/smc9194.c
index d3bb2ba51f40..4b2330deed47 100644
--- a/drivers/net/ethernet/smsc/smc9194.c
+++ b/drivers/net/ethernet/smsc/smc9194.c
@@ -216,7 +216,7 @@ static int smc_open(struct net_device *dev);
/*
. Our watchdog timed out. Called by the networking layer
*/
-static void smc_timeout(struct net_device *dev);
+static void smc_timeout(struct net_device *dev, unsigned int txqueue);
/*
. This is called by the kernel in response to 'ifconfig ethX down'. It
@@ -1094,7 +1094,7 @@ static int smc_open(struct net_device *dev)
.--------------------------------------------------------
*/
-static void smc_timeout(struct net_device *dev)
+static void smc_timeout(struct net_device *dev, unsigned int txqueue)
{
/* If we get here, some higher level has decided we are broken.
There should really be a "kick me" function call instead. */
diff --git a/drivers/net/ethernet/smsc/smc91c92_cs.c b/drivers/net/ethernet/smsc/smc91c92_cs.c
index a55f430f6a7b..f2a50eb3c1e0 100644
--- a/drivers/net/ethernet/smsc/smc91c92_cs.c
+++ b/drivers/net/ethernet/smsc/smc91c92_cs.c
@@ -271,7 +271,7 @@ static void smc91c92_release(struct pcmcia_device *link);
static int smc_open(struct net_device *dev);
static int smc_close(struct net_device *dev);
static int smc_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
-static void smc_tx_timeout(struct net_device *dev);
+static void smc_tx_timeout(struct net_device *dev, unsigned int txqueue);
static netdev_tx_t smc_start_xmit(struct sk_buff *skb,
struct net_device *dev);
static irqreturn_t smc_interrupt(int irq, void *dev_id);
@@ -1178,7 +1178,7 @@ static void smc_hardware_send_packet(struct net_device * dev)
/*====================================================================*/
-static void smc_tx_timeout(struct net_device *dev)
+static void smc_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct smc_private *smc = netdev_priv(dev);
unsigned int ioaddr = dev->base_addr;
diff --git a/drivers/net/ethernet/smsc/smc91x.c b/drivers/net/ethernet/smsc/smc91x.c
index 3a6761131f4c..90410f9d3b1a 100644
--- a/drivers/net/ethernet/smsc/smc91x.c
+++ b/drivers/net/ethernet/smsc/smc91x.c
@@ -1321,7 +1321,7 @@ static void smc_poll_controller(struct net_device *dev)
#endif
/* Our watchdog timed out. Called by the networking layer */
-static void smc_timeout(struct net_device *dev)
+static void smc_timeout(struct net_device *dev, unsigned int txqueue)
{
struct smc_local *lp = netdev_priv(dev);
void __iomem *ioaddr = lp->base;
diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c
index 869a498e3b5e..b5a9e947a4a8 100644
--- a/drivers/net/ethernet/socionext/netsec.c
+++ b/drivers/net/ethernet/socionext/netsec.c
@@ -1279,7 +1279,7 @@ static int netsec_setup_rx_dring(struct netsec_priv *priv)
/* internal DMA mapping in page_pool */
pp_params.flags = PP_FLAG_DMA_MAP;
pp_params.pool_size = DESC_NUM;
- pp_params.nid = cpu_to_node(0);
+ pp_params.nid = NUMA_NO_NODE;
pp_params.dev = priv->dev;
pp_params.dma_dir = xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;
diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index b210e987a1db..31003b67d24f 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -363,11 +363,15 @@ struct dma_features {
unsigned int dvlan;
unsigned int l3l4fnum;
unsigned int arpoffsel;
+ /* TSN Features */
+ unsigned int estwid;
+ unsigned int estdep;
+ unsigned int estsel;
+ unsigned int fpesel;
};
-/* GMAC TX FIFO is 8K, Rx FIFO is 16K */
-#define BUF_SIZE_16KiB 16384
-/* RX Buffer size must be < 8191 and multiple of 4/8/16 bytes */
+/* RX Buffer size must be multiple of 4/8/16 bytes */
+#define BUF_SIZE_16KiB 16368
#define BUF_SIZE_8KiB 8188
#define BUF_SIZE_4KiB 4096
#define BUF_SIZE_2KiB 2048
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c
index dd9967aeda22..2342d497348e 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c
@@ -40,7 +40,7 @@ struct tegra_eqos {
static int dwc_eth_dwmac_config_dt(struct platform_device *pdev,
struct plat_stmmacenet_data *plat_dat)
{
- struct device_node *np = pdev->dev.of_node;
+ struct device *dev = &pdev->dev;
u32 burst_map = 0;
u32 bit_index = 0;
u32 a_index = 0;
@@ -52,9 +52,10 @@ static int dwc_eth_dwmac_config_dt(struct platform_device *pdev,
return -ENOMEM;
}
- plat_dat->axi->axi_lpi_en = of_property_read_bool(np, "snps,en-lpi");
- if (of_property_read_u32(np, "snps,write-requests",
- &plat_dat->axi->axi_wr_osr_lmt)) {
+ plat_dat->axi->axi_lpi_en = device_property_read_bool(dev,
+ "snps,en-lpi");
+ if (device_property_read_u32(dev, "snps,write-requests",
+ &plat_dat->axi->axi_wr_osr_lmt)) {
/**
* Since the register has a reset value of 1, if property
* is missing, default to 1.
@@ -68,8 +69,8 @@ static int dwc_eth_dwmac_config_dt(struct platform_device *pdev,
plat_dat->axi->axi_wr_osr_lmt--;
}
- if (of_property_read_u32(np, "snps,read-requests",
- &plat_dat->axi->axi_rd_osr_lmt)) {
+ if (device_property_read_u32(dev, "snps,read-requests",
+ &plat_dat->axi->axi_rd_osr_lmt)) {
/**
* Since the register has a reset value of 1, if property
* is missing, default to 1.
@@ -82,7 +83,7 @@ static int dwc_eth_dwmac_config_dt(struct platform_device *pdev,
*/
plat_dat->axi->axi_rd_osr_lmt--;
}
- of_property_read_u32(np, "snps,burst-map", &burst_map);
+ device_property_read_u32(dev, "snps,burst-map", &burst_map);
/* converts burst-map bitmask to burst array */
for (bit_index = 0; bit_index < 7; bit_index++) {
@@ -270,6 +271,7 @@ static void *tegra_eqos_probe(struct platform_device *pdev,
struct plat_stmmacenet_data *data,
struct stmmac_resources *res)
{
+ struct device *dev = &pdev->dev;
struct tegra_eqos *eqos;
int err;
@@ -282,6 +284,9 @@ static void *tegra_eqos_probe(struct platform_device *pdev,
eqos->dev = &pdev->dev;
eqos->regs = res->addr;
+ if (!is_of_node(dev->fwnode))
+ goto bypass_clk_reset_gpio;
+
eqos->clk_master = devm_clk_get(&pdev->dev, "master_bus");
if (IS_ERR(eqos->clk_master)) {
err = PTR_ERR(eqos->clk_master);
@@ -354,6 +359,7 @@ static void *tegra_eqos_probe(struct platform_device *pdev,
usleep_range(2000, 4000);
+bypass_clk_reset_gpio:
data->fix_mac_speed = tegra_eqos_fix_speed;
data->init = tegra_eqos_init;
data->bsp_priv = eqos;
@@ -421,7 +427,7 @@ static int dwc_eth_dwmac_probe(struct platform_device *pdev)
void *priv;
int ret;
- data = of_device_get_match_data(&pdev->dev);
+ data = device_get_match_data(&pdev->dev);
memset(&stmmac_res, 0, sizeof(struct stmmac_resources));
@@ -478,7 +484,7 @@ static int dwc_eth_dwmac_remove(struct platform_device *pdev)
const struct dwc_eth_dwmac_data *data;
int err;
- data = of_device_get_match_data(&pdev->dev);
+ data = device_get_match_data(&pdev->dev);
err = stmmac_dvr_remove(&pdev->dev);
if (err < 0)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c
index bdb80421acac..9e4b83832938 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c
@@ -55,6 +55,8 @@ struct mediatek_dwmac_plat_data {
struct regmap *peri_regmap;
struct device *dev;
phy_interface_t phy_mode;
+ int num_clks_to_config;
+ bool rmii_clk_from_mac;
bool rmii_rxc;
};
@@ -73,21 +75,33 @@ struct mediatek_dwmac_variant {
/* list of clocks required for mac */
static const char * const mt2712_dwmac_clk_l[] = {
- "axi", "apb", "mac_main", "ptp_ref"
+ "axi", "apb", "mac_main", "ptp_ref", "rmii_internal"
};
static int mt2712_set_interface(struct mediatek_dwmac_plat_data *plat)
{
+ int rmii_clk_from_mac = plat->rmii_clk_from_mac ? RMII_CLK_SRC_INTERNAL : 0;
int rmii_rxc = plat->rmii_rxc ? RMII_CLK_SRC_RXC : 0;
u32 intf_val = 0;
+ /* The clock labeled as "rmii_internal" in mt2712_dwmac_clk_l is needed
+ * only in RMII(when MAC provides the reference clock), and useless for
+ * RGMII/MII/RMII(when PHY provides the reference clock).
+ * num_clks_to_config indicates the real number of clocks should be
+ * configured, equals to (plat->variant->num_clks - 1) in default for all the case,
+ * then +1 for rmii_clk_from_mac case.
+ */
+ plat->num_clks_to_config = plat->variant->num_clks - 1;
+
/* select phy interface in top control domain */
switch (plat->phy_mode) {
case PHY_INTERFACE_MODE_MII:
intf_val |= PHY_INTF_MII;
break;
case PHY_INTERFACE_MODE_RMII:
- intf_val |= (PHY_INTF_RMII | rmii_rxc);
+ if (plat->rmii_clk_from_mac)
+ plat->num_clks_to_config++;
+ intf_val |= (PHY_INTF_RMII | rmii_rxc | rmii_clk_from_mac);
break;
case PHY_INTERFACE_MODE_RGMII:
case PHY_INTERFACE_MODE_RGMII_TXID:
@@ -173,35 +187,50 @@ static int mt2712_set_delay(struct mediatek_dwmac_plat_data *plat)
delay_val |= FIELD_PREP(ETH_DLY_RXC_INV, mac_delay->rx_inv);
break;
case PHY_INTERFACE_MODE_RMII:
- /* the rmii reference clock is from external phy,
- * and the property "rmii_rxc" indicates which pin(TXC/RXC)
- * the reference clk is connected to. The reference clock is a
- * received signal, so rx_delay/rx_inv are used to indicate
- * the reference clock timing adjustment
- */
- if (plat->rmii_rxc) {
- /* the rmii reference clock from outside is connected
- * to RXC pin, the reference clock will be adjusted
- * by RXC delay macro circuit.
- */
- delay_val |= FIELD_PREP(ETH_DLY_RXC_ENABLE, !!mac_delay->rx_delay);
- delay_val |= FIELD_PREP(ETH_DLY_RXC_STAGES, mac_delay->rx_delay);
- delay_val |= FIELD_PREP(ETH_DLY_RXC_INV, mac_delay->rx_inv);
- } else {
- /* the rmii reference clock from outside is connected
- * to TXC pin, the reference clock will be adjusted
- * by TXC delay macro circuit.
+ if (plat->rmii_clk_from_mac) {
+ /* case 1: mac provides the rmii reference clock,
+ * and the clock output to TXC pin.
+ * The egress timing can be adjusted by GTXC delay macro circuit.
+ * The ingress timing can be adjusted by TXC delay macro circuit.
*/
delay_val |= FIELD_PREP(ETH_DLY_TXC_ENABLE, !!mac_delay->rx_delay);
delay_val |= FIELD_PREP(ETH_DLY_TXC_STAGES, mac_delay->rx_delay);
delay_val |= FIELD_PREP(ETH_DLY_TXC_INV, mac_delay->rx_inv);
+
+ delay_val |= FIELD_PREP(ETH_DLY_GTXC_ENABLE, !!mac_delay->tx_delay);
+ delay_val |= FIELD_PREP(ETH_DLY_GTXC_STAGES, mac_delay->tx_delay);
+ delay_val |= FIELD_PREP(ETH_DLY_GTXC_INV, mac_delay->tx_inv);
+ } else {
+ /* case 2: the rmii reference clock is from external phy,
+ * and the property "rmii_rxc" indicates which pin(TXC/RXC)
+ * the reference clk is connected to. The reference clock is a
+ * received signal, so rx_delay/rx_inv are used to indicate
+ * the reference clock timing adjustment
+ */
+ if (plat->rmii_rxc) {
+ /* the rmii reference clock from outside is connected
+ * to RXC pin, the reference clock will be adjusted
+ * by RXC delay macro circuit.
+ */
+ delay_val |= FIELD_PREP(ETH_DLY_RXC_ENABLE, !!mac_delay->rx_delay);
+ delay_val |= FIELD_PREP(ETH_DLY_RXC_STAGES, mac_delay->rx_delay);
+ delay_val |= FIELD_PREP(ETH_DLY_RXC_INV, mac_delay->rx_inv);
+ } else {
+ /* the rmii reference clock from outside is connected
+ * to TXC pin, the reference clock will be adjusted
+ * by TXC delay macro circuit.
+ */
+ delay_val |= FIELD_PREP(ETH_DLY_TXC_ENABLE, !!mac_delay->rx_delay);
+ delay_val |= FIELD_PREP(ETH_DLY_TXC_STAGES, mac_delay->rx_delay);
+ delay_val |= FIELD_PREP(ETH_DLY_TXC_INV, mac_delay->rx_inv);
+ }
+ /* tx_inv will inverse the tx clock inside mac relateive to
+ * reference clock from external phy,
+ * and this bit is located in the same register with fine-tune
+ */
+ if (mac_delay->tx_inv)
+ fine_val = ETH_RMII_DLY_TX_INV;
}
- /* tx_inv will inverse the tx clock inside mac relateive to
- * reference clock from external phy,
- * and this bit is located in the same register with fine-tune
- */
- if (mac_delay->tx_inv)
- fine_val = ETH_RMII_DLY_TX_INV;
break;
case PHY_INTERFACE_MODE_RGMII:
case PHY_INTERFACE_MODE_RGMII_TXID:
@@ -278,6 +307,7 @@ static int mediatek_dwmac_config_dt(struct mediatek_dwmac_plat_data *plat)
mac_delay->tx_inv = of_property_read_bool(plat->np, "mediatek,txc-inverse");
mac_delay->rx_inv = of_property_read_bool(plat->np, "mediatek,rxc-inverse");
plat->rmii_rxc = of_property_read_bool(plat->np, "mediatek,rmii-rxc");
+ plat->rmii_clk_from_mac = of_property_read_bool(plat->np, "mediatek,rmii-clk-from-mac");
return 0;
}
@@ -294,6 +324,8 @@ static int mediatek_dwmac_clk_init(struct mediatek_dwmac_plat_data *plat)
for (i = 0; i < num; i++)
plat->clks[i].id = variant->clk_list[i];
+ plat->num_clks_to_config = variant->num_clks;
+
return devm_clk_bulk_get(plat->dev, num, plat->clks);
}
@@ -321,7 +353,7 @@ static int mediatek_dwmac_init(struct platform_device *pdev, void *priv)
return ret;
}
- ret = clk_bulk_prepare_enable(variant->num_clks, plat->clks);
+ ret = clk_bulk_prepare_enable(plat->num_clks_to_config, plat->clks);
if (ret) {
dev_err(plat->dev, "failed to enable clks, err = %d\n", ret);
return ret;
@@ -336,9 +368,8 @@ static int mediatek_dwmac_init(struct platform_device *pdev, void *priv)
static void mediatek_dwmac_exit(struct platform_device *pdev, void *priv)
{
struct mediatek_dwmac_plat_data *plat = priv;
- const struct mediatek_dwmac_variant *variant = plat->variant;
- clk_bulk_disable_unprepare(variant->num_clks, plat->clks);
+ clk_bulk_disable_unprepare(plat->num_clks_to_config, plat->clks);
pm_runtime_put_sync(&pdev->dev);
pm_runtime_disable(&pdev->dev);
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
index bd6c01004913..0e2fa14f1423 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
@@ -112,6 +112,14 @@ static int meson8b_init_rgmii_tx_clk(struct meson8b_dwmac *dwmac)
struct device *dev = dwmac->dev;
const char *parent_name, *mux_parent_names[MUX_CLK_NUM_PARENTS];
struct meson8b_dwmac_clk_configs *clk_configs;
+ static const struct clk_div_table div_table[] = {
+ { .div = 2, .val = 2, },
+ { .div = 3, .val = 3, },
+ { .div = 4, .val = 4, },
+ { .div = 5, .val = 5, },
+ { .div = 6, .val = 6, },
+ { .div = 7, .val = 7, },
+ };
clk_configs = devm_kzalloc(dev, sizeof(*clk_configs), GFP_KERNEL);
if (!clk_configs)
@@ -146,9 +154,9 @@ static int meson8b_init_rgmii_tx_clk(struct meson8b_dwmac *dwmac)
clk_configs->m250_div.reg = dwmac->regs + PRG_ETH0;
clk_configs->m250_div.shift = PRG_ETH0_CLK_M250_DIV_SHIFT;
clk_configs->m250_div.width = PRG_ETH0_CLK_M250_DIV_WIDTH;
- clk_configs->m250_div.flags = CLK_DIVIDER_ONE_BASED |
- CLK_DIVIDER_ALLOW_ZERO |
- CLK_DIVIDER_ROUND_CLOSEST;
+ clk_configs->m250_div.table = div_table;
+ clk_configs->m250_div.flags = CLK_DIVIDER_ALLOW_ZERO |
+ CLK_DIVIDER_ROUND_CLOSEST;
clk = meson8b_dwmac_register_clk(dwmac, "m250_div", &parent_name, 1,
&clk_divider_ops,
&clk_configs->m250_div.hw);
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
index 1c8d84ed8410..58e0511badba 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
@@ -335,14 +335,30 @@ static void sun8i_dwmac_dump_mac_regs(struct mac_device_info *hw,
}
}
-static void sun8i_dwmac_enable_dma_irq(void __iomem *ioaddr, u32 chan)
+static void sun8i_dwmac_enable_dma_irq(void __iomem *ioaddr, u32 chan,
+ bool rx, bool tx)
{
- writel(EMAC_RX_INT | EMAC_TX_INT, ioaddr + EMAC_INT_EN);
+ u32 value = readl(ioaddr + EMAC_INT_EN);
+
+ if (rx)
+ value |= EMAC_RX_INT;
+ if (tx)
+ value |= EMAC_TX_INT;
+
+ writel(value, ioaddr + EMAC_INT_EN);
}
-static void sun8i_dwmac_disable_dma_irq(void __iomem *ioaddr, u32 chan)
+static void sun8i_dwmac_disable_dma_irq(void __iomem *ioaddr, u32 chan,
+ bool rx, bool tx)
{
- writel(0, ioaddr + EMAC_INT_EN);
+ u32 value = readl(ioaddr + EMAC_INT_EN);
+
+ if (rx)
+ value &= ~EMAC_RX_INT;
+ if (tx)
+ value &= ~EMAC_TX_INT;
+
+ writel(value, ioaddr + EMAC_INT_EN);
}
static void sun8i_dwmac_dma_start_tx(void __iomem *ioaddr, u32 chan)
@@ -957,6 +973,9 @@ static int sun8i_dwmac_set_syscon(struct stmmac_priv *priv)
/* default */
break;
case PHY_INTERFACE_MODE_RGMII:
+ case PHY_INTERFACE_MODE_RGMII_ID:
+ case PHY_INTERFACE_MODE_RGMII_RXID:
+ case PHY_INTERFACE_MODE_RGMII_TXID:
reg |= SYSCON_EPIT | SYSCON_ETCS_INT_GMII;
break;
case PHY_INTERFACE_MODE_RMII:
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c
index 26353ef616b8..7d40760e9ba8 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c
@@ -44,7 +44,7 @@ static int sun7i_gmac_init(struct platform_device *pdev, void *priv)
* rate, which then uses the auto-reparenting feature of the
* clock driver, and enabling/disabling the clock.
*/
- if (gmac->interface == PHY_INTERFACE_MODE_RGMII) {
+ if (phy_interface_mode_is_rgmii(gmac->interface)) {
clk_set_rate(gmac->tx_clk, SUN7I_GMAC_GMII_RGMII_RATE);
clk_prepare_enable(gmac->tx_clk);
gmac->clk_enabled = 1;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h
index 2dc70d104161..2e6b60a476c6 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h
@@ -64,6 +64,8 @@
#define GMAC_RXQCTRL_MCBCQEN_SHIFT 20
#define GMAC_RXQCTRL_TACPQE BIT(21)
#define GMAC_RXQCTRL_TACPQE_SHIFT 21
+#define GMAC_RXQCTRL_FPRQ GENMASK(26, 24)
+#define GMAC_RXQCTRL_FPRQ_SHIFT 24
/* MAC Packet Filtering */
#define GMAC_PACKET_FILTER_PR BIT(0)
@@ -176,6 +178,8 @@ enum power_event {
#define GMAC_CONFIG_SARC GENMASK(30, 28)
#define GMAC_CONFIG_SARC_SHIFT 28
#define GMAC_CONFIG_IPC BIT(27)
+#define GMAC_CONFIG_IPG GENMASK(26, 24)
+#define GMAC_CONFIG_IPG_SHIFT 24
#define GMAC_CONFIG_2K BIT(22)
#define GMAC_CONFIG_ACS BIT(20)
#define GMAC_CONFIG_BE BIT(18)
@@ -183,6 +187,7 @@ enum power_event {
#define GMAC_CONFIG_JE BIT(16)
#define GMAC_CONFIG_PS BIT(15)
#define GMAC_CONFIG_FES BIT(14)
+#define GMAC_CONFIG_FES_SHIFT 14
#define GMAC_CONFIG_DM BIT(13)
#define GMAC_CONFIG_LM BIT(12)
#define GMAC_CONFIG_DCRS BIT(9)
@@ -190,6 +195,9 @@ enum power_event {
#define GMAC_CONFIG_RE BIT(0)
/* MAC extended config */
+#define GMAC_CONFIG_EIPG GENMASK(29, 25)
+#define GMAC_CONFIG_EIPG_SHIFT 25
+#define GMAC_CONFIG_EIPG_EN BIT(24)
#define GMAC_CONFIG_HDSMS GENMASK(22, 20)
#define GMAC_CONFIG_HDSMS_SHIFT 20
#define GMAC_CONFIG_HDSMS_256 (0x2 << GMAC_CONFIG_HDSMS_SHIFT)
@@ -231,6 +239,10 @@ enum power_event {
/* MAC HW features3 bitmap */
#define GMAC_HW_FEAT_ASP GENMASK(29, 28)
+#define GMAC_HW_FEAT_FPESEL BIT(26)
+#define GMAC_HW_FEAT_ESTWID GENMASK(21, 20)
+#define GMAC_HW_FEAT_ESTDEP GENMASK(19, 17)
+#define GMAC_HW_FEAT_ESTSEL BIT(16)
#define GMAC_HW_FEAT_FRPES GENMASK(14, 13)
#define GMAC_HW_FEAT_FRPBS GENMASK(12, 11)
#define GMAC_HW_FEAT_FRPSEL BIT(10)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
index 40ca00e596dd..f0c0ea616032 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
@@ -984,6 +984,8 @@ const struct stmmac_ops dwmac410_ops = {
.set_arp_offload = dwmac4_set_arp_offload,
.config_l3_filter = dwmac4_config_l3_filter,
.config_l4_filter = dwmac4_config_l4_filter,
+ .est_configure = dwmac5_est_configure,
+ .fpe_configure = dwmac5_fpe_configure,
};
const struct stmmac_ops dwmac510_ops = {
@@ -1027,6 +1029,8 @@ const struct stmmac_ops dwmac510_ops = {
.set_arp_offload = dwmac4_set_arp_offload,
.config_l3_filter = dwmac4_config_l3_filter,
.config_l4_filter = dwmac4_config_l4_filter,
+ .est_configure = dwmac5_est_configure,
+ .fpe_configure = dwmac5_fpe_configure,
};
int dwmac4_setup(struct stmmac_priv *priv)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c
index c15409030710..213d44482ffa 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c
@@ -404,6 +404,10 @@ static void dwmac4_get_hw_feature(void __iomem *ioaddr,
/* 5.10 Features */
dma_cap->asp = (hw_cap & GMAC_HW_FEAT_ASP) >> 28;
+ dma_cap->fpesel = (hw_cap & GMAC_HW_FEAT_FPESEL) >> 26;
+ dma_cap->estwid = (hw_cap & GMAC_HW_FEAT_ESTWID) >> 20;
+ dma_cap->estdep = (hw_cap & GMAC_HW_FEAT_ESTDEP) >> 17;
+ dma_cap->estsel = (hw_cap & GMAC_HW_FEAT_ESTSEL) >> 16;
dma_cap->frpes = (hw_cap & GMAC_HW_FEAT_FRPES) >> 13;
dma_cap->frpbs = (hw_cap & GMAC_HW_FEAT_FRPBS) >> 11;
dma_cap->frpsel = (hw_cap & GMAC_HW_FEAT_FRPSEL) >> 10;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h
index 589931795847..bcb6d5190f3d 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.h
@@ -168,6 +168,8 @@
/* DMA default interrupt mask for 4.00 */
#define DMA_CHAN_INTR_DEFAULT_MASK (DMA_CHAN_INTR_NORMAL | \
DMA_CHAN_INTR_ABNORMAL)
+#define DMA_CHAN_INTR_DEFAULT_RX (DMA_CHAN_INTR_ENA_RIE)
+#define DMA_CHAN_INTR_DEFAULT_TX (DMA_CHAN_INTR_ENA_TIE)
#define DMA_CHAN_INTR_NORMAL_4_10 (DMA_CHAN_INTR_ENA_NIE_4_10 | \
DMA_CHAN_INTR_ENA_RIE | \
@@ -178,6 +180,8 @@
/* DMA default interrupt mask for 4.10a */
#define DMA_CHAN_INTR_DEFAULT_MASK_4_10 (DMA_CHAN_INTR_NORMAL_4_10 | \
DMA_CHAN_INTR_ABNORMAL_4_10)
+#define DMA_CHAN_INTR_DEFAULT_RX_4_10 (DMA_CHAN_INTR_ENA_RIE)
+#define DMA_CHAN_INTR_DEFAULT_TX_4_10 (DMA_CHAN_INTR_ENA_TIE)
/* channel 0 specific fields */
#define DMA_CHAN0_DBG_STAT_TPS GENMASK(15, 12)
@@ -186,9 +190,10 @@
#define DMA_CHAN0_DBG_STAT_RPS_SHIFT 8
int dwmac4_dma_reset(void __iomem *ioaddr);
-void dwmac4_enable_dma_irq(void __iomem *ioaddr, u32 chan);
-void dwmac410_enable_dma_irq(void __iomem *ioaddr, u32 chan);
-void dwmac4_disable_dma_irq(void __iomem *ioaddr, u32 chan);
+void dwmac4_enable_dma_irq(void __iomem *ioaddr, u32 chan, bool rx, bool tx);
+void dwmac410_enable_dma_irq(void __iomem *ioaddr, u32 chan, bool rx, bool tx);
+void dwmac4_disable_dma_irq(void __iomem *ioaddr, u32 chan, bool rx, bool tx);
+void dwmac410_disable_dma_irq(void __iomem *ioaddr, u32 chan, bool rx, bool tx);
void dwmac4_dma_start_tx(void __iomem *ioaddr, u32 chan);
void dwmac4_dma_stop_tx(void __iomem *ioaddr, u32 chan);
void dwmac4_dma_start_rx(void __iomem *ioaddr, u32 chan);
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c
index f2a29a90e085..9becca280074 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c
@@ -97,21 +97,52 @@ void dwmac4_set_rx_ring_len(void __iomem *ioaddr, u32 len, u32 chan)
writel(len, ioaddr + DMA_CHAN_RX_RING_LEN(chan));
}
-void dwmac4_enable_dma_irq(void __iomem *ioaddr, u32 chan)
+void dwmac4_enable_dma_irq(void __iomem *ioaddr, u32 chan, bool rx, bool tx)
{
- writel(DMA_CHAN_INTR_DEFAULT_MASK, ioaddr +
- DMA_CHAN_INTR_ENA(chan));
+ u32 value = readl(ioaddr + DMA_CHAN_INTR_ENA(chan));
+
+ if (rx)
+ value |= DMA_CHAN_INTR_DEFAULT_RX;
+ if (tx)
+ value |= DMA_CHAN_INTR_DEFAULT_TX;
+
+ writel(value, ioaddr + DMA_CHAN_INTR_ENA(chan));
}
-void dwmac410_enable_dma_irq(void __iomem *ioaddr, u32 chan)
+void dwmac410_enable_dma_irq(void __iomem *ioaddr, u32 chan, bool rx, bool tx)
{
- writel(DMA_CHAN_INTR_DEFAULT_MASK_4_10,
- ioaddr + DMA_CHAN_INTR_ENA(chan));
+ u32 value = readl(ioaddr + DMA_CHAN_INTR_ENA(chan));
+
+ if (rx)
+ value |= DMA_CHAN_INTR_DEFAULT_RX_4_10;
+ if (tx)
+ value |= DMA_CHAN_INTR_DEFAULT_TX_4_10;
+
+ writel(value, ioaddr + DMA_CHAN_INTR_ENA(chan));
}
-void dwmac4_disable_dma_irq(void __iomem *ioaddr, u32 chan)
+void dwmac4_disable_dma_irq(void __iomem *ioaddr, u32 chan, bool rx, bool tx)
{
- writel(0, ioaddr + DMA_CHAN_INTR_ENA(chan));
+ u32 value = readl(ioaddr + DMA_CHAN_INTR_ENA(chan));
+
+ if (rx)
+ value &= ~DMA_CHAN_INTR_DEFAULT_RX;
+ if (tx)
+ value &= ~DMA_CHAN_INTR_DEFAULT_TX;
+
+ writel(value, ioaddr + DMA_CHAN_INTR_ENA(chan));
+}
+
+void dwmac410_disable_dma_irq(void __iomem *ioaddr, u32 chan, bool rx, bool tx)
+{
+ u32 value = readl(ioaddr + DMA_CHAN_INTR_ENA(chan));
+
+ if (rx)
+ value &= ~DMA_CHAN_INTR_DEFAULT_RX_4_10;
+ if (tx)
+ value &= ~DMA_CHAN_INTR_DEFAULT_TX_4_10;
+
+ writel(value, ioaddr + DMA_CHAN_INTR_ENA(chan));
}
int dwmac4_dma_interrupt(void __iomem *ioaddr,
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac5.c b/drivers/net/ethernet/stmicro/stmmac/dwmac5.c
index e436fa160c7d..494c859b4ade 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac5.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac5.c
@@ -550,3 +550,122 @@ int dwmac5_flex_pps_config(void __iomem *ioaddr, int index,
writel(val, ioaddr + MAC_PPS_CONTROL);
return 0;
}
+
+static int dwmac5_est_write(void __iomem *ioaddr, u32 reg, u32 val, bool gcl)
+{
+ u32 ctrl;
+
+ writel(val, ioaddr + MTL_EST_GCL_DATA);
+
+ ctrl = (reg << ADDR_SHIFT);
+ ctrl |= gcl ? 0 : GCRR;
+
+ writel(ctrl, ioaddr + MTL_EST_GCL_CONTROL);
+
+ ctrl |= SRWO;
+ writel(ctrl, ioaddr + MTL_EST_GCL_CONTROL);
+
+ return readl_poll_timeout(ioaddr + MTL_EST_GCL_CONTROL,
+ ctrl, !(ctrl & SRWO), 100, 5000);
+}
+
+int dwmac5_est_configure(void __iomem *ioaddr, struct stmmac_est *cfg,
+ unsigned int ptp_rate)
+{
+ u32 speed, total_offset, offset, ctrl, ctr_low;
+ u32 extcfg = readl(ioaddr + GMAC_EXT_CONFIG);
+ u32 mac_cfg = readl(ioaddr + GMAC_CONFIG);
+ int i, ret = 0x0;
+ u64 total_ctr;
+
+ if (extcfg & GMAC_CONFIG_EIPG_EN) {
+ offset = (extcfg & GMAC_CONFIG_EIPG) >> GMAC_CONFIG_EIPG_SHIFT;
+ offset = 104 + (offset * 8);
+ } else {
+ offset = (mac_cfg & GMAC_CONFIG_IPG) >> GMAC_CONFIG_IPG_SHIFT;
+ offset = 96 - (offset * 8);
+ }
+
+ speed = mac_cfg & (GMAC_CONFIG_PS | GMAC_CONFIG_FES);
+ speed = speed >> GMAC_CONFIG_FES_SHIFT;
+
+ switch (speed) {
+ case 0x0:
+ offset = offset * 1000; /* 1G */
+ break;
+ case 0x1:
+ offset = offset * 400; /* 2.5G */
+ break;
+ case 0x2:
+ offset = offset * 100000; /* 10M */
+ break;
+ case 0x3:
+ offset = offset * 10000; /* 100M */
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ offset = offset / 1000;
+
+ ret |= dwmac5_est_write(ioaddr, BTR_LOW, cfg->btr[0], false);
+ ret |= dwmac5_est_write(ioaddr, BTR_HIGH, cfg->btr[1], false);
+ ret |= dwmac5_est_write(ioaddr, TER, cfg->ter, false);
+ ret |= dwmac5_est_write(ioaddr, LLR, cfg->gcl_size, false);
+ if (ret)
+ return ret;
+
+ total_offset = 0;
+ for (i = 0; i < cfg->gcl_size; i++) {
+ ret = dwmac5_est_write(ioaddr, i, cfg->gcl[i] + offset, true);
+ if (ret)
+ return ret;
+
+ total_offset += offset;
+ }
+
+ total_ctr = cfg->ctr[0] + cfg->ctr[1] * 1000000000;
+ total_ctr += total_offset;
+
+ ctr_low = do_div(total_ctr, 1000000000);
+
+ ret |= dwmac5_est_write(ioaddr, CTR_LOW, ctr_low, false);
+ ret |= dwmac5_est_write(ioaddr, CTR_HIGH, total_ctr, false);
+ if (ret)
+ return ret;
+
+ ctrl = readl(ioaddr + MTL_EST_CONTROL);
+ ctrl &= ~PTOV;
+ ctrl |= ((1000000000 / ptp_rate) * 6) << PTOV_SHIFT;
+ if (cfg->enable)
+ ctrl |= EEST | SSWL;
+ else
+ ctrl &= ~EEST;
+
+ writel(ctrl, ioaddr + MTL_EST_CONTROL);
+ return 0;
+}
+
+void dwmac5_fpe_configure(void __iomem *ioaddr, u32 num_txq, u32 num_rxq,
+ bool enable)
+{
+ u32 value;
+
+ if (!enable) {
+ value = readl(ioaddr + MAC_FPE_CTRL_STS);
+
+ value &= ~EFPE;
+
+ writel(value, ioaddr + MAC_FPE_CTRL_STS);
+ return;
+ }
+
+ value = readl(ioaddr + GMAC_RXQ_CTRL1);
+ value &= ~GMAC_RXQCTRL_FPRQ;
+ value |= (num_rxq - 1) << GMAC_RXQCTRL_FPRQ_SHIFT;
+ writel(value, ioaddr + GMAC_RXQ_CTRL1);
+
+ value = readl(ioaddr + MAC_FPE_CTRL_STS);
+ value |= EFPE;
+ writel(value, ioaddr + MAC_FPE_CTRL_STS);
+}
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac5.h b/drivers/net/ethernet/stmicro/stmmac/dwmac5.h
index 23fecf68f781..3e8faa96b4d4 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac5.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac5.h
@@ -11,6 +11,9 @@
#define PRTYEN BIT(1)
#define TMOUTEN BIT(0)
+#define MAC_FPE_CTRL_STS 0x00000234
+#define EFPE BIT(0)
+
#define MAC_PPS_CONTROL 0x00000b70
#define PPS_MAXIDX(x) ((((x) + 1) * 8) - 1)
#define PPS_MINIDX(x) ((x) * 8)
@@ -30,6 +33,23 @@
#define MAC_PPSx_INTERVAL(x) (0x00000b88 + ((x) * 0x10))
#define MAC_PPSx_WIDTH(x) (0x00000b8c + ((x) * 0x10))
+#define MTL_EST_CONTROL 0x00000c50
+#define PTOV GENMASK(31, 24)
+#define PTOV_SHIFT 24
+#define SSWL BIT(1)
+#define EEST BIT(0)
+#define MTL_EST_GCL_CONTROL 0x00000c80
+#define BTR_LOW 0x0
+#define BTR_HIGH 0x1
+#define CTR_LOW 0x2
+#define CTR_HIGH 0x3
+#define TER 0x4
+#define LLR 0x5
+#define ADDR_SHIFT 8
+#define GCRR BIT(2)
+#define SRWO BIT(0)
+#define MTL_EST_GCL_DATA 0x00000c84
+
#define MTL_RXP_CONTROL_STATUS 0x00000ca0
#define RXPI BIT(31)
#define NPE GENMASK(23, 16)
@@ -83,5 +103,9 @@ int dwmac5_rxp_config(void __iomem *ioaddr, struct stmmac_tc_entry *entries,
int dwmac5_flex_pps_config(void __iomem *ioaddr, int index,
struct stmmac_pps_cfg *cfg, bool enable,
u32 sub_second_inc, u32 systime_flags);
+int dwmac5_est_configure(void __iomem *ioaddr, struct stmmac_est *cfg,
+ unsigned int ptp_rate);
+void dwmac5_fpe_configure(void __iomem *ioaddr, u32 num_txq, u32 num_rxq,
+ bool enable);
#endif /* __DWMAC5_H__ */
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h b/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h
index 292b880f3f9f..e5dbd0bc257e 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac_dma.h
@@ -96,6 +96,8 @@
/* DMA default interrupt mask */
#define DMA_INTR_DEFAULT_MASK (DMA_INTR_NORMAL | DMA_INTR_ABNORMAL)
+#define DMA_INTR_DEFAULT_RX (DMA_INTR_ENA_RIE)
+#define DMA_INTR_DEFAULT_TX (DMA_INTR_ENA_TIE)
/* DMA Status register defines */
#define DMA_STATUS_GLPII 0x40000000 /* GMAC LPI interrupt */
@@ -130,8 +132,8 @@
#define NUM_DWMAC1000_DMA_REGS 23
void dwmac_enable_dma_transmission(void __iomem *ioaddr);
-void dwmac_enable_dma_irq(void __iomem *ioaddr, u32 chan);
-void dwmac_disable_dma_irq(void __iomem *ioaddr, u32 chan);
+void dwmac_enable_dma_irq(void __iomem *ioaddr, u32 chan, bool rx, bool tx);
+void dwmac_disable_dma_irq(void __iomem *ioaddr, u32 chan, bool rx, bool tx);
void dwmac_dma_start_tx(void __iomem *ioaddr, u32 chan);
void dwmac_dma_stop_tx(void __iomem *ioaddr, u32 chan);
void dwmac_dma_start_rx(void __iomem *ioaddr, u32 chan);
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c b/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c
index 1bc25aa86dbd..688d36095333 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c
@@ -37,14 +37,28 @@ void dwmac_enable_dma_transmission(void __iomem *ioaddr)
writel(1, ioaddr + DMA_XMT_POLL_DEMAND);
}
-void dwmac_enable_dma_irq(void __iomem *ioaddr, u32 chan)
+void dwmac_enable_dma_irq(void __iomem *ioaddr, u32 chan, bool rx, bool tx)
{
- writel(DMA_INTR_DEFAULT_MASK, ioaddr + DMA_INTR_ENA);
+ u32 value = readl(ioaddr + DMA_INTR_ENA);
+
+ if (rx)
+ value |= DMA_INTR_DEFAULT_RX;
+ if (tx)
+ value |= DMA_INTR_DEFAULT_TX;
+
+ writel(value, ioaddr + DMA_INTR_ENA);
}
-void dwmac_disable_dma_irq(void __iomem *ioaddr, u32 chan)
+void dwmac_disable_dma_irq(void __iomem *ioaddr, u32 chan, bool rx, bool tx)
{
- writel(0, ioaddr + DMA_INTR_ENA);
+ u32 value = readl(ioaddr + DMA_INTR_ENA);
+
+ if (rx)
+ value &= ~DMA_INTR_DEFAULT_RX;
+ if (tx)
+ value &= ~DMA_INTR_DEFAULT_TX;
+
+ writel(value, ioaddr + DMA_INTR_ENA);
}
void dwmac_dma_start_tx(void __iomem *ioaddr, u32 chan)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
index 3b6e559aa0b9..64d13e50e403 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
@@ -73,6 +73,9 @@
#define XGMAC_RXQ_CTRL0 0x000000a0
#define XGMAC_RXQEN(x) GENMASK((x) * 2 + 1, (x) * 2)
#define XGMAC_RXQEN_SHIFT(x) ((x) * 2)
+#define XGMAC_RXQ_CTRL1 0x000000a4
+#define XGMAC_RQ GENMASK(7, 4)
+#define XGMAC_RQ_SHIFT 4
#define XGMAC_RXQ_CTRL2 0x000000a8
#define XGMAC_RXQ_CTRL3 0x000000ac
#define XGMAC_PSRQ(x) GENMASK((x) * 8 + 7, (x) * 8)
@@ -136,6 +139,10 @@
#define XGMAC_HWFEAT_TXQCNT GENMASK(9, 6)
#define XGMAC_HWFEAT_RXQCNT GENMASK(3, 0)
#define XGMAC_HW_FEATURE3 0x00000128
+#define XGMAC_HWFEAT_FPESEL BIT(26)
+#define XGMAC_HWFEAT_ESTWID GENMASK(24, 23)
+#define XGMAC_HWFEAT_ESTDEP GENMASK(22, 20)
+#define XGMAC_HWFEAT_ESTSEL BIT(19)
#define XGMAC_HWFEAT_ASP GENMASK(15, 14)
#define XGMAC_HWFEAT_DVLAN BIT(13)
#define XGMAC_HWFEAT_FRPES GENMASK(12, 11)
@@ -148,6 +155,8 @@
#define XGMAC_MDIO_ADDR 0x00000200
#define XGMAC_MDIO_DATA 0x00000204
#define XGMAC_MDIO_C22P 0x00000220
+#define XGMAC_FPE_CTRL_STS 0x00000280
+#define XGMAC_EFPE BIT(0)
#define XGMAC_ADDRx_HIGH(x) (0x00000300 + (x) * 0x8)
#define XGMAC_ADDR_MAX 32
#define XGMAC_AE BIT(31)
@@ -237,6 +246,22 @@
#define XGMAC_TC_PRTY_MAP1 0x00001044
#define XGMAC_PSTC(x) GENMASK((x) * 8 + 7, (x) * 8)
#define XGMAC_PSTC_SHIFT(x) ((x) * 8)
+#define XGMAC_MTL_EST_CONTROL 0x00001050
+#define XGMAC_PTOV GENMASK(31, 23)
+#define XGMAC_PTOV_SHIFT 23
+#define XGMAC_SSWL BIT(1)
+#define XGMAC_EEST BIT(0)
+#define XGMAC_MTL_EST_GCL_CONTROL 0x00001080
+#define XGMAC_BTR_LOW 0x0
+#define XGMAC_BTR_HIGH 0x1
+#define XGMAC_CTR_LOW 0x2
+#define XGMAC_CTR_HIGH 0x3
+#define XGMAC_TER 0x4
+#define XGMAC_LLR 0x5
+#define XGMAC_ADDR_SHIFT 8
+#define XGMAC_GCRR BIT(2)
+#define XGMAC_SRWO BIT(0)
+#define XGMAC_MTL_EST_GCL_DATA 0x00001084
#define XGMAC_MTL_RXP_CONTROL_STATUS 0x000010a0
#define XGMAC_RXPI BIT(31)
#define XGMAC_NPE GENMASK(23, 16)
@@ -343,6 +368,8 @@
#define XGMAC_DMA_CH_RX_CONTROL(x) (0x00003108 + (0x80 * (x)))
#define XGMAC_RxPBL GENMASK(21, 16)
#define XGMAC_RxPBL_SHIFT 16
+#define XGMAC_RBSZ GENMASK(14, 1)
+#define XGMAC_RBSZ_SHIFT 1
#define XGMAC_RXST BIT(0)
#define XGMAC_DMA_CH_TxDESC_HADDR(x) (0x00003110 + (0x80 * (x)))
#define XGMAC_DMA_CH_TxDESC_LADDR(x) (0x00003114 + (0x80 * (x)))
@@ -361,6 +388,8 @@
#define XGMAC_TIE BIT(0)
#define XGMAC_DMA_INT_DEFAULT_EN (XGMAC_NIE | XGMAC_AIE | XGMAC_RBUE | \
XGMAC_RIE | XGMAC_TIE)
+#define XGMAC_DMA_INT_DEFAULT_RX (XGMAC_RBUE | XGMAC_RIE)
+#define XGMAC_DMA_INT_DEFAULT_TX (XGMAC_TIE)
#define XGMAC_DMA_CH_Rx_WATCHDOG(x) (0x0000313c + (0x80 * (x)))
#define XGMAC_RWT GENMASK(7, 0)
#define XGMAC_DMA_CH_STATUS(x) (0x00003160 + (0x80 * (x)))
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
index 082f5ee9e525..2af3ac5409b7 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
@@ -1359,6 +1359,81 @@ static void dwxgmac2_set_arp_offload(struct mac_device_info *hw, bool en,
writel(value, ioaddr + XGMAC_RX_CONFIG);
}
+static int dwxgmac3_est_write(void __iomem *ioaddr, u32 reg, u32 val, bool gcl)
+{
+ u32 ctrl;
+
+ writel(val, ioaddr + XGMAC_MTL_EST_GCL_DATA);
+
+ ctrl = (reg << XGMAC_ADDR_SHIFT);
+ ctrl |= gcl ? 0 : XGMAC_GCRR;
+
+ writel(ctrl, ioaddr + XGMAC_MTL_EST_GCL_CONTROL);
+
+ ctrl |= XGMAC_SRWO;
+ writel(ctrl, ioaddr + XGMAC_MTL_EST_GCL_CONTROL);
+
+ return readl_poll_timeout_atomic(ioaddr + XGMAC_MTL_EST_GCL_CONTROL,
+ ctrl, !(ctrl & XGMAC_SRWO), 100, 5000);
+}
+
+static int dwxgmac3_est_configure(void __iomem *ioaddr, struct stmmac_est *cfg,
+ unsigned int ptp_rate)
+{
+ int i, ret = 0x0;
+ u32 ctrl;
+
+ ret |= dwxgmac3_est_write(ioaddr, XGMAC_BTR_LOW, cfg->btr[0], false);
+ ret |= dwxgmac3_est_write(ioaddr, XGMAC_BTR_HIGH, cfg->btr[1], false);
+ ret |= dwxgmac3_est_write(ioaddr, XGMAC_TER, cfg->ter, false);
+ ret |= dwxgmac3_est_write(ioaddr, XGMAC_LLR, cfg->gcl_size, false);
+ ret |= dwxgmac3_est_write(ioaddr, XGMAC_CTR_LOW, cfg->ctr[0], false);
+ ret |= dwxgmac3_est_write(ioaddr, XGMAC_CTR_HIGH, cfg->ctr[1], false);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < cfg->gcl_size; i++) {
+ ret = dwxgmac3_est_write(ioaddr, i, cfg->gcl[i], true);
+ if (ret)
+ return ret;
+ }
+
+ ctrl = readl(ioaddr + XGMAC_MTL_EST_CONTROL);
+ ctrl &= ~XGMAC_PTOV;
+ ctrl |= ((1000000000 / ptp_rate) * 9) << XGMAC_PTOV_SHIFT;
+ if (cfg->enable)
+ ctrl |= XGMAC_EEST | XGMAC_SSWL;
+ else
+ ctrl &= ~XGMAC_EEST;
+
+ writel(ctrl, ioaddr + XGMAC_MTL_EST_CONTROL);
+ return 0;
+}
+
+static void dwxgmac3_fpe_configure(void __iomem *ioaddr, u32 num_txq,
+ u32 num_rxq, bool enable)
+{
+ u32 value;
+
+ if (!enable) {
+ value = readl(ioaddr + XGMAC_FPE_CTRL_STS);
+
+ value &= ~XGMAC_EFPE;
+
+ writel(value, ioaddr + XGMAC_FPE_CTRL_STS);
+ return;
+ }
+
+ value = readl(ioaddr + XGMAC_RXQ_CTRL1);
+ value &= ~XGMAC_RQ;
+ value |= (num_rxq - 1) << XGMAC_RQ_SHIFT;
+ writel(value, ioaddr + XGMAC_RXQ_CTRL1);
+
+ value = readl(ioaddr + XGMAC_FPE_CTRL_STS);
+ value |= XGMAC_EFPE;
+ writel(value, ioaddr + XGMAC_FPE_CTRL_STS);
+}
+
const struct stmmac_ops dwxgmac210_ops = {
.core_init = dwxgmac2_core_init,
.set_mac = dwxgmac2_set_mac,
@@ -1402,6 +1477,8 @@ const struct stmmac_ops dwxgmac210_ops = {
.config_l3_filter = dwxgmac2_config_l3_filter,
.config_l4_filter = dwxgmac2_config_l4_filter,
.set_arp_offload = dwxgmac2_set_arp_offload,
+ .est_configure = dwxgmac3_est_configure,
+ .fpe_configure = dwxgmac3_fpe_configure,
};
int dwxgmac2_setup(struct stmmac_priv *priv)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
index 22a7f0cc1b90..bbbfa793a367 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c
@@ -248,14 +248,30 @@ static void dwxgmac2_dma_tx_mode(void __iomem *ioaddr, int mode,
writel(value, ioaddr + XGMAC_MTL_TXQ_OPMODE(channel));
}
-static void dwxgmac2_enable_dma_irq(void __iomem *ioaddr, u32 chan)
+static void dwxgmac2_enable_dma_irq(void __iomem *ioaddr, u32 chan,
+ bool rx, bool tx)
{
- writel(XGMAC_DMA_INT_DEFAULT_EN, ioaddr + XGMAC_DMA_CH_INT_EN(chan));
+ u32 value = readl(ioaddr + XGMAC_DMA_CH_INT_EN(chan));
+
+ if (rx)
+ value |= XGMAC_DMA_INT_DEFAULT_RX;
+ if (tx)
+ value |= XGMAC_DMA_INT_DEFAULT_TX;
+
+ writel(value, ioaddr + XGMAC_DMA_CH_INT_EN(chan));
}
-static void dwxgmac2_disable_dma_irq(void __iomem *ioaddr, u32 chan)
+static void dwxgmac2_disable_dma_irq(void __iomem *ioaddr, u32 chan,
+ bool rx, bool tx)
{
- writel(0, ioaddr + XGMAC_DMA_CH_INT_EN(chan));
+ u32 value = readl(ioaddr + XGMAC_DMA_CH_INT_EN(chan));
+
+ if (rx)
+ value &= ~XGMAC_DMA_INT_DEFAULT_RX;
+ if (tx)
+ value &= ~XGMAC_DMA_INT_DEFAULT_TX;
+
+ writel(value, ioaddr + XGMAC_DMA_CH_INT_EN(chan));
}
static void dwxgmac2_dma_start_tx(void __iomem *ioaddr, u32 chan)
@@ -413,6 +429,10 @@ static void dwxgmac2_get_hw_feature(void __iomem *ioaddr,
/* MAC HW feature 3 */
hw_cap = readl(ioaddr + XGMAC_HW_FEATURE3);
+ dma_cap->fpesel = (hw_cap & XGMAC_HWFEAT_FPESEL) >> 26;
+ dma_cap->estwid = (hw_cap & XGMAC_HWFEAT_ESTWID) >> 23;
+ dma_cap->estdep = (hw_cap & XGMAC_HWFEAT_ESTDEP) >> 20;
+ dma_cap->estsel = (hw_cap & XGMAC_HWFEAT_ESTSEL) >> 19;
dma_cap->asp = (hw_cap & XGMAC_HWFEAT_ASP) >> 14;
dma_cap->dvlan = (hw_cap & XGMAC_HWFEAT_DVLAN) >> 13;
dma_cap->frpes = (hw_cap & XGMAC_HWFEAT_FRPES) >> 11;
@@ -482,7 +502,8 @@ static void dwxgmac2_set_bfsize(void __iomem *ioaddr, int bfsize, u32 chan)
u32 value;
value = readl(ioaddr + XGMAC_DMA_CH_RX_CONTROL(chan));
- value |= bfsize << 1;
+ value &= ~XGMAC_RBSZ;
+ value |= bfsize << XGMAC_RBSZ_SHIFT;
writel(value, ioaddr + XGMAC_DMA_CH_RX_CONTROL(chan));
}
diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h
index aa5b917398fe..905a6f0edaca 100644
--- a/drivers/net/ethernet/stmicro/stmmac/hwif.h
+++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h
@@ -187,8 +187,10 @@ struct stmmac_dma_ops {
void (*dma_diagnostic_fr) (void *data, struct stmmac_extra_stats *x,
void __iomem *ioaddr);
void (*enable_dma_transmission) (void __iomem *ioaddr);
- void (*enable_dma_irq)(void __iomem *ioaddr, u32 chan);
- void (*disable_dma_irq)(void __iomem *ioaddr, u32 chan);
+ void (*enable_dma_irq)(void __iomem *ioaddr, u32 chan,
+ bool rx, bool tx);
+ void (*disable_dma_irq)(void __iomem *ioaddr, u32 chan,
+ bool rx, bool tx);
void (*start_tx)(void __iomem *ioaddr, u32 chan);
void (*stop_tx)(void __iomem *ioaddr, u32 chan);
void (*start_rx)(void __iomem *ioaddr, u32 chan);
@@ -274,6 +276,7 @@ struct stmmac_safety_stats;
struct stmmac_tc_entry;
struct stmmac_pps_cfg;
struct stmmac_rss;
+struct stmmac_est;
/* Helpers to program the MAC core */
struct stmmac_ops {
@@ -371,6 +374,10 @@ struct stmmac_ops {
bool en, bool udp, bool sa, bool inv,
u32 match);
void (*set_arp_offload)(struct mac_device_info *hw, bool en, u32 addr);
+ int (*est_configure)(void __iomem *ioaddr, struct stmmac_est *cfg,
+ unsigned int ptp_rate);
+ void (*fpe_configure)(void __iomem *ioaddr, u32 num_txq, u32 num_rxq,
+ bool enable);
};
#define stmmac_core_init(__priv, __args...) \
@@ -457,6 +464,10 @@ struct stmmac_ops {
stmmac_do_callback(__priv, mac, config_l4_filter, __args)
#define stmmac_set_arp_offload(__priv, __args...) \
stmmac_do_void_callback(__priv, mac, set_arp_offload, __args)
+#define stmmac_est_configure(__priv, __args...) \
+ stmmac_do_callback(__priv, mac, est_configure, __args)
+#define stmmac_fpe_configure(__priv, __args...) \
+ stmmac_do_void_callback(__priv, mac, fpe_configure, __args)
/* PTP and HW Timer helpers */
struct stmmac_hwtimestamp {
@@ -514,6 +525,7 @@ struct stmmac_priv;
struct tc_cls_u32_offload;
struct tc_cbs_qopt_offload;
struct flow_cls_offload;
+struct tc_taprio_qopt_offload;
struct stmmac_tc_ops {
int (*init)(struct stmmac_priv *priv);
@@ -523,6 +535,8 @@ struct stmmac_tc_ops {
struct tc_cbs_qopt_offload *qopt);
int (*setup_cls)(struct stmmac_priv *priv,
struct flow_cls_offload *cls);
+ int (*setup_taprio)(struct stmmac_priv *priv,
+ struct tc_taprio_qopt_offload *qopt);
};
#define stmmac_tc_init(__priv, __args...) \
@@ -533,6 +547,8 @@ struct stmmac_tc_ops {
stmmac_do_callback(__priv, tc, setup_cbs, __args)
#define stmmac_tc_setup_cls(__priv, __args...) \
stmmac_do_callback(__priv, tc, setup_cls, __args)
+#define stmmac_tc_setup_taprio(__priv, __args...) \
+ stmmac_do_callback(__priv, tc, setup_taprio, __args)
struct stmmac_counters;
diff --git a/drivers/net/ethernet/stmicro/stmmac/mmc_core.c b/drivers/net/ethernet/stmicro/stmmac/mmc_core.c
index 252cf48c5816..a57b0fa815ab 100644
--- a/drivers/net/ethernet/stmicro/stmmac/mmc_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/mmc_core.c
@@ -119,6 +119,13 @@
#define MMC_RX_ICMP_GD_OCTETS 0x180
#define MMC_RX_ICMP_ERR_OCTETS 0x184
+#define MMC_TX_FPE_FRAG 0x1a8
+#define MMC_TX_HOLD_REQ 0x1ac
+#define MMC_RX_PKT_ASSEMBLY_ERR 0x1c8
+#define MMC_RX_PKT_SMD_ERR 0x1cc
+#define MMC_RX_PKT_ASSEMBLY_OK 0x1d0
+#define MMC_RX_FPE_FRAG 0x1d4
+
/* XGMAC MMC Registers */
#define MMC_XGMAC_TX_OCTET_GB 0x14
#define MMC_XGMAC_TX_PKT_GB 0x1c
@@ -315,6 +322,15 @@ static void dwmac_mmc_read(void __iomem *mmcaddr, struct stmmac_counters *mmc)
mmc->mmc_rx_tcp_err_octets += readl(mmcaddr + MMC_RX_TCP_ERR_OCTETS);
mmc->mmc_rx_icmp_gd_octets += readl(mmcaddr + MMC_RX_ICMP_GD_OCTETS);
mmc->mmc_rx_icmp_err_octets += readl(mmcaddr + MMC_RX_ICMP_ERR_OCTETS);
+
+ mmc->mmc_tx_fpe_fragment_cntr += readl(mmcaddr + MMC_TX_FPE_FRAG);
+ mmc->mmc_tx_hold_req_cntr += readl(mmcaddr + MMC_TX_HOLD_REQ);
+ mmc->mmc_rx_packet_assembly_err_cntr +=
+ readl(mmcaddr + MMC_RX_PKT_ASSEMBLY_ERR);
+ mmc->mmc_rx_packet_smd_err_cntr += readl(mmcaddr + MMC_RX_PKT_SMD_ERR);
+ mmc->mmc_rx_packet_assembly_ok_cntr +=
+ readl(mmcaddr + MMC_RX_PKT_ASSEMBLY_OK);
+ mmc->mmc_rx_fpe_fragment_cntr += readl(mmcaddr + MMC_RX_FPE_FRAG);
}
const struct stmmac_mmc_ops dwmac_mmc_ops = {
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index d993fc7e82c3..f98c5eefb382 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -88,6 +88,7 @@ struct stmmac_channel {
struct napi_struct rx_napi ____cacheline_aligned_in_smp;
struct napi_struct tx_napi ____cacheline_aligned_in_smp;
struct stmmac_priv *priv_data;
+ spinlock_t lock;
u32 index;
};
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
index 1a768837ca72..b29603ec744c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
@@ -34,7 +34,7 @@ struct stmmac_stats {
};
#define STMMAC_STAT(m) \
- { #m, FIELD_SIZEOF(struct stmmac_extra_stats, m), \
+ { #m, sizeof_field(struct stmmac_extra_stats, m), \
offsetof(struct stmmac_priv, xstats.m)}
static const struct stmmac_stats stmmac_gstrings_stats[] = {
@@ -163,7 +163,7 @@ static const struct stmmac_stats stmmac_gstrings_stats[] = {
/* HW MAC Management counters (if supported) */
#define STMMAC_MMC_STAT(m) \
- { #m, FIELD_SIZEOF(struct stmmac_counters, m), \
+ { #m, sizeof_field(struct stmmac_counters, m), \
offsetof(struct stmmac_priv, mmc.m)}
static const struct stmmac_stats stmmac_mmc[] = {
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index bbc65bd332a8..f113138df0d9 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -46,7 +46,7 @@
#include "dwxgmac2.h"
#include "hwif.h"
-#define STMMAC_ALIGN(x) __ALIGN_KERNEL(x, SMP_CACHE_BYTES)
+#define STMMAC_ALIGN(x) ALIGN(ALIGN(x, SMP_CACHE_BYTES), 16)
#define TSO_MAX_BUFF_SIZE (SZ_16K - 1)
/* Module parameters */
@@ -106,6 +106,7 @@ MODULE_PARM_DESC(chain_mode, "To use chain instead of ring mode");
static irqreturn_t stmmac_interrupt(int irq, void *dev_id);
#ifdef CONFIG_DEBUG_FS
+static const struct net_device_ops stmmac_netdev_ops;
static void stmmac_init_fs(struct net_device *dev);
static void stmmac_exit_fs(struct net_device *dev);
#endif
@@ -1109,7 +1110,9 @@ static int stmmac_set_bfsize(int mtu, int bufsize)
{
int ret = bufsize;
- if (mtu >= BUF_SIZE_4KiB)
+ if (mtu >= BUF_SIZE_8KiB)
+ ret = BUF_SIZE_16KiB;
+ else if (mtu >= BUF_SIZE_4KiB)
ret = BUF_SIZE_8KiB;
else if (mtu >= BUF_SIZE_2KiB)
ret = BUF_SIZE_4KiB;
@@ -1293,19 +1296,9 @@ static int init_dma_rx_desc_rings(struct net_device *dev, gfp_t flags)
struct stmmac_priv *priv = netdev_priv(dev);
u32 rx_count = priv->plat->rx_queues_to_use;
int ret = -ENOMEM;
- int bfsize = 0;
int queue;
int i;
- bfsize = stmmac_set_16kib_bfsize(priv, dev->mtu);
- if (bfsize < 0)
- bfsize = 0;
-
- if (bfsize < BUF_SIZE_16KiB)
- bfsize = stmmac_set_bfsize(dev->mtu, priv->dma_buf_sz);
-
- priv->dma_buf_sz = bfsize;
-
/* RX INITIALIZATION */
netif_dbg(priv, probe, priv->dev,
"SKB addresses:\nskb\t\tskb data\tdma data\n");
@@ -1347,8 +1340,6 @@ static int init_dma_rx_desc_rings(struct net_device *dev, gfp_t flags)
}
}
- buf_sz = bfsize;
-
return 0;
err_init_rx_buffers:
@@ -1975,7 +1966,7 @@ static int stmmac_tx_clean(struct stmmac_priv *priv, int budget, u32 queue)
/* We still have pending packets, let's call for a new scheduling */
if (tx_q->dirty_tx != tx_q->cur_tx)
- mod_timer(&tx_q->txtimer, STMMAC_COAL_TIMER(10));
+ mod_timer(&tx_q->txtimer, STMMAC_COAL_TIMER(priv->tx_coal_timer));
__netif_tx_unlock_bh(netdev_get_tx_queue(priv->dev, queue));
@@ -2069,17 +2060,25 @@ static int stmmac_napi_check(struct stmmac_priv *priv, u32 chan)
int status = stmmac_dma_interrupt_status(priv, priv->ioaddr,
&priv->xstats, chan);
struct stmmac_channel *ch = &priv->channel[chan];
+ unsigned long flags;
if ((status & handle_rx) && (chan < priv->plat->rx_queues_to_use)) {
if (napi_schedule_prep(&ch->rx_napi)) {
- stmmac_disable_dma_irq(priv, priv->ioaddr, chan);
+ spin_lock_irqsave(&ch->lock, flags);
+ stmmac_disable_dma_irq(priv, priv->ioaddr, chan, 1, 0);
+ spin_unlock_irqrestore(&ch->lock, flags);
__napi_schedule_irqoff(&ch->rx_napi);
- status |= handle_tx;
}
}
- if ((status & handle_tx) && (chan < priv->plat->tx_queues_to_use))
- napi_schedule_irqoff(&ch->tx_napi);
+ if ((status & handle_tx) && (chan < priv->plat->tx_queues_to_use)) {
+ if (napi_schedule_prep(&ch->tx_napi)) {
+ spin_lock_irqsave(&ch->lock, flags);
+ stmmac_disable_dma_irq(priv, priv->ioaddr, chan, 0, 1);
+ spin_unlock_irqrestore(&ch->lock, flags);
+ __napi_schedule_irqoff(&ch->tx_napi);
+ }
+ }
return status;
}
@@ -2274,14 +2273,14 @@ static void stmmac_tx_timer(struct timer_list *t)
ch = &priv->channel[tx_q->queue_index];
- /*
- * If NAPI is already running we can miss some events. Let's rearm
- * the timer and try again.
- */
- if (likely(napi_schedule_prep(&ch->tx_napi)))
+ if (likely(napi_schedule_prep(&ch->tx_napi))) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&ch->lock, flags);
+ stmmac_disable_dma_irq(priv, priv->ioaddr, ch->index, 0, 1);
+ spin_unlock_irqrestore(&ch->lock, flags);
__napi_schedule(&ch->tx_napi);
- else
- mod_timer(&tx_q->txtimer, STMMAC_COAL_TIMER(10));
+ }
}
/**
@@ -2658,6 +2657,7 @@ static void stmmac_hw_teardown(struct net_device *dev)
static int stmmac_open(struct net_device *dev)
{
struct stmmac_priv *priv = netdev_priv(dev);
+ int bfsize = 0;
u32 chan;
int ret;
@@ -2677,7 +2677,16 @@ static int stmmac_open(struct net_device *dev)
memset(&priv->xstats, 0, sizeof(struct stmmac_extra_stats));
priv->xstats.threshold = tc;
- priv->dma_buf_sz = STMMAC_ALIGN(buf_sz);
+ bfsize = stmmac_set_16kib_bfsize(priv, dev->mtu);
+ if (bfsize < 0)
+ bfsize = 0;
+
+ if (bfsize < BUF_SIZE_16KiB)
+ bfsize = stmmac_set_bfsize(dev->mtu, priv->dma_buf_sz);
+
+ priv->dma_buf_sz = bfsize;
+ buf_sz = bfsize;
+
priv->rx_copybreak = STMMAC_RX_COPYBREAK;
ret = alloc_dma_desc_resources(priv);
@@ -3053,8 +3062,6 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
tx_q->tx_count_frames = 0;
stmmac_set_tx_ic(priv, desc);
priv->xstats.tx_set_ic_bit++;
- } else {
- stmmac_tx_timer_arm(priv, queue);
}
/* We've used all descriptors we need for this skb, however,
@@ -3125,6 +3132,7 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
tx_q->tx_tail_addr = tx_q->dma_tx_phy + (tx_q->cur_tx * sizeof(*desc));
stmmac_set_tx_tail_ptr(priv, priv->ioaddr, tx_q->tx_tail_addr, queue);
+ stmmac_tx_timer_arm(priv, queue);
return NETDEV_TX_OK;
@@ -3276,8 +3284,6 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
tx_q->tx_count_frames = 0;
stmmac_set_tx_ic(priv, desc);
priv->xstats.tx_set_ic_bit++;
- } else {
- stmmac_tx_timer_arm(priv, queue);
}
/* We've used all descriptors we need for this skb, however,
@@ -3366,6 +3372,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
tx_q->tx_tail_addr = tx_q->dma_tx_phy + (tx_q->cur_tx * sizeof(*desc));
stmmac_set_tx_tail_ptr(priv, priv->ioaddr, tx_q->tx_tail_addr, queue);
+ stmmac_tx_timer_arm(priv, queue);
return NETDEV_TX_OK;
@@ -3646,8 +3653,9 @@ read_again:
* feature is always disabled and packets need to be
* stripped manually.
*/
- if (unlikely(priv->synopsys_id >= DWMAC_CORE_4_00) ||
- unlikely(status != llc_snap)) {
+ if (likely(!(status & rx_not_ls)) &&
+ (likely(priv->synopsys_id >= DWMAC_CORE_4_00) ||
+ unlikely(status != llc_snap))) {
if (buf2_len)
buf2_len -= ETH_FCS_LEN;
else
@@ -3751,8 +3759,14 @@ static int stmmac_napi_poll_rx(struct napi_struct *napi, int budget)
priv->xstats.napi_poll++;
work_done = stmmac_rx(priv, budget, chan);
- if (work_done < budget && napi_complete_done(napi, work_done))
- stmmac_enable_dma_irq(priv, priv->ioaddr, chan);
+ if (work_done < budget && napi_complete_done(napi, work_done)) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&ch->lock, flags);
+ stmmac_enable_dma_irq(priv, priv->ioaddr, chan, 1, 0);
+ spin_unlock_irqrestore(&ch->lock, flags);
+ }
+
return work_done;
}
@@ -3761,7 +3775,6 @@ static int stmmac_napi_poll_tx(struct napi_struct *napi, int budget)
struct stmmac_channel *ch =
container_of(napi, struct stmmac_channel, tx_napi);
struct stmmac_priv *priv = ch->priv_data;
- struct stmmac_tx_queue *tx_q;
u32 chan = ch->index;
int work_done;
@@ -3770,15 +3783,12 @@ static int stmmac_napi_poll_tx(struct napi_struct *napi, int budget)
work_done = stmmac_tx_clean(priv, DMA_TX_SIZE, chan);
work_done = min(work_done, budget);
- if (work_done < budget)
- napi_complete_done(napi, work_done);
+ if (work_done < budget && napi_complete_done(napi, work_done)) {
+ unsigned long flags;
- /* Force transmission restart */
- tx_q = &priv->tx_queue[chan];
- if (tx_q->cur_tx != tx_q->dirty_tx) {
- stmmac_enable_dma_transmission(priv, priv->ioaddr);
- stmmac_set_tx_tail_ptr(priv, priv->ioaddr, tx_q->tx_tail_addr,
- chan);
+ spin_lock_irqsave(&ch->lock, flags);
+ stmmac_enable_dma_irq(priv, priv->ioaddr, chan, 0, 1);
+ spin_unlock_irqrestore(&ch->lock, flags);
}
return work_done;
@@ -3792,7 +3802,7 @@ static int stmmac_napi_poll_tx(struct napi_struct *napi, int budget)
* netdev structure and arrange for the device to be reset to a sane state
* in order to transmit a new packet.
*/
-static void stmmac_tx_timeout(struct net_device *dev)
+static void stmmac_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct stmmac_priv *priv = netdev_priv(dev);
@@ -3829,12 +3839,24 @@ static void stmmac_set_rx_mode(struct net_device *dev)
static int stmmac_change_mtu(struct net_device *dev, int new_mtu)
{
struct stmmac_priv *priv = netdev_priv(dev);
+ int txfifosz = priv->plat->tx_fifo_size;
+
+ if (txfifosz == 0)
+ txfifosz = priv->dma_cap.tx_fifo_size;
+
+ txfifosz /= priv->plat->tx_queues_to_use;
if (netif_running(dev)) {
netdev_err(priv->dev, "must be stopped to change its MTU\n");
return -EBUSY;
}
+ new_mtu = STMMAC_ALIGN(new_mtu);
+
+ /* If condition true, FIFO is too small or MTU too large */
+ if ((txfifosz < new_mtu) || (new_mtu > BUF_SIZE_16KiB))
+ return -EINVAL;
+
dev->mtu = new_mtu;
netdev_update_features(dev);
@@ -4066,6 +4088,8 @@ static int stmmac_setup_tc(struct net_device *ndev, enum tc_setup_type type,
priv, priv, true);
case TC_SETUP_QDISC_CBS:
return stmmac_tc_setup_cbs(priv, priv, type_data);
+ case TC_SETUP_QDISC_TAPRIO:
+ return stmmac_tc_setup_taprio(priv, priv, type_data);
default:
return -EOPNOTSUPP;
}
@@ -4238,13 +4262,70 @@ static int stmmac_dma_cap_show(struct seq_file *seq, void *v)
priv->dma_cap.number_rx_channel);
seq_printf(seq, "\tNumber of Additional TX channel: %d\n",
priv->dma_cap.number_tx_channel);
+ seq_printf(seq, "\tNumber of Additional RX queues: %d\n",
+ priv->dma_cap.number_rx_queues);
+ seq_printf(seq, "\tNumber of Additional TX queues: %d\n",
+ priv->dma_cap.number_tx_queues);
seq_printf(seq, "\tEnhanced descriptors: %s\n",
(priv->dma_cap.enh_desc) ? "Y" : "N");
-
+ seq_printf(seq, "\tTX Fifo Size: %d\n", priv->dma_cap.tx_fifo_size);
+ seq_printf(seq, "\tRX Fifo Size: %d\n", priv->dma_cap.rx_fifo_size);
+ seq_printf(seq, "\tHash Table Size: %d\n", priv->dma_cap.hash_tb_sz);
+ seq_printf(seq, "\tTSO: %s\n", priv->dma_cap.tsoen ? "Y" : "N");
+ seq_printf(seq, "\tNumber of PPS Outputs: %d\n",
+ priv->dma_cap.pps_out_num);
+ seq_printf(seq, "\tSafety Features: %s\n",
+ priv->dma_cap.asp ? "Y" : "N");
+ seq_printf(seq, "\tFlexible RX Parser: %s\n",
+ priv->dma_cap.frpsel ? "Y" : "N");
+ seq_printf(seq, "\tEnhanced Addressing: %d\n",
+ priv->dma_cap.addr64);
+ seq_printf(seq, "\tReceive Side Scaling: %s\n",
+ priv->dma_cap.rssen ? "Y" : "N");
+ seq_printf(seq, "\tVLAN Hash Filtering: %s\n",
+ priv->dma_cap.vlhash ? "Y" : "N");
+ seq_printf(seq, "\tSplit Header: %s\n",
+ priv->dma_cap.sphen ? "Y" : "N");
+ seq_printf(seq, "\tVLAN TX Insertion: %s\n",
+ priv->dma_cap.vlins ? "Y" : "N");
+ seq_printf(seq, "\tDouble VLAN: %s\n",
+ priv->dma_cap.dvlan ? "Y" : "N");
+ seq_printf(seq, "\tNumber of L3/L4 Filters: %d\n",
+ priv->dma_cap.l3l4fnum);
+ seq_printf(seq, "\tARP Offloading: %s\n",
+ priv->dma_cap.arpoffsel ? "Y" : "N");
return 0;
}
DEFINE_SHOW_ATTRIBUTE(stmmac_dma_cap);
+/* Use network device events to rename debugfs file entries.
+ */
+static int stmmac_device_event(struct notifier_block *unused,
+ unsigned long event, void *ptr)
+{
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct stmmac_priv *priv = netdev_priv(dev);
+
+ if (dev->netdev_ops != &stmmac_netdev_ops)
+ goto done;
+
+ switch (event) {
+ case NETDEV_CHANGENAME:
+ if (priv->dbgfs_dir)
+ priv->dbgfs_dir = debugfs_rename(stmmac_fs_dir,
+ priv->dbgfs_dir,
+ stmmac_fs_dir,
+ dev->name);
+ break;
+ }
+done:
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block stmmac_notifier = {
+ .notifier_call = stmmac_device_event,
+};
+
static void stmmac_init_fs(struct net_device *dev)
{
struct stmmac_priv *priv = netdev_priv(dev);
@@ -4259,12 +4340,15 @@ static void stmmac_init_fs(struct net_device *dev)
/* Entry to report the DMA HW features */
debugfs_create_file("dma_cap", 0444, priv->dbgfs_dir, dev,
&stmmac_dma_cap_fops);
+
+ register_netdevice_notifier(&stmmac_notifier);
}
static void stmmac_exit_fs(struct net_device *dev)
{
struct stmmac_priv *priv = netdev_priv(dev);
+ unregister_netdevice_notifier(&stmmac_notifier);
debugfs_remove_recursive(priv->dbgfs_dir);
}
#endif /* CONFIG_DEBUG_FS */
@@ -4685,6 +4769,7 @@ int stmmac_dvr_probe(struct device *device,
for (queue = 0; queue < maxq; queue++) {
struct stmmac_channel *ch = &priv->channel[queue];
+ spin_lock_init(&ch->lock);
ch->priv_data = priv;
ch->index = queue;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
index 8237dbc3e991..40f171d310d7 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
@@ -65,7 +65,6 @@ static void common_default_data(struct plat_stmmacenet_data *plat)
plat->force_sf_dma_mode = 1;
plat->mdio_bus_data->needs_reset = true;
- plat->mdio_bus_data->phy_mask = 0;
/* Set default value for multicast hash bins */
plat->multicast_filter_bins = HASH_TABLE_SIZE;
@@ -154,8 +153,6 @@ static int intel_mgbe_common_data(struct pci_dev *pdev,
plat->tx_queues_cfg[6].weight = 0x0F;
plat->tx_queues_cfg[7].weight = 0x10;
- plat->mdio_bus_data->phy_mask = 0;
-
plat->dma_cfg->pbl = 32;
plat->dma_cfg->pblx8 = true;
plat->dma_cfg->fixed_burst = 0;
@@ -386,8 +383,6 @@ static int snps_gmac5_default_data(struct pci_dev *pdev,
plat->tso_en = 1;
plat->pmt = 1;
- plat->mdio_bus_data->phy_mask = 0;
-
/* Set default value for multicast hash bins */
plat->multicast_filter_bins = HASH_TABLE_SIZE;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
index bedaff0c13bd..4775f49d7f3b 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
@@ -320,7 +320,7 @@ out:
static int stmmac_dt_phy(struct plat_stmmacenet_data *plat,
struct device_node *np, struct device *dev)
{
- bool mdio = true;
+ bool mdio = !of_phy_is_fixed_link(np);
static const struct of_device_id need_mdio_ids[] = {
{ .compatible = "snps,dwc-qos-ethernet-4.10" },
{},
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c
index f3d8b9336b8e..13227909287c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_selftests.c
@@ -624,6 +624,8 @@ static int stmmac_test_mcfilt(struct stmmac_priv *priv)
return -EOPNOTSUPP;
if (netdev_uc_count(priv->dev) >= priv->hw->unicast_filter_entries)
return -EOPNOTSUPP;
+ if (netdev_mc_count(priv->dev) >= priv->hw->multicast_filter_bins)
+ return -EOPNOTSUPP;
while (--tries) {
/* We only need to check the mc_addr for collisions */
@@ -666,6 +668,8 @@ static int stmmac_test_ucfilt(struct stmmac_priv *priv)
if (stmmac_filter_check(priv))
return -EOPNOTSUPP;
+ if (netdev_uc_count(priv->dev) >= priv->hw->unicast_filter_entries)
+ return -EOPNOTSUPP;
if (netdev_mc_count(priv->dev) >= priv->hw->multicast_filter_bins)
return -EOPNOTSUPP;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
index 7d972e0fd2b0..6c4686b77516 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
@@ -591,9 +591,146 @@ static int tc_setup_cls(struct stmmac_priv *priv,
return ret;
}
+static int tc_setup_taprio(struct stmmac_priv *priv,
+ struct tc_taprio_qopt_offload *qopt)
+{
+ u32 size, wid = priv->dma_cap.estwid, dep = priv->dma_cap.estdep;
+ struct plat_stmmacenet_data *plat = priv->plat;
+ struct timespec64 time;
+ bool fpe = false;
+ int i, ret = 0;
+ u64 ctr;
+
+ if (!priv->dma_cap.estsel)
+ return -EOPNOTSUPP;
+
+ switch (wid) {
+ case 0x1:
+ wid = 16;
+ break;
+ case 0x2:
+ wid = 20;
+ break;
+ case 0x3:
+ wid = 24;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ switch (dep) {
+ case 0x1:
+ dep = 64;
+ break;
+ case 0x2:
+ dep = 128;
+ break;
+ case 0x3:
+ dep = 256;
+ break;
+ case 0x4:
+ dep = 512;
+ break;
+ case 0x5:
+ dep = 1024;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ if (!qopt->enable)
+ goto disable;
+ if (qopt->num_entries >= dep)
+ return -EINVAL;
+ if (!qopt->base_time)
+ return -ERANGE;
+ if (!qopt->cycle_time)
+ return -ERANGE;
+
+ if (!plat->est) {
+ plat->est = devm_kzalloc(priv->device, sizeof(*plat->est),
+ GFP_KERNEL);
+ if (!plat->est)
+ return -ENOMEM;
+ } else {
+ memset(plat->est, 0, sizeof(*plat->est));
+ }
+
+ size = qopt->num_entries;
+
+ priv->plat->est->gcl_size = size;
+ priv->plat->est->enable = qopt->enable;
+
+ for (i = 0; i < size; i++) {
+ s64 delta_ns = qopt->entries[i].interval;
+ u32 gates = qopt->entries[i].gate_mask;
+
+ if (delta_ns > GENMASK(wid, 0))
+ return -ERANGE;
+ if (gates > GENMASK(31 - wid, 0))
+ return -ERANGE;
+
+ switch (qopt->entries[i].command) {
+ case TC_TAPRIO_CMD_SET_GATES:
+ if (fpe)
+ return -EINVAL;
+ break;
+ case TC_TAPRIO_CMD_SET_AND_HOLD:
+ gates |= BIT(0);
+ fpe = true;
+ break;
+ case TC_TAPRIO_CMD_SET_AND_RELEASE:
+ gates &= ~BIT(0);
+ fpe = true;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ priv->plat->est->gcl[i] = delta_ns | (gates << wid);
+ }
+
+ /* Adjust for real system time */
+ time = ktime_to_timespec64(qopt->base_time);
+ priv->plat->est->btr[0] = (u32)time.tv_nsec;
+ priv->plat->est->btr[1] = (u32)time.tv_sec;
+
+ ctr = qopt->cycle_time;
+ priv->plat->est->ctr[0] = do_div(ctr, NSEC_PER_SEC);
+ priv->plat->est->ctr[1] = (u32)ctr;
+
+ if (fpe && !priv->dma_cap.fpesel)
+ return -EOPNOTSUPP;
+
+ ret = stmmac_fpe_configure(priv, priv->ioaddr,
+ priv->plat->tx_queues_to_use,
+ priv->plat->rx_queues_to_use, fpe);
+ if (ret && fpe) {
+ netdev_err(priv->dev, "failed to enable Frame Preemption\n");
+ return ret;
+ }
+
+ ret = stmmac_est_configure(priv, priv->ioaddr, priv->plat->est,
+ priv->plat->clk_ptp_rate);
+ if (ret) {
+ netdev_err(priv->dev, "failed to configure EST\n");
+ goto disable;
+ }
+
+ netdev_info(priv->dev, "configured EST\n");
+ return 0;
+
+disable:
+ priv->plat->est->enable = false;
+ stmmac_est_configure(priv, priv->ioaddr, priv->plat->est,
+ priv->plat->clk_ptp_rate);
+ return ret;
+}
+
const struct stmmac_tc_ops dwmac510_tc_ops = {
.init = tc_init,
.setup_cls_u32 = tc_setup_cls_u32,
.setup_cbs = tc_setup_cbs,
.setup_cls = tc_setup_cls,
+ .setup_taprio = tc_setup_taprio,
};
diff --git a/drivers/net/ethernet/sun/cassini.c b/drivers/net/ethernet/sun/cassini.c
index c91876f8c536..6ec9163e232c 100644
--- a/drivers/net/ethernet/sun/cassini.c
+++ b/drivers/net/ethernet/sun/cassini.c
@@ -2666,7 +2666,7 @@ static void cas_netpoll(struct net_device *dev)
}
#endif
-static void cas_tx_timeout(struct net_device *dev)
+static void cas_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct cas *cp = netdev_priv(dev);
diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c
index f5fd1f3c07cc..9a5004f674c7 100644
--- a/drivers/net/ethernet/sun/niu.c
+++ b/drivers/net/ethernet/sun/niu.c
@@ -6517,7 +6517,7 @@ static void niu_reset_task(struct work_struct *work)
spin_unlock_irqrestore(&np->lock, flags);
}
-static void niu_tx_timeout(struct net_device *dev)
+static void niu_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct niu *np = netdev_priv(dev);
diff --git a/drivers/net/ethernet/sun/sunbmac.c b/drivers/net/ethernet/sun/sunbmac.c
index e9b757b03b56..c5add0b45eed 100644
--- a/drivers/net/ethernet/sun/sunbmac.c
+++ b/drivers/net/ethernet/sun/sunbmac.c
@@ -941,7 +941,7 @@ static int bigmac_close(struct net_device *dev)
return 0;
}
-static void bigmac_tx_timeout(struct net_device *dev)
+static void bigmac_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct bigmac *bp = netdev_priv(dev);
diff --git a/drivers/net/ethernet/sun/sungem.c b/drivers/net/ethernet/sun/sungem.c
index 3e7631160384..8358064fbd48 100644
--- a/drivers/net/ethernet/sun/sungem.c
+++ b/drivers/net/ethernet/sun/sungem.c
@@ -970,7 +970,7 @@ static void gem_poll_controller(struct net_device *dev)
}
#endif
-static void gem_tx_timeout(struct net_device *dev)
+static void gem_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct gem *gp = netdev_priv(dev);
diff --git a/drivers/net/ethernet/sun/sunhme.c b/drivers/net/ethernet/sun/sunhme.c
index d007dfeba5c3..f0fe7bb2a750 100644
--- a/drivers/net/ethernet/sun/sunhme.c
+++ b/drivers/net/ethernet/sun/sunhme.c
@@ -2246,7 +2246,7 @@ static int happy_meal_close(struct net_device *dev)
#define SXD(x)
#endif
-static void happy_meal_tx_timeout(struct net_device *dev)
+static void happy_meal_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct happy_meal *hp = netdev_priv(dev);
diff --git a/drivers/net/ethernet/sun/sunqe.c b/drivers/net/ethernet/sun/sunqe.c
index 1468fa0a54e9..2102b95ec347 100644
--- a/drivers/net/ethernet/sun/sunqe.c
+++ b/drivers/net/ethernet/sun/sunqe.c
@@ -544,7 +544,7 @@ static void qe_tx_reclaim(struct sunqe *qep)
qep->tx_old = elem;
}
-static void qe_tx_timeout(struct net_device *dev)
+static void qe_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct sunqe *qep = netdev_priv(dev);
int tx_full;
diff --git a/drivers/net/ethernet/sun/sunvnet_common.c b/drivers/net/ethernet/sun/sunvnet_common.c
index 8b94d9ad9e2b..c23ce838ff63 100644
--- a/drivers/net/ethernet/sun/sunvnet_common.c
+++ b/drivers/net/ethernet/sun/sunvnet_common.c
@@ -1223,7 +1223,7 @@ vnet_handle_offloads(struct vnet_port *port, struct sk_buff *skb,
{
struct net_device *dev = VNET_PORT_TO_NET_DEVICE(port);
struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
- struct sk_buff *segs;
+ struct sk_buff *segs, *curr, *next;
int maclen, datalen;
int status;
int gso_size, gso_type, gso_segs;
@@ -1282,11 +1282,8 @@ vnet_handle_offloads(struct vnet_port *port, struct sk_buff *skb,
skb_reset_mac_header(skb);
status = 0;
- while (segs) {
- struct sk_buff *curr = segs;
-
- segs = segs->next;
- curr->next = NULL;
+ skb_list_walk_safe(segs, curr, next) {
+ skb_mark_not_on_list(curr);
if (port->tso && curr->len > dev->mtu) {
skb_shinfo(curr)->gso_size = gso_size;
skb_shinfo(curr)->gso_type = gso_type;
@@ -1539,7 +1536,7 @@ out_dropped:
}
EXPORT_SYMBOL_GPL(sunvnet_start_xmit_common);
-void sunvnet_tx_timeout_common(struct net_device *dev)
+void sunvnet_tx_timeout_common(struct net_device *dev, unsigned int txqueue)
{
/* XXX Implement me XXX */
}
diff --git a/drivers/net/ethernet/sun/sunvnet_common.h b/drivers/net/ethernet/sun/sunvnet_common.h
index 2b808d2482d6..5416a3cb9e7d 100644
--- a/drivers/net/ethernet/sun/sunvnet_common.h
+++ b/drivers/net/ethernet/sun/sunvnet_common.h
@@ -135,7 +135,7 @@ int sunvnet_open_common(struct net_device *dev);
int sunvnet_close_common(struct net_device *dev);
void sunvnet_set_rx_mode_common(struct net_device *dev, struct vnet *vp);
int sunvnet_set_mac_addr_common(struct net_device *dev, void *p);
-void sunvnet_tx_timeout_common(struct net_device *dev);
+void sunvnet_tx_timeout_common(struct net_device *dev, unsigned int txqueue);
netdev_tx_t
sunvnet_start_xmit_common(struct sk_buff *skb, struct net_device *dev,
struct vnet_port *(*vnet_tx_port)
diff --git a/drivers/net/ethernet/synopsys/dwc-xlgmac-net.c b/drivers/net/ethernet/synopsys/dwc-xlgmac-net.c
index a1f5a1e61040..07046a2370b3 100644
--- a/drivers/net/ethernet/synopsys/dwc-xlgmac-net.c
+++ b/drivers/net/ethernet/synopsys/dwc-xlgmac-net.c
@@ -689,7 +689,7 @@ static int xlgmac_close(struct net_device *netdev)
return 0;
}
-static void xlgmac_tx_timeout(struct net_device *netdev)
+static void xlgmac_tx_timeout(struct net_device *netdev, unsigned int txqueue)
{
struct xlgmac_pdata *pdata = netdev_priv(netdev);
diff --git a/drivers/net/ethernet/ti/Kconfig b/drivers/net/ethernet/ti/Kconfig
index a46f4189fde3..bf98e0fa7d8b 100644
--- a/drivers/net/ethernet/ti/Kconfig
+++ b/drivers/net/ethernet/ti/Kconfig
@@ -63,6 +63,7 @@ config TI_CPSW_SWITCHDEV
tristate "TI CPSW Switch Support with switchdev"
depends on ARCH_DAVINCI || ARCH_OMAP2PLUS || COMPILE_TEST
depends on NET_SWITCHDEV
+ select PAGE_POOL
select TI_DAVINCI_MDIO
select MFD_SYSCON
select REGMAP
diff --git a/drivers/net/ethernet/ti/Makefile b/drivers/net/ethernet/ti/Makefile
index d34df8e5cf94..ecf776ad8689 100644
--- a/drivers/net/ethernet/ti/Makefile
+++ b/drivers/net/ethernet/ti/Makefile
@@ -5,6 +5,7 @@
obj-$(CONFIG_TI_CPSW) += cpsw-common.o
obj-$(CONFIG_TI_DAVINCI_EMAC) += cpsw-common.o
+obj-$(CONFIG_TI_CPSW_SWITCHDEV) += cpsw-common.o
obj-$(CONFIG_TLAN) += tlan.o
obj-$(CONFIG_CPMAC) += cpmac.o
diff --git a/drivers/net/ethernet/ti/cpmac.c b/drivers/net/ethernet/ti/cpmac.c
index 3a655a4dc10e..5e1b8292cd3f 100644
--- a/drivers/net/ethernet/ti/cpmac.c
+++ b/drivers/net/ethernet/ti/cpmac.c
@@ -797,7 +797,7 @@ static irqreturn_t cpmac_irq(int irq, void *dev_id)
return IRQ_HANDLED;
}
-static void cpmac_tx_timeout(struct net_device *dev)
+static void cpmac_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct cpmac_priv *priv = netdev_priv(dev);
diff --git a/drivers/net/ethernet/ti/cpsw_ethtool.c b/drivers/net/ethernet/ti/cpsw_ethtool.c
index 31248a6cc642..fa54efe3be63 100644
--- a/drivers/net/ethernet/ti/cpsw_ethtool.c
+++ b/drivers/net/ethernet/ti/cpsw_ethtool.c
@@ -73,13 +73,13 @@ enum {
};
#define CPSW_STAT(m) CPSW_STATS, \
- FIELD_SIZEOF(struct cpsw_hw_stats, m), \
+ sizeof_field(struct cpsw_hw_stats, m), \
offsetof(struct cpsw_hw_stats, m)
#define CPDMA_RX_STAT(m) CPDMA_RX_STATS, \
- FIELD_SIZEOF(struct cpdma_chan_stats, m), \
+ sizeof_field(struct cpdma_chan_stats, m), \
offsetof(struct cpdma_chan_stats, m)
#define CPDMA_TX_STAT(m) CPDMA_TX_STATS, \
- FIELD_SIZEOF(struct cpdma_chan_stats, m), \
+ sizeof_field(struct cpdma_chan_stats, m), \
offsetof(struct cpdma_chan_stats, m)
static const struct cpsw_stats cpsw_gstrings_stats[] = {
diff --git a/drivers/net/ethernet/ti/cpsw_priv.c b/drivers/net/ethernet/ti/cpsw_priv.c
index 707d5eb480ce..97a058ca60ac 100644
--- a/drivers/net/ethernet/ti/cpsw_priv.c
+++ b/drivers/net/ethernet/ti/cpsw_priv.c
@@ -272,7 +272,7 @@ void soft_reset(const char *module, void __iomem *reg)
WARN(readl_relaxed(reg) & 1, "failed to soft-reset %s\n", module);
}
-void cpsw_ndo_tx_timeout(struct net_device *ndev)
+void cpsw_ndo_tx_timeout(struct net_device *ndev, unsigned int txqueue)
{
struct cpsw_priv *priv = netdev_priv(ndev);
struct cpsw_common *cpsw = priv->cpsw;
diff --git a/drivers/net/ethernet/ti/cpsw_priv.h b/drivers/net/ethernet/ti/cpsw_priv.h
index bc726356a72c..b8d7b924ee3d 100644
--- a/drivers/net/ethernet/ti/cpsw_priv.h
+++ b/drivers/net/ethernet/ti/cpsw_priv.h
@@ -449,7 +449,7 @@ int cpsw_rx_poll(struct napi_struct *napi_rx, int budget);
void cpsw_rx_vlan_encap(struct sk_buff *skb);
void soft_reset(const char *module, void __iomem *reg);
void cpsw_set_slave_mac(struct cpsw_slave *slave, struct cpsw_priv *priv);
-void cpsw_ndo_tx_timeout(struct net_device *ndev);
+void cpsw_ndo_tx_timeout(struct net_device *ndev, unsigned int txqueue);
int cpsw_need_resplit(struct cpsw_common *cpsw);
int cpsw_ndo_ioctl(struct net_device *dev, struct ifreq *req, int cmd);
int cpsw_ndo_set_tx_maxrate(struct net_device *ndev, int queue, u32 rate);
diff --git a/drivers/net/ethernet/ti/davinci_cpdma.c b/drivers/net/ethernet/ti/davinci_cpdma.c
index 37ba708ac781..6614fa3089b2 100644
--- a/drivers/net/ethernet/ti/davinci_cpdma.c
+++ b/drivers/net/ethernet/ti/davinci_cpdma.c
@@ -1018,7 +1018,6 @@ static int cpdma_chan_submit_si(struct submit_info *si)
struct cpdma_chan *chan = si->chan;
struct cpdma_ctlr *ctlr = chan->ctlr;
int len = si->len;
- int swlen = len;
struct cpdma_desc __iomem *desc;
dma_addr_t buffer;
u32 mode;
@@ -1046,7 +1045,6 @@ static int cpdma_chan_submit_si(struct submit_info *si)
if (si->data_dma) {
buffer = si->data_dma;
dma_sync_single_for_device(ctlr->dev, buffer, len, chan->dir);
- swlen |= CPDMA_DMA_EXT_MAP;
} else {
buffer = dma_map_single(ctlr->dev, si->data_virt, len, chan->dir);
ret = dma_mapping_error(ctlr->dev, buffer);
@@ -1065,7 +1063,8 @@ static int cpdma_chan_submit_si(struct submit_info *si)
writel_relaxed(mode | len, &desc->hw_mode);
writel_relaxed((uintptr_t)si->token, &desc->sw_token);
writel_relaxed(buffer, &desc->sw_buffer);
- writel_relaxed(swlen, &desc->sw_len);
+ writel_relaxed(si->data_dma ? len | CPDMA_DMA_EXT_MAP : len,
+ &desc->sw_len);
desc_read(desc, sw_len);
__cpdma_chan_submit(chan, desc);
diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c
index ae27be85e363..75d4e16c692b 100644
--- a/drivers/net/ethernet/ti/davinci_emac.c
+++ b/drivers/net/ethernet/ti/davinci_emac.c
@@ -983,7 +983,7 @@ fail_tx:
* error and re-initialize the TX channel for hardware operation
*
*/
-static void emac_dev_tx_timeout(struct net_device *ndev)
+static void emac_dev_tx_timeout(struct net_device *ndev, unsigned int txqueue)
{
struct emac_priv *priv = netdev_priv(ndev);
struct device *emac_dev = &ndev->dev;
diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c
index 1b2702f74455..432645e86495 100644
--- a/drivers/net/ethernet/ti/netcp_core.c
+++ b/drivers/net/ethernet/ti/netcp_core.c
@@ -1811,7 +1811,7 @@ out:
return (ret == 0) ? 0 : err;
}
-static void netcp_ndo_tx_timeout(struct net_device *ndev)
+static void netcp_ndo_tx_timeout(struct net_device *ndev, unsigned int txqueue)
{
struct netcp_intf *netcp = netdev_priv(ndev);
unsigned int descs = knav_pool_count(netcp->tx_pool);
diff --git a/drivers/net/ethernet/ti/netcp_ethss.c b/drivers/net/ethernet/ti/netcp_ethss.c
index 86a3f42a3dcc..fb36115e9c51 100644
--- a/drivers/net/ethernet/ti/netcp_ethss.c
+++ b/drivers/net/ethernet/ti/netcp_ethss.c
@@ -783,28 +783,28 @@ struct netcp_ethtool_stat {
#define GBE_STATSA_INFO(field) \
{ \
"GBE_A:"#field, GBE_STATSA_MODULE, \
- FIELD_SIZEOF(struct gbe_hw_stats, field), \
+ sizeof_field(struct gbe_hw_stats, field), \
offsetof(struct gbe_hw_stats, field) \
}
#define GBE_STATSB_INFO(field) \
{ \
"GBE_B:"#field, GBE_STATSB_MODULE, \
- FIELD_SIZEOF(struct gbe_hw_stats, field), \
+ sizeof_field(struct gbe_hw_stats, field), \
offsetof(struct gbe_hw_stats, field) \
}
#define GBE_STATSC_INFO(field) \
{ \
"GBE_C:"#field, GBE_STATSC_MODULE, \
- FIELD_SIZEOF(struct gbe_hw_stats, field), \
+ sizeof_field(struct gbe_hw_stats, field), \
offsetof(struct gbe_hw_stats, field) \
}
#define GBE_STATSD_INFO(field) \
{ \
"GBE_D:"#field, GBE_STATSD_MODULE, \
- FIELD_SIZEOF(struct gbe_hw_stats, field), \
+ sizeof_field(struct gbe_hw_stats, field), \
offsetof(struct gbe_hw_stats, field) \
}
@@ -957,7 +957,7 @@ static const struct netcp_ethtool_stat gbe13_et_stats[] = {
#define GBENU_STATS_HOST(field) \
{ \
"GBE_HOST:"#field, GBENU_STATS0_MODULE, \
- FIELD_SIZEOF(struct gbenu_hw_stats, field), \
+ sizeof_field(struct gbenu_hw_stats, field), \
offsetof(struct gbenu_hw_stats, field) \
}
@@ -967,56 +967,56 @@ static const struct netcp_ethtool_stat gbe13_et_stats[] = {
#define GBENU_STATS_P1(field) \
{ \
"GBE_P1:"#field, GBENU_STATS1_MODULE, \
- FIELD_SIZEOF(struct gbenu_hw_stats, field), \
+ sizeof_field(struct gbenu_hw_stats, field), \
offsetof(struct gbenu_hw_stats, field) \
}
#define GBENU_STATS_P2(field) \
{ \
"GBE_P2:"#field, GBENU_STATS2_MODULE, \
- FIELD_SIZEOF(struct gbenu_hw_stats, field), \
+ sizeof_field(struct gbenu_hw_stats, field), \
offsetof(struct gbenu_hw_stats, field) \
}
#define GBENU_STATS_P3(field) \
{ \
"GBE_P3:"#field, GBENU_STATS3_MODULE, \
- FIELD_SIZEOF(struct gbenu_hw_stats, field), \
+ sizeof_field(struct gbenu_hw_stats, field), \
offsetof(struct gbenu_hw_stats, field) \
}
#define GBENU_STATS_P4(field) \
{ \
"GBE_P4:"#field, GBENU_STATS4_MODULE, \
- FIELD_SIZEOF(struct gbenu_hw_stats, field), \
+ sizeof_field(struct gbenu_hw_stats, field), \
offsetof(struct gbenu_hw_stats, field) \
}
#define GBENU_STATS_P5(field) \
{ \
"GBE_P5:"#field, GBENU_STATS5_MODULE, \
- FIELD_SIZEOF(struct gbenu_hw_stats, field), \
+ sizeof_field(struct gbenu_hw_stats, field), \
offsetof(struct gbenu_hw_stats, field) \
}
#define GBENU_STATS_P6(field) \
{ \
"GBE_P6:"#field, GBENU_STATS6_MODULE, \
- FIELD_SIZEOF(struct gbenu_hw_stats, field), \
+ sizeof_field(struct gbenu_hw_stats, field), \
offsetof(struct gbenu_hw_stats, field) \
}
#define GBENU_STATS_P7(field) \
{ \
"GBE_P7:"#field, GBENU_STATS7_MODULE, \
- FIELD_SIZEOF(struct gbenu_hw_stats, field), \
+ sizeof_field(struct gbenu_hw_stats, field), \
offsetof(struct gbenu_hw_stats, field) \
}
#define GBENU_STATS_P8(field) \
{ \
"GBE_P8:"#field, GBENU_STATS8_MODULE, \
- FIELD_SIZEOF(struct gbenu_hw_stats, field), \
+ sizeof_field(struct gbenu_hw_stats, field), \
offsetof(struct gbenu_hw_stats, field) \
}
@@ -1607,21 +1607,21 @@ static const struct netcp_ethtool_stat gbenu_et_stats[] = {
#define XGBE_STATS0_INFO(field) \
{ \
"GBE_0:"#field, XGBE_STATS0_MODULE, \
- FIELD_SIZEOF(struct xgbe_hw_stats, field), \
+ sizeof_field(struct xgbe_hw_stats, field), \
offsetof(struct xgbe_hw_stats, field) \
}
#define XGBE_STATS1_INFO(field) \
{ \
"GBE_1:"#field, XGBE_STATS1_MODULE, \
- FIELD_SIZEOF(struct xgbe_hw_stats, field), \
+ sizeof_field(struct xgbe_hw_stats, field), \
offsetof(struct xgbe_hw_stats, field) \
}
#define XGBE_STATS2_INFO(field) \
{ \
"GBE_2:"#field, XGBE_STATS2_MODULE, \
- FIELD_SIZEOF(struct xgbe_hw_stats, field), \
+ sizeof_field(struct xgbe_hw_stats, field), \
offsetof(struct xgbe_hw_stats, field) \
}
@@ -2533,8 +2533,6 @@ static int gbe_del_vid(void *intf_priv, int vid)
}
#if IS_ENABLED(CONFIG_TI_CPTS)
-#define HAS_PHY_TXTSTAMP(p) ((p)->drv && (p)->drv->txtstamp)
-#define HAS_PHY_RXTSTAMP(p) ((p)->drv && (p)->drv->rxtstamp)
static void gbe_txtstamp(void *context, struct sk_buff *skb)
{
@@ -2566,7 +2564,7 @@ static int gbe_txtstamp_mark_pkt(struct gbe_intf *gbe_intf,
* We mark it here because skb_tx_timestamp() is called
* after all the txhooks are called.
*/
- if (phydev && HAS_PHY_TXTSTAMP(phydev)) {
+ if (phy_has_txtstamp(phydev)) {
skb_shinfo(p_info->skb)->tx_flags |= SKBTX_IN_PROGRESS;
return 0;
}
@@ -2588,7 +2586,7 @@ static int gbe_rxtstamp(struct gbe_intf *gbe_intf, struct netcp_packet *p_info)
if (p_info->rxtstamp_complete)
return 0;
- if (phydev && HAS_PHY_RXTSTAMP(phydev)) {
+ if (phy_has_rxtstamp(phydev)) {
p_info->rxtstamp_complete = true;
return 0;
}
@@ -2830,7 +2828,7 @@ static int gbe_ioctl(void *intf_priv, struct ifreq *req, int cmd)
struct gbe_intf *gbe_intf = intf_priv;
struct phy_device *phy = gbe_intf->slave->phy;
- if (!phy || !phy->drv->hwtstamp) {
+ if (!phy_has_hwtstamp(phy)) {
switch (cmd) {
case SIOCGHWTSTAMP:
return gbe_hwtstamp_get(gbe_intf, req);
diff --git a/drivers/net/ethernet/ti/tlan.c b/drivers/net/ethernet/ti/tlan.c
index 78f0f2d59e22..ad465202980a 100644
--- a/drivers/net/ethernet/ti/tlan.c
+++ b/drivers/net/ethernet/ti/tlan.c
@@ -161,7 +161,7 @@ static void tlan_set_multicast_list(struct net_device *);
static int tlan_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
static int tlan_probe1(struct pci_dev *pdev, long ioaddr,
int irq, int rev, const struct pci_device_id *ent);
-static void tlan_tx_timeout(struct net_device *dev);
+static void tlan_tx_timeout(struct net_device *dev, unsigned int txqueue);
static void tlan_tx_timeout_work(struct work_struct *work);
static int tlan_init_one(struct pci_dev *pdev,
const struct pci_device_id *ent);
@@ -997,7 +997,7 @@ static int tlan_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
*
**************************************************************/
-static void tlan_tx_timeout(struct net_device *dev)
+static void tlan_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
TLAN_DBG(TLAN_DEBUG_GNRL, "%s: Transmit timed out.\n", dev->name);
@@ -1028,7 +1028,7 @@ static void tlan_tx_timeout_work(struct work_struct *work)
struct tlan_priv *priv =
container_of(work, struct tlan_priv, tlan_tqueue);
- tlan_tx_timeout(priv->dev);
+ tlan_tx_timeout(priv->dev, UINT_MAX);
}
diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_net.c b/drivers/net/ethernet/toshiba/ps3_gelic_net.c
index 9d9f8acb7ee3..070dd6fa9401 100644
--- a/drivers/net/ethernet/toshiba/ps3_gelic_net.c
+++ b/drivers/net/ethernet/toshiba/ps3_gelic_net.c
@@ -1405,7 +1405,7 @@ out:
*
* called, if tx hangs. Schedules a task that resets the interface
*/
-void gelic_net_tx_timeout(struct net_device *netdev)
+void gelic_net_tx_timeout(struct net_device *netdev, unsigned int txqueue)
{
struct gelic_card *card;
diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_net.h b/drivers/net/ethernet/toshiba/ps3_gelic_net.h
index 051033580f0a..805903dbddcc 100644
--- a/drivers/net/ethernet/toshiba/ps3_gelic_net.h
+++ b/drivers/net/ethernet/toshiba/ps3_gelic_net.h
@@ -359,7 +359,7 @@ int gelic_net_open(struct net_device *netdev);
int gelic_net_stop(struct net_device *netdev);
netdev_tx_t gelic_net_xmit(struct sk_buff *skb, struct net_device *netdev);
void gelic_net_set_multi(struct net_device *netdev);
-void gelic_net_tx_timeout(struct net_device *netdev);
+void gelic_net_tx_timeout(struct net_device *netdev, unsigned int txqueue);
int gelic_net_setup_netdev(struct net_device *netdev, struct gelic_card *card);
/* shared ethtool ops */
diff --git a/drivers/net/ethernet/toshiba/spider_net.c b/drivers/net/ethernet/toshiba/spider_net.c
index 538e70810d3d..6576271642c1 100644
--- a/drivers/net/ethernet/toshiba/spider_net.c
+++ b/drivers/net/ethernet/toshiba/spider_net.c
@@ -2180,7 +2180,7 @@ out:
* called, if tx hangs. Schedules a task that resets the interface
*/
static void
-spider_net_tx_timeout(struct net_device *netdev)
+spider_net_tx_timeout(struct net_device *netdev, unsigned int txqueue)
{
struct spider_net_card *card;
diff --git a/drivers/net/ethernet/toshiba/tc35815.c b/drivers/net/ethernet/toshiba/tc35815.c
index 12466a72cefc..708de826200e 100644
--- a/drivers/net/ethernet/toshiba/tc35815.c
+++ b/drivers/net/ethernet/toshiba/tc35815.c
@@ -483,7 +483,7 @@ static void tc35815_txdone(struct net_device *dev);
static int tc35815_close(struct net_device *dev);
static struct net_device_stats *tc35815_get_stats(struct net_device *dev);
static void tc35815_set_multicast_list(struct net_device *dev);
-static void tc35815_tx_timeout(struct net_device *dev);
+static void tc35815_tx_timeout(struct net_device *dev, unsigned int txqueue);
static int tc35815_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
#ifdef CONFIG_NET_POLL_CONTROLLER
static void tc35815_poll_controller(struct net_device *dev);
@@ -1189,7 +1189,7 @@ static void tc35815_schedule_restart(struct net_device *dev)
spin_unlock_irqrestore(&lp->lock, flags);
}
-static void tc35815_tx_timeout(struct net_device *dev)
+static void tc35815_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct tc35815_regs __iomem *tr =
(struct tc35815_regs __iomem *)dev->base_addr;
diff --git a/drivers/net/ethernet/via/via-rhine.c b/drivers/net/ethernet/via/via-rhine.c
index ed12dbd156f0..803247d51fe9 100644
--- a/drivers/net/ethernet/via/via-rhine.c
+++ b/drivers/net/ethernet/via/via-rhine.c
@@ -506,7 +506,7 @@ static void mdio_write(struct net_device *dev, int phy_id, int location, int val
static int rhine_open(struct net_device *dev);
static void rhine_reset_task(struct work_struct *work);
static void rhine_slow_event_task(struct work_struct *work);
-static void rhine_tx_timeout(struct net_device *dev);
+static void rhine_tx_timeout(struct net_device *dev, unsigned int txqueue);
static netdev_tx_t rhine_start_tx(struct sk_buff *skb,
struct net_device *dev);
static irqreturn_t rhine_interrupt(int irq, void *dev_instance);
@@ -1761,7 +1761,7 @@ out_unlock:
mutex_unlock(&rp->task_lock);
}
-static void rhine_tx_timeout(struct net_device *dev)
+static void rhine_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct rhine_private *rp = netdev_priv(dev);
void __iomem *ioaddr = rp->base;
diff --git a/drivers/net/ethernet/via/via-velocity.c b/drivers/net/ethernet/via/via-velocity.c
index 346e44115c4e..4b556b74541a 100644
--- a/drivers/net/ethernet/via/via-velocity.c
+++ b/drivers/net/ethernet/via/via-velocity.c
@@ -3257,12 +3257,16 @@ static struct platform_driver velocity_platform_driver = {
* @dev: network device
*
* Called before an ethtool operation. We need to make sure the
- * chip is out of D3 state before we poke at it.
+ * chip is out of D3 state before we poke at it. In case of ethtool
+ * ops nesting, only wake the device up in the outermost block.
*/
static int velocity_ethtool_up(struct net_device *dev)
{
struct velocity_info *vptr = netdev_priv(dev);
- if (!netif_running(dev))
+
+ if (vptr->ethtool_ops_nesting == U32_MAX)
+ return -EBUSY;
+ if (!vptr->ethtool_ops_nesting++ && !netif_running(dev))
velocity_set_power_state(vptr, PCI_D0);
return 0;
}
@@ -3272,12 +3276,14 @@ static int velocity_ethtool_up(struct net_device *dev)
* @dev: network device
*
* Called after an ethtool operation. Restore the chip back to D3
- * state if it isn't running.
+ * state if it isn't running. In case of ethtool ops nesting, only
+ * put the device to sleep in the outermost block.
*/
static void velocity_ethtool_down(struct net_device *dev)
{
struct velocity_info *vptr = netdev_priv(dev);
- if (!netif_running(dev))
+
+ if (!--vptr->ethtool_ops_nesting && !netif_running(dev))
velocity_set_power_state(vptr, PCI_D3hot);
}
diff --git a/drivers/net/ethernet/via/via-velocity.h b/drivers/net/ethernet/via/via-velocity.h
index cdfe7809e3c1..f196e71d2c04 100644
--- a/drivers/net/ethernet/via/via-velocity.h
+++ b/drivers/net/ethernet/via/via-velocity.h
@@ -1483,6 +1483,7 @@ struct velocity_info {
struct velocity_context context;
u32 ticks;
+ u32 ethtool_ops_nesting;
u8 rev_id;
diff --git a/drivers/net/ethernet/wiznet/w5100.c b/drivers/net/ethernet/wiznet/w5100.c
index bede1ff289c5..c0d181a7f83a 100644
--- a/drivers/net/ethernet/wiznet/w5100.c
+++ b/drivers/net/ethernet/wiznet/w5100.c
@@ -790,7 +790,7 @@ static void w5100_restart_work(struct work_struct *work)
w5100_restart(priv->ndev);
}
-static void w5100_tx_timeout(struct net_device *ndev)
+static void w5100_tx_timeout(struct net_device *ndev, unsigned int txqueue)
{
struct w5100_priv *priv = netdev_priv(ndev);
diff --git a/drivers/net/ethernet/wiznet/w5300.c b/drivers/net/ethernet/wiznet/w5300.c
index 6ba2747779ce..46aae30c4636 100644
--- a/drivers/net/ethernet/wiznet/w5300.c
+++ b/drivers/net/ethernet/wiznet/w5300.c
@@ -341,7 +341,7 @@ static void w5300_get_regs(struct net_device *ndev,
}
}
-static void w5300_tx_timeout(struct net_device *ndev)
+static void w5300_tx_timeout(struct net_device *ndev, unsigned int txqueue)
{
struct w5300_priv *priv = netdev_priv(ndev);
diff --git a/drivers/net/ethernet/xilinx/xilinx_emaclite.c b/drivers/net/ethernet/xilinx/xilinx_emaclite.c
index 0de52e70abcc..0c26f5bcc523 100644
--- a/drivers/net/ethernet/xilinx/xilinx_emaclite.c
+++ b/drivers/net/ethernet/xilinx/xilinx_emaclite.c
@@ -521,7 +521,7 @@ static int xemaclite_set_mac_address(struct net_device *dev, void *address)
*
* This function is called when Tx time out occurs for Emaclite device.
*/
-static void xemaclite_tx_timeout(struct net_device *dev)
+static void xemaclite_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct net_local *lp = netdev_priv(dev);
unsigned long flags;
diff --git a/drivers/net/ethernet/xircom/xirc2ps_cs.c b/drivers/net/ethernet/xircom/xirc2ps_cs.c
index fd5288ff53b5..480ab7251515 100644
--- a/drivers/net/ethernet/xircom/xirc2ps_cs.c
+++ b/drivers/net/ethernet/xircom/xirc2ps_cs.c
@@ -288,7 +288,7 @@ struct local_info {
*/
static netdev_tx_t do_start_xmit(struct sk_buff *skb,
struct net_device *dev);
-static void xirc_tx_timeout(struct net_device *dev);
+static void xirc_tx_timeout(struct net_device *dev, unsigned int txqueue);
static void xirc2ps_tx_timeout_task(struct work_struct *work);
static void set_addresses(struct net_device *dev);
static void set_multicast_list(struct net_device *dev);
@@ -1203,7 +1203,7 @@ xirc2ps_tx_timeout_task(struct work_struct *work)
}
static void
-xirc_tx_timeout(struct net_device *dev)
+xirc_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct local_info *lp = netdev_priv(dev);
dev->stats.tx_errors++;
diff --git a/drivers/net/ethernet/xscale/Kconfig b/drivers/net/ethernet/xscale/Kconfig
index cd0a8f46e7c6..98aa7b8ddb06 100644
--- a/drivers/net/ethernet/xscale/Kconfig
+++ b/drivers/net/ethernet/xscale/Kconfig
@@ -27,4 +27,18 @@ config IXP4XX_ETH
Say Y here if you want to use built-in Ethernet ports
on IXP4xx processor.
+config PTP_1588_CLOCK_IXP46X
+ tristate "Intel IXP46x as PTP clock"
+ depends on IXP4XX_ETH
+ depends on PTP_1588_CLOCK
+ default y
+ help
+ This driver adds support for using the IXP46X as a PTP
+ clock. This clock is only useful if your PTP programs are
+ getting hardware time stamps on the PTP Ethernet packets
+ using the SO_TIMESTAMPING API.
+
+ To compile this driver as a module, choose M here: the module
+ will be called ptp_ixp46x.
+
endif # NET_VENDOR_XSCALE
diff --git a/drivers/net/ethernet/xscale/Makefile b/drivers/net/ethernet/xscale/Makefile
index 794a519d07b3..607f91b1e878 100644
--- a/drivers/net/ethernet/xscale/Makefile
+++ b/drivers/net/ethernet/xscale/Makefile
@@ -3,4 +3,5 @@
# Makefile for the Intel XScale IXP device drivers.
#
-obj-$(CONFIG_IXP4XX_ETH) += ixp4xx_eth.o
+obj-$(CONFIG_IXP4XX_ETH) += ixp4xx_eth.o
+obj-$(CONFIG_PTP_1588_CLOCK_IXP46X) += ptp_ixp46x.o
diff --git a/arch/arm/mach-ixp4xx/include/mach/ixp46x_ts.h b/drivers/net/ethernet/xscale/ixp46x_ts.h
index d792130e27b0..d792130e27b0 100644
--- a/arch/arm/mach-ixp4xx/include/mach/ixp46x_ts.h
+++ b/drivers/net/ethernet/xscale/ixp46x_ts.h
diff --git a/drivers/net/ethernet/xscale/ixp4xx_eth.c b/drivers/net/ethernet/xscale/ixp4xx_eth.c
index 6fc04ffb22c2..269596c15133 100644
--- a/drivers/net/ethernet/xscale/ixp4xx_eth.c
+++ b/drivers/net/ethernet/xscale/ixp4xx_eth.c
@@ -29,14 +29,16 @@
#include <linux/net_tstamp.h>
#include <linux/of.h>
#include <linux/phy.h>
+#include <linux/platform_data/eth_ixp4xx.h>
#include <linux/platform_device.h>
#include <linux/ptp_classify.h>
#include <linux/slab.h>
#include <linux/module.h>
-#include <mach/ixp46x_ts.h>
#include <linux/soc/ixp4xx/npe.h>
#include <linux/soc/ixp4xx/qmgr.h>
+#include "ixp46x_ts.h"
+
#define DEBUG_DESC 0
#define DEBUG_RX 0
#define DEBUG_TX 0
@@ -517,25 +519,14 @@ static int ixp4xx_mdio_write(struct mii_bus *bus, int phy_id, int location,
return ret;
}
-static int ixp4xx_mdio_register(void)
+static int ixp4xx_mdio_register(struct eth_regs __iomem *regs)
{
int err;
if (!(mdio_bus = mdiobus_alloc()))
return -ENOMEM;
- if (cpu_is_ixp43x()) {
- /* IXP43x lacks NPE-B and uses NPE-C for MII PHY access */
- if (!(ixp4xx_read_feature_bits() & IXP4XX_FEATURE_NPEC_ETH))
- return -ENODEV;
- mdio_regs = (struct eth_regs __iomem *)IXP4XX_EthC_BASE_VIRT;
- } else {
- /* All MII PHY accesses use NPE-B Ethernet registers */
- if (!(ixp4xx_read_feature_bits() & IXP4XX_FEATURE_NPEB_ETH0))
- return -ENODEV;
- mdio_regs = (struct eth_regs __iomem *)IXP4XX_EthB_BASE_VIRT;
- }
-
+ mdio_regs = regs;
__raw_writel(DEFAULT_CORE_CNTRL, &mdio_regs->core_control);
spin_lock_init(&mdio_lock);
mdio_bus->name = "IXP4xx MII Bus";
@@ -581,8 +572,8 @@ static void ixp4xx_adjust_link(struct net_device *dev)
__raw_writel(DEFAULT_TX_CNTRL0 | TX_CNTRL0_HALFDUPLEX,
&port->regs->tx_control[0]);
- printk(KERN_INFO "%s: link up, speed %u Mb/s, %s duplex\n",
- dev->name, port->speed, port->duplex ? "full" : "half");
+ netdev_info(dev, "%s: link up, speed %u Mb/s, %s duplex\n",
+ dev->name, port->speed, port->duplex ? "full" : "half");
}
@@ -592,7 +583,7 @@ static inline void debug_pkt(struct net_device *dev, const char *func,
#if DEBUG_PKT_BYTES
int i;
- printk(KERN_DEBUG "%s: %s(%i) ", dev->name, func, len);
+ netdev_debug(dev, "%s(%i) ", func, len);
for (i = 0; i < len; i++) {
if (i >= DEBUG_PKT_BYTES)
break;
@@ -683,7 +674,7 @@ static int eth_poll(struct napi_struct *napi, int budget)
int received = 0;
#if DEBUG_RX
- printk(KERN_DEBUG "%s: eth_poll\n", dev->name);
+ netdev_debug(dev, "eth_poll\n");
#endif
while (received < budget) {
@@ -697,23 +688,20 @@ static int eth_poll(struct napi_struct *napi, int budget)
if ((n = queue_get_desc(rxq, port, 0)) < 0) {
#if DEBUG_RX
- printk(KERN_DEBUG "%s: eth_poll napi_complete\n",
- dev->name);
+ netdev_debug(dev, "eth_poll napi_complete\n");
#endif
napi_complete(napi);
qmgr_enable_irq(rxq);
if (!qmgr_stat_below_low_watermark(rxq) &&
napi_reschedule(napi)) { /* not empty again */
#if DEBUG_RX
- printk(KERN_DEBUG "%s: eth_poll napi_reschedule succeeded\n",
- dev->name);
+ netdev_debug(dev, "eth_poll napi_reschedule succeeded\n");
#endif
qmgr_disable_irq(rxq);
continue;
}
#if DEBUG_RX
- printk(KERN_DEBUG "%s: eth_poll all done\n",
- dev->name);
+ netdev_debug(dev, "eth_poll all done\n");
#endif
return received; /* all work done */
}
@@ -778,7 +766,7 @@ static int eth_poll(struct napi_struct *napi, int budget)
}
#if DEBUG_RX
- printk(KERN_DEBUG "eth_poll(): end, not all work done\n");
+ netdev_debug(dev, "eth_poll(): end, not all work done\n");
#endif
return received; /* not all work done */
}
@@ -842,7 +830,7 @@ static int eth_xmit(struct sk_buff *skb, struct net_device *dev)
struct desc *desc;
#if DEBUG_TX
- printk(KERN_DEBUG "%s: eth_xmit\n", dev->name);
+ netdev_debug(dev, "eth_xmit\n");
#endif
if (unlikely(skb->len > MAX_MRU)) {
@@ -897,22 +885,21 @@ static int eth_xmit(struct sk_buff *skb, struct net_device *dev)
if (qmgr_stat_below_low_watermark(txreadyq)) { /* empty */
#if DEBUG_TX
- printk(KERN_DEBUG "%s: eth_xmit queue full\n", dev->name);
+ netdev_debug(dev, "eth_xmit queue full\n");
#endif
netif_stop_queue(dev);
/* we could miss TX ready interrupt */
/* really empty in fact */
if (!qmgr_stat_below_low_watermark(txreadyq)) {
#if DEBUG_TX
- printk(KERN_DEBUG "%s: eth_xmit ready again\n",
- dev->name);
+ netdev_debug(dev, "eth_xmit ready again\n");
#endif
netif_wake_queue(dev);
}
}
#if DEBUG_TX
- printk(KERN_DEBUG "%s: eth_xmit end\n", dev->name);
+ netdev_debug(dev, "eth_xmit end\n");
#endif
ixp_tx_timestamp(port, skb);
@@ -1099,7 +1086,7 @@ static int init_queues(struct port *port)
int i;
if (!ports_open) {
- dma_pool = dma_pool_create(DRV_NAME, &port->netdev->dev,
+ dma_pool = dma_pool_create(DRV_NAME, port->netdev->dev.parent,
POOL_ALLOC_SIZE, 32, 0);
if (!dma_pool)
return -ENOMEM;
@@ -1186,8 +1173,7 @@ static int eth_open(struct net_device *dev)
return err;
if (npe_recv_message(npe, &msg, "ETH_GET_STATUS")) {
- printk(KERN_ERR "%s: %s not responding\n", dev->name,
- npe_name(npe));
+ netdev_err(dev, "%s not responding\n", npe_name(npe));
return -EIO;
}
port->firmware[0] = msg.byte4;
@@ -1299,7 +1285,7 @@ static int eth_close(struct net_device *dev)
msg.eth_id = port->id;
msg.byte3 = 1;
if (npe_send_recv_message(port->npe, &msg, "ETH_ENABLE_LOOPBACK"))
- printk(KERN_CRIT "%s: unable to enable loopback\n", dev->name);
+ netdev_crit(dev, "unable to enable loopback\n");
i = 0;
do { /* drain RX buffers */
@@ -1323,11 +1309,11 @@ static int eth_close(struct net_device *dev)
} while (++i < MAX_CLOSE_WAIT);
if (buffs)
- printk(KERN_CRIT "%s: unable to drain RX queue, %i buffer(s)"
- " left in NPE\n", dev->name, buffs);
+ netdev_crit(dev, "unable to drain RX queue, %i buffer(s)"
+ " left in NPE\n", buffs);
#if DEBUG_CLOSE
if (!buffs)
- printk(KERN_DEBUG "Draining RX queue took %i cycles\n", i);
+ netdev_debug(dev, "draining RX queue took %i cycles\n", i);
#endif
buffs = TX_DESCS;
@@ -1343,17 +1329,16 @@ static int eth_close(struct net_device *dev)
} while (++i < MAX_CLOSE_WAIT);
if (buffs)
- printk(KERN_CRIT "%s: unable to drain TX queue, %i buffer(s) "
- "left in NPE\n", dev->name, buffs);
+ netdev_crit(dev, "unable to drain TX queue, %i buffer(s) "
+ "left in NPE\n", buffs);
#if DEBUG_CLOSE
if (!buffs)
- printk(KERN_DEBUG "Draining TX queues took %i cycles\n", i);
+ netdev_debug(dev, "draining TX queues took %i cycles\n", i);
#endif
msg.byte3 = 0;
if (npe_send_recv_message(port->npe, &msg, "ETH_DISABLE_LOOPBACK"))
- printk(KERN_CRIT "%s: unable to disable loopback\n",
- dev->name);
+ netdev_crit(dev, "unable to disable loopback\n");
phy_stop(dev->phydev);
@@ -1374,54 +1359,88 @@ static const struct net_device_ops ixp4xx_netdev_ops = {
.ndo_validate_addr = eth_validate_addr,
};
-static int eth_init_one(struct platform_device *pdev)
+static int ixp4xx_eth_probe(struct platform_device *pdev)
{
- struct port *port;
- struct net_device *dev;
- struct eth_plat_info *plat = dev_get_platdata(&pdev->dev);
- struct phy_device *phydev = NULL;
- u32 regs_phys;
char phy_id[MII_BUS_ID_SIZE + 3];
+ struct phy_device *phydev = NULL;
+ struct device *dev = &pdev->dev;
+ struct eth_plat_info *plat;
+ resource_size_t regs_phys;
+ struct net_device *ndev;
+ struct resource *res;
+ struct port *port;
int err;
- if (!(dev = alloc_etherdev(sizeof(struct port))))
+ plat = dev_get_platdata(dev);
+
+ if (!(ndev = devm_alloc_etherdev(dev, sizeof(struct port))))
return -ENOMEM;
- SET_NETDEV_DEV(dev, &pdev->dev);
- port = netdev_priv(dev);
- port->netdev = dev;
+ SET_NETDEV_DEV(ndev, dev);
+ port = netdev_priv(ndev);
+ port->netdev = ndev;
port->id = pdev->id;
+ /* Get the port resource and remap */
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (!res)
+ return -ENODEV;
+ regs_phys = res->start;
+ port->regs = devm_ioremap_resource(dev, res);
+
switch (port->id) {
case IXP4XX_ETH_NPEA:
- port->regs = (struct eth_regs __iomem *)IXP4XX_EthA_BASE_VIRT;
- regs_phys = IXP4XX_EthA_BASE_PHYS;
+ /* If the MDIO bus is not up yet, defer probe */
+ if (!mdio_bus)
+ return -EPROBE_DEFER;
break;
case IXP4XX_ETH_NPEB:
- port->regs = (struct eth_regs __iomem *)IXP4XX_EthB_BASE_VIRT;
- regs_phys = IXP4XX_EthB_BASE_PHYS;
+ /*
+ * On all except IXP43x, NPE-B is used for the MDIO bus.
+ * If there is no NPE-B in the feature set, bail out, else
+ * register the MDIO bus.
+ */
+ if (!cpu_is_ixp43x()) {
+ if (!(ixp4xx_read_feature_bits() &
+ IXP4XX_FEATURE_NPEB_ETH0))
+ return -ENODEV;
+ /* Else register the MDIO bus on NPE-B */
+ if ((err = ixp4xx_mdio_register(port->regs)))
+ return err;
+ }
+ if (!mdio_bus)
+ return -EPROBE_DEFER;
break;
case IXP4XX_ETH_NPEC:
- port->regs = (struct eth_regs __iomem *)IXP4XX_EthC_BASE_VIRT;
- regs_phys = IXP4XX_EthC_BASE_PHYS;
+ /*
+ * IXP43x lacks NPE-B and uses NPE-C for the MDIO bus access,
+ * of there is no NPE-C, no bus, nothing works, so bail out.
+ */
+ if (cpu_is_ixp43x()) {
+ if (!(ixp4xx_read_feature_bits() &
+ IXP4XX_FEATURE_NPEC_ETH))
+ return -ENODEV;
+ /* Else register the MDIO bus on NPE-C */
+ if ((err = ixp4xx_mdio_register(port->regs)))
+ return err;
+ }
+ if (!mdio_bus)
+ return -EPROBE_DEFER;
break;
default:
- err = -ENODEV;
- goto err_free;
+ return -ENODEV;
}
- dev->netdev_ops = &ixp4xx_netdev_ops;
- dev->ethtool_ops = &ixp4xx_ethtool_ops;
- dev->tx_queue_len = 100;
+ ndev->netdev_ops = &ixp4xx_netdev_ops;
+ ndev->ethtool_ops = &ixp4xx_ethtool_ops;
+ ndev->tx_queue_len = 100;
- netif_napi_add(dev, &port->napi, eth_poll, NAPI_WEIGHT);
+ netif_napi_add(ndev, &port->napi, eth_poll, NAPI_WEIGHT);
- if (!(port->npe = npe_request(NPE_ID(port->id)))) {
- err = -EIO;
- goto err_free;
- }
+ if (!(port->npe = npe_request(NPE_ID(port->id))))
+ return -EIO;
- port->mem_res = request_mem_region(regs_phys, REGS_SIZE, dev->name);
+ port->mem_res = request_mem_region(regs_phys, REGS_SIZE, ndev->name);
if (!port->mem_res) {
err = -EBUSY;
goto err_npe_rel;
@@ -1429,9 +1448,9 @@ static int eth_init_one(struct platform_device *pdev)
port->plat = plat;
npe_port_tab[NPE_ID(port->id)] = port;
- memcpy(dev->dev_addr, plat->hwaddr, ETH_ALEN);
+ memcpy(ndev->dev_addr, plat->hwaddr, ETH_ALEN);
- platform_set_drvdata(pdev, dev);
+ platform_set_drvdata(pdev, ndev);
__raw_writel(DEFAULT_CORE_CNTRL | CORE_RESET,
&port->regs->core_control);
@@ -1441,7 +1460,7 @@ static int eth_init_one(struct platform_device *pdev)
snprintf(phy_id, MII_BUS_ID_SIZE + 3, PHY_ID_FMT,
mdio_bus->id, plat->phy);
- phydev = phy_connect(dev, phy_id, &ixp4xx_adjust_link,
+ phydev = phy_connect(ndev, phy_id, &ixp4xx_adjust_link,
PHY_INTERFACE_MODE_MII);
if (IS_ERR(phydev)) {
err = PTR_ERR(phydev);
@@ -1450,11 +1469,11 @@ static int eth_init_one(struct platform_device *pdev)
phydev->irq = PHY_POLL;
- if ((err = register_netdev(dev)))
+ if ((err = register_netdev(ndev)))
goto err_phy_dis;
- printk(KERN_INFO "%s: MII PHY %i on %s\n", dev->name, plat->phy,
- npe_name(port->npe));
+ netdev_info(ndev, "%s: MII PHY %i on %s\n", ndev->name, plat->phy,
+ npe_name(port->npe));
return 0;
@@ -1465,58 +1484,32 @@ err_free_mem:
release_resource(port->mem_res);
err_npe_rel:
npe_release(port->npe);
-err_free:
- free_netdev(dev);
return err;
}
-static int eth_remove_one(struct platform_device *pdev)
+static int ixp4xx_eth_remove(struct platform_device *pdev)
{
- struct net_device *dev = platform_get_drvdata(pdev);
- struct phy_device *phydev = dev->phydev;
- struct port *port = netdev_priv(dev);
+ struct net_device *ndev = platform_get_drvdata(pdev);
+ struct phy_device *phydev = ndev->phydev;
+ struct port *port = netdev_priv(ndev);
- unregister_netdev(dev);
+ unregister_netdev(ndev);
phy_disconnect(phydev);
+ ixp4xx_mdio_remove();
npe_port_tab[NPE_ID(port->id)] = NULL;
npe_release(port->npe);
release_resource(port->mem_res);
- free_netdev(dev);
return 0;
}
static struct platform_driver ixp4xx_eth_driver = {
.driver.name = DRV_NAME,
- .probe = eth_init_one,
- .remove = eth_remove_one,
+ .probe = ixp4xx_eth_probe,
+ .remove = ixp4xx_eth_remove,
};
-
-static int __init eth_init_module(void)
-{
- int err;
-
- /*
- * FIXME: we bail out on device tree boot but this really needs
- * to be fixed in a nicer way: this registers the MDIO bus before
- * even matching the driver infrastructure, we should only probe
- * detected hardware.
- */
- if (of_have_populated_dt())
- return -ENODEV;
- if ((err = ixp4xx_mdio_register()))
- return err;
- return platform_driver_register(&ixp4xx_eth_driver);
-}
-
-static void __exit eth_cleanup_module(void)
-{
- platform_driver_unregister(&ixp4xx_eth_driver);
- ixp4xx_mdio_remove();
-}
+module_platform_driver(ixp4xx_eth_driver);
MODULE_AUTHOR("Krzysztof Halasa");
MODULE_DESCRIPTION("Intel IXP4xx Ethernet driver");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS("platform:ixp4xx_eth");
-module_init(eth_init_module);
-module_exit(eth_cleanup_module);
diff --git a/drivers/ptp/ptp_ixp46x.c b/drivers/net/ethernet/xscale/ptp_ixp46x.c
index 67028484e9a0..9ecc395239e9 100644
--- a/drivers/ptp/ptp_ixp46x.c
+++ b/drivers/net/ethernet/xscale/ptp_ixp46x.c
@@ -15,7 +15,8 @@
#include <linux/module.h>
#include <linux/ptp_clock_kernel.h>
-#include <mach/ixp46x_ts.h>
+
+#include "ixp46x_ts.h"
#define DRIVER "ptp_ixp46x"
#define N_EXT_TS 2
diff --git a/drivers/net/fjes/fjes_ethtool.c b/drivers/net/fjes/fjes_ethtool.c
index 09f3604cfbf8..746736c83873 100644
--- a/drivers/net/fjes/fjes_ethtool.c
+++ b/drivers/net/fjes/fjes_ethtool.c
@@ -21,7 +21,7 @@ struct fjes_stats {
#define FJES_STAT(name, stat) { \
.stat_string = name, \
- .sizeof_stat = FIELD_SIZEOF(struct fjes_adapter, stat), \
+ .sizeof_stat = sizeof_field(struct fjes_adapter, stat), \
.stat_offset = offsetof(struct fjes_adapter, stat) \
}
diff --git a/drivers/net/fjes/fjes_main.c b/drivers/net/fjes/fjes_main.c
index b517c1af9de0..8c810edece86 100644
--- a/drivers/net/fjes/fjes_main.c
+++ b/drivers/net/fjes/fjes_main.c
@@ -48,7 +48,7 @@ static void fjes_get_stats64(struct net_device *, struct rtnl_link_stats64 *);
static int fjes_change_mtu(struct net_device *, int);
static int fjes_vlan_rx_add_vid(struct net_device *, __be16 proto, u16);
static int fjes_vlan_rx_kill_vid(struct net_device *, __be16 proto, u16);
-static void fjes_tx_retry(struct net_device *);
+static void fjes_tx_retry(struct net_device *, unsigned int txqueue);
static int fjes_acpi_add(struct acpi_device *);
static int fjes_acpi_remove(struct acpi_device *);
@@ -166,6 +166,9 @@ static int fjes_acpi_add(struct acpi_device *device)
/* create platform_device */
plat_dev = platform_device_register_simple(DRV_NAME, 0, fjes_resource,
ARRAY_SIZE(fjes_resource));
+ if (IS_ERR(plat_dev))
+ return PTR_ERR(plat_dev);
+
device->driver_data = plat_dev;
return 0;
@@ -792,7 +795,7 @@ fjes_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
return ret;
}
-static void fjes_tx_retry(struct net_device *netdev)
+static void fjes_tx_retry(struct net_device *netdev, unsigned int txqueue)
{
struct netdev_queue *queue = netdev_get_tx_queue(netdev, 0);
diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index 5c6b7fc04ea6..75757e9954ba 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -1156,7 +1156,7 @@ static void geneve_setup(struct net_device *dev)
static const struct nla_policy geneve_policy[IFLA_GENEVE_MAX + 1] = {
[IFLA_GENEVE_ID] = { .type = NLA_U32 },
- [IFLA_GENEVE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
+ [IFLA_GENEVE_REMOTE] = { .len = sizeof_field(struct iphdr, daddr) },
[IFLA_GENEVE_REMOTE6] = { .len = sizeof(struct in6_addr) },
[IFLA_GENEVE_TTL] = { .type = NLA_U8 },
[IFLA_GENEVE_TOS] = { .type = NLA_U8 },
diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c
index ecfe26215935..bc85db8e6cdf 100644
--- a/drivers/net/gtp.c
+++ b/drivers/net/gtp.c
@@ -38,7 +38,6 @@ struct pdp_ctx {
struct hlist_node hlist_addr;
union {
- u64 tid;
struct {
u64 tid;
u16 flow;
@@ -541,7 +540,7 @@ static int gtp_build_skb_ip4(struct sk_buff *skb, struct net_device *dev,
mtu = dst_mtu(&rt->dst);
}
- rt->dst.ops->update_pmtu(&rt->dst, NULL, skb, mtu);
+ rt->dst.ops->update_pmtu(&rt->dst, NULL, skb, mtu, false);
if (!skb_is_gso(skb) && (iph->frag_off & htons(IP_DF)) &&
mtu < ntohs(iph->tot_len)) {
@@ -641,9 +640,16 @@ static void gtp_link_setup(struct net_device *dev)
}
static int gtp_hashtable_new(struct gtp_dev *gtp, int hsize);
-static void gtp_hashtable_free(struct gtp_dev *gtp);
static int gtp_encap_enable(struct gtp_dev *gtp, struct nlattr *data[]);
+static void gtp_destructor(struct net_device *dev)
+{
+ struct gtp_dev *gtp = netdev_priv(dev);
+
+ kfree(gtp->addr_hash);
+ kfree(gtp->tid_hash);
+}
+
static int gtp_newlink(struct net *src_net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[],
struct netlink_ext_ack *extack)
@@ -661,10 +667,13 @@ static int gtp_newlink(struct net *src_net, struct net_device *dev,
if (err < 0)
return err;
- if (!data[IFLA_GTP_PDP_HASHSIZE])
+ if (!data[IFLA_GTP_PDP_HASHSIZE]) {
hashsize = 1024;
- else
+ } else {
hashsize = nla_get_u32(data[IFLA_GTP_PDP_HASHSIZE]);
+ if (!hashsize)
+ hashsize = 1024;
+ }
err = gtp_hashtable_new(gtp, hashsize);
if (err < 0)
@@ -678,13 +687,15 @@ static int gtp_newlink(struct net *src_net, struct net_device *dev,
gn = net_generic(dev_net(dev), gtp_net_id);
list_add_rcu(&gtp->list, &gn->gtp_dev_list);
+ dev->priv_destructor = gtp_destructor;
netdev_dbg(dev, "registered new GTP interface\n");
return 0;
out_hashtable:
- gtp_hashtable_free(gtp);
+ kfree(gtp->addr_hash);
+ kfree(gtp->tid_hash);
out_encap:
gtp_encap_disable(gtp);
return err;
@@ -693,8 +704,13 @@ out_encap:
static void gtp_dellink(struct net_device *dev, struct list_head *head)
{
struct gtp_dev *gtp = netdev_priv(dev);
+ struct pdp_ctx *pctx;
+ int i;
+
+ for (i = 0; i < gtp->hash_size; i++)
+ hlist_for_each_entry_rcu(pctx, &gtp->tid_hash[i], hlist_tid)
+ pdp_context_delete(pctx);
- gtp_hashtable_free(gtp);
list_del_rcu(&gtp->list);
unregister_netdevice_queue(dev, head);
}
@@ -772,20 +788,6 @@ err1:
return -ENOMEM;
}
-static void gtp_hashtable_free(struct gtp_dev *gtp)
-{
- struct pdp_ctx *pctx;
- int i;
-
- for (i = 0; i < gtp->hash_size; i++)
- hlist_for_each_entry_rcu(pctx, &gtp->tid_hash[i], hlist_tid)
- pdp_context_delete(pctx);
-
- synchronize_rcu();
- kfree(gtp->addr_hash);
- kfree(gtp->tid_hash);
-}
-
static struct sock *gtp_encap_enable_socket(int fd, int type,
struct gtp_dev *gtp)
{
@@ -811,7 +813,7 @@ static struct sock *gtp_encap_enable_socket(int fd, int type,
lock_sock(sock->sk);
if (sock->sk->sk_user_data) {
sk = ERR_PTR(-EBUSY);
- goto out_sock;
+ goto out_rel_sock;
}
sk = sock->sk;
@@ -824,8 +826,9 @@ static struct sock *gtp_encap_enable_socket(int fd, int type,
setup_udp_tunnel_sock(sock_net(sock->sk), sock, &tuncfg);
-out_sock:
+out_rel_sock:
release_sock(sock->sk);
+out_sock:
sockfd_put(sock);
return sk;
}
@@ -849,8 +852,7 @@ static int gtp_encap_enable(struct gtp_dev *gtp, struct nlattr *data[])
sk1u = gtp_encap_enable_socket(fd1, UDP_ENCAP_GTP1U, gtp);
if (IS_ERR(sk1u)) {
- if (sk0)
- gtp_encap_disable_sock(sk0);
+ gtp_encap_disable_sock(sk0);
return PTR_ERR(sk1u);
}
}
@@ -858,10 +860,8 @@ static int gtp_encap_enable(struct gtp_dev *gtp, struct nlattr *data[])
if (data[IFLA_GTP_ROLE]) {
role = nla_get_u32(data[IFLA_GTP_ROLE]);
if (role > GTP_ROLE_SGSN) {
- if (sk0)
- gtp_encap_disable_sock(sk0);
- if (sk1u)
- gtp_encap_disable_sock(sk1u);
+ gtp_encap_disable_sock(sk0);
+ gtp_encap_disable_sock(sk1u);
return -EINVAL;
}
}
@@ -926,24 +926,31 @@ static void ipv4_pdp_fill(struct pdp_ctx *pctx, struct genl_info *info)
}
}
-static int ipv4_pdp_add(struct gtp_dev *gtp, struct sock *sk,
- struct genl_info *info)
+static int gtp_pdp_add(struct gtp_dev *gtp, struct sock *sk,
+ struct genl_info *info)
{
+ struct pdp_ctx *pctx, *pctx_tid = NULL;
struct net_device *dev = gtp->dev;
u32 hash_ms, hash_tid = 0;
- struct pdp_ctx *pctx;
+ unsigned int version;
bool found = false;
__be32 ms_addr;
ms_addr = nla_get_be32(info->attrs[GTPA_MS_ADDRESS]);
hash_ms = ipv4_hashfn(ms_addr) % gtp->hash_size;
+ version = nla_get_u32(info->attrs[GTPA_VERSION]);
- hlist_for_each_entry_rcu(pctx, &gtp->addr_hash[hash_ms], hlist_addr) {
- if (pctx->ms_addr_ip4.s_addr == ms_addr) {
- found = true;
- break;
- }
- }
+ pctx = ipv4_pdp_find(gtp, ms_addr);
+ if (pctx)
+ found = true;
+ if (version == GTP_V0)
+ pctx_tid = gtp0_pdp_find(gtp,
+ nla_get_u64(info->attrs[GTPA_TID]));
+ else if (version == GTP_V1)
+ pctx_tid = gtp1_pdp_find(gtp,
+ nla_get_u32(info->attrs[GTPA_I_TEI]));
+ if (pctx_tid)
+ found = true;
if (found) {
if (info->nlhdr->nlmsg_flags & NLM_F_EXCL)
@@ -951,6 +958,11 @@ static int ipv4_pdp_add(struct gtp_dev *gtp, struct sock *sk,
if (info->nlhdr->nlmsg_flags & NLM_F_REPLACE)
return -EOPNOTSUPP;
+ if (pctx && pctx_tid)
+ return -EEXIST;
+ if (!pctx)
+ pctx = pctx_tid;
+
ipv4_pdp_fill(pctx, info);
if (pctx->gtp_version == GTP_V0)
@@ -1074,7 +1086,7 @@ static int gtp_genl_new_pdp(struct sk_buff *skb, struct genl_info *info)
goto out_unlock;
}
- err = ipv4_pdp_add(gtp, sk, info);
+ err = gtp_pdp_add(gtp, sk, info);
out_unlock:
rcu_read_unlock();
@@ -1232,43 +1244,46 @@ static int gtp_genl_dump_pdp(struct sk_buff *skb,
struct netlink_callback *cb)
{
struct gtp_dev *last_gtp = (struct gtp_dev *)cb->args[2], *gtp;
+ int i, j, bucket = cb->args[0], skip = cb->args[1];
struct net *net = sock_net(skb->sk);
- struct gtp_net *gn = net_generic(net, gtp_net_id);
- unsigned long tid = cb->args[1];
- int i, k = cb->args[0], ret;
struct pdp_ctx *pctx;
+ struct gtp_net *gn;
+
+ gn = net_generic(net, gtp_net_id);
if (cb->args[4])
return 0;
+ rcu_read_lock();
list_for_each_entry_rcu(gtp, &gn->gtp_dev_list, list) {
if (last_gtp && last_gtp != gtp)
continue;
else
last_gtp = NULL;
- for (i = k; i < gtp->hash_size; i++) {
- hlist_for_each_entry_rcu(pctx, &gtp->tid_hash[i], hlist_tid) {
- if (tid && tid != pctx->u.tid)
- continue;
- else
- tid = 0;
-
- ret = gtp_genl_fill_info(skb,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- cb->nlh->nlmsg_type, pctx);
- if (ret < 0) {
+ for (i = bucket; i < gtp->hash_size; i++) {
+ j = 0;
+ hlist_for_each_entry_rcu(pctx, &gtp->tid_hash[i],
+ hlist_tid) {
+ if (j >= skip &&
+ gtp_genl_fill_info(skb,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ cb->nlh->nlmsg_type, pctx)) {
cb->args[0] = i;
- cb->args[1] = pctx->u.tid;
+ cb->args[1] = j;
cb->args[2] = (unsigned long)gtp;
goto out;
}
+ j++;
}
+ skip = 0;
}
+ bucket = 0;
}
cb->args[4] = 1;
out:
+ rcu_read_unlock();
return skb->len;
}
diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c
index 23281aeeb222..71d6629e65c9 100644
--- a/drivers/net/hamradio/6pack.c
+++ b/drivers/net/hamradio/6pack.c
@@ -654,10 +654,10 @@ static void sixpack_close(struct tty_struct *tty)
{
struct sixpack *sp;
- write_lock_bh(&disc_data_lock);
+ write_lock_irq(&disc_data_lock);
sp = tty->disc_data;
tty->disc_data = NULL;
- write_unlock_bh(&disc_data_lock);
+ write_unlock_irq(&disc_data_lock);
if (!sp)
return;
diff --git a/drivers/net/hamradio/hdlcdrv.c b/drivers/net/hamradio/hdlcdrv.c
index df495b5595f5..e7413a643929 100644
--- a/drivers/net/hamradio/hdlcdrv.c
+++ b/drivers/net/hamradio/hdlcdrv.c
@@ -687,8 +687,6 @@ struct net_device *hdlcdrv_register(const struct hdlcdrv_ops *ops,
struct hdlcdrv_state *s;
int err;
- BUG_ON(ops == NULL);
-
if (privsize < sizeof(struct hdlcdrv_state))
privsize = sizeof(struct hdlcdrv_state);
diff --git a/drivers/net/hamradio/mkiss.c b/drivers/net/hamradio/mkiss.c
index c5bfa19ddb93..deef14215110 100644
--- a/drivers/net/hamradio/mkiss.c
+++ b/drivers/net/hamradio/mkiss.c
@@ -773,10 +773,10 @@ static void mkiss_close(struct tty_struct *tty)
{
struct mkiss *ax;
- write_lock_bh(&disc_data_lock);
+ write_lock_irq(&disc_data_lock);
ax = tty->disc_data;
tty->disc_data = NULL;
- write_unlock_bh(&disc_data_lock);
+ write_unlock_irq(&disc_data_lock);
if (!ax)
return;
diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index 9caa876ce6e8..dc44819946e6 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -169,7 +169,6 @@ struct rndis_device {
u8 hw_mac_adr[ETH_ALEN];
u8 rss_key[NETVSC_HASH_KEYLEN];
- u16 rx_table[ITAB_NUM];
};
@@ -940,6 +939,8 @@ struct net_device_context {
u32 tx_table[VRSS_SEND_TAB_SIZE];
+ u16 rx_table[ITAB_NUM];
+
/* Ethtool settings */
u8 duplex;
u32 speed;
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index eff8fef4f775..f3f9eb8a402a 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -571,7 +571,7 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
/* Use the skb control buffer for building up the packet */
BUILD_BUG_ON(sizeof(struct hv_netvsc_packet) >
- FIELD_SIZEOF(struct sk_buff, cb));
+ sizeof_field(struct sk_buff, cb));
packet = (struct hv_netvsc_packet *)skb->cb;
packet->q_idx = skb_get_queue_mapping(skb);
@@ -1662,7 +1662,7 @@ static int netvsc_get_rxfh(struct net_device *dev, u32 *indir, u8 *key,
rndis_dev = ndev->extension;
if (indir) {
for (i = 0; i < ITAB_NUM; i++)
- indir[i] = rndis_dev->rx_table[i];
+ indir[i] = ndc->rx_table[i];
}
if (key)
@@ -1692,7 +1692,7 @@ static int netvsc_set_rxfh(struct net_device *dev, const u32 *indir,
return -EINVAL;
for (i = 0; i < ITAB_NUM; i++)
- rndis_dev->rx_table[i] = indir[i];
+ ndc->rx_table[i] = indir[i];
}
if (!key) {
diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
index 206b4e77eaf0..857c4bea451c 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@ -773,6 +773,7 @@ static int rndis_set_rss_param_msg(struct rndis_device *rdev,
const u8 *rss_key, u16 flag)
{
struct net_device *ndev = rdev->ndev;
+ struct net_device_context *ndc = netdev_priv(ndev);
struct rndis_request *request;
struct rndis_set_request *set;
struct rndis_set_complete *set_complete;
@@ -812,7 +813,7 @@ static int rndis_set_rss_param_msg(struct rndis_device *rdev,
/* Set indirection table entries */
itab = (u32 *)(rssp + 1);
for (i = 0; i < ITAB_NUM; i++)
- itab[i] = rdev->rx_table[i];
+ itab[i] = ndc->rx_table[i];
/* Set hask key values */
keyp = (u8 *)((unsigned long)rssp + rssp->hashkey_offset);
@@ -1171,6 +1172,9 @@ int rndis_set_subchannel(struct net_device *ndev,
wait_event(nvdev->subchan_open,
atomic_read(&nvdev->open_chn) == nvdev->num_chn);
+ for (i = 0; i < VRSS_SEND_TAB_SIZE; i++)
+ ndev_ctx->tx_table[i] = i % nvdev->num_chn;
+
/* ignore failures from setting rss parameters, still have channels */
if (dev_info)
rndis_filter_set_rss_param(rdev, dev_info->rss_key);
@@ -1180,9 +1184,6 @@ int rndis_set_subchannel(struct net_device *ndev,
netif_set_real_num_tx_queues(ndev, nvdev->num_chn);
netif_set_real_num_rx_queues(ndev, nvdev->num_chn);
- for (i = 0; i < VRSS_SEND_TAB_SIZE; i++)
- ndev_ctx->tx_table[i] = i % nvdev->num_chn;
-
return 0;
}
@@ -1312,6 +1313,7 @@ struct netvsc_device *rndis_filter_device_add(struct hv_device *dev,
struct netvsc_device_info *device_info)
{
struct net_device *net = hv_get_drvdata(dev);
+ struct net_device_context *ndc = netdev_priv(net);
struct netvsc_device *net_device;
struct rndis_device *rndis_device;
struct ndis_recv_scale_cap rsscap;
@@ -1398,9 +1400,11 @@ struct netvsc_device *rndis_filter_device_add(struct hv_device *dev,
/* We will use the given number of channels if available. */
net_device->num_chn = min(net_device->max_chn, device_info->num_chn);
- for (i = 0; i < ITAB_NUM; i++)
- rndis_device->rx_table[i] = ethtool_rxfh_indir_default(
+ if (!netif_is_rxfh_configured(net)) {
+ for (i = 0; i < ITAB_NUM; i++)
+ ndc->rx_table[i] = ethtool_rxfh_indir_default(
i, net_device->num_chn);
+ }
atomic_set(&net_device->open_chn, 1);
vmbus_set_sc_create_callback(dev->channel, netvsc_sc_open);
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 05631d97eeb4..67a830dd0d30 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -259,7 +259,7 @@ static void macvlan_broadcast(struct sk_buff *skb,
struct net_device *src,
enum macvlan_mode mode)
{
- const struct ethhdr *eth = eth_hdr(skb);
+ const struct ethhdr *eth = skb_eth_hdr(skb);
const struct macvlan_dev *vlan;
struct sk_buff *nskb;
unsigned int i;
@@ -1036,8 +1036,8 @@ static int macvlan_ethtool_get_ts_info(struct net_device *dev,
const struct ethtool_ops *ops = real_dev->ethtool_ops;
struct phy_device *phydev = real_dev->phydev;
- if (phydev && phydev->drv && phydev->drv->ts_info) {
- return phydev->drv->ts_info(phydev, info);
+ if (phy_has_tsinfo(phydev)) {
+ return phy_ts_info(phydev, info);
} else if (ops->get_ts_info) {
return ops->get_ts_info(real_dev, info);
} else {
diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c
index 059711edfc61..c572960bb54e 100644
--- a/drivers/net/netdevsim/dev.c
+++ b/drivers/net/netdevsim/dev.c
@@ -270,7 +270,7 @@ struct nsim_trap_data {
};
/* All driver-specific traps must be documented in
- * Documentation/networking/devlink-trap-netdevsim.rst
+ * Documentation/networking/devlink/netdevsim.rst
*/
enum {
NSIM_TRAP_ID_BASE = DEVLINK_TRAP_GENERIC_ID_MAX,
diff --git a/drivers/net/netdevsim/fib.c b/drivers/net/netdevsim/fib.c
index 13540dee7364..b5df308b4e33 100644
--- a/drivers/net/netdevsim/fib.c
+++ b/drivers/net/netdevsim/fib.c
@@ -177,10 +177,9 @@ static int nsim_fib_event_nb(struct notifier_block *nb, unsigned long event,
event == FIB_EVENT_RULE_ADD);
break;
- case FIB_EVENT_ENTRY_ADD: /* fall through */
+ case FIB_EVENT_ENTRY_REPLACE: /* fall through */
case FIB_EVENT_ENTRY_DEL:
- err = nsim_fib_event(data, info,
- event == FIB_EVENT_ENTRY_ADD);
+ err = nsim_fib_event(data, info, event != FIB_EVENT_ENTRY_DEL);
break;
}
diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig
index 5848219005d7..2e016271e126 100644
--- a/drivers/net/phy/Kconfig
+++ b/drivers/net/phy/Kconfig
@@ -324,6 +324,12 @@ config BROADCOM_PHY
Currently supports the BCM5411, BCM5421, BCM5461, BCM54616S, BCM5464,
BCM5481, BCM54810 and BCM5482 PHYs.
+config BCM84881_PHY
+ bool "Broadcom BCM84881 PHY"
+ depends on PHYLIB=y
+ ---help---
+ Support the Broadcom BCM84881 PHY.
+
config CICADA_PHY
tristate "Cicada PHYs"
---help---
diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile
index b433ec3bf9a6..fe5badf13b65 100644
--- a/drivers/net/phy/Makefile
+++ b/drivers/net/phy/Makefile
@@ -43,6 +43,8 @@ obj-$(CONFIG_MDIO_SUN4I) += mdio-sun4i.o
obj-$(CONFIG_MDIO_THUNDER) += mdio-thunder.o
obj-$(CONFIG_MDIO_XGENE) += mdio-xgene.o
+obj-$(CONFIG_NETWORK_PHY_TIMESTAMPING) += mii_timestamper.o
+
obj-$(CONFIG_SFP) += sfp.o
sfp-obj-$(CONFIG_SFP) += sfp-bus.o
obj-y += $(sfp-obj-y) $(sfp-obj-m)
@@ -62,6 +64,7 @@ obj-$(CONFIG_BCM87XX_PHY) += bcm87xx.o
obj-$(CONFIG_BCM_CYGNUS_PHY) += bcm-cygnus.o
obj-$(CONFIG_BCM_NET_PHYLIB) += bcm-phy-lib.o
obj-$(CONFIG_BROADCOM_PHY) += broadcom.o
+obj-$(CONFIG_BCM84881_PHY) += bcm84881.o
obj-$(CONFIG_CICADA_PHY) += cicada.o
obj-$(CONFIG_CORTINA_PHY) += cortina.o
obj-$(CONFIG_DAVICOM_PHY) += davicom.o
diff --git a/drivers/net/phy/aquantia_main.c b/drivers/net/phy/aquantia_main.c
index 3b29d381116f..31927b2c7d5a 100644
--- a/drivers/net/phy/aquantia_main.c
+++ b/drivers/net/phy/aquantia_main.c
@@ -358,9 +358,11 @@ static int aqr107_read_status(struct phy_device *phydev)
switch (FIELD_GET(MDIO_PHYXS_VEND_IF_STATUS_TYPE_MASK, val)) {
case MDIO_PHYXS_VEND_IF_STATUS_TYPE_KR:
- case MDIO_PHYXS_VEND_IF_STATUS_TYPE_XFI:
phydev->interface = PHY_INTERFACE_MODE_10GKR;
break;
+ case MDIO_PHYXS_VEND_IF_STATUS_TYPE_XFI:
+ phydev->interface = PHY_INTERFACE_MODE_10GBASER;
+ break;
case MDIO_PHYXS_VEND_IF_STATUS_TYPE_USXGMII:
phydev->interface = PHY_INTERFACE_MODE_USXGMII;
break;
@@ -493,7 +495,8 @@ static int aqr107_config_init(struct phy_device *phydev)
phydev->interface != PHY_INTERFACE_MODE_2500BASEX &&
phydev->interface != PHY_INTERFACE_MODE_XGMII &&
phydev->interface != PHY_INTERFACE_MODE_USXGMII &&
- phydev->interface != PHY_INTERFACE_MODE_10GKR)
+ phydev->interface != PHY_INTERFACE_MODE_10GKR &&
+ phydev->interface != PHY_INTERFACE_MODE_10GBASER)
return -ENODEV;
WARN(phydev->interface == PHY_INTERFACE_MODE_XGMII,
@@ -627,6 +630,8 @@ static struct phy_driver aqr_driver[] = {
.config_intr = aqr_config_intr,
.ack_interrupt = aqr_ack_interrupt,
.read_status = aqr_read_status,
+ .suspend = aqr107_suspend,
+ .resume = aqr107_resume,
},
{
PHY_ID_MATCH_MODEL(PHY_ID_AQR106),
diff --git a/drivers/net/phy/bcm84881.c b/drivers/net/phy/bcm84881.c
new file mode 100644
index 000000000000..14d55a77eb28
--- /dev/null
+++ b/drivers/net/phy/bcm84881.c
@@ -0,0 +1,269 @@
+// SPDX-License-Identifier: GPL-2.0
+// Broadcom BCM84881 NBASE-T PHY driver, as found on a SFP+ module.
+// Copyright (C) 2019 Russell King, Deep Blue Solutions Ltd.
+//
+// Like the Marvell 88x3310, the Broadcom 84881 changes its host-side
+// interface according to the operating speed between 10GBASE-R,
+// 2500BASE-X and SGMII (but unlike the 88x3310, without the control
+// word).
+//
+// This driver only supports those aspects of the PHY that I'm able to
+// observe and test with the SFP+ module, which is an incomplete subset
+// of what this PHY is able to support. For example, I only assume it
+// supports a single lane Serdes connection, but it may be that the PHY
+// is able to support more than that.
+#include <linux/delay.h>
+#include <linux/module.h>
+#include <linux/phy.h>
+
+enum {
+ MDIO_AN_C22 = 0xffe0,
+};
+
+static int bcm84881_wait_init(struct phy_device *phydev)
+{
+ unsigned int tries = 20;
+ int ret, val;
+
+ do {
+ val = phy_read_mmd(phydev, MDIO_MMD_PMAPMD, MDIO_CTRL1);
+ if (val < 0) {
+ ret = val;
+ break;
+ }
+ if (!(val & MDIO_CTRL1_RESET)) {
+ ret = 0;
+ break;
+ }
+ if (!--tries) {
+ ret = -ETIMEDOUT;
+ break;
+ }
+ msleep(100);
+ } while (1);
+
+ if (ret)
+ phydev_err(phydev, "%s failed: %d\n", __func__, ret);
+
+ return ret;
+}
+
+static int bcm84881_config_init(struct phy_device *phydev)
+{
+ switch (phydev->interface) {
+ case PHY_INTERFACE_MODE_SGMII:
+ case PHY_INTERFACE_MODE_2500BASEX:
+ case PHY_INTERFACE_MODE_10GBASER:
+ break;
+ default:
+ return -ENODEV;
+ }
+ return 0;
+}
+
+static int bcm84881_probe(struct phy_device *phydev)
+{
+ /* This driver requires PMAPMD and AN blocks */
+ const u32 mmd_mask = MDIO_DEVS_PMAPMD | MDIO_DEVS_AN;
+
+ if (!phydev->is_c45 ||
+ (phydev->c45_ids.devices_in_package & mmd_mask) != mmd_mask)
+ return -ENODEV;
+
+ return 0;
+}
+
+static int bcm84881_get_features(struct phy_device *phydev)
+{
+ int ret;
+
+ ret = genphy_c45_pma_read_abilities(phydev);
+ if (ret)
+ return ret;
+
+ /* Although the PHY sets bit 1.11.8, it does not support 10M modes */
+ linkmode_clear_bit(ETHTOOL_LINK_MODE_10baseT_Half_BIT,
+ phydev->supported);
+ linkmode_clear_bit(ETHTOOL_LINK_MODE_10baseT_Full_BIT,
+ phydev->supported);
+
+ return 0;
+}
+
+static int bcm84881_config_aneg(struct phy_device *phydev)
+{
+ bool changed = false;
+ u32 adv;
+ int ret;
+
+ /* Wait for the PHY to finish initialising, otherwise our
+ * advertisement may be overwritten.
+ */
+ ret = bcm84881_wait_init(phydev);
+ if (ret)
+ return ret;
+
+ /* We don't support manual MDI control */
+ phydev->mdix_ctrl = ETH_TP_MDI_AUTO;
+
+ /* disabled autoneg doesn't seem to work with this PHY */
+ if (phydev->autoneg == AUTONEG_DISABLE)
+ return -EINVAL;
+
+ ret = genphy_c45_an_config_aneg(phydev);
+ if (ret < 0)
+ return ret;
+ if (ret > 0)
+ changed = true;
+
+ adv = linkmode_adv_to_mii_ctrl1000_t(phydev->advertising);
+ ret = phy_modify_mmd_changed(phydev, MDIO_MMD_AN,
+ MDIO_AN_C22 + MII_CTRL1000,
+ ADVERTISE_1000FULL | ADVERTISE_1000HALF,
+ adv);
+ if (ret < 0)
+ return ret;
+ if (ret > 0)
+ changed = true;
+
+ return genphy_c45_check_and_restart_aneg(phydev, changed);
+}
+
+static int bcm84881_aneg_done(struct phy_device *phydev)
+{
+ int bmsr, val;
+
+ val = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_STAT1);
+ if (val < 0)
+ return val;
+
+ bmsr = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_C22 + MII_BMSR);
+ if (bmsr < 0)
+ return val;
+
+ return !!(val & MDIO_AN_STAT1_COMPLETE) &&
+ !!(bmsr & BMSR_ANEGCOMPLETE);
+}
+
+static int bcm84881_read_status(struct phy_device *phydev)
+{
+ unsigned int mode;
+ int bmsr, val;
+
+ val = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_CTRL1);
+ if (val < 0)
+ return val;
+
+ if (val & MDIO_AN_CTRL1_RESTART) {
+ phydev->link = 0;
+ return 0;
+ }
+
+ val = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_STAT1);
+ if (val < 0)
+ return val;
+
+ bmsr = phy_read_mmd(phydev, MDIO_MMD_AN, MDIO_AN_C22 + MII_BMSR);
+ if (bmsr < 0)
+ return val;
+
+ phydev->autoneg_complete = !!(val & MDIO_AN_STAT1_COMPLETE) &&
+ !!(bmsr & BMSR_ANEGCOMPLETE);
+ phydev->link = !!(val & MDIO_STAT1_LSTATUS) &&
+ !!(bmsr & BMSR_LSTATUS);
+ if (phydev->autoneg == AUTONEG_ENABLE && !phydev->autoneg_complete)
+ phydev->link = false;
+
+ if (!phydev->link)
+ return 0;
+
+ linkmode_zero(phydev->lp_advertising);
+ phydev->speed = SPEED_UNKNOWN;
+ phydev->duplex = DUPLEX_UNKNOWN;
+ phydev->pause = 0;
+ phydev->asym_pause = 0;
+ phydev->mdix = 0;
+
+ if (phydev->autoneg_complete) {
+ val = genphy_c45_read_lpa(phydev);
+ if (val < 0)
+ return val;
+
+ val = phy_read_mmd(phydev, MDIO_MMD_AN,
+ MDIO_AN_C22 + MII_STAT1000);
+ if (val < 0)
+ return val;
+
+ mii_stat1000_mod_linkmode_lpa_t(phydev->lp_advertising, val);
+
+ if (phydev->autoneg == AUTONEG_ENABLE)
+ phy_resolve_aneg_linkmode(phydev);
+ }
+
+ if (phydev->autoneg == AUTONEG_DISABLE) {
+ /* disabled autoneg doesn't seem to work, so force the link
+ * down.
+ */
+ phydev->link = 0;
+ return 0;
+ }
+
+ /* Set the host link mode - we set the phy interface mode and
+ * the speed according to this register so that downshift works.
+ * We leave the duplex setting as per the resolution from the
+ * above.
+ */
+ val = phy_read_mmd(phydev, MDIO_MMD_VEND1, 0x4011);
+ mode = (val & 0x1e) >> 1;
+ if (mode == 1 || mode == 2)
+ phydev->interface = PHY_INTERFACE_MODE_SGMII;
+ else if (mode == 3)
+ phydev->interface = PHY_INTERFACE_MODE_10GBASER;
+ else if (mode == 4)
+ phydev->interface = PHY_INTERFACE_MODE_2500BASEX;
+ switch (mode & 7) {
+ case 1:
+ phydev->speed = SPEED_100;
+ break;
+ case 2:
+ phydev->speed = SPEED_1000;
+ break;
+ case 3:
+ phydev->speed = SPEED_10000;
+ break;
+ case 4:
+ phydev->speed = SPEED_2500;
+ break;
+ case 5:
+ phydev->speed = SPEED_5000;
+ break;
+ }
+
+ return genphy_c45_read_mdix(phydev);
+}
+
+static struct phy_driver bcm84881_drivers[] = {
+ {
+ .phy_id = 0xae025150,
+ .phy_id_mask = 0xfffffff0,
+ .name = "Broadcom BCM84881",
+ .config_init = bcm84881_config_init,
+ .probe = bcm84881_probe,
+ .get_features = bcm84881_get_features,
+ .config_aneg = bcm84881_config_aneg,
+ .aneg_done = bcm84881_aneg_done,
+ .read_status = bcm84881_read_status,
+ },
+};
+
+module_phy_driver(bcm84881_drivers);
+
+/* FIXME: module auto-loading for Clause 45 PHYs seems non-functional */
+static struct mdio_device_id __maybe_unused bcm84881_tbl[] = {
+ { 0xae025150, 0xfffffff0 },
+ { },
+};
+MODULE_AUTHOR("Russell King");
+MODULE_DESCRIPTION("Broadcom BCM84881 PHY driver");
+MODULE_DEVICE_TABLE(mdio, bcm84881_tbl);
+MODULE_LICENSE("GPL");
diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c
index 8f241b57fcf6..ac72a324fcd1 100644
--- a/drivers/net/phy/dp83640.c
+++ b/drivers/net/phy/dp83640.c
@@ -98,6 +98,7 @@ struct dp83640_private {
struct list_head list;
struct dp83640_clock *clock;
struct phy_device *phydev;
+ struct mii_timestamper mii_ts;
struct delayed_work ts_work;
int hwts_tx_en;
int hwts_rx_en;
@@ -1131,96 +1132,6 @@ static void dp83640_clock_put(struct dp83640_clock *clock)
mutex_unlock(&clock->clock_lock);
}
-static int dp83640_probe(struct phy_device *phydev)
-{
- struct dp83640_clock *clock;
- struct dp83640_private *dp83640;
- int err = -ENOMEM, i;
-
- if (phydev->mdio.addr == BROADCAST_ADDR)
- return 0;
-
- clock = dp83640_clock_get_bus(phydev->mdio.bus);
- if (!clock)
- goto no_clock;
-
- dp83640 = kzalloc(sizeof(struct dp83640_private), GFP_KERNEL);
- if (!dp83640)
- goto no_memory;
-
- dp83640->phydev = phydev;
- INIT_DELAYED_WORK(&dp83640->ts_work, rx_timestamp_work);
-
- INIT_LIST_HEAD(&dp83640->rxts);
- INIT_LIST_HEAD(&dp83640->rxpool);
- for (i = 0; i < MAX_RXTS; i++)
- list_add(&dp83640->rx_pool_data[i].list, &dp83640->rxpool);
-
- phydev->priv = dp83640;
-
- spin_lock_init(&dp83640->rx_lock);
- skb_queue_head_init(&dp83640->rx_queue);
- skb_queue_head_init(&dp83640->tx_queue);
-
- dp83640->clock = clock;
-
- if (choose_this_phy(clock, phydev)) {
- clock->chosen = dp83640;
- clock->ptp_clock = ptp_clock_register(&clock->caps,
- &phydev->mdio.dev);
- if (IS_ERR(clock->ptp_clock)) {
- err = PTR_ERR(clock->ptp_clock);
- goto no_register;
- }
- } else
- list_add_tail(&dp83640->list, &clock->phylist);
-
- dp83640_clock_put(clock);
- return 0;
-
-no_register:
- clock->chosen = NULL;
- kfree(dp83640);
-no_memory:
- dp83640_clock_put(clock);
-no_clock:
- return err;
-}
-
-static void dp83640_remove(struct phy_device *phydev)
-{
- struct dp83640_clock *clock;
- struct list_head *this, *next;
- struct dp83640_private *tmp, *dp83640 = phydev->priv;
-
- if (phydev->mdio.addr == BROADCAST_ADDR)
- return;
-
- enable_status_frames(phydev, false);
- cancel_delayed_work_sync(&dp83640->ts_work);
-
- skb_queue_purge(&dp83640->rx_queue);
- skb_queue_purge(&dp83640->tx_queue);
-
- clock = dp83640_clock_get(dp83640->clock);
-
- if (dp83640 == clock->chosen) {
- ptp_clock_unregister(clock->ptp_clock);
- clock->chosen = NULL;
- } else {
- list_for_each_safe(this, next, &clock->phylist) {
- tmp = list_entry(this, struct dp83640_private, list);
- if (tmp == dp83640) {
- list_del_init(&tmp->list);
- break;
- }
- }
- }
-
- dp83640_clock_put(clock);
- kfree(dp83640);
-}
-
static int dp83640_soft_reset(struct phy_device *phydev)
{
int ret;
@@ -1319,9 +1230,10 @@ static int dp83640_config_intr(struct phy_device *phydev)
}
}
-static int dp83640_hwtstamp(struct phy_device *phydev, struct ifreq *ifr)
+static int dp83640_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr)
{
- struct dp83640_private *dp83640 = phydev->priv;
+ struct dp83640_private *dp83640 =
+ container_of(mii_ts, struct dp83640_private, mii_ts);
struct hwtstamp_config cfg;
u16 txcfg0, rxcfg0;
@@ -1397,8 +1309,8 @@ static int dp83640_hwtstamp(struct phy_device *phydev, struct ifreq *ifr)
mutex_lock(&dp83640->clock->extreg_lock);
- ext_write(0, phydev, PAGE5, PTP_TXCFG0, txcfg0);
- ext_write(0, phydev, PAGE5, PTP_RXCFG0, rxcfg0);
+ ext_write(0, dp83640->phydev, PAGE5, PTP_TXCFG0, txcfg0);
+ ext_write(0, dp83640->phydev, PAGE5, PTP_RXCFG0, rxcfg0);
mutex_unlock(&dp83640->clock->extreg_lock);
@@ -1428,10 +1340,11 @@ static void rx_timestamp_work(struct work_struct *work)
schedule_delayed_work(&dp83640->ts_work, SKB_TIMESTAMP_TIMEOUT);
}
-static bool dp83640_rxtstamp(struct phy_device *phydev,
+static bool dp83640_rxtstamp(struct mii_timestamper *mii_ts,
struct sk_buff *skb, int type)
{
- struct dp83640_private *dp83640 = phydev->priv;
+ struct dp83640_private *dp83640 =
+ container_of(mii_ts, struct dp83640_private, mii_ts);
struct dp83640_skb_info *skb_info = (struct dp83640_skb_info *)skb->cb;
struct list_head *this, *next;
struct rxts *rxts;
@@ -1477,11 +1390,12 @@ static bool dp83640_rxtstamp(struct phy_device *phydev,
return true;
}
-static void dp83640_txtstamp(struct phy_device *phydev,
+static void dp83640_txtstamp(struct mii_timestamper *mii_ts,
struct sk_buff *skb, int type)
{
struct dp83640_skb_info *skb_info = (struct dp83640_skb_info *)skb->cb;
- struct dp83640_private *dp83640 = phydev->priv;
+ struct dp83640_private *dp83640 =
+ container_of(mii_ts, struct dp83640_private, mii_ts);
switch (dp83640->hwts_tx_en) {
@@ -1504,9 +1418,11 @@ static void dp83640_txtstamp(struct phy_device *phydev,
}
}
-static int dp83640_ts_info(struct phy_device *dev, struct ethtool_ts_info *info)
+static int dp83640_ts_info(struct mii_timestamper *mii_ts,
+ struct ethtool_ts_info *info)
{
- struct dp83640_private *dp83640 = dev->priv;
+ struct dp83640_private *dp83640 =
+ container_of(mii_ts, struct dp83640_private, mii_ts);
info->so_timestamping =
SOF_TIMESTAMPING_TX_HARDWARE |
@@ -1526,6 +1442,103 @@ static int dp83640_ts_info(struct phy_device *dev, struct ethtool_ts_info *info)
return 0;
}
+static int dp83640_probe(struct phy_device *phydev)
+{
+ struct dp83640_clock *clock;
+ struct dp83640_private *dp83640;
+ int err = -ENOMEM, i;
+
+ if (phydev->mdio.addr == BROADCAST_ADDR)
+ return 0;
+
+ clock = dp83640_clock_get_bus(phydev->mdio.bus);
+ if (!clock)
+ goto no_clock;
+
+ dp83640 = kzalloc(sizeof(struct dp83640_private), GFP_KERNEL);
+ if (!dp83640)
+ goto no_memory;
+
+ dp83640->phydev = phydev;
+ dp83640->mii_ts.rxtstamp = dp83640_rxtstamp;
+ dp83640->mii_ts.txtstamp = dp83640_txtstamp;
+ dp83640->mii_ts.hwtstamp = dp83640_hwtstamp;
+ dp83640->mii_ts.ts_info = dp83640_ts_info;
+
+ INIT_DELAYED_WORK(&dp83640->ts_work, rx_timestamp_work);
+ INIT_LIST_HEAD(&dp83640->rxts);
+ INIT_LIST_HEAD(&dp83640->rxpool);
+ for (i = 0; i < MAX_RXTS; i++)
+ list_add(&dp83640->rx_pool_data[i].list, &dp83640->rxpool);
+
+ phydev->mii_ts = &dp83640->mii_ts;
+ phydev->priv = dp83640;
+
+ spin_lock_init(&dp83640->rx_lock);
+ skb_queue_head_init(&dp83640->rx_queue);
+ skb_queue_head_init(&dp83640->tx_queue);
+
+ dp83640->clock = clock;
+
+ if (choose_this_phy(clock, phydev)) {
+ clock->chosen = dp83640;
+ clock->ptp_clock = ptp_clock_register(&clock->caps,
+ &phydev->mdio.dev);
+ if (IS_ERR(clock->ptp_clock)) {
+ err = PTR_ERR(clock->ptp_clock);
+ goto no_register;
+ }
+ } else
+ list_add_tail(&dp83640->list, &clock->phylist);
+
+ dp83640_clock_put(clock);
+ return 0;
+
+no_register:
+ clock->chosen = NULL;
+ kfree(dp83640);
+no_memory:
+ dp83640_clock_put(clock);
+no_clock:
+ return err;
+}
+
+static void dp83640_remove(struct phy_device *phydev)
+{
+ struct dp83640_clock *clock;
+ struct list_head *this, *next;
+ struct dp83640_private *tmp, *dp83640 = phydev->priv;
+
+ if (phydev->mdio.addr == BROADCAST_ADDR)
+ return;
+
+ phydev->mii_ts = NULL;
+
+ enable_status_frames(phydev, false);
+ cancel_delayed_work_sync(&dp83640->ts_work);
+
+ skb_queue_purge(&dp83640->rx_queue);
+ skb_queue_purge(&dp83640->tx_queue);
+
+ clock = dp83640_clock_get(dp83640->clock);
+
+ if (dp83640 == clock->chosen) {
+ ptp_clock_unregister(clock->ptp_clock);
+ clock->chosen = NULL;
+ } else {
+ list_for_each_safe(this, next, &clock->phylist) {
+ tmp = list_entry(this, struct dp83640_private, list);
+ if (tmp == dp83640) {
+ list_del_init(&tmp->list);
+ break;
+ }
+ }
+ }
+
+ dp83640_clock_put(clock);
+ kfree(dp83640);
+}
+
static struct phy_driver dp83640_driver = {
.phy_id = DP83640_PHY_ID,
.phy_id_mask = 0xfffffff0,
@@ -1537,10 +1550,6 @@ static struct phy_driver dp83640_driver = {
.config_init = dp83640_config_init,
.ack_interrupt = dp83640_ack_interrupt,
.config_intr = dp83640_config_intr,
- .ts_info = dp83640_ts_info,
- .hwtstamp = dp83640_hwtstamp,
- .rxtstamp = dp83640_rxtstamp,
- .txtstamp = dp83640_txtstamp,
};
static int __init dp83640_init(void)
diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c
index 9cd9dcee4eb2..adda0d0eab80 100644
--- a/drivers/net/phy/dp83867.c
+++ b/drivers/net/phy/dp83867.c
@@ -93,9 +93,11 @@
#define DP83867_STRAP_STS2_CLK_SKEW_NONE BIT(2)
/* PHY CTRL bits */
-#define DP83867_PHYCR_FIFO_DEPTH_SHIFT 14
+#define DP83867_PHYCR_TX_FIFO_DEPTH_SHIFT 14
+#define DP83867_PHYCR_RX_FIFO_DEPTH_SHIFT 12
#define DP83867_PHYCR_FIFO_DEPTH_MAX 0x03
-#define DP83867_PHYCR_FIFO_DEPTH_MASK GENMASK(15, 14)
+#define DP83867_PHYCR_TX_FIFO_DEPTH_MASK GENMASK(15, 14)
+#define DP83867_PHYCR_RX_FIFO_DEPTH_MASK GENMASK(13, 12)
#define DP83867_PHYCR_RESERVED_MASK BIT(11)
/* RGMIIDCTL bits */
@@ -131,7 +133,8 @@ enum {
struct dp83867_private {
u32 rx_id_delay;
u32 tx_id_delay;
- u32 fifo_depth;
+ u32 tx_fifo_depth;
+ u32 rx_fifo_depth;
int io_impedance;
int port_mirroring;
bool rxctrl_strap_quirk;
@@ -408,18 +411,32 @@ static int dp83867_of_init(struct phy_device *phydev)
dp83867->port_mirroring = DP83867_PORT_MIRROING_DIS;
ret = of_property_read_u32(of_node, "ti,fifo-depth",
- &dp83867->fifo_depth);
+ &dp83867->tx_fifo_depth);
if (ret) {
- phydev_err(phydev,
- "ti,fifo-depth property is required\n");
- return ret;
+ ret = of_property_read_u32(of_node, "tx-fifo-depth",
+ &dp83867->tx_fifo_depth);
+ if (ret)
+ dp83867->tx_fifo_depth =
+ DP83867_PHYCR_FIFO_DEPTH_4_B_NIB;
}
- if (dp83867->fifo_depth > DP83867_PHYCR_FIFO_DEPTH_MAX) {
- phydev_err(phydev,
- "ti,fifo-depth value %u out of range\n",
- dp83867->fifo_depth);
+
+ if (dp83867->tx_fifo_depth > DP83867_PHYCR_FIFO_DEPTH_MAX) {
+ phydev_err(phydev, "tx-fifo-depth value %u out of range\n",
+ dp83867->tx_fifo_depth);
+ return -EINVAL;
+ }
+
+ ret = of_property_read_u32(of_node, "rx-fifo-depth",
+ &dp83867->rx_fifo_depth);
+ if (ret)
+ dp83867->rx_fifo_depth = DP83867_PHYCR_FIFO_DEPTH_4_B_NIB;
+
+ if (dp83867->rx_fifo_depth > DP83867_PHYCR_FIFO_DEPTH_MAX) {
+ phydev_err(phydev, "rx-fifo-depth value %u out of range\n",
+ dp83867->rx_fifo_depth);
return -EINVAL;
}
+
return 0;
}
#else
@@ -458,12 +475,31 @@ static int dp83867_config_init(struct phy_device *phydev)
phy_clear_bits_mmd(phydev, DP83867_DEVADDR, DP83867_CFG4,
BIT(7));
+ if (phy_interface_is_rgmii(phydev) ||
+ phydev->interface == PHY_INTERFACE_MODE_SGMII) {
+ val = phy_read(phydev, MII_DP83867_PHYCTRL);
+ if (val < 0)
+ return val;
+
+ val &= ~DP83867_PHYCR_TX_FIFO_DEPTH_MASK;
+ val |= (dp83867->tx_fifo_depth <<
+ DP83867_PHYCR_TX_FIFO_DEPTH_SHIFT);
+
+ if (phydev->interface == PHY_INTERFACE_MODE_SGMII) {
+ val &= ~DP83867_PHYCR_RX_FIFO_DEPTH_MASK;
+ val |= (dp83867->rx_fifo_depth <<
+ DP83867_PHYCR_RX_FIFO_DEPTH_SHIFT);
+ }
+
+ ret = phy_write(phydev, MII_DP83867_PHYCTRL, val);
+ if (ret)
+ return ret;
+ }
+
if (phy_interface_is_rgmii(phydev)) {
val = phy_read(phydev, MII_DP83867_PHYCTRL);
if (val < 0)
return val;
- val &= ~DP83867_PHYCR_FIFO_DEPTH_MASK;
- val |= (dp83867->fifo_depth << DP83867_PHYCR_FIFO_DEPTH_SHIFT);
/* The code below checks if "port mirroring" N/A MODE4 has been
* enabled during power on bootstrap.
diff --git a/drivers/net/phy/dp83869.c b/drivers/net/phy/dp83869.c
index 93021904c5e4..7996a4aea8d2 100644
--- a/drivers/net/phy/dp83869.c
+++ b/drivers/net/phy/dp83869.c
@@ -334,7 +334,7 @@ static int dp83869_configure_mode(struct phy_device *phydev,
break;
default:
return -EINVAL;
- };
+ }
return ret;
}
diff --git a/drivers/net/phy/fixed_phy.c b/drivers/net/phy/fixed_phy.c
index 7c5265fd2b94..4a3d34f40cb9 100644
--- a/drivers/net/phy/fixed_phy.c
+++ b/drivers/net/phy/fixed_phy.c
@@ -210,18 +210,15 @@ static struct gpio_desc *fixed_phy_get_gpiod(struct device_node *np)
* Linux device associated with it, we simply have obtain
* the GPIO descriptor from the device tree like this.
*/
- gpiod = gpiod_get_from_of_node(fixed_link_node, "link-gpios", 0,
- GPIOD_IN, "mdio");
- of_node_put(fixed_link_node);
- if (IS_ERR(gpiod)) {
- if (PTR_ERR(gpiod) == -EPROBE_DEFER)
- return gpiod;
-
+ gpiod = fwnode_gpiod_get_index(of_fwnode_handle(fixed_link_node),
+ "link", 0, GPIOD_IN, "mdio");
+ if (IS_ERR(gpiod) && PTR_ERR(gpiod) != -EPROBE_DEFER) {
if (PTR_ERR(gpiod) != -ENOENT)
pr_err("error getting GPIO for fixed link %pOF, proceed without\n",
fixed_link_node);
gpiod = NULL;
}
+ of_node_put(fixed_link_node);
return gpiod;
}
diff --git a/drivers/net/phy/lxt.c b/drivers/net/phy/lxt.c
index 356bd6472f49..fec58ad69e02 100644
--- a/drivers/net/phy/lxt.c
+++ b/drivers/net/phy/lxt.c
@@ -190,27 +190,11 @@ static int lxt973a2_read_status(struct phy_device *phydev)
phydev->duplex = DUPLEX_FULL;
}
- if (phydev->duplex == DUPLEX_FULL) {
- phydev->pause = lpa & LPA_PAUSE_CAP ? 1 : 0;
- phydev->asym_pause = lpa & LPA_PAUSE_ASYM ? 1 : 0;
- }
+ phy_resolve_aneg_pause(phydev);
} else {
- int bmcr = phy_read(phydev, MII_BMCR);
-
- if (bmcr < 0)
- return bmcr;
-
- if (bmcr & BMCR_FULLDPLX)
- phydev->duplex = DUPLEX_FULL;
- else
- phydev->duplex = DUPLEX_HALF;
-
- if (bmcr & BMCR_SPEED1000)
- phydev->speed = SPEED_1000;
- else if (bmcr & BMCR_SPEED100)
- phydev->speed = SPEED_100;
- else
- phydev->speed = SPEED_10;
+ err = genphy_read_status_fixed(phydev);
+ if (err < 0)
+ return err;
phydev->pause = phydev->asym_pause = 0;
linkmode_zero(phydev->lp_advertising);
diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
index b1fbd1937328..28e33ece4ce1 100644
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -162,19 +162,9 @@
#define MII_88E1510_GEN_CTRL_REG_1_MODE_SGMII 0x1 /* SGMII to copper */
#define MII_88E1510_GEN_CTRL_REG_1_RESET 0x8000 /* Soft reset */
-#define LPA_FIBER_1000HALF 0x40
-#define LPA_FIBER_1000FULL 0x20
-
#define LPA_PAUSE_FIBER 0x180
#define LPA_PAUSE_ASYM_FIBER 0x100
-#define ADVERTISE_FIBER_1000HALF 0x40
-#define ADVERTISE_FIBER_1000FULL 0x20
-
-#define ADVERTISE_PAUSE_FIBER 0x180
-#define ADVERTISE_PAUSE_ASYM_FIBER 0x100
-
-#define REGISTER_LINK_STATUS 0x400
#define NB_FIBER_STATS 1
MODULE_DESCRIPTION("Marvell PHY driver");
@@ -497,16 +487,15 @@ static inline u32 linkmode_adv_to_fiber_adv_t(unsigned long *advertise)
u32 result = 0;
if (linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT, advertise))
- result |= ADVERTISE_FIBER_1000HALF;
+ result |= ADVERTISE_1000XHALF;
if (linkmode_test_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, advertise))
- result |= ADVERTISE_FIBER_1000FULL;
+ result |= ADVERTISE_1000XFULL;
if (linkmode_test_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT, advertise) &&
linkmode_test_bit(ETHTOOL_LINK_MODE_Pause_BIT, advertise))
- result |= LPA_PAUSE_ASYM_FIBER;
+ result |= ADVERTISE_1000XPSE_ASYM;
else if (linkmode_test_bit(ETHTOOL_LINK_MODE_Pause_BIT, advertise))
- result |= (ADVERTISE_PAUSE_FIBER
- & (~ADVERTISE_PAUSE_ASYM_FIBER));
+ result |= ADVERTISE_1000XPAUSE;
return result;
}
@@ -524,7 +513,7 @@ static int marvell_config_aneg_fiber(struct phy_device *phydev)
{
int changed = 0;
int err;
- int adv, oldadv;
+ u16 adv;
if (phydev->autoneg != AUTONEG_ENABLE)
return genphy_setup_forced(phydev);
@@ -533,44 +522,19 @@ static int marvell_config_aneg_fiber(struct phy_device *phydev)
linkmode_and(phydev->advertising, phydev->advertising,
phydev->supported);
- /* Setup fiber advertisement */
- adv = phy_read(phydev, MII_ADVERTISE);
- if (adv < 0)
- return adv;
-
- oldadv = adv;
- adv &= ~(ADVERTISE_FIBER_1000HALF | ADVERTISE_FIBER_1000FULL
- | LPA_PAUSE_FIBER);
- adv |= linkmode_adv_to_fiber_adv_t(phydev->advertising);
-
- if (adv != oldadv) {
- err = phy_write(phydev, MII_ADVERTISE, adv);
- if (err < 0)
- return err;
+ adv = linkmode_adv_to_fiber_adv_t(phydev->advertising);
+ /* Setup fiber advertisement */
+ err = phy_modify_changed(phydev, MII_ADVERTISE,
+ ADVERTISE_1000XHALF | ADVERTISE_1000XFULL |
+ ADVERTISE_1000XPAUSE | ADVERTISE_1000XPSE_ASYM,
+ adv);
+ if (err < 0)
+ return err;
+ if (err > 0)
changed = 1;
- }
-
- if (changed == 0) {
- /* Advertisement hasn't changed, but maybe aneg was never on to
- * begin with? Or maybe phy was isolated?
- */
- int ctl = phy_read(phydev, MII_BMCR);
-
- if (ctl < 0)
- return ctl;
-
- if (!(ctl & BMCR_ANENABLE) || (ctl & BMCR_ISOLATE))
- changed = 1; /* do restart aneg */
- }
- /* Only restart aneg if we are advertising something different
- * than we were before.
- */
- if (changed > 0)
- changed = genphy_restart_aneg(phydev);
-
- return changed;
+ return genphy_check_and_restart_aneg(phydev, changed);
}
static int m88e1510_config_aneg(struct phy_device *phydev)
@@ -1302,93 +1266,29 @@ static int m88e6390_config_aneg(struct phy_device *phydev)
static void fiber_lpa_mod_linkmode_lpa_t(unsigned long *advertising, u32 lpa)
{
linkmode_mod_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT,
- advertising, lpa & LPA_FIBER_1000HALF);
+ advertising, lpa & LPA_1000XHALF);
linkmode_mod_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT,
- advertising, lpa & LPA_FIBER_1000FULL);
-}
-
-/**
- * marvell_update_link - update link status in real time in @phydev
- * @phydev: target phy_device struct
- *
- * Description: Update the value in phydev->link to reflect the
- * current link value.
- */
-static int marvell_update_link(struct phy_device *phydev, int fiber)
-{
- int status;
-
- /* Use the generic register for copper link, or specific
- * register for fiber case
- */
- if (fiber) {
- status = phy_read(phydev, MII_M1011_PHY_STATUS);
- if (status < 0)
- return status;
-
- if ((status & REGISTER_LINK_STATUS) == 0)
- phydev->link = 0;
- else
- phydev->link = 1;
- } else {
- return genphy_update_link(phydev);
- }
-
- return 0;
+ advertising, lpa & LPA_1000XFULL);
}
static int marvell_read_status_page_an(struct phy_device *phydev,
- int fiber)
+ int fiber, int status)
{
- int status;
int lpa;
- int lpagb;
-
- status = phy_read(phydev, MII_M1011_PHY_STATUS);
- if (status < 0)
- return status;
-
- lpa = phy_read(phydev, MII_LPA);
- if (lpa < 0)
- return lpa;
-
- lpagb = phy_read(phydev, MII_STAT1000);
- if (lpagb < 0)
- return lpagb;
-
- if (status & MII_M1011_PHY_STATUS_FULLDUPLEX)
- phydev->duplex = DUPLEX_FULL;
- else
- phydev->duplex = DUPLEX_HALF;
-
- status = status & MII_M1011_PHY_STATUS_SPD_MASK;
- phydev->pause = 0;
- phydev->asym_pause = 0;
-
- switch (status) {
- case MII_M1011_PHY_STATUS_1000:
- phydev->speed = SPEED_1000;
- break;
-
- case MII_M1011_PHY_STATUS_100:
- phydev->speed = SPEED_100;
- break;
-
- default:
- phydev->speed = SPEED_10;
- break;
- }
+ int err;
if (!fiber) {
- mii_lpa_to_linkmode_lpa_t(phydev->lp_advertising, lpa);
- mii_stat1000_mod_linkmode_lpa_t(phydev->lp_advertising, lpagb);
+ err = genphy_read_lpa(phydev);
+ if (err < 0)
+ return err;
- if (phydev->duplex == DUPLEX_FULL) {
- phydev->pause = lpa & LPA_PAUSE_CAP ? 1 : 0;
- phydev->asym_pause = lpa & LPA_PAUSE_ASYM ? 1 : 0;
- }
+ phy_resolve_aneg_pause(phydev);
} else {
+ lpa = phy_read(phydev, MII_LPA);
+ if (lpa < 0)
+ return lpa;
+
/* The fiber link is only 1000M capable */
fiber_lpa_mod_linkmode_lpa_t(phydev->lp_advertising, lpa);
@@ -1405,31 +1305,25 @@ static int marvell_read_status_page_an(struct phy_device *phydev,
}
}
}
- return 0;
-}
-static int marvell_read_status_page_fixed(struct phy_device *phydev)
-{
- int bmcr = phy_read(phydev, MII_BMCR);
-
- if (bmcr < 0)
- return bmcr;
-
- if (bmcr & BMCR_FULLDPLX)
+ if (status & MII_M1011_PHY_STATUS_FULLDUPLEX)
phydev->duplex = DUPLEX_FULL;
else
phydev->duplex = DUPLEX_HALF;
- if (bmcr & BMCR_SPEED1000)
+ switch (status & MII_M1011_PHY_STATUS_SPD_MASK) {
+ case MII_M1011_PHY_STATUS_1000:
phydev->speed = SPEED_1000;
- else if (bmcr & BMCR_SPEED100)
+ break;
+
+ case MII_M1011_PHY_STATUS_100:
phydev->speed = SPEED_100;
- else
- phydev->speed = SPEED_10;
+ break;
- phydev->pause = 0;
- phydev->asym_pause = 0;
- linkmode_zero(phydev->lp_advertising);
+ default:
+ phydev->speed = SPEED_10;
+ break;
+ }
return 0;
}
@@ -1444,25 +1338,38 @@ static int marvell_read_status_page_fixed(struct phy_device *phydev)
*/
static int marvell_read_status_page(struct phy_device *phydev, int page)
{
+ int status;
int fiber;
int err;
- /* Detect and update the link, but return if there
- * was an error
+ status = phy_read(phydev, MII_M1011_PHY_STATUS);
+ if (status < 0)
+ return status;
+
+ /* Use the generic register for copper link status,
+ * and the PHY status register for fiber link status.
*/
+ if (page == MII_MARVELL_FIBER_PAGE) {
+ phydev->link = !!(status & MII_M1011_PHY_STATUS_LINK);
+ } else {
+ err = genphy_update_link(phydev);
+ if (err)
+ return err;
+ }
+
if (page == MII_MARVELL_FIBER_PAGE)
fiber = 1;
else
fiber = 0;
- err = marvell_update_link(phydev, fiber);
- if (err)
- return err;
+ linkmode_zero(phydev->lp_advertising);
+ phydev->pause = 0;
+ phydev->asym_pause = 0;
if (phydev->autoneg == AUTONEG_ENABLE)
- err = marvell_read_status_page_an(phydev, fiber);
+ err = marvell_read_status_page_an(phydev, fiber, status);
else
- err = marvell_read_status_page_fixed(phydev);
+ err = genphy_read_status_fixed(phydev);
return err;
}
diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c
index 1bf13017d288..64c9f3bba2cd 100644
--- a/drivers/net/phy/marvell10g.c
+++ b/drivers/net/phy/marvell10g.c
@@ -214,9 +214,9 @@ static int mv3310_sfp_insert(void *upstream, const struct sfp_eeprom_id *id)
phy_interface_t iface;
sfp_parse_support(phydev->sfp_bus, id, support);
- iface = sfp_select_interface(phydev->sfp_bus, id, support);
+ iface = sfp_select_interface(phydev->sfp_bus, support);
- if (iface != PHY_INTERFACE_MODE_10GKR) {
+ if (iface != PHY_INTERFACE_MODE_10GBASER) {
dev_err(&phydev->mdio.dev, "incompatible SFP module inserted\n");
return -EINVAL;
}
@@ -304,7 +304,7 @@ static int mv3310_config_init(struct phy_device *phydev)
phydev->interface != PHY_INTERFACE_MODE_2500BASEX &&
phydev->interface != PHY_INTERFACE_MODE_XAUI &&
phydev->interface != PHY_INTERFACE_MODE_RXAUI &&
- phydev->interface != PHY_INTERFACE_MODE_10GKR)
+ phydev->interface != PHY_INTERFACE_MODE_10GBASER)
return -ENODEV;
return 0;
@@ -386,16 +386,17 @@ static void mv3310_update_interface(struct phy_device *phydev)
{
if ((phydev->interface == PHY_INTERFACE_MODE_SGMII ||
phydev->interface == PHY_INTERFACE_MODE_2500BASEX ||
- phydev->interface == PHY_INTERFACE_MODE_10GKR) && phydev->link) {
+ phydev->interface == PHY_INTERFACE_MODE_10GBASER) &&
+ phydev->link) {
/* The PHY automatically switches its serdes interface (and
- * active PHYXS instance) between Cisco SGMII, 10GBase-KR and
+ * active PHYXS instance) between Cisco SGMII, 10GBase-R and
* 2500BaseX modes according to the speed. Florian suggests
* setting phydev->interface to communicate this to the MAC.
* Only do this if we are already in one of the above modes.
*/
switch (phydev->speed) {
case SPEED_10000:
- phydev->interface = PHY_INTERFACE_MODE_10GKR;
+ phydev->interface = PHY_INTERFACE_MODE_10GBASER;
break;
case SPEED_2500:
phydev->interface = PHY_INTERFACE_MODE_2500BASEX;
diff --git a/drivers/net/phy/mdio-i2c.c b/drivers/net/phy/mdio-i2c.c
index 0dce67672548..0746e2cc39ae 100644
--- a/drivers/net/phy/mdio-i2c.c
+++ b/drivers/net/phy/mdio-i2c.c
@@ -33,17 +33,24 @@ static int i2c_mii_read(struct mii_bus *bus, int phy_id, int reg)
{
struct i2c_adapter *i2c = bus->priv;
struct i2c_msg msgs[2];
- u8 data[2], dev_addr = reg;
+ u8 addr[3], data[2], *p;
int bus_addr, ret;
if (!i2c_mii_valid_phy_id(phy_id))
return 0xffff;
+ p = addr;
+ if (reg & MII_ADDR_C45) {
+ *p++ = 0x20 | ((reg >> 16) & 31);
+ *p++ = reg >> 8;
+ }
+ *p++ = reg;
+
bus_addr = i2c_mii_phy_addr(phy_id);
msgs[0].addr = bus_addr;
msgs[0].flags = 0;
- msgs[0].len = 1;
- msgs[0].buf = &dev_addr;
+ msgs[0].len = p - addr;
+ msgs[0].buf = addr;
msgs[1].addr = bus_addr;
msgs[1].flags = I2C_M_RD;
msgs[1].len = sizeof(data);
@@ -61,18 +68,23 @@ static int i2c_mii_write(struct mii_bus *bus, int phy_id, int reg, u16 val)
struct i2c_adapter *i2c = bus->priv;
struct i2c_msg msg;
int ret;
- u8 data[3];
+ u8 data[5], *p;
if (!i2c_mii_valid_phy_id(phy_id))
return 0;
- data[0] = reg;
- data[1] = val >> 8;
- data[2] = val;
+ p = data;
+ if (reg & MII_ADDR_C45) {
+ *p++ = (reg >> 16) & 31;
+ *p++ = reg >> 8;
+ }
+ *p++ = reg;
+ *p++ = val >> 8;
+ *p++ = val;
msg.addr = i2c_mii_phy_addr(phy_id);
msg.flags = 0;
- msg.len = 3;
+ msg.len = p - data;
msg.buf = data;
ret = i2c_transfer(i2c, &msg, 1);
diff --git a/drivers/net/phy/mii_timestamper.c b/drivers/net/phy/mii_timestamper.c
new file mode 100644
index 000000000000..2f12c5d901df
--- /dev/null
+++ b/drivers/net/phy/mii_timestamper.c
@@ -0,0 +1,125 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Support for generic time stamping devices on MII buses.
+// Copyright (C) 2018 Richard Cochran <[email protected]>
+//
+
+#include <linux/mii_timestamper.h>
+
+static LIST_HEAD(mii_timestamping_devices);
+static DEFINE_MUTEX(tstamping_devices_lock);
+
+struct mii_timestamping_desc {
+ struct list_head list;
+ struct mii_timestamping_ctrl *ctrl;
+ struct device *device;
+};
+
+/**
+ * register_mii_tstamp_controller() - registers an MII time stamping device.
+ *
+ * @device: The device to be registered.
+ * @ctrl: Pointer to device's control interface.
+ *
+ * Returns zero on success or non-zero on failure.
+ */
+int register_mii_tstamp_controller(struct device *device,
+ struct mii_timestamping_ctrl *ctrl)
+{
+ struct mii_timestamping_desc *desc;
+
+ desc = kzalloc(sizeof(*desc), GFP_KERNEL);
+ if (!desc)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&desc->list);
+ desc->ctrl = ctrl;
+ desc->device = device;
+
+ mutex_lock(&tstamping_devices_lock);
+ list_add_tail(&mii_timestamping_devices, &desc->list);
+ mutex_unlock(&tstamping_devices_lock);
+
+ return 0;
+}
+EXPORT_SYMBOL(register_mii_tstamp_controller);
+
+/**
+ * unregister_mii_tstamp_controller() - unregisters an MII time stamping device.
+ *
+ * @device: A device previously passed to register_mii_tstamp_controller().
+ */
+void unregister_mii_tstamp_controller(struct device *device)
+{
+ struct mii_timestamping_desc *desc;
+ struct list_head *this, *next;
+
+ mutex_lock(&tstamping_devices_lock);
+ list_for_each_safe(this, next, &mii_timestamping_devices) {
+ desc = list_entry(this, struct mii_timestamping_desc, list);
+ if (desc->device == device) {
+ list_del_init(&desc->list);
+ kfree(desc);
+ break;
+ }
+ }
+ mutex_unlock(&tstamping_devices_lock);
+}
+EXPORT_SYMBOL(unregister_mii_tstamp_controller);
+
+/**
+ * register_mii_timestamper - Enables a given port of an MII time stamper.
+ *
+ * @node: The device tree node of the MII time stamp controller.
+ * @port: The index of the port to be enabled.
+ *
+ * Returns a valid interface on success or ERR_PTR otherwise.
+ */
+struct mii_timestamper *register_mii_timestamper(struct device_node *node,
+ unsigned int port)
+{
+ struct mii_timestamper *mii_ts = NULL;
+ struct mii_timestamping_desc *desc;
+ struct list_head *this;
+
+ mutex_lock(&tstamping_devices_lock);
+ list_for_each(this, &mii_timestamping_devices) {
+ desc = list_entry(this, struct mii_timestamping_desc, list);
+ if (desc->device->of_node == node) {
+ mii_ts = desc->ctrl->probe_channel(desc->device, port);
+ if (!IS_ERR(mii_ts)) {
+ mii_ts->device = desc->device;
+ get_device(desc->device);
+ }
+ break;
+ }
+ }
+ mutex_unlock(&tstamping_devices_lock);
+
+ return mii_ts ? mii_ts : ERR_PTR(-EPROBE_DEFER);
+}
+EXPORT_SYMBOL(register_mii_timestamper);
+
+/**
+ * unregister_mii_timestamper - Disables a given MII time stamper.
+ *
+ * @mii_ts: An interface obtained via register_mii_timestamper().
+ *
+ */
+void unregister_mii_timestamper(struct mii_timestamper *mii_ts)
+{
+ struct mii_timestamping_desc *desc;
+ struct list_head *this;
+
+ mutex_lock(&tstamping_devices_lock);
+ list_for_each(this, &mii_timestamping_devices) {
+ desc = list_entry(this, struct mii_timestamping_desc, list);
+ if (desc->device == mii_ts->device) {
+ desc->ctrl->release_channel(desc->device, mii_ts);
+ put_device(desc->device);
+ break;
+ }
+ }
+ mutex_unlock(&tstamping_devices_lock);
+}
+EXPORT_SYMBOL(unregister_mii_timestamper);
diff --git a/drivers/net/phy/mscc.c b/drivers/net/phy/mscc.c
index d5f8f351d9ef..50214c081164 100644
--- a/drivers/net/phy/mscc.c
+++ b/drivers/net/phy/mscc.c
@@ -2404,7 +2404,6 @@ static struct phy_driver vsc85xx_driver[] = {
.soft_reset = &genphy_soft_reset,
.config_init = &vsc85xx_config_init,
.config_aneg = &vsc85xx_config_aneg,
- .aneg_done = &genphy_aneg_done,
.read_status = &vsc85xx_read_status,
.ack_interrupt = &vsc85xx_ack_interrupt,
.config_intr = &vsc85xx_config_intr,
@@ -2429,7 +2428,6 @@ static struct phy_driver vsc85xx_driver[] = {
.soft_reset = &genphy_soft_reset,
.config_init = &vsc85xx_config_init,
.config_aneg = &vsc85xx_config_aneg,
- .aneg_done = &genphy_aneg_done,
.read_status = &vsc85xx_read_status,
.ack_interrupt = &vsc85xx_ack_interrupt,
.config_intr = &vsc85xx_config_intr,
@@ -2454,7 +2452,6 @@ static struct phy_driver vsc85xx_driver[] = {
.soft_reset = &genphy_soft_reset,
.config_init = &vsc85xx_config_init,
.config_aneg = &vsc85xx_config_aneg,
- .aneg_done = &genphy_aneg_done,
.read_status = &vsc85xx_read_status,
.ack_interrupt = &vsc85xx_ack_interrupt,
.config_intr = &vsc85xx_config_intr,
@@ -2479,7 +2476,6 @@ static struct phy_driver vsc85xx_driver[] = {
.soft_reset = &genphy_soft_reset,
.config_init = &vsc85xx_config_init,
.config_aneg = &vsc85xx_config_aneg,
- .aneg_done = &genphy_aneg_done,
.read_status = &vsc85xx_read_status,
.ack_interrupt = &vsc85xx_ack_interrupt,
.config_intr = &vsc85xx_config_intr,
@@ -2504,7 +2500,6 @@ static struct phy_driver vsc85xx_driver[] = {
.soft_reset = &genphy_soft_reset,
.config_init = &vsc8584_config_init,
.config_aneg = &vsc85xx_config_aneg,
- .aneg_done = &genphy_aneg_done,
.read_status = &vsc85xx_read_status,
.ack_interrupt = &vsc85xx_ack_interrupt,
.config_intr = &vsc85xx_config_intr,
@@ -2530,7 +2525,6 @@ static struct phy_driver vsc85xx_driver[] = {
.soft_reset = &genphy_soft_reset,
.config_init = &vsc8584_config_init,
.config_aneg = &vsc85xx_config_aneg,
- .aneg_done = &genphy_aneg_done,
.read_status = &vsc85xx_read_status,
.ack_interrupt = &vsc85xx_ack_interrupt,
.config_intr = &vsc85xx_config_intr,
diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index 80be4d691e5b..541ed01496bf 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -422,8 +422,8 @@ int phy_mii_ioctl(struct phy_device *phydev, struct ifreq *ifr, int cmd)
return 0;
case SIOCSHWTSTAMP:
- if (phydev->drv && phydev->drv->hwtstamp)
- return phydev->drv->hwtstamp(phydev, ifr);
+ if (phydev->mii_ts && phydev->mii_ts->hwtstamp)
+ return phydev->mii_ts->hwtstamp(phydev->mii_ts, ifr);
/* fall through */
default:
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 0887ed2bb050..e5dc9f87f495 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -553,7 +553,7 @@ static const struct device_type mdio_bus_phy_type = {
.pm = MDIO_BUS_PHY_PM_OPS,
};
-static int phy_request_driver_module(struct phy_device *dev, int phy_id)
+static int phy_request_driver_module(struct phy_device *dev, u32 phy_id)
{
int ret;
@@ -565,15 +565,15 @@ static int phy_request_driver_module(struct phy_device *dev, int phy_id)
* then modprobe isn't available.
*/
if (IS_ENABLED(CONFIG_MODULES) && ret < 0 && ret != -ENOENT) {
- phydev_err(dev, "error %d loading PHY driver module for ID 0x%08x\n",
- ret, phy_id);
+ phydev_err(dev, "error %d loading PHY driver module for ID 0x%08lx\n",
+ ret, (unsigned long)phy_id);
return ret;
}
return 0;
}
-struct phy_device *phy_device_create(struct mii_bus *bus, int addr, int phy_id,
+struct phy_device *phy_device_create(struct mii_bus *bus, int addr, u32 phy_id,
bool is_c45,
struct phy_c45_device_ids *c45_ids)
{
@@ -881,6 +881,9 @@ EXPORT_SYMBOL(phy_device_register);
*/
void phy_device_remove(struct phy_device *phydev)
{
+ if (phydev->mii_ts)
+ unregister_mii_timestamper(phydev->mii_ts);
+
device_del(&phydev->mdio.dev);
/* Assert the reset signal */
@@ -919,6 +922,8 @@ static void phy_link_change(struct phy_device *phydev, bool up, bool do_carrier)
netif_carrier_off(netdev);
}
phydev->adjust_link(netdev);
+ if (phydev->mii_ts && phydev->mii_ts->link_state)
+ phydev->mii_ts->link_state(phydev->mii_ts, phydev);
}
/**
@@ -1771,6 +1776,36 @@ int genphy_restart_aneg(struct phy_device *phydev)
EXPORT_SYMBOL(genphy_restart_aneg);
/**
+ * genphy_check_and_restart_aneg - Enable and restart auto-negotiation
+ * @phydev: target phy_device struct
+ * @restart: whether aneg restart is requested
+ *
+ * Check, and restart auto-negotiation if needed.
+ */
+int genphy_check_and_restart_aneg(struct phy_device *phydev, bool restart)
+{
+ int ret = 0;
+
+ if (!restart) {
+ /* Advertisement hasn't changed, but maybe aneg was never on to
+ * begin with? Or maybe phy was isolated?
+ */
+ ret = phy_read(phydev, MII_BMCR);
+ if (ret < 0)
+ return ret;
+
+ if (!(ret & BMCR_ANENABLE) || (ret & BMCR_ISOLATE))
+ restart = true;
+ }
+
+ if (restart)
+ ret = genphy_restart_aneg(phydev);
+
+ return ret;
+}
+EXPORT_SYMBOL(genphy_check_and_restart_aneg);
+
+/**
* __genphy_config_aneg - restart auto-negotiation or write BMCR
* @phydev: target phy_device struct
* @changed: whether autoneg is requested
@@ -1795,23 +1830,7 @@ int __genphy_config_aneg(struct phy_device *phydev, bool changed)
else if (err)
changed = true;
- if (!changed) {
- /* Advertisement hasn't changed, but maybe aneg was never on to
- * begin with? Or maybe phy was isolated?
- */
- int ctl = phy_read(phydev, MII_BMCR);
-
- if (ctl < 0)
- return ctl;
-
- if (!(ctl & BMCR_ANENABLE) || (ctl & BMCR_ISOLATE))
- changed = true; /* do restart aneg */
- }
-
- /* Only restart aneg if we are advertising something different
- * than we were before.
- */
- return changed ? genphy_restart_aneg(phydev) : 0;
+ return genphy_check_and_restart_aneg(phydev, changed);
}
EXPORT_SYMBOL(__genphy_config_aneg);
@@ -1979,6 +1998,36 @@ int genphy_read_lpa(struct phy_device *phydev)
EXPORT_SYMBOL(genphy_read_lpa);
/**
+ * genphy_read_status_fixed - read the link parameters for !aneg mode
+ * @phydev: target phy_device struct
+ *
+ * Read the current duplex and speed state for a PHY operating with
+ * autonegotiation disabled.
+ */
+int genphy_read_status_fixed(struct phy_device *phydev)
+{
+ int bmcr = phy_read(phydev, MII_BMCR);
+
+ if (bmcr < 0)
+ return bmcr;
+
+ if (bmcr & BMCR_FULLDPLX)
+ phydev->duplex = DUPLEX_FULL;
+ else
+ phydev->duplex = DUPLEX_HALF;
+
+ if (bmcr & BMCR_SPEED1000)
+ phydev->speed = SPEED_1000;
+ else if (bmcr & BMCR_SPEED100)
+ phydev->speed = SPEED_100;
+ else
+ phydev->speed = SPEED_10;
+
+ return 0;
+}
+EXPORT_SYMBOL(genphy_read_status_fixed);
+
+/**
* genphy_read_status - check the link status and update current link state
* @phydev: target phy_device struct
*
@@ -2012,22 +2061,9 @@ int genphy_read_status(struct phy_device *phydev)
if (phydev->autoneg == AUTONEG_ENABLE && phydev->autoneg_complete) {
phy_resolve_aneg_linkmode(phydev);
} else if (phydev->autoneg == AUTONEG_DISABLE) {
- int bmcr = phy_read(phydev, MII_BMCR);
-
- if (bmcr < 0)
- return bmcr;
-
- if (bmcr & BMCR_FULLDPLX)
- phydev->duplex = DUPLEX_FULL;
- else
- phydev->duplex = DUPLEX_HALF;
-
- if (bmcr & BMCR_SPEED1000)
- phydev->speed = SPEED_1000;
- else if (bmcr & BMCR_SPEED100)
- phydev->speed = SPEED_100;
- else
- phydev->speed = SPEED_10;
+ err = genphy_read_status_fixed(phydev);
+ if (err < 0)
+ return err;
}
return 0;
@@ -2575,7 +2611,6 @@ static struct phy_driver genphy_driver = {
.name = "Generic PHY",
.soft_reset = genphy_no_soft_reset,
.get_features = genphy_read_abilities,
- .aneg_done = genphy_aneg_done,
.suspend = genphy_suspend,
.resume = genphy_resume,
.set_loopback = genphy_loopback,
diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c
index 9a616d6bc4eb..8d786c99f97a 100644
--- a/drivers/net/phy/phylink.c
+++ b/drivers/net/phy/phylink.c
@@ -48,7 +48,8 @@ struct phylink {
unsigned long phylink_disable_state; /* bitmask of disables */
struct phy_device *phydev;
phy_interface_t link_interface; /* PHY_INTERFACE_xxx */
- u8 link_an_mode; /* MLO_AN_xxx */
+ u8 cfg_link_an_mode; /* MLO_AN_xxx */
+ u8 cur_link_an_mode;
u8 link_port; /* The current non-phy ethtool port */
__ETHTOOL_DECLARE_LINK_MODE_MASK(supported);
@@ -71,6 +72,9 @@ struct phylink {
bool mac_link_dropped;
struct sfp_bus *sfp_bus;
+ bool sfp_may_have_phy;
+ __ETHTOOL_DECLARE_LINK_MODE_MASK(sfp_support);
+ u8 sfp_port;
};
#define phylink_printk(level, pl, fmt, ...) \
@@ -182,8 +186,8 @@ static int phylink_parse_fixedlink(struct phylink *pl,
pl->link_config.pause |= MLO_PAUSE_ASYM;
if (ret == 0) {
- desc = fwnode_get_named_gpiod(fixed_node, "link-gpios",
- 0, GPIOD_IN, "?");
+ desc = fwnode_gpiod_get_index(fixed_node, "link", 0,
+ GPIOD_IN, "?");
if (!IS_ERR(desc))
pl->link_gpio = desc;
@@ -256,12 +260,12 @@ static int phylink_parse_mode(struct phylink *pl, struct fwnode_handle *fwnode)
dn = fwnode_get_named_child_node(fwnode, "fixed-link");
if (dn || fwnode_property_present(fwnode, "fixed-link"))
- pl->link_an_mode = MLO_AN_FIXED;
+ pl->cfg_link_an_mode = MLO_AN_FIXED;
fwnode_handle_put(dn);
if (fwnode_property_read_string(fwnode, "managed", &managed) == 0 &&
strcmp(managed, "in-band-status") == 0) {
- if (pl->link_an_mode == MLO_AN_FIXED) {
+ if (pl->cfg_link_an_mode == MLO_AN_FIXED) {
phylink_err(pl,
"can't use both fixed-link and in-band-status\n");
return -EINVAL;
@@ -273,10 +277,11 @@ static int phylink_parse_mode(struct phylink *pl, struct fwnode_handle *fwnode)
phylink_set(pl->supported, Asym_Pause);
phylink_set(pl->supported, Pause);
pl->link_config.an_enabled = true;
- pl->link_an_mode = MLO_AN_INBAND;
+ pl->cfg_link_an_mode = MLO_AN_INBAND;
switch (pl->link_config.interface) {
case PHY_INTERFACE_MODE_SGMII:
+ case PHY_INTERFACE_MODE_QSGMII:
phylink_set(pl->supported, 10baseT_Half);
phylink_set(pl->supported, 10baseT_Full);
phylink_set(pl->supported, 100baseT_Half);
@@ -294,6 +299,7 @@ static int phylink_parse_mode(struct phylink *pl, struct fwnode_handle *fwnode)
break;
case PHY_INTERFACE_MODE_10GKR:
+ case PHY_INTERFACE_MODE_10GBASER:
phylink_set(pl->supported, 10baseT_Half);
phylink_set(pl->supported, 10baseT_Full);
phylink_set(pl->supported, 100baseT_Half);
@@ -333,14 +339,14 @@ static void phylink_mac_config(struct phylink *pl,
{
phylink_dbg(pl,
"%s: mode=%s/%s/%s/%s adv=%*pb pause=%02x link=%u an=%u\n",
- __func__, phylink_an_mode_str(pl->link_an_mode),
+ __func__, phylink_an_mode_str(pl->cur_link_an_mode),
phy_modes(state->interface),
phy_speed_to_str(state->speed),
phy_duplex_to_str(state->duplex),
__ETHTOOL_LINK_MODE_MASK_NBITS, state->advertising,
state->pause, state->link, state->an_enabled);
- pl->ops->mac_config(pl->config, pl->link_an_mode, state);
+ pl->ops->mac_config(pl->config, pl->cur_link_an_mode, state);
}
static void phylink_mac_config_up(struct phylink *pl,
@@ -441,9 +447,8 @@ static void phylink_mac_link_up(struct phylink *pl,
struct net_device *ndev = pl->netdev;
pl->cur_interface = link_state.interface;
- pl->ops->mac_link_up(pl->config, pl->link_an_mode,
- pl->phy_state.interface,
- pl->phydev);
+ pl->ops->mac_link_up(pl->config, pl->cur_link_an_mode,
+ pl->cur_interface, pl->phydev);
if (ndev)
netif_carrier_on(ndev);
@@ -461,7 +466,7 @@ static void phylink_mac_link_down(struct phylink *pl)
if (ndev)
netif_carrier_off(ndev);
- pl->ops->mac_link_down(pl->config, pl->link_an_mode,
+ pl->ops->mac_link_down(pl->config, pl->cur_link_an_mode,
pl->cur_interface);
phylink_info(pl, "Link is Down\n");
}
@@ -480,7 +485,7 @@ static void phylink_resolve(struct work_struct *w)
} else if (pl->mac_link_dropped) {
link_state.link = false;
} else {
- switch (pl->link_an_mode) {
+ switch (pl->cur_link_an_mode) {
case MLO_AN_PHY:
link_state = pl->phy_state;
phylink_resolve_flow(pl, &link_state);
@@ -567,6 +572,9 @@ static int phylink_register_sfp(struct phylink *pl,
struct sfp_bus *bus;
int ret;
+ if (!fwnode)
+ return 0;
+
bus = sfp_bus_find_fwnode(fwnode);
if (IS_ERR(bus)) {
ret = PTR_ERR(bus);
@@ -648,7 +656,7 @@ struct phylink *phylink_create(struct phylink_config *config,
return ERR_PTR(ret);
}
- if (pl->link_an_mode == MLO_AN_FIXED) {
+ if (pl->cfg_link_an_mode == MLO_AN_FIXED) {
ret = phylink_parse_fixedlink(pl, fwnode);
if (ret < 0) {
kfree(pl);
@@ -656,6 +664,8 @@ struct phylink *phylink_create(struct phylink_config *config,
}
}
+ pl->cur_link_an_mode = pl->cfg_link_an_mode;
+
ret = phylink_register_sfp(pl, fwnode);
if (ret < 0) {
kfree(pl);
@@ -711,7 +721,8 @@ static void phylink_phy_change(struct phy_device *phydev, bool up,
phy_duplex_to_str(phydev->duplex));
}
-static int phylink_bringup_phy(struct phylink *pl, struct phy_device *phy)
+static int phylink_bringup_phy(struct phylink *pl, struct phy_device *phy,
+ phy_interface_t interface)
{
struct phylink_link_state config;
__ETHTOOL_DECLARE_LINK_MODE_MASK(supported);
@@ -729,7 +740,19 @@ static int phylink_bringup_phy(struct phylink *pl, struct phy_device *phy)
memset(&config, 0, sizeof(config));
linkmode_copy(supported, phy->supported);
linkmode_copy(config.advertising, phy->advertising);
- config.interface = pl->link_config.interface;
+
+ /* Clause 45 PHYs switch their Serdes lane between several different
+ * modes, normally 10GBASE-R, SGMII. Some use 2500BASE-X for 2.5G
+ * speeds. We really need to know which interface modes the PHY and
+ * MAC supports to properly work out which linkmodes can be supported.
+ */
+ if (phy->is_c45 &&
+ interface != PHY_INTERFACE_MODE_RXAUI &&
+ interface != PHY_INTERFACE_MODE_XAUI &&
+ interface != PHY_INTERFACE_MODE_USXGMII)
+ config.interface = PHY_INTERFACE_MODE_NA;
+ else
+ config.interface = interface;
ret = phylink_validate(pl, supported, &config);
if (ret)
@@ -745,6 +768,7 @@ static int phylink_bringup_phy(struct phylink *pl, struct phy_device *phy)
mutex_lock(&phy->lock);
mutex_lock(&pl->state_mutex);
pl->phydev = phy;
+ pl->phy_state.interface = interface;
linkmode_copy(pl->supported, supported);
linkmode_copy(pl->link_config.advertising, config.advertising);
@@ -764,28 +788,18 @@ static int phylink_bringup_phy(struct phylink *pl, struct phy_device *phy)
return 0;
}
-static int __phylink_connect_phy(struct phylink *pl, struct phy_device *phy,
- phy_interface_t interface)
+static int phylink_attach_phy(struct phylink *pl, struct phy_device *phy,
+ phy_interface_t interface)
{
- int ret;
-
- if (WARN_ON(pl->link_an_mode == MLO_AN_FIXED ||
- (pl->link_an_mode == MLO_AN_INBAND &&
+ if (WARN_ON(pl->cfg_link_an_mode == MLO_AN_FIXED ||
+ (pl->cfg_link_an_mode == MLO_AN_INBAND &&
phy_interface_mode_is_8023z(interface))))
return -EINVAL;
if (pl->phydev)
return -EBUSY;
- ret = phy_attach_direct(pl->netdev, phy, 0, interface);
- if (ret)
- return ret;
-
- ret = phylink_bringup_phy(pl, phy);
- if (ret)
- phy_detach(phy);
-
- return ret;
+ return phy_attach_direct(pl->netdev, phy, 0, interface);
}
/**
@@ -805,13 +819,23 @@ static int __phylink_connect_phy(struct phylink *pl, struct phy_device *phy,
*/
int phylink_connect_phy(struct phylink *pl, struct phy_device *phy)
{
+ int ret;
+
/* Use PHY device/driver interface */
if (pl->link_interface == PHY_INTERFACE_MODE_NA) {
pl->link_interface = phy->interface;
pl->link_config.interface = pl->link_interface;
}
- return __phylink_connect_phy(pl, phy, pl->link_interface);
+ ret = phylink_attach_phy(pl, phy, pl->link_interface);
+ if (ret < 0)
+ return ret;
+
+ ret = phylink_bringup_phy(pl, phy, pl->link_config.interface);
+ if (ret)
+ phy_detach(phy);
+
+ return ret;
}
EXPORT_SYMBOL_GPL(phylink_connect_phy);
@@ -835,8 +859,8 @@ int phylink_of_phy_connect(struct phylink *pl, struct device_node *dn,
int ret;
/* Fixed links and 802.3z are handled without needing a PHY */
- if (pl->link_an_mode == MLO_AN_FIXED ||
- (pl->link_an_mode == MLO_AN_INBAND &&
+ if (pl->cfg_link_an_mode == MLO_AN_FIXED ||
+ (pl->cfg_link_an_mode == MLO_AN_INBAND &&
phy_interface_mode_is_8023z(pl->link_interface)))
return 0;
@@ -847,20 +871,23 @@ int phylink_of_phy_connect(struct phylink *pl, struct device_node *dn,
phy_node = of_parse_phandle(dn, "phy-device", 0);
if (!phy_node) {
- if (pl->link_an_mode == MLO_AN_PHY)
+ if (pl->cfg_link_an_mode == MLO_AN_PHY)
return -ENODEV;
return 0;
}
- phy_dev = of_phy_attach(pl->netdev, phy_node, flags,
- pl->link_interface);
+ phy_dev = of_phy_find_device(phy_node);
/* We're done with the phy_node handle */
of_node_put(phy_node);
-
if (!phy_dev)
return -ENODEV;
- ret = phylink_bringup_phy(pl, phy_dev);
+ ret = phy_attach_direct(pl->netdev, phy_dev, flags,
+ pl->link_interface);
+ if (ret)
+ return ret;
+
+ ret = phylink_bringup_phy(pl, phy_dev, pl->link_config.interface);
if (ret)
phy_detach(phy_dev);
@@ -910,7 +937,7 @@ int phylink_fixed_state_cb(struct phylink *pl,
/* It does not make sense to let the link be overriden unless we use
* MLO_AN_FIXED
*/
- if (pl->link_an_mode != MLO_AN_FIXED)
+ if (pl->cfg_link_an_mode != MLO_AN_FIXED)
return -EINVAL;
mutex_lock(&pl->state_mutex);
@@ -960,7 +987,7 @@ void phylink_start(struct phylink *pl)
ASSERT_RTNL();
phylink_info(pl, "configuring for %s/%s link mode\n",
- phylink_an_mode_str(pl->link_an_mode),
+ phylink_an_mode_str(pl->cur_link_an_mode),
phy_modes(pl->link_config.interface));
/* Always set the carrier off */
@@ -983,7 +1010,7 @@ void phylink_start(struct phylink *pl)
clear_bit(PHYLINK_DISABLE_STOPPED, &pl->phylink_disable_state);
phylink_run_resolve(pl);
- if (pl->link_an_mode == MLO_AN_FIXED && pl->link_gpio) {
+ if (pl->cfg_link_an_mode == MLO_AN_FIXED && pl->link_gpio) {
int irq = gpiod_to_irq(pl->link_gpio);
if (irq > 0) {
@@ -998,7 +1025,8 @@ void phylink_start(struct phylink *pl)
if (irq <= 0)
mod_timer(&pl->link_poll, jiffies + HZ);
}
- if (pl->link_an_mode == MLO_AN_FIXED && pl->get_fixed_state)
+ if ((pl->cfg_link_an_mode == MLO_AN_FIXED && pl->get_fixed_state) ||
+ pl->config->pcs_poll)
mod_timer(&pl->link_poll, jiffies + HZ);
if (pl->phydev)
phy_start(pl->phydev);
@@ -1125,7 +1153,7 @@ int phylink_ethtool_ksettings_get(struct phylink *pl,
linkmode_copy(kset->link_modes.supported, pl->supported);
- switch (pl->link_an_mode) {
+ switch (pl->cur_link_an_mode) {
case MLO_AN_FIXED:
/* We are using fixed settings. Report these as the
* current link settings - and note that these also
@@ -1197,7 +1225,7 @@ int phylink_ethtool_ksettings_set(struct phylink *pl,
/* If we have a fixed link (as specified by firmware), refuse
* to change link parameters.
*/
- if (pl->link_an_mode == MLO_AN_FIXED &&
+ if (pl->cur_link_an_mode == MLO_AN_FIXED &&
(s->speed != pl->link_config.speed ||
s->duplex != pl->link_config.duplex))
return -EINVAL;
@@ -1209,7 +1237,7 @@ int phylink_ethtool_ksettings_set(struct phylink *pl,
__clear_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, config.advertising);
} else {
/* If we have a fixed link, refuse to enable autonegotiation */
- if (pl->link_an_mode == MLO_AN_FIXED)
+ if (pl->cur_link_an_mode == MLO_AN_FIXED)
return -EINVAL;
config.speed = SPEED_UNKNOWN;
@@ -1219,44 +1247,66 @@ int phylink_ethtool_ksettings_set(struct phylink *pl,
__set_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, config.advertising);
}
- if (phylink_validate(pl, support, &config))
- return -EINVAL;
-
- /* If autonegotiation is enabled, we must have an advertisement */
- if (config.an_enabled && phylink_is_empty_linkmode(config.advertising))
- return -EINVAL;
-
- our_kset = *kset;
- linkmode_copy(our_kset.link_modes.advertising, config.advertising);
- our_kset.base.speed = config.speed;
- our_kset.base.duplex = config.duplex;
-
- /* If we have a PHY, configure the phy */
if (pl->phydev) {
+ /* If we have a PHY, we process the kset change via phylib.
+ * phylib will call our link state function if the PHY
+ * parameters have changed, which will trigger a resolve
+ * and update the MAC configuration.
+ */
+ our_kset = *kset;
+ linkmode_copy(our_kset.link_modes.advertising,
+ config.advertising);
+ our_kset.base.speed = config.speed;
+ our_kset.base.duplex = config.duplex;
+
ret = phy_ethtool_ksettings_set(pl->phydev, &our_kset);
if (ret)
return ret;
- }
- mutex_lock(&pl->state_mutex);
- /* Configure the MAC to match the new settings */
- linkmode_copy(pl->link_config.advertising, our_kset.link_modes.advertising);
- pl->link_config.interface = config.interface;
- pl->link_config.speed = our_kset.base.speed;
- pl->link_config.duplex = our_kset.base.duplex;
- pl->link_config.an_enabled = our_kset.base.autoneg != AUTONEG_DISABLE;
+ mutex_lock(&pl->state_mutex);
+ /* Save the new configuration */
+ linkmode_copy(pl->link_config.advertising,
+ our_kset.link_modes.advertising);
+ pl->link_config.interface = config.interface;
+ pl->link_config.speed = our_kset.base.speed;
+ pl->link_config.duplex = our_kset.base.duplex;
+ pl->link_config.an_enabled = our_kset.base.autoneg !=
+ AUTONEG_DISABLE;
+ mutex_unlock(&pl->state_mutex);
+ } else {
+ /* For a fixed link, this isn't able to change any parameters,
+ * which just leaves inband mode.
+ */
+ if (phylink_validate(pl, support, &config))
+ return -EINVAL;
- /* If we have a PHY, phylib will call our link state function if the
- * mode has changed, which will trigger a resolve and update the MAC
- * configuration. For a fixed link, this isn't able to change any
- * parameters, which just leaves inband mode.
- */
- if (pl->link_an_mode == MLO_AN_INBAND &&
- !test_bit(PHYLINK_DISABLE_STOPPED, &pl->phylink_disable_state)) {
- phylink_mac_config(pl, &pl->link_config);
- phylink_mac_an_restart(pl);
+ /* If autonegotiation is enabled, we must have an advertisement */
+ if (config.an_enabled &&
+ phylink_is_empty_linkmode(config.advertising))
+ return -EINVAL;
+
+ mutex_lock(&pl->state_mutex);
+ linkmode_copy(pl->link_config.advertising, config.advertising);
+ pl->link_config.interface = config.interface;
+ pl->link_config.speed = config.speed;
+ pl->link_config.duplex = config.duplex;
+ pl->link_config.an_enabled = kset->base.autoneg !=
+ AUTONEG_DISABLE;
+
+ if (pl->cur_link_an_mode == MLO_AN_INBAND &&
+ !test_bit(PHYLINK_DISABLE_STOPPED,
+ &pl->phylink_disable_state)) {
+ /* If in 802.3z mode, this updates the advertisement.
+ *
+ * If we are in SGMII mode without a PHY, there is no
+ * advertisement; the only thing we have is the pause
+ * modes which can only come from a PHY.
+ */
+ phylink_mac_config(pl, &pl->link_config);
+ phylink_mac_an_restart(pl);
+ }
+ mutex_unlock(&pl->state_mutex);
}
- mutex_unlock(&pl->state_mutex);
return 0;
}
@@ -1341,7 +1391,7 @@ int phylink_ethtool_set_pauseparam(struct phylink *pl,
pause->tx_pause);
} else if (!test_bit(PHYLINK_DISABLE_STOPPED,
&pl->phylink_disable_state)) {
- switch (pl->link_an_mode) {
+ switch (pl->cur_link_an_mode) {
case MLO_AN_FIXED:
/* Should we allow fixed links to change against the config? */
phylink_resolve_flow(pl, config);
@@ -1548,7 +1598,7 @@ static int phylink_mii_read(struct phylink *pl, unsigned int phy_id,
struct phylink_link_state state;
int val = 0xffff;
- switch (pl->link_an_mode) {
+ switch (pl->cur_link_an_mode) {
case MLO_AN_FIXED:
if (phy_id == 0) {
phylink_get_fixed_state(pl, &state);
@@ -1573,7 +1623,7 @@ static int phylink_mii_read(struct phylink *pl, unsigned int phy_id,
static int phylink_mii_write(struct phylink *pl, unsigned int phy_id,
unsigned int reg, unsigned int val)
{
- switch (pl->link_an_mode) {
+ switch (pl->cur_link_an_mode) {
case MLO_AN_FIXED:
break;
@@ -1679,25 +1729,21 @@ static void phylink_sfp_detach(void *upstream, struct sfp_bus *bus)
pl->netdev->sfp_bus = NULL;
}
-static int phylink_sfp_module_insert(void *upstream,
- const struct sfp_eeprom_id *id)
+static int phylink_sfp_config(struct phylink *pl, u8 mode,
+ const unsigned long *supported,
+ const unsigned long *advertising)
{
- struct phylink *pl = upstream;
- __ETHTOOL_DECLARE_LINK_MODE_MASK(support) = { 0, };
__ETHTOOL_DECLARE_LINK_MODE_MASK(support1);
+ __ETHTOOL_DECLARE_LINK_MODE_MASK(support);
struct phylink_link_state config;
phy_interface_t iface;
- int ret = 0;
bool changed;
- u8 port;
-
- ASSERT_RTNL();
+ int ret;
- sfp_parse_support(pl->sfp_bus, id, support);
- port = sfp_parse_port(pl->sfp_bus, id, support);
+ linkmode_copy(support, supported);
memset(&config, 0, sizeof(config));
- linkmode_copy(config.advertising, support);
+ linkmode_copy(config.advertising, advertising);
config.interface = PHY_INTERFACE_MODE_NA;
config.speed = SPEED_UNKNOWN;
config.duplex = DUPLEX_UNKNOWN;
@@ -1712,9 +1758,7 @@ static int phylink_sfp_module_insert(void *upstream,
return ret;
}
- linkmode_copy(support1, support);
-
- iface = sfp_select_interface(pl->sfp_bus, id, config.advertising);
+ iface = sfp_select_interface(pl->sfp_bus, config.advertising);
if (iface == PHY_INTERFACE_MODE_NA) {
phylink_err(pl,
"selection of interface failed, advertisement %*pb\n",
@@ -1723,18 +1767,18 @@ static int phylink_sfp_module_insert(void *upstream,
}
config.interface = iface;
+ linkmode_copy(support1, support);
ret = phylink_validate(pl, support1, &config);
if (ret) {
phylink_err(pl, "validation of %s/%s with support %*pb failed: %d\n",
- phylink_an_mode_str(MLO_AN_INBAND),
+ phylink_an_mode_str(mode),
phy_modes(config.interface),
__ETHTOOL_LINK_MODE_MASK_NBITS, support, ret);
return ret;
}
phylink_dbg(pl, "requesting link mode %s/%s with support %*pb\n",
- phylink_an_mode_str(MLO_AN_INBAND),
- phy_modes(config.interface),
+ phylink_an_mode_str(mode), phy_modes(config.interface),
__ETHTOOL_LINK_MODE_MASK_NBITS, support);
if (phy_interface_mode_is_8023z(iface) && pl->phydev)
@@ -1746,19 +1790,19 @@ static int phylink_sfp_module_insert(void *upstream,
linkmode_copy(pl->link_config.advertising, config.advertising);
}
- if (pl->link_an_mode != MLO_AN_INBAND ||
+ if (pl->cur_link_an_mode != mode ||
pl->link_config.interface != config.interface) {
pl->link_config.interface = config.interface;
- pl->link_an_mode = MLO_AN_INBAND;
+ pl->cur_link_an_mode = mode;
changed = true;
phylink_info(pl, "switched to %s/%s link mode\n",
- phylink_an_mode_str(MLO_AN_INBAND),
+ phylink_an_mode_str(mode),
phy_modes(config.interface));
}
- pl->link_port = port;
+ pl->link_port = pl->sfp_port;
if (changed && !test_bit(PHYLINK_DISABLE_STOPPED,
&pl->phylink_disable_state))
@@ -1767,6 +1811,55 @@ static int phylink_sfp_module_insert(void *upstream,
return ret;
}
+static int phylink_sfp_module_insert(void *upstream,
+ const struct sfp_eeprom_id *id)
+{
+ struct phylink *pl = upstream;
+ unsigned long *support = pl->sfp_support;
+
+ ASSERT_RTNL();
+
+ linkmode_zero(support);
+ sfp_parse_support(pl->sfp_bus, id, support);
+ pl->sfp_port = sfp_parse_port(pl->sfp_bus, id, support);
+
+ /* If this module may have a PHY connecting later, defer until later */
+ pl->sfp_may_have_phy = sfp_may_have_phy(pl->sfp_bus, id);
+ if (pl->sfp_may_have_phy)
+ return 0;
+
+ return phylink_sfp_config(pl, MLO_AN_INBAND, support, support);
+}
+
+static int phylink_sfp_module_start(void *upstream)
+{
+ struct phylink *pl = upstream;
+
+ /* If this SFP module has a PHY, start the PHY now. */
+ if (pl->phydev) {
+ phy_start(pl->phydev);
+ return 0;
+ }
+
+ /* If the module may have a PHY but we didn't detect one we
+ * need to configure the MAC here.
+ */
+ if (!pl->sfp_may_have_phy)
+ return 0;
+
+ return phylink_sfp_config(pl, MLO_AN_INBAND,
+ pl->sfp_support, pl->sfp_support);
+}
+
+static void phylink_sfp_module_stop(void *upstream)
+{
+ struct phylink *pl = upstream;
+
+ /* If this SFP module has a PHY, stop it. */
+ if (pl->phydev)
+ phy_stop(pl->phydev);
+}
+
static void phylink_sfp_link_down(void *upstream)
{
struct phylink *pl = upstream;
@@ -1786,11 +1879,51 @@ static void phylink_sfp_link_up(void *upstream)
phylink_run_resolve(pl);
}
+/* The Broadcom BCM84881 in the Methode DM7052 is unable to provide a SGMII
+ * or 802.3z control word, so inband will not work.
+ */
+static bool phylink_phy_no_inband(struct phy_device *phy)
+{
+ return phy->is_c45 &&
+ (phy->c45_ids.device_ids[1] & 0xfffffff0) == 0xae025150;
+}
+
static int phylink_sfp_connect_phy(void *upstream, struct phy_device *phy)
{
struct phylink *pl = upstream;
+ phy_interface_t interface;
+ u8 mode;
+ int ret;
- return __phylink_connect_phy(upstream, phy, pl->link_config.interface);
+ /*
+ * This is the new way of dealing with flow control for PHYs,
+ * as described by Timur Tabi in commit 529ed1275263 ("net: phy:
+ * phy drivers should not set SUPPORTED_[Asym_]Pause") except
+ * using our validate call to the MAC, we rely upon the MAC
+ * clearing the bits from both supported and advertising fields.
+ */
+ phy_support_asym_pause(phy);
+
+ if (phylink_phy_no_inband(phy))
+ mode = MLO_AN_PHY;
+ else
+ mode = MLO_AN_INBAND;
+
+ /* Do the initial configuration */
+ ret = phylink_sfp_config(pl, mode, phy->supported, phy->advertising);
+ if (ret < 0)
+ return ret;
+
+ interface = pl->link_config.interface;
+ ret = phylink_attach_phy(pl, phy, interface);
+ if (ret < 0)
+ return ret;
+
+ ret = phylink_bringup_phy(pl, phy, interface);
+ if (ret)
+ phy_detach(phy);
+
+ return ret;
}
static void phylink_sfp_disconnect_phy(void *upstream)
@@ -1802,6 +1935,8 @@ static const struct sfp_upstream_ops sfp_phylink_ops = {
.attach = phylink_sfp_attach,
.detach = phylink_sfp_detach,
.module_insert = phylink_sfp_module_insert,
+ .module_start = phylink_sfp_module_start,
+ .module_stop = phylink_sfp_module_stop,
.link_up = phylink_sfp_link_up,
.link_down = phylink_sfp_link_down,
.connect_phy = phylink_sfp_connect_phy,
diff --git a/drivers/net/phy/realtek.c b/drivers/net/phy/realtek.c
index 476db5345e1a..f5fa2fff3ddc 100644
--- a/drivers/net/phy/realtek.c
+++ b/drivers/net/phy/realtek.c
@@ -29,6 +29,8 @@
#define RTL8211F_INSR 0x1d
#define RTL8211F_TX_DELAY BIT(8)
+#define RTL8211F_RX_DELAY BIT(3)
+
#define RTL8211E_TX_DELAY BIT(1)
#define RTL8211E_RX_DELAY BIT(2)
#define RTL8211E_MODE_MII_GMII BIT(3)
@@ -171,25 +173,66 @@ static int rtl8211c_config_init(struct phy_device *phydev)
static int rtl8211f_config_init(struct phy_device *phydev)
{
- u16 val;
+ struct device *dev = &phydev->mdio.dev;
+ u16 val_txdly, val_rxdly;
+ int ret;
- /* enable TX-delay for rgmii-{id,txid}, and disable it for rgmii and
- * rgmii-rxid. The RX-delay can be enabled by the external RXDLY pin.
- */
switch (phydev->interface) {
case PHY_INTERFACE_MODE_RGMII:
+ val_txdly = 0;
+ val_rxdly = 0;
+ break;
+
case PHY_INTERFACE_MODE_RGMII_RXID:
- val = 0;
+ val_txdly = 0;
+ val_rxdly = RTL8211F_RX_DELAY;
break;
- case PHY_INTERFACE_MODE_RGMII_ID:
+
case PHY_INTERFACE_MODE_RGMII_TXID:
- val = RTL8211F_TX_DELAY;
+ val_txdly = RTL8211F_TX_DELAY;
+ val_rxdly = 0;
+ break;
+
+ case PHY_INTERFACE_MODE_RGMII_ID:
+ val_txdly = RTL8211F_TX_DELAY;
+ val_rxdly = RTL8211F_RX_DELAY;
break;
+
default: /* the rest of the modes imply leaving delay as is. */
return 0;
}
- return phy_modify_paged(phydev, 0xd08, 0x11, RTL8211F_TX_DELAY, val);
+ ret = phy_modify_paged_changed(phydev, 0xd08, 0x11, RTL8211F_TX_DELAY,
+ val_txdly);
+ if (ret < 0) {
+ dev_err(dev, "Failed to update the TX delay register\n");
+ return ret;
+ } else if (ret) {
+ dev_dbg(dev,
+ "%s 2ns TX delay (and changing the value from pin-strapping RXD1 or the bootloader)\n",
+ val_txdly ? "Enabling" : "Disabling");
+ } else {
+ dev_dbg(dev,
+ "2ns TX delay was already %s (by pin-strapping RXD1 or bootloader configuration)\n",
+ val_txdly ? "enabled" : "disabled");
+ }
+
+ ret = phy_modify_paged_changed(phydev, 0xd08, 0x15, RTL8211F_RX_DELAY,
+ val_rxdly);
+ if (ret < 0) {
+ dev_err(dev, "Failed to update the RX delay register\n");
+ return ret;
+ } else if (ret) {
+ dev_dbg(dev,
+ "%s 2ns RX delay (and changing the value from pin-strapping RXD0 or the bootloader)\n",
+ val_rxdly ? "Enabling" : "Disabling");
+ } else {
+ dev_dbg(dev,
+ "2ns RX delay was already %s (by pin-strapping RXD0 or bootloader configuration)\n",
+ val_rxdly ? "enabled" : "disabled");
+ }
+
+ return 0;
}
static int rtl8211e_config_init(struct phy_device *phydev)
diff --git a/drivers/net/phy/sfp-bus.c b/drivers/net/phy/sfp-bus.c
index 5a72093ab6e7..d949ea7b4f8c 100644
--- a/drivers/net/phy/sfp-bus.c
+++ b/drivers/net/phy/sfp-bus.c
@@ -103,6 +103,7 @@ static const struct sfp_quirk *sfp_lookup_quirk(const struct sfp_eeprom_id *id)
return NULL;
}
+
/**
* sfp_parse_port() - Parse the EEPROM base ID, setting the port type
* @bus: a pointer to the &struct sfp_bus structure for the sfp module
@@ -124,35 +125,35 @@ int sfp_parse_port(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
/* port is the physical connector, set this from the connector field. */
switch (id->base.connector) {
- case SFP_CONNECTOR_SC:
- case SFP_CONNECTOR_FIBERJACK:
- case SFP_CONNECTOR_LC:
- case SFP_CONNECTOR_MT_RJ:
- case SFP_CONNECTOR_MU:
- case SFP_CONNECTOR_OPTICAL_PIGTAIL:
+ case SFF8024_CONNECTOR_SC:
+ case SFF8024_CONNECTOR_FIBERJACK:
+ case SFF8024_CONNECTOR_LC:
+ case SFF8024_CONNECTOR_MT_RJ:
+ case SFF8024_CONNECTOR_MU:
+ case SFF8024_CONNECTOR_OPTICAL_PIGTAIL:
+ case SFF8024_CONNECTOR_MPO_1X12:
+ case SFF8024_CONNECTOR_MPO_2X16:
port = PORT_FIBRE;
break;
- case SFP_CONNECTOR_RJ45:
+ case SFF8024_CONNECTOR_RJ45:
port = PORT_TP;
break;
- case SFP_CONNECTOR_COPPER_PIGTAIL:
+ case SFF8024_CONNECTOR_COPPER_PIGTAIL:
port = PORT_DA;
break;
- case SFP_CONNECTOR_UNSPEC:
+ case SFF8024_CONNECTOR_UNSPEC:
if (id->base.e1000_base_t) {
port = PORT_TP;
break;
}
/* fallthrough */
- case SFP_CONNECTOR_SG: /* guess */
- case SFP_CONNECTOR_MPO_1X12:
- case SFP_CONNECTOR_MPO_2X16:
- case SFP_CONNECTOR_HSSDC_II:
- case SFP_CONNECTOR_NOSEPARATE:
- case SFP_CONNECTOR_MXC_2X16:
+ case SFF8024_CONNECTOR_SG: /* guess */
+ case SFF8024_CONNECTOR_HSSDC_II:
+ case SFF8024_CONNECTOR_NOSEPARATE:
+ case SFF8024_CONNECTOR_MXC_2X16:
port = PORT_OTHER;
break;
default:
@@ -179,6 +180,33 @@ int sfp_parse_port(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
EXPORT_SYMBOL_GPL(sfp_parse_port);
/**
+ * sfp_may_have_phy() - indicate whether the module may have a PHY
+ * @bus: a pointer to the &struct sfp_bus structure for the sfp module
+ * @id: a pointer to the module's &struct sfp_eeprom_id
+ *
+ * Parse the EEPROM identification given in @id, and return whether
+ * this module may have a PHY.
+ */
+bool sfp_may_have_phy(struct sfp_bus *bus, const struct sfp_eeprom_id *id)
+{
+ if (id->base.e1000_base_t)
+ return true;
+
+ if (id->base.phys_id != SFF8024_ID_DWDM_SFP) {
+ switch (id->base.extended_cc) {
+ case SFF8024_ECC_10GBASE_T_SFI:
+ case SFF8024_ECC_10GBASE_T_SR:
+ case SFF8024_ECC_5GBASE_T:
+ case SFF8024_ECC_2_5GBASE_T:
+ return true;
+ }
+ }
+
+ return false;
+}
+EXPORT_SYMBOL_GPL(sfp_may_have_phy);
+
+/**
* sfp_parse_support() - Parse the eeprom id for supported link modes
* @bus: a pointer to the &struct sfp_bus structure for the sfp module
* @id: a pointer to the module's &struct sfp_eeprom_id
@@ -261,22 +289,33 @@ void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
}
switch (id->base.extended_cc) {
- case 0x00: /* Unspecified */
+ case SFF8024_ECC_UNSPEC:
break;
- case 0x02: /* 100Gbase-SR4 or 25Gbase-SR */
+ case SFF8024_ECC_100GBASE_SR4_25GBASE_SR:
phylink_set(modes, 100000baseSR4_Full);
phylink_set(modes, 25000baseSR_Full);
break;
- case 0x03: /* 100Gbase-LR4 or 25Gbase-LR */
- case 0x04: /* 100Gbase-ER4 or 25Gbase-ER */
+ case SFF8024_ECC_100GBASE_LR4_25GBASE_LR:
+ case SFF8024_ECC_100GBASE_ER4_25GBASE_ER:
phylink_set(modes, 100000baseLR4_ER4_Full);
break;
- case 0x0b: /* 100Gbase-CR4 or 25Gbase-CR CA-L */
- case 0x0c: /* 25Gbase-CR CA-S */
- case 0x0d: /* 25Gbase-CR CA-N */
+ case SFF8024_ECC_100GBASE_CR4:
phylink_set(modes, 100000baseCR4_Full);
+ /* fallthrough */
+ case SFF8024_ECC_25GBASE_CR_S:
+ case SFF8024_ECC_25GBASE_CR_N:
phylink_set(modes, 25000baseCR_Full);
break;
+ case SFF8024_ECC_10GBASE_T_SFI:
+ case SFF8024_ECC_10GBASE_T_SR:
+ phylink_set(modes, 10000baseT_Full);
+ break;
+ case SFF8024_ECC_5GBASE_T:
+ phylink_set(modes, 5000baseT_Full);
+ break;
+ case SFF8024_ECC_2_5GBASE_T:
+ phylink_set(modes, 2500baseT_Full);
+ break;
default:
dev_warn(bus->sfp_dev,
"Unknown/unsupported extended compliance code: 0x%02x\n",
@@ -301,7 +340,7 @@ void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
*/
if (bitmap_empty(modes, __ETHTOOL_LINK_MODE_MASK_NBITS)) {
/* If the encoding and bit rate allows 1000baseX */
- if (id->base.encoding == SFP_ENCODING_8B10B && br_nom &&
+ if (id->base.encoding == SFF8024_ENCODING_8B10B && br_nom &&
br_min <= 1300 && br_max >= 1200)
phylink_set(modes, 1000baseX_Full);
}
@@ -320,31 +359,27 @@ EXPORT_SYMBOL_GPL(sfp_parse_support);
/**
* sfp_select_interface() - Select appropriate phy_interface_t mode
* @bus: a pointer to the &struct sfp_bus structure for the sfp module
- * @id: a pointer to the module's &struct sfp_eeprom_id
* @link_modes: ethtool link modes mask
*
- * Derive the phy_interface_t mode for the information found in the
- * module's identifying EEPROM and the link modes mask. There is no
- * standard or defined way to derive this information, so we decide
- * based upon the link mode mask.
+ * Derive the phy_interface_t mode for the SFP module from the link
+ * modes mask.
*/
phy_interface_t sfp_select_interface(struct sfp_bus *bus,
- const struct sfp_eeprom_id *id,
unsigned long *link_modes)
{
if (phylink_test(link_modes, 10000baseCR_Full) ||
phylink_test(link_modes, 10000baseSR_Full) ||
phylink_test(link_modes, 10000baseLR_Full) ||
phylink_test(link_modes, 10000baseLRM_Full) ||
- phylink_test(link_modes, 10000baseER_Full))
- return PHY_INTERFACE_MODE_10GKR;
+ phylink_test(link_modes, 10000baseER_Full) ||
+ phylink_test(link_modes, 10000baseT_Full))
+ return PHY_INTERFACE_MODE_10GBASER;
if (phylink_test(link_modes, 2500baseX_Full))
return PHY_INTERFACE_MODE_2500BASEX;
- if (id->base.e1000_base_t ||
- id->base.e100_base_lx ||
- id->base.e100_base_fx)
+ if (phylink_test(link_modes, 1000baseT_Half) ||
+ phylink_test(link_modes, 1000baseT_Full))
return PHY_INTERFACE_MODE_SGMII;
if (phylink_test(link_modes, 1000baseX_Full))
@@ -705,6 +740,27 @@ void sfp_module_remove(struct sfp_bus *bus)
}
EXPORT_SYMBOL_GPL(sfp_module_remove);
+int sfp_module_start(struct sfp_bus *bus)
+{
+ const struct sfp_upstream_ops *ops = sfp_get_upstream_ops(bus);
+ int ret = 0;
+
+ if (ops && ops->module_start)
+ ret = ops->module_start(bus->upstream);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(sfp_module_start);
+
+void sfp_module_stop(struct sfp_bus *bus)
+{
+ const struct sfp_upstream_ops *ops = sfp_get_upstream_ops(bus);
+
+ if (ops && ops->module_stop)
+ ops->module_stop(bus->upstream);
+}
+EXPORT_SYMBOL_GPL(sfp_module_stop);
+
static void sfp_socket_clear(struct sfp_bus *bus)
{
bus->sfp_dev = NULL;
diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c
index c0b9a8e4e65a..73c2969f11a4 100644
--- a/drivers/net/phy/sfp.c
+++ b/drivers/net/phy/sfp.c
@@ -59,8 +59,10 @@ enum {
SFP_DEV_UP,
SFP_S_DOWN = 0,
+ SFP_S_FAIL,
SFP_S_WAIT,
SFP_S_INIT,
+ SFP_S_INIT_PHY,
SFP_S_INIT_TX_FAULT,
SFP_S_WAIT_LOS,
SFP_S_LINK_UP,
@@ -122,8 +124,10 @@ static const char *event_to_str(unsigned short event)
static const char * const sm_state_strings[] = {
[SFP_S_DOWN] = "down",
+ [SFP_S_FAIL] = "fail",
[SFP_S_WAIT] = "wait",
[SFP_S_INIT] = "init",
+ [SFP_S_INIT_PHY] = "init_phy",
[SFP_S_INIT_TX_FAULT] = "init_tx_fault",
[SFP_S_WAIT_LOS] = "wait_los",
[SFP_S_LINK_UP] = "link_up",
@@ -155,10 +159,34 @@ static const enum gpiod_flags gpio_flags[] = {
GPIOD_ASIS,
};
-#define T_WAIT msecs_to_jiffies(50)
-#define T_INIT_JIFFIES msecs_to_jiffies(300)
-#define T_RESET_US 10
-#define T_FAULT_RECOVER msecs_to_jiffies(1000)
+/* t_start_up (SFF-8431) or t_init (SFF-8472) is the time required for a
+ * non-cooled module to initialise its laser safety circuitry. We wait
+ * an initial T_WAIT period before we check the tx fault to give any PHY
+ * on board (for a copper SFP) time to initialise.
+ */
+#define T_WAIT msecs_to_jiffies(50)
+#define T_START_UP msecs_to_jiffies(300)
+#define T_START_UP_BAD_GPON msecs_to_jiffies(60000)
+
+/* t_reset is the time required to assert the TX_DISABLE signal to reset
+ * an indicated TX_FAULT.
+ */
+#define T_RESET_US 10
+#define T_FAULT_RECOVER msecs_to_jiffies(1000)
+
+/* N_FAULT_INIT is the number of recovery attempts at module initialisation
+ * time. If the TX_FAULT signal is not deasserted after this number of
+ * attempts at clearing it, we decide that the module is faulty.
+ * N_FAULT is the same but after the module has initialised.
+ */
+#define N_FAULT_INIT 5
+#define N_FAULT 5
+
+/* T_PHY_RETRY is the time interval between attempts to probe the PHY.
+ * R_PHY_RETRY is the number of attempts.
+ */
+#define T_PHY_RETRY msecs_to_jiffies(50)
+#define R_PHY_RETRY 12
/* SFP module presence detection is poor: the three MOD DEF signals are
* the same length on the PCB, which means it's possible for MOD DEF 0 to
@@ -214,10 +242,12 @@ struct sfp {
unsigned char sm_mod_tries;
unsigned char sm_dev_state;
unsigned short sm_state;
- unsigned int sm_retries;
+ unsigned char sm_fault_retries;
+ unsigned char sm_phy_retries;
struct sfp_eeprom_id id;
unsigned int module_power_mW;
+ unsigned int module_t_start_up;
#if IS_ENABLED(CONFIG_HWMON)
struct sfp_diag diag;
@@ -231,7 +261,7 @@ struct sfp {
static bool sff_module_supported(const struct sfp_eeprom_id *id)
{
- return id->base.phys_id == SFP_PHYS_ID_SFF &&
+ return id->base.phys_id == SFF8024_ID_SFF_8472 &&
id->base.phys_ext_id == SFP_PHYS_EXT_ID_SFP;
}
@@ -242,7 +272,7 @@ static const struct sff_data sff_data = {
static bool sfp_module_supported(const struct sfp_eeprom_id *id)
{
- return id->base.phys_id == SFP_PHYS_ID_SFP &&
+ return id->base.phys_id == SFF8024_ID_SFP &&
id->base.phys_ext_id == SFP_PHYS_EXT_ID_SFP;
}
@@ -412,13 +442,20 @@ static unsigned int sfp_soft_get_state(struct sfp *sfp)
{
unsigned int state = 0;
u8 status;
+ int ret;
- if (sfp_read(sfp, true, SFP_STATUS, &status, sizeof(status)) ==
- sizeof(status)) {
+ ret = sfp_read(sfp, true, SFP_STATUS, &status, sizeof(status));
+ if (ret == sizeof(status)) {
if (status & SFP_STATUS_RX_LOS)
state |= SFP_F_LOS;
if (status & SFP_STATUS_TX_FAULT)
state |= SFP_F_TX_FAULT;
+ } else {
+ dev_err_ratelimited(sfp->dev,
+ "failed to read SFP soft status: %d\n",
+ ret);
+ /* Preserve the current state */
+ state = sfp->state;
}
return state & sfp->state_soft_mask;
@@ -1383,26 +1420,30 @@ static void sfp_sm_mod_next(struct sfp *sfp, unsigned int state,
static void sfp_sm_phy_detach(struct sfp *sfp)
{
- phy_stop(sfp->mod_phy);
sfp_remove_phy(sfp->sfp_bus);
phy_device_remove(sfp->mod_phy);
phy_device_free(sfp->mod_phy);
sfp->mod_phy = NULL;
}
-static void sfp_sm_probe_phy(struct sfp *sfp)
+static int sfp_sm_probe_phy(struct sfp *sfp, bool is_c45)
{
struct phy_device *phy;
int err;
- phy = mdiobus_scan(sfp->i2c_mii, SFP_PHY_ADDR);
- if (phy == ERR_PTR(-ENODEV)) {
- dev_info(sfp->dev, "no PHY detected\n");
- return;
- }
+ phy = get_phy_device(sfp->i2c_mii, SFP_PHY_ADDR, is_c45);
+ if (phy == ERR_PTR(-ENODEV))
+ return PTR_ERR(phy);
if (IS_ERR(phy)) {
dev_err(sfp->dev, "mdiobus scan returned %ld\n", PTR_ERR(phy));
- return;
+ return PTR_ERR(phy);
+ }
+
+ err = phy_device_register(phy);
+ if (err) {
+ phy_device_free(phy);
+ dev_err(sfp->dev, "phy_device_register failed: %d\n", err);
+ return err;
}
err = sfp_add_phy(sfp->sfp_bus, phy);
@@ -1410,11 +1451,12 @@ static void sfp_sm_probe_phy(struct sfp *sfp)
phy_device_remove(phy);
phy_device_free(phy);
dev_err(sfp->dev, "sfp_add_phy failed: %d\n", err);
- return;
+ return err;
}
sfp->mod_phy = phy;
- phy_start(phy);
+
+ return 0;
}
static void sfp_sm_link_up(struct sfp *sfp)
@@ -1464,7 +1506,7 @@ static bool sfp_los_event_inactive(struct sfp *sfp, unsigned int event)
static void sfp_sm_fault(struct sfp *sfp, unsigned int next_state, bool warn)
{
- if (sfp->sm_retries && !--sfp->sm_retries) {
+ if (sfp->sm_fault_retries && !--sfp->sm_fault_retries) {
dev_err(sfp->dev,
"module persistently indicates fault, disabling\n");
sfp_sm_next(sfp, SFP_S_TX_DISABLE, 0);
@@ -1476,21 +1518,35 @@ static void sfp_sm_fault(struct sfp *sfp, unsigned int next_state, bool warn)
}
}
-static void sfp_sm_probe_for_phy(struct sfp *sfp)
+/* Probe a SFP for a PHY device if the module supports copper - the PHY
+ * normally sits at I2C bus address 0x56, and may either be a clause 22
+ * or clause 45 PHY.
+ *
+ * Clause 22 copper SFP modules normally operate in Cisco SGMII mode with
+ * negotiation enabled, but some may be in 1000base-X - which is for the
+ * PHY driver to determine.
+ *
+ * Clause 45 copper SFP+ modules (10G) appear to switch their interface
+ * mode according to the negotiated line speed.
+ */
+static int sfp_sm_probe_for_phy(struct sfp *sfp)
{
- /* Setting the serdes link mode is guesswork: there's no
- * field in the EEPROM which indicates what mode should
- * be used.
- *
- * If it's a gigabit-only fiber module, it probably does
- * not have a PHY, so switch to 802.3z negotiation mode.
- * Otherwise, switch to SGMII mode (which is required to
- * support non-gigabit speeds) and probe for a PHY.
- */
- if (sfp->id.base.e1000_base_t ||
- sfp->id.base.e100_base_lx ||
- sfp->id.base.e100_base_fx)
- sfp_sm_probe_phy(sfp);
+ int err = 0;
+
+ switch (sfp->id.base.extended_cc) {
+ case SFF8024_ECC_10GBASE_T_SFI:
+ case SFF8024_ECC_10GBASE_T_SR:
+ case SFF8024_ECC_5GBASE_T:
+ case SFF8024_ECC_2_5GBASE_T:
+ err = sfp_sm_probe_phy(sfp, true);
+ break;
+
+ default:
+ if (sfp->id.base.e1000_base_t)
+ err = sfp_sm_probe_phy(sfp, false);
+ break;
+ }
+ return err;
}
static int sfp_module_parse_power(struct sfp *sfp)
@@ -1550,6 +1606,13 @@ static int sfp_sm_mod_hpower(struct sfp *sfp, bool enable)
return -EAGAIN;
}
+ /* DM7052 reports as a high power module, responds to reads (with
+ * all bytes 0xff) at 0x51 but does not accept writes. In any case,
+ * if the bit is already set, we're already in high power mode.
+ */
+ if (!!(val & BIT(0)) == enable)
+ return 0;
+
if (enable)
val |= BIT(0);
else
@@ -1655,6 +1718,12 @@ static int sfp_sm_mod_probe(struct sfp *sfp, bool report)
if (ret < 0)
return ret;
+ if (!memcmp(id.base.vendor_name, "ALCATELLUCENT ", 16) &&
+ !memcmp(id.base.vendor_pn, "3FE46541AA ", 16))
+ sfp->module_t_start_up = T_START_UP_BAD_GPON;
+ else
+ sfp->module_t_start_up = T_START_UP;
+
return 0;
}
@@ -1812,6 +1881,7 @@ static void sfp_sm_module(struct sfp *sfp, unsigned int event)
static void sfp_sm_main(struct sfp *sfp, unsigned int event)
{
unsigned long timeout;
+ int ret;
/* Some events are global */
if (sfp->sm_state != SFP_S_DOWN &&
@@ -1820,6 +1890,8 @@ static void sfp_sm_main(struct sfp *sfp, unsigned int event)
if (sfp->sm_state == SFP_S_LINK_UP &&
sfp->sm_dev_state == SFP_DEV_UP)
sfp_sm_link_down(sfp);
+ if (sfp->sm_state > SFP_S_INIT)
+ sfp_module_stop(sfp->sfp_bus);
if (sfp->mod_phy)
sfp_sm_phy_detach(sfp);
sfp_module_tx_disable(sfp);
@@ -1841,7 +1913,7 @@ static void sfp_sm_main(struct sfp *sfp, unsigned int event)
sfp_module_tx_enable(sfp);
/* Initialise the fault clearance retries */
- sfp->sm_retries = 5;
+ sfp->sm_fault_retries = N_FAULT_INIT;
/* We need to check the TX_FAULT state, which is not defined
* while TX_DISABLE is asserted. The earliest we want to do
@@ -1855,11 +1927,12 @@ static void sfp_sm_main(struct sfp *sfp, unsigned int event)
break;
if (sfp->state & SFP_F_TX_FAULT) {
- /* Wait t_init before indicating that the link is up,
- * provided the current state indicates no TX_FAULT. If
- * TX_FAULT clears before this time, that's fine too.
+ /* Wait up to t_init (SFF-8472) or t_start_up (SFF-8431)
+ * from the TX_DISABLE deassertion for the module to
+ * initialise, which is indicated by TX_FAULT
+ * deasserting.
*/
- timeout = T_INIT_JIFFIES;
+ timeout = sfp->module_t_start_up;
if (timeout > T_WAIT)
timeout -= T_WAIT;
else
@@ -1876,27 +1949,51 @@ static void sfp_sm_main(struct sfp *sfp, unsigned int event)
case SFP_S_INIT:
if (event == SFP_E_TIMEOUT && sfp->state & SFP_F_TX_FAULT) {
- /* TX_FAULT is still asserted after t_init, so assume
- * there is a fault.
+ /* TX_FAULT is still asserted after t_init or
+ * or t_start_up, so assume there is a fault.
*/
sfp_sm_fault(sfp, SFP_S_INIT_TX_FAULT,
- sfp->sm_retries == 5);
+ sfp->sm_fault_retries == N_FAULT_INIT);
} else if (event == SFP_E_TIMEOUT || event == SFP_E_TX_CLEAR) {
- init_done: /* TX_FAULT deasserted or we timed out with TX_FAULT
- * clear. Probe for the PHY and check the LOS state.
- */
- sfp_sm_probe_for_phy(sfp);
- sfp_sm_link_check_los(sfp);
+ init_done:
+ sfp->sm_phy_retries = R_PHY_RETRY;
+ goto phy_probe;
+ }
+ break;
- /* Reset the fault retry count */
- sfp->sm_retries = 5;
+ case SFP_S_INIT_PHY:
+ if (event != SFP_E_TIMEOUT)
+ break;
+ phy_probe:
+ /* TX_FAULT deasserted or we timed out with TX_FAULT
+ * clear. Probe for the PHY and check the LOS state.
+ */
+ ret = sfp_sm_probe_for_phy(sfp);
+ if (ret == -ENODEV) {
+ if (--sfp->sm_phy_retries) {
+ sfp_sm_next(sfp, SFP_S_INIT_PHY, T_PHY_RETRY);
+ break;
+ } else {
+ dev_info(sfp->dev, "no PHY detected\n");
+ }
+ } else if (ret) {
+ sfp_sm_next(sfp, SFP_S_FAIL, 0);
+ break;
}
+ if (sfp_module_start(sfp->sfp_bus)) {
+ sfp_sm_next(sfp, SFP_S_FAIL, 0);
+ break;
+ }
+ sfp_sm_link_check_los(sfp);
+
+ /* Reset the fault retry count */
+ sfp->sm_fault_retries = N_FAULT;
break;
case SFP_S_INIT_TX_FAULT:
if (event == SFP_E_TIMEOUT) {
sfp_module_tx_fault_reset(sfp);
- sfp_sm_next(sfp, SFP_S_INIT, T_INIT_JIFFIES);
+ sfp_sm_next(sfp, SFP_S_INIT, sfp->module_t_start_up);
}
break;
@@ -1920,7 +2017,7 @@ static void sfp_sm_main(struct sfp *sfp, unsigned int event)
case SFP_S_TX_FAULT:
if (event == SFP_E_TIMEOUT) {
sfp_module_tx_fault_reset(sfp);
- sfp_sm_next(sfp, SFP_S_REINIT, T_INIT_JIFFIES);
+ sfp_sm_next(sfp, SFP_S_REINIT, sfp->module_t_start_up);
}
break;
diff --git a/drivers/net/phy/sfp.h b/drivers/net/phy/sfp.h
index 64f54b0bbd8c..b83f70526270 100644
--- a/drivers/net/phy/sfp.h
+++ b/drivers/net/phy/sfp.h
@@ -22,6 +22,8 @@ void sfp_link_up(struct sfp_bus *bus);
void sfp_link_down(struct sfp_bus *bus);
int sfp_module_insert(struct sfp_bus *bus, const struct sfp_eeprom_id *id);
void sfp_module_remove(struct sfp_bus *bus);
+int sfp_module_start(struct sfp_bus *bus);
+void sfp_module_stop(struct sfp_bus *bus);
int sfp_link_configure(struct sfp_bus *bus, const struct sfp_eeprom_id *id);
struct sfp_bus *sfp_register_socket(struct device *dev, struct sfp *sfp,
const struct sfp_socket_ops *ops);
diff --git a/drivers/net/phy/uPD60620.c b/drivers/net/phy/uPD60620.c
index a32b3fd8a370..38834347a427 100644
--- a/drivers/net/phy/uPD60620.c
+++ b/drivers/net/phy/uPD60620.c
@@ -68,12 +68,7 @@ static int upd60620_read_status(struct phy_device *phydev)
mii_lpa_to_linkmode_lpa_t(phydev->lp_advertising,
phy_state);
- if (phydev->duplex == DUPLEX_FULL) {
- if (phy_state & LPA_PAUSE_CAP)
- phydev->pause = 1;
- if (phy_state & LPA_PAUSE_ASYM)
- phydev->asym_pause = 1;
- }
+ phy_resolve_aneg_pause(phydev);
}
}
return 0;
diff --git a/drivers/net/ppp/ppp_async.c b/drivers/net/ppp/ppp_async.c
index a7b9cf3269bf..29a0917a81e6 100644
--- a/drivers/net/ppp/ppp_async.c
+++ b/drivers/net/ppp/ppp_async.c
@@ -874,15 +874,15 @@ ppp_async_input(struct asyncppp *ap, const unsigned char *buf,
skb = dev_alloc_skb(ap->mru + PPP_HDRLEN + 2);
if (!skb)
goto nomem;
- ap->rpkt = skb;
- }
- if (skb->len == 0) {
- /* Try to get the payload 4-byte aligned.
- * This should match the
- * PPP_ALLSTATIONS/PPP_UI/compressed tests in
- * process_input_packet, but we do not have
- * enough chars here to test buf[1] and buf[2].
- */
+ ap->rpkt = skb;
+ }
+ if (skb->len == 0) {
+ /* Try to get the payload 4-byte aligned.
+ * This should match the
+ * PPP_ALLSTATIONS/PPP_UI/compressed tests in
+ * process_input_packet, but we do not have
+ * enough chars here to test buf[1] and buf[2].
+ */
if (buf[0] != PPP_ALLSTATIONS)
skb_reserve(skb, 2 + (buf[0] & 1));
}
diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
index 3bf8a8b42983..22cc2cb9d878 100644
--- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c
@@ -296,8 +296,6 @@ static struct class *ppp_class;
/* per net-namespace data */
static inline struct ppp_net *ppp_pernet(struct net *net)
{
- BUG_ON(!net);
-
return net_generic(net, ppp_net_id);
}
diff --git a/drivers/net/slip/slip.c b/drivers/net/slip/slip.c
index 2a91c192659f..317d3a8df316 100644
--- a/drivers/net/slip/slip.c
+++ b/drivers/net/slip/slip.c
@@ -457,7 +457,7 @@ static void slip_write_wakeup(struct tty_struct *tty)
schedule_work(&sl->tx_work);
}
-static void sl_tx_timeout(struct net_device *dev)
+static void sl_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct slip *sl = netdev_priv(dev);
diff --git a/drivers/net/tap.c b/drivers/net/tap.c
index a6d63665ad03..1f4bdd94407a 100644
--- a/drivers/net/tap.c
+++ b/drivers/net/tap.c
@@ -341,6 +341,7 @@ rx_handler_result_t tap_handle_frame(struct sk_buff **pskb)
features |= tap->tap_features;
if (netif_needs_gso(skb, features)) {
struct sk_buff *segs = __skb_gso_segment(skb, features, false);
+ struct sk_buff *next;
if (IS_ERR(segs))
goto drop;
@@ -352,16 +353,13 @@ rx_handler_result_t tap_handle_frame(struct sk_buff **pskb)
}
consume_skb(skb);
- while (segs) {
- struct sk_buff *nskb = segs->next;
-
- segs->next = NULL;
- if (ptr_ring_produce(&q->ring, segs)) {
- kfree_skb(segs);
- kfree_skb_list(nskb);
+ skb_list_walk_safe(segs, skb, next) {
+ skb_mark_not_on_list(skb);
+ if (ptr_ring_produce(&q->ring, skb)) {
+ kfree_skb(skb);
+ kfree_skb_list(next);
break;
}
- segs = nskb;
}
} else {
/* If we receive a partial checksum and the tap side
diff --git a/drivers/net/usb/catc.c b/drivers/net/usb/catc.c
index 1e58702c737f..d387bc7ac1b6 100644
--- a/drivers/net/usb/catc.c
+++ b/drivers/net/usb/catc.c
@@ -447,7 +447,7 @@ static netdev_tx_t catc_start_xmit(struct sk_buff *skb,
return NETDEV_TX_OK;
}
-static void catc_tx_timeout(struct net_device *netdev)
+static void catc_tx_timeout(struct net_device *netdev, unsigned int txqueue)
{
struct catc *catc = netdev_priv(netdev);
diff --git a/drivers/net/usb/ch9200.c b/drivers/net/usb/ch9200.c
index 9df3c1ffff35..d7f3b70d5477 100644
--- a/drivers/net/usb/ch9200.c
+++ b/drivers/net/usb/ch9200.c
@@ -111,8 +111,8 @@ static int control_read(struct usbnet *dev,
request_type = (USB_DIR_IN | USB_TYPE_VENDOR |
USB_RECIP_DEVICE);
- netdev_dbg(dev->net, "Control_read() index=0x%02x size=%d\n",
- index, size);
+ netdev_dbg(dev->net, "%s() index=0x%02x size=%d\n",
+ __func__, index, size);
buf = kmalloc(size, GFP_KERNEL);
if (!buf) {
@@ -130,8 +130,6 @@ static int control_read(struct usbnet *dev,
err = -EINVAL;
kfree(buf);
- return err;
-
err_out:
return err;
}
@@ -151,8 +149,8 @@ static int control_write(struct usbnet *dev, unsigned char request,
request_type = (USB_DIR_OUT | USB_TYPE_VENDOR |
USB_RECIP_DEVICE);
- netdev_dbg(dev->net, "Control_write() index=0x%02x size=%d\n",
- index, size);
+ netdev_dbg(dev->net, "%s() index=0x%02x size=%d\n",
+ __func__, index, size);
if (data) {
buf = kmemdup(data, size, GFP_KERNEL);
@@ -181,8 +179,8 @@ static int ch9200_mdio_read(struct net_device *netdev, int phy_id, int loc)
struct usbnet *dev = netdev_priv(netdev);
unsigned char buff[2];
- netdev_dbg(netdev, "ch9200_mdio_read phy_id:%02x loc:%02x\n",
- phy_id, loc);
+ netdev_dbg(netdev, "%s phy_id:%02x loc:%02x\n",
+ __func__, phy_id, loc);
if (phy_id != 0)
return -ENODEV;
@@ -199,8 +197,8 @@ static void ch9200_mdio_write(struct net_device *netdev,
struct usbnet *dev = netdev_priv(netdev);
unsigned char buff[2];
- netdev_dbg(netdev, "ch9200_mdio_write() phy_id=%02x loc:%02x\n",
- phy_id, loc);
+ netdev_dbg(netdev, "%s() phy_id=%02x loc:%02x\n",
+ __func__, phy_id, loc);
if (phy_id != 0)
return;
@@ -219,8 +217,8 @@ static int ch9200_link_reset(struct usbnet *dev)
mii_check_media(&dev->mii, 1, 1);
mii_ethtool_gset(&dev->mii, &ecmd);
- netdev_dbg(dev->net, "link_reset() speed:%d duplex:%d\n",
- ecmd.speed, ecmd.duplex);
+ netdev_dbg(dev->net, "%s() speed:%d duplex:%d\n",
+ __func__, ecmd.speed, ecmd.duplex);
return 0;
}
@@ -309,7 +307,7 @@ static int get_mac_address(struct usbnet *dev, unsigned char *data)
unsigned char mac_addr[0x06];
int rd_mac_len = 0;
- netdev_dbg(dev->net, "get_mac_address:\n\tusbnet VID:%0x PID:%0x\n",
+ netdev_dbg(dev->net, "%s:\n\tusbnet VID:%0x PID:%0x\n", __func__,
le16_to_cpu(dev->udev->descriptor.idVendor),
le16_to_cpu(dev->udev->descriptor.idProduct));
diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c
index ca827802f291..417e42c9fd03 100644
--- a/drivers/net/usb/hso.c
+++ b/drivers/net/usb/hso.c
@@ -820,7 +820,7 @@ static const struct ethtool_ops ops = {
};
/* called when a packet did not ack after watchdogtimeout */
-static void hso_net_tx_timeout(struct net_device *net)
+static void hso_net_tx_timeout(struct net_device *net, unsigned int txqueue)
{
struct hso_net *odev = netdev_priv(net);
diff --git a/drivers/net/usb/ipheth.c b/drivers/net/usb/ipheth.c
index 8c01fbf68a89..c792d65dd7b4 100644
--- a/drivers/net/usb/ipheth.c
+++ b/drivers/net/usb/ipheth.c
@@ -400,7 +400,7 @@ static int ipheth_tx(struct sk_buff *skb, struct net_device *net)
return NETDEV_TX_OK;
}
-static void ipheth_tx_timeout(struct net_device *net)
+static void ipheth_tx_timeout(struct net_device *net, unsigned int txqueue)
{
struct ipheth_device *dev = netdev_priv(net);
diff --git a/drivers/net/usb/kaweth.c b/drivers/net/usb/kaweth.c
index 8e210ba4a313..ed01dc964c99 100644
--- a/drivers/net/usb/kaweth.c
+++ b/drivers/net/usb/kaweth.c
@@ -894,7 +894,7 @@ static void kaweth_async_set_rx_mode(struct kaweth_device *kaweth)
/****************************************************************
* kaweth_tx_timeout
****************************************************************/
-static void kaweth_tx_timeout(struct net_device *net)
+static void kaweth_tx_timeout(struct net_device *net, unsigned int txqueue)
{
struct kaweth_device *kaweth = netdev_priv(net);
diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
index cf1f3f0a4b9b..d3239b49c3bb 100644
--- a/drivers/net/usb/lan78xx.c
+++ b/drivers/net/usb/lan78xx.c
@@ -511,7 +511,7 @@ static int lan78xx_read_stats(struct lan78xx_net *dev,
}
} else {
netdev_warn(dev->net,
- "Failed to read stat ret = 0x%x", ret);
+ "Failed to read stat ret = %d", ret);
}
kfree(stats);
@@ -1808,6 +1808,7 @@ static int lan78xx_mdio_init(struct lan78xx_net *dev)
dev->mdiobus->read = lan78xx_mdiobus_read;
dev->mdiobus->write = lan78xx_mdiobus_write;
dev->mdiobus->name = "lan78xx-mdiobus";
+ dev->mdiobus->parent = &dev->udev->dev;
snprintf(dev->mdiobus->id, MII_BUS_ID_SIZE, "usb-%03d:%03d",
dev->udev->bus->busnum, dev->udev->devnum);
@@ -2723,11 +2724,6 @@ static int lan78xx_stop(struct net_device *net)
return 0;
}
-static int lan78xx_linearize(struct sk_buff *skb)
-{
- return skb_linearize(skb);
-}
-
static struct sk_buff *lan78xx_tx_prep(struct lan78xx_net *dev,
struct sk_buff *skb, gfp_t flags)
{
@@ -2739,8 +2735,10 @@ static struct sk_buff *lan78xx_tx_prep(struct lan78xx_net *dev,
return NULL;
}
- if (lan78xx_linearize(skb) < 0)
+ if (skb_linearize(skb)) {
+ dev_kfree_skb_any(skb);
return NULL;
+ }
tx_cmd_a = (u32)(skb->len & TX_CMD_A_LEN_MASK_) | TX_CMD_A_FCS_;
@@ -3662,7 +3660,7 @@ static void lan78xx_disconnect(struct usb_interface *intf)
usb_put_dev(udev);
}
-static void lan78xx_tx_timeout(struct net_device *net)
+static void lan78xx_tx_timeout(struct net_device *net, unsigned int txqueue)
{
struct lan78xx_net *dev = netdev_priv(net);
diff --git a/drivers/net/usb/pegasus.c b/drivers/net/usb/pegasus.c
index f7d117d80cfb..8783e2ab3ec0 100644
--- a/drivers/net/usb/pegasus.c
+++ b/drivers/net/usb/pegasus.c
@@ -693,7 +693,7 @@ static void intr_callback(struct urb *urb)
"can't resubmit interrupt urb, %d\n", res);
}
-static void pegasus_tx_timeout(struct net_device *net)
+static void pegasus_tx_timeout(struct net_device *net, unsigned int txqueue)
{
pegasus_t *pegasus = netdev_priv(net);
netif_warn(pegasus, timer, net, "tx timeout\n");
diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index c5ebf35d2488..fe22a582373b 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -1897,8 +1897,8 @@ static void r8152_csum_workaround(struct r8152 *tp, struct sk_buff *skb,
{
if (skb_shinfo(skb)->gso_size) {
netdev_features_t features = tp->netdev->features;
+ struct sk_buff *segs, *seg, *next;
struct sk_buff_head seg_list;
- struct sk_buff *segs, *nskb;
features &= ~(NETIF_F_SG | NETIF_F_IPV6_CSUM | NETIF_F_TSO6);
segs = skb_gso_segment(skb, features);
@@ -1907,12 +1907,10 @@ static void r8152_csum_workaround(struct r8152 *tp, struct sk_buff *skb,
__skb_queue_head_init(&seg_list);
- do {
- nskb = segs;
- segs = segs->next;
- nskb->next = NULL;
- __skb_queue_tail(&seg_list, nskb);
- } while (segs);
+ skb_list_walk_safe(segs, seg, next) {
+ skb_mark_not_on_list(seg);
+ __skb_queue_tail(&seg_list, seg);
+ }
skb_queue_splice(&seg_list, list);
dev_kfree_skb(skb);
@@ -2507,7 +2505,7 @@ static void rtl_drop_queued_tx(struct r8152 *tp)
}
}
-static void rtl8152_tx_timeout(struct net_device *netdev)
+static void rtl8152_tx_timeout(struct net_device *netdev, unsigned int txqueue)
{
struct r8152 *tp = netdev_priv(netdev);
diff --git a/drivers/net/usb/rtl8150.c b/drivers/net/usb/rtl8150.c
index 13e51ccf0214..e7c630d37589 100644
--- a/drivers/net/usb/rtl8150.c
+++ b/drivers/net/usb/rtl8150.c
@@ -655,7 +655,7 @@ static void disable_net_traffic(rtl8150_t * dev)
set_registers(dev, CR, 1, &cr);
}
-static void rtl8150_tx_timeout(struct net_device *netdev)
+static void rtl8150_tx_timeout(struct net_device *netdev, unsigned int txqueue)
{
rtl8150_t *dev = netdev_priv(netdev);
dev_warn(&netdev->dev, "Tx timeout.\n");
diff --git a/drivers/net/usb/sierra_net.c b/drivers/net/usb/sierra_net.c
index 34c1eaba536c..389d19dd7909 100644
--- a/drivers/net/usb/sierra_net.c
+++ b/drivers/net/usb/sierra_net.c
@@ -865,7 +865,7 @@ static struct sk_buff *sierra_net_tx_fixup(struct usbnet *dev,
u16 len;
bool need_tail;
- BUILD_BUG_ON(FIELD_SIZEOF(struct usbnet, data)
+ BUILD_BUG_ON(sizeof_field(struct usbnet, data)
< sizeof(struct cdc_state));
dev_dbg(&dev->udev->dev, "%s", __func__);
diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c
index 30e511c2c8d0..5ec97def3513 100644
--- a/drivers/net/usb/usbnet.c
+++ b/drivers/net/usb/usbnet.c
@@ -1293,7 +1293,7 @@ static void tx_complete (struct urb *urb)
/*-------------------------------------------------------------------------*/
-void usbnet_tx_timeout (struct net_device *net)
+void usbnet_tx_timeout (struct net_device *net, unsigned int txqueue)
{
struct usbnet *dev = netdev_priv(net);
@@ -2184,7 +2184,7 @@ static int __init usbnet_init(void)
{
/* Compiler should optimize this out. */
BUILD_BUG_ON(
- FIELD_SIZEOF(struct sk_buff, cb) < sizeof(struct skb_data));
+ sizeof_field(struct sk_buff, cb) < sizeof(struct skb_data));
eth_random_addr(node_id);
return 0;
diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c
index 216acf37ca7c..18f152fa0068 100644
--- a/drivers/net/vmxnet3/vmxnet3_drv.c
+++ b/drivers/net/vmxnet3/vmxnet3_drv.c
@@ -3198,7 +3198,7 @@ vmxnet3_free_intr_resources(struct vmxnet3_adapter *adapter)
static void
-vmxnet3_tx_timeout(struct net_device *netdev)
+vmxnet3_tx_timeout(struct net_device *netdev, unsigned int txqueue)
{
struct vmxnet3_adapter *adapter = netdev_priv(netdev);
adapter->tx_timeout_count++;
diff --git a/drivers/net/vmxnet3/vmxnet3_ethtool.c b/drivers/net/vmxnet3/vmxnet3_ethtool.c
index 0a38c76688ab..1e4b9ba70983 100644
--- a/drivers/net/vmxnet3/vmxnet3_ethtool.c
+++ b/drivers/net/vmxnet3/vmxnet3_ethtool.c
@@ -555,10 +555,8 @@ vmxnet3_set_ringparam(struct net_device *netdev,
}
if (VMXNET3_VERSION_GE_3(adapter)) {
- if (param->rx_mini_pending < 0 ||
- param->rx_mini_pending > VMXNET3_RXDATA_DESC_MAX_SIZE) {
+ if (param->rx_mini_pending > VMXNET3_RXDATA_DESC_MAX_SIZE)
return -EINVAL;
- }
} else if (param->rx_mini_pending != 0) {
return -EINVAL;
}
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 4c34375c2e22..d3b08b76b1ec 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -1060,6 +1060,7 @@ static int vxlan_fdb_parse(struct nlattr *tb[], struct vxlan_dev *vxlan,
return err;
} else {
union vxlan_addr *remote = &vxlan->default_dst.remote_ip;
+
if (remote->sa.sa_family == AF_INET) {
ip->sin.sin_addr.s_addr = htonl(INADDR_ANY);
ip->sa.sa_family = AF_INET;
@@ -1696,7 +1697,7 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
if (__iptunnel_pull_header(skb, VXLAN_HLEN, protocol, raw_proto,
!net_eq(vxlan->net, dev_net(vxlan->dev))))
- goto drop;
+ goto drop;
if (vxlan_collect_metadata(vs)) {
struct metadata_dst *tun_dst;
@@ -2541,7 +2542,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
ndst = &rt->dst;
skb_tunnel_check_pmtu(skb, ndst, VXLAN_HEADROOM);
- tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
+ tos = ip_tunnel_ecn_encap(RT_TOS(tos), old_iph, skb);
ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
err = vxlan_build_skb(skb, ndst, sizeof(struct iphdr),
vni, md, flags, udp_sum);
@@ -2581,7 +2582,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
skb_tunnel_check_pmtu(skb, ndst, VXLAN6_HEADROOM);
- tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
+ tos = ip_tunnel_ecn_encap(RT_TOS(tos), old_iph, skb);
ttl = ttl ? : ip6_dst_hoplimit(ndst);
skb_scrub_packet(skb, xnet);
err = vxlan_build_skb(skb, ndst, sizeof(struct ipv6hdr),
@@ -3069,10 +3070,10 @@ static void vxlan_raw_setup(struct net_device *dev)
static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = {
[IFLA_VXLAN_ID] = { .type = NLA_U32 },
- [IFLA_VXLAN_GROUP] = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
+ [IFLA_VXLAN_GROUP] = { .len = sizeof_field(struct iphdr, daddr) },
[IFLA_VXLAN_GROUP6] = { .len = sizeof(struct in6_addr) },
[IFLA_VXLAN_LINK] = { .type = NLA_U32 },
- [IFLA_VXLAN_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
+ [IFLA_VXLAN_LOCAL] = { .len = sizeof_field(struct iphdr, saddr) },
[IFLA_VXLAN_LOCAL6] = { .len = sizeof(struct in6_addr) },
[IFLA_VXLAN_TOS] = { .type = NLA_U8 },
[IFLA_VXLAN_TTL] = { .type = NLA_U8 },
@@ -4128,30 +4129,30 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
nla_put_u8(skb, IFLA_VXLAN_DF, vxlan->cfg.df) ||
nla_put_be32(skb, IFLA_VXLAN_LABEL, vxlan->cfg.label) ||
nla_put_u8(skb, IFLA_VXLAN_LEARNING,
- !!(vxlan->cfg.flags & VXLAN_F_LEARN)) ||
+ !!(vxlan->cfg.flags & VXLAN_F_LEARN)) ||
nla_put_u8(skb, IFLA_VXLAN_PROXY,
- !!(vxlan->cfg.flags & VXLAN_F_PROXY)) ||
+ !!(vxlan->cfg.flags & VXLAN_F_PROXY)) ||
nla_put_u8(skb, IFLA_VXLAN_RSC,
!!(vxlan->cfg.flags & VXLAN_F_RSC)) ||
nla_put_u8(skb, IFLA_VXLAN_L2MISS,
- !!(vxlan->cfg.flags & VXLAN_F_L2MISS)) ||
+ !!(vxlan->cfg.flags & VXLAN_F_L2MISS)) ||
nla_put_u8(skb, IFLA_VXLAN_L3MISS,
- !!(vxlan->cfg.flags & VXLAN_F_L3MISS)) ||
+ !!(vxlan->cfg.flags & VXLAN_F_L3MISS)) ||
nla_put_u8(skb, IFLA_VXLAN_COLLECT_METADATA,
!!(vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA)) ||
nla_put_u32(skb, IFLA_VXLAN_AGEING, vxlan->cfg.age_interval) ||
nla_put_u32(skb, IFLA_VXLAN_LIMIT, vxlan->cfg.addrmax) ||
nla_put_be16(skb, IFLA_VXLAN_PORT, vxlan->cfg.dst_port) ||
nla_put_u8(skb, IFLA_VXLAN_UDP_CSUM,
- !(vxlan->cfg.flags & VXLAN_F_UDP_ZERO_CSUM_TX)) ||
+ !(vxlan->cfg.flags & VXLAN_F_UDP_ZERO_CSUM_TX)) ||
nla_put_u8(skb, IFLA_VXLAN_UDP_ZERO_CSUM6_TX,
- !!(vxlan->cfg.flags & VXLAN_F_UDP_ZERO_CSUM6_TX)) ||
+ !!(vxlan->cfg.flags & VXLAN_F_UDP_ZERO_CSUM6_TX)) ||
nla_put_u8(skb, IFLA_VXLAN_UDP_ZERO_CSUM6_RX,
- !!(vxlan->cfg.flags & VXLAN_F_UDP_ZERO_CSUM6_RX)) ||
+ !!(vxlan->cfg.flags & VXLAN_F_UDP_ZERO_CSUM6_RX)) ||
nla_put_u8(skb, IFLA_VXLAN_REMCSUM_TX,
- !!(vxlan->cfg.flags & VXLAN_F_REMCSUM_TX)) ||
+ !!(vxlan->cfg.flags & VXLAN_F_REMCSUM_TX)) ||
nla_put_u8(skb, IFLA_VXLAN_REMCSUM_RX,
- !!(vxlan->cfg.flags & VXLAN_F_REMCSUM_RX)))
+ !!(vxlan->cfg.flags & VXLAN_F_REMCSUM_RX)))
goto nla_put_failure;
if (nla_put(skb, IFLA_VXLAN_PORT_RANGE, sizeof(ports), &ports))
diff --git a/drivers/net/wan/Kconfig b/drivers/net/wan/Kconfig
index dd1a147f2971..4530840e15ef 100644
--- a/drivers/net/wan/Kconfig
+++ b/drivers/net/wan/Kconfig
@@ -315,7 +315,8 @@ config DSCC4_PCI_RST
config IXP4XX_HSS
tristate "Intel IXP4xx HSS (synchronous serial port) support"
- depends on HDLC && ARM && ARCH_IXP4XX && IXP4XX_NPE && IXP4XX_QMGR
+ depends on HDLC && IXP4XX_NPE && IXP4XX_QMGR
+ depends on ARCH_IXP4XX
help
Say Y here if you want to use built-in HSS ports
on IXP4xx processor.
diff --git a/drivers/net/wan/cosa.c b/drivers/net/wan/cosa.c
index af539151d663..5d6532ad6b78 100644
--- a/drivers/net/wan/cosa.c
+++ b/drivers/net/wan/cosa.c
@@ -268,7 +268,7 @@ static int cosa_net_attach(struct net_device *dev, unsigned short encoding,
unsigned short parity);
static int cosa_net_open(struct net_device *d);
static int cosa_net_close(struct net_device *d);
-static void cosa_net_timeout(struct net_device *d);
+static void cosa_net_timeout(struct net_device *d, unsigned int txqueue);
static netdev_tx_t cosa_net_tx(struct sk_buff *skb, struct net_device *d);
static char *cosa_net_setup_rx(struct channel_data *channel, int size);
static int cosa_net_rx_done(struct channel_data *channel);
@@ -670,7 +670,7 @@ static netdev_tx_t cosa_net_tx(struct sk_buff *skb,
return NETDEV_TX_OK;
}
-static void cosa_net_timeout(struct net_device *dev)
+static void cosa_net_timeout(struct net_device *dev, unsigned int txqueue)
{
struct channel_data *chan = dev_to_chan(dev);
diff --git a/drivers/net/wan/farsync.c b/drivers/net/wan/farsync.c
index 1901ec7948d8..7916efce7188 100644
--- a/drivers/net/wan/farsync.c
+++ b/drivers/net/wan/farsync.c
@@ -2239,7 +2239,7 @@ fst_attach(struct net_device *dev, unsigned short encoding, unsigned short parit
}
static void
-fst_tx_timeout(struct net_device *dev)
+fst_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct fst_port_info *port;
struct fst_card_info *card;
diff --git a/drivers/net/wan/fsl_ucc_hdlc.c b/drivers/net/wan/fsl_ucc_hdlc.c
index ca0f3be2b6bf..c28f8409067e 100644
--- a/drivers/net/wan/fsl_ucc_hdlc.c
+++ b/drivers/net/wan/fsl_ucc_hdlc.c
@@ -635,11 +635,9 @@ static irqreturn_t ucc_hdlc_irq_handler(int irq, void *dev_id)
struct ucc_hdlc_private *priv = (struct ucc_hdlc_private *)dev_id;
struct net_device *dev = priv->ndev;
struct ucc_fast_private *uccf;
- struct ucc_tdm_info *ut_info;
u32 ucce;
u32 uccm;
- ut_info = priv->ut_info;
uccf = priv->uccf;
ucce = ioread32be(uccf->p_ucce);
@@ -872,7 +870,6 @@ static void resume_clk_config(struct ucc_hdlc_private *priv)
static int uhdlc_suspend(struct device *dev)
{
struct ucc_hdlc_private *priv = dev_get_drvdata(dev);
- struct ucc_tdm_info *ut_info;
struct ucc_fast __iomem *uf_regs;
if (!priv)
@@ -884,7 +881,6 @@ static int uhdlc_suspend(struct device *dev)
netif_device_detach(priv->ndev);
napi_disable(&priv->napi);
- ut_info = priv->ut_info;
uf_regs = priv->uf_regs;
/* backup gumr guemr*/
@@ -917,7 +913,7 @@ static int uhdlc_resume(struct device *dev)
struct ucc_fast __iomem *uf_regs;
struct ucc_fast_private *uccf;
struct ucc_fast_info *uf_info;
- int ret, i;
+ int i;
u32 cecr_subblock;
u16 bd_status;
@@ -962,16 +958,16 @@ static int uhdlc_resume(struct device *dev)
/* Write to QE CECR, UCCx channel to Stop Transmission */
cecr_subblock = ucc_fast_get_qe_cr_subblock(uf_info->ucc_num);
- ret = qe_issue_cmd(QE_STOP_TX, cecr_subblock,
- (u8)QE_CR_PROTOCOL_UNSPECIFIED, 0);
+ qe_issue_cmd(QE_STOP_TX, cecr_subblock,
+ (u8)QE_CR_PROTOCOL_UNSPECIFIED, 0);
/* Set UPSMR normal mode */
iowrite32be(0, &uf_regs->upsmr);
/* init parameter base */
cecr_subblock = ucc_fast_get_qe_cr_subblock(uf_info->ucc_num);
- ret = qe_issue_cmd(QE_ASSIGN_PAGE_TO_DEVICE, cecr_subblock,
- QE_CR_PROTOCOL_UNSPECIFIED, priv->ucc_pram_offset);
+ qe_issue_cmd(QE_ASSIGN_PAGE_TO_DEVICE, cecr_subblock,
+ QE_CR_PROTOCOL_UNSPECIFIED, priv->ucc_pram_offset);
priv->ucc_pram = (struct ucc_hdlc_param __iomem *)
qe_muram_addr(priv->ucc_pram_offset);
@@ -1039,7 +1035,7 @@ static const struct dev_pm_ops uhdlc_pm_ops = {
#define HDLC_PM_OPS NULL
#endif
-static void uhdlc_tx_timeout(struct net_device *ndev)
+static void uhdlc_tx_timeout(struct net_device *ndev, unsigned int txqueue)
{
netdev_err(ndev, "%s\n", __func__);
}
diff --git a/drivers/net/wan/hdlc_cisco.c b/drivers/net/wan/hdlc_cisco.c
index a030f5aa6b95..d8cba3625c18 100644
--- a/drivers/net/wan/hdlc_cisco.c
+++ b/drivers/net/wan/hdlc_cisco.c
@@ -75,7 +75,7 @@ static int cisco_hard_header(struct sk_buff *skb, struct net_device *dev,
{
struct hdlc_header *data;
#ifdef DEBUG_HARD_HEADER
- printk(KERN_DEBUG "%s: cisco_hard_header called\n", dev->name);
+ netdev_dbg(dev, "%s called\n", __func__);
#endif
skb_push(skb, sizeof(struct hdlc_header));
@@ -101,7 +101,7 @@ static void cisco_keepalive_send(struct net_device *dev, u32 type,
skb = dev_alloc_skb(sizeof(struct hdlc_header) +
sizeof(struct cisco_packet));
if (!skb) {
- netdev_warn(dev, "Memory squeeze on cisco_keepalive_send()\n");
+ netdev_warn(dev, "Memory squeeze on %s()\n", __func__);
return;
}
skb_reserve(skb, 4);
diff --git a/drivers/net/wan/ixp4xx_hss.c b/drivers/net/wan/ixp4xx_hss.c
index ea6ee6a608ce..7c5cf77e9ef1 100644
--- a/drivers/net/wan/ixp4xx_hss.c
+++ b/drivers/net/wan/ixp4xx_hss.c
@@ -17,6 +17,7 @@
#include <linux/io.h>
#include <linux/kernel.h>
#include <linux/platform_device.h>
+#include <linux/platform_data/wan_ixp4xx_hss.h>
#include <linux/poll.h>
#include <linux/slab.h>
#include <linux/soc/ixp4xx/npe.h>
@@ -258,7 +259,7 @@ struct port {
struct hss_plat_info *plat;
buffer_t *rx_buff_tab[RX_DESCS], *tx_buff_tab[TX_DESCS];
struct desc *desc_tab; /* coherent */
- u32 desc_tab_phys;
+ dma_addr_t desc_tab_phys;
unsigned int id;
unsigned int clock_type, clock_rate, loopback;
unsigned int initialized, carrier;
@@ -858,7 +859,7 @@ static int hss_hdlc_xmit(struct sk_buff *skb, struct net_device *dev)
dev->stats.tx_dropped++;
return NETDEV_TX_OK;
}
- memcpy_swab32(mem, (u32 *)((int)skb->data & ~3), bytes / 4);
+ memcpy_swab32(mem, (u32 *)((uintptr_t)skb->data & ~3), bytes / 4);
dev_kfree_skb(skb);
#endif
@@ -1182,14 +1183,14 @@ static int hss_hdlc_attach(struct net_device *dev, unsigned short encoding,
}
}
-static u32 check_clock(u32 rate, u32 a, u32 b, u32 c,
+static u32 check_clock(u32 timer_freq, u32 rate, u32 a, u32 b, u32 c,
u32 *best, u32 *best_diff, u32 *reg)
{
/* a is 10-bit, b is 10-bit, c is 12-bit */
u64 new_rate;
u32 new_diff;
- new_rate = ixp4xx_timer_freq * (u64)(c + 1);
+ new_rate = timer_freq * (u64)(c + 1);
do_div(new_rate, a * (c + 1) + b + 1);
new_diff = abs((u32)new_rate - rate);
@@ -1201,40 +1202,43 @@ static u32 check_clock(u32 rate, u32 a, u32 b, u32 c,
return new_diff;
}
-static void find_best_clock(u32 rate, u32 *best, u32 *reg)
+static void find_best_clock(u32 timer_freq, u32 rate, u32 *best, u32 *reg)
{
u32 a, b, diff = 0xFFFFFFFF;
- a = ixp4xx_timer_freq / rate;
+ a = timer_freq / rate;
if (a > 0x3FF) { /* 10-bit value - we can go as slow as ca. 65 kb/s */
- check_clock(rate, 0x3FF, 1, 1, best, &diff, reg);
+ check_clock(timer_freq, rate, 0x3FF, 1, 1, best, &diff, reg);
return;
}
if (a == 0) { /* > 66.666 MHz */
a = 1; /* minimum divider is 1 (a = 0, b = 1, c = 1) */
- rate = ixp4xx_timer_freq;
+ rate = timer_freq;
}
- if (rate * a == ixp4xx_timer_freq) { /* don't divide by 0 later */
- check_clock(rate, a - 1, 1, 1, best, &diff, reg);
+ if (rate * a == timer_freq) { /* don't divide by 0 later */
+ check_clock(timer_freq, rate, a - 1, 1, 1, best, &diff, reg);
return;
}
for (b = 0; b < 0x400; b++) {
u64 c = (b + 1) * (u64)rate;
- do_div(c, ixp4xx_timer_freq - rate * a);
+ do_div(c, timer_freq - rate * a);
c--;
if (c >= 0xFFF) { /* 12-bit - no need to check more 'b's */
if (b == 0 && /* also try a bit higher rate */
- !check_clock(rate, a - 1, 1, 1, best, &diff, reg))
+ !check_clock(timer_freq, rate, a - 1, 1, 1, best,
+ &diff, reg))
return;
- check_clock(rate, a, b, 0xFFF, best, &diff, reg);
+ check_clock(timer_freq, rate, a, b, 0xFFF, best,
+ &diff, reg);
return;
}
- if (!check_clock(rate, a, b, c, best, &diff, reg))
+ if (!check_clock(timer_freq, rate, a, b, c, best, &diff, reg))
return;
- if (!check_clock(rate, a, b, c + 1, best, &diff, reg))
+ if (!check_clock(timer_freq, rate, a, b, c + 1, best, &diff,
+ reg))
return;
}
}
@@ -1285,8 +1289,9 @@ static int hss_hdlc_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
port->clock_type = clk; /* Update settings */
if (clk == CLOCK_INT)
- find_best_clock(new_line.clock_rate, &port->clock_rate,
- &port->clock_reg);
+ find_best_clock(port->plat->timer_freq,
+ new_line.clock_rate,
+ &port->clock_rate, &port->clock_reg);
else {
port->clock_rate = 0;
port->clock_reg = CLK42X_SPEED_2048KHZ;
diff --git a/drivers/net/wan/lmc/lmc_main.c b/drivers/net/wan/lmc/lmc_main.c
index 0e6a51525d91..a20f467ca48a 100644
--- a/drivers/net/wan/lmc/lmc_main.c
+++ b/drivers/net/wan/lmc/lmc_main.c
@@ -99,7 +99,7 @@ static int lmc_ifdown(struct net_device * const);
static void lmc_watchdog(struct timer_list *t);
static void lmc_reset(lmc_softc_t * const sc);
static void lmc_dec_reset(lmc_softc_t * const sc);
-static void lmc_driver_timeout(struct net_device *dev);
+static void lmc_driver_timeout(struct net_device *dev, unsigned int txqueue);
/*
* linux reserves 16 device specific IOCTLs. We call them
@@ -2044,7 +2044,7 @@ static void lmc_initcsrs(lmc_softc_t * const sc, lmc_csrptr_t csr_base, /*fold00
lmc_trace(sc->lmc_device, "lmc_initcsrs out");
}
-static void lmc_driver_timeout(struct net_device *dev)
+static void lmc_driver_timeout(struct net_device *dev, unsigned int txqueue)
{
lmc_softc_t *sc = dev_to_sc(dev);
u32 csr6;
diff --git a/drivers/net/wan/sdla.c b/drivers/net/wan/sdla.c
index e2e679a01b65..77ccf3672ede 100644
--- a/drivers/net/wan/sdla.c
+++ b/drivers/net/wan/sdla.c
@@ -708,7 +708,7 @@ static netdev_tx_t sdla_transmit(struct sk_buff *skb,
spin_lock_irqsave(&sdla_lock, flags);
SDLA_WINDOW(dev, addr);
- pbuf = (void *)(((int) dev->mem_start) + (addr & SDLA_ADDR_MASK));
+ pbuf = (void *)(dev->mem_start + (addr & SDLA_ADDR_MASK));
__sdla_write(dev, pbuf->buf_addr, skb->data, skb->len);
SDLA_WINDOW(dev, addr);
pbuf->opp_flag = 1;
diff --git a/drivers/net/wan/x25_asy.c b/drivers/net/wan/x25_asy.c
index 914be5847386..69773d228ec1 100644
--- a/drivers/net/wan/x25_asy.c
+++ b/drivers/net/wan/x25_asy.c
@@ -276,7 +276,7 @@ static void x25_asy_write_wakeup(struct tty_struct *tty)
sl->xhead += actual;
}
-static void x25_asy_timeout(struct net_device *dev)
+static void x25_asy_timeout(struct net_device *dev, unsigned int txqueue)
{
struct x25_asy *sl = netdev_priv(dev);
diff --git a/drivers/net/wimax/i2400m/netdev.c b/drivers/net/wimax/i2400m/netdev.c
index a5db3c06b646..a7fcbceb6e6b 100644
--- a/drivers/net/wimax/i2400m/netdev.c
+++ b/drivers/net/wimax/i2400m/netdev.c
@@ -380,7 +380,7 @@ drop:
static
-void i2400m_tx_timeout(struct net_device *net_dev)
+void i2400m_tx_timeout(struct net_device *net_dev, unsigned int txqueue)
{
/*
* We might want to kick the device
diff --git a/drivers/net/wireguard/Makefile b/drivers/net/wireguard/Makefile
new file mode 100644
index 000000000000..fc52b2cb500b
--- /dev/null
+++ b/drivers/net/wireguard/Makefile
@@ -0,0 +1,18 @@
+ccflags-y := -O3
+ccflags-y += -D'pr_fmt(fmt)=KBUILD_MODNAME ": " fmt'
+ccflags-$(CONFIG_WIREGUARD_DEBUG) += -DDEBUG
+wireguard-y := main.o
+wireguard-y += noise.o
+wireguard-y += device.o
+wireguard-y += peer.o
+wireguard-y += timers.o
+wireguard-y += queueing.o
+wireguard-y += send.o
+wireguard-y += receive.o
+wireguard-y += socket.o
+wireguard-y += peerlookup.o
+wireguard-y += allowedips.o
+wireguard-y += ratelimiter.o
+wireguard-y += cookie.o
+wireguard-y += netlink.o
+obj-$(CONFIG_WIREGUARD) := wireguard.o
diff --git a/drivers/net/wireguard/allowedips.c b/drivers/net/wireguard/allowedips.c
new file mode 100644
index 000000000000..121d9ea0f135
--- /dev/null
+++ b/drivers/net/wireguard/allowedips.c
@@ -0,0 +1,376 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <[email protected]>. All Rights Reserved.
+ */
+
+#include "allowedips.h"
+#include "peer.h"
+
+static void swap_endian(u8 *dst, const u8 *src, u8 bits)
+{
+ if (bits == 32) {
+ *(u32 *)dst = be32_to_cpu(*(const __be32 *)src);
+ } else if (bits == 128) {
+ ((u64 *)dst)[0] = be64_to_cpu(((const __be64 *)src)[0]);
+ ((u64 *)dst)[1] = be64_to_cpu(((const __be64 *)src)[1]);
+ }
+}
+
+static void copy_and_assign_cidr(struct allowedips_node *node, const u8 *src,
+ u8 cidr, u8 bits)
+{
+ node->cidr = cidr;
+ node->bit_at_a = cidr / 8U;
+#ifdef __LITTLE_ENDIAN
+ node->bit_at_a ^= (bits / 8U - 1U) % 8U;
+#endif
+ node->bit_at_b = 7U - (cidr % 8U);
+ node->bitlen = bits;
+ memcpy(node->bits, src, bits / 8U);
+}
+#define CHOOSE_NODE(parent, key) \
+ parent->bit[(key[parent->bit_at_a] >> parent->bit_at_b) & 1]
+
+static void push_rcu(struct allowedips_node **stack,
+ struct allowedips_node __rcu *p, unsigned int *len)
+{
+ if (rcu_access_pointer(p)) {
+ WARN_ON(IS_ENABLED(DEBUG) && *len >= 128);
+ stack[(*len)++] = rcu_dereference_raw(p);
+ }
+}
+
+static void root_free_rcu(struct rcu_head *rcu)
+{
+ struct allowedips_node *node, *stack[128] = {
+ container_of(rcu, struct allowedips_node, rcu) };
+ unsigned int len = 1;
+
+ while (len > 0 && (node = stack[--len])) {
+ push_rcu(stack, node->bit[0], &len);
+ push_rcu(stack, node->bit[1], &len);
+ kfree(node);
+ }
+}
+
+static void root_remove_peer_lists(struct allowedips_node *root)
+{
+ struct allowedips_node *node, *stack[128] = { root };
+ unsigned int len = 1;
+
+ while (len > 0 && (node = stack[--len])) {
+ push_rcu(stack, node->bit[0], &len);
+ push_rcu(stack, node->bit[1], &len);
+ if (rcu_access_pointer(node->peer))
+ list_del(&node->peer_list);
+ }
+}
+
+static void walk_remove_by_peer(struct allowedips_node __rcu **top,
+ struct wg_peer *peer, struct mutex *lock)
+{
+#define REF(p) rcu_access_pointer(p)
+#define DEREF(p) rcu_dereference_protected(*(p), lockdep_is_held(lock))
+#define PUSH(p) ({ \
+ WARN_ON(IS_ENABLED(DEBUG) && len >= 128); \
+ stack[len++] = p; \
+ })
+
+ struct allowedips_node __rcu **stack[128], **nptr;
+ struct allowedips_node *node, *prev;
+ unsigned int len;
+
+ if (unlikely(!peer || !REF(*top)))
+ return;
+
+ for (prev = NULL, len = 0, PUSH(top); len > 0; prev = node) {
+ nptr = stack[len - 1];
+ node = DEREF(nptr);
+ if (!node) {
+ --len;
+ continue;
+ }
+ if (!prev || REF(prev->bit[0]) == node ||
+ REF(prev->bit[1]) == node) {
+ if (REF(node->bit[0]))
+ PUSH(&node->bit[0]);
+ else if (REF(node->bit[1]))
+ PUSH(&node->bit[1]);
+ } else if (REF(node->bit[0]) == prev) {
+ if (REF(node->bit[1]))
+ PUSH(&node->bit[1]);
+ } else {
+ if (rcu_dereference_protected(node->peer,
+ lockdep_is_held(lock)) == peer) {
+ RCU_INIT_POINTER(node->peer, NULL);
+ list_del_init(&node->peer_list);
+ if (!node->bit[0] || !node->bit[1]) {
+ rcu_assign_pointer(*nptr, DEREF(
+ &node->bit[!REF(node->bit[0])]));
+ kfree_rcu(node, rcu);
+ node = DEREF(nptr);
+ }
+ }
+ --len;
+ }
+ }
+
+#undef REF
+#undef DEREF
+#undef PUSH
+}
+
+static unsigned int fls128(u64 a, u64 b)
+{
+ return a ? fls64(a) + 64U : fls64(b);
+}
+
+static u8 common_bits(const struct allowedips_node *node, const u8 *key,
+ u8 bits)
+{
+ if (bits == 32)
+ return 32U - fls(*(const u32 *)node->bits ^ *(const u32 *)key);
+ else if (bits == 128)
+ return 128U - fls128(
+ *(const u64 *)&node->bits[0] ^ *(const u64 *)&key[0],
+ *(const u64 *)&node->bits[8] ^ *(const u64 *)&key[8]);
+ return 0;
+}
+
+static bool prefix_matches(const struct allowedips_node *node, const u8 *key,
+ u8 bits)
+{
+ /* This could be much faster if it actually just compared the common
+ * bits properly, by precomputing a mask bswap(~0 << (32 - cidr)), and
+ * the rest, but it turns out that common_bits is already super fast on
+ * modern processors, even taking into account the unfortunate bswap.
+ * So, we just inline it like this instead.
+ */
+ return common_bits(node, key, bits) >= node->cidr;
+}
+
+static struct allowedips_node *find_node(struct allowedips_node *trie, u8 bits,
+ const u8 *key)
+{
+ struct allowedips_node *node = trie, *found = NULL;
+
+ while (node && prefix_matches(node, key, bits)) {
+ if (rcu_access_pointer(node->peer))
+ found = node;
+ if (node->cidr == bits)
+ break;
+ node = rcu_dereference_bh(CHOOSE_NODE(node, key));
+ }
+ return found;
+}
+
+/* Returns a strong reference to a peer */
+static struct wg_peer *lookup(struct allowedips_node __rcu *root, u8 bits,
+ const void *be_ip)
+{
+ /* Aligned so it can be passed to fls/fls64 */
+ u8 ip[16] __aligned(__alignof(u64));
+ struct allowedips_node *node;
+ struct wg_peer *peer = NULL;
+
+ swap_endian(ip, be_ip, bits);
+
+ rcu_read_lock_bh();
+retry:
+ node = find_node(rcu_dereference_bh(root), bits, ip);
+ if (node) {
+ peer = wg_peer_get_maybe_zero(rcu_dereference_bh(node->peer));
+ if (!peer)
+ goto retry;
+ }
+ rcu_read_unlock_bh();
+ return peer;
+}
+
+static bool node_placement(struct allowedips_node __rcu *trie, const u8 *key,
+ u8 cidr, u8 bits, struct allowedips_node **rnode,
+ struct mutex *lock)
+{
+ struct allowedips_node *node = rcu_dereference_protected(trie,
+ lockdep_is_held(lock));
+ struct allowedips_node *parent = NULL;
+ bool exact = false;
+
+ while (node && node->cidr <= cidr && prefix_matches(node, key, bits)) {
+ parent = node;
+ if (parent->cidr == cidr) {
+ exact = true;
+ break;
+ }
+ node = rcu_dereference_protected(CHOOSE_NODE(parent, key),
+ lockdep_is_held(lock));
+ }
+ *rnode = parent;
+ return exact;
+}
+
+static int add(struct allowedips_node __rcu **trie, u8 bits, const u8 *key,
+ u8 cidr, struct wg_peer *peer, struct mutex *lock)
+{
+ struct allowedips_node *node, *parent, *down, *newnode;
+
+ if (unlikely(cidr > bits || !peer))
+ return -EINVAL;
+
+ if (!rcu_access_pointer(*trie)) {
+ node = kzalloc(sizeof(*node), GFP_KERNEL);
+ if (unlikely(!node))
+ return -ENOMEM;
+ RCU_INIT_POINTER(node->peer, peer);
+ list_add_tail(&node->peer_list, &peer->allowedips_list);
+ copy_and_assign_cidr(node, key, cidr, bits);
+ rcu_assign_pointer(*trie, node);
+ return 0;
+ }
+ if (node_placement(*trie, key, cidr, bits, &node, lock)) {
+ rcu_assign_pointer(node->peer, peer);
+ list_move_tail(&node->peer_list, &peer->allowedips_list);
+ return 0;
+ }
+
+ newnode = kzalloc(sizeof(*newnode), GFP_KERNEL);
+ if (unlikely(!newnode))
+ return -ENOMEM;
+ RCU_INIT_POINTER(newnode->peer, peer);
+ list_add_tail(&newnode->peer_list, &peer->allowedips_list);
+ copy_and_assign_cidr(newnode, key, cidr, bits);
+
+ if (!node) {
+ down = rcu_dereference_protected(*trie, lockdep_is_held(lock));
+ } else {
+ down = rcu_dereference_protected(CHOOSE_NODE(node, key),
+ lockdep_is_held(lock));
+ if (!down) {
+ rcu_assign_pointer(CHOOSE_NODE(node, key), newnode);
+ return 0;
+ }
+ }
+ cidr = min(cidr, common_bits(down, key, bits));
+ parent = node;
+
+ if (newnode->cidr == cidr) {
+ rcu_assign_pointer(CHOOSE_NODE(newnode, down->bits), down);
+ if (!parent)
+ rcu_assign_pointer(*trie, newnode);
+ else
+ rcu_assign_pointer(CHOOSE_NODE(parent, newnode->bits),
+ newnode);
+ } else {
+ node = kzalloc(sizeof(*node), GFP_KERNEL);
+ if (unlikely(!node)) {
+ kfree(newnode);
+ return -ENOMEM;
+ }
+ INIT_LIST_HEAD(&node->peer_list);
+ copy_and_assign_cidr(node, newnode->bits, cidr, bits);
+
+ rcu_assign_pointer(CHOOSE_NODE(node, down->bits), down);
+ rcu_assign_pointer(CHOOSE_NODE(node, newnode->bits), newnode);
+ if (!parent)
+ rcu_assign_pointer(*trie, node);
+ else
+ rcu_assign_pointer(CHOOSE_NODE(parent, node->bits),
+ node);
+ }
+ return 0;
+}
+
+void wg_allowedips_init(struct allowedips *table)
+{
+ table->root4 = table->root6 = NULL;
+ table->seq = 1;
+}
+
+void wg_allowedips_free(struct allowedips *table, struct mutex *lock)
+{
+ struct allowedips_node __rcu *old4 = table->root4, *old6 = table->root6;
+
+ ++table->seq;
+ RCU_INIT_POINTER(table->root4, NULL);
+ RCU_INIT_POINTER(table->root6, NULL);
+ if (rcu_access_pointer(old4)) {
+ struct allowedips_node *node = rcu_dereference_protected(old4,
+ lockdep_is_held(lock));
+
+ root_remove_peer_lists(node);
+ call_rcu(&node->rcu, root_free_rcu);
+ }
+ if (rcu_access_pointer(old6)) {
+ struct allowedips_node *node = rcu_dereference_protected(old6,
+ lockdep_is_held(lock));
+
+ root_remove_peer_lists(node);
+ call_rcu(&node->rcu, root_free_rcu);
+ }
+}
+
+int wg_allowedips_insert_v4(struct allowedips *table, const struct in_addr *ip,
+ u8 cidr, struct wg_peer *peer, struct mutex *lock)
+{
+ /* Aligned so it can be passed to fls */
+ u8 key[4] __aligned(__alignof(u32));
+
+ ++table->seq;
+ swap_endian(key, (const u8 *)ip, 32);
+ return add(&table->root4, 32, key, cidr, peer, lock);
+}
+
+int wg_allowedips_insert_v6(struct allowedips *table, const struct in6_addr *ip,
+ u8 cidr, struct wg_peer *peer, struct mutex *lock)
+{
+ /* Aligned so it can be passed to fls64 */
+ u8 key[16] __aligned(__alignof(u64));
+
+ ++table->seq;
+ swap_endian(key, (const u8 *)ip, 128);
+ return add(&table->root6, 128, key, cidr, peer, lock);
+}
+
+void wg_allowedips_remove_by_peer(struct allowedips *table,
+ struct wg_peer *peer, struct mutex *lock)
+{
+ ++table->seq;
+ walk_remove_by_peer(&table->root4, peer, lock);
+ walk_remove_by_peer(&table->root6, peer, lock);
+}
+
+int wg_allowedips_read_node(struct allowedips_node *node, u8 ip[16], u8 *cidr)
+{
+ const unsigned int cidr_bytes = DIV_ROUND_UP(node->cidr, 8U);
+ swap_endian(ip, node->bits, node->bitlen);
+ memset(ip + cidr_bytes, 0, node->bitlen / 8U - cidr_bytes);
+ if (node->cidr)
+ ip[cidr_bytes - 1U] &= ~0U << (-node->cidr % 8U);
+
+ *cidr = node->cidr;
+ return node->bitlen == 32 ? AF_INET : AF_INET6;
+}
+
+/* Returns a strong reference to a peer */
+struct wg_peer *wg_allowedips_lookup_dst(struct allowedips *table,
+ struct sk_buff *skb)
+{
+ if (skb->protocol == htons(ETH_P_IP))
+ return lookup(table->root4, 32, &ip_hdr(skb)->daddr);
+ else if (skb->protocol == htons(ETH_P_IPV6))
+ return lookup(table->root6, 128, &ipv6_hdr(skb)->daddr);
+ return NULL;
+}
+
+/* Returns a strong reference to a peer */
+struct wg_peer *wg_allowedips_lookup_src(struct allowedips *table,
+ struct sk_buff *skb)
+{
+ if (skb->protocol == htons(ETH_P_IP))
+ return lookup(table->root4, 32, &ip_hdr(skb)->saddr);
+ else if (skb->protocol == htons(ETH_P_IPV6))
+ return lookup(table->root6, 128, &ipv6_hdr(skb)->saddr);
+ return NULL;
+}
+
+#include "selftest/allowedips.c"
diff --git a/drivers/net/wireguard/allowedips.h b/drivers/net/wireguard/allowedips.h
new file mode 100644
index 000000000000..e5c83cafcef4
--- /dev/null
+++ b/drivers/net/wireguard/allowedips.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <[email protected]>. All Rights Reserved.
+ */
+
+#ifndef _WG_ALLOWEDIPS_H
+#define _WG_ALLOWEDIPS_H
+
+#include <linux/mutex.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+
+struct wg_peer;
+
+struct allowedips_node {
+ struct wg_peer __rcu *peer;
+ struct allowedips_node __rcu *bit[2];
+ /* While it may seem scandalous that we waste space for v4,
+ * we're alloc'ing to the nearest power of 2 anyway, so this
+ * doesn't actually make a difference.
+ */
+ u8 bits[16] __aligned(__alignof(u64));
+ u8 cidr, bit_at_a, bit_at_b, bitlen;
+
+ /* Keep rarely used list at bottom to be beyond cache line. */
+ union {
+ struct list_head peer_list;
+ struct rcu_head rcu;
+ };
+};
+
+struct allowedips {
+ struct allowedips_node __rcu *root4;
+ struct allowedips_node __rcu *root6;
+ u64 seq;
+};
+
+void wg_allowedips_init(struct allowedips *table);
+void wg_allowedips_free(struct allowedips *table, struct mutex *mutex);
+int wg_allowedips_insert_v4(struct allowedips *table, const struct in_addr *ip,
+ u8 cidr, struct wg_peer *peer, struct mutex *lock);
+int wg_allowedips_insert_v6(struct allowedips *table, const struct in6_addr *ip,
+ u8 cidr, struct wg_peer *peer, struct mutex *lock);
+void wg_allowedips_remove_by_peer(struct allowedips *table,
+ struct wg_peer *peer, struct mutex *lock);
+/* The ip input pointer should be __aligned(__alignof(u64))) */
+int wg_allowedips_read_node(struct allowedips_node *node, u8 ip[16], u8 *cidr);
+
+/* These return a strong reference to a peer: */
+struct wg_peer *wg_allowedips_lookup_dst(struct allowedips *table,
+ struct sk_buff *skb);
+struct wg_peer *wg_allowedips_lookup_src(struct allowedips *table,
+ struct sk_buff *skb);
+
+#ifdef DEBUG
+bool wg_allowedips_selftest(void);
+#endif
+
+#endif /* _WG_ALLOWEDIPS_H */
diff --git a/drivers/net/wireguard/cookie.c b/drivers/net/wireguard/cookie.c
new file mode 100644
index 000000000000..4956f0499c19
--- /dev/null
+++ b/drivers/net/wireguard/cookie.c
@@ -0,0 +1,236 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <[email protected]>. All Rights Reserved.
+ */
+
+#include "cookie.h"
+#include "peer.h"
+#include "device.h"
+#include "messages.h"
+#include "ratelimiter.h"
+#include "timers.h"
+
+#include <crypto/blake2s.h>
+#include <crypto/chacha20poly1305.h>
+
+#include <net/ipv6.h>
+#include <crypto/algapi.h>
+
+void wg_cookie_checker_init(struct cookie_checker *checker,
+ struct wg_device *wg)
+{
+ init_rwsem(&checker->secret_lock);
+ checker->secret_birthdate = ktime_get_coarse_boottime_ns();
+ get_random_bytes(checker->secret, NOISE_HASH_LEN);
+ checker->device = wg;
+}
+
+enum { COOKIE_KEY_LABEL_LEN = 8 };
+static const u8 mac1_key_label[COOKIE_KEY_LABEL_LEN] = "mac1----";
+static const u8 cookie_key_label[COOKIE_KEY_LABEL_LEN] = "cookie--";
+
+static void precompute_key(u8 key[NOISE_SYMMETRIC_KEY_LEN],
+ const u8 pubkey[NOISE_PUBLIC_KEY_LEN],
+ const u8 label[COOKIE_KEY_LABEL_LEN])
+{
+ struct blake2s_state blake;
+
+ blake2s_init(&blake, NOISE_SYMMETRIC_KEY_LEN);
+ blake2s_update(&blake, label, COOKIE_KEY_LABEL_LEN);
+ blake2s_update(&blake, pubkey, NOISE_PUBLIC_KEY_LEN);
+ blake2s_final(&blake, key);
+}
+
+/* Must hold peer->handshake.static_identity->lock */
+void wg_cookie_checker_precompute_device_keys(struct cookie_checker *checker)
+{
+ if (likely(checker->device->static_identity.has_identity)) {
+ precompute_key(checker->cookie_encryption_key,
+ checker->device->static_identity.static_public,
+ cookie_key_label);
+ precompute_key(checker->message_mac1_key,
+ checker->device->static_identity.static_public,
+ mac1_key_label);
+ } else {
+ memset(checker->cookie_encryption_key, 0,
+ NOISE_SYMMETRIC_KEY_LEN);
+ memset(checker->message_mac1_key, 0, NOISE_SYMMETRIC_KEY_LEN);
+ }
+}
+
+void wg_cookie_checker_precompute_peer_keys(struct wg_peer *peer)
+{
+ precompute_key(peer->latest_cookie.cookie_decryption_key,
+ peer->handshake.remote_static, cookie_key_label);
+ precompute_key(peer->latest_cookie.message_mac1_key,
+ peer->handshake.remote_static, mac1_key_label);
+}
+
+void wg_cookie_init(struct cookie *cookie)
+{
+ memset(cookie, 0, sizeof(*cookie));
+ init_rwsem(&cookie->lock);
+}
+
+static void compute_mac1(u8 mac1[COOKIE_LEN], const void *message, size_t len,
+ const u8 key[NOISE_SYMMETRIC_KEY_LEN])
+{
+ len = len - sizeof(struct message_macs) +
+ offsetof(struct message_macs, mac1);
+ blake2s(mac1, message, key, COOKIE_LEN, len, NOISE_SYMMETRIC_KEY_LEN);
+}
+
+static void compute_mac2(u8 mac2[COOKIE_LEN], const void *message, size_t len,
+ const u8 cookie[COOKIE_LEN])
+{
+ len = len - sizeof(struct message_macs) +
+ offsetof(struct message_macs, mac2);
+ blake2s(mac2, message, cookie, COOKIE_LEN, len, COOKIE_LEN);
+}
+
+static void make_cookie(u8 cookie[COOKIE_LEN], struct sk_buff *skb,
+ struct cookie_checker *checker)
+{
+ struct blake2s_state state;
+
+ if (wg_birthdate_has_expired(checker->secret_birthdate,
+ COOKIE_SECRET_MAX_AGE)) {
+ down_write(&checker->secret_lock);
+ checker->secret_birthdate = ktime_get_coarse_boottime_ns();
+ get_random_bytes(checker->secret, NOISE_HASH_LEN);
+ up_write(&checker->secret_lock);
+ }
+
+ down_read(&checker->secret_lock);
+
+ blake2s_init_key(&state, COOKIE_LEN, checker->secret, NOISE_HASH_LEN);
+ if (skb->protocol == htons(ETH_P_IP))
+ blake2s_update(&state, (u8 *)&ip_hdr(skb)->saddr,
+ sizeof(struct in_addr));
+ else if (skb->protocol == htons(ETH_P_IPV6))
+ blake2s_update(&state, (u8 *)&ipv6_hdr(skb)->saddr,
+ sizeof(struct in6_addr));
+ blake2s_update(&state, (u8 *)&udp_hdr(skb)->source, sizeof(__be16));
+ blake2s_final(&state, cookie);
+
+ up_read(&checker->secret_lock);
+}
+
+enum cookie_mac_state wg_cookie_validate_packet(struct cookie_checker *checker,
+ struct sk_buff *skb,
+ bool check_cookie)
+{
+ struct message_macs *macs = (struct message_macs *)
+ (skb->data + skb->len - sizeof(*macs));
+ enum cookie_mac_state ret;
+ u8 computed_mac[COOKIE_LEN];
+ u8 cookie[COOKIE_LEN];
+
+ ret = INVALID_MAC;
+ compute_mac1(computed_mac, skb->data, skb->len,
+ checker->message_mac1_key);
+ if (crypto_memneq(computed_mac, macs->mac1, COOKIE_LEN))
+ goto out;
+
+ ret = VALID_MAC_BUT_NO_COOKIE;
+
+ if (!check_cookie)
+ goto out;
+
+ make_cookie(cookie, skb, checker);
+
+ compute_mac2(computed_mac, skb->data, skb->len, cookie);
+ if (crypto_memneq(computed_mac, macs->mac2, COOKIE_LEN))
+ goto out;
+
+ ret = VALID_MAC_WITH_COOKIE_BUT_RATELIMITED;
+ if (!wg_ratelimiter_allow(skb, dev_net(checker->device->dev)))
+ goto out;
+
+ ret = VALID_MAC_WITH_COOKIE;
+
+out:
+ return ret;
+}
+
+void wg_cookie_add_mac_to_packet(void *message, size_t len,
+ struct wg_peer *peer)
+{
+ struct message_macs *macs = (struct message_macs *)
+ ((u8 *)message + len - sizeof(*macs));
+
+ down_write(&peer->latest_cookie.lock);
+ compute_mac1(macs->mac1, message, len,
+ peer->latest_cookie.message_mac1_key);
+ memcpy(peer->latest_cookie.last_mac1_sent, macs->mac1, COOKIE_LEN);
+ peer->latest_cookie.have_sent_mac1 = true;
+ up_write(&peer->latest_cookie.lock);
+
+ down_read(&peer->latest_cookie.lock);
+ if (peer->latest_cookie.is_valid &&
+ !wg_birthdate_has_expired(peer->latest_cookie.birthdate,
+ COOKIE_SECRET_MAX_AGE - COOKIE_SECRET_LATENCY))
+ compute_mac2(macs->mac2, message, len,
+ peer->latest_cookie.cookie);
+ else
+ memset(macs->mac2, 0, COOKIE_LEN);
+ up_read(&peer->latest_cookie.lock);
+}
+
+void wg_cookie_message_create(struct message_handshake_cookie *dst,
+ struct sk_buff *skb, __le32 index,
+ struct cookie_checker *checker)
+{
+ struct message_macs *macs = (struct message_macs *)
+ ((u8 *)skb->data + skb->len - sizeof(*macs));
+ u8 cookie[COOKIE_LEN];
+
+ dst->header.type = cpu_to_le32(MESSAGE_HANDSHAKE_COOKIE);
+ dst->receiver_index = index;
+ get_random_bytes_wait(dst->nonce, COOKIE_NONCE_LEN);
+
+ make_cookie(cookie, skb, checker);
+ xchacha20poly1305_encrypt(dst->encrypted_cookie, cookie, COOKIE_LEN,
+ macs->mac1, COOKIE_LEN, dst->nonce,
+ checker->cookie_encryption_key);
+}
+
+void wg_cookie_message_consume(struct message_handshake_cookie *src,
+ struct wg_device *wg)
+{
+ struct wg_peer *peer = NULL;
+ u8 cookie[COOKIE_LEN];
+ bool ret;
+
+ if (unlikely(!wg_index_hashtable_lookup(wg->index_hashtable,
+ INDEX_HASHTABLE_HANDSHAKE |
+ INDEX_HASHTABLE_KEYPAIR,
+ src->receiver_index, &peer)))
+ return;
+
+ down_read(&peer->latest_cookie.lock);
+ if (unlikely(!peer->latest_cookie.have_sent_mac1)) {
+ up_read(&peer->latest_cookie.lock);
+ goto out;
+ }
+ ret = xchacha20poly1305_decrypt(
+ cookie, src->encrypted_cookie, sizeof(src->encrypted_cookie),
+ peer->latest_cookie.last_mac1_sent, COOKIE_LEN, src->nonce,
+ peer->latest_cookie.cookie_decryption_key);
+ up_read(&peer->latest_cookie.lock);
+
+ if (ret) {
+ down_write(&peer->latest_cookie.lock);
+ memcpy(peer->latest_cookie.cookie, cookie, COOKIE_LEN);
+ peer->latest_cookie.birthdate = ktime_get_coarse_boottime_ns();
+ peer->latest_cookie.is_valid = true;
+ peer->latest_cookie.have_sent_mac1 = false;
+ up_write(&peer->latest_cookie.lock);
+ } else {
+ net_dbg_ratelimited("%s: Could not decrypt invalid cookie response\n",
+ wg->dev->name);
+ }
+
+out:
+ wg_peer_put(peer);
+}
diff --git a/drivers/net/wireguard/cookie.h b/drivers/net/wireguard/cookie.h
new file mode 100644
index 000000000000..c4bd61ca03f2
--- /dev/null
+++ b/drivers/net/wireguard/cookie.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <[email protected]>. All Rights Reserved.
+ */
+
+#ifndef _WG_COOKIE_H
+#define _WG_COOKIE_H
+
+#include "messages.h"
+#include <linux/rwsem.h>
+
+struct wg_peer;
+
+struct cookie_checker {
+ u8 secret[NOISE_HASH_LEN];
+ u8 cookie_encryption_key[NOISE_SYMMETRIC_KEY_LEN];
+ u8 message_mac1_key[NOISE_SYMMETRIC_KEY_LEN];
+ u64 secret_birthdate;
+ struct rw_semaphore secret_lock;
+ struct wg_device *device;
+};
+
+struct cookie {
+ u64 birthdate;
+ bool is_valid;
+ u8 cookie[COOKIE_LEN];
+ bool have_sent_mac1;
+ u8 last_mac1_sent[COOKIE_LEN];
+ u8 cookie_decryption_key[NOISE_SYMMETRIC_KEY_LEN];
+ u8 message_mac1_key[NOISE_SYMMETRIC_KEY_LEN];
+ struct rw_semaphore lock;
+};
+
+enum cookie_mac_state {
+ INVALID_MAC,
+ VALID_MAC_BUT_NO_COOKIE,
+ VALID_MAC_WITH_COOKIE_BUT_RATELIMITED,
+ VALID_MAC_WITH_COOKIE
+};
+
+void wg_cookie_checker_init(struct cookie_checker *checker,
+ struct wg_device *wg);
+void wg_cookie_checker_precompute_device_keys(struct cookie_checker *checker);
+void wg_cookie_checker_precompute_peer_keys(struct wg_peer *peer);
+void wg_cookie_init(struct cookie *cookie);
+
+enum cookie_mac_state wg_cookie_validate_packet(struct cookie_checker *checker,
+ struct sk_buff *skb,
+ bool check_cookie);
+void wg_cookie_add_mac_to_packet(void *message, size_t len,
+ struct wg_peer *peer);
+
+void wg_cookie_message_create(struct message_handshake_cookie *src,
+ struct sk_buff *skb, __le32 index,
+ struct cookie_checker *checker);
+void wg_cookie_message_consume(struct message_handshake_cookie *src,
+ struct wg_device *wg);
+
+#endif /* _WG_COOKIE_H */
diff --git a/drivers/net/wireguard/device.c b/drivers/net/wireguard/device.c
new file mode 100644
index 000000000000..16b19824b9ad
--- /dev/null
+++ b/drivers/net/wireguard/device.c
@@ -0,0 +1,458 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <[email protected]>. All Rights Reserved.
+ */
+
+#include "queueing.h"
+#include "socket.h"
+#include "timers.h"
+#include "device.h"
+#include "ratelimiter.h"
+#include "peer.h"
+#include "messages.h"
+
+#include <linux/module.h>
+#include <linux/rtnetlink.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/inetdevice.h>
+#include <linux/if_arp.h>
+#include <linux/icmp.h>
+#include <linux/suspend.h>
+#include <net/icmp.h>
+#include <net/rtnetlink.h>
+#include <net/ip_tunnels.h>
+#include <net/addrconf.h>
+
+static LIST_HEAD(device_list);
+
+static int wg_open(struct net_device *dev)
+{
+ struct in_device *dev_v4 = __in_dev_get_rtnl(dev);
+ struct inet6_dev *dev_v6 = __in6_dev_get(dev);
+ struct wg_device *wg = netdev_priv(dev);
+ struct wg_peer *peer;
+ int ret;
+
+ if (dev_v4) {
+ /* At some point we might put this check near the ip_rt_send_
+ * redirect call of ip_forward in net/ipv4/ip_forward.c, similar
+ * to the current secpath check.
+ */
+ IN_DEV_CONF_SET(dev_v4, SEND_REDIRECTS, false);
+ IPV4_DEVCONF_ALL(dev_net(dev), SEND_REDIRECTS) = false;
+ }
+ if (dev_v6)
+ dev_v6->cnf.addr_gen_mode = IN6_ADDR_GEN_MODE_NONE;
+
+ ret = wg_socket_init(wg, wg->incoming_port);
+ if (ret < 0)
+ return ret;
+ mutex_lock(&wg->device_update_lock);
+ list_for_each_entry(peer, &wg->peer_list, peer_list) {
+ wg_packet_send_staged_packets(peer);
+ if (peer->persistent_keepalive_interval)
+ wg_packet_send_keepalive(peer);
+ }
+ mutex_unlock(&wg->device_update_lock);
+ return 0;
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int wg_pm_notification(struct notifier_block *nb, unsigned long action,
+ void *data)
+{
+ struct wg_device *wg;
+ struct wg_peer *peer;
+
+ /* If the machine is constantly suspending and resuming, as part of
+ * its normal operation rather than as a somewhat rare event, then we
+ * don't actually want to clear keys.
+ */
+ if (IS_ENABLED(CONFIG_PM_AUTOSLEEP) || IS_ENABLED(CONFIG_ANDROID))
+ return 0;
+
+ if (action != PM_HIBERNATION_PREPARE && action != PM_SUSPEND_PREPARE)
+ return 0;
+
+ rtnl_lock();
+ list_for_each_entry(wg, &device_list, device_list) {
+ mutex_lock(&wg->device_update_lock);
+ list_for_each_entry(peer, &wg->peer_list, peer_list) {
+ del_timer(&peer->timer_zero_key_material);
+ wg_noise_handshake_clear(&peer->handshake);
+ wg_noise_keypairs_clear(&peer->keypairs);
+ }
+ mutex_unlock(&wg->device_update_lock);
+ }
+ rtnl_unlock();
+ rcu_barrier();
+ return 0;
+}
+
+static struct notifier_block pm_notifier = { .notifier_call = wg_pm_notification };
+#endif
+
+static int wg_stop(struct net_device *dev)
+{
+ struct wg_device *wg = netdev_priv(dev);
+ struct wg_peer *peer;
+
+ mutex_lock(&wg->device_update_lock);
+ list_for_each_entry(peer, &wg->peer_list, peer_list) {
+ wg_packet_purge_staged_packets(peer);
+ wg_timers_stop(peer);
+ wg_noise_handshake_clear(&peer->handshake);
+ wg_noise_keypairs_clear(&peer->keypairs);
+ wg_noise_reset_last_sent_handshake(&peer->last_sent_handshake);
+ }
+ mutex_unlock(&wg->device_update_lock);
+ skb_queue_purge(&wg->incoming_handshakes);
+ wg_socket_reinit(wg, NULL, NULL);
+ return 0;
+}
+
+static netdev_tx_t wg_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ struct wg_device *wg = netdev_priv(dev);
+ struct sk_buff_head packets;
+ struct wg_peer *peer;
+ struct sk_buff *next;
+ sa_family_t family;
+ u32 mtu;
+ int ret;
+
+ if (unlikely(wg_skb_examine_untrusted_ip_hdr(skb) != skb->protocol)) {
+ ret = -EPROTONOSUPPORT;
+ net_dbg_ratelimited("%s: Invalid IP packet\n", dev->name);
+ goto err;
+ }
+
+ peer = wg_allowedips_lookup_dst(&wg->peer_allowedips, skb);
+ if (unlikely(!peer)) {
+ ret = -ENOKEY;
+ if (skb->protocol == htons(ETH_P_IP))
+ net_dbg_ratelimited("%s: No peer has allowed IPs matching %pI4\n",
+ dev->name, &ip_hdr(skb)->daddr);
+ else if (skb->protocol == htons(ETH_P_IPV6))
+ net_dbg_ratelimited("%s: No peer has allowed IPs matching %pI6\n",
+ dev->name, &ipv6_hdr(skb)->daddr);
+ goto err;
+ }
+
+ family = READ_ONCE(peer->endpoint.addr.sa_family);
+ if (unlikely(family != AF_INET && family != AF_INET6)) {
+ ret = -EDESTADDRREQ;
+ net_dbg_ratelimited("%s: No valid endpoint has been configured or discovered for peer %llu\n",
+ dev->name, peer->internal_id);
+ goto err_peer;
+ }
+
+ mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
+
+ __skb_queue_head_init(&packets);
+ if (!skb_is_gso(skb)) {
+ skb_mark_not_on_list(skb);
+ } else {
+ struct sk_buff *segs = skb_gso_segment(skb, 0);
+
+ if (unlikely(IS_ERR(segs))) {
+ ret = PTR_ERR(segs);
+ goto err_peer;
+ }
+ dev_kfree_skb(skb);
+ skb = segs;
+ }
+
+ skb_list_walk_safe(skb, skb, next) {
+ skb_mark_not_on_list(skb);
+
+ skb = skb_share_check(skb, GFP_ATOMIC);
+ if (unlikely(!skb))
+ continue;
+
+ /* We only need to keep the original dst around for icmp,
+ * so at this point we're in a position to drop it.
+ */
+ skb_dst_drop(skb);
+
+ PACKET_CB(skb)->mtu = mtu;
+
+ __skb_queue_tail(&packets, skb);
+ }
+
+ spin_lock_bh(&peer->staged_packet_queue.lock);
+ /* If the queue is getting too big, we start removing the oldest packets
+ * until it's small again. We do this before adding the new packet, so
+ * we don't remove GSO segments that are in excess.
+ */
+ while (skb_queue_len(&peer->staged_packet_queue) > MAX_STAGED_PACKETS) {
+ dev_kfree_skb(__skb_dequeue(&peer->staged_packet_queue));
+ ++dev->stats.tx_dropped;
+ }
+ skb_queue_splice_tail(&packets, &peer->staged_packet_queue);
+ spin_unlock_bh(&peer->staged_packet_queue.lock);
+
+ wg_packet_send_staged_packets(peer);
+
+ wg_peer_put(peer);
+ return NETDEV_TX_OK;
+
+err_peer:
+ wg_peer_put(peer);
+err:
+ ++dev->stats.tx_errors;
+ if (skb->protocol == htons(ETH_P_IP))
+ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
+ else if (skb->protocol == htons(ETH_P_IPV6))
+ icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
+ kfree_skb(skb);
+ return ret;
+}
+
+static const struct net_device_ops netdev_ops = {
+ .ndo_open = wg_open,
+ .ndo_stop = wg_stop,
+ .ndo_start_xmit = wg_xmit,
+ .ndo_get_stats64 = ip_tunnel_get_stats64
+};
+
+static void wg_destruct(struct net_device *dev)
+{
+ struct wg_device *wg = netdev_priv(dev);
+
+ rtnl_lock();
+ list_del(&wg->device_list);
+ rtnl_unlock();
+ mutex_lock(&wg->device_update_lock);
+ wg->incoming_port = 0;
+ wg_socket_reinit(wg, NULL, NULL);
+ /* The final references are cleared in the below calls to destroy_workqueue. */
+ wg_peer_remove_all(wg);
+ destroy_workqueue(wg->handshake_receive_wq);
+ destroy_workqueue(wg->handshake_send_wq);
+ destroy_workqueue(wg->packet_crypt_wq);
+ wg_packet_queue_free(&wg->decrypt_queue, true);
+ wg_packet_queue_free(&wg->encrypt_queue, true);
+ rcu_barrier(); /* Wait for all the peers to be actually freed. */
+ wg_ratelimiter_uninit();
+ memzero_explicit(&wg->static_identity, sizeof(wg->static_identity));
+ skb_queue_purge(&wg->incoming_handshakes);
+ free_percpu(dev->tstats);
+ free_percpu(wg->incoming_handshakes_worker);
+ if (wg->have_creating_net_ref)
+ put_net(wg->creating_net);
+ kvfree(wg->index_hashtable);
+ kvfree(wg->peer_hashtable);
+ mutex_unlock(&wg->device_update_lock);
+
+ pr_debug("%s: Interface deleted\n", dev->name);
+ free_netdev(dev);
+}
+
+static const struct device_type device_type = { .name = KBUILD_MODNAME };
+
+static void wg_setup(struct net_device *dev)
+{
+ struct wg_device *wg = netdev_priv(dev);
+ enum { WG_NETDEV_FEATURES = NETIF_F_HW_CSUM | NETIF_F_RXCSUM |
+ NETIF_F_SG | NETIF_F_GSO |
+ NETIF_F_GSO_SOFTWARE | NETIF_F_HIGHDMA };
+
+ dev->netdev_ops = &netdev_ops;
+ dev->hard_header_len = 0;
+ dev->addr_len = 0;
+ dev->needed_headroom = DATA_PACKET_HEAD_ROOM;
+ dev->needed_tailroom = noise_encrypted_len(MESSAGE_PADDING_MULTIPLE);
+ dev->type = ARPHRD_NONE;
+ dev->flags = IFF_POINTOPOINT | IFF_NOARP;
+ dev->priv_flags |= IFF_NO_QUEUE;
+ dev->features |= NETIF_F_LLTX;
+ dev->features |= WG_NETDEV_FEATURES;
+ dev->hw_features |= WG_NETDEV_FEATURES;
+ dev->hw_enc_features |= WG_NETDEV_FEATURES;
+ dev->mtu = ETH_DATA_LEN - MESSAGE_MINIMUM_LENGTH -
+ sizeof(struct udphdr) -
+ max(sizeof(struct ipv6hdr), sizeof(struct iphdr));
+
+ SET_NETDEV_DEVTYPE(dev, &device_type);
+
+ /* We need to keep the dst around in case of icmp replies. */
+ netif_keep_dst(dev);
+
+ memset(wg, 0, sizeof(*wg));
+ wg->dev = dev;
+}
+
+static int wg_newlink(struct net *src_net, struct net_device *dev,
+ struct nlattr *tb[], struct nlattr *data[],
+ struct netlink_ext_ack *extack)
+{
+ struct wg_device *wg = netdev_priv(dev);
+ int ret = -ENOMEM;
+
+ wg->creating_net = src_net;
+ init_rwsem(&wg->static_identity.lock);
+ mutex_init(&wg->socket_update_lock);
+ mutex_init(&wg->device_update_lock);
+ skb_queue_head_init(&wg->incoming_handshakes);
+ wg_allowedips_init(&wg->peer_allowedips);
+ wg_cookie_checker_init(&wg->cookie_checker, wg);
+ INIT_LIST_HEAD(&wg->peer_list);
+ wg->device_update_gen = 1;
+
+ wg->peer_hashtable = wg_pubkey_hashtable_alloc();
+ if (!wg->peer_hashtable)
+ return ret;
+
+ wg->index_hashtable = wg_index_hashtable_alloc();
+ if (!wg->index_hashtable)
+ goto err_free_peer_hashtable;
+
+ dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
+ if (!dev->tstats)
+ goto err_free_index_hashtable;
+
+ wg->incoming_handshakes_worker =
+ wg_packet_percpu_multicore_worker_alloc(
+ wg_packet_handshake_receive_worker, wg);
+ if (!wg->incoming_handshakes_worker)
+ goto err_free_tstats;
+
+ wg->handshake_receive_wq = alloc_workqueue("wg-kex-%s",
+ WQ_CPU_INTENSIVE | WQ_FREEZABLE, 0, dev->name);
+ if (!wg->handshake_receive_wq)
+ goto err_free_incoming_handshakes;
+
+ wg->handshake_send_wq = alloc_workqueue("wg-kex-%s",
+ WQ_UNBOUND | WQ_FREEZABLE, 0, dev->name);
+ if (!wg->handshake_send_wq)
+ goto err_destroy_handshake_receive;
+
+ wg->packet_crypt_wq = alloc_workqueue("wg-crypt-%s",
+ WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM, 0, dev->name);
+ if (!wg->packet_crypt_wq)
+ goto err_destroy_handshake_send;
+
+ ret = wg_packet_queue_init(&wg->encrypt_queue, wg_packet_encrypt_worker,
+ true, MAX_QUEUED_PACKETS);
+ if (ret < 0)
+ goto err_destroy_packet_crypt;
+
+ ret = wg_packet_queue_init(&wg->decrypt_queue, wg_packet_decrypt_worker,
+ true, MAX_QUEUED_PACKETS);
+ if (ret < 0)
+ goto err_free_encrypt_queue;
+
+ ret = wg_ratelimiter_init();
+ if (ret < 0)
+ goto err_free_decrypt_queue;
+
+ ret = register_netdevice(dev);
+ if (ret < 0)
+ goto err_uninit_ratelimiter;
+
+ list_add(&wg->device_list, &device_list);
+
+ /* We wait until the end to assign priv_destructor, so that
+ * register_netdevice doesn't call it for us if it fails.
+ */
+ dev->priv_destructor = wg_destruct;
+
+ pr_debug("%s: Interface created\n", dev->name);
+ return ret;
+
+err_uninit_ratelimiter:
+ wg_ratelimiter_uninit();
+err_free_decrypt_queue:
+ wg_packet_queue_free(&wg->decrypt_queue, true);
+err_free_encrypt_queue:
+ wg_packet_queue_free(&wg->encrypt_queue, true);
+err_destroy_packet_crypt:
+ destroy_workqueue(wg->packet_crypt_wq);
+err_destroy_handshake_send:
+ destroy_workqueue(wg->handshake_send_wq);
+err_destroy_handshake_receive:
+ destroy_workqueue(wg->handshake_receive_wq);
+err_free_incoming_handshakes:
+ free_percpu(wg->incoming_handshakes_worker);
+err_free_tstats:
+ free_percpu(dev->tstats);
+err_free_index_hashtable:
+ kvfree(wg->index_hashtable);
+err_free_peer_hashtable:
+ kvfree(wg->peer_hashtable);
+ return ret;
+}
+
+static struct rtnl_link_ops link_ops __read_mostly = {
+ .kind = KBUILD_MODNAME,
+ .priv_size = sizeof(struct wg_device),
+ .setup = wg_setup,
+ .newlink = wg_newlink,
+};
+
+static int wg_netdevice_notification(struct notifier_block *nb,
+ unsigned long action, void *data)
+{
+ struct net_device *dev = ((struct netdev_notifier_info *)data)->dev;
+ struct wg_device *wg = netdev_priv(dev);
+
+ ASSERT_RTNL();
+
+ if (action != NETDEV_REGISTER || dev->netdev_ops != &netdev_ops)
+ return 0;
+
+ if (dev_net(dev) == wg->creating_net && wg->have_creating_net_ref) {
+ put_net(wg->creating_net);
+ wg->have_creating_net_ref = false;
+ } else if (dev_net(dev) != wg->creating_net &&
+ !wg->have_creating_net_ref) {
+ wg->have_creating_net_ref = true;
+ get_net(wg->creating_net);
+ }
+ return 0;
+}
+
+static struct notifier_block netdevice_notifier = {
+ .notifier_call = wg_netdevice_notification
+};
+
+int __init wg_device_init(void)
+{
+ int ret;
+
+#ifdef CONFIG_PM_SLEEP
+ ret = register_pm_notifier(&pm_notifier);
+ if (ret)
+ return ret;
+#endif
+
+ ret = register_netdevice_notifier(&netdevice_notifier);
+ if (ret)
+ goto error_pm;
+
+ ret = rtnl_link_register(&link_ops);
+ if (ret)
+ goto error_netdevice;
+
+ return 0;
+
+error_netdevice:
+ unregister_netdevice_notifier(&netdevice_notifier);
+error_pm:
+#ifdef CONFIG_PM_SLEEP
+ unregister_pm_notifier(&pm_notifier);
+#endif
+ return ret;
+}
+
+void wg_device_uninit(void)
+{
+ rtnl_link_unregister(&link_ops);
+ unregister_netdevice_notifier(&netdevice_notifier);
+#ifdef CONFIG_PM_SLEEP
+ unregister_pm_notifier(&pm_notifier);
+#endif
+ rcu_barrier();
+}
diff --git a/drivers/net/wireguard/device.h b/drivers/net/wireguard/device.h
new file mode 100644
index 000000000000..b15a8be9d816
--- /dev/null
+++ b/drivers/net/wireguard/device.h
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <[email protected]>. All Rights Reserved.
+ */
+
+#ifndef _WG_DEVICE_H
+#define _WG_DEVICE_H
+
+#include "noise.h"
+#include "allowedips.h"
+#include "peerlookup.h"
+#include "cookie.h"
+
+#include <linux/types.h>
+#include <linux/netdevice.h>
+#include <linux/workqueue.h>
+#include <linux/mutex.h>
+#include <linux/net.h>
+#include <linux/ptr_ring.h>
+
+struct wg_device;
+
+struct multicore_worker {
+ void *ptr;
+ struct work_struct work;
+};
+
+struct crypt_queue {
+ struct ptr_ring ring;
+ union {
+ struct {
+ struct multicore_worker __percpu *worker;
+ int last_cpu;
+ };
+ struct work_struct work;
+ };
+};
+
+struct wg_device {
+ struct net_device *dev;
+ struct crypt_queue encrypt_queue, decrypt_queue;
+ struct sock __rcu *sock4, *sock6;
+ struct net *creating_net;
+ struct noise_static_identity static_identity;
+ struct workqueue_struct *handshake_receive_wq, *handshake_send_wq;
+ struct workqueue_struct *packet_crypt_wq;
+ struct sk_buff_head incoming_handshakes;
+ int incoming_handshake_cpu;
+ struct multicore_worker __percpu *incoming_handshakes_worker;
+ struct cookie_checker cookie_checker;
+ struct pubkey_hashtable *peer_hashtable;
+ struct index_hashtable *index_hashtable;
+ struct allowedips peer_allowedips;
+ struct mutex device_update_lock, socket_update_lock;
+ struct list_head device_list, peer_list;
+ unsigned int num_peers, device_update_gen;
+ u32 fwmark;
+ u16 incoming_port;
+ bool have_creating_net_ref;
+};
+
+int wg_device_init(void);
+void wg_device_uninit(void);
+
+#endif /* _WG_DEVICE_H */
diff --git a/drivers/net/wireguard/main.c b/drivers/net/wireguard/main.c
new file mode 100644
index 000000000000..7a7d5f1a80fc
--- /dev/null
+++ b/drivers/net/wireguard/main.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <[email protected]>. All Rights Reserved.
+ */
+
+#include "version.h"
+#include "device.h"
+#include "noise.h"
+#include "queueing.h"
+#include "ratelimiter.h"
+#include "netlink.h"
+
+#include <uapi/linux/wireguard.h>
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/genetlink.h>
+#include <net/rtnetlink.h>
+
+static int __init mod_init(void)
+{
+ int ret;
+
+#ifdef DEBUG
+ if (!wg_allowedips_selftest() || !wg_packet_counter_selftest() ||
+ !wg_ratelimiter_selftest())
+ return -ENOTRECOVERABLE;
+#endif
+ wg_noise_init();
+
+ ret = wg_device_init();
+ if (ret < 0)
+ goto err_device;
+
+ ret = wg_genetlink_init();
+ if (ret < 0)
+ goto err_netlink;
+
+ pr_info("WireGuard " WIREGUARD_VERSION " loaded. See www.wireguard.com for information.\n");
+ pr_info("Copyright (C) 2015-2019 Jason A. Donenfeld <[email protected]>. All Rights Reserved.\n");
+
+ return 0;
+
+err_netlink:
+ wg_device_uninit();
+err_device:
+ return ret;
+}
+
+static void __exit mod_exit(void)
+{
+ wg_genetlink_uninit();
+ wg_device_uninit();
+}
+
+module_init(mod_init);
+module_exit(mod_exit);
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("WireGuard secure network tunnel");
+MODULE_AUTHOR("Jason A. Donenfeld <[email protected]>");
+MODULE_VERSION(WIREGUARD_VERSION);
+MODULE_ALIAS_RTNL_LINK(KBUILD_MODNAME);
+MODULE_ALIAS_GENL_FAMILY(WG_GENL_NAME);
diff --git a/drivers/net/wireguard/messages.h b/drivers/net/wireguard/messages.h
new file mode 100644
index 000000000000..b8a7b9ce32ba
--- /dev/null
+++ b/drivers/net/wireguard/messages.h
@@ -0,0 +1,128 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <[email protected]>. All Rights Reserved.
+ */
+
+#ifndef _WG_MESSAGES_H
+#define _WG_MESSAGES_H
+
+#include <crypto/curve25519.h>
+#include <crypto/chacha20poly1305.h>
+#include <crypto/blake2s.h>
+
+#include <linux/kernel.h>
+#include <linux/param.h>
+#include <linux/skbuff.h>
+
+enum noise_lengths {
+ NOISE_PUBLIC_KEY_LEN = CURVE25519_KEY_SIZE,
+ NOISE_SYMMETRIC_KEY_LEN = CHACHA20POLY1305_KEY_SIZE,
+ NOISE_TIMESTAMP_LEN = sizeof(u64) + sizeof(u32),
+ NOISE_AUTHTAG_LEN = CHACHA20POLY1305_AUTHTAG_SIZE,
+ NOISE_HASH_LEN = BLAKE2S_HASH_SIZE
+};
+
+#define noise_encrypted_len(plain_len) ((plain_len) + NOISE_AUTHTAG_LEN)
+
+enum cookie_values {
+ COOKIE_SECRET_MAX_AGE = 2 * 60,
+ COOKIE_SECRET_LATENCY = 5,
+ COOKIE_NONCE_LEN = XCHACHA20POLY1305_NONCE_SIZE,
+ COOKIE_LEN = 16
+};
+
+enum counter_values {
+ COUNTER_BITS_TOTAL = 2048,
+ COUNTER_REDUNDANT_BITS = BITS_PER_LONG,
+ COUNTER_WINDOW_SIZE = COUNTER_BITS_TOTAL - COUNTER_REDUNDANT_BITS
+};
+
+enum limits {
+ REKEY_AFTER_MESSAGES = 1ULL << 60,
+ REJECT_AFTER_MESSAGES = U64_MAX - COUNTER_WINDOW_SIZE - 1,
+ REKEY_TIMEOUT = 5,
+ REKEY_TIMEOUT_JITTER_MAX_JIFFIES = HZ / 3,
+ REKEY_AFTER_TIME = 120,
+ REJECT_AFTER_TIME = 180,
+ INITIATIONS_PER_SECOND = 50,
+ MAX_PEERS_PER_DEVICE = 1U << 20,
+ KEEPALIVE_TIMEOUT = 10,
+ MAX_TIMER_HANDSHAKES = 90 / REKEY_TIMEOUT,
+ MAX_QUEUED_INCOMING_HANDSHAKES = 4096, /* TODO: replace this with DQL */
+ MAX_STAGED_PACKETS = 128,
+ MAX_QUEUED_PACKETS = 1024 /* TODO: replace this with DQL */
+};
+
+enum message_type {
+ MESSAGE_INVALID = 0,
+ MESSAGE_HANDSHAKE_INITIATION = 1,
+ MESSAGE_HANDSHAKE_RESPONSE = 2,
+ MESSAGE_HANDSHAKE_COOKIE = 3,
+ MESSAGE_DATA = 4
+};
+
+struct message_header {
+ /* The actual layout of this that we want is:
+ * u8 type
+ * u8 reserved_zero[3]
+ *
+ * But it turns out that by encoding this as little endian,
+ * we achieve the same thing, and it makes checking faster.
+ */
+ __le32 type;
+};
+
+struct message_macs {
+ u8 mac1[COOKIE_LEN];
+ u8 mac2[COOKIE_LEN];
+};
+
+struct message_handshake_initiation {
+ struct message_header header;
+ __le32 sender_index;
+ u8 unencrypted_ephemeral[NOISE_PUBLIC_KEY_LEN];
+ u8 encrypted_static[noise_encrypted_len(NOISE_PUBLIC_KEY_LEN)];
+ u8 encrypted_timestamp[noise_encrypted_len(NOISE_TIMESTAMP_LEN)];
+ struct message_macs macs;
+};
+
+struct message_handshake_response {
+ struct message_header header;
+ __le32 sender_index;
+ __le32 receiver_index;
+ u8 unencrypted_ephemeral[NOISE_PUBLIC_KEY_LEN];
+ u8 encrypted_nothing[noise_encrypted_len(0)];
+ struct message_macs macs;
+};
+
+struct message_handshake_cookie {
+ struct message_header header;
+ __le32 receiver_index;
+ u8 nonce[COOKIE_NONCE_LEN];
+ u8 encrypted_cookie[noise_encrypted_len(COOKIE_LEN)];
+};
+
+struct message_data {
+ struct message_header header;
+ __le32 key_idx;
+ __le64 counter;
+ u8 encrypted_data[];
+};
+
+#define message_data_len(plain_len) \
+ (noise_encrypted_len(plain_len) + sizeof(struct message_data))
+
+enum message_alignments {
+ MESSAGE_PADDING_MULTIPLE = 16,
+ MESSAGE_MINIMUM_LENGTH = message_data_len(0)
+};
+
+#define SKB_HEADER_LEN \
+ (max(sizeof(struct iphdr), sizeof(struct ipv6hdr)) + \
+ sizeof(struct udphdr) + NET_SKB_PAD)
+#define DATA_PACKET_HEAD_ROOM \
+ ALIGN(sizeof(struct message_data) + SKB_HEADER_LEN, 4)
+
+enum { HANDSHAKE_DSCP = 0x88 /* AF41, plus 00 ECN */ };
+
+#endif /* _WG_MESSAGES_H */
diff --git a/drivers/net/wireguard/netlink.c b/drivers/net/wireguard/netlink.c
new file mode 100644
index 000000000000..0fdbd1c45977
--- /dev/null
+++ b/drivers/net/wireguard/netlink.c
@@ -0,0 +1,642 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <[email protected]>. All Rights Reserved.
+ */
+
+#include "netlink.h"
+#include "device.h"
+#include "peer.h"
+#include "socket.h"
+#include "queueing.h"
+#include "messages.h"
+
+#include <uapi/linux/wireguard.h>
+
+#include <linux/if.h>
+#include <net/genetlink.h>
+#include <net/sock.h>
+#include <crypto/algapi.h>
+
+static struct genl_family genl_family;
+
+static const struct nla_policy device_policy[WGDEVICE_A_MAX + 1] = {
+ [WGDEVICE_A_IFINDEX] = { .type = NLA_U32 },
+ [WGDEVICE_A_IFNAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
+ [WGDEVICE_A_PRIVATE_KEY] = { .type = NLA_EXACT_LEN, .len = NOISE_PUBLIC_KEY_LEN },
+ [WGDEVICE_A_PUBLIC_KEY] = { .type = NLA_EXACT_LEN, .len = NOISE_PUBLIC_KEY_LEN },
+ [WGDEVICE_A_FLAGS] = { .type = NLA_U32 },
+ [WGDEVICE_A_LISTEN_PORT] = { .type = NLA_U16 },
+ [WGDEVICE_A_FWMARK] = { .type = NLA_U32 },
+ [WGDEVICE_A_PEERS] = { .type = NLA_NESTED }
+};
+
+static const struct nla_policy peer_policy[WGPEER_A_MAX + 1] = {
+ [WGPEER_A_PUBLIC_KEY] = { .type = NLA_EXACT_LEN, .len = NOISE_PUBLIC_KEY_LEN },
+ [WGPEER_A_PRESHARED_KEY] = { .type = NLA_EXACT_LEN, .len = NOISE_SYMMETRIC_KEY_LEN },
+ [WGPEER_A_FLAGS] = { .type = NLA_U32 },
+ [WGPEER_A_ENDPOINT] = { .type = NLA_MIN_LEN, .len = sizeof(struct sockaddr) },
+ [WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL] = { .type = NLA_U16 },
+ [WGPEER_A_LAST_HANDSHAKE_TIME] = { .type = NLA_EXACT_LEN, .len = sizeof(struct __kernel_timespec) },
+ [WGPEER_A_RX_BYTES] = { .type = NLA_U64 },
+ [WGPEER_A_TX_BYTES] = { .type = NLA_U64 },
+ [WGPEER_A_ALLOWEDIPS] = { .type = NLA_NESTED },
+ [WGPEER_A_PROTOCOL_VERSION] = { .type = NLA_U32 }
+};
+
+static const struct nla_policy allowedip_policy[WGALLOWEDIP_A_MAX + 1] = {
+ [WGALLOWEDIP_A_FAMILY] = { .type = NLA_U16 },
+ [WGALLOWEDIP_A_IPADDR] = { .type = NLA_MIN_LEN, .len = sizeof(struct in_addr) },
+ [WGALLOWEDIP_A_CIDR_MASK] = { .type = NLA_U8 }
+};
+
+static struct wg_device *lookup_interface(struct nlattr **attrs,
+ struct sk_buff *skb)
+{
+ struct net_device *dev = NULL;
+
+ if (!attrs[WGDEVICE_A_IFINDEX] == !attrs[WGDEVICE_A_IFNAME])
+ return ERR_PTR(-EBADR);
+ if (attrs[WGDEVICE_A_IFINDEX])
+ dev = dev_get_by_index(sock_net(skb->sk),
+ nla_get_u32(attrs[WGDEVICE_A_IFINDEX]));
+ else if (attrs[WGDEVICE_A_IFNAME])
+ dev = dev_get_by_name(sock_net(skb->sk),
+ nla_data(attrs[WGDEVICE_A_IFNAME]));
+ if (!dev)
+ return ERR_PTR(-ENODEV);
+ if (!dev->rtnl_link_ops || !dev->rtnl_link_ops->kind ||
+ strcmp(dev->rtnl_link_ops->kind, KBUILD_MODNAME)) {
+ dev_put(dev);
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+ return netdev_priv(dev);
+}
+
+static int get_allowedips(struct sk_buff *skb, const u8 *ip, u8 cidr,
+ int family)
+{
+ struct nlattr *allowedip_nest;
+
+ allowedip_nest = nla_nest_start(skb, 0);
+ if (!allowedip_nest)
+ return -EMSGSIZE;
+
+ if (nla_put_u8(skb, WGALLOWEDIP_A_CIDR_MASK, cidr) ||
+ nla_put_u16(skb, WGALLOWEDIP_A_FAMILY, family) ||
+ nla_put(skb, WGALLOWEDIP_A_IPADDR, family == AF_INET6 ?
+ sizeof(struct in6_addr) : sizeof(struct in_addr), ip)) {
+ nla_nest_cancel(skb, allowedip_nest);
+ return -EMSGSIZE;
+ }
+
+ nla_nest_end(skb, allowedip_nest);
+ return 0;
+}
+
+struct dump_ctx {
+ struct wg_device *wg;
+ struct wg_peer *next_peer;
+ u64 allowedips_seq;
+ struct allowedips_node *next_allowedip;
+};
+
+#define DUMP_CTX(cb) ((struct dump_ctx *)(cb)->args)
+
+static int
+get_peer(struct wg_peer *peer, struct sk_buff *skb, struct dump_ctx *ctx)
+{
+
+ struct nlattr *allowedips_nest, *peer_nest = nla_nest_start(skb, 0);
+ struct allowedips_node *allowedips_node = ctx->next_allowedip;
+ bool fail;
+
+ if (!peer_nest)
+ return -EMSGSIZE;
+
+ down_read(&peer->handshake.lock);
+ fail = nla_put(skb, WGPEER_A_PUBLIC_KEY, NOISE_PUBLIC_KEY_LEN,
+ peer->handshake.remote_static);
+ up_read(&peer->handshake.lock);
+ if (fail)
+ goto err;
+
+ if (!allowedips_node) {
+ const struct __kernel_timespec last_handshake = {
+ .tv_sec = peer->walltime_last_handshake.tv_sec,
+ .tv_nsec = peer->walltime_last_handshake.tv_nsec
+ };
+
+ down_read(&peer->handshake.lock);
+ fail = nla_put(skb, WGPEER_A_PRESHARED_KEY,
+ NOISE_SYMMETRIC_KEY_LEN,
+ peer->handshake.preshared_key);
+ up_read(&peer->handshake.lock);
+ if (fail)
+ goto err;
+
+ if (nla_put(skb, WGPEER_A_LAST_HANDSHAKE_TIME,
+ sizeof(last_handshake), &last_handshake) ||
+ nla_put_u16(skb, WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL,
+ peer->persistent_keepalive_interval) ||
+ nla_put_u64_64bit(skb, WGPEER_A_TX_BYTES, peer->tx_bytes,
+ WGPEER_A_UNSPEC) ||
+ nla_put_u64_64bit(skb, WGPEER_A_RX_BYTES, peer->rx_bytes,
+ WGPEER_A_UNSPEC) ||
+ nla_put_u32(skb, WGPEER_A_PROTOCOL_VERSION, 1))
+ goto err;
+
+ read_lock_bh(&peer->endpoint_lock);
+ if (peer->endpoint.addr.sa_family == AF_INET)
+ fail = nla_put(skb, WGPEER_A_ENDPOINT,
+ sizeof(peer->endpoint.addr4),
+ &peer->endpoint.addr4);
+ else if (peer->endpoint.addr.sa_family == AF_INET6)
+ fail = nla_put(skb, WGPEER_A_ENDPOINT,
+ sizeof(peer->endpoint.addr6),
+ &peer->endpoint.addr6);
+ read_unlock_bh(&peer->endpoint_lock);
+ if (fail)
+ goto err;
+ allowedips_node =
+ list_first_entry_or_null(&peer->allowedips_list,
+ struct allowedips_node, peer_list);
+ }
+ if (!allowedips_node)
+ goto no_allowedips;
+ if (!ctx->allowedips_seq)
+ ctx->allowedips_seq = peer->device->peer_allowedips.seq;
+ else if (ctx->allowedips_seq != peer->device->peer_allowedips.seq)
+ goto no_allowedips;
+
+ allowedips_nest = nla_nest_start(skb, WGPEER_A_ALLOWEDIPS);
+ if (!allowedips_nest)
+ goto err;
+
+ list_for_each_entry_from(allowedips_node, &peer->allowedips_list,
+ peer_list) {
+ u8 cidr, ip[16] __aligned(__alignof(u64));
+ int family;
+
+ family = wg_allowedips_read_node(allowedips_node, ip, &cidr);
+ if (get_allowedips(skb, ip, cidr, family)) {
+ nla_nest_end(skb, allowedips_nest);
+ nla_nest_end(skb, peer_nest);
+ ctx->next_allowedip = allowedips_node;
+ return -EMSGSIZE;
+ }
+ }
+ nla_nest_end(skb, allowedips_nest);
+no_allowedips:
+ nla_nest_end(skb, peer_nest);
+ ctx->next_allowedip = NULL;
+ ctx->allowedips_seq = 0;
+ return 0;
+err:
+ nla_nest_cancel(skb, peer_nest);
+ return -EMSGSIZE;
+}
+
+static int wg_get_device_start(struct netlink_callback *cb)
+{
+ struct wg_device *wg;
+
+ wg = lookup_interface(genl_dumpit_info(cb)->attrs, cb->skb);
+ if (IS_ERR(wg))
+ return PTR_ERR(wg);
+ DUMP_CTX(cb)->wg = wg;
+ return 0;
+}
+
+static int wg_get_device_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct wg_peer *peer, *next_peer_cursor;
+ struct dump_ctx *ctx = DUMP_CTX(cb);
+ struct wg_device *wg = ctx->wg;
+ struct nlattr *peers_nest;
+ int ret = -EMSGSIZE;
+ bool done = true;
+ void *hdr;
+
+ rtnl_lock();
+ mutex_lock(&wg->device_update_lock);
+ cb->seq = wg->device_update_gen;
+ next_peer_cursor = ctx->next_peer;
+
+ hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
+ &genl_family, NLM_F_MULTI, WG_CMD_GET_DEVICE);
+ if (!hdr)
+ goto out;
+ genl_dump_check_consistent(cb, hdr);
+
+ if (!ctx->next_peer) {
+ if (nla_put_u16(skb, WGDEVICE_A_LISTEN_PORT,
+ wg->incoming_port) ||
+ nla_put_u32(skb, WGDEVICE_A_FWMARK, wg->fwmark) ||
+ nla_put_u32(skb, WGDEVICE_A_IFINDEX, wg->dev->ifindex) ||
+ nla_put_string(skb, WGDEVICE_A_IFNAME, wg->dev->name))
+ goto out;
+
+ down_read(&wg->static_identity.lock);
+ if (wg->static_identity.has_identity) {
+ if (nla_put(skb, WGDEVICE_A_PRIVATE_KEY,
+ NOISE_PUBLIC_KEY_LEN,
+ wg->static_identity.static_private) ||
+ nla_put(skb, WGDEVICE_A_PUBLIC_KEY,
+ NOISE_PUBLIC_KEY_LEN,
+ wg->static_identity.static_public)) {
+ up_read(&wg->static_identity.lock);
+ goto out;
+ }
+ }
+ up_read(&wg->static_identity.lock);
+ }
+
+ peers_nest = nla_nest_start(skb, WGDEVICE_A_PEERS);
+ if (!peers_nest)
+ goto out;
+ ret = 0;
+ /* If the last cursor was removed via list_del_init in peer_remove, then
+ * we just treat this the same as there being no more peers left. The
+ * reason is that seq_nr should indicate to userspace that this isn't a
+ * coherent dump anyway, so they'll try again.
+ */
+ if (list_empty(&wg->peer_list) ||
+ (ctx->next_peer && list_empty(&ctx->next_peer->peer_list))) {
+ nla_nest_cancel(skb, peers_nest);
+ goto out;
+ }
+ lockdep_assert_held(&wg->device_update_lock);
+ peer = list_prepare_entry(ctx->next_peer, &wg->peer_list, peer_list);
+ list_for_each_entry_continue(peer, &wg->peer_list, peer_list) {
+ if (get_peer(peer, skb, ctx)) {
+ done = false;
+ break;
+ }
+ next_peer_cursor = peer;
+ }
+ nla_nest_end(skb, peers_nest);
+
+out:
+ if (!ret && !done && next_peer_cursor)
+ wg_peer_get(next_peer_cursor);
+ wg_peer_put(ctx->next_peer);
+ mutex_unlock(&wg->device_update_lock);
+ rtnl_unlock();
+
+ if (ret) {
+ genlmsg_cancel(skb, hdr);
+ return ret;
+ }
+ genlmsg_end(skb, hdr);
+ if (done) {
+ ctx->next_peer = NULL;
+ return 0;
+ }
+ ctx->next_peer = next_peer_cursor;
+ return skb->len;
+
+ /* At this point, we can't really deal ourselves with safely zeroing out
+ * the private key material after usage. This will need an additional API
+ * in the kernel for marking skbs as zero_on_free.
+ */
+}
+
+static int wg_get_device_done(struct netlink_callback *cb)
+{
+ struct dump_ctx *ctx = DUMP_CTX(cb);
+
+ if (ctx->wg)
+ dev_put(ctx->wg->dev);
+ wg_peer_put(ctx->next_peer);
+ return 0;
+}
+
+static int set_port(struct wg_device *wg, u16 port)
+{
+ struct wg_peer *peer;
+
+ if (wg->incoming_port == port)
+ return 0;
+ list_for_each_entry(peer, &wg->peer_list, peer_list)
+ wg_socket_clear_peer_endpoint_src(peer);
+ if (!netif_running(wg->dev)) {
+ wg->incoming_port = port;
+ return 0;
+ }
+ return wg_socket_init(wg, port);
+}
+
+static int set_allowedip(struct wg_peer *peer, struct nlattr **attrs)
+{
+ int ret = -EINVAL;
+ u16 family;
+ u8 cidr;
+
+ if (!attrs[WGALLOWEDIP_A_FAMILY] || !attrs[WGALLOWEDIP_A_IPADDR] ||
+ !attrs[WGALLOWEDIP_A_CIDR_MASK])
+ return ret;
+ family = nla_get_u16(attrs[WGALLOWEDIP_A_FAMILY]);
+ cidr = nla_get_u8(attrs[WGALLOWEDIP_A_CIDR_MASK]);
+
+ if (family == AF_INET && cidr <= 32 &&
+ nla_len(attrs[WGALLOWEDIP_A_IPADDR]) == sizeof(struct in_addr))
+ ret = wg_allowedips_insert_v4(
+ &peer->device->peer_allowedips,
+ nla_data(attrs[WGALLOWEDIP_A_IPADDR]), cidr, peer,
+ &peer->device->device_update_lock);
+ else if (family == AF_INET6 && cidr <= 128 &&
+ nla_len(attrs[WGALLOWEDIP_A_IPADDR]) == sizeof(struct in6_addr))
+ ret = wg_allowedips_insert_v6(
+ &peer->device->peer_allowedips,
+ nla_data(attrs[WGALLOWEDIP_A_IPADDR]), cidr, peer,
+ &peer->device->device_update_lock);
+
+ return ret;
+}
+
+static int set_peer(struct wg_device *wg, struct nlattr **attrs)
+{
+ u8 *public_key = NULL, *preshared_key = NULL;
+ struct wg_peer *peer = NULL;
+ u32 flags = 0;
+ int ret;
+
+ ret = -EINVAL;
+ if (attrs[WGPEER_A_PUBLIC_KEY] &&
+ nla_len(attrs[WGPEER_A_PUBLIC_KEY]) == NOISE_PUBLIC_KEY_LEN)
+ public_key = nla_data(attrs[WGPEER_A_PUBLIC_KEY]);
+ else
+ goto out;
+ if (attrs[WGPEER_A_PRESHARED_KEY] &&
+ nla_len(attrs[WGPEER_A_PRESHARED_KEY]) == NOISE_SYMMETRIC_KEY_LEN)
+ preshared_key = nla_data(attrs[WGPEER_A_PRESHARED_KEY]);
+
+ if (attrs[WGPEER_A_FLAGS])
+ flags = nla_get_u32(attrs[WGPEER_A_FLAGS]);
+ ret = -EOPNOTSUPP;
+ if (flags & ~__WGPEER_F_ALL)
+ goto out;
+
+ ret = -EPFNOSUPPORT;
+ if (attrs[WGPEER_A_PROTOCOL_VERSION]) {
+ if (nla_get_u32(attrs[WGPEER_A_PROTOCOL_VERSION]) != 1)
+ goto out;
+ }
+
+ peer = wg_pubkey_hashtable_lookup(wg->peer_hashtable,
+ nla_data(attrs[WGPEER_A_PUBLIC_KEY]));
+ ret = 0;
+ if (!peer) { /* Peer doesn't exist yet. Add a new one. */
+ if (flags & (WGPEER_F_REMOVE_ME | WGPEER_F_UPDATE_ONLY))
+ goto out;
+
+ /* The peer is new, so there aren't allowed IPs to remove. */
+ flags &= ~WGPEER_F_REPLACE_ALLOWEDIPS;
+
+ down_read(&wg->static_identity.lock);
+ if (wg->static_identity.has_identity &&
+ !memcmp(nla_data(attrs[WGPEER_A_PUBLIC_KEY]),
+ wg->static_identity.static_public,
+ NOISE_PUBLIC_KEY_LEN)) {
+ /* We silently ignore peers that have the same public
+ * key as the device. The reason we do it silently is
+ * that we'd like for people to be able to reuse the
+ * same set of API calls across peers.
+ */
+ up_read(&wg->static_identity.lock);
+ ret = 0;
+ goto out;
+ }
+ up_read(&wg->static_identity.lock);
+
+ peer = wg_peer_create(wg, public_key, preshared_key);
+ if (IS_ERR(peer)) {
+ /* Similar to the above, if the key is invalid, we skip
+ * it without fanfare, so that services don't need to
+ * worry about doing key validation themselves.
+ */
+ ret = PTR_ERR(peer) == -EKEYREJECTED ? 0 : PTR_ERR(peer);
+ peer = NULL;
+ goto out;
+ }
+ /* Take additional reference, as though we've just been
+ * looked up.
+ */
+ wg_peer_get(peer);
+ }
+
+ if (flags & WGPEER_F_REMOVE_ME) {
+ wg_peer_remove(peer);
+ goto out;
+ }
+
+ if (preshared_key) {
+ down_write(&peer->handshake.lock);
+ memcpy(&peer->handshake.preshared_key, preshared_key,
+ NOISE_SYMMETRIC_KEY_LEN);
+ up_write(&peer->handshake.lock);
+ }
+
+ if (attrs[WGPEER_A_ENDPOINT]) {
+ struct sockaddr *addr = nla_data(attrs[WGPEER_A_ENDPOINT]);
+ size_t len = nla_len(attrs[WGPEER_A_ENDPOINT]);
+
+ if ((len == sizeof(struct sockaddr_in) &&
+ addr->sa_family == AF_INET) ||
+ (len == sizeof(struct sockaddr_in6) &&
+ addr->sa_family == AF_INET6)) {
+ struct endpoint endpoint = { { { 0 } } };
+
+ memcpy(&endpoint.addr, addr, len);
+ wg_socket_set_peer_endpoint(peer, &endpoint);
+ }
+ }
+
+ if (flags & WGPEER_F_REPLACE_ALLOWEDIPS)
+ wg_allowedips_remove_by_peer(&wg->peer_allowedips, peer,
+ &wg->device_update_lock);
+
+ if (attrs[WGPEER_A_ALLOWEDIPS]) {
+ struct nlattr *attr, *allowedip[WGALLOWEDIP_A_MAX + 1];
+ int rem;
+
+ nla_for_each_nested(attr, attrs[WGPEER_A_ALLOWEDIPS], rem) {
+ ret = nla_parse_nested(allowedip, WGALLOWEDIP_A_MAX,
+ attr, allowedip_policy, NULL);
+ if (ret < 0)
+ goto out;
+ ret = set_allowedip(peer, allowedip);
+ if (ret < 0)
+ goto out;
+ }
+ }
+
+ if (attrs[WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL]) {
+ const u16 persistent_keepalive_interval = nla_get_u16(
+ attrs[WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL]);
+ const bool send_keepalive =
+ !peer->persistent_keepalive_interval &&
+ persistent_keepalive_interval &&
+ netif_running(wg->dev);
+
+ peer->persistent_keepalive_interval = persistent_keepalive_interval;
+ if (send_keepalive)
+ wg_packet_send_keepalive(peer);
+ }
+
+ if (netif_running(wg->dev))
+ wg_packet_send_staged_packets(peer);
+
+out:
+ wg_peer_put(peer);
+ if (attrs[WGPEER_A_PRESHARED_KEY])
+ memzero_explicit(nla_data(attrs[WGPEER_A_PRESHARED_KEY]),
+ nla_len(attrs[WGPEER_A_PRESHARED_KEY]));
+ return ret;
+}
+
+static int wg_set_device(struct sk_buff *skb, struct genl_info *info)
+{
+ struct wg_device *wg = lookup_interface(info->attrs, skb);
+ u32 flags = 0;
+ int ret;
+
+ if (IS_ERR(wg)) {
+ ret = PTR_ERR(wg);
+ goto out_nodev;
+ }
+
+ rtnl_lock();
+ mutex_lock(&wg->device_update_lock);
+
+ if (info->attrs[WGDEVICE_A_FLAGS])
+ flags = nla_get_u32(info->attrs[WGDEVICE_A_FLAGS]);
+ ret = -EOPNOTSUPP;
+ if (flags & ~__WGDEVICE_F_ALL)
+ goto out;
+
+ ret = -EPERM;
+ if ((info->attrs[WGDEVICE_A_LISTEN_PORT] ||
+ info->attrs[WGDEVICE_A_FWMARK]) &&
+ !ns_capable(wg->creating_net->user_ns, CAP_NET_ADMIN))
+ goto out;
+
+ ++wg->device_update_gen;
+
+ if (info->attrs[WGDEVICE_A_FWMARK]) {
+ struct wg_peer *peer;
+
+ wg->fwmark = nla_get_u32(info->attrs[WGDEVICE_A_FWMARK]);
+ list_for_each_entry(peer, &wg->peer_list, peer_list)
+ wg_socket_clear_peer_endpoint_src(peer);
+ }
+
+ if (info->attrs[WGDEVICE_A_LISTEN_PORT]) {
+ ret = set_port(wg,
+ nla_get_u16(info->attrs[WGDEVICE_A_LISTEN_PORT]));
+ if (ret)
+ goto out;
+ }
+
+ if (flags & WGDEVICE_F_REPLACE_PEERS)
+ wg_peer_remove_all(wg);
+
+ if (info->attrs[WGDEVICE_A_PRIVATE_KEY] &&
+ nla_len(info->attrs[WGDEVICE_A_PRIVATE_KEY]) ==
+ NOISE_PUBLIC_KEY_LEN) {
+ u8 *private_key = nla_data(info->attrs[WGDEVICE_A_PRIVATE_KEY]);
+ u8 public_key[NOISE_PUBLIC_KEY_LEN];
+ struct wg_peer *peer, *temp;
+
+ if (!crypto_memneq(wg->static_identity.static_private,
+ private_key, NOISE_PUBLIC_KEY_LEN))
+ goto skip_set_private_key;
+
+ /* We remove before setting, to prevent race, which means doing
+ * two 25519-genpub ops.
+ */
+ if (curve25519_generate_public(public_key, private_key)) {
+ peer = wg_pubkey_hashtable_lookup(wg->peer_hashtable,
+ public_key);
+ if (peer) {
+ wg_peer_put(peer);
+ wg_peer_remove(peer);
+ }
+ }
+
+ down_write(&wg->static_identity.lock);
+ wg_noise_set_static_identity_private_key(&wg->static_identity,
+ private_key);
+ list_for_each_entry_safe(peer, temp, &wg->peer_list,
+ peer_list) {
+ if (wg_noise_precompute_static_static(peer))
+ wg_noise_expire_current_peer_keypairs(peer);
+ else
+ wg_peer_remove(peer);
+ }
+ wg_cookie_checker_precompute_device_keys(&wg->cookie_checker);
+ up_write(&wg->static_identity.lock);
+ }
+skip_set_private_key:
+
+ if (info->attrs[WGDEVICE_A_PEERS]) {
+ struct nlattr *attr, *peer[WGPEER_A_MAX + 1];
+ int rem;
+
+ nla_for_each_nested(attr, info->attrs[WGDEVICE_A_PEERS], rem) {
+ ret = nla_parse_nested(peer, WGPEER_A_MAX, attr,
+ peer_policy, NULL);
+ if (ret < 0)
+ goto out;
+ ret = set_peer(wg, peer);
+ if (ret < 0)
+ goto out;
+ }
+ }
+ ret = 0;
+
+out:
+ mutex_unlock(&wg->device_update_lock);
+ rtnl_unlock();
+ dev_put(wg->dev);
+out_nodev:
+ if (info->attrs[WGDEVICE_A_PRIVATE_KEY])
+ memzero_explicit(nla_data(info->attrs[WGDEVICE_A_PRIVATE_KEY]),
+ nla_len(info->attrs[WGDEVICE_A_PRIVATE_KEY]));
+ return ret;
+}
+
+static const struct genl_ops genl_ops[] = {
+ {
+ .cmd = WG_CMD_GET_DEVICE,
+ .start = wg_get_device_start,
+ .dumpit = wg_get_device_dump,
+ .done = wg_get_device_done,
+ .flags = GENL_UNS_ADMIN_PERM
+ }, {
+ .cmd = WG_CMD_SET_DEVICE,
+ .doit = wg_set_device,
+ .flags = GENL_UNS_ADMIN_PERM
+ }
+};
+
+static struct genl_family genl_family __ro_after_init = {
+ .ops = genl_ops,
+ .n_ops = ARRAY_SIZE(genl_ops),
+ .name = WG_GENL_NAME,
+ .version = WG_GENL_VERSION,
+ .maxattr = WGDEVICE_A_MAX,
+ .module = THIS_MODULE,
+ .policy = device_policy,
+ .netnsok = true
+};
+
+int __init wg_genetlink_init(void)
+{
+ return genl_register_family(&genl_family);
+}
+
+void __exit wg_genetlink_uninit(void)
+{
+ genl_unregister_family(&genl_family);
+}
diff --git a/drivers/net/wireguard/netlink.h b/drivers/net/wireguard/netlink.h
new file mode 100644
index 000000000000..15100d92e2e3
--- /dev/null
+++ b/drivers/net/wireguard/netlink.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <[email protected]>. All Rights Reserved.
+ */
+
+#ifndef _WG_NETLINK_H
+#define _WG_NETLINK_H
+
+int wg_genetlink_init(void);
+void wg_genetlink_uninit(void);
+
+#endif /* _WG_NETLINK_H */
diff --git a/drivers/net/wireguard/noise.c b/drivers/net/wireguard/noise.c
new file mode 100644
index 000000000000..d71c8db68a8c
--- /dev/null
+++ b/drivers/net/wireguard/noise.c
@@ -0,0 +1,828 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <[email protected]>. All Rights Reserved.
+ */
+
+#include "noise.h"
+#include "device.h"
+#include "peer.h"
+#include "messages.h"
+#include "queueing.h"
+#include "peerlookup.h"
+
+#include <linux/rcupdate.h>
+#include <linux/slab.h>
+#include <linux/bitmap.h>
+#include <linux/scatterlist.h>
+#include <linux/highmem.h>
+#include <crypto/algapi.h>
+
+/* This implements Noise_IKpsk2:
+ *
+ * <- s
+ * ******
+ * -> e, es, s, ss, {t}
+ * <- e, ee, se, psk, {}
+ */
+
+static const u8 handshake_name[37] = "Noise_IKpsk2_25519_ChaChaPoly_BLAKE2s";
+static const u8 identifier_name[34] = "WireGuard v1 zx2c4 [email protected]";
+static u8 handshake_init_hash[NOISE_HASH_LEN] __ro_after_init;
+static u8 handshake_init_chaining_key[NOISE_HASH_LEN] __ro_after_init;
+static atomic64_t keypair_counter = ATOMIC64_INIT(0);
+
+void __init wg_noise_init(void)
+{
+ struct blake2s_state blake;
+
+ blake2s(handshake_init_chaining_key, handshake_name, NULL,
+ NOISE_HASH_LEN, sizeof(handshake_name), 0);
+ blake2s_init(&blake, NOISE_HASH_LEN);
+ blake2s_update(&blake, handshake_init_chaining_key, NOISE_HASH_LEN);
+ blake2s_update(&blake, identifier_name, sizeof(identifier_name));
+ blake2s_final(&blake, handshake_init_hash);
+}
+
+/* Must hold peer->handshake.static_identity->lock */
+bool wg_noise_precompute_static_static(struct wg_peer *peer)
+{
+ bool ret = true;
+
+ down_write(&peer->handshake.lock);
+ if (peer->handshake.static_identity->has_identity)
+ ret = curve25519(
+ peer->handshake.precomputed_static_static,
+ peer->handshake.static_identity->static_private,
+ peer->handshake.remote_static);
+ else
+ memset(peer->handshake.precomputed_static_static, 0,
+ NOISE_PUBLIC_KEY_LEN);
+ up_write(&peer->handshake.lock);
+ return ret;
+}
+
+bool wg_noise_handshake_init(struct noise_handshake *handshake,
+ struct noise_static_identity *static_identity,
+ const u8 peer_public_key[NOISE_PUBLIC_KEY_LEN],
+ const u8 peer_preshared_key[NOISE_SYMMETRIC_KEY_LEN],
+ struct wg_peer *peer)
+{
+ memset(handshake, 0, sizeof(*handshake));
+ init_rwsem(&handshake->lock);
+ handshake->entry.type = INDEX_HASHTABLE_HANDSHAKE;
+ handshake->entry.peer = peer;
+ memcpy(handshake->remote_static, peer_public_key, NOISE_PUBLIC_KEY_LEN);
+ if (peer_preshared_key)
+ memcpy(handshake->preshared_key, peer_preshared_key,
+ NOISE_SYMMETRIC_KEY_LEN);
+ handshake->static_identity = static_identity;
+ handshake->state = HANDSHAKE_ZEROED;
+ return wg_noise_precompute_static_static(peer);
+}
+
+static void handshake_zero(struct noise_handshake *handshake)
+{
+ memset(&handshake->ephemeral_private, 0, NOISE_PUBLIC_KEY_LEN);
+ memset(&handshake->remote_ephemeral, 0, NOISE_PUBLIC_KEY_LEN);
+ memset(&handshake->hash, 0, NOISE_HASH_LEN);
+ memset(&handshake->chaining_key, 0, NOISE_HASH_LEN);
+ handshake->remote_index = 0;
+ handshake->state = HANDSHAKE_ZEROED;
+}
+
+void wg_noise_handshake_clear(struct noise_handshake *handshake)
+{
+ wg_index_hashtable_remove(
+ handshake->entry.peer->device->index_hashtable,
+ &handshake->entry);
+ down_write(&handshake->lock);
+ handshake_zero(handshake);
+ up_write(&handshake->lock);
+ wg_index_hashtable_remove(
+ handshake->entry.peer->device->index_hashtable,
+ &handshake->entry);
+}
+
+static struct noise_keypair *keypair_create(struct wg_peer *peer)
+{
+ struct noise_keypair *keypair = kzalloc(sizeof(*keypair), GFP_KERNEL);
+
+ if (unlikely(!keypair))
+ return NULL;
+ keypair->internal_id = atomic64_inc_return(&keypair_counter);
+ keypair->entry.type = INDEX_HASHTABLE_KEYPAIR;
+ keypair->entry.peer = peer;
+ kref_init(&keypair->refcount);
+ return keypair;
+}
+
+static void keypair_free_rcu(struct rcu_head *rcu)
+{
+ kzfree(container_of(rcu, struct noise_keypair, rcu));
+}
+
+static void keypair_free_kref(struct kref *kref)
+{
+ struct noise_keypair *keypair =
+ container_of(kref, struct noise_keypair, refcount);
+
+ net_dbg_ratelimited("%s: Keypair %llu destroyed for peer %llu\n",
+ keypair->entry.peer->device->dev->name,
+ keypair->internal_id,
+ keypair->entry.peer->internal_id);
+ wg_index_hashtable_remove(keypair->entry.peer->device->index_hashtable,
+ &keypair->entry);
+ call_rcu(&keypair->rcu, keypair_free_rcu);
+}
+
+void wg_noise_keypair_put(struct noise_keypair *keypair, bool unreference_now)
+{
+ if (unlikely(!keypair))
+ return;
+ if (unlikely(unreference_now))
+ wg_index_hashtable_remove(
+ keypair->entry.peer->device->index_hashtable,
+ &keypair->entry);
+ kref_put(&keypair->refcount, keypair_free_kref);
+}
+
+struct noise_keypair *wg_noise_keypair_get(struct noise_keypair *keypair)
+{
+ RCU_LOCKDEP_WARN(!rcu_read_lock_bh_held(),
+ "Taking noise keypair reference without holding the RCU BH read lock");
+ if (unlikely(!keypair || !kref_get_unless_zero(&keypair->refcount)))
+ return NULL;
+ return keypair;
+}
+
+void wg_noise_keypairs_clear(struct noise_keypairs *keypairs)
+{
+ struct noise_keypair *old;
+
+ spin_lock_bh(&keypairs->keypair_update_lock);
+
+ /* We zero the next_keypair before zeroing the others, so that
+ * wg_noise_received_with_keypair returns early before subsequent ones
+ * are zeroed.
+ */
+ old = rcu_dereference_protected(keypairs->next_keypair,
+ lockdep_is_held(&keypairs->keypair_update_lock));
+ RCU_INIT_POINTER(keypairs->next_keypair, NULL);
+ wg_noise_keypair_put(old, true);
+
+ old = rcu_dereference_protected(keypairs->previous_keypair,
+ lockdep_is_held(&keypairs->keypair_update_lock));
+ RCU_INIT_POINTER(keypairs->previous_keypair, NULL);
+ wg_noise_keypair_put(old, true);
+
+ old = rcu_dereference_protected(keypairs->current_keypair,
+ lockdep_is_held(&keypairs->keypair_update_lock));
+ RCU_INIT_POINTER(keypairs->current_keypair, NULL);
+ wg_noise_keypair_put(old, true);
+
+ spin_unlock_bh(&keypairs->keypair_update_lock);
+}
+
+void wg_noise_expire_current_peer_keypairs(struct wg_peer *peer)
+{
+ struct noise_keypair *keypair;
+
+ wg_noise_handshake_clear(&peer->handshake);
+ wg_noise_reset_last_sent_handshake(&peer->last_sent_handshake);
+
+ spin_lock_bh(&peer->keypairs.keypair_update_lock);
+ keypair = rcu_dereference_protected(peer->keypairs.next_keypair,
+ lockdep_is_held(&peer->keypairs.keypair_update_lock));
+ if (keypair)
+ keypair->sending.is_valid = false;
+ keypair = rcu_dereference_protected(peer->keypairs.current_keypair,
+ lockdep_is_held(&peer->keypairs.keypair_update_lock));
+ if (keypair)
+ keypair->sending.is_valid = false;
+ spin_unlock_bh(&peer->keypairs.keypair_update_lock);
+}
+
+static void add_new_keypair(struct noise_keypairs *keypairs,
+ struct noise_keypair *new_keypair)
+{
+ struct noise_keypair *previous_keypair, *next_keypair, *current_keypair;
+
+ spin_lock_bh(&keypairs->keypair_update_lock);
+ previous_keypair = rcu_dereference_protected(keypairs->previous_keypair,
+ lockdep_is_held(&keypairs->keypair_update_lock));
+ next_keypair = rcu_dereference_protected(keypairs->next_keypair,
+ lockdep_is_held(&keypairs->keypair_update_lock));
+ current_keypair = rcu_dereference_protected(keypairs->current_keypair,
+ lockdep_is_held(&keypairs->keypair_update_lock));
+ if (new_keypair->i_am_the_initiator) {
+ /* If we're the initiator, it means we've sent a handshake, and
+ * received a confirmation response, which means this new
+ * keypair can now be used.
+ */
+ if (next_keypair) {
+ /* If there already was a next keypair pending, we
+ * demote it to be the previous keypair, and free the
+ * existing current. Note that this means KCI can result
+ * in this transition. It would perhaps be more sound to
+ * always just get rid of the unused next keypair
+ * instead of putting it in the previous slot, but this
+ * might be a bit less robust. Something to think about
+ * for the future.
+ */
+ RCU_INIT_POINTER(keypairs->next_keypair, NULL);
+ rcu_assign_pointer(keypairs->previous_keypair,
+ next_keypair);
+ wg_noise_keypair_put(current_keypair, true);
+ } else /* If there wasn't an existing next keypair, we replace
+ * the previous with the current one.
+ */
+ rcu_assign_pointer(keypairs->previous_keypair,
+ current_keypair);
+ /* At this point we can get rid of the old previous keypair, and
+ * set up the new keypair.
+ */
+ wg_noise_keypair_put(previous_keypair, true);
+ rcu_assign_pointer(keypairs->current_keypair, new_keypair);
+ } else {
+ /* If we're the responder, it means we can't use the new keypair
+ * until we receive confirmation via the first data packet, so
+ * we get rid of the existing previous one, the possibly
+ * existing next one, and slide in the new next one.
+ */
+ rcu_assign_pointer(keypairs->next_keypair, new_keypair);
+ wg_noise_keypair_put(next_keypair, true);
+ RCU_INIT_POINTER(keypairs->previous_keypair, NULL);
+ wg_noise_keypair_put(previous_keypair, true);
+ }
+ spin_unlock_bh(&keypairs->keypair_update_lock);
+}
+
+bool wg_noise_received_with_keypair(struct noise_keypairs *keypairs,
+ struct noise_keypair *received_keypair)
+{
+ struct noise_keypair *old_keypair;
+ bool key_is_new;
+
+ /* We first check without taking the spinlock. */
+ key_is_new = received_keypair ==
+ rcu_access_pointer(keypairs->next_keypair);
+ if (likely(!key_is_new))
+ return false;
+
+ spin_lock_bh(&keypairs->keypair_update_lock);
+ /* After locking, we double check that things didn't change from
+ * beneath us.
+ */
+ if (unlikely(received_keypair !=
+ rcu_dereference_protected(keypairs->next_keypair,
+ lockdep_is_held(&keypairs->keypair_update_lock)))) {
+ spin_unlock_bh(&keypairs->keypair_update_lock);
+ return false;
+ }
+
+ /* When we've finally received the confirmation, we slide the next
+ * into the current, the current into the previous, and get rid of
+ * the old previous.
+ */
+ old_keypair = rcu_dereference_protected(keypairs->previous_keypair,
+ lockdep_is_held(&keypairs->keypair_update_lock));
+ rcu_assign_pointer(keypairs->previous_keypair,
+ rcu_dereference_protected(keypairs->current_keypair,
+ lockdep_is_held(&keypairs->keypair_update_lock)));
+ wg_noise_keypair_put(old_keypair, true);
+ rcu_assign_pointer(keypairs->current_keypair, received_keypair);
+ RCU_INIT_POINTER(keypairs->next_keypair, NULL);
+
+ spin_unlock_bh(&keypairs->keypair_update_lock);
+ return true;
+}
+
+/* Must hold static_identity->lock */
+void wg_noise_set_static_identity_private_key(
+ struct noise_static_identity *static_identity,
+ const u8 private_key[NOISE_PUBLIC_KEY_LEN])
+{
+ memcpy(static_identity->static_private, private_key,
+ NOISE_PUBLIC_KEY_LEN);
+ curve25519_clamp_secret(static_identity->static_private);
+ static_identity->has_identity = curve25519_generate_public(
+ static_identity->static_public, private_key);
+}
+
+/* This is Hugo Krawczyk's HKDF:
+ * - https://eprint.iacr.org/2010/264.pdf
+ * - https://tools.ietf.org/html/rfc5869
+ */
+static void kdf(u8 *first_dst, u8 *second_dst, u8 *third_dst, const u8 *data,
+ size_t first_len, size_t second_len, size_t third_len,
+ size_t data_len, const u8 chaining_key[NOISE_HASH_LEN])
+{
+ u8 output[BLAKE2S_HASH_SIZE + 1];
+ u8 secret[BLAKE2S_HASH_SIZE];
+
+ WARN_ON(IS_ENABLED(DEBUG) &&
+ (first_len > BLAKE2S_HASH_SIZE ||
+ second_len > BLAKE2S_HASH_SIZE ||
+ third_len > BLAKE2S_HASH_SIZE ||
+ ((second_len || second_dst || third_len || third_dst) &&
+ (!first_len || !first_dst)) ||
+ ((third_len || third_dst) && (!second_len || !second_dst))));
+
+ /* Extract entropy from data into secret */
+ blake2s256_hmac(secret, data, chaining_key, data_len, NOISE_HASH_LEN);
+
+ if (!first_dst || !first_len)
+ goto out;
+
+ /* Expand first key: key = secret, data = 0x1 */
+ output[0] = 1;
+ blake2s256_hmac(output, output, secret, 1, BLAKE2S_HASH_SIZE);
+ memcpy(first_dst, output, first_len);
+
+ if (!second_dst || !second_len)
+ goto out;
+
+ /* Expand second key: key = secret, data = first-key || 0x2 */
+ output[BLAKE2S_HASH_SIZE] = 2;
+ blake2s256_hmac(output, output, secret, BLAKE2S_HASH_SIZE + 1,
+ BLAKE2S_HASH_SIZE);
+ memcpy(second_dst, output, second_len);
+
+ if (!third_dst || !third_len)
+ goto out;
+
+ /* Expand third key: key = secret, data = second-key || 0x3 */
+ output[BLAKE2S_HASH_SIZE] = 3;
+ blake2s256_hmac(output, output, secret, BLAKE2S_HASH_SIZE + 1,
+ BLAKE2S_HASH_SIZE);
+ memcpy(third_dst, output, third_len);
+
+out:
+ /* Clear sensitive data from stack */
+ memzero_explicit(secret, BLAKE2S_HASH_SIZE);
+ memzero_explicit(output, BLAKE2S_HASH_SIZE + 1);
+}
+
+static void symmetric_key_init(struct noise_symmetric_key *key)
+{
+ spin_lock_init(&key->counter.receive.lock);
+ atomic64_set(&key->counter.counter, 0);
+ memset(key->counter.receive.backtrack, 0,
+ sizeof(key->counter.receive.backtrack));
+ key->birthdate = ktime_get_coarse_boottime_ns();
+ key->is_valid = true;
+}
+
+static void derive_keys(struct noise_symmetric_key *first_dst,
+ struct noise_symmetric_key *second_dst,
+ const u8 chaining_key[NOISE_HASH_LEN])
+{
+ kdf(first_dst->key, second_dst->key, NULL, NULL,
+ NOISE_SYMMETRIC_KEY_LEN, NOISE_SYMMETRIC_KEY_LEN, 0, 0,
+ chaining_key);
+ symmetric_key_init(first_dst);
+ symmetric_key_init(second_dst);
+}
+
+static bool __must_check mix_dh(u8 chaining_key[NOISE_HASH_LEN],
+ u8 key[NOISE_SYMMETRIC_KEY_LEN],
+ const u8 private[NOISE_PUBLIC_KEY_LEN],
+ const u8 public[NOISE_PUBLIC_KEY_LEN])
+{
+ u8 dh_calculation[NOISE_PUBLIC_KEY_LEN];
+
+ if (unlikely(!curve25519(dh_calculation, private, public)))
+ return false;
+ kdf(chaining_key, key, NULL, dh_calculation, NOISE_HASH_LEN,
+ NOISE_SYMMETRIC_KEY_LEN, 0, NOISE_PUBLIC_KEY_LEN, chaining_key);
+ memzero_explicit(dh_calculation, NOISE_PUBLIC_KEY_LEN);
+ return true;
+}
+
+static void mix_hash(u8 hash[NOISE_HASH_LEN], const u8 *src, size_t src_len)
+{
+ struct blake2s_state blake;
+
+ blake2s_init(&blake, NOISE_HASH_LEN);
+ blake2s_update(&blake, hash, NOISE_HASH_LEN);
+ blake2s_update(&blake, src, src_len);
+ blake2s_final(&blake, hash);
+}
+
+static void mix_psk(u8 chaining_key[NOISE_HASH_LEN], u8 hash[NOISE_HASH_LEN],
+ u8 key[NOISE_SYMMETRIC_KEY_LEN],
+ const u8 psk[NOISE_SYMMETRIC_KEY_LEN])
+{
+ u8 temp_hash[NOISE_HASH_LEN];
+
+ kdf(chaining_key, temp_hash, key, psk, NOISE_HASH_LEN, NOISE_HASH_LEN,
+ NOISE_SYMMETRIC_KEY_LEN, NOISE_SYMMETRIC_KEY_LEN, chaining_key);
+ mix_hash(hash, temp_hash, NOISE_HASH_LEN);
+ memzero_explicit(temp_hash, NOISE_HASH_LEN);
+}
+
+static void handshake_init(u8 chaining_key[NOISE_HASH_LEN],
+ u8 hash[NOISE_HASH_LEN],
+ const u8 remote_static[NOISE_PUBLIC_KEY_LEN])
+{
+ memcpy(hash, handshake_init_hash, NOISE_HASH_LEN);
+ memcpy(chaining_key, handshake_init_chaining_key, NOISE_HASH_LEN);
+ mix_hash(hash, remote_static, NOISE_PUBLIC_KEY_LEN);
+}
+
+static void message_encrypt(u8 *dst_ciphertext, const u8 *src_plaintext,
+ size_t src_len, u8 key[NOISE_SYMMETRIC_KEY_LEN],
+ u8 hash[NOISE_HASH_LEN])
+{
+ chacha20poly1305_encrypt(dst_ciphertext, src_plaintext, src_len, hash,
+ NOISE_HASH_LEN,
+ 0 /* Always zero for Noise_IK */, key);
+ mix_hash(hash, dst_ciphertext, noise_encrypted_len(src_len));
+}
+
+static bool message_decrypt(u8 *dst_plaintext, const u8 *src_ciphertext,
+ size_t src_len, u8 key[NOISE_SYMMETRIC_KEY_LEN],
+ u8 hash[NOISE_HASH_LEN])
+{
+ if (!chacha20poly1305_decrypt(dst_plaintext, src_ciphertext, src_len,
+ hash, NOISE_HASH_LEN,
+ 0 /* Always zero for Noise_IK */, key))
+ return false;
+ mix_hash(hash, src_ciphertext, src_len);
+ return true;
+}
+
+static void message_ephemeral(u8 ephemeral_dst[NOISE_PUBLIC_KEY_LEN],
+ const u8 ephemeral_src[NOISE_PUBLIC_KEY_LEN],
+ u8 chaining_key[NOISE_HASH_LEN],
+ u8 hash[NOISE_HASH_LEN])
+{
+ if (ephemeral_dst != ephemeral_src)
+ memcpy(ephemeral_dst, ephemeral_src, NOISE_PUBLIC_KEY_LEN);
+ mix_hash(hash, ephemeral_src, NOISE_PUBLIC_KEY_LEN);
+ kdf(chaining_key, NULL, NULL, ephemeral_src, NOISE_HASH_LEN, 0, 0,
+ NOISE_PUBLIC_KEY_LEN, chaining_key);
+}
+
+static void tai64n_now(u8 output[NOISE_TIMESTAMP_LEN])
+{
+ struct timespec64 now;
+
+ ktime_get_real_ts64(&now);
+
+ /* In order to prevent some sort of infoleak from precise timers, we
+ * round down the nanoseconds part to the closest rounded-down power of
+ * two to the maximum initiations per second allowed anyway by the
+ * implementation.
+ */
+ now.tv_nsec = ALIGN_DOWN(now.tv_nsec,
+ rounddown_pow_of_two(NSEC_PER_SEC / INITIATIONS_PER_SECOND));
+
+ /* https://cr.yp.to/libtai/tai64.html */
+ *(__be64 *)output = cpu_to_be64(0x400000000000000aULL + now.tv_sec);
+ *(__be32 *)(output + sizeof(__be64)) = cpu_to_be32(now.tv_nsec);
+}
+
+bool
+wg_noise_handshake_create_initiation(struct message_handshake_initiation *dst,
+ struct noise_handshake *handshake)
+{
+ u8 timestamp[NOISE_TIMESTAMP_LEN];
+ u8 key[NOISE_SYMMETRIC_KEY_LEN];
+ bool ret = false;
+
+ /* We need to wait for crng _before_ taking any locks, since
+ * curve25519_generate_secret uses get_random_bytes_wait.
+ */
+ wait_for_random_bytes();
+
+ down_read(&handshake->static_identity->lock);
+ down_write(&handshake->lock);
+
+ if (unlikely(!handshake->static_identity->has_identity))
+ goto out;
+
+ dst->header.type = cpu_to_le32(MESSAGE_HANDSHAKE_INITIATION);
+
+ handshake_init(handshake->chaining_key, handshake->hash,
+ handshake->remote_static);
+
+ /* e */
+ curve25519_generate_secret(handshake->ephemeral_private);
+ if (!curve25519_generate_public(dst->unencrypted_ephemeral,
+ handshake->ephemeral_private))
+ goto out;
+ message_ephemeral(dst->unencrypted_ephemeral,
+ dst->unencrypted_ephemeral, handshake->chaining_key,
+ handshake->hash);
+
+ /* es */
+ if (!mix_dh(handshake->chaining_key, key, handshake->ephemeral_private,
+ handshake->remote_static))
+ goto out;
+
+ /* s */
+ message_encrypt(dst->encrypted_static,
+ handshake->static_identity->static_public,
+ NOISE_PUBLIC_KEY_LEN, key, handshake->hash);
+
+ /* ss */
+ kdf(handshake->chaining_key, key, NULL,
+ handshake->precomputed_static_static, NOISE_HASH_LEN,
+ NOISE_SYMMETRIC_KEY_LEN, 0, NOISE_PUBLIC_KEY_LEN,
+ handshake->chaining_key);
+
+ /* {t} */
+ tai64n_now(timestamp);
+ message_encrypt(dst->encrypted_timestamp, timestamp,
+ NOISE_TIMESTAMP_LEN, key, handshake->hash);
+
+ dst->sender_index = wg_index_hashtable_insert(
+ handshake->entry.peer->device->index_hashtable,
+ &handshake->entry);
+
+ handshake->state = HANDSHAKE_CREATED_INITIATION;
+ ret = true;
+
+out:
+ up_write(&handshake->lock);
+ up_read(&handshake->static_identity->lock);
+ memzero_explicit(key, NOISE_SYMMETRIC_KEY_LEN);
+ return ret;
+}
+
+struct wg_peer *
+wg_noise_handshake_consume_initiation(struct message_handshake_initiation *src,
+ struct wg_device *wg)
+{
+ struct wg_peer *peer = NULL, *ret_peer = NULL;
+ struct noise_handshake *handshake;
+ bool replay_attack, flood_attack;
+ u8 key[NOISE_SYMMETRIC_KEY_LEN];
+ u8 chaining_key[NOISE_HASH_LEN];
+ u8 hash[NOISE_HASH_LEN];
+ u8 s[NOISE_PUBLIC_KEY_LEN];
+ u8 e[NOISE_PUBLIC_KEY_LEN];
+ u8 t[NOISE_TIMESTAMP_LEN];
+ u64 initiation_consumption;
+
+ down_read(&wg->static_identity.lock);
+ if (unlikely(!wg->static_identity.has_identity))
+ goto out;
+
+ handshake_init(chaining_key, hash, wg->static_identity.static_public);
+
+ /* e */
+ message_ephemeral(e, src->unencrypted_ephemeral, chaining_key, hash);
+
+ /* es */
+ if (!mix_dh(chaining_key, key, wg->static_identity.static_private, e))
+ goto out;
+
+ /* s */
+ if (!message_decrypt(s, src->encrypted_static,
+ sizeof(src->encrypted_static), key, hash))
+ goto out;
+
+ /* Lookup which peer we're actually talking to */
+ peer = wg_pubkey_hashtable_lookup(wg->peer_hashtable, s);
+ if (!peer)
+ goto out;
+ handshake = &peer->handshake;
+
+ /* ss */
+ kdf(chaining_key, key, NULL, handshake->precomputed_static_static,
+ NOISE_HASH_LEN, NOISE_SYMMETRIC_KEY_LEN, 0, NOISE_PUBLIC_KEY_LEN,
+ chaining_key);
+
+ /* {t} */
+ if (!message_decrypt(t, src->encrypted_timestamp,
+ sizeof(src->encrypted_timestamp), key, hash))
+ goto out;
+
+ down_read(&handshake->lock);
+ replay_attack = memcmp(t, handshake->latest_timestamp,
+ NOISE_TIMESTAMP_LEN) <= 0;
+ flood_attack = (s64)handshake->last_initiation_consumption +
+ NSEC_PER_SEC / INITIATIONS_PER_SECOND >
+ (s64)ktime_get_coarse_boottime_ns();
+ up_read(&handshake->lock);
+ if (replay_attack || flood_attack)
+ goto out;
+
+ /* Success! Copy everything to peer */
+ down_write(&handshake->lock);
+ memcpy(handshake->remote_ephemeral, e, NOISE_PUBLIC_KEY_LEN);
+ if (memcmp(t, handshake->latest_timestamp, NOISE_TIMESTAMP_LEN) > 0)
+ memcpy(handshake->latest_timestamp, t, NOISE_TIMESTAMP_LEN);
+ memcpy(handshake->hash, hash, NOISE_HASH_LEN);
+ memcpy(handshake->chaining_key, chaining_key, NOISE_HASH_LEN);
+ handshake->remote_index = src->sender_index;
+ if ((s64)(handshake->last_initiation_consumption -
+ (initiation_consumption = ktime_get_coarse_boottime_ns())) < 0)
+ handshake->last_initiation_consumption = initiation_consumption;
+ handshake->state = HANDSHAKE_CONSUMED_INITIATION;
+ up_write(&handshake->lock);
+ ret_peer = peer;
+
+out:
+ memzero_explicit(key, NOISE_SYMMETRIC_KEY_LEN);
+ memzero_explicit(hash, NOISE_HASH_LEN);
+ memzero_explicit(chaining_key, NOISE_HASH_LEN);
+ up_read(&wg->static_identity.lock);
+ if (!ret_peer)
+ wg_peer_put(peer);
+ return ret_peer;
+}
+
+bool wg_noise_handshake_create_response(struct message_handshake_response *dst,
+ struct noise_handshake *handshake)
+{
+ u8 key[NOISE_SYMMETRIC_KEY_LEN];
+ bool ret = false;
+
+ /* We need to wait for crng _before_ taking any locks, since
+ * curve25519_generate_secret uses get_random_bytes_wait.
+ */
+ wait_for_random_bytes();
+
+ down_read(&handshake->static_identity->lock);
+ down_write(&handshake->lock);
+
+ if (handshake->state != HANDSHAKE_CONSUMED_INITIATION)
+ goto out;
+
+ dst->header.type = cpu_to_le32(MESSAGE_HANDSHAKE_RESPONSE);
+ dst->receiver_index = handshake->remote_index;
+
+ /* e */
+ curve25519_generate_secret(handshake->ephemeral_private);
+ if (!curve25519_generate_public(dst->unencrypted_ephemeral,
+ handshake->ephemeral_private))
+ goto out;
+ message_ephemeral(dst->unencrypted_ephemeral,
+ dst->unencrypted_ephemeral, handshake->chaining_key,
+ handshake->hash);
+
+ /* ee */
+ if (!mix_dh(handshake->chaining_key, NULL, handshake->ephemeral_private,
+ handshake->remote_ephemeral))
+ goto out;
+
+ /* se */
+ if (!mix_dh(handshake->chaining_key, NULL, handshake->ephemeral_private,
+ handshake->remote_static))
+ goto out;
+
+ /* psk */
+ mix_psk(handshake->chaining_key, handshake->hash, key,
+ handshake->preshared_key);
+
+ /* {} */
+ message_encrypt(dst->encrypted_nothing, NULL, 0, key, handshake->hash);
+
+ dst->sender_index = wg_index_hashtable_insert(
+ handshake->entry.peer->device->index_hashtable,
+ &handshake->entry);
+
+ handshake->state = HANDSHAKE_CREATED_RESPONSE;
+ ret = true;
+
+out:
+ up_write(&handshake->lock);
+ up_read(&handshake->static_identity->lock);
+ memzero_explicit(key, NOISE_SYMMETRIC_KEY_LEN);
+ return ret;
+}
+
+struct wg_peer *
+wg_noise_handshake_consume_response(struct message_handshake_response *src,
+ struct wg_device *wg)
+{
+ enum noise_handshake_state state = HANDSHAKE_ZEROED;
+ struct wg_peer *peer = NULL, *ret_peer = NULL;
+ struct noise_handshake *handshake;
+ u8 key[NOISE_SYMMETRIC_KEY_LEN];
+ u8 hash[NOISE_HASH_LEN];
+ u8 chaining_key[NOISE_HASH_LEN];
+ u8 e[NOISE_PUBLIC_KEY_LEN];
+ u8 ephemeral_private[NOISE_PUBLIC_KEY_LEN];
+ u8 static_private[NOISE_PUBLIC_KEY_LEN];
+
+ down_read(&wg->static_identity.lock);
+
+ if (unlikely(!wg->static_identity.has_identity))
+ goto out;
+
+ handshake = (struct noise_handshake *)wg_index_hashtable_lookup(
+ wg->index_hashtable, INDEX_HASHTABLE_HANDSHAKE,
+ src->receiver_index, &peer);
+ if (unlikely(!handshake))
+ goto out;
+
+ down_read(&handshake->lock);
+ state = handshake->state;
+ memcpy(hash, handshake->hash, NOISE_HASH_LEN);
+ memcpy(chaining_key, handshake->chaining_key, NOISE_HASH_LEN);
+ memcpy(ephemeral_private, handshake->ephemeral_private,
+ NOISE_PUBLIC_KEY_LEN);
+ up_read(&handshake->lock);
+
+ if (state != HANDSHAKE_CREATED_INITIATION)
+ goto fail;
+
+ /* e */
+ message_ephemeral(e, src->unencrypted_ephemeral, chaining_key, hash);
+
+ /* ee */
+ if (!mix_dh(chaining_key, NULL, ephemeral_private, e))
+ goto fail;
+
+ /* se */
+ if (!mix_dh(chaining_key, NULL, wg->static_identity.static_private, e))
+ goto fail;
+
+ /* psk */
+ mix_psk(chaining_key, hash, key, handshake->preshared_key);
+
+ /* {} */
+ if (!message_decrypt(NULL, src->encrypted_nothing,
+ sizeof(src->encrypted_nothing), key, hash))
+ goto fail;
+
+ /* Success! Copy everything to peer */
+ down_write(&handshake->lock);
+ /* It's important to check that the state is still the same, while we
+ * have an exclusive lock.
+ */
+ if (handshake->state != state) {
+ up_write(&handshake->lock);
+ goto fail;
+ }
+ memcpy(handshake->remote_ephemeral, e, NOISE_PUBLIC_KEY_LEN);
+ memcpy(handshake->hash, hash, NOISE_HASH_LEN);
+ memcpy(handshake->chaining_key, chaining_key, NOISE_HASH_LEN);
+ handshake->remote_index = src->sender_index;
+ handshake->state = HANDSHAKE_CONSUMED_RESPONSE;
+ up_write(&handshake->lock);
+ ret_peer = peer;
+ goto out;
+
+fail:
+ wg_peer_put(peer);
+out:
+ memzero_explicit(key, NOISE_SYMMETRIC_KEY_LEN);
+ memzero_explicit(hash, NOISE_HASH_LEN);
+ memzero_explicit(chaining_key, NOISE_HASH_LEN);
+ memzero_explicit(ephemeral_private, NOISE_PUBLIC_KEY_LEN);
+ memzero_explicit(static_private, NOISE_PUBLIC_KEY_LEN);
+ up_read(&wg->static_identity.lock);
+ return ret_peer;
+}
+
+bool wg_noise_handshake_begin_session(struct noise_handshake *handshake,
+ struct noise_keypairs *keypairs)
+{
+ struct noise_keypair *new_keypair;
+ bool ret = false;
+
+ down_write(&handshake->lock);
+ if (handshake->state != HANDSHAKE_CREATED_RESPONSE &&
+ handshake->state != HANDSHAKE_CONSUMED_RESPONSE)
+ goto out;
+
+ new_keypair = keypair_create(handshake->entry.peer);
+ if (!new_keypair)
+ goto out;
+ new_keypair->i_am_the_initiator = handshake->state ==
+ HANDSHAKE_CONSUMED_RESPONSE;
+ new_keypair->remote_index = handshake->remote_index;
+
+ if (new_keypair->i_am_the_initiator)
+ derive_keys(&new_keypair->sending, &new_keypair->receiving,
+ handshake->chaining_key);
+ else
+ derive_keys(&new_keypair->receiving, &new_keypair->sending,
+ handshake->chaining_key);
+
+ handshake_zero(handshake);
+ rcu_read_lock_bh();
+ if (likely(!READ_ONCE(container_of(handshake, struct wg_peer,
+ handshake)->is_dead))) {
+ add_new_keypair(keypairs, new_keypair);
+ net_dbg_ratelimited("%s: Keypair %llu created for peer %llu\n",
+ handshake->entry.peer->device->dev->name,
+ new_keypair->internal_id,
+ handshake->entry.peer->internal_id);
+ ret = wg_index_hashtable_replace(
+ handshake->entry.peer->device->index_hashtable,
+ &handshake->entry, &new_keypair->entry);
+ } else {
+ kzfree(new_keypair);
+ }
+ rcu_read_unlock_bh();
+
+out:
+ up_write(&handshake->lock);
+ return ret;
+}
diff --git a/drivers/net/wireguard/noise.h b/drivers/net/wireguard/noise.h
new file mode 100644
index 000000000000..138a07bb817c
--- /dev/null
+++ b/drivers/net/wireguard/noise.h
@@ -0,0 +1,137 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <[email protected]>. All Rights Reserved.
+ */
+#ifndef _WG_NOISE_H
+#define _WG_NOISE_H
+
+#include "messages.h"
+#include "peerlookup.h"
+
+#include <linux/types.h>
+#include <linux/spinlock.h>
+#include <linux/atomic.h>
+#include <linux/rwsem.h>
+#include <linux/mutex.h>
+#include <linux/kref.h>
+
+union noise_counter {
+ struct {
+ u64 counter;
+ unsigned long backtrack[COUNTER_BITS_TOTAL / BITS_PER_LONG];
+ spinlock_t lock;
+ } receive;
+ atomic64_t counter;
+};
+
+struct noise_symmetric_key {
+ u8 key[NOISE_SYMMETRIC_KEY_LEN];
+ union noise_counter counter;
+ u64 birthdate;
+ bool is_valid;
+};
+
+struct noise_keypair {
+ struct index_hashtable_entry entry;
+ struct noise_symmetric_key sending;
+ struct noise_symmetric_key receiving;
+ __le32 remote_index;
+ bool i_am_the_initiator;
+ struct kref refcount;
+ struct rcu_head rcu;
+ u64 internal_id;
+};
+
+struct noise_keypairs {
+ struct noise_keypair __rcu *current_keypair;
+ struct noise_keypair __rcu *previous_keypair;
+ struct noise_keypair __rcu *next_keypair;
+ spinlock_t keypair_update_lock;
+};
+
+struct noise_static_identity {
+ u8 static_public[NOISE_PUBLIC_KEY_LEN];
+ u8 static_private[NOISE_PUBLIC_KEY_LEN];
+ struct rw_semaphore lock;
+ bool has_identity;
+};
+
+enum noise_handshake_state {
+ HANDSHAKE_ZEROED,
+ HANDSHAKE_CREATED_INITIATION,
+ HANDSHAKE_CONSUMED_INITIATION,
+ HANDSHAKE_CREATED_RESPONSE,
+ HANDSHAKE_CONSUMED_RESPONSE
+};
+
+struct noise_handshake {
+ struct index_hashtable_entry entry;
+
+ enum noise_handshake_state state;
+ u64 last_initiation_consumption;
+
+ struct noise_static_identity *static_identity;
+
+ u8 ephemeral_private[NOISE_PUBLIC_KEY_LEN];
+ u8 remote_static[NOISE_PUBLIC_KEY_LEN];
+ u8 remote_ephemeral[NOISE_PUBLIC_KEY_LEN];
+ u8 precomputed_static_static[NOISE_PUBLIC_KEY_LEN];
+
+ u8 preshared_key[NOISE_SYMMETRIC_KEY_LEN];
+
+ u8 hash[NOISE_HASH_LEN];
+ u8 chaining_key[NOISE_HASH_LEN];
+
+ u8 latest_timestamp[NOISE_TIMESTAMP_LEN];
+ __le32 remote_index;
+
+ /* Protects all members except the immutable (after noise_handshake_
+ * init): remote_static, precomputed_static_static, static_identity.
+ */
+ struct rw_semaphore lock;
+};
+
+struct wg_device;
+
+void wg_noise_init(void);
+bool wg_noise_handshake_init(struct noise_handshake *handshake,
+ struct noise_static_identity *static_identity,
+ const u8 peer_public_key[NOISE_PUBLIC_KEY_LEN],
+ const u8 peer_preshared_key[NOISE_SYMMETRIC_KEY_LEN],
+ struct wg_peer *peer);
+void wg_noise_handshake_clear(struct noise_handshake *handshake);
+static inline void wg_noise_reset_last_sent_handshake(atomic64_t *handshake_ns)
+{
+ atomic64_set(handshake_ns, ktime_get_coarse_boottime_ns() -
+ (u64)(REKEY_TIMEOUT + 1) * NSEC_PER_SEC);
+}
+
+void wg_noise_keypair_put(struct noise_keypair *keypair, bool unreference_now);
+struct noise_keypair *wg_noise_keypair_get(struct noise_keypair *keypair);
+void wg_noise_keypairs_clear(struct noise_keypairs *keypairs);
+bool wg_noise_received_with_keypair(struct noise_keypairs *keypairs,
+ struct noise_keypair *received_keypair);
+void wg_noise_expire_current_peer_keypairs(struct wg_peer *peer);
+
+void wg_noise_set_static_identity_private_key(
+ struct noise_static_identity *static_identity,
+ const u8 private_key[NOISE_PUBLIC_KEY_LEN]);
+bool wg_noise_precompute_static_static(struct wg_peer *peer);
+
+bool
+wg_noise_handshake_create_initiation(struct message_handshake_initiation *dst,
+ struct noise_handshake *handshake);
+struct wg_peer *
+wg_noise_handshake_consume_initiation(struct message_handshake_initiation *src,
+ struct wg_device *wg);
+
+bool wg_noise_handshake_create_response(struct message_handshake_response *dst,
+ struct noise_handshake *handshake);
+struct wg_peer *
+wg_noise_handshake_consume_response(struct message_handshake_response *src,
+ struct wg_device *wg);
+
+bool wg_noise_handshake_begin_session(struct noise_handshake *handshake,
+ struct noise_keypairs *keypairs);
+
+#endif /* _WG_NOISE_H */
diff --git a/drivers/net/wireguard/peer.c b/drivers/net/wireguard/peer.c
new file mode 100644
index 000000000000..071eedf33f5a
--- /dev/null
+++ b/drivers/net/wireguard/peer.c
@@ -0,0 +1,240 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <[email protected]>. All Rights Reserved.
+ */
+
+#include "peer.h"
+#include "device.h"
+#include "queueing.h"
+#include "timers.h"
+#include "peerlookup.h"
+#include "noise.h"
+
+#include <linux/kref.h>
+#include <linux/lockdep.h>
+#include <linux/rcupdate.h>
+#include <linux/list.h>
+
+static atomic64_t peer_counter = ATOMIC64_INIT(0);
+
+struct wg_peer *wg_peer_create(struct wg_device *wg,
+ const u8 public_key[NOISE_PUBLIC_KEY_LEN],
+ const u8 preshared_key[NOISE_SYMMETRIC_KEY_LEN])
+{
+ struct wg_peer *peer;
+ int ret = -ENOMEM;
+
+ lockdep_assert_held(&wg->device_update_lock);
+
+ if (wg->num_peers >= MAX_PEERS_PER_DEVICE)
+ return ERR_PTR(ret);
+
+ peer = kzalloc(sizeof(*peer), GFP_KERNEL);
+ if (unlikely(!peer))
+ return ERR_PTR(ret);
+ peer->device = wg;
+
+ if (!wg_noise_handshake_init(&peer->handshake, &wg->static_identity,
+ public_key, preshared_key, peer)) {
+ ret = -EKEYREJECTED;
+ goto err_1;
+ }
+ if (dst_cache_init(&peer->endpoint_cache, GFP_KERNEL))
+ goto err_1;
+ if (wg_packet_queue_init(&peer->tx_queue, wg_packet_tx_worker, false,
+ MAX_QUEUED_PACKETS))
+ goto err_2;
+ if (wg_packet_queue_init(&peer->rx_queue, NULL, false,
+ MAX_QUEUED_PACKETS))
+ goto err_3;
+
+ peer->internal_id = atomic64_inc_return(&peer_counter);
+ peer->serial_work_cpu = nr_cpumask_bits;
+ wg_cookie_init(&peer->latest_cookie);
+ wg_timers_init(peer);
+ wg_cookie_checker_precompute_peer_keys(peer);
+ spin_lock_init(&peer->keypairs.keypair_update_lock);
+ INIT_WORK(&peer->transmit_handshake_work,
+ wg_packet_handshake_send_worker);
+ rwlock_init(&peer->endpoint_lock);
+ kref_init(&peer->refcount);
+ skb_queue_head_init(&peer->staged_packet_queue);
+ wg_noise_reset_last_sent_handshake(&peer->last_sent_handshake);
+ set_bit(NAPI_STATE_NO_BUSY_POLL, &peer->napi.state);
+ netif_napi_add(wg->dev, &peer->napi, wg_packet_rx_poll,
+ NAPI_POLL_WEIGHT);
+ napi_enable(&peer->napi);
+ list_add_tail(&peer->peer_list, &wg->peer_list);
+ INIT_LIST_HEAD(&peer->allowedips_list);
+ wg_pubkey_hashtable_add(wg->peer_hashtable, peer);
+ ++wg->num_peers;
+ pr_debug("%s: Peer %llu created\n", wg->dev->name, peer->internal_id);
+ return peer;
+
+err_3:
+ wg_packet_queue_free(&peer->tx_queue, false);
+err_2:
+ dst_cache_destroy(&peer->endpoint_cache);
+err_1:
+ kfree(peer);
+ return ERR_PTR(ret);
+}
+
+struct wg_peer *wg_peer_get_maybe_zero(struct wg_peer *peer)
+{
+ RCU_LOCKDEP_WARN(!rcu_read_lock_bh_held(),
+ "Taking peer reference without holding the RCU read lock");
+ if (unlikely(!peer || !kref_get_unless_zero(&peer->refcount)))
+ return NULL;
+ return peer;
+}
+
+static void peer_make_dead(struct wg_peer *peer)
+{
+ /* Remove from configuration-time lookup structures. */
+ list_del_init(&peer->peer_list);
+ wg_allowedips_remove_by_peer(&peer->device->peer_allowedips, peer,
+ &peer->device->device_update_lock);
+ wg_pubkey_hashtable_remove(peer->device->peer_hashtable, peer);
+
+ /* Mark as dead, so that we don't allow jumping contexts after. */
+ WRITE_ONCE(peer->is_dead, true);
+
+ /* The caller must now synchronize_rcu() for this to take effect. */
+}
+
+static void peer_remove_after_dead(struct wg_peer *peer)
+{
+ WARN_ON(!peer->is_dead);
+
+ /* No more keypairs can be created for this peer, since is_dead protects
+ * add_new_keypair, so we can now destroy existing ones.
+ */
+ wg_noise_keypairs_clear(&peer->keypairs);
+
+ /* Destroy all ongoing timers that were in-flight at the beginning of
+ * this function.
+ */
+ wg_timers_stop(peer);
+
+ /* The transition between packet encryption/decryption queues isn't
+ * guarded by is_dead, but each reference's life is strictly bounded by
+ * two generations: once for parallel crypto and once for serial
+ * ingestion, so we can simply flush twice, and be sure that we no
+ * longer have references inside these queues.
+ */
+
+ /* a) For encrypt/decrypt. */
+ flush_workqueue(peer->device->packet_crypt_wq);
+ /* b.1) For send (but not receive, since that's napi). */
+ flush_workqueue(peer->device->packet_crypt_wq);
+ /* b.2.1) For receive (but not send, since that's wq). */
+ napi_disable(&peer->napi);
+ /* b.2.1) It's now safe to remove the napi struct, which must be done
+ * here from process context.
+ */
+ netif_napi_del(&peer->napi);
+
+ /* Ensure any workstructs we own (like transmit_handshake_work or
+ * clear_peer_work) no longer are in use.
+ */
+ flush_workqueue(peer->device->handshake_send_wq);
+
+ /* After the above flushes, a peer might still be active in a few
+ * different contexts: 1) from xmit(), before hitting is_dead and
+ * returning, 2) from wg_packet_consume_data(), before hitting is_dead
+ * and returning, 3) from wg_receive_handshake_packet() after a point
+ * where it has processed an incoming handshake packet, but where
+ * all calls to pass it off to timers fails because of is_dead. We won't
+ * have new references in (1) eventually, because we're removed from
+ * allowedips; we won't have new references in (2) eventually, because
+ * wg_index_hashtable_lookup will always return NULL, since we removed
+ * all existing keypairs and no more can be created; we won't have new
+ * references in (3) eventually, because we're removed from the pubkey
+ * hash table, which allows for a maximum of one handshake response,
+ * via the still-uncleared index hashtable entry, but not more than one,
+ * and in wg_cookie_message_consume, the lookup eventually gets a peer
+ * with a refcount of zero, so no new reference is taken.
+ */
+
+ --peer->device->num_peers;
+ wg_peer_put(peer);
+}
+
+/* We have a separate "remove" function make sure that all active places where
+ * a peer is currently operating will eventually come to an end and not pass
+ * their reference onto another context.
+ */
+void wg_peer_remove(struct wg_peer *peer)
+{
+ if (unlikely(!peer))
+ return;
+ lockdep_assert_held(&peer->device->device_update_lock);
+
+ peer_make_dead(peer);
+ synchronize_rcu();
+ peer_remove_after_dead(peer);
+}
+
+void wg_peer_remove_all(struct wg_device *wg)
+{
+ struct wg_peer *peer, *temp;
+ LIST_HEAD(dead_peers);
+
+ lockdep_assert_held(&wg->device_update_lock);
+
+ /* Avoid having to traverse individually for each one. */
+ wg_allowedips_free(&wg->peer_allowedips, &wg->device_update_lock);
+
+ list_for_each_entry_safe(peer, temp, &wg->peer_list, peer_list) {
+ peer_make_dead(peer);
+ list_add_tail(&peer->peer_list, &dead_peers);
+ }
+ synchronize_rcu();
+ list_for_each_entry_safe(peer, temp, &dead_peers, peer_list)
+ peer_remove_after_dead(peer);
+}
+
+static void rcu_release(struct rcu_head *rcu)
+{
+ struct wg_peer *peer = container_of(rcu, struct wg_peer, rcu);
+
+ dst_cache_destroy(&peer->endpoint_cache);
+ wg_packet_queue_free(&peer->rx_queue, false);
+ wg_packet_queue_free(&peer->tx_queue, false);
+
+ /* The final zeroing takes care of clearing any remaining handshake key
+ * material and other potentially sensitive information.
+ */
+ kzfree(peer);
+}
+
+static void kref_release(struct kref *refcount)
+{
+ struct wg_peer *peer = container_of(refcount, struct wg_peer, refcount);
+
+ pr_debug("%s: Peer %llu (%pISpfsc) destroyed\n",
+ peer->device->dev->name, peer->internal_id,
+ &peer->endpoint.addr);
+
+ /* Remove ourself from dynamic runtime lookup structures, now that the
+ * last reference is gone.
+ */
+ wg_index_hashtable_remove(peer->device->index_hashtable,
+ &peer->handshake.entry);
+
+ /* Remove any lingering packets that didn't have a chance to be
+ * transmitted.
+ */
+ wg_packet_purge_staged_packets(peer);
+
+ /* Free the memory used. */
+ call_rcu(&peer->rcu, rcu_release);
+}
+
+void wg_peer_put(struct wg_peer *peer)
+{
+ if (unlikely(!peer))
+ return;
+ kref_put(&peer->refcount, kref_release);
+}
diff --git a/drivers/net/wireguard/peer.h b/drivers/net/wireguard/peer.h
new file mode 100644
index 000000000000..23af40922997
--- /dev/null
+++ b/drivers/net/wireguard/peer.h
@@ -0,0 +1,83 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <[email protected]>. All Rights Reserved.
+ */
+
+#ifndef _WG_PEER_H
+#define _WG_PEER_H
+
+#include "device.h"
+#include "noise.h"
+#include "cookie.h"
+
+#include <linux/types.h>
+#include <linux/netfilter.h>
+#include <linux/spinlock.h>
+#include <linux/kref.h>
+#include <net/dst_cache.h>
+
+struct wg_device;
+
+struct endpoint {
+ union {
+ struct sockaddr addr;
+ struct sockaddr_in addr4;
+ struct sockaddr_in6 addr6;
+ };
+ union {
+ struct {
+ struct in_addr src4;
+ /* Essentially the same as addr6->scope_id */
+ int src_if4;
+ };
+ struct in6_addr src6;
+ };
+};
+
+struct wg_peer {
+ struct wg_device *device;
+ struct crypt_queue tx_queue, rx_queue;
+ struct sk_buff_head staged_packet_queue;
+ int serial_work_cpu;
+ struct noise_keypairs keypairs;
+ struct endpoint endpoint;
+ struct dst_cache endpoint_cache;
+ rwlock_t endpoint_lock;
+ struct noise_handshake handshake;
+ atomic64_t last_sent_handshake;
+ struct work_struct transmit_handshake_work, clear_peer_work;
+ struct cookie latest_cookie;
+ struct hlist_node pubkey_hash;
+ u64 rx_bytes, tx_bytes;
+ struct timer_list timer_retransmit_handshake, timer_send_keepalive;
+ struct timer_list timer_new_handshake, timer_zero_key_material;
+ struct timer_list timer_persistent_keepalive;
+ unsigned int timer_handshake_attempts;
+ u16 persistent_keepalive_interval;
+ bool timer_need_another_keepalive;
+ bool sent_lastminute_handshake;
+ struct timespec64 walltime_last_handshake;
+ struct kref refcount;
+ struct rcu_head rcu;
+ struct list_head peer_list;
+ struct list_head allowedips_list;
+ u64 internal_id;
+ struct napi_struct napi;
+ bool is_dead;
+};
+
+struct wg_peer *wg_peer_create(struct wg_device *wg,
+ const u8 public_key[NOISE_PUBLIC_KEY_LEN],
+ const u8 preshared_key[NOISE_SYMMETRIC_KEY_LEN]);
+
+struct wg_peer *__must_check wg_peer_get_maybe_zero(struct wg_peer *peer);
+static inline struct wg_peer *wg_peer_get(struct wg_peer *peer)
+{
+ kref_get(&peer->refcount);
+ return peer;
+}
+void wg_peer_put(struct wg_peer *peer);
+void wg_peer_remove(struct wg_peer *peer);
+void wg_peer_remove_all(struct wg_device *wg);
+
+#endif /* _WG_PEER_H */
diff --git a/drivers/net/wireguard/peerlookup.c b/drivers/net/wireguard/peerlookup.c
new file mode 100644
index 000000000000..e4deb331476b
--- /dev/null
+++ b/drivers/net/wireguard/peerlookup.c
@@ -0,0 +1,221 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <[email protected]>. All Rights Reserved.
+ */
+
+#include "peerlookup.h"
+#include "peer.h"
+#include "noise.h"
+
+static struct hlist_head *pubkey_bucket(struct pubkey_hashtable *table,
+ const u8 pubkey[NOISE_PUBLIC_KEY_LEN])
+{
+ /* siphash gives us a secure 64bit number based on a random key. Since
+ * the bits are uniformly distributed, we can then mask off to get the
+ * bits we need.
+ */
+ const u64 hash = siphash(pubkey, NOISE_PUBLIC_KEY_LEN, &table->key);
+
+ return &table->hashtable[hash & (HASH_SIZE(table->hashtable) - 1)];
+}
+
+struct pubkey_hashtable *wg_pubkey_hashtable_alloc(void)
+{
+ struct pubkey_hashtable *table = kvmalloc(sizeof(*table), GFP_KERNEL);
+
+ if (!table)
+ return NULL;
+
+ get_random_bytes(&table->key, sizeof(table->key));
+ hash_init(table->hashtable);
+ mutex_init(&table->lock);
+ return table;
+}
+
+void wg_pubkey_hashtable_add(struct pubkey_hashtable *table,
+ struct wg_peer *peer)
+{
+ mutex_lock(&table->lock);
+ hlist_add_head_rcu(&peer->pubkey_hash,
+ pubkey_bucket(table, peer->handshake.remote_static));
+ mutex_unlock(&table->lock);
+}
+
+void wg_pubkey_hashtable_remove(struct pubkey_hashtable *table,
+ struct wg_peer *peer)
+{
+ mutex_lock(&table->lock);
+ hlist_del_init_rcu(&peer->pubkey_hash);
+ mutex_unlock(&table->lock);
+}
+
+/* Returns a strong reference to a peer */
+struct wg_peer *
+wg_pubkey_hashtable_lookup(struct pubkey_hashtable *table,
+ const u8 pubkey[NOISE_PUBLIC_KEY_LEN])
+{
+ struct wg_peer *iter_peer, *peer = NULL;
+
+ rcu_read_lock_bh();
+ hlist_for_each_entry_rcu_bh(iter_peer, pubkey_bucket(table, pubkey),
+ pubkey_hash) {
+ if (!memcmp(pubkey, iter_peer->handshake.remote_static,
+ NOISE_PUBLIC_KEY_LEN)) {
+ peer = iter_peer;
+ break;
+ }
+ }
+ peer = wg_peer_get_maybe_zero(peer);
+ rcu_read_unlock_bh();
+ return peer;
+}
+
+static struct hlist_head *index_bucket(struct index_hashtable *table,
+ const __le32 index)
+{
+ /* Since the indices are random and thus all bits are uniformly
+ * distributed, we can find its bucket simply by masking.
+ */
+ return &table->hashtable[(__force u32)index &
+ (HASH_SIZE(table->hashtable) - 1)];
+}
+
+struct index_hashtable *wg_index_hashtable_alloc(void)
+{
+ struct index_hashtable *table = kvmalloc(sizeof(*table), GFP_KERNEL);
+
+ if (!table)
+ return NULL;
+
+ hash_init(table->hashtable);
+ spin_lock_init(&table->lock);
+ return table;
+}
+
+/* At the moment, we limit ourselves to 2^20 total peers, which generally might
+ * amount to 2^20*3 items in this hashtable. The algorithm below works by
+ * picking a random number and testing it. We can see that these limits mean we
+ * usually succeed pretty quickly:
+ *
+ * >>> def calculation(tries, size):
+ * ... return (size / 2**32)**(tries - 1) * (1 - (size / 2**32))
+ * ...
+ * >>> calculation(1, 2**20 * 3)
+ * 0.999267578125
+ * >>> calculation(2, 2**20 * 3)
+ * 0.0007318854331970215
+ * >>> calculation(3, 2**20 * 3)
+ * 5.360489012673497e-07
+ * >>> calculation(4, 2**20 * 3)
+ * 3.9261394135792216e-10
+ *
+ * At the moment, we don't do any masking, so this algorithm isn't exactly
+ * constant time in either the random guessing or in the hash list lookup. We
+ * could require a minimum of 3 tries, which would successfully mask the
+ * guessing. this would not, however, help with the growing hash lengths, which
+ * is another thing to consider moving forward.
+ */
+
+__le32 wg_index_hashtable_insert(struct index_hashtable *table,
+ struct index_hashtable_entry *entry)
+{
+ struct index_hashtable_entry *existing_entry;
+
+ spin_lock_bh(&table->lock);
+ hlist_del_init_rcu(&entry->index_hash);
+ spin_unlock_bh(&table->lock);
+
+ rcu_read_lock_bh();
+
+search_unused_slot:
+ /* First we try to find an unused slot, randomly, while unlocked. */
+ entry->index = (__force __le32)get_random_u32();
+ hlist_for_each_entry_rcu_bh(existing_entry,
+ index_bucket(table, entry->index),
+ index_hash) {
+ if (existing_entry->index == entry->index)
+ /* If it's already in use, we continue searching. */
+ goto search_unused_slot;
+ }
+
+ /* Once we've found an unused slot, we lock it, and then double-check
+ * that nobody else stole it from us.
+ */
+ spin_lock_bh(&table->lock);
+ hlist_for_each_entry_rcu_bh(existing_entry,
+ index_bucket(table, entry->index),
+ index_hash) {
+ if (existing_entry->index == entry->index) {
+ spin_unlock_bh(&table->lock);
+ /* If it was stolen, we start over. */
+ goto search_unused_slot;
+ }
+ }
+ /* Otherwise, we know we have it exclusively (since we're locked),
+ * so we insert.
+ */
+ hlist_add_head_rcu(&entry->index_hash,
+ index_bucket(table, entry->index));
+ spin_unlock_bh(&table->lock);
+
+ rcu_read_unlock_bh();
+
+ return entry->index;
+}
+
+bool wg_index_hashtable_replace(struct index_hashtable *table,
+ struct index_hashtable_entry *old,
+ struct index_hashtable_entry *new)
+{
+ if (unlikely(hlist_unhashed(&old->index_hash)))
+ return false;
+ spin_lock_bh(&table->lock);
+ new->index = old->index;
+ hlist_replace_rcu(&old->index_hash, &new->index_hash);
+
+ /* Calling init here NULLs out index_hash, and in fact after this
+ * function returns, it's theoretically possible for this to get
+ * reinserted elsewhere. That means the RCU lookup below might either
+ * terminate early or jump between buckets, in which case the packet
+ * simply gets dropped, which isn't terrible.
+ */
+ INIT_HLIST_NODE(&old->index_hash);
+ spin_unlock_bh(&table->lock);
+ return true;
+}
+
+void wg_index_hashtable_remove(struct index_hashtable *table,
+ struct index_hashtable_entry *entry)
+{
+ spin_lock_bh(&table->lock);
+ hlist_del_init_rcu(&entry->index_hash);
+ spin_unlock_bh(&table->lock);
+}
+
+/* Returns a strong reference to a entry->peer */
+struct index_hashtable_entry *
+wg_index_hashtable_lookup(struct index_hashtable *table,
+ const enum index_hashtable_type type_mask,
+ const __le32 index, struct wg_peer **peer)
+{
+ struct index_hashtable_entry *iter_entry, *entry = NULL;
+
+ rcu_read_lock_bh();
+ hlist_for_each_entry_rcu_bh(iter_entry, index_bucket(table, index),
+ index_hash) {
+ if (iter_entry->index == index) {
+ if (likely(iter_entry->type & type_mask))
+ entry = iter_entry;
+ break;
+ }
+ }
+ if (likely(entry)) {
+ entry->peer = wg_peer_get_maybe_zero(entry->peer);
+ if (likely(entry->peer))
+ *peer = entry->peer;
+ else
+ entry = NULL;
+ }
+ rcu_read_unlock_bh();
+ return entry;
+}
diff --git a/drivers/net/wireguard/peerlookup.h b/drivers/net/wireguard/peerlookup.h
new file mode 100644
index 000000000000..ced811797680
--- /dev/null
+++ b/drivers/net/wireguard/peerlookup.h
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <[email protected]>. All Rights Reserved.
+ */
+
+#ifndef _WG_PEERLOOKUP_H
+#define _WG_PEERLOOKUP_H
+
+#include "messages.h"
+
+#include <linux/hashtable.h>
+#include <linux/mutex.h>
+#include <linux/siphash.h>
+
+struct wg_peer;
+
+struct pubkey_hashtable {
+ /* TODO: move to rhashtable */
+ DECLARE_HASHTABLE(hashtable, 11);
+ siphash_key_t key;
+ struct mutex lock;
+};
+
+struct pubkey_hashtable *wg_pubkey_hashtable_alloc(void);
+void wg_pubkey_hashtable_add(struct pubkey_hashtable *table,
+ struct wg_peer *peer);
+void wg_pubkey_hashtable_remove(struct pubkey_hashtable *table,
+ struct wg_peer *peer);
+struct wg_peer *
+wg_pubkey_hashtable_lookup(struct pubkey_hashtable *table,
+ const u8 pubkey[NOISE_PUBLIC_KEY_LEN]);
+
+struct index_hashtable {
+ /* TODO: move to rhashtable */
+ DECLARE_HASHTABLE(hashtable, 13);
+ spinlock_t lock;
+};
+
+enum index_hashtable_type {
+ INDEX_HASHTABLE_HANDSHAKE = 1U << 0,
+ INDEX_HASHTABLE_KEYPAIR = 1U << 1
+};
+
+struct index_hashtable_entry {
+ struct wg_peer *peer;
+ struct hlist_node index_hash;
+ enum index_hashtable_type type;
+ __le32 index;
+};
+
+struct index_hashtable *wg_index_hashtable_alloc(void);
+__le32 wg_index_hashtable_insert(struct index_hashtable *table,
+ struct index_hashtable_entry *entry);
+bool wg_index_hashtable_replace(struct index_hashtable *table,
+ struct index_hashtable_entry *old,
+ struct index_hashtable_entry *new);
+void wg_index_hashtable_remove(struct index_hashtable *table,
+ struct index_hashtable_entry *entry);
+struct index_hashtable_entry *
+wg_index_hashtable_lookup(struct index_hashtable *table,
+ const enum index_hashtable_type type_mask,
+ const __le32 index, struct wg_peer **peer);
+
+#endif /* _WG_PEERLOOKUP_H */
diff --git a/drivers/net/wireguard/queueing.c b/drivers/net/wireguard/queueing.c
new file mode 100644
index 000000000000..5c964fcb994e
--- /dev/null
+++ b/drivers/net/wireguard/queueing.c
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <[email protected]>. All Rights Reserved.
+ */
+
+#include "queueing.h"
+
+struct multicore_worker __percpu *
+wg_packet_percpu_multicore_worker_alloc(work_func_t function, void *ptr)
+{
+ int cpu;
+ struct multicore_worker __percpu *worker =
+ alloc_percpu(struct multicore_worker);
+
+ if (!worker)
+ return NULL;
+
+ for_each_possible_cpu(cpu) {
+ per_cpu_ptr(worker, cpu)->ptr = ptr;
+ INIT_WORK(&per_cpu_ptr(worker, cpu)->work, function);
+ }
+ return worker;
+}
+
+int wg_packet_queue_init(struct crypt_queue *queue, work_func_t function,
+ bool multicore, unsigned int len)
+{
+ int ret;
+
+ memset(queue, 0, sizeof(*queue));
+ ret = ptr_ring_init(&queue->ring, len, GFP_KERNEL);
+ if (ret)
+ return ret;
+ if (function) {
+ if (multicore) {
+ queue->worker = wg_packet_percpu_multicore_worker_alloc(
+ function, queue);
+ if (!queue->worker)
+ return -ENOMEM;
+ } else {
+ INIT_WORK(&queue->work, function);
+ }
+ }
+ return 0;
+}
+
+void wg_packet_queue_free(struct crypt_queue *queue, bool multicore)
+{
+ if (multicore)
+ free_percpu(queue->worker);
+ WARN_ON(!__ptr_ring_empty(&queue->ring));
+ ptr_ring_cleanup(&queue->ring, NULL);
+}
diff --git a/drivers/net/wireguard/queueing.h b/drivers/net/wireguard/queueing.h
new file mode 100644
index 000000000000..fecb559cbdb6
--- /dev/null
+++ b/drivers/net/wireguard/queueing.h
@@ -0,0 +1,194 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <[email protected]>. All Rights Reserved.
+ */
+
+#ifndef _WG_QUEUEING_H
+#define _WG_QUEUEING_H
+
+#include "peer.h"
+#include <linux/types.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+
+struct wg_device;
+struct wg_peer;
+struct multicore_worker;
+struct crypt_queue;
+struct sk_buff;
+
+/* queueing.c APIs: */
+int wg_packet_queue_init(struct crypt_queue *queue, work_func_t function,
+ bool multicore, unsigned int len);
+void wg_packet_queue_free(struct crypt_queue *queue, bool multicore);
+struct multicore_worker __percpu *
+wg_packet_percpu_multicore_worker_alloc(work_func_t function, void *ptr);
+
+/* receive.c APIs: */
+void wg_packet_receive(struct wg_device *wg, struct sk_buff *skb);
+void wg_packet_handshake_receive_worker(struct work_struct *work);
+/* NAPI poll function: */
+int wg_packet_rx_poll(struct napi_struct *napi, int budget);
+/* Workqueue worker: */
+void wg_packet_decrypt_worker(struct work_struct *work);
+
+/* send.c APIs: */
+void wg_packet_send_queued_handshake_initiation(struct wg_peer *peer,
+ bool is_retry);
+void wg_packet_send_handshake_response(struct wg_peer *peer);
+void wg_packet_send_handshake_cookie(struct wg_device *wg,
+ struct sk_buff *initiating_skb,
+ __le32 sender_index);
+void wg_packet_send_keepalive(struct wg_peer *peer);
+void wg_packet_purge_staged_packets(struct wg_peer *peer);
+void wg_packet_send_staged_packets(struct wg_peer *peer);
+/* Workqueue workers: */
+void wg_packet_handshake_send_worker(struct work_struct *work);
+void wg_packet_tx_worker(struct work_struct *work);
+void wg_packet_encrypt_worker(struct work_struct *work);
+
+enum packet_state {
+ PACKET_STATE_UNCRYPTED,
+ PACKET_STATE_CRYPTED,
+ PACKET_STATE_DEAD
+};
+
+struct packet_cb {
+ u64 nonce;
+ struct noise_keypair *keypair;
+ atomic_t state;
+ u32 mtu;
+ u8 ds;
+};
+
+#define PACKET_CB(skb) ((struct packet_cb *)((skb)->cb))
+#define PACKET_PEER(skb) (PACKET_CB(skb)->keypair->entry.peer)
+
+/* Returns either the correct skb->protocol value, or 0 if invalid. */
+static inline __be16 wg_skb_examine_untrusted_ip_hdr(struct sk_buff *skb)
+{
+ if (skb_network_header(skb) >= skb->head &&
+ (skb_network_header(skb) + sizeof(struct iphdr)) <=
+ skb_tail_pointer(skb) &&
+ ip_hdr(skb)->version == 4)
+ return htons(ETH_P_IP);
+ if (skb_network_header(skb) >= skb->head &&
+ (skb_network_header(skb) + sizeof(struct ipv6hdr)) <=
+ skb_tail_pointer(skb) &&
+ ipv6_hdr(skb)->version == 6)
+ return htons(ETH_P_IPV6);
+ return 0;
+}
+
+static inline void wg_reset_packet(struct sk_buff *skb)
+{
+ skb_scrub_packet(skb, true);
+ memset(&skb->headers_start, 0,
+ offsetof(struct sk_buff, headers_end) -
+ offsetof(struct sk_buff, headers_start));
+ skb->queue_mapping = 0;
+ skb->nohdr = 0;
+ skb->peeked = 0;
+ skb->mac_len = 0;
+ skb->dev = NULL;
+#ifdef CONFIG_NET_SCHED
+ skb->tc_index = 0;
+ skb_reset_tc(skb);
+#endif
+ skb->hdr_len = skb_headroom(skb);
+ skb_reset_mac_header(skb);
+ skb_reset_network_header(skb);
+ skb_reset_transport_header(skb);
+ skb_probe_transport_header(skb);
+ skb_reset_inner_headers(skb);
+}
+
+static inline int wg_cpumask_choose_online(int *stored_cpu, unsigned int id)
+{
+ unsigned int cpu = *stored_cpu, cpu_index, i;
+
+ if (unlikely(cpu == nr_cpumask_bits ||
+ !cpumask_test_cpu(cpu, cpu_online_mask))) {
+ cpu_index = id % cpumask_weight(cpu_online_mask);
+ cpu = cpumask_first(cpu_online_mask);
+ for (i = 0; i < cpu_index; ++i)
+ cpu = cpumask_next(cpu, cpu_online_mask);
+ *stored_cpu = cpu;
+ }
+ return cpu;
+}
+
+/* This function is racy, in the sense that next is unlocked, so it could return
+ * the same CPU twice. A race-free version of this would be to instead store an
+ * atomic sequence number, do an increment-and-return, and then iterate through
+ * every possible CPU until we get to that index -- choose_cpu. However that's
+ * a bit slower, and it doesn't seem like this potential race actually
+ * introduces any performance loss, so we live with it.
+ */
+static inline int wg_cpumask_next_online(int *next)
+{
+ int cpu = *next;
+
+ while (unlikely(!cpumask_test_cpu(cpu, cpu_online_mask)))
+ cpu = cpumask_next(cpu, cpu_online_mask) % nr_cpumask_bits;
+ *next = cpumask_next(cpu, cpu_online_mask) % nr_cpumask_bits;
+ return cpu;
+}
+
+static inline int wg_queue_enqueue_per_device_and_peer(
+ struct crypt_queue *device_queue, struct crypt_queue *peer_queue,
+ struct sk_buff *skb, struct workqueue_struct *wq, int *next_cpu)
+{
+ int cpu;
+
+ atomic_set_release(&PACKET_CB(skb)->state, PACKET_STATE_UNCRYPTED);
+ /* We first queue this up for the peer ingestion, but the consumer
+ * will wait for the state to change to CRYPTED or DEAD before.
+ */
+ if (unlikely(ptr_ring_produce_bh(&peer_queue->ring, skb)))
+ return -ENOSPC;
+ /* Then we queue it up in the device queue, which consumes the
+ * packet as soon as it can.
+ */
+ cpu = wg_cpumask_next_online(next_cpu);
+ if (unlikely(ptr_ring_produce_bh(&device_queue->ring, skb)))
+ return -EPIPE;
+ queue_work_on(cpu, wq, &per_cpu_ptr(device_queue->worker, cpu)->work);
+ return 0;
+}
+
+static inline void wg_queue_enqueue_per_peer(struct crypt_queue *queue,
+ struct sk_buff *skb,
+ enum packet_state state)
+{
+ /* We take a reference, because as soon as we call atomic_set, the
+ * peer can be freed from below us.
+ */
+ struct wg_peer *peer = wg_peer_get(PACKET_PEER(skb));
+
+ atomic_set_release(&PACKET_CB(skb)->state, state);
+ queue_work_on(wg_cpumask_choose_online(&peer->serial_work_cpu,
+ peer->internal_id),
+ peer->device->packet_crypt_wq, &queue->work);
+ wg_peer_put(peer);
+}
+
+static inline void wg_queue_enqueue_per_peer_napi(struct sk_buff *skb,
+ enum packet_state state)
+{
+ /* We take a reference, because as soon as we call atomic_set, the
+ * peer can be freed from below us.
+ */
+ struct wg_peer *peer = wg_peer_get(PACKET_PEER(skb));
+
+ atomic_set_release(&PACKET_CB(skb)->state, state);
+ napi_schedule(&peer->napi);
+ wg_peer_put(peer);
+}
+
+#ifdef DEBUG
+bool wg_packet_counter_selftest(void);
+#endif
+
+#endif /* _WG_QUEUEING_H */
diff --git a/drivers/net/wireguard/ratelimiter.c b/drivers/net/wireguard/ratelimiter.c
new file mode 100644
index 000000000000..3fedd1d21f5e
--- /dev/null
+++ b/drivers/net/wireguard/ratelimiter.c
@@ -0,0 +1,223 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <[email protected]>. All Rights Reserved.
+ */
+
+#include "ratelimiter.h"
+#include <linux/siphash.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <net/ip.h>
+
+static struct kmem_cache *entry_cache;
+static hsiphash_key_t key;
+static spinlock_t table_lock = __SPIN_LOCK_UNLOCKED("ratelimiter_table_lock");
+static DEFINE_MUTEX(init_lock);
+static u64 init_refcnt; /* Protected by init_lock, hence not atomic. */
+static atomic_t total_entries = ATOMIC_INIT(0);
+static unsigned int max_entries, table_size;
+static void wg_ratelimiter_gc_entries(struct work_struct *);
+static DECLARE_DEFERRABLE_WORK(gc_work, wg_ratelimiter_gc_entries);
+static struct hlist_head *table_v4;
+#if IS_ENABLED(CONFIG_IPV6)
+static struct hlist_head *table_v6;
+#endif
+
+struct ratelimiter_entry {
+ u64 last_time_ns, tokens, ip;
+ void *net;
+ spinlock_t lock;
+ struct hlist_node hash;
+ struct rcu_head rcu;
+};
+
+enum {
+ PACKETS_PER_SECOND = 20,
+ PACKETS_BURSTABLE = 5,
+ PACKET_COST = NSEC_PER_SEC / PACKETS_PER_SECOND,
+ TOKEN_MAX = PACKET_COST * PACKETS_BURSTABLE
+};
+
+static void entry_free(struct rcu_head *rcu)
+{
+ kmem_cache_free(entry_cache,
+ container_of(rcu, struct ratelimiter_entry, rcu));
+ atomic_dec(&total_entries);
+}
+
+static void entry_uninit(struct ratelimiter_entry *entry)
+{
+ hlist_del_rcu(&entry->hash);
+ call_rcu(&entry->rcu, entry_free);
+}
+
+/* Calling this function with a NULL work uninits all entries. */
+static void wg_ratelimiter_gc_entries(struct work_struct *work)
+{
+ const u64 now = ktime_get_coarse_boottime_ns();
+ struct ratelimiter_entry *entry;
+ struct hlist_node *temp;
+ unsigned int i;
+
+ for (i = 0; i < table_size; ++i) {
+ spin_lock(&table_lock);
+ hlist_for_each_entry_safe(entry, temp, &table_v4[i], hash) {
+ if (unlikely(!work) ||
+ now - entry->last_time_ns > NSEC_PER_SEC)
+ entry_uninit(entry);
+ }
+#if IS_ENABLED(CONFIG_IPV6)
+ hlist_for_each_entry_safe(entry, temp, &table_v6[i], hash) {
+ if (unlikely(!work) ||
+ now - entry->last_time_ns > NSEC_PER_SEC)
+ entry_uninit(entry);
+ }
+#endif
+ spin_unlock(&table_lock);
+ if (likely(work))
+ cond_resched();
+ }
+ if (likely(work))
+ queue_delayed_work(system_power_efficient_wq, &gc_work, HZ);
+}
+
+bool wg_ratelimiter_allow(struct sk_buff *skb, struct net *net)
+{
+ /* We only take the bottom half of the net pointer, so that we can hash
+ * 3 words in the end. This way, siphash's len param fits into the final
+ * u32, and we don't incur an extra round.
+ */
+ const u32 net_word = (unsigned long)net;
+ struct ratelimiter_entry *entry;
+ struct hlist_head *bucket;
+ u64 ip;
+
+ if (skb->protocol == htons(ETH_P_IP)) {
+ ip = (u64 __force)ip_hdr(skb)->saddr;
+ bucket = &table_v4[hsiphash_2u32(net_word, ip, &key) &
+ (table_size - 1)];
+ }
+#if IS_ENABLED(CONFIG_IPV6)
+ else if (skb->protocol == htons(ETH_P_IPV6)) {
+ /* Only use 64 bits, so as to ratelimit the whole /64. */
+ memcpy(&ip, &ipv6_hdr(skb)->saddr, sizeof(ip));
+ bucket = &table_v6[hsiphash_3u32(net_word, ip >> 32, ip, &key) &
+ (table_size - 1)];
+ }
+#endif
+ else
+ return false;
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(entry, bucket, hash) {
+ if (entry->net == net && entry->ip == ip) {
+ u64 now, tokens;
+ bool ret;
+ /* Quasi-inspired by nft_limit.c, but this is actually a
+ * slightly different algorithm. Namely, we incorporate
+ * the burst as part of the maximum tokens, rather than
+ * as part of the rate.
+ */
+ spin_lock(&entry->lock);
+ now = ktime_get_coarse_boottime_ns();
+ tokens = min_t(u64, TOKEN_MAX,
+ entry->tokens + now -
+ entry->last_time_ns);
+ entry->last_time_ns = now;
+ ret = tokens >= PACKET_COST;
+ entry->tokens = ret ? tokens - PACKET_COST : tokens;
+ spin_unlock(&entry->lock);
+ rcu_read_unlock();
+ return ret;
+ }
+ }
+ rcu_read_unlock();
+
+ if (atomic_inc_return(&total_entries) > max_entries)
+ goto err_oom;
+
+ entry = kmem_cache_alloc(entry_cache, GFP_KERNEL);
+ if (unlikely(!entry))
+ goto err_oom;
+
+ entry->net = net;
+ entry->ip = ip;
+ INIT_HLIST_NODE(&entry->hash);
+ spin_lock_init(&entry->lock);
+ entry->last_time_ns = ktime_get_coarse_boottime_ns();
+ entry->tokens = TOKEN_MAX - PACKET_COST;
+ spin_lock(&table_lock);
+ hlist_add_head_rcu(&entry->hash, bucket);
+ spin_unlock(&table_lock);
+ return true;
+
+err_oom:
+ atomic_dec(&total_entries);
+ return false;
+}
+
+int wg_ratelimiter_init(void)
+{
+ mutex_lock(&init_lock);
+ if (++init_refcnt != 1)
+ goto out;
+
+ entry_cache = KMEM_CACHE(ratelimiter_entry, 0);
+ if (!entry_cache)
+ goto err;
+
+ /* xt_hashlimit.c uses a slightly different algorithm for ratelimiting,
+ * but what it shares in common is that it uses a massive hashtable. So,
+ * we borrow their wisdom about good table sizes on different systems
+ * dependent on RAM. This calculation here comes from there.
+ */
+ table_size = (totalram_pages() > (1U << 30) / PAGE_SIZE) ? 8192 :
+ max_t(unsigned long, 16, roundup_pow_of_two(
+ (totalram_pages() << PAGE_SHIFT) /
+ (1U << 14) / sizeof(struct hlist_head)));
+ max_entries = table_size * 8;
+
+ table_v4 = kvzalloc(table_size * sizeof(*table_v4), GFP_KERNEL);
+ if (unlikely(!table_v4))
+ goto err_kmemcache;
+
+#if IS_ENABLED(CONFIG_IPV6)
+ table_v6 = kvzalloc(table_size * sizeof(*table_v6), GFP_KERNEL);
+ if (unlikely(!table_v6)) {
+ kvfree(table_v4);
+ goto err_kmemcache;
+ }
+#endif
+
+ queue_delayed_work(system_power_efficient_wq, &gc_work, HZ);
+ get_random_bytes(&key, sizeof(key));
+out:
+ mutex_unlock(&init_lock);
+ return 0;
+
+err_kmemcache:
+ kmem_cache_destroy(entry_cache);
+err:
+ --init_refcnt;
+ mutex_unlock(&init_lock);
+ return -ENOMEM;
+}
+
+void wg_ratelimiter_uninit(void)
+{
+ mutex_lock(&init_lock);
+ if (!init_refcnt || --init_refcnt)
+ goto out;
+
+ cancel_delayed_work_sync(&gc_work);
+ wg_ratelimiter_gc_entries(NULL);
+ rcu_barrier();
+ kvfree(table_v4);
+#if IS_ENABLED(CONFIG_IPV6)
+ kvfree(table_v6);
+#endif
+ kmem_cache_destroy(entry_cache);
+out:
+ mutex_unlock(&init_lock);
+}
+
+#include "selftest/ratelimiter.c"
diff --git a/drivers/net/wireguard/ratelimiter.h b/drivers/net/wireguard/ratelimiter.h
new file mode 100644
index 000000000000..83067f71ea99
--- /dev/null
+++ b/drivers/net/wireguard/ratelimiter.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <[email protected]>. All Rights Reserved.
+ */
+
+#ifndef _WG_RATELIMITER_H
+#define _WG_RATELIMITER_H
+
+#include <linux/skbuff.h>
+
+int wg_ratelimiter_init(void);
+void wg_ratelimiter_uninit(void);
+bool wg_ratelimiter_allow(struct sk_buff *skb, struct net *net);
+
+#ifdef DEBUG
+bool wg_ratelimiter_selftest(void);
+#endif
+
+#endif /* _WG_RATELIMITER_H */
diff --git a/drivers/net/wireguard/receive.c b/drivers/net/wireguard/receive.c
new file mode 100644
index 000000000000..9c6bab9c981f
--- /dev/null
+++ b/drivers/net/wireguard/receive.c
@@ -0,0 +1,595 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <[email protected]>. All Rights Reserved.
+ */
+
+#include "queueing.h"
+#include "device.h"
+#include "peer.h"
+#include "timers.h"
+#include "messages.h"
+#include "cookie.h"
+#include "socket.h"
+
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/udp.h>
+#include <net/ip_tunnels.h>
+
+/* Must be called with bh disabled. */
+static void update_rx_stats(struct wg_peer *peer, size_t len)
+{
+ struct pcpu_sw_netstats *tstats =
+ get_cpu_ptr(peer->device->dev->tstats);
+
+ u64_stats_update_begin(&tstats->syncp);
+ ++tstats->rx_packets;
+ tstats->rx_bytes += len;
+ peer->rx_bytes += len;
+ u64_stats_update_end(&tstats->syncp);
+ put_cpu_ptr(tstats);
+}
+
+#define SKB_TYPE_LE32(skb) (((struct message_header *)(skb)->data)->type)
+
+static size_t validate_header_len(struct sk_buff *skb)
+{
+ if (unlikely(skb->len < sizeof(struct message_header)))
+ return 0;
+ if (SKB_TYPE_LE32(skb) == cpu_to_le32(MESSAGE_DATA) &&
+ skb->len >= MESSAGE_MINIMUM_LENGTH)
+ return sizeof(struct message_data);
+ if (SKB_TYPE_LE32(skb) == cpu_to_le32(MESSAGE_HANDSHAKE_INITIATION) &&
+ skb->len == sizeof(struct message_handshake_initiation))
+ return sizeof(struct message_handshake_initiation);
+ if (SKB_TYPE_LE32(skb) == cpu_to_le32(MESSAGE_HANDSHAKE_RESPONSE) &&
+ skb->len == sizeof(struct message_handshake_response))
+ return sizeof(struct message_handshake_response);
+ if (SKB_TYPE_LE32(skb) == cpu_to_le32(MESSAGE_HANDSHAKE_COOKIE) &&
+ skb->len == sizeof(struct message_handshake_cookie))
+ return sizeof(struct message_handshake_cookie);
+ return 0;
+}
+
+static int prepare_skb_header(struct sk_buff *skb, struct wg_device *wg)
+{
+ size_t data_offset, data_len, header_len;
+ struct udphdr *udp;
+
+ if (unlikely(wg_skb_examine_untrusted_ip_hdr(skb) != skb->protocol ||
+ skb_transport_header(skb) < skb->head ||
+ (skb_transport_header(skb) + sizeof(struct udphdr)) >
+ skb_tail_pointer(skb)))
+ return -EINVAL; /* Bogus IP header */
+ udp = udp_hdr(skb);
+ data_offset = (u8 *)udp - skb->data;
+ if (unlikely(data_offset > U16_MAX ||
+ data_offset + sizeof(struct udphdr) > skb->len))
+ /* Packet has offset at impossible location or isn't big enough
+ * to have UDP fields.
+ */
+ return -EINVAL;
+ data_len = ntohs(udp->len);
+ if (unlikely(data_len < sizeof(struct udphdr) ||
+ data_len > skb->len - data_offset))
+ /* UDP packet is reporting too small of a size or lying about
+ * its size.
+ */
+ return -EINVAL;
+ data_len -= sizeof(struct udphdr);
+ data_offset = (u8 *)udp + sizeof(struct udphdr) - skb->data;
+ if (unlikely(!pskb_may_pull(skb,
+ data_offset + sizeof(struct message_header)) ||
+ pskb_trim(skb, data_len + data_offset) < 0))
+ return -EINVAL;
+ skb_pull(skb, data_offset);
+ if (unlikely(skb->len != data_len))
+ /* Final len does not agree with calculated len */
+ return -EINVAL;
+ header_len = validate_header_len(skb);
+ if (unlikely(!header_len))
+ return -EINVAL;
+ __skb_push(skb, data_offset);
+ if (unlikely(!pskb_may_pull(skb, data_offset + header_len)))
+ return -EINVAL;
+ __skb_pull(skb, data_offset);
+ return 0;
+}
+
+static void wg_receive_handshake_packet(struct wg_device *wg,
+ struct sk_buff *skb)
+{
+ enum cookie_mac_state mac_state;
+ struct wg_peer *peer = NULL;
+ /* This is global, so that our load calculation applies to the whole
+ * system. We don't care about races with it at all.
+ */
+ static u64 last_under_load;
+ bool packet_needs_cookie;
+ bool under_load;
+
+ if (SKB_TYPE_LE32(skb) == cpu_to_le32(MESSAGE_HANDSHAKE_COOKIE)) {
+ net_dbg_skb_ratelimited("%s: Receiving cookie response from %pISpfsc\n",
+ wg->dev->name, skb);
+ wg_cookie_message_consume(
+ (struct message_handshake_cookie *)skb->data, wg);
+ return;
+ }
+
+ under_load = skb_queue_len(&wg->incoming_handshakes) >=
+ MAX_QUEUED_INCOMING_HANDSHAKES / 8;
+ if (under_load)
+ last_under_load = ktime_get_coarse_boottime_ns();
+ else if (last_under_load)
+ under_load = !wg_birthdate_has_expired(last_under_load, 1);
+ mac_state = wg_cookie_validate_packet(&wg->cookie_checker, skb,
+ under_load);
+ if ((under_load && mac_state == VALID_MAC_WITH_COOKIE) ||
+ (!under_load && mac_state == VALID_MAC_BUT_NO_COOKIE)) {
+ packet_needs_cookie = false;
+ } else if (under_load && mac_state == VALID_MAC_BUT_NO_COOKIE) {
+ packet_needs_cookie = true;
+ } else {
+ net_dbg_skb_ratelimited("%s: Invalid MAC of handshake, dropping packet from %pISpfsc\n",
+ wg->dev->name, skb);
+ return;
+ }
+
+ switch (SKB_TYPE_LE32(skb)) {
+ case cpu_to_le32(MESSAGE_HANDSHAKE_INITIATION): {
+ struct message_handshake_initiation *message =
+ (struct message_handshake_initiation *)skb->data;
+
+ if (packet_needs_cookie) {
+ wg_packet_send_handshake_cookie(wg, skb,
+ message->sender_index);
+ return;
+ }
+ peer = wg_noise_handshake_consume_initiation(message, wg);
+ if (unlikely(!peer)) {
+ net_dbg_skb_ratelimited("%s: Invalid handshake initiation from %pISpfsc\n",
+ wg->dev->name, skb);
+ return;
+ }
+ wg_socket_set_peer_endpoint_from_skb(peer, skb);
+ net_dbg_ratelimited("%s: Receiving handshake initiation from peer %llu (%pISpfsc)\n",
+ wg->dev->name, peer->internal_id,
+ &peer->endpoint.addr);
+ wg_packet_send_handshake_response(peer);
+ break;
+ }
+ case cpu_to_le32(MESSAGE_HANDSHAKE_RESPONSE): {
+ struct message_handshake_response *message =
+ (struct message_handshake_response *)skb->data;
+
+ if (packet_needs_cookie) {
+ wg_packet_send_handshake_cookie(wg, skb,
+ message->sender_index);
+ return;
+ }
+ peer = wg_noise_handshake_consume_response(message, wg);
+ if (unlikely(!peer)) {
+ net_dbg_skb_ratelimited("%s: Invalid handshake response from %pISpfsc\n",
+ wg->dev->name, skb);
+ return;
+ }
+ wg_socket_set_peer_endpoint_from_skb(peer, skb);
+ net_dbg_ratelimited("%s: Receiving handshake response from peer %llu (%pISpfsc)\n",
+ wg->dev->name, peer->internal_id,
+ &peer->endpoint.addr);
+ if (wg_noise_handshake_begin_session(&peer->handshake,
+ &peer->keypairs)) {
+ wg_timers_session_derived(peer);
+ wg_timers_handshake_complete(peer);
+ /* Calling this function will either send any existing
+ * packets in the queue and not send a keepalive, which
+ * is the best case, Or, if there's nothing in the
+ * queue, it will send a keepalive, in order to give
+ * immediate confirmation of the session.
+ */
+ wg_packet_send_keepalive(peer);
+ }
+ break;
+ }
+ }
+
+ if (unlikely(!peer)) {
+ WARN(1, "Somehow a wrong type of packet wound up in the handshake queue!\n");
+ return;
+ }
+
+ local_bh_disable();
+ update_rx_stats(peer, skb->len);
+ local_bh_enable();
+
+ wg_timers_any_authenticated_packet_received(peer);
+ wg_timers_any_authenticated_packet_traversal(peer);
+ wg_peer_put(peer);
+}
+
+void wg_packet_handshake_receive_worker(struct work_struct *work)
+{
+ struct wg_device *wg = container_of(work, struct multicore_worker,
+ work)->ptr;
+ struct sk_buff *skb;
+
+ while ((skb = skb_dequeue(&wg->incoming_handshakes)) != NULL) {
+ wg_receive_handshake_packet(wg, skb);
+ dev_kfree_skb(skb);
+ cond_resched();
+ }
+}
+
+static void keep_key_fresh(struct wg_peer *peer)
+{
+ struct noise_keypair *keypair;
+ bool send = false;
+
+ if (peer->sent_lastminute_handshake)
+ return;
+
+ rcu_read_lock_bh();
+ keypair = rcu_dereference_bh(peer->keypairs.current_keypair);
+ if (likely(keypair && READ_ONCE(keypair->sending.is_valid)) &&
+ keypair->i_am_the_initiator &&
+ unlikely(wg_birthdate_has_expired(keypair->sending.birthdate,
+ REJECT_AFTER_TIME - KEEPALIVE_TIMEOUT - REKEY_TIMEOUT)))
+ send = true;
+ rcu_read_unlock_bh();
+
+ if (send) {
+ peer->sent_lastminute_handshake = true;
+ wg_packet_send_queued_handshake_initiation(peer, false);
+ }
+}
+
+static bool decrypt_packet(struct sk_buff *skb, struct noise_symmetric_key *key)
+{
+ struct scatterlist sg[MAX_SKB_FRAGS + 8];
+ struct sk_buff *trailer;
+ unsigned int offset;
+ int num_frags;
+
+ if (unlikely(!key))
+ return false;
+
+ if (unlikely(!READ_ONCE(key->is_valid) ||
+ wg_birthdate_has_expired(key->birthdate, REJECT_AFTER_TIME) ||
+ key->counter.receive.counter >= REJECT_AFTER_MESSAGES)) {
+ WRITE_ONCE(key->is_valid, false);
+ return false;
+ }
+
+ PACKET_CB(skb)->nonce =
+ le64_to_cpu(((struct message_data *)skb->data)->counter);
+
+ /* We ensure that the network header is part of the packet before we
+ * call skb_cow_data, so that there's no chance that data is removed
+ * from the skb, so that later we can extract the original endpoint.
+ */
+ offset = skb->data - skb_network_header(skb);
+ skb_push(skb, offset);
+ num_frags = skb_cow_data(skb, 0, &trailer);
+ offset += sizeof(struct message_data);
+ skb_pull(skb, offset);
+ if (unlikely(num_frags < 0 || num_frags > ARRAY_SIZE(sg)))
+ return false;
+
+ sg_init_table(sg, num_frags);
+ if (skb_to_sgvec(skb, sg, 0, skb->len) <= 0)
+ return false;
+
+ if (!chacha20poly1305_decrypt_sg_inplace(sg, skb->len, NULL, 0,
+ PACKET_CB(skb)->nonce,
+ key->key))
+ return false;
+
+ /* Another ugly situation of pushing and pulling the header so as to
+ * keep endpoint information intact.
+ */
+ skb_push(skb, offset);
+ if (pskb_trim(skb, skb->len - noise_encrypted_len(0)))
+ return false;
+ skb_pull(skb, offset);
+
+ return true;
+}
+
+/* This is RFC6479, a replay detection bitmap algorithm that avoids bitshifts */
+static bool counter_validate(union noise_counter *counter, u64 their_counter)
+{
+ unsigned long index, index_current, top, i;
+ bool ret = false;
+
+ spin_lock_bh(&counter->receive.lock);
+
+ if (unlikely(counter->receive.counter >= REJECT_AFTER_MESSAGES + 1 ||
+ their_counter >= REJECT_AFTER_MESSAGES))
+ goto out;
+
+ ++their_counter;
+
+ if (unlikely((COUNTER_WINDOW_SIZE + their_counter) <
+ counter->receive.counter))
+ goto out;
+
+ index = their_counter >> ilog2(BITS_PER_LONG);
+
+ if (likely(their_counter > counter->receive.counter)) {
+ index_current = counter->receive.counter >> ilog2(BITS_PER_LONG);
+ top = min_t(unsigned long, index - index_current,
+ COUNTER_BITS_TOTAL / BITS_PER_LONG);
+ for (i = 1; i <= top; ++i)
+ counter->receive.backtrack[(i + index_current) &
+ ((COUNTER_BITS_TOTAL / BITS_PER_LONG) - 1)] = 0;
+ counter->receive.counter = their_counter;
+ }
+
+ index &= (COUNTER_BITS_TOTAL / BITS_PER_LONG) - 1;
+ ret = !test_and_set_bit(their_counter & (BITS_PER_LONG - 1),
+ &counter->receive.backtrack[index]);
+
+out:
+ spin_unlock_bh(&counter->receive.lock);
+ return ret;
+}
+
+#include "selftest/counter.c"
+
+static void wg_packet_consume_data_done(struct wg_peer *peer,
+ struct sk_buff *skb,
+ struct endpoint *endpoint)
+{
+ struct net_device *dev = peer->device->dev;
+ unsigned int len, len_before_trim;
+ struct wg_peer *routed_peer;
+
+ wg_socket_set_peer_endpoint(peer, endpoint);
+
+ if (unlikely(wg_noise_received_with_keypair(&peer->keypairs,
+ PACKET_CB(skb)->keypair))) {
+ wg_timers_handshake_complete(peer);
+ wg_packet_send_staged_packets(peer);
+ }
+
+ keep_key_fresh(peer);
+
+ wg_timers_any_authenticated_packet_received(peer);
+ wg_timers_any_authenticated_packet_traversal(peer);
+
+ /* A packet with length 0 is a keepalive packet */
+ if (unlikely(!skb->len)) {
+ update_rx_stats(peer, message_data_len(0));
+ net_dbg_ratelimited("%s: Receiving keepalive packet from peer %llu (%pISpfsc)\n",
+ dev->name, peer->internal_id,
+ &peer->endpoint.addr);
+ goto packet_processed;
+ }
+
+ wg_timers_data_received(peer);
+
+ if (unlikely(skb_network_header(skb) < skb->head))
+ goto dishonest_packet_size;
+ if (unlikely(!(pskb_network_may_pull(skb, sizeof(struct iphdr)) &&
+ (ip_hdr(skb)->version == 4 ||
+ (ip_hdr(skb)->version == 6 &&
+ pskb_network_may_pull(skb, sizeof(struct ipv6hdr)))))))
+ goto dishonest_packet_type;
+
+ skb->dev = dev;
+ /* We've already verified the Poly1305 auth tag, which means this packet
+ * was not modified in transit. We can therefore tell the networking
+ * stack that all checksums of every layer of encapsulation have already
+ * been checked "by the hardware" and therefore is unnecessary to check
+ * again in software.
+ */
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ skb->csum_level = ~0; /* All levels */
+ skb->protocol = wg_skb_examine_untrusted_ip_hdr(skb);
+ if (skb->protocol == htons(ETH_P_IP)) {
+ len = ntohs(ip_hdr(skb)->tot_len);
+ if (unlikely(len < sizeof(struct iphdr)))
+ goto dishonest_packet_size;
+ if (INET_ECN_is_ce(PACKET_CB(skb)->ds))
+ IP_ECN_set_ce(ip_hdr(skb));
+ } else if (skb->protocol == htons(ETH_P_IPV6)) {
+ len = ntohs(ipv6_hdr(skb)->payload_len) +
+ sizeof(struct ipv6hdr);
+ if (INET_ECN_is_ce(PACKET_CB(skb)->ds))
+ IP6_ECN_set_ce(skb, ipv6_hdr(skb));
+ } else {
+ goto dishonest_packet_type;
+ }
+
+ if (unlikely(len > skb->len))
+ goto dishonest_packet_size;
+ len_before_trim = skb->len;
+ if (unlikely(pskb_trim(skb, len)))
+ goto packet_processed;
+
+ routed_peer = wg_allowedips_lookup_src(&peer->device->peer_allowedips,
+ skb);
+ wg_peer_put(routed_peer); /* We don't need the extra reference. */
+
+ if (unlikely(routed_peer != peer))
+ goto dishonest_packet_peer;
+
+ if (unlikely(napi_gro_receive(&peer->napi, skb) == GRO_DROP)) {
+ ++dev->stats.rx_dropped;
+ net_dbg_ratelimited("%s: Failed to give packet to userspace from peer %llu (%pISpfsc)\n",
+ dev->name, peer->internal_id,
+ &peer->endpoint.addr);
+ } else {
+ update_rx_stats(peer, message_data_len(len_before_trim));
+ }
+ return;
+
+dishonest_packet_peer:
+ net_dbg_skb_ratelimited("%s: Packet has unallowed src IP (%pISc) from peer %llu (%pISpfsc)\n",
+ dev->name, skb, peer->internal_id,
+ &peer->endpoint.addr);
+ ++dev->stats.rx_errors;
+ ++dev->stats.rx_frame_errors;
+ goto packet_processed;
+dishonest_packet_type:
+ net_dbg_ratelimited("%s: Packet is neither ipv4 nor ipv6 from peer %llu (%pISpfsc)\n",
+ dev->name, peer->internal_id, &peer->endpoint.addr);
+ ++dev->stats.rx_errors;
+ ++dev->stats.rx_frame_errors;
+ goto packet_processed;
+dishonest_packet_size:
+ net_dbg_ratelimited("%s: Packet has incorrect size from peer %llu (%pISpfsc)\n",
+ dev->name, peer->internal_id, &peer->endpoint.addr);
+ ++dev->stats.rx_errors;
+ ++dev->stats.rx_length_errors;
+ goto packet_processed;
+packet_processed:
+ dev_kfree_skb(skb);
+}
+
+int wg_packet_rx_poll(struct napi_struct *napi, int budget)
+{
+ struct wg_peer *peer = container_of(napi, struct wg_peer, napi);
+ struct crypt_queue *queue = &peer->rx_queue;
+ struct noise_keypair *keypair;
+ struct endpoint endpoint;
+ enum packet_state state;
+ struct sk_buff *skb;
+ int work_done = 0;
+ bool free;
+
+ if (unlikely(budget <= 0))
+ return 0;
+
+ while ((skb = __ptr_ring_peek(&queue->ring)) != NULL &&
+ (state = atomic_read_acquire(&PACKET_CB(skb)->state)) !=
+ PACKET_STATE_UNCRYPTED) {
+ __ptr_ring_discard_one(&queue->ring);
+ peer = PACKET_PEER(skb);
+ keypair = PACKET_CB(skb)->keypair;
+ free = true;
+
+ if (unlikely(state != PACKET_STATE_CRYPTED))
+ goto next;
+
+ if (unlikely(!counter_validate(&keypair->receiving.counter,
+ PACKET_CB(skb)->nonce))) {
+ net_dbg_ratelimited("%s: Packet has invalid nonce %llu (max %llu)\n",
+ peer->device->dev->name,
+ PACKET_CB(skb)->nonce,
+ keypair->receiving.counter.receive.counter);
+ goto next;
+ }
+
+ if (unlikely(wg_socket_endpoint_from_skb(&endpoint, skb)))
+ goto next;
+
+ wg_reset_packet(skb);
+ wg_packet_consume_data_done(peer, skb, &endpoint);
+ free = false;
+
+next:
+ wg_noise_keypair_put(keypair, false);
+ wg_peer_put(peer);
+ if (unlikely(free))
+ dev_kfree_skb(skb);
+
+ if (++work_done >= budget)
+ break;
+ }
+
+ if (work_done < budget)
+ napi_complete_done(napi, work_done);
+
+ return work_done;
+}
+
+void wg_packet_decrypt_worker(struct work_struct *work)
+{
+ struct crypt_queue *queue = container_of(work, struct multicore_worker,
+ work)->ptr;
+ struct sk_buff *skb;
+
+ while ((skb = ptr_ring_consume_bh(&queue->ring)) != NULL) {
+ enum packet_state state = likely(decrypt_packet(skb,
+ &PACKET_CB(skb)->keypair->receiving)) ?
+ PACKET_STATE_CRYPTED : PACKET_STATE_DEAD;
+ wg_queue_enqueue_per_peer_napi(skb, state);
+ }
+}
+
+static void wg_packet_consume_data(struct wg_device *wg, struct sk_buff *skb)
+{
+ __le32 idx = ((struct message_data *)skb->data)->key_idx;
+ struct wg_peer *peer = NULL;
+ int ret;
+
+ rcu_read_lock_bh();
+ PACKET_CB(skb)->keypair =
+ (struct noise_keypair *)wg_index_hashtable_lookup(
+ wg->index_hashtable, INDEX_HASHTABLE_KEYPAIR, idx,
+ &peer);
+ if (unlikely(!wg_noise_keypair_get(PACKET_CB(skb)->keypair)))
+ goto err_keypair;
+
+ if (unlikely(READ_ONCE(peer->is_dead)))
+ goto err;
+
+ ret = wg_queue_enqueue_per_device_and_peer(&wg->decrypt_queue,
+ &peer->rx_queue, skb,
+ wg->packet_crypt_wq,
+ &wg->decrypt_queue.last_cpu);
+ if (unlikely(ret == -EPIPE))
+ wg_queue_enqueue_per_peer_napi(skb, PACKET_STATE_DEAD);
+ if (likely(!ret || ret == -EPIPE)) {
+ rcu_read_unlock_bh();
+ return;
+ }
+err:
+ wg_noise_keypair_put(PACKET_CB(skb)->keypair, false);
+err_keypair:
+ rcu_read_unlock_bh();
+ wg_peer_put(peer);
+ dev_kfree_skb(skb);
+}
+
+void wg_packet_receive(struct wg_device *wg, struct sk_buff *skb)
+{
+ if (unlikely(prepare_skb_header(skb, wg) < 0))
+ goto err;
+ switch (SKB_TYPE_LE32(skb)) {
+ case cpu_to_le32(MESSAGE_HANDSHAKE_INITIATION):
+ case cpu_to_le32(MESSAGE_HANDSHAKE_RESPONSE):
+ case cpu_to_le32(MESSAGE_HANDSHAKE_COOKIE): {
+ int cpu;
+
+ if (skb_queue_len(&wg->incoming_handshakes) >
+ MAX_QUEUED_INCOMING_HANDSHAKES ||
+ unlikely(!rng_is_initialized())) {
+ net_dbg_skb_ratelimited("%s: Dropping handshake packet from %pISpfsc\n",
+ wg->dev->name, skb);
+ goto err;
+ }
+ skb_queue_tail(&wg->incoming_handshakes, skb);
+ /* Queues up a call to packet_process_queued_handshake_
+ * packets(skb):
+ */
+ cpu = wg_cpumask_next_online(&wg->incoming_handshake_cpu);
+ queue_work_on(cpu, wg->handshake_receive_wq,
+ &per_cpu_ptr(wg->incoming_handshakes_worker, cpu)->work);
+ break;
+ }
+ case cpu_to_le32(MESSAGE_DATA):
+ PACKET_CB(skb)->ds = ip_tunnel_get_dsfield(ip_hdr(skb), skb);
+ wg_packet_consume_data(wg, skb);
+ break;
+ default:
+ net_dbg_skb_ratelimited("%s: Invalid packet from %pISpfsc\n",
+ wg->dev->name, skb);
+ goto err;
+ }
+ return;
+
+err:
+ dev_kfree_skb(skb);
+}
diff --git a/drivers/net/wireguard/selftest/allowedips.c b/drivers/net/wireguard/selftest/allowedips.c
new file mode 100644
index 000000000000..846db14cb046
--- /dev/null
+++ b/drivers/net/wireguard/selftest/allowedips.c
@@ -0,0 +1,683 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <[email protected]>. All Rights Reserved.
+ *
+ * This contains some basic static unit tests for the allowedips data structure.
+ * It also has two additional modes that are disabled and meant to be used by
+ * folks directly playing with this file. If you define the macro
+ * DEBUG_PRINT_TRIE_GRAPHVIZ to be 1, then every time there's a full tree in
+ * memory, it will be printed out as KERN_DEBUG in a format that can be passed
+ * to graphviz (the dot command) to visualize it. If you define the macro
+ * DEBUG_RANDOM_TRIE to be 1, then there will be an extremely costly set of
+ * randomized tests done against a trivial implementation, which may take
+ * upwards of a half-hour to complete. There's no set of users who should be
+ * enabling these, and the only developers that should go anywhere near these
+ * nobs are the ones who are reading this comment.
+ */
+
+#ifdef DEBUG
+
+#include <linux/siphash.h>
+
+static __init void swap_endian_and_apply_cidr(u8 *dst, const u8 *src, u8 bits,
+ u8 cidr)
+{
+ swap_endian(dst, src, bits);
+ memset(dst + (cidr + 7) / 8, 0, bits / 8 - (cidr + 7) / 8);
+ if (cidr)
+ dst[(cidr + 7) / 8 - 1] &= ~0U << ((8 - (cidr % 8)) % 8);
+}
+
+static __init void print_node(struct allowedips_node *node, u8 bits)
+{
+ char *fmt_connection = KERN_DEBUG "\t\"%p/%d\" -> \"%p/%d\";\n";
+ char *fmt_declaration = KERN_DEBUG
+ "\t\"%p/%d\"[style=%s, color=\"#%06x\"];\n";
+ char *style = "dotted";
+ u8 ip1[16], ip2[16];
+ u32 color = 0;
+
+ if (bits == 32) {
+ fmt_connection = KERN_DEBUG "\t\"%pI4/%d\" -> \"%pI4/%d\";\n";
+ fmt_declaration = KERN_DEBUG
+ "\t\"%pI4/%d\"[style=%s, color=\"#%06x\"];\n";
+ } else if (bits == 128) {
+ fmt_connection = KERN_DEBUG "\t\"%pI6/%d\" -> \"%pI6/%d\";\n";
+ fmt_declaration = KERN_DEBUG
+ "\t\"%pI6/%d\"[style=%s, color=\"#%06x\"];\n";
+ }
+ if (node->peer) {
+ hsiphash_key_t key = { { 0 } };
+
+ memcpy(&key, &node->peer, sizeof(node->peer));
+ color = hsiphash_1u32(0xdeadbeef, &key) % 200 << 16 |
+ hsiphash_1u32(0xbabecafe, &key) % 200 << 8 |
+ hsiphash_1u32(0xabad1dea, &key) % 200;
+ style = "bold";
+ }
+ swap_endian_and_apply_cidr(ip1, node->bits, bits, node->cidr);
+ printk(fmt_declaration, ip1, node->cidr, style, color);
+ if (node->bit[0]) {
+ swap_endian_and_apply_cidr(ip2,
+ rcu_dereference_raw(node->bit[0])->bits, bits,
+ node->cidr);
+ printk(fmt_connection, ip1, node->cidr, ip2,
+ rcu_dereference_raw(node->bit[0])->cidr);
+ print_node(rcu_dereference_raw(node->bit[0]), bits);
+ }
+ if (node->bit[1]) {
+ swap_endian_and_apply_cidr(ip2,
+ rcu_dereference_raw(node->bit[1])->bits,
+ bits, node->cidr);
+ printk(fmt_connection, ip1, node->cidr, ip2,
+ rcu_dereference_raw(node->bit[1])->cidr);
+ print_node(rcu_dereference_raw(node->bit[1]), bits);
+ }
+}
+
+static __init void print_tree(struct allowedips_node __rcu *top, u8 bits)
+{
+ printk(KERN_DEBUG "digraph trie {\n");
+ print_node(rcu_dereference_raw(top), bits);
+ printk(KERN_DEBUG "}\n");
+}
+
+enum {
+ NUM_PEERS = 2000,
+ NUM_RAND_ROUTES = 400,
+ NUM_MUTATED_ROUTES = 100,
+ NUM_QUERIES = NUM_RAND_ROUTES * NUM_MUTATED_ROUTES * 30
+};
+
+struct horrible_allowedips {
+ struct hlist_head head;
+};
+
+struct horrible_allowedips_node {
+ struct hlist_node table;
+ union nf_inet_addr ip;
+ union nf_inet_addr mask;
+ u8 ip_version;
+ void *value;
+};
+
+static __init void horrible_allowedips_init(struct horrible_allowedips *table)
+{
+ INIT_HLIST_HEAD(&table->head);
+}
+
+static __init void horrible_allowedips_free(struct horrible_allowedips *table)
+{
+ struct horrible_allowedips_node *node;
+ struct hlist_node *h;
+
+ hlist_for_each_entry_safe(node, h, &table->head, table) {
+ hlist_del(&node->table);
+ kfree(node);
+ }
+}
+
+static __init inline union nf_inet_addr horrible_cidr_to_mask(u8 cidr)
+{
+ union nf_inet_addr mask;
+
+ memset(&mask, 0x00, 128 / 8);
+ memset(&mask, 0xff, cidr / 8);
+ if (cidr % 32)
+ mask.all[cidr / 32] = (__force u32)htonl(
+ (0xFFFFFFFFUL << (32 - (cidr % 32))) & 0xFFFFFFFFUL);
+ return mask;
+}
+
+static __init inline u8 horrible_mask_to_cidr(union nf_inet_addr subnet)
+{
+ return hweight32(subnet.all[0]) + hweight32(subnet.all[1]) +
+ hweight32(subnet.all[2]) + hweight32(subnet.all[3]);
+}
+
+static __init inline void
+horrible_mask_self(struct horrible_allowedips_node *node)
+{
+ if (node->ip_version == 4) {
+ node->ip.ip &= node->mask.ip;
+ } else if (node->ip_version == 6) {
+ node->ip.ip6[0] &= node->mask.ip6[0];
+ node->ip.ip6[1] &= node->mask.ip6[1];
+ node->ip.ip6[2] &= node->mask.ip6[2];
+ node->ip.ip6[3] &= node->mask.ip6[3];
+ }
+}
+
+static __init inline bool
+horrible_match_v4(const struct horrible_allowedips_node *node,
+ struct in_addr *ip)
+{
+ return (ip->s_addr & node->mask.ip) == node->ip.ip;
+}
+
+static __init inline bool
+horrible_match_v6(const struct horrible_allowedips_node *node,
+ struct in6_addr *ip)
+{
+ return (ip->in6_u.u6_addr32[0] & node->mask.ip6[0]) ==
+ node->ip.ip6[0] &&
+ (ip->in6_u.u6_addr32[1] & node->mask.ip6[1]) ==
+ node->ip.ip6[1] &&
+ (ip->in6_u.u6_addr32[2] & node->mask.ip6[2]) ==
+ node->ip.ip6[2] &&
+ (ip->in6_u.u6_addr32[3] & node->mask.ip6[3]) == node->ip.ip6[3];
+}
+
+static __init void
+horrible_insert_ordered(struct horrible_allowedips *table,
+ struct horrible_allowedips_node *node)
+{
+ struct horrible_allowedips_node *other = NULL, *where = NULL;
+ u8 my_cidr = horrible_mask_to_cidr(node->mask);
+
+ hlist_for_each_entry(other, &table->head, table) {
+ if (!memcmp(&other->mask, &node->mask,
+ sizeof(union nf_inet_addr)) &&
+ !memcmp(&other->ip, &node->ip,
+ sizeof(union nf_inet_addr)) &&
+ other->ip_version == node->ip_version) {
+ other->value = node->value;
+ kfree(node);
+ return;
+ }
+ where = other;
+ if (horrible_mask_to_cidr(other->mask) <= my_cidr)
+ break;
+ }
+ if (!other && !where)
+ hlist_add_head(&node->table, &table->head);
+ else if (!other)
+ hlist_add_behind(&node->table, &where->table);
+ else
+ hlist_add_before(&node->table, &where->table);
+}
+
+static __init int
+horrible_allowedips_insert_v4(struct horrible_allowedips *table,
+ struct in_addr *ip, u8 cidr, void *value)
+{
+ struct horrible_allowedips_node *node = kzalloc(sizeof(*node),
+ GFP_KERNEL);
+
+ if (unlikely(!node))
+ return -ENOMEM;
+ node->ip.in = *ip;
+ node->mask = horrible_cidr_to_mask(cidr);
+ node->ip_version = 4;
+ node->value = value;
+ horrible_mask_self(node);
+ horrible_insert_ordered(table, node);
+ return 0;
+}
+
+static __init int
+horrible_allowedips_insert_v6(struct horrible_allowedips *table,
+ struct in6_addr *ip, u8 cidr, void *value)
+{
+ struct horrible_allowedips_node *node = kzalloc(sizeof(*node),
+ GFP_KERNEL);
+
+ if (unlikely(!node))
+ return -ENOMEM;
+ node->ip.in6 = *ip;
+ node->mask = horrible_cidr_to_mask(cidr);
+ node->ip_version = 6;
+ node->value = value;
+ horrible_mask_self(node);
+ horrible_insert_ordered(table, node);
+ return 0;
+}
+
+static __init void *
+horrible_allowedips_lookup_v4(struct horrible_allowedips *table,
+ struct in_addr *ip)
+{
+ struct horrible_allowedips_node *node;
+ void *ret = NULL;
+
+ hlist_for_each_entry(node, &table->head, table) {
+ if (node->ip_version != 4)
+ continue;
+ if (horrible_match_v4(node, ip)) {
+ ret = node->value;
+ break;
+ }
+ }
+ return ret;
+}
+
+static __init void *
+horrible_allowedips_lookup_v6(struct horrible_allowedips *table,
+ struct in6_addr *ip)
+{
+ struct horrible_allowedips_node *node;
+ void *ret = NULL;
+
+ hlist_for_each_entry(node, &table->head, table) {
+ if (node->ip_version != 6)
+ continue;
+ if (horrible_match_v6(node, ip)) {
+ ret = node->value;
+ break;
+ }
+ }
+ return ret;
+}
+
+static __init bool randomized_test(void)
+{
+ unsigned int i, j, k, mutate_amount, cidr;
+ u8 ip[16], mutate_mask[16], mutated[16];
+ struct wg_peer **peers, *peer;
+ struct horrible_allowedips h;
+ DEFINE_MUTEX(mutex);
+ struct allowedips t;
+ bool ret = false;
+
+ mutex_init(&mutex);
+
+ wg_allowedips_init(&t);
+ horrible_allowedips_init(&h);
+
+ peers = kcalloc(NUM_PEERS, sizeof(*peers), GFP_KERNEL);
+ if (unlikely(!peers)) {
+ pr_err("allowedips random self-test malloc: FAIL\n");
+ goto free;
+ }
+ for (i = 0; i < NUM_PEERS; ++i) {
+ peers[i] = kzalloc(sizeof(*peers[i]), GFP_KERNEL);
+ if (unlikely(!peers[i])) {
+ pr_err("allowedips random self-test malloc: FAIL\n");
+ goto free;
+ }
+ kref_init(&peers[i]->refcount);
+ }
+
+ mutex_lock(&mutex);
+
+ for (i = 0; i < NUM_RAND_ROUTES; ++i) {
+ prandom_bytes(ip, 4);
+ cidr = prandom_u32_max(32) + 1;
+ peer = peers[prandom_u32_max(NUM_PEERS)];
+ if (wg_allowedips_insert_v4(&t, (struct in_addr *)ip, cidr,
+ peer, &mutex) < 0) {
+ pr_err("allowedips random self-test malloc: FAIL\n");
+ goto free_locked;
+ }
+ if (horrible_allowedips_insert_v4(&h, (struct in_addr *)ip,
+ cidr, peer) < 0) {
+ pr_err("allowedips random self-test malloc: FAIL\n");
+ goto free_locked;
+ }
+ for (j = 0; j < NUM_MUTATED_ROUTES; ++j) {
+ memcpy(mutated, ip, 4);
+ prandom_bytes(mutate_mask, 4);
+ mutate_amount = prandom_u32_max(32);
+ for (k = 0; k < mutate_amount / 8; ++k)
+ mutate_mask[k] = 0xff;
+ mutate_mask[k] = 0xff
+ << ((8 - (mutate_amount % 8)) % 8);
+ for (; k < 4; ++k)
+ mutate_mask[k] = 0;
+ for (k = 0; k < 4; ++k)
+ mutated[k] = (mutated[k] & mutate_mask[k]) |
+ (~mutate_mask[k] &
+ prandom_u32_max(256));
+ cidr = prandom_u32_max(32) + 1;
+ peer = peers[prandom_u32_max(NUM_PEERS)];
+ if (wg_allowedips_insert_v4(&t,
+ (struct in_addr *)mutated,
+ cidr, peer, &mutex) < 0) {
+ pr_err("allowedips random malloc: FAIL\n");
+ goto free_locked;
+ }
+ if (horrible_allowedips_insert_v4(&h,
+ (struct in_addr *)mutated, cidr, peer)) {
+ pr_err("allowedips random self-test malloc: FAIL\n");
+ goto free_locked;
+ }
+ }
+ }
+
+ for (i = 0; i < NUM_RAND_ROUTES; ++i) {
+ prandom_bytes(ip, 16);
+ cidr = prandom_u32_max(128) + 1;
+ peer = peers[prandom_u32_max(NUM_PEERS)];
+ if (wg_allowedips_insert_v6(&t, (struct in6_addr *)ip, cidr,
+ peer, &mutex) < 0) {
+ pr_err("allowedips random self-test malloc: FAIL\n");
+ goto free_locked;
+ }
+ if (horrible_allowedips_insert_v6(&h, (struct in6_addr *)ip,
+ cidr, peer) < 0) {
+ pr_err("allowedips random self-test malloc: FAIL\n");
+ goto free_locked;
+ }
+ for (j = 0; j < NUM_MUTATED_ROUTES; ++j) {
+ memcpy(mutated, ip, 16);
+ prandom_bytes(mutate_mask, 16);
+ mutate_amount = prandom_u32_max(128);
+ for (k = 0; k < mutate_amount / 8; ++k)
+ mutate_mask[k] = 0xff;
+ mutate_mask[k] = 0xff
+ << ((8 - (mutate_amount % 8)) % 8);
+ for (; k < 4; ++k)
+ mutate_mask[k] = 0;
+ for (k = 0; k < 4; ++k)
+ mutated[k] = (mutated[k] & mutate_mask[k]) |
+ (~mutate_mask[k] &
+ prandom_u32_max(256));
+ cidr = prandom_u32_max(128) + 1;
+ peer = peers[prandom_u32_max(NUM_PEERS)];
+ if (wg_allowedips_insert_v6(&t,
+ (struct in6_addr *)mutated,
+ cidr, peer, &mutex) < 0) {
+ pr_err("allowedips random self-test malloc: FAIL\n");
+ goto free_locked;
+ }
+ if (horrible_allowedips_insert_v6(
+ &h, (struct in6_addr *)mutated, cidr,
+ peer)) {
+ pr_err("allowedips random self-test malloc: FAIL\n");
+ goto free_locked;
+ }
+ }
+ }
+
+ mutex_unlock(&mutex);
+
+ if (IS_ENABLED(DEBUG_PRINT_TRIE_GRAPHVIZ)) {
+ print_tree(t.root4, 32);
+ print_tree(t.root6, 128);
+ }
+
+ for (i = 0; i < NUM_QUERIES; ++i) {
+ prandom_bytes(ip, 4);
+ if (lookup(t.root4, 32, ip) !=
+ horrible_allowedips_lookup_v4(&h, (struct in_addr *)ip)) {
+ pr_err("allowedips random self-test: FAIL\n");
+ goto free;
+ }
+ }
+
+ for (i = 0; i < NUM_QUERIES; ++i) {
+ prandom_bytes(ip, 16);
+ if (lookup(t.root6, 128, ip) !=
+ horrible_allowedips_lookup_v6(&h, (struct in6_addr *)ip)) {
+ pr_err("allowedips random self-test: FAIL\n");
+ goto free;
+ }
+ }
+ ret = true;
+
+free:
+ mutex_lock(&mutex);
+free_locked:
+ wg_allowedips_free(&t, &mutex);
+ mutex_unlock(&mutex);
+ horrible_allowedips_free(&h);
+ if (peers) {
+ for (i = 0; i < NUM_PEERS; ++i)
+ kfree(peers[i]);
+ }
+ kfree(peers);
+ return ret;
+}
+
+static __init inline struct in_addr *ip4(u8 a, u8 b, u8 c, u8 d)
+{
+ static struct in_addr ip;
+ u8 *split = (u8 *)&ip;
+
+ split[0] = a;
+ split[1] = b;
+ split[2] = c;
+ split[3] = d;
+ return &ip;
+}
+
+static __init inline struct in6_addr *ip6(u32 a, u32 b, u32 c, u32 d)
+{
+ static struct in6_addr ip;
+ __be32 *split = (__be32 *)&ip;
+
+ split[0] = cpu_to_be32(a);
+ split[1] = cpu_to_be32(b);
+ split[2] = cpu_to_be32(c);
+ split[3] = cpu_to_be32(d);
+ return &ip;
+}
+
+static __init struct wg_peer *init_peer(void)
+{
+ struct wg_peer *peer = kzalloc(sizeof(*peer), GFP_KERNEL);
+
+ if (!peer)
+ return NULL;
+ kref_init(&peer->refcount);
+ INIT_LIST_HEAD(&peer->allowedips_list);
+ return peer;
+}
+
+#define insert(version, mem, ipa, ipb, ipc, ipd, cidr) \
+ wg_allowedips_insert_v##version(&t, ip##version(ipa, ipb, ipc, ipd), \
+ cidr, mem, &mutex)
+
+#define maybe_fail() do { \
+ ++i; \
+ if (!_s) { \
+ pr_info("allowedips self-test %zu: FAIL\n", i); \
+ success = false; \
+ } \
+ } while (0)
+
+#define test(version, mem, ipa, ipb, ipc, ipd) do { \
+ bool _s = lookup(t.root##version, (version) == 4 ? 32 : 128, \
+ ip##version(ipa, ipb, ipc, ipd)) == (mem); \
+ maybe_fail(); \
+ } while (0)
+
+#define test_negative(version, mem, ipa, ipb, ipc, ipd) do { \
+ bool _s = lookup(t.root##version, (version) == 4 ? 32 : 128, \
+ ip##version(ipa, ipb, ipc, ipd)) != (mem); \
+ maybe_fail(); \
+ } while (0)
+
+#define test_boolean(cond) do { \
+ bool _s = (cond); \
+ maybe_fail(); \
+ } while (0)
+
+bool __init wg_allowedips_selftest(void)
+{
+ bool found_a = false, found_b = false, found_c = false, found_d = false,
+ found_e = false, found_other = false;
+ struct wg_peer *a = init_peer(), *b = init_peer(), *c = init_peer(),
+ *d = init_peer(), *e = init_peer(), *f = init_peer(),
+ *g = init_peer(), *h = init_peer();
+ struct allowedips_node *iter_node;
+ bool success = false;
+ struct allowedips t;
+ DEFINE_MUTEX(mutex);
+ struct in6_addr ip;
+ size_t i = 0, count = 0;
+ __be64 part;
+
+ mutex_init(&mutex);
+ mutex_lock(&mutex);
+ wg_allowedips_init(&t);
+
+ if (!a || !b || !c || !d || !e || !f || !g || !h) {
+ pr_err("allowedips self-test malloc: FAIL\n");
+ goto free;
+ }
+
+ insert(4, a, 192, 168, 4, 0, 24);
+ insert(4, b, 192, 168, 4, 4, 32);
+ insert(4, c, 192, 168, 0, 0, 16);
+ insert(4, d, 192, 95, 5, 64, 27);
+ /* replaces previous entry, and maskself is required */
+ insert(4, c, 192, 95, 5, 65, 27);
+ insert(6, d, 0x26075300, 0x60006b00, 0, 0xc05f0543, 128);
+ insert(6, c, 0x26075300, 0x60006b00, 0, 0, 64);
+ insert(4, e, 0, 0, 0, 0, 0);
+ insert(6, e, 0, 0, 0, 0, 0);
+ /* replaces previous entry */
+ insert(6, f, 0, 0, 0, 0, 0);
+ insert(6, g, 0x24046800, 0, 0, 0, 32);
+ /* maskself is required */
+ insert(6, h, 0x24046800, 0x40040800, 0xdeadbeef, 0xdeadbeef, 64);
+ insert(6, a, 0x24046800, 0x40040800, 0xdeadbeef, 0xdeadbeef, 128);
+ insert(6, c, 0x24446800, 0x40e40800, 0xdeaebeef, 0xdefbeef, 128);
+ insert(6, b, 0x24446800, 0xf0e40800, 0xeeaebeef, 0, 98);
+ insert(4, g, 64, 15, 112, 0, 20);
+ /* maskself is required */
+ insert(4, h, 64, 15, 123, 211, 25);
+ insert(4, a, 10, 0, 0, 0, 25);
+ insert(4, b, 10, 0, 0, 128, 25);
+ insert(4, a, 10, 1, 0, 0, 30);
+ insert(4, b, 10, 1, 0, 4, 30);
+ insert(4, c, 10, 1, 0, 8, 29);
+ insert(4, d, 10, 1, 0, 16, 29);
+
+ if (IS_ENABLED(DEBUG_PRINT_TRIE_GRAPHVIZ)) {
+ print_tree(t.root4, 32);
+ print_tree(t.root6, 128);
+ }
+
+ success = true;
+
+ test(4, a, 192, 168, 4, 20);
+ test(4, a, 192, 168, 4, 0);
+ test(4, b, 192, 168, 4, 4);
+ test(4, c, 192, 168, 200, 182);
+ test(4, c, 192, 95, 5, 68);
+ test(4, e, 192, 95, 5, 96);
+ test(6, d, 0x26075300, 0x60006b00, 0, 0xc05f0543);
+ test(6, c, 0x26075300, 0x60006b00, 0, 0xc02e01ee);
+ test(6, f, 0x26075300, 0x60006b01, 0, 0);
+ test(6, g, 0x24046800, 0x40040806, 0, 0x1006);
+ test(6, g, 0x24046800, 0x40040806, 0x1234, 0x5678);
+ test(6, f, 0x240467ff, 0x40040806, 0x1234, 0x5678);
+ test(6, f, 0x24046801, 0x40040806, 0x1234, 0x5678);
+ test(6, h, 0x24046800, 0x40040800, 0x1234, 0x5678);
+ test(6, h, 0x24046800, 0x40040800, 0, 0);
+ test(6, h, 0x24046800, 0x40040800, 0x10101010, 0x10101010);
+ test(6, a, 0x24046800, 0x40040800, 0xdeadbeef, 0xdeadbeef);
+ test(4, g, 64, 15, 116, 26);
+ test(4, g, 64, 15, 127, 3);
+ test(4, g, 64, 15, 123, 1);
+ test(4, h, 64, 15, 123, 128);
+ test(4, h, 64, 15, 123, 129);
+ test(4, a, 10, 0, 0, 52);
+ test(4, b, 10, 0, 0, 220);
+ test(4, a, 10, 1, 0, 2);
+ test(4, b, 10, 1, 0, 6);
+ test(4, c, 10, 1, 0, 10);
+ test(4, d, 10, 1, 0, 20);
+
+ insert(4, a, 1, 0, 0, 0, 32);
+ insert(4, a, 64, 0, 0, 0, 32);
+ insert(4, a, 128, 0, 0, 0, 32);
+ insert(4, a, 192, 0, 0, 0, 32);
+ insert(4, a, 255, 0, 0, 0, 32);
+ wg_allowedips_remove_by_peer(&t, a, &mutex);
+ test_negative(4, a, 1, 0, 0, 0);
+ test_negative(4, a, 64, 0, 0, 0);
+ test_negative(4, a, 128, 0, 0, 0);
+ test_negative(4, a, 192, 0, 0, 0);
+ test_negative(4, a, 255, 0, 0, 0);
+
+ wg_allowedips_free(&t, &mutex);
+ wg_allowedips_init(&t);
+ insert(4, a, 192, 168, 0, 0, 16);
+ insert(4, a, 192, 168, 0, 0, 24);
+ wg_allowedips_remove_by_peer(&t, a, &mutex);
+ test_negative(4, a, 192, 168, 0, 1);
+
+ /* These will hit the WARN_ON(len >= 128) in free_node if something
+ * goes wrong.
+ */
+ for (i = 0; i < 128; ++i) {
+ part = cpu_to_be64(~(1LLU << (i % 64)));
+ memset(&ip, 0xff, 16);
+ memcpy((u8 *)&ip + (i < 64) * 8, &part, 8);
+ wg_allowedips_insert_v6(&t, &ip, 128, a, &mutex);
+ }
+
+ wg_allowedips_free(&t, &mutex);
+
+ wg_allowedips_init(&t);
+ insert(4, a, 192, 95, 5, 93, 27);
+ insert(6, a, 0x26075300, 0x60006b00, 0, 0xc05f0543, 128);
+ insert(4, a, 10, 1, 0, 20, 29);
+ insert(6, a, 0x26075300, 0x6d8a6bf8, 0xdab1f1df, 0xc05f1523, 83);
+ insert(6, a, 0x26075300, 0x6d8a6bf8, 0xdab1f1df, 0xc05f1523, 21);
+ list_for_each_entry(iter_node, &a->allowedips_list, peer_list) {
+ u8 cidr, ip[16] __aligned(__alignof(u64));
+ int family = wg_allowedips_read_node(iter_node, ip, &cidr);
+
+ count++;
+
+ if (cidr == 27 && family == AF_INET &&
+ !memcmp(ip, ip4(192, 95, 5, 64), sizeof(struct in_addr)))
+ found_a = true;
+ else if (cidr == 128 && family == AF_INET6 &&
+ !memcmp(ip, ip6(0x26075300, 0x60006b00, 0, 0xc05f0543),
+ sizeof(struct in6_addr)))
+ found_b = true;
+ else if (cidr == 29 && family == AF_INET &&
+ !memcmp(ip, ip4(10, 1, 0, 16), sizeof(struct in_addr)))
+ found_c = true;
+ else if (cidr == 83 && family == AF_INET6 &&
+ !memcmp(ip, ip6(0x26075300, 0x6d8a6bf8, 0xdab1e000, 0),
+ sizeof(struct in6_addr)))
+ found_d = true;
+ else if (cidr == 21 && family == AF_INET6 &&
+ !memcmp(ip, ip6(0x26075000, 0, 0, 0),
+ sizeof(struct in6_addr)))
+ found_e = true;
+ else
+ found_other = true;
+ }
+ test_boolean(count == 5);
+ test_boolean(found_a);
+ test_boolean(found_b);
+ test_boolean(found_c);
+ test_boolean(found_d);
+ test_boolean(found_e);
+ test_boolean(!found_other);
+
+ if (IS_ENABLED(DEBUG_RANDOM_TRIE) && success)
+ success = randomized_test();
+
+ if (success)
+ pr_info("allowedips self-tests: pass\n");
+
+free:
+ wg_allowedips_free(&t, &mutex);
+ kfree(a);
+ kfree(b);
+ kfree(c);
+ kfree(d);
+ kfree(e);
+ kfree(f);
+ kfree(g);
+ kfree(h);
+ mutex_unlock(&mutex);
+
+ return success;
+}
+
+#undef test_negative
+#undef test
+#undef remove
+#undef insert
+#undef init_peer
+
+#endif
diff --git a/drivers/net/wireguard/selftest/counter.c b/drivers/net/wireguard/selftest/counter.c
new file mode 100644
index 000000000000..f4fbb9072ed7
--- /dev/null
+++ b/drivers/net/wireguard/selftest/counter.c
@@ -0,0 +1,104 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <[email protected]>. All Rights Reserved.
+ */
+
+#ifdef DEBUG
+bool __init wg_packet_counter_selftest(void)
+{
+ unsigned int test_num = 0, i;
+ union noise_counter counter;
+ bool success = true;
+
+#define T_INIT do { \
+ memset(&counter, 0, sizeof(union noise_counter)); \
+ spin_lock_init(&counter.receive.lock); \
+ } while (0)
+#define T_LIM (COUNTER_WINDOW_SIZE + 1)
+#define T(n, v) do { \
+ ++test_num; \
+ if (counter_validate(&counter, n) != (v)) { \
+ pr_err("nonce counter self-test %u: FAIL\n", \
+ test_num); \
+ success = false; \
+ } \
+ } while (0)
+
+ T_INIT;
+ /* 1 */ T(0, true);
+ /* 2 */ T(1, true);
+ /* 3 */ T(1, false);
+ /* 4 */ T(9, true);
+ /* 5 */ T(8, true);
+ /* 6 */ T(7, true);
+ /* 7 */ T(7, false);
+ /* 8 */ T(T_LIM, true);
+ /* 9 */ T(T_LIM - 1, true);
+ /* 10 */ T(T_LIM - 1, false);
+ /* 11 */ T(T_LIM - 2, true);
+ /* 12 */ T(2, true);
+ /* 13 */ T(2, false);
+ /* 14 */ T(T_LIM + 16, true);
+ /* 15 */ T(3, false);
+ /* 16 */ T(T_LIM + 16, false);
+ /* 17 */ T(T_LIM * 4, true);
+ /* 18 */ T(T_LIM * 4 - (T_LIM - 1), true);
+ /* 19 */ T(10, false);
+ /* 20 */ T(T_LIM * 4 - T_LIM, false);
+ /* 21 */ T(T_LIM * 4 - (T_LIM + 1), false);
+ /* 22 */ T(T_LIM * 4 - (T_LIM - 2), true);
+ /* 23 */ T(T_LIM * 4 + 1 - T_LIM, false);
+ /* 24 */ T(0, false);
+ /* 25 */ T(REJECT_AFTER_MESSAGES, false);
+ /* 26 */ T(REJECT_AFTER_MESSAGES - 1, true);
+ /* 27 */ T(REJECT_AFTER_MESSAGES, false);
+ /* 28 */ T(REJECT_AFTER_MESSAGES - 1, false);
+ /* 29 */ T(REJECT_AFTER_MESSAGES - 2, true);
+ /* 30 */ T(REJECT_AFTER_MESSAGES + 1, false);
+ /* 31 */ T(REJECT_AFTER_MESSAGES + 2, false);
+ /* 32 */ T(REJECT_AFTER_MESSAGES - 2, false);
+ /* 33 */ T(REJECT_AFTER_MESSAGES - 3, true);
+ /* 34 */ T(0, false);
+
+ T_INIT;
+ for (i = 1; i <= COUNTER_WINDOW_SIZE; ++i)
+ T(i, true);
+ T(0, true);
+ T(0, false);
+
+ T_INIT;
+ for (i = 2; i <= COUNTER_WINDOW_SIZE + 1; ++i)
+ T(i, true);
+ T(1, true);
+ T(0, false);
+
+ T_INIT;
+ for (i = COUNTER_WINDOW_SIZE + 1; i-- > 0;)
+ T(i, true);
+
+ T_INIT;
+ for (i = COUNTER_WINDOW_SIZE + 2; i-- > 1;)
+ T(i, true);
+ T(0, false);
+
+ T_INIT;
+ for (i = COUNTER_WINDOW_SIZE + 1; i-- > 1;)
+ T(i, true);
+ T(COUNTER_WINDOW_SIZE + 1, true);
+ T(0, false);
+
+ T_INIT;
+ for (i = COUNTER_WINDOW_SIZE + 1; i-- > 1;)
+ T(i, true);
+ T(0, true);
+ T(COUNTER_WINDOW_SIZE + 1, true);
+
+#undef T
+#undef T_LIM
+#undef T_INIT
+
+ if (success)
+ pr_info("nonce counter self-tests: pass\n");
+ return success;
+}
+#endif
diff --git a/drivers/net/wireguard/selftest/ratelimiter.c b/drivers/net/wireguard/selftest/ratelimiter.c
new file mode 100644
index 000000000000..bcd6462e4540
--- /dev/null
+++ b/drivers/net/wireguard/selftest/ratelimiter.c
@@ -0,0 +1,226 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <[email protected]>. All Rights Reserved.
+ */
+
+#ifdef DEBUG
+
+#include <linux/jiffies.h>
+
+static const struct {
+ bool result;
+ unsigned int msec_to_sleep_before;
+} expected_results[] __initconst = {
+ [0 ... PACKETS_BURSTABLE - 1] = { true, 0 },
+ [PACKETS_BURSTABLE] = { false, 0 },
+ [PACKETS_BURSTABLE + 1] = { true, MSEC_PER_SEC / PACKETS_PER_SECOND },
+ [PACKETS_BURSTABLE + 2] = { false, 0 },
+ [PACKETS_BURSTABLE + 3] = { true, (MSEC_PER_SEC / PACKETS_PER_SECOND) * 2 },
+ [PACKETS_BURSTABLE + 4] = { true, 0 },
+ [PACKETS_BURSTABLE + 5] = { false, 0 }
+};
+
+static __init unsigned int maximum_jiffies_at_index(int index)
+{
+ unsigned int total_msecs = 2 * MSEC_PER_SEC / PACKETS_PER_SECOND / 3;
+ int i;
+
+ for (i = 0; i <= index; ++i)
+ total_msecs += expected_results[i].msec_to_sleep_before;
+ return msecs_to_jiffies(total_msecs);
+}
+
+static __init int timings_test(struct sk_buff *skb4, struct iphdr *hdr4,
+ struct sk_buff *skb6, struct ipv6hdr *hdr6,
+ int *test)
+{
+ unsigned long loop_start_time;
+ int i;
+
+ wg_ratelimiter_gc_entries(NULL);
+ rcu_barrier();
+ loop_start_time = jiffies;
+
+ for (i = 0; i < ARRAY_SIZE(expected_results); ++i) {
+ if (expected_results[i].msec_to_sleep_before)
+ msleep(expected_results[i].msec_to_sleep_before);
+
+ if (time_is_before_jiffies(loop_start_time +
+ maximum_jiffies_at_index(i)))
+ return -ETIMEDOUT;
+ if (wg_ratelimiter_allow(skb4, &init_net) !=
+ expected_results[i].result)
+ return -EXFULL;
+ ++(*test);
+
+ hdr4->saddr = htonl(ntohl(hdr4->saddr) + i + 1);
+ if (time_is_before_jiffies(loop_start_time +
+ maximum_jiffies_at_index(i)))
+ return -ETIMEDOUT;
+ if (!wg_ratelimiter_allow(skb4, &init_net))
+ return -EXFULL;
+ ++(*test);
+
+ hdr4->saddr = htonl(ntohl(hdr4->saddr) - i - 1);
+
+#if IS_ENABLED(CONFIG_IPV6)
+ hdr6->saddr.in6_u.u6_addr32[2] = htonl(i);
+ hdr6->saddr.in6_u.u6_addr32[3] = htonl(i);
+ if (time_is_before_jiffies(loop_start_time +
+ maximum_jiffies_at_index(i)))
+ return -ETIMEDOUT;
+ if (wg_ratelimiter_allow(skb6, &init_net) !=
+ expected_results[i].result)
+ return -EXFULL;
+ ++(*test);
+
+ hdr6->saddr.in6_u.u6_addr32[0] =
+ htonl(ntohl(hdr6->saddr.in6_u.u6_addr32[0]) + i + 1);
+ if (time_is_before_jiffies(loop_start_time +
+ maximum_jiffies_at_index(i)))
+ return -ETIMEDOUT;
+ if (!wg_ratelimiter_allow(skb6, &init_net))
+ return -EXFULL;
+ ++(*test);
+
+ hdr6->saddr.in6_u.u6_addr32[0] =
+ htonl(ntohl(hdr6->saddr.in6_u.u6_addr32[0]) - i - 1);
+
+ if (time_is_before_jiffies(loop_start_time +
+ maximum_jiffies_at_index(i)))
+ return -ETIMEDOUT;
+#endif
+ }
+ return 0;
+}
+
+static __init int capacity_test(struct sk_buff *skb4, struct iphdr *hdr4,
+ int *test)
+{
+ int i;
+
+ wg_ratelimiter_gc_entries(NULL);
+ rcu_barrier();
+
+ if (atomic_read(&total_entries))
+ return -EXFULL;
+ ++(*test);
+
+ for (i = 0; i <= max_entries; ++i) {
+ hdr4->saddr = htonl(i);
+ if (wg_ratelimiter_allow(skb4, &init_net) != (i != max_entries))
+ return -EXFULL;
+ ++(*test);
+ }
+ return 0;
+}
+
+bool __init wg_ratelimiter_selftest(void)
+{
+ enum { TRIALS_BEFORE_GIVING_UP = 5000 };
+ bool success = false;
+ int test = 0, trials;
+ struct sk_buff *skb4, *skb6;
+ struct iphdr *hdr4;
+ struct ipv6hdr *hdr6;
+
+ if (IS_ENABLED(CONFIG_KASAN) || IS_ENABLED(CONFIG_UBSAN))
+ return true;
+
+ BUILD_BUG_ON(MSEC_PER_SEC % PACKETS_PER_SECOND != 0);
+
+ if (wg_ratelimiter_init())
+ goto out;
+ ++test;
+ if (wg_ratelimiter_init()) {
+ wg_ratelimiter_uninit();
+ goto out;
+ }
+ ++test;
+ if (wg_ratelimiter_init()) {
+ wg_ratelimiter_uninit();
+ wg_ratelimiter_uninit();
+ goto out;
+ }
+ ++test;
+
+ skb4 = alloc_skb(sizeof(struct iphdr), GFP_KERNEL);
+ if (unlikely(!skb4))
+ goto err_nofree;
+ skb4->protocol = htons(ETH_P_IP);
+ hdr4 = (struct iphdr *)skb_put(skb4, sizeof(*hdr4));
+ hdr4->saddr = htonl(8182);
+ skb_reset_network_header(skb4);
+ ++test;
+
+#if IS_ENABLED(CONFIG_IPV6)
+ skb6 = alloc_skb(sizeof(struct ipv6hdr), GFP_KERNEL);
+ if (unlikely(!skb6)) {
+ kfree_skb(skb4);
+ goto err_nofree;
+ }
+ skb6->protocol = htons(ETH_P_IPV6);
+ hdr6 = (struct ipv6hdr *)skb_put(skb6, sizeof(*hdr6));
+ hdr6->saddr.in6_u.u6_addr32[0] = htonl(1212);
+ hdr6->saddr.in6_u.u6_addr32[1] = htonl(289188);
+ skb_reset_network_header(skb6);
+ ++test;
+#endif
+
+ for (trials = TRIALS_BEFORE_GIVING_UP;;) {
+ int test_count = 0, ret;
+
+ ret = timings_test(skb4, hdr4, skb6, hdr6, &test_count);
+ if (ret == -ETIMEDOUT) {
+ if (!trials--) {
+ test += test_count;
+ goto err;
+ }
+ msleep(500);
+ continue;
+ } else if (ret < 0) {
+ test += test_count;
+ goto err;
+ } else {
+ test += test_count;
+ break;
+ }
+ }
+
+ for (trials = TRIALS_BEFORE_GIVING_UP;;) {
+ int test_count = 0;
+
+ if (capacity_test(skb4, hdr4, &test_count) < 0) {
+ if (!trials--) {
+ test += test_count;
+ goto err;
+ }
+ msleep(50);
+ continue;
+ }
+ test += test_count;
+ break;
+ }
+
+ success = true;
+
+err:
+ kfree_skb(skb4);
+#if IS_ENABLED(CONFIG_IPV6)
+ kfree_skb(skb6);
+#endif
+err_nofree:
+ wg_ratelimiter_uninit();
+ wg_ratelimiter_uninit();
+ wg_ratelimiter_uninit();
+ /* Uninit one extra time to check underflow detection. */
+ wg_ratelimiter_uninit();
+out:
+ if (success)
+ pr_info("ratelimiter self-tests: pass\n");
+ else
+ pr_err("ratelimiter self-test %d: FAIL\n", test);
+
+ return success;
+}
+#endif
diff --git a/drivers/net/wireguard/send.c b/drivers/net/wireguard/send.c
new file mode 100644
index 000000000000..c13260563446
--- /dev/null
+++ b/drivers/net/wireguard/send.c
@@ -0,0 +1,413 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <[email protected]>. All Rights Reserved.
+ */
+
+#include "queueing.h"
+#include "timers.h"
+#include "device.h"
+#include "peer.h"
+#include "socket.h"
+#include "messages.h"
+#include "cookie.h"
+
+#include <linux/uio.h>
+#include <linux/inetdevice.h>
+#include <linux/socket.h>
+#include <net/ip_tunnels.h>
+#include <net/udp.h>
+#include <net/sock.h>
+
+static void wg_packet_send_handshake_initiation(struct wg_peer *peer)
+{
+ struct message_handshake_initiation packet;
+
+ if (!wg_birthdate_has_expired(atomic64_read(&peer->last_sent_handshake),
+ REKEY_TIMEOUT))
+ return; /* This function is rate limited. */
+
+ atomic64_set(&peer->last_sent_handshake, ktime_get_coarse_boottime_ns());
+ net_dbg_ratelimited("%s: Sending handshake initiation to peer %llu (%pISpfsc)\n",
+ peer->device->dev->name, peer->internal_id,
+ &peer->endpoint.addr);
+
+ if (wg_noise_handshake_create_initiation(&packet, &peer->handshake)) {
+ wg_cookie_add_mac_to_packet(&packet, sizeof(packet), peer);
+ wg_timers_any_authenticated_packet_traversal(peer);
+ wg_timers_any_authenticated_packet_sent(peer);
+ atomic64_set(&peer->last_sent_handshake,
+ ktime_get_coarse_boottime_ns());
+ wg_socket_send_buffer_to_peer(peer, &packet, sizeof(packet),
+ HANDSHAKE_DSCP);
+ wg_timers_handshake_initiated(peer);
+ }
+}
+
+void wg_packet_handshake_send_worker(struct work_struct *work)
+{
+ struct wg_peer *peer = container_of(work, struct wg_peer,
+ transmit_handshake_work);
+
+ wg_packet_send_handshake_initiation(peer);
+ wg_peer_put(peer);
+}
+
+void wg_packet_send_queued_handshake_initiation(struct wg_peer *peer,
+ bool is_retry)
+{
+ if (!is_retry)
+ peer->timer_handshake_attempts = 0;
+
+ rcu_read_lock_bh();
+ /* We check last_sent_handshake here in addition to the actual function
+ * we're queueing up, so that we don't queue things if not strictly
+ * necessary:
+ */
+ if (!wg_birthdate_has_expired(atomic64_read(&peer->last_sent_handshake),
+ REKEY_TIMEOUT) ||
+ unlikely(READ_ONCE(peer->is_dead)))
+ goto out;
+
+ wg_peer_get(peer);
+ /* Queues up calling packet_send_queued_handshakes(peer), where we do a
+ * peer_put(peer) after:
+ */
+ if (!queue_work(peer->device->handshake_send_wq,
+ &peer->transmit_handshake_work))
+ /* If the work was already queued, we want to drop the
+ * extra reference:
+ */
+ wg_peer_put(peer);
+out:
+ rcu_read_unlock_bh();
+}
+
+void wg_packet_send_handshake_response(struct wg_peer *peer)
+{
+ struct message_handshake_response packet;
+
+ atomic64_set(&peer->last_sent_handshake, ktime_get_coarse_boottime_ns());
+ net_dbg_ratelimited("%s: Sending handshake response to peer %llu (%pISpfsc)\n",
+ peer->device->dev->name, peer->internal_id,
+ &peer->endpoint.addr);
+
+ if (wg_noise_handshake_create_response(&packet, &peer->handshake)) {
+ wg_cookie_add_mac_to_packet(&packet, sizeof(packet), peer);
+ if (wg_noise_handshake_begin_session(&peer->handshake,
+ &peer->keypairs)) {
+ wg_timers_session_derived(peer);
+ wg_timers_any_authenticated_packet_traversal(peer);
+ wg_timers_any_authenticated_packet_sent(peer);
+ atomic64_set(&peer->last_sent_handshake,
+ ktime_get_coarse_boottime_ns());
+ wg_socket_send_buffer_to_peer(peer, &packet,
+ sizeof(packet),
+ HANDSHAKE_DSCP);
+ }
+ }
+}
+
+void wg_packet_send_handshake_cookie(struct wg_device *wg,
+ struct sk_buff *initiating_skb,
+ __le32 sender_index)
+{
+ struct message_handshake_cookie packet;
+
+ net_dbg_skb_ratelimited("%s: Sending cookie response for denied handshake message for %pISpfsc\n",
+ wg->dev->name, initiating_skb);
+ wg_cookie_message_create(&packet, initiating_skb, sender_index,
+ &wg->cookie_checker);
+ wg_socket_send_buffer_as_reply_to_skb(wg, initiating_skb, &packet,
+ sizeof(packet));
+}
+
+static void keep_key_fresh(struct wg_peer *peer)
+{
+ struct noise_keypair *keypair;
+ bool send = false;
+
+ rcu_read_lock_bh();
+ keypair = rcu_dereference_bh(peer->keypairs.current_keypair);
+ if (likely(keypair && READ_ONCE(keypair->sending.is_valid)) &&
+ (unlikely(atomic64_read(&keypair->sending.counter.counter) >
+ REKEY_AFTER_MESSAGES) ||
+ (keypair->i_am_the_initiator &&
+ unlikely(wg_birthdate_has_expired(keypair->sending.birthdate,
+ REKEY_AFTER_TIME)))))
+ send = true;
+ rcu_read_unlock_bh();
+
+ if (send)
+ wg_packet_send_queued_handshake_initiation(peer, false);
+}
+
+static unsigned int calculate_skb_padding(struct sk_buff *skb)
+{
+ /* We do this modulo business with the MTU, just in case the networking
+ * layer gives us a packet that's bigger than the MTU. In that case, we
+ * wouldn't want the final subtraction to overflow in the case of the
+ * padded_size being clamped.
+ */
+ unsigned int last_unit = skb->len % PACKET_CB(skb)->mtu;
+ unsigned int padded_size = ALIGN(last_unit, MESSAGE_PADDING_MULTIPLE);
+
+ if (padded_size > PACKET_CB(skb)->mtu)
+ padded_size = PACKET_CB(skb)->mtu;
+ return padded_size - last_unit;
+}
+
+static bool encrypt_packet(struct sk_buff *skb, struct noise_keypair *keypair)
+{
+ unsigned int padding_len, plaintext_len, trailer_len;
+ struct scatterlist sg[MAX_SKB_FRAGS + 8];
+ struct message_data *header;
+ struct sk_buff *trailer;
+ int num_frags;
+
+ /* Calculate lengths. */
+ padding_len = calculate_skb_padding(skb);
+ trailer_len = padding_len + noise_encrypted_len(0);
+ plaintext_len = skb->len + padding_len;
+
+ /* Expand data section to have room for padding and auth tag. */
+ num_frags = skb_cow_data(skb, trailer_len, &trailer);
+ if (unlikely(num_frags < 0 || num_frags > ARRAY_SIZE(sg)))
+ return false;
+
+ /* Set the padding to zeros, and make sure it and the auth tag are part
+ * of the skb.
+ */
+ memset(skb_tail_pointer(trailer), 0, padding_len);
+
+ /* Expand head section to have room for our header and the network
+ * stack's headers.
+ */
+ if (unlikely(skb_cow_head(skb, DATA_PACKET_HEAD_ROOM) < 0))
+ return false;
+
+ /* Finalize checksum calculation for the inner packet, if required. */
+ if (unlikely(skb->ip_summed == CHECKSUM_PARTIAL &&
+ skb_checksum_help(skb)))
+ return false;
+
+ /* Only after checksumming can we safely add on the padding at the end
+ * and the header.
+ */
+ skb_set_inner_network_header(skb, 0);
+ header = (struct message_data *)skb_push(skb, sizeof(*header));
+ header->header.type = cpu_to_le32(MESSAGE_DATA);
+ header->key_idx = keypair->remote_index;
+ header->counter = cpu_to_le64(PACKET_CB(skb)->nonce);
+ pskb_put(skb, trailer, trailer_len);
+
+ /* Now we can encrypt the scattergather segments */
+ sg_init_table(sg, num_frags);
+ if (skb_to_sgvec(skb, sg, sizeof(struct message_data),
+ noise_encrypted_len(plaintext_len)) <= 0)
+ return false;
+ return chacha20poly1305_encrypt_sg_inplace(sg, plaintext_len, NULL, 0,
+ PACKET_CB(skb)->nonce,
+ keypair->sending.key);
+}
+
+void wg_packet_send_keepalive(struct wg_peer *peer)
+{
+ struct sk_buff *skb;
+
+ if (skb_queue_empty(&peer->staged_packet_queue)) {
+ skb = alloc_skb(DATA_PACKET_HEAD_ROOM + MESSAGE_MINIMUM_LENGTH,
+ GFP_ATOMIC);
+ if (unlikely(!skb))
+ return;
+ skb_reserve(skb, DATA_PACKET_HEAD_ROOM);
+ skb->dev = peer->device->dev;
+ PACKET_CB(skb)->mtu = skb->dev->mtu;
+ skb_queue_tail(&peer->staged_packet_queue, skb);
+ net_dbg_ratelimited("%s: Sending keepalive packet to peer %llu (%pISpfsc)\n",
+ peer->device->dev->name, peer->internal_id,
+ &peer->endpoint.addr);
+ }
+
+ wg_packet_send_staged_packets(peer);
+}
+
+static void wg_packet_create_data_done(struct sk_buff *first,
+ struct wg_peer *peer)
+{
+ struct sk_buff *skb, *next;
+ bool is_keepalive, data_sent = false;
+
+ wg_timers_any_authenticated_packet_traversal(peer);
+ wg_timers_any_authenticated_packet_sent(peer);
+ skb_list_walk_safe(first, skb, next) {
+ is_keepalive = skb->len == message_data_len(0);
+ if (likely(!wg_socket_send_skb_to_peer(peer, skb,
+ PACKET_CB(skb)->ds) && !is_keepalive))
+ data_sent = true;
+ }
+
+ if (likely(data_sent))
+ wg_timers_data_sent(peer);
+
+ keep_key_fresh(peer);
+}
+
+void wg_packet_tx_worker(struct work_struct *work)
+{
+ struct crypt_queue *queue = container_of(work, struct crypt_queue,
+ work);
+ struct noise_keypair *keypair;
+ enum packet_state state;
+ struct sk_buff *first;
+ struct wg_peer *peer;
+
+ while ((first = __ptr_ring_peek(&queue->ring)) != NULL &&
+ (state = atomic_read_acquire(&PACKET_CB(first)->state)) !=
+ PACKET_STATE_UNCRYPTED) {
+ __ptr_ring_discard_one(&queue->ring);
+ peer = PACKET_PEER(first);
+ keypair = PACKET_CB(first)->keypair;
+
+ if (likely(state == PACKET_STATE_CRYPTED))
+ wg_packet_create_data_done(first, peer);
+ else
+ kfree_skb_list(first);
+
+ wg_noise_keypair_put(keypair, false);
+ wg_peer_put(peer);
+ }
+}
+
+void wg_packet_encrypt_worker(struct work_struct *work)
+{
+ struct crypt_queue *queue = container_of(work, struct multicore_worker,
+ work)->ptr;
+ struct sk_buff *first, *skb, *next;
+
+ while ((first = ptr_ring_consume_bh(&queue->ring)) != NULL) {
+ enum packet_state state = PACKET_STATE_CRYPTED;
+
+ skb_list_walk_safe(first, skb, next) {
+ if (likely(encrypt_packet(skb,
+ PACKET_CB(first)->keypair))) {
+ wg_reset_packet(skb);
+ } else {
+ state = PACKET_STATE_DEAD;
+ break;
+ }
+ }
+ wg_queue_enqueue_per_peer(&PACKET_PEER(first)->tx_queue, first,
+ state);
+
+ }
+}
+
+static void wg_packet_create_data(struct sk_buff *first)
+{
+ struct wg_peer *peer = PACKET_PEER(first);
+ struct wg_device *wg = peer->device;
+ int ret = -EINVAL;
+
+ rcu_read_lock_bh();
+ if (unlikely(READ_ONCE(peer->is_dead)))
+ goto err;
+
+ ret = wg_queue_enqueue_per_device_and_peer(&wg->encrypt_queue,
+ &peer->tx_queue, first,
+ wg->packet_crypt_wq,
+ &wg->encrypt_queue.last_cpu);
+ if (unlikely(ret == -EPIPE))
+ wg_queue_enqueue_per_peer(&peer->tx_queue, first,
+ PACKET_STATE_DEAD);
+err:
+ rcu_read_unlock_bh();
+ if (likely(!ret || ret == -EPIPE))
+ return;
+ wg_noise_keypair_put(PACKET_CB(first)->keypair, false);
+ wg_peer_put(peer);
+ kfree_skb_list(first);
+}
+
+void wg_packet_purge_staged_packets(struct wg_peer *peer)
+{
+ spin_lock_bh(&peer->staged_packet_queue.lock);
+ peer->device->dev->stats.tx_dropped += peer->staged_packet_queue.qlen;
+ __skb_queue_purge(&peer->staged_packet_queue);
+ spin_unlock_bh(&peer->staged_packet_queue.lock);
+}
+
+void wg_packet_send_staged_packets(struct wg_peer *peer)
+{
+ struct noise_symmetric_key *key;
+ struct noise_keypair *keypair;
+ struct sk_buff_head packets;
+ struct sk_buff *skb;
+
+ /* Steal the current queue into our local one. */
+ __skb_queue_head_init(&packets);
+ spin_lock_bh(&peer->staged_packet_queue.lock);
+ skb_queue_splice_init(&peer->staged_packet_queue, &packets);
+ spin_unlock_bh(&peer->staged_packet_queue.lock);
+ if (unlikely(skb_queue_empty(&packets)))
+ return;
+
+ /* First we make sure we have a valid reference to a valid key. */
+ rcu_read_lock_bh();
+ keypair = wg_noise_keypair_get(
+ rcu_dereference_bh(peer->keypairs.current_keypair));
+ rcu_read_unlock_bh();
+ if (unlikely(!keypair))
+ goto out_nokey;
+ key = &keypair->sending;
+ if (unlikely(!READ_ONCE(key->is_valid)))
+ goto out_nokey;
+ if (unlikely(wg_birthdate_has_expired(key->birthdate,
+ REJECT_AFTER_TIME)))
+ goto out_invalid;
+
+ /* After we know we have a somewhat valid key, we now try to assign
+ * nonces to all of the packets in the queue. If we can't assign nonces
+ * for all of them, we just consider it a failure and wait for the next
+ * handshake.
+ */
+ skb_queue_walk(&packets, skb) {
+ /* 0 for no outer TOS: no leak. TODO: at some later point, we
+ * might consider using flowi->tos as outer instead.
+ */
+ PACKET_CB(skb)->ds = ip_tunnel_ecn_encap(0, ip_hdr(skb), skb);
+ PACKET_CB(skb)->nonce =
+ atomic64_inc_return(&key->counter.counter) - 1;
+ if (unlikely(PACKET_CB(skb)->nonce >= REJECT_AFTER_MESSAGES))
+ goto out_invalid;
+ }
+
+ packets.prev->next = NULL;
+ wg_peer_get(keypair->entry.peer);
+ PACKET_CB(packets.next)->keypair = keypair;
+ wg_packet_create_data(packets.next);
+ return;
+
+out_invalid:
+ WRITE_ONCE(key->is_valid, false);
+out_nokey:
+ wg_noise_keypair_put(keypair, false);
+
+ /* We orphan the packets if we're waiting on a handshake, so that they
+ * don't block a socket's pool.
+ */
+ skb_queue_walk(&packets, skb)
+ skb_orphan(skb);
+ /* Then we put them back on the top of the queue. We're not too
+ * concerned about accidentally getting things a little out of order if
+ * packets are being added really fast, because this queue is for before
+ * packets can even be sent and it's small anyway.
+ */
+ spin_lock_bh(&peer->staged_packet_queue.lock);
+ skb_queue_splice(&packets, &peer->staged_packet_queue);
+ spin_unlock_bh(&peer->staged_packet_queue.lock);
+
+ /* If we're exiting because there's something wrong with the key, it
+ * means we should initiate a new handshake.
+ */
+ wg_packet_send_queued_handshake_initiation(peer, false);
+}
diff --git a/drivers/net/wireguard/socket.c b/drivers/net/wireguard/socket.c
new file mode 100644
index 000000000000..262f3b5c819d
--- /dev/null
+++ b/drivers/net/wireguard/socket.c
@@ -0,0 +1,438 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <[email protected]>. All Rights Reserved.
+ */
+
+#include "device.h"
+#include "peer.h"
+#include "socket.h"
+#include "queueing.h"
+#include "messages.h"
+
+#include <linux/ctype.h>
+#include <linux/net.h>
+#include <linux/if_vlan.h>
+#include <linux/if_ether.h>
+#include <linux/inetdevice.h>
+#include <net/udp_tunnel.h>
+#include <net/ipv6.h>
+
+static int send4(struct wg_device *wg, struct sk_buff *skb,
+ struct endpoint *endpoint, u8 ds, struct dst_cache *cache)
+{
+ struct flowi4 fl = {
+ .saddr = endpoint->src4.s_addr,
+ .daddr = endpoint->addr4.sin_addr.s_addr,
+ .fl4_dport = endpoint->addr4.sin_port,
+ .flowi4_mark = wg->fwmark,
+ .flowi4_proto = IPPROTO_UDP
+ };
+ struct rtable *rt = NULL;
+ struct sock *sock;
+ int ret = 0;
+
+ skb_mark_not_on_list(skb);
+ skb->dev = wg->dev;
+ skb->mark = wg->fwmark;
+
+ rcu_read_lock_bh();
+ sock = rcu_dereference_bh(wg->sock4);
+
+ if (unlikely(!sock)) {
+ ret = -ENONET;
+ goto err;
+ }
+
+ fl.fl4_sport = inet_sk(sock)->inet_sport;
+
+ if (cache)
+ rt = dst_cache_get_ip4(cache, &fl.saddr);
+
+ if (!rt) {
+ security_sk_classify_flow(sock, flowi4_to_flowi(&fl));
+ if (unlikely(!inet_confirm_addr(sock_net(sock), NULL, 0,
+ fl.saddr, RT_SCOPE_HOST))) {
+ endpoint->src4.s_addr = 0;
+ *(__force __be32 *)&endpoint->src_if4 = 0;
+ fl.saddr = 0;
+ if (cache)
+ dst_cache_reset(cache);
+ }
+ rt = ip_route_output_flow(sock_net(sock), &fl, sock);
+ if (unlikely(endpoint->src_if4 && ((IS_ERR(rt) &&
+ PTR_ERR(rt) == -EINVAL) || (!IS_ERR(rt) &&
+ rt->dst.dev->ifindex != endpoint->src_if4)))) {
+ endpoint->src4.s_addr = 0;
+ *(__force __be32 *)&endpoint->src_if4 = 0;
+ fl.saddr = 0;
+ if (cache)
+ dst_cache_reset(cache);
+ if (!IS_ERR(rt))
+ ip_rt_put(rt);
+ rt = ip_route_output_flow(sock_net(sock), &fl, sock);
+ }
+ if (unlikely(IS_ERR(rt))) {
+ ret = PTR_ERR(rt);
+ net_dbg_ratelimited("%s: No route to %pISpfsc, error %d\n",
+ wg->dev->name, &endpoint->addr, ret);
+ goto err;
+ } else if (unlikely(rt->dst.dev == skb->dev)) {
+ ip_rt_put(rt);
+ ret = -ELOOP;
+ net_dbg_ratelimited("%s: Avoiding routing loop to %pISpfsc\n",
+ wg->dev->name, &endpoint->addr);
+ goto err;
+ }
+ if (cache)
+ dst_cache_set_ip4(cache, &rt->dst, fl.saddr);
+ }
+
+ skb->ignore_df = 1;
+ udp_tunnel_xmit_skb(rt, sock, skb, fl.saddr, fl.daddr, ds,
+ ip4_dst_hoplimit(&rt->dst), 0, fl.fl4_sport,
+ fl.fl4_dport, false, false);
+ goto out;
+
+err:
+ kfree_skb(skb);
+out:
+ rcu_read_unlock_bh();
+ return ret;
+}
+
+static int send6(struct wg_device *wg, struct sk_buff *skb,
+ struct endpoint *endpoint, u8 ds, struct dst_cache *cache)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ struct flowi6 fl = {
+ .saddr = endpoint->src6,
+ .daddr = endpoint->addr6.sin6_addr,
+ .fl6_dport = endpoint->addr6.sin6_port,
+ .flowi6_mark = wg->fwmark,
+ .flowi6_oif = endpoint->addr6.sin6_scope_id,
+ .flowi6_proto = IPPROTO_UDP
+ /* TODO: addr->sin6_flowinfo */
+ };
+ struct dst_entry *dst = NULL;
+ struct sock *sock;
+ int ret = 0;
+
+ skb_mark_not_on_list(skb);
+ skb->dev = wg->dev;
+ skb->mark = wg->fwmark;
+
+ rcu_read_lock_bh();
+ sock = rcu_dereference_bh(wg->sock6);
+
+ if (unlikely(!sock)) {
+ ret = -ENONET;
+ goto err;
+ }
+
+ fl.fl6_sport = inet_sk(sock)->inet_sport;
+
+ if (cache)
+ dst = dst_cache_get_ip6(cache, &fl.saddr);
+
+ if (!dst) {
+ security_sk_classify_flow(sock, flowi6_to_flowi(&fl));
+ if (unlikely(!ipv6_addr_any(&fl.saddr) &&
+ !ipv6_chk_addr(sock_net(sock), &fl.saddr, NULL, 0))) {
+ endpoint->src6 = fl.saddr = in6addr_any;
+ if (cache)
+ dst_cache_reset(cache);
+ }
+ dst = ipv6_stub->ipv6_dst_lookup_flow(sock_net(sock), sock, &fl,
+ NULL);
+ if (unlikely(IS_ERR(dst))) {
+ ret = PTR_ERR(dst);
+ net_dbg_ratelimited("%s: No route to %pISpfsc, error %d\n",
+ wg->dev->name, &endpoint->addr, ret);
+ goto err;
+ } else if (unlikely(dst->dev == skb->dev)) {
+ dst_release(dst);
+ ret = -ELOOP;
+ net_dbg_ratelimited("%s: Avoiding routing loop to %pISpfsc\n",
+ wg->dev->name, &endpoint->addr);
+ goto err;
+ }
+ if (cache)
+ dst_cache_set_ip6(cache, dst, &fl.saddr);
+ }
+
+ skb->ignore_df = 1;
+ udp_tunnel6_xmit_skb(dst, sock, skb, skb->dev, &fl.saddr, &fl.daddr, ds,
+ ip6_dst_hoplimit(dst), 0, fl.fl6_sport,
+ fl.fl6_dport, false);
+ goto out;
+
+err:
+ kfree_skb(skb);
+out:
+ rcu_read_unlock_bh();
+ return ret;
+#else
+ return -EAFNOSUPPORT;
+#endif
+}
+
+int wg_socket_send_skb_to_peer(struct wg_peer *peer, struct sk_buff *skb, u8 ds)
+{
+ size_t skb_len = skb->len;
+ int ret = -EAFNOSUPPORT;
+
+ read_lock_bh(&peer->endpoint_lock);
+ if (peer->endpoint.addr.sa_family == AF_INET)
+ ret = send4(peer->device, skb, &peer->endpoint, ds,
+ &peer->endpoint_cache);
+ else if (peer->endpoint.addr.sa_family == AF_INET6)
+ ret = send6(peer->device, skb, &peer->endpoint, ds,
+ &peer->endpoint_cache);
+ else
+ dev_kfree_skb(skb);
+ if (likely(!ret))
+ peer->tx_bytes += skb_len;
+ read_unlock_bh(&peer->endpoint_lock);
+
+ return ret;
+}
+
+int wg_socket_send_buffer_to_peer(struct wg_peer *peer, void *buffer,
+ size_t len, u8 ds)
+{
+ struct sk_buff *skb = alloc_skb(len + SKB_HEADER_LEN, GFP_ATOMIC);
+
+ if (unlikely(!skb))
+ return -ENOMEM;
+
+ skb_reserve(skb, SKB_HEADER_LEN);
+ skb_set_inner_network_header(skb, 0);
+ skb_put_data(skb, buffer, len);
+ return wg_socket_send_skb_to_peer(peer, skb, ds);
+}
+
+int wg_socket_send_buffer_as_reply_to_skb(struct wg_device *wg,
+ struct sk_buff *in_skb, void *buffer,
+ size_t len)
+{
+ int ret = 0;
+ struct sk_buff *skb;
+ struct endpoint endpoint;
+
+ if (unlikely(!in_skb))
+ return -EINVAL;
+ ret = wg_socket_endpoint_from_skb(&endpoint, in_skb);
+ if (unlikely(ret < 0))
+ return ret;
+
+ skb = alloc_skb(len + SKB_HEADER_LEN, GFP_ATOMIC);
+ if (unlikely(!skb))
+ return -ENOMEM;
+ skb_reserve(skb, SKB_HEADER_LEN);
+ skb_set_inner_network_header(skb, 0);
+ skb_put_data(skb, buffer, len);
+
+ if (endpoint.addr.sa_family == AF_INET)
+ ret = send4(wg, skb, &endpoint, 0, NULL);
+ else if (endpoint.addr.sa_family == AF_INET6)
+ ret = send6(wg, skb, &endpoint, 0, NULL);
+ /* No other possibilities if the endpoint is valid, which it is,
+ * as we checked above.
+ */
+
+ return ret;
+}
+
+int wg_socket_endpoint_from_skb(struct endpoint *endpoint,
+ const struct sk_buff *skb)
+{
+ memset(endpoint, 0, sizeof(*endpoint));
+ if (skb->protocol == htons(ETH_P_IP)) {
+ endpoint->addr4.sin_family = AF_INET;
+ endpoint->addr4.sin_port = udp_hdr(skb)->source;
+ endpoint->addr4.sin_addr.s_addr = ip_hdr(skb)->saddr;
+ endpoint->src4.s_addr = ip_hdr(skb)->daddr;
+ endpoint->src_if4 = skb->skb_iif;
+ } else if (skb->protocol == htons(ETH_P_IPV6)) {
+ endpoint->addr6.sin6_family = AF_INET6;
+ endpoint->addr6.sin6_port = udp_hdr(skb)->source;
+ endpoint->addr6.sin6_addr = ipv6_hdr(skb)->saddr;
+ endpoint->addr6.sin6_scope_id = ipv6_iface_scope_id(
+ &ipv6_hdr(skb)->saddr, skb->skb_iif);
+ endpoint->src6 = ipv6_hdr(skb)->daddr;
+ } else {
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static bool endpoint_eq(const struct endpoint *a, const struct endpoint *b)
+{
+ return (a->addr.sa_family == AF_INET && b->addr.sa_family == AF_INET &&
+ a->addr4.sin_port == b->addr4.sin_port &&
+ a->addr4.sin_addr.s_addr == b->addr4.sin_addr.s_addr &&
+ a->src4.s_addr == b->src4.s_addr && a->src_if4 == b->src_if4) ||
+ (a->addr.sa_family == AF_INET6 &&
+ b->addr.sa_family == AF_INET6 &&
+ a->addr6.sin6_port == b->addr6.sin6_port &&
+ ipv6_addr_equal(&a->addr6.sin6_addr, &b->addr6.sin6_addr) &&
+ a->addr6.sin6_scope_id == b->addr6.sin6_scope_id &&
+ ipv6_addr_equal(&a->src6, &b->src6)) ||
+ unlikely(!a->addr.sa_family && !b->addr.sa_family);
+}
+
+void wg_socket_set_peer_endpoint(struct wg_peer *peer,
+ const struct endpoint *endpoint)
+{
+ /* First we check unlocked, in order to optimize, since it's pretty rare
+ * that an endpoint will change. If we happen to be mid-write, and two
+ * CPUs wind up writing the same thing or something slightly different,
+ * it doesn't really matter much either.
+ */
+ if (endpoint_eq(endpoint, &peer->endpoint))
+ return;
+ write_lock_bh(&peer->endpoint_lock);
+ if (endpoint->addr.sa_family == AF_INET) {
+ peer->endpoint.addr4 = endpoint->addr4;
+ peer->endpoint.src4 = endpoint->src4;
+ peer->endpoint.src_if4 = endpoint->src_if4;
+ } else if (endpoint->addr.sa_family == AF_INET6) {
+ peer->endpoint.addr6 = endpoint->addr6;
+ peer->endpoint.src6 = endpoint->src6;
+ } else {
+ goto out;
+ }
+ dst_cache_reset(&peer->endpoint_cache);
+out:
+ write_unlock_bh(&peer->endpoint_lock);
+}
+
+void wg_socket_set_peer_endpoint_from_skb(struct wg_peer *peer,
+ const struct sk_buff *skb)
+{
+ struct endpoint endpoint;
+
+ if (!wg_socket_endpoint_from_skb(&endpoint, skb))
+ wg_socket_set_peer_endpoint(peer, &endpoint);
+}
+
+void wg_socket_clear_peer_endpoint_src(struct wg_peer *peer)
+{
+ write_lock_bh(&peer->endpoint_lock);
+ memset(&peer->endpoint.src6, 0, sizeof(peer->endpoint.src6));
+ dst_cache_reset(&peer->endpoint_cache);
+ write_unlock_bh(&peer->endpoint_lock);
+}
+
+static int wg_receive(struct sock *sk, struct sk_buff *skb)
+{
+ struct wg_device *wg;
+
+ if (unlikely(!sk))
+ goto err;
+ wg = sk->sk_user_data;
+ if (unlikely(!wg))
+ goto err;
+ skb_mark_not_on_list(skb);
+ wg_packet_receive(wg, skb);
+ return 0;
+
+err:
+ kfree_skb(skb);
+ return 0;
+}
+
+static void sock_free(struct sock *sock)
+{
+ if (unlikely(!sock))
+ return;
+ sk_clear_memalloc(sock);
+ udp_tunnel_sock_release(sock->sk_socket);
+}
+
+static void set_sock_opts(struct socket *sock)
+{
+ sock->sk->sk_allocation = GFP_ATOMIC;
+ sock->sk->sk_sndbuf = INT_MAX;
+ sk_set_memalloc(sock->sk);
+}
+
+int wg_socket_init(struct wg_device *wg, u16 port)
+{
+ int ret;
+ struct udp_tunnel_sock_cfg cfg = {
+ .sk_user_data = wg,
+ .encap_type = 1,
+ .encap_rcv = wg_receive
+ };
+ struct socket *new4 = NULL, *new6 = NULL;
+ struct udp_port_cfg port4 = {
+ .family = AF_INET,
+ .local_ip.s_addr = htonl(INADDR_ANY),
+ .local_udp_port = htons(port),
+ .use_udp_checksums = true
+ };
+#if IS_ENABLED(CONFIG_IPV6)
+ int retries = 0;
+ struct udp_port_cfg port6 = {
+ .family = AF_INET6,
+ .local_ip6 = IN6ADDR_ANY_INIT,
+ .use_udp6_tx_checksums = true,
+ .use_udp6_rx_checksums = true,
+ .ipv6_v6only = true
+ };
+#endif
+
+#if IS_ENABLED(CONFIG_IPV6)
+retry:
+#endif
+
+ ret = udp_sock_create(wg->creating_net, &port4, &new4);
+ if (ret < 0) {
+ pr_err("%s: Could not create IPv4 socket\n", wg->dev->name);
+ return ret;
+ }
+ set_sock_opts(new4);
+ setup_udp_tunnel_sock(wg->creating_net, new4, &cfg);
+
+#if IS_ENABLED(CONFIG_IPV6)
+ if (ipv6_mod_enabled()) {
+ port6.local_udp_port = inet_sk(new4->sk)->inet_sport;
+ ret = udp_sock_create(wg->creating_net, &port6, &new6);
+ if (ret < 0) {
+ udp_tunnel_sock_release(new4);
+ if (ret == -EADDRINUSE && !port && retries++ < 100)
+ goto retry;
+ pr_err("%s: Could not create IPv6 socket\n",
+ wg->dev->name);
+ return ret;
+ }
+ set_sock_opts(new6);
+ setup_udp_tunnel_sock(wg->creating_net, new6, &cfg);
+ }
+#endif
+
+ wg_socket_reinit(wg, new4->sk, new6 ? new6->sk : NULL);
+ return 0;
+}
+
+void wg_socket_reinit(struct wg_device *wg, struct sock *new4,
+ struct sock *new6)
+{
+ struct sock *old4, *old6;
+
+ mutex_lock(&wg->socket_update_lock);
+ old4 = rcu_dereference_protected(wg->sock4,
+ lockdep_is_held(&wg->socket_update_lock));
+ old6 = rcu_dereference_protected(wg->sock6,
+ lockdep_is_held(&wg->socket_update_lock));
+ rcu_assign_pointer(wg->sock4, new4);
+ rcu_assign_pointer(wg->sock6, new6);
+ if (new4)
+ wg->incoming_port = ntohs(inet_sk(new4)->inet_sport);
+ mutex_unlock(&wg->socket_update_lock);
+ synchronize_rcu();
+ synchronize_net();
+ sock_free(old4);
+ sock_free(old6);
+}
diff --git a/drivers/net/wireguard/socket.h b/drivers/net/wireguard/socket.h
new file mode 100644
index 000000000000..bab5848efbcd
--- /dev/null
+++ b/drivers/net/wireguard/socket.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <[email protected]>. All Rights Reserved.
+ */
+
+#ifndef _WG_SOCKET_H
+#define _WG_SOCKET_H
+
+#include <linux/netdevice.h>
+#include <linux/udp.h>
+#include <linux/if_vlan.h>
+#include <linux/if_ether.h>
+
+int wg_socket_init(struct wg_device *wg, u16 port);
+void wg_socket_reinit(struct wg_device *wg, struct sock *new4,
+ struct sock *new6);
+int wg_socket_send_buffer_to_peer(struct wg_peer *peer, void *data,
+ size_t len, u8 ds);
+int wg_socket_send_skb_to_peer(struct wg_peer *peer, struct sk_buff *skb,
+ u8 ds);
+int wg_socket_send_buffer_as_reply_to_skb(struct wg_device *wg,
+ struct sk_buff *in_skb,
+ void *out_buffer, size_t len);
+
+int wg_socket_endpoint_from_skb(struct endpoint *endpoint,
+ const struct sk_buff *skb);
+void wg_socket_set_peer_endpoint(struct wg_peer *peer,
+ const struct endpoint *endpoint);
+void wg_socket_set_peer_endpoint_from_skb(struct wg_peer *peer,
+ const struct sk_buff *skb);
+void wg_socket_clear_peer_endpoint_src(struct wg_peer *peer);
+
+#if defined(CONFIG_DYNAMIC_DEBUG) || defined(DEBUG)
+#define net_dbg_skb_ratelimited(fmt, dev, skb, ...) do { \
+ struct endpoint __endpoint; \
+ wg_socket_endpoint_from_skb(&__endpoint, skb); \
+ net_dbg_ratelimited(fmt, dev, &__endpoint.addr, \
+ ##__VA_ARGS__); \
+ } while (0)
+#else
+#define net_dbg_skb_ratelimited(fmt, skb, ...)
+#endif
+
+#endif /* _WG_SOCKET_H */
diff --git a/drivers/net/wireguard/timers.c b/drivers/net/wireguard/timers.c
new file mode 100644
index 000000000000..d54d32ac9bc4
--- /dev/null
+++ b/drivers/net/wireguard/timers.c
@@ -0,0 +1,243 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <[email protected]>. All Rights Reserved.
+ */
+
+#include "timers.h"
+#include "device.h"
+#include "peer.h"
+#include "queueing.h"
+#include "socket.h"
+
+/*
+ * - Timer for retransmitting the handshake if we don't hear back after
+ * `REKEY_TIMEOUT + jitter` ms.
+ *
+ * - Timer for sending empty packet if we have received a packet but after have
+ * not sent one for `KEEPALIVE_TIMEOUT` ms.
+ *
+ * - Timer for initiating new handshake if we have sent a packet but after have
+ * not received one (even empty) for `(KEEPALIVE_TIMEOUT + REKEY_TIMEOUT) +
+ * jitter` ms.
+ *
+ * - Timer for zeroing out all ephemeral keys after `(REJECT_AFTER_TIME * 3)` ms
+ * if no new keys have been received.
+ *
+ * - Timer for, if enabled, sending an empty authenticated packet every user-
+ * specified seconds.
+ */
+
+static inline void mod_peer_timer(struct wg_peer *peer,
+ struct timer_list *timer,
+ unsigned long expires)
+{
+ rcu_read_lock_bh();
+ if (likely(netif_running(peer->device->dev) &&
+ !READ_ONCE(peer->is_dead)))
+ mod_timer(timer, expires);
+ rcu_read_unlock_bh();
+}
+
+static void wg_expired_retransmit_handshake(struct timer_list *timer)
+{
+ struct wg_peer *peer = from_timer(peer, timer,
+ timer_retransmit_handshake);
+
+ if (peer->timer_handshake_attempts > MAX_TIMER_HANDSHAKES) {
+ pr_debug("%s: Handshake for peer %llu (%pISpfsc) did not complete after %d attempts, giving up\n",
+ peer->device->dev->name, peer->internal_id,
+ &peer->endpoint.addr, MAX_TIMER_HANDSHAKES + 2);
+
+ del_timer(&peer->timer_send_keepalive);
+ /* We drop all packets without a keypair and don't try again,
+ * if we try unsuccessfully for too long to make a handshake.
+ */
+ wg_packet_purge_staged_packets(peer);
+
+ /* We set a timer for destroying any residue that might be left
+ * of a partial exchange.
+ */
+ if (!timer_pending(&peer->timer_zero_key_material))
+ mod_peer_timer(peer, &peer->timer_zero_key_material,
+ jiffies + REJECT_AFTER_TIME * 3 * HZ);
+ } else {
+ ++peer->timer_handshake_attempts;
+ pr_debug("%s: Handshake for peer %llu (%pISpfsc) did not complete after %d seconds, retrying (try %d)\n",
+ peer->device->dev->name, peer->internal_id,
+ &peer->endpoint.addr, REKEY_TIMEOUT,
+ peer->timer_handshake_attempts + 1);
+
+ /* We clear the endpoint address src address, in case this is
+ * the cause of trouble.
+ */
+ wg_socket_clear_peer_endpoint_src(peer);
+
+ wg_packet_send_queued_handshake_initiation(peer, true);
+ }
+}
+
+static void wg_expired_send_keepalive(struct timer_list *timer)
+{
+ struct wg_peer *peer = from_timer(peer, timer, timer_send_keepalive);
+
+ wg_packet_send_keepalive(peer);
+ if (peer->timer_need_another_keepalive) {
+ peer->timer_need_another_keepalive = false;
+ mod_peer_timer(peer, &peer->timer_send_keepalive,
+ jiffies + KEEPALIVE_TIMEOUT * HZ);
+ }
+}
+
+static void wg_expired_new_handshake(struct timer_list *timer)
+{
+ struct wg_peer *peer = from_timer(peer, timer, timer_new_handshake);
+
+ pr_debug("%s: Retrying handshake with peer %llu (%pISpfsc) because we stopped hearing back after %d seconds\n",
+ peer->device->dev->name, peer->internal_id,
+ &peer->endpoint.addr, KEEPALIVE_TIMEOUT + REKEY_TIMEOUT);
+ /* We clear the endpoint address src address, in case this is the cause
+ * of trouble.
+ */
+ wg_socket_clear_peer_endpoint_src(peer);
+ wg_packet_send_queued_handshake_initiation(peer, false);
+}
+
+static void wg_expired_zero_key_material(struct timer_list *timer)
+{
+ struct wg_peer *peer = from_timer(peer, timer, timer_zero_key_material);
+
+ rcu_read_lock_bh();
+ if (!READ_ONCE(peer->is_dead)) {
+ wg_peer_get(peer);
+ if (!queue_work(peer->device->handshake_send_wq,
+ &peer->clear_peer_work))
+ /* If the work was already on the queue, we want to drop
+ * the extra reference.
+ */
+ wg_peer_put(peer);
+ }
+ rcu_read_unlock_bh();
+}
+
+static void wg_queued_expired_zero_key_material(struct work_struct *work)
+{
+ struct wg_peer *peer = container_of(work, struct wg_peer,
+ clear_peer_work);
+
+ pr_debug("%s: Zeroing out all keys for peer %llu (%pISpfsc), since we haven't received a new one in %d seconds\n",
+ peer->device->dev->name, peer->internal_id,
+ &peer->endpoint.addr, REJECT_AFTER_TIME * 3);
+ wg_noise_handshake_clear(&peer->handshake);
+ wg_noise_keypairs_clear(&peer->keypairs);
+ wg_peer_put(peer);
+}
+
+static void wg_expired_send_persistent_keepalive(struct timer_list *timer)
+{
+ struct wg_peer *peer = from_timer(peer, timer,
+ timer_persistent_keepalive);
+
+ if (likely(peer->persistent_keepalive_interval))
+ wg_packet_send_keepalive(peer);
+}
+
+/* Should be called after an authenticated data packet is sent. */
+void wg_timers_data_sent(struct wg_peer *peer)
+{
+ if (!timer_pending(&peer->timer_new_handshake))
+ mod_peer_timer(peer, &peer->timer_new_handshake,
+ jiffies + (KEEPALIVE_TIMEOUT + REKEY_TIMEOUT) * HZ +
+ prandom_u32_max(REKEY_TIMEOUT_JITTER_MAX_JIFFIES));
+}
+
+/* Should be called after an authenticated data packet is received. */
+void wg_timers_data_received(struct wg_peer *peer)
+{
+ if (likely(netif_running(peer->device->dev))) {
+ if (!timer_pending(&peer->timer_send_keepalive))
+ mod_peer_timer(peer, &peer->timer_send_keepalive,
+ jiffies + KEEPALIVE_TIMEOUT * HZ);
+ else
+ peer->timer_need_another_keepalive = true;
+ }
+}
+
+/* Should be called after any type of authenticated packet is sent, whether
+ * keepalive, data, or handshake.
+ */
+void wg_timers_any_authenticated_packet_sent(struct wg_peer *peer)
+{
+ del_timer(&peer->timer_send_keepalive);
+}
+
+/* Should be called after any type of authenticated packet is received, whether
+ * keepalive, data, or handshake.
+ */
+void wg_timers_any_authenticated_packet_received(struct wg_peer *peer)
+{
+ del_timer(&peer->timer_new_handshake);
+}
+
+/* Should be called after a handshake initiation message is sent. */
+void wg_timers_handshake_initiated(struct wg_peer *peer)
+{
+ mod_peer_timer(peer, &peer->timer_retransmit_handshake,
+ jiffies + REKEY_TIMEOUT * HZ +
+ prandom_u32_max(REKEY_TIMEOUT_JITTER_MAX_JIFFIES));
+}
+
+/* Should be called after a handshake response message is received and processed
+ * or when getting key confirmation via the first data message.
+ */
+void wg_timers_handshake_complete(struct wg_peer *peer)
+{
+ del_timer(&peer->timer_retransmit_handshake);
+ peer->timer_handshake_attempts = 0;
+ peer->sent_lastminute_handshake = false;
+ ktime_get_real_ts64(&peer->walltime_last_handshake);
+}
+
+/* Should be called after an ephemeral key is created, which is before sending a
+ * handshake response or after receiving a handshake response.
+ */
+void wg_timers_session_derived(struct wg_peer *peer)
+{
+ mod_peer_timer(peer, &peer->timer_zero_key_material,
+ jiffies + REJECT_AFTER_TIME * 3 * HZ);
+}
+
+/* Should be called before a packet with authentication, whether
+ * keepalive, data, or handshakem is sent, or after one is received.
+ */
+void wg_timers_any_authenticated_packet_traversal(struct wg_peer *peer)
+{
+ if (peer->persistent_keepalive_interval)
+ mod_peer_timer(peer, &peer->timer_persistent_keepalive,
+ jiffies + peer->persistent_keepalive_interval * HZ);
+}
+
+void wg_timers_init(struct wg_peer *peer)
+{
+ timer_setup(&peer->timer_retransmit_handshake,
+ wg_expired_retransmit_handshake, 0);
+ timer_setup(&peer->timer_send_keepalive, wg_expired_send_keepalive, 0);
+ timer_setup(&peer->timer_new_handshake, wg_expired_new_handshake, 0);
+ timer_setup(&peer->timer_zero_key_material,
+ wg_expired_zero_key_material, 0);
+ timer_setup(&peer->timer_persistent_keepalive,
+ wg_expired_send_persistent_keepalive, 0);
+ INIT_WORK(&peer->clear_peer_work, wg_queued_expired_zero_key_material);
+ peer->timer_handshake_attempts = 0;
+ peer->sent_lastminute_handshake = false;
+ peer->timer_need_another_keepalive = false;
+}
+
+void wg_timers_stop(struct wg_peer *peer)
+{
+ del_timer_sync(&peer->timer_retransmit_handshake);
+ del_timer_sync(&peer->timer_send_keepalive);
+ del_timer_sync(&peer->timer_new_handshake);
+ del_timer_sync(&peer->timer_zero_key_material);
+ del_timer_sync(&peer->timer_persistent_keepalive);
+ flush_work(&peer->clear_peer_work);
+}
diff --git a/drivers/net/wireguard/timers.h b/drivers/net/wireguard/timers.h
new file mode 100644
index 000000000000..f0653dcb1326
--- /dev/null
+++ b/drivers/net/wireguard/timers.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <[email protected]>. All Rights Reserved.
+ */
+
+#ifndef _WG_TIMERS_H
+#define _WG_TIMERS_H
+
+#include <linux/ktime.h>
+
+struct wg_peer;
+
+void wg_timers_init(struct wg_peer *peer);
+void wg_timers_stop(struct wg_peer *peer);
+void wg_timers_data_sent(struct wg_peer *peer);
+void wg_timers_data_received(struct wg_peer *peer);
+void wg_timers_any_authenticated_packet_sent(struct wg_peer *peer);
+void wg_timers_any_authenticated_packet_received(struct wg_peer *peer);
+void wg_timers_handshake_initiated(struct wg_peer *peer);
+void wg_timers_handshake_complete(struct wg_peer *peer);
+void wg_timers_session_derived(struct wg_peer *peer);
+void wg_timers_any_authenticated_packet_traversal(struct wg_peer *peer);
+
+static inline bool wg_birthdate_has_expired(u64 birthday_nanoseconds,
+ u64 expiration_seconds)
+{
+ return (s64)(birthday_nanoseconds + expiration_seconds * NSEC_PER_SEC)
+ <= (s64)ktime_get_coarse_boottime_ns();
+}
+
+#endif /* _WG_TIMERS_H */
diff --git a/drivers/net/wireguard/version.h b/drivers/net/wireguard/version.h
new file mode 100644
index 000000000000..a1a269a11634
--- /dev/null
+++ b/drivers/net/wireguard/version.h
@@ -0,0 +1 @@
+#define WIREGUARD_VERSION "1.0.0"
diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c
index 0969e9bf5d5e..7fee35ff966b 100644
--- a/drivers/net/wireless/ath/ath10k/mac.c
+++ b/drivers/net/wireless/ath/ath10k/mac.c
@@ -8962,6 +8962,7 @@ int ath10k_mac_register(struct ath10k *ar)
wiphy_ext_feature_set(ar->hw->wiphy, NL80211_EXT_FEATURE_VHT_IBSS);
wiphy_ext_feature_set(ar->hw->wiphy,
NL80211_EXT_FEATURE_SET_SCAN_DWELL);
+ wiphy_ext_feature_set(ar->hw->wiphy, NL80211_EXT_FEATURE_AQL);
if (test_bit(WMI_SERVICE_TX_DATA_ACK_RSSI, ar->wmi.svc_map) ||
test_bit(WMI_SERVICE_HTT_MGMT_TX_COMP_VALID_FLAGS, ar->wmi.svc_map))
diff --git a/drivers/net/wireless/ath/ath9k/ath9k_pci_owl_loader.c b/drivers/net/wireless/ath/ath9k/ath9k_pci_owl_loader.c
index 956fa7828d0c..56d1a7764b9f 100644
--- a/drivers/net/wireless/ath/ath9k/ath9k_pci_owl_loader.c
+++ b/drivers/net/wireless/ath/ath9k/ath9k_pci_owl_loader.c
@@ -83,7 +83,7 @@ static int ath9k_pci_fixup(struct pci_dev *pdev, const u16 *cal_data,
val = swahb32(val);
}
- __raw_writel(val, mem + reg);
+ iowrite32(val, mem + reg);
usleep_range(100, 120);
}
diff --git a/drivers/net/wireless/ath/wil6210/ethtool.c b/drivers/net/wireless/ath/wil6210/ethtool.c
index 912c4eaf017b..fef10886ca4a 100644
--- a/drivers/net/wireless/ath/wil6210/ethtool.c
+++ b/drivers/net/wireless/ath/wil6210/ethtool.c
@@ -11,26 +11,6 @@
#include "wil6210.h"
-static int wil_ethtoolops_begin(struct net_device *ndev)
-{
- struct wil6210_priv *wil = ndev_to_wil(ndev);
-
- mutex_lock(&wil->mutex);
-
- wil_dbg_misc(wil, "ethtoolops_begin\n");
-
- return 0;
-}
-
-static void wil_ethtoolops_complete(struct net_device *ndev)
-{
- struct wil6210_priv *wil = ndev_to_wil(ndev);
-
- wil_dbg_misc(wil, "ethtoolops_complete\n");
-
- mutex_unlock(&wil->mutex);
-}
-
static int wil_ethtoolops_get_coalesce(struct net_device *ndev,
struct ethtool_coalesce *cp)
{
@@ -39,11 +19,12 @@ static int wil_ethtoolops_get_coalesce(struct net_device *ndev,
u32 rx_itr_en, rx_itr_val = 0;
int ret;
+ mutex_lock(&wil->mutex);
wil_dbg_misc(wil, "ethtoolops_get_coalesce\n");
ret = wil_pm_runtime_get(wil);
if (ret < 0)
- return ret;
+ goto out;
tx_itr_en = wil_r(wil, RGF_DMA_ITR_TX_CNT_CTL);
if (tx_itr_en & BIT_DMA_ITR_TX_CNT_CTL_EN)
@@ -57,7 +38,11 @@ static int wil_ethtoolops_get_coalesce(struct net_device *ndev,
cp->tx_coalesce_usecs = tx_itr_val;
cp->rx_coalesce_usecs = rx_itr_val;
- return 0;
+ ret = 0;
+
+out:
+ mutex_unlock(&wil->mutex);
+ return ret;
}
static int wil_ethtoolops_set_coalesce(struct net_device *ndev,
@@ -67,12 +52,14 @@ static int wil_ethtoolops_set_coalesce(struct net_device *ndev,
struct wireless_dev *wdev = ndev->ieee80211_ptr;
int ret;
+ mutex_lock(&wil->mutex);
wil_dbg_misc(wil, "ethtoolops_set_coalesce: rx %d usec, tx %d usec\n",
cp->rx_coalesce_usecs, cp->tx_coalesce_usecs);
if (wdev->iftype == NL80211_IFTYPE_MONITOR) {
wil_dbg_misc(wil, "No IRQ coalescing in monitor mode\n");
- return -EINVAL;
+ ret = -EINVAL;
+ goto out;
}
/* only @rx_coalesce_usecs and @tx_coalesce_usecs supported,
@@ -88,24 +75,26 @@ static int wil_ethtoolops_set_coalesce(struct net_device *ndev,
ret = wil_pm_runtime_get(wil);
if (ret < 0)
- return ret;
+ goto out;
wil->txrx_ops.configure_interrupt_moderation(wil);
wil_pm_runtime_put(wil);
+ ret = 0;
- return 0;
+out:
+ mutex_unlock(&wil->mutex);
+ return ret;
out_bad:
wil_dbg_misc(wil, "Unsupported coalescing params. Raw command:\n");
print_hex_dump_debug("DBG[MISC] coal ", DUMP_PREFIX_OFFSET, 16, 4,
cp, sizeof(*cp), false);
+ mutex_unlock(&wil->mutex);
return -EINVAL;
}
static const struct ethtool_ops wil_ethtool_ops = {
- .begin = wil_ethtoolops_begin,
- .complete = wil_ethtoolops_complete,
.get_drvinfo = cfg80211_get_drvinfo,
.get_coalesce = wil_ethtoolops_get_coalesce,
.set_coalesce = wil_ethtoolops_set_coalesce,
diff --git a/drivers/net/wireless/intel/ipw2x00/ipw2100.c b/drivers/net/wireless/intel/ipw2x00/ipw2100.c
index 0579554ed4b3..3c505636b7cc 100644
--- a/drivers/net/wireless/intel/ipw2x00/ipw2100.c
+++ b/drivers/net/wireless/intel/ipw2x00/ipw2100.c
@@ -5834,7 +5834,7 @@ static int ipw2100_close(struct net_device *dev)
/*
* TODO: Fix this function... its just wrong
*/
-static void ipw2100_tx_timeout(struct net_device *dev)
+static void ipw2100_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct ipw2100_priv *priv = libipw_priv(dev);
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
index dc5c02fbc65a..6a241d37a057 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
@@ -847,10 +847,7 @@ iwl_mvm_tx_tso_segment(struct sk_buff *skb, unsigned int num_subframes,
else if (next)
consume_skb(skb);
- while (next) {
- tmp = next;
- next = tmp->next;
-
+ skb_list_walk_safe(next, tmp, next) {
memcpy(tmp->cb, cb, sizeof(tmp->cb));
/*
* Compute the length of all the data added for the A-MSDU.
@@ -880,9 +877,7 @@ iwl_mvm_tx_tso_segment(struct sk_buff *skb, unsigned int num_subframes,
skb_shinfo(tmp)->gso_size = 0;
}
- tmp->prev = NULL;
- tmp->next = NULL;
-
+ skb_mark_not_on_list(tmp);
__skb_queue_tail(mpdus_skb, tmp);
i++;
}
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
index 9d8ffbecf5be..97f227f3cbc3 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
@@ -1142,17 +1142,17 @@ static int iwl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
/* same thing for QuZ... */
if (iwl_trans->hw_rev == CSR_HW_REV_TYPE_QUZ) {
if (cfg == &iwl_ax101_cfg_qu_hr)
- iwl_trans->cfg = &iwl_ax101_cfg_quz_hr;
+ cfg = &iwl_ax101_cfg_quz_hr;
else if (cfg == &iwl_ax201_cfg_qu_hr)
- iwl_trans->cfg = &iwl_ax201_cfg_quz_hr;
+ cfg = &iwl_ax201_cfg_quz_hr;
else if (cfg == &iwl9461_2ac_cfg_qu_b0_jf_b0)
- iwl_trans->cfg = &iwl9461_2ac_cfg_quz_a0_jf_b0_soc;
+ cfg = &iwl9461_2ac_cfg_quz_a0_jf_b0_soc;
else if (cfg == &iwl9462_2ac_cfg_qu_b0_jf_b0)
- iwl_trans->cfg = &iwl9462_2ac_cfg_quz_a0_jf_b0_soc;
+ cfg = &iwl9462_2ac_cfg_quz_a0_jf_b0_soc;
else if (cfg == &iwl9560_2ac_cfg_qu_b0_jf_b0)
- iwl_trans->cfg = &iwl9560_2ac_cfg_quz_a0_jf_b0_soc;
+ cfg = &iwl9560_2ac_cfg_quz_a0_jf_b0_soc;
else if (cfg == &iwl9560_2ac_160_cfg_qu_b0_jf_b0)
- iwl_trans->cfg = &iwl9560_2ac_160_cfg_quz_a0_jf_b0_soc;
+ cfg = &iwl9560_2ac_160_cfg_quz_a0_jf_b0_soc;
}
#endif
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c
index c1bb7150489a..19a2c72081ab 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c
@@ -57,24 +57,6 @@
#include "internal.h"
#include "fw/dbg.h"
-static int iwl_pcie_gen2_force_power_gating(struct iwl_trans *trans)
-{
- iwl_set_bits_prph(trans, HPM_HIPM_GEN_CFG,
- HPM_HIPM_GEN_CFG_CR_FORCE_ACTIVE);
- udelay(20);
- iwl_set_bits_prph(trans, HPM_HIPM_GEN_CFG,
- HPM_HIPM_GEN_CFG_CR_PG_EN |
- HPM_HIPM_GEN_CFG_CR_SLP_EN);
- udelay(20);
- iwl_clear_bits_prph(trans, HPM_HIPM_GEN_CFG,
- HPM_HIPM_GEN_CFG_CR_FORCE_ACTIVE);
-
- iwl_trans_sw_reset(trans);
- iwl_clear_bit(trans, CSR_GP_CNTRL, CSR_GP_CNTRL_REG_FLAG_INIT_DONE);
-
- return 0;
-}
-
/*
* Start up NIC's basic functionality after it has been reset
* (e.g. after platform boot, or shutdown via iwl_pcie_apm_stop())
@@ -110,13 +92,6 @@ int iwl_pcie_gen2_apm_init(struct iwl_trans *trans)
iwl_pcie_apm_config(trans);
- if (trans->trans_cfg->device_family == IWL_DEVICE_FAMILY_22000 &&
- trans->cfg->integrated) {
- ret = iwl_pcie_gen2_force_power_gating(trans);
- if (ret)
- return ret;
- }
-
ret = iwl_finish_nic_init(trans, trans->trans_cfg);
if (ret)
return ret;
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
index 3db1313f9270..7267134a4b0a 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
@@ -1778,6 +1778,29 @@ static int iwl_trans_pcie_clear_persistence_bit(struct iwl_trans *trans)
return 0;
}
+static int iwl_pcie_gen2_force_power_gating(struct iwl_trans *trans)
+{
+ int ret;
+
+ ret = iwl_finish_nic_init(trans, trans->trans_cfg);
+ if (ret < 0)
+ return ret;
+
+ iwl_set_bits_prph(trans, HPM_HIPM_GEN_CFG,
+ HPM_HIPM_GEN_CFG_CR_FORCE_ACTIVE);
+ udelay(20);
+ iwl_set_bits_prph(trans, HPM_HIPM_GEN_CFG,
+ HPM_HIPM_GEN_CFG_CR_PG_EN |
+ HPM_HIPM_GEN_CFG_CR_SLP_EN);
+ udelay(20);
+ iwl_clear_bits_prph(trans, HPM_HIPM_GEN_CFG,
+ HPM_HIPM_GEN_CFG_CR_FORCE_ACTIVE);
+
+ iwl_trans_pcie_sw_reset(trans);
+
+ return 0;
+}
+
static int _iwl_trans_pcie_start_hw(struct iwl_trans *trans)
{
struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
@@ -1797,6 +1820,13 @@ static int _iwl_trans_pcie_start_hw(struct iwl_trans *trans)
iwl_trans_pcie_sw_reset(trans);
+ if (trans->trans_cfg->device_family == IWL_DEVICE_FAMILY_22000 &&
+ trans->cfg->integrated) {
+ err = iwl_pcie_gen2_force_power_gating(trans);
+ if (err)
+ return err;
+ }
+
err = iwl_pcie_apm_init(trans);
if (err)
return err;
diff --git a/drivers/net/wireless/intersil/hostap/hostap_main.c b/drivers/net/wireless/intersil/hostap/hostap_main.c
index 05466281afb6..de97b3304115 100644
--- a/drivers/net/wireless/intersil/hostap/hostap_main.c
+++ b/drivers/net/wireless/intersil/hostap/hostap_main.c
@@ -761,7 +761,7 @@ static void hostap_set_multicast_list(struct net_device *dev)
}
-static void prism2_tx_timeout(struct net_device *dev)
+static void prism2_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct hostap_interface *iface;
local_info_t *local;
diff --git a/drivers/net/wireless/intersil/orinoco/main.c b/drivers/net/wireless/intersil/orinoco/main.c
index 28dac36d7c4c..00264a14e52c 100644
--- a/drivers/net/wireless/intersil/orinoco/main.c
+++ b/drivers/net/wireless/intersil/orinoco/main.c
@@ -647,7 +647,7 @@ static void __orinoco_ev_txexc(struct net_device *dev, struct hermes *hw)
netif_wake_queue(dev);
}
-void orinoco_tx_timeout(struct net_device *dev)
+void orinoco_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct orinoco_private *priv = ndev_priv(dev);
struct net_device_stats *stats = &dev->stats;
diff --git a/drivers/net/wireless/intersil/orinoco/orinoco.h b/drivers/net/wireless/intersil/orinoco/orinoco.h
index 430862a6a24b..cdd026af100b 100644
--- a/drivers/net/wireless/intersil/orinoco/orinoco.h
+++ b/drivers/net/wireless/intersil/orinoco/orinoco.h
@@ -207,7 +207,7 @@ int orinoco_open(struct net_device *dev);
int orinoco_stop(struct net_device *dev);
void orinoco_set_multicast_list(struct net_device *dev);
int orinoco_change_mtu(struct net_device *dev, int new_mtu);
-void orinoco_tx_timeout(struct net_device *dev);
+void orinoco_tx_timeout(struct net_device *dev, unsigned int txqueue);
/********************************************************************/
/* Locking and synchronization functions */
diff --git a/drivers/net/wireless/intersil/prism54/islpci_eth.c b/drivers/net/wireless/intersil/prism54/islpci_eth.c
index 2b8fb07d07e7..8d680250a281 100644
--- a/drivers/net/wireless/intersil/prism54/islpci_eth.c
+++ b/drivers/net/wireless/intersil/prism54/islpci_eth.c
@@ -473,7 +473,7 @@ islpci_do_reset_and_wake(struct work_struct *work)
}
void
-islpci_eth_tx_timeout(struct net_device *ndev)
+islpci_eth_tx_timeout(struct net_device *ndev, unsigned int txqueue)
{
islpci_private *priv = netdev_priv(ndev);
diff --git a/drivers/net/wireless/intersil/prism54/islpci_eth.h b/drivers/net/wireless/intersil/prism54/islpci_eth.h
index 61f4b43c6054..e433ccdc526b 100644
--- a/drivers/net/wireless/intersil/prism54/islpci_eth.h
+++ b/drivers/net/wireless/intersil/prism54/islpci_eth.h
@@ -53,7 +53,7 @@ struct avs_80211_1_header {
void islpci_eth_cleanup_transmit(islpci_private *, isl38xx_control_block *);
netdev_tx_t islpci_eth_transmit(struct sk_buff *, struct net_device *);
int islpci_eth_receive(islpci_private *);
-void islpci_eth_tx_timeout(struct net_device *);
+void islpci_eth_tx_timeout(struct net_device *, unsigned int txqueue);
void islpci_do_reset_and_wake(struct work_struct *);
#endif /* _ISL_GEN_H */
diff --git a/drivers/net/wireless/marvell/libertas/debugfs.c b/drivers/net/wireless/marvell/libertas/debugfs.c
index fe14814af300..c604613ab506 100644
--- a/drivers/net/wireless/marvell/libertas/debugfs.c
+++ b/drivers/net/wireless/marvell/libertas/debugfs.c
@@ -774,7 +774,7 @@ void lbs_debugfs_remove_one(struct lbs_private *priv)
#ifdef PROC_DEBUG
-#define item_size(n) (FIELD_SIZEOF(struct lbs_private, n))
+#define item_size(n) (sizeof_field(struct lbs_private, n))
#define item_addr(n) (offsetof(struct lbs_private, n))
diff --git a/drivers/net/wireless/marvell/mwifiex/main.c b/drivers/net/wireless/marvell/mwifiex/main.c
index d14e55e3c9da..7d94695e7961 100644
--- a/drivers/net/wireless/marvell/mwifiex/main.c
+++ b/drivers/net/wireless/marvell/mwifiex/main.c
@@ -1020,7 +1020,7 @@ static void mwifiex_set_multicast_list(struct net_device *dev)
* CFG802.11 network device handler for transmission timeout.
*/
static void
-mwifiex_tx_timeout(struct net_device *dev)
+mwifiex_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct mwifiex_private *priv = mwifiex_netdev_get_priv(dev);
diff --git a/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c b/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c
index 74e50566db1f..6dd835f1efc2 100644
--- a/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c
+++ b/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c
@@ -229,6 +229,14 @@ static int mwifiex_process_country_ie(struct mwifiex_private *priv,
"11D: skip setting domain info in FW\n");
return 0;
}
+
+ if (country_ie_len >
+ (IEEE80211_COUNTRY_STRING_LEN + MWIFIEX_MAX_TRIPLET_802_11D)) {
+ mwifiex_dbg(priv->adapter, ERROR,
+ "11D: country_ie_len overflow!, deauth AP\n");
+ return -EINVAL;
+ }
+
memcpy(priv->adapter->country_code, &country_ie[2], 2);
domain_info->country_code[0] = country_ie[2];
@@ -272,8 +280,9 @@ int mwifiex_bss_start(struct mwifiex_private *priv, struct cfg80211_bss *bss,
priv->scan_block = false;
if (bss) {
- if (adapter->region_code == 0x00)
- mwifiex_process_country_ie(priv, bss);
+ if (adapter->region_code == 0x00 &&
+ mwifiex_process_country_ie(priv, bss))
+ return -EINVAL;
/* Allocate and fill new bss descriptor */
bss_desc = kzalloc(sizeof(struct mwifiex_bssdescriptor),
diff --git a/drivers/net/wireless/marvell/mwifiex/tdls.c b/drivers/net/wireless/marvell/mwifiex/tdls.c
index 09313047beed..7caf1d26124a 100644
--- a/drivers/net/wireless/marvell/mwifiex/tdls.c
+++ b/drivers/net/wireless/marvell/mwifiex/tdls.c
@@ -953,59 +953,117 @@ void mwifiex_process_tdls_action_frame(struct mwifiex_private *priv,
switch (*pos) {
case WLAN_EID_SUPP_RATES:
+ if (pos[1] > 32)
+ return;
sta_ptr->tdls_cap.rates_len = pos[1];
for (i = 0; i < pos[1]; i++)
sta_ptr->tdls_cap.rates[i] = pos[i + 2];
break;
case WLAN_EID_EXT_SUPP_RATES:
+ if (pos[1] > 32)
+ return;
basic = sta_ptr->tdls_cap.rates_len;
+ if (pos[1] > 32 - basic)
+ return;
for (i = 0; i < pos[1]; i++)
sta_ptr->tdls_cap.rates[basic + i] = pos[i + 2];
sta_ptr->tdls_cap.rates_len += pos[1];
break;
case WLAN_EID_HT_CAPABILITY:
- memcpy((u8 *)&sta_ptr->tdls_cap.ht_capb, pos,
+ if (pos > end - sizeof(struct ieee80211_ht_cap) - 2)
+ return;
+ if (pos[1] != sizeof(struct ieee80211_ht_cap))
+ return;
+ /* copy the ie's value into ht_capb*/
+ memcpy((u8 *)&sta_ptr->tdls_cap.ht_capb, pos + 2,
sizeof(struct ieee80211_ht_cap));
sta_ptr->is_11n_enabled = 1;
break;
case WLAN_EID_HT_OPERATION:
- memcpy(&sta_ptr->tdls_cap.ht_oper, pos,
+ if (pos > end -
+ sizeof(struct ieee80211_ht_operation) - 2)
+ return;
+ if (pos[1] != sizeof(struct ieee80211_ht_operation))
+ return;
+ /* copy the ie's value into ht_oper*/
+ memcpy(&sta_ptr->tdls_cap.ht_oper, pos + 2,
sizeof(struct ieee80211_ht_operation));
break;
case WLAN_EID_BSS_COEX_2040:
+ if (pos > end - 3)
+ return;
+ if (pos[1] != 1)
+ return;
sta_ptr->tdls_cap.coex_2040 = pos[2];
break;
case WLAN_EID_EXT_CAPABILITY:
+ if (pos > end - sizeof(struct ieee_types_header))
+ return;
+ if (pos[1] < sizeof(struct ieee_types_header))
+ return;
+ if (pos[1] > 8)
+ return;
memcpy((u8 *)&sta_ptr->tdls_cap.extcap, pos,
sizeof(struct ieee_types_header) +
min_t(u8, pos[1], 8));
break;
case WLAN_EID_RSN:
+ if (pos > end - sizeof(struct ieee_types_header))
+ return;
+ if (pos[1] < sizeof(struct ieee_types_header))
+ return;
+ if (pos[1] > IEEE_MAX_IE_SIZE -
+ sizeof(struct ieee_types_header))
+ return;
memcpy((u8 *)&sta_ptr->tdls_cap.rsn_ie, pos,
sizeof(struct ieee_types_header) +
min_t(u8, pos[1], IEEE_MAX_IE_SIZE -
sizeof(struct ieee_types_header)));
break;
case WLAN_EID_QOS_CAPA:
+ if (pos > end - 3)
+ return;
+ if (pos[1] != 1)
+ return;
sta_ptr->tdls_cap.qos_info = pos[2];
break;
case WLAN_EID_VHT_OPERATION:
- if (priv->adapter->is_hw_11ac_capable)
- memcpy(&sta_ptr->tdls_cap.vhtoper, pos,
+ if (priv->adapter->is_hw_11ac_capable) {
+ if (pos > end -
+ sizeof(struct ieee80211_vht_operation) - 2)
+ return;
+ if (pos[1] !=
+ sizeof(struct ieee80211_vht_operation))
+ return;
+ /* copy the ie's value into vhtoper*/
+ memcpy(&sta_ptr->tdls_cap.vhtoper, pos + 2,
sizeof(struct ieee80211_vht_operation));
+ }
break;
case WLAN_EID_VHT_CAPABILITY:
if (priv->adapter->is_hw_11ac_capable) {
- memcpy((u8 *)&sta_ptr->tdls_cap.vhtcap, pos,
+ if (pos > end -
+ sizeof(struct ieee80211_vht_cap) - 2)
+ return;
+ if (pos[1] != sizeof(struct ieee80211_vht_cap))
+ return;
+ /* copy the ie's value into vhtcap*/
+ memcpy((u8 *)&sta_ptr->tdls_cap.vhtcap, pos + 2,
sizeof(struct ieee80211_vht_cap));
sta_ptr->is_11ac_enabled = 1;
}
break;
case WLAN_EID_AID:
- if (priv->adapter->is_hw_11ac_capable)
+ if (priv->adapter->is_hw_11ac_capable) {
+ if (pos > end - 4)
+ return;
+ if (pos[1] != 2)
+ return;
sta_ptr->tdls_cap.aid =
get_unaligned_le16((pos + 2));
+ }
+ break;
default:
break;
}
diff --git a/drivers/net/wireless/marvell/mwifiex/util.h b/drivers/net/wireless/marvell/mwifiex/util.h
index c386992abcdb..7cafcecd7b85 100644
--- a/drivers/net/wireless/marvell/mwifiex/util.h
+++ b/drivers/net/wireless/marvell/mwifiex/util.h
@@ -36,11 +36,11 @@ struct mwifiex_cb {
};
/* size/addr for mwifiex_debug_info */
-#define item_size(n) (FIELD_SIZEOF(struct mwifiex_debug_info, n))
+#define item_size(n) (sizeof_field(struct mwifiex_debug_info, n))
#define item_addr(n) (offsetof(struct mwifiex_debug_info, n))
/* size/addr for struct mwifiex_adapter */
-#define adapter_item_size(n) (FIELD_SIZEOF(struct mwifiex_adapter, n))
+#define adapter_item_size(n) (sizeof_field(struct mwifiex_adapter, n))
#define adapter_item_addr(n) (offsetof(struct mwifiex_adapter, n))
struct mwifiex_debug_data {
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x0/eeprom.c b/drivers/net/wireless/mediatek/mt76/mt76x0/eeprom.c
index a03e2d01fba7..d1405528b504 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x0/eeprom.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x0/eeprom.c
@@ -342,8 +342,11 @@ int mt76x0_eeprom_init(struct mt76x02_dev *dev)
dev_info(dev->mt76.dev, "EEPROM ver:%02hhx fae:%02hhx\n",
version, fae);
- mt76x02_mac_setaddr(dev, dev->mt76.eeprom.data + MT_EE_MAC_ADDR);
+ memcpy(dev->mt76.macaddr, (u8 *)dev->mt76.eeprom.data + MT_EE_MAC_ADDR,
+ ETH_ALEN);
mt76_eeprom_override(&dev->mt76);
+ mt76x02_mac_setaddr(dev, dev->mt76.macaddr);
+
mt76x0_set_chip_cap(dev);
mt76x0_set_freq_offset(dev);
mt76x0_set_temp_offset(dev);
diff --git a/drivers/net/wireless/quantenna/qtnfmac/core.c b/drivers/net/wireless/quantenna/qtnfmac/core.c
index 5fb598389487..648dfc38bd70 100644
--- a/drivers/net/wireless/quantenna/qtnfmac/core.c
+++ b/drivers/net/wireless/quantenna/qtnfmac/core.c
@@ -156,7 +156,7 @@ static void qtnf_netdev_get_stats64(struct net_device *ndev,
/* Netdev handler for transmission timeout.
*/
-static void qtnf_netdev_tx_timeout(struct net_device *ndev)
+static void qtnf_netdev_tx_timeout(struct net_device *ndev, unsigned int txqueue)
{
struct qtnf_vif *vif = qtnf_netdev_get_priv(ndev);
struct qtnf_wmac *mac;
diff --git a/drivers/net/wireless/wl3501_cs.c b/drivers/net/wireless/wl3501_cs.c
index 007bf6803293..686161db8706 100644
--- a/drivers/net/wireless/wl3501_cs.c
+++ b/drivers/net/wireless/wl3501_cs.c
@@ -1285,7 +1285,7 @@ out:
return rc;
}
-static void wl3501_tx_timeout(struct net_device *dev)
+static void wl3501_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct net_device_stats *stats = &dev->stats;
int rc;
diff --git a/drivers/net/wireless/zydas/zd1201.c b/drivers/net/wireless/zydas/zd1201.c
index 0db7362bedb4..41641fc2be74 100644
--- a/drivers/net/wireless/zydas/zd1201.c
+++ b/drivers/net/wireless/zydas/zd1201.c
@@ -830,7 +830,7 @@ static netdev_tx_t zd1201_hard_start_xmit(struct sk_buff *skb,
return NETDEV_TX_OK;
}
-static void zd1201_tx_timeout(struct net_device *dev)
+static void zd1201_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct zd1201 *zd = netdev_priv(dev);
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index 68dd7bb07ca6..0c8a02a1ead7 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -585,6 +585,7 @@ int xenvif_connect_ctrl(struct xenvif *vif, grant_ref_t ring_ref,
struct net_device *dev = vif->dev;
void *addr;
struct xen_netif_ctrl_sring *shared;
+ RING_IDX rsp_prod, req_prod;
int err;
err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif),
@@ -593,7 +594,14 @@ int xenvif_connect_ctrl(struct xenvif *vif, grant_ref_t ring_ref,
goto err;
shared = (struct xen_netif_ctrl_sring *)addr;
- BACK_RING_INIT(&vif->ctrl, shared, XEN_PAGE_SIZE);
+ rsp_prod = READ_ONCE(shared->rsp_prod);
+ req_prod = READ_ONCE(shared->req_prod);
+
+ BACK_RING_ATTACH(&vif->ctrl, shared, rsp_prod, XEN_PAGE_SIZE);
+
+ err = -EIO;
+ if (req_prod - rsp_prod > RING_SIZE(&vif->ctrl))
+ goto err_unmap;
err = bind_interdomain_evtchn_to_irq(vif->domid, evtchn);
if (err < 0)
@@ -628,18 +636,6 @@ err:
static void xenvif_disconnect_queue(struct xenvif_queue *queue)
{
- if (queue->tx_irq) {
- unbind_from_irqhandler(queue->tx_irq, queue);
- if (queue->tx_irq == queue->rx_irq)
- queue->rx_irq = 0;
- queue->tx_irq = 0;
- }
-
- if (queue->rx_irq) {
- unbind_from_irqhandler(queue->rx_irq, queue);
- queue->rx_irq = 0;
- }
-
if (queue->task) {
kthread_stop(queue->task);
queue->task = NULL;
@@ -655,6 +651,18 @@ static void xenvif_disconnect_queue(struct xenvif_queue *queue)
queue->napi.poll = NULL;
}
+ if (queue->tx_irq) {
+ unbind_from_irqhandler(queue->tx_irq, queue);
+ if (queue->tx_irq == queue->rx_irq)
+ queue->rx_irq = 0;
+ queue->tx_irq = 0;
+ }
+
+ if (queue->rx_irq) {
+ unbind_from_irqhandler(queue->rx_irq, queue);
+ queue->rx_irq = 0;
+ }
+
xenvif_unmap_frontend_data_rings(queue);
}
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 0020b2e8c279..315dfc6ea297 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -1453,7 +1453,7 @@ int xenvif_map_frontend_data_rings(struct xenvif_queue *queue,
void *addr;
struct xen_netif_tx_sring *txs;
struct xen_netif_rx_sring *rxs;
-
+ RING_IDX rsp_prod, req_prod;
int err = -ENOMEM;
err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(queue->vif),
@@ -1462,7 +1462,14 @@ int xenvif_map_frontend_data_rings(struct xenvif_queue *queue,
goto err;
txs = (struct xen_netif_tx_sring *)addr;
- BACK_RING_INIT(&queue->tx, txs, XEN_PAGE_SIZE);
+ rsp_prod = READ_ONCE(txs->rsp_prod);
+ req_prod = READ_ONCE(txs->req_prod);
+
+ BACK_RING_ATTACH(&queue->tx, txs, rsp_prod, XEN_PAGE_SIZE);
+
+ err = -EIO;
+ if (req_prod - rsp_prod > RING_SIZE(&queue->tx))
+ goto err;
err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(queue->vif),
&rx_ring_ref, 1, &addr);
@@ -1470,7 +1477,14 @@ int xenvif_map_frontend_data_rings(struct xenvif_queue *queue,
goto err;
rxs = (struct xen_netif_rx_sring *)addr;
- BACK_RING_INIT(&queue->rx, rxs, XEN_PAGE_SIZE);
+ rsp_prod = READ_ONCE(rxs->rsp_prod);
+ req_prod = READ_ONCE(rxs->req_prod);
+
+ BACK_RING_ATTACH(&queue->rx, rxs, rsp_prod, XEN_PAGE_SIZE);
+
+ err = -EIO;
+ if (req_prod - rsp_prod > RING_SIZE(&queue->rx))
+ goto err;
return 0;
diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c
index f533b7372d59..286054b60d47 100644
--- a/drivers/net/xen-netback/xenbus.c
+++ b/drivers/net/xen-netback/xenbus.c
@@ -195,185 +195,6 @@ static void xenvif_debugfs_delif(struct xenvif *vif)
}
#endif /* CONFIG_DEBUG_FS */
-static int netback_remove(struct xenbus_device *dev)
-{
- struct backend_info *be = dev_get_drvdata(&dev->dev);
-
- set_backend_state(be, XenbusStateClosed);
-
- unregister_hotplug_status_watch(be);
- if (be->vif) {
- kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE);
- xen_unregister_watchers(be->vif);
- xenbus_rm(XBT_NIL, dev->nodename, "hotplug-status");
- xenvif_free(be->vif);
- be->vif = NULL;
- }
- kfree(be->hotplug_script);
- kfree(be);
- dev_set_drvdata(&dev->dev, NULL);
- return 0;
-}
-
-
-/**
- * Entry point to this code when a new device is created. Allocate the basic
- * structures and switch to InitWait.
- */
-static int netback_probe(struct xenbus_device *dev,
- const struct xenbus_device_id *id)
-{
- const char *message;
- struct xenbus_transaction xbt;
- int err;
- int sg;
- const char *script;
- struct backend_info *be = kzalloc(sizeof(struct backend_info),
- GFP_KERNEL);
- if (!be) {
- xenbus_dev_fatal(dev, -ENOMEM,
- "allocating backend structure");
- return -ENOMEM;
- }
-
- be->dev = dev;
- dev_set_drvdata(&dev->dev, be);
-
- be->state = XenbusStateInitialising;
- err = xenbus_switch_state(dev, XenbusStateInitialising);
- if (err)
- goto fail;
-
- sg = 1;
-
- do {
- err = xenbus_transaction_start(&xbt);
- if (err) {
- xenbus_dev_fatal(dev, err, "starting transaction");
- goto fail;
- }
-
- err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", sg);
- if (err) {
- message = "writing feature-sg";
- goto abort_transaction;
- }
-
- err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4",
- "%d", sg);
- if (err) {
- message = "writing feature-gso-tcpv4";
- goto abort_transaction;
- }
-
- err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv6",
- "%d", sg);
- if (err) {
- message = "writing feature-gso-tcpv6";
- goto abort_transaction;
- }
-
- /* We support partial checksum setup for IPv6 packets */
- err = xenbus_printf(xbt, dev->nodename,
- "feature-ipv6-csum-offload",
- "%d", 1);
- if (err) {
- message = "writing feature-ipv6-csum-offload";
- goto abort_transaction;
- }
-
- /* We support rx-copy path. */
- err = xenbus_printf(xbt, dev->nodename,
- "feature-rx-copy", "%d", 1);
- if (err) {
- message = "writing feature-rx-copy";
- goto abort_transaction;
- }
-
- /*
- * We don't support rx-flip path (except old guests who don't
- * grok this feature flag).
- */
- err = xenbus_printf(xbt, dev->nodename,
- "feature-rx-flip", "%d", 0);
- if (err) {
- message = "writing feature-rx-flip";
- goto abort_transaction;
- }
-
- /* We support dynamic multicast-control. */
- err = xenbus_printf(xbt, dev->nodename,
- "feature-multicast-control", "%d", 1);
- if (err) {
- message = "writing feature-multicast-control";
- goto abort_transaction;
- }
-
- err = xenbus_printf(xbt, dev->nodename,
- "feature-dynamic-multicast-control",
- "%d", 1);
- if (err) {
- message = "writing feature-dynamic-multicast-control";
- goto abort_transaction;
- }
-
- err = xenbus_transaction_end(xbt, 0);
- } while (err == -EAGAIN);
-
- if (err) {
- xenbus_dev_fatal(dev, err, "completing transaction");
- goto fail;
- }
-
- /*
- * Split event channels support, this is optional so it is not
- * put inside the above loop.
- */
- err = xenbus_printf(XBT_NIL, dev->nodename,
- "feature-split-event-channels",
- "%u", separate_tx_rx_irq);
- if (err)
- pr_debug("Error writing feature-split-event-channels\n");
-
- /* Multi-queue support: This is an optional feature. */
- err = xenbus_printf(XBT_NIL, dev->nodename,
- "multi-queue-max-queues", "%u", xenvif_max_queues);
- if (err)
- pr_debug("Error writing multi-queue-max-queues\n");
-
- err = xenbus_printf(XBT_NIL, dev->nodename,
- "feature-ctrl-ring",
- "%u", true);
- if (err)
- pr_debug("Error writing feature-ctrl-ring\n");
-
- script = xenbus_read(XBT_NIL, dev->nodename, "script", NULL);
- if (IS_ERR(script)) {
- err = PTR_ERR(script);
- xenbus_dev_fatal(dev, err, "reading script");
- goto fail;
- }
-
- be->hotplug_script = script;
-
-
- /* This kicks hotplug scripts, so do it immediately. */
- err = backend_create_xenvif(be);
- if (err)
- goto fail;
-
- return 0;
-
-abort_transaction:
- xenbus_transaction_end(xbt, 1);
- xenbus_dev_fatal(dev, err, "%s", message);
-fail:
- pr_debug("failed\n");
- netback_remove(dev);
- return err;
-}
-
-
/*
* Handle the creation of the hotplug script environment. We add the script
* and vif variables to the environment, for the benefit of the vif-* hotplug
@@ -827,6 +648,7 @@ static void hotplug_status_changed(struct xenbus_watch *watch,
/* Not interested in this watch anymore. */
unregister_hotplug_status_watch(be);
+ xenbus_rm(XBT_NIL, be->dev->nodename, "hotplug-status");
}
kfree(str);
}
@@ -1128,6 +950,174 @@ static int read_xenbus_vif_flags(struct backend_info *be)
return 0;
}
+static int netback_remove(struct xenbus_device *dev)
+{
+ struct backend_info *be = dev_get_drvdata(&dev->dev);
+
+ unregister_hotplug_status_watch(be);
+ if (be->vif) {
+ kobject_uevent(&dev->dev.kobj, KOBJ_OFFLINE);
+ backend_disconnect(be);
+ xenvif_free(be->vif);
+ be->vif = NULL;
+ }
+ kfree(be->hotplug_script);
+ kfree(be);
+ dev_set_drvdata(&dev->dev, NULL);
+ return 0;
+}
+
+/**
+ * Entry point to this code when a new device is created. Allocate the basic
+ * structures and switch to InitWait.
+ */
+static int netback_probe(struct xenbus_device *dev,
+ const struct xenbus_device_id *id)
+{
+ const char *message;
+ struct xenbus_transaction xbt;
+ int err;
+ int sg;
+ const char *script;
+ struct backend_info *be = kzalloc(sizeof(*be), GFP_KERNEL);
+
+ if (!be) {
+ xenbus_dev_fatal(dev, -ENOMEM,
+ "allocating backend structure");
+ return -ENOMEM;
+ }
+
+ be->dev = dev;
+ dev_set_drvdata(&dev->dev, be);
+
+ sg = 1;
+
+ do {
+ err = xenbus_transaction_start(&xbt);
+ if (err) {
+ xenbus_dev_fatal(dev, err, "starting transaction");
+ goto fail;
+ }
+
+ err = xenbus_printf(xbt, dev->nodename, "feature-sg", "%d", sg);
+ if (err) {
+ message = "writing feature-sg";
+ goto abort_transaction;
+ }
+
+ err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv4",
+ "%d", sg);
+ if (err) {
+ message = "writing feature-gso-tcpv4";
+ goto abort_transaction;
+ }
+
+ err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv6",
+ "%d", sg);
+ if (err) {
+ message = "writing feature-gso-tcpv6";
+ goto abort_transaction;
+ }
+
+ /* We support partial checksum setup for IPv6 packets */
+ err = xenbus_printf(xbt, dev->nodename,
+ "feature-ipv6-csum-offload",
+ "%d", 1);
+ if (err) {
+ message = "writing feature-ipv6-csum-offload";
+ goto abort_transaction;
+ }
+
+ /* We support rx-copy path. */
+ err = xenbus_printf(xbt, dev->nodename,
+ "feature-rx-copy", "%d", 1);
+ if (err) {
+ message = "writing feature-rx-copy";
+ goto abort_transaction;
+ }
+
+ /* We don't support rx-flip path (except old guests who
+ * don't grok this feature flag).
+ */
+ err = xenbus_printf(xbt, dev->nodename,
+ "feature-rx-flip", "%d", 0);
+ if (err) {
+ message = "writing feature-rx-flip";
+ goto abort_transaction;
+ }
+
+ /* We support dynamic multicast-control. */
+ err = xenbus_printf(xbt, dev->nodename,
+ "feature-multicast-control", "%d", 1);
+ if (err) {
+ message = "writing feature-multicast-control";
+ goto abort_transaction;
+ }
+
+ err = xenbus_printf(xbt, dev->nodename,
+ "feature-dynamic-multicast-control",
+ "%d", 1);
+ if (err) {
+ message = "writing feature-dynamic-multicast-control";
+ goto abort_transaction;
+ }
+
+ err = xenbus_transaction_end(xbt, 0);
+ } while (err == -EAGAIN);
+
+ if (err) {
+ xenbus_dev_fatal(dev, err, "completing transaction");
+ goto fail;
+ }
+
+ /* Split event channels support, this is optional so it is not
+ * put inside the above loop.
+ */
+ err = xenbus_printf(XBT_NIL, dev->nodename,
+ "feature-split-event-channels",
+ "%u", separate_tx_rx_irq);
+ if (err)
+ pr_debug("Error writing feature-split-event-channels\n");
+
+ /* Multi-queue support: This is an optional feature. */
+ err = xenbus_printf(XBT_NIL, dev->nodename,
+ "multi-queue-max-queues", "%u", xenvif_max_queues);
+ if (err)
+ pr_debug("Error writing multi-queue-max-queues\n");
+
+ err = xenbus_printf(XBT_NIL, dev->nodename,
+ "feature-ctrl-ring",
+ "%u", true);
+ if (err)
+ pr_debug("Error writing feature-ctrl-ring\n");
+
+ backend_switch_state(be, XenbusStateInitWait);
+
+ script = xenbus_read(XBT_NIL, dev->nodename, "script", NULL);
+ if (IS_ERR(script)) {
+ err = PTR_ERR(script);
+ xenbus_dev_fatal(dev, err, "reading script");
+ goto fail;
+ }
+
+ be->hotplug_script = script;
+
+ /* This kicks hotplug scripts, so do it immediately. */
+ err = backend_create_xenvif(be);
+ if (err)
+ goto fail;
+
+ return 0;
+
+abort_transaction:
+ xenbus_transaction_end(xbt, 1);
+ xenbus_dev_fatal(dev, err, "%s", message);
+fail:
+ pr_debug("failed\n");
+ netback_remove(dev);
+ return err;
+}
+
static const struct xenbus_device_id netback_ids[] = {
{ "vif" },
{ "" }
@@ -1139,6 +1129,7 @@ static struct xenbus_driver netback_driver = {
.remove = netback_remove,
.uevent = netback_uevent,
.otherend_changed = frontend_changed,
+ .allow_rebind = true,
};
int xenvif_xenbus_init(void)
diff --git a/drivers/nfc/nxp-nci/i2c.c b/drivers/nfc/nxp-nci/i2c.c
index 4d1909aecd6c..9f60e4dc5a90 100644
--- a/drivers/nfc/nxp-nci/i2c.c
+++ b/drivers/nfc/nxp-nci/i2c.c
@@ -278,7 +278,7 @@ static int nxp_nci_i2c_probe(struct i2c_client *client,
r = devm_acpi_dev_add_driver_gpios(dev, acpi_nxp_nci_gpios);
if (r)
- return r;
+ dev_dbg(dev, "Unable to add GPIO mapping table\n");
phy->gpiod_en = devm_gpiod_get(dev, "enable", GPIOD_OUT_LOW);
if (IS_ERR(phy->gpiod_en)) {
diff --git a/drivers/nfc/pn544/pn544.c b/drivers/nfc/pn544/pn544.c
index cda996f6954e..2b83156efe3f 100644
--- a/drivers/nfc/pn544/pn544.c
+++ b/drivers/nfc/pn544/pn544.c
@@ -693,7 +693,7 @@ static int pn544_hci_check_presence(struct nfc_hci_dev *hdev,
target->nfcid1_len != 10)
return -EOPNOTSUPP;
- return nfc_hci_send_cmd(hdev, NFC_HCI_RF_READER_A_GATE,
+ return nfc_hci_send_cmd(hdev, NFC_HCI_RF_READER_A_GATE,
PN544_RF_READER_CMD_ACTIVATE_NEXT,
target->nfcid1, target->nfcid1_len, NULL);
} else if (target->supported_protocols & (NFC_PROTO_JEWEL_MASK |
diff --git a/drivers/nfc/port100.c b/drivers/nfc/port100.c
index 604dba4f18af..8e4d355dc3ae 100644
--- a/drivers/nfc/port100.c
+++ b/drivers/nfc/port100.c
@@ -565,7 +565,7 @@ static void port100_tx_update_payload_len(void *_frame, int len)
{
struct port100_frame *frame = _frame;
- frame->datalen = cpu_to_le16(le16_to_cpu(frame->datalen) + len);
+ le16_add_cpu(&frame->datalen, len);
}
static bool port100_rx_frame_is_valid(void *_frame)
diff --git a/drivers/nfc/s3fwrn5/firmware.c b/drivers/nfc/s3fwrn5/firmware.c
index be110d9cef02..de613c623a2c 100644
--- a/drivers/nfc/s3fwrn5/firmware.c
+++ b/drivers/nfc/s3fwrn5/firmware.c
@@ -507,7 +507,10 @@ int s3fwrn5_fw_recv_frame(struct nci_dev *ndev, struct sk_buff *skb)
struct s3fwrn5_info *info = nci_get_drvdata(ndev);
struct s3fwrn5_fw_info *fw_info = &info->fw_info;
- BUG_ON(fw_info->rsp);
+ if (WARN_ON(fw_info->rsp)) {
+ kfree_skb(skb);
+ return -EINVAL;
+ }
fw_info->rsp = skb;
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index dfe37a525f3a..667f18f465be 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1735,6 +1735,8 @@ static int nvme_report_ns_ids(struct nvme_ctrl *ctrl, unsigned int nsid,
if (ret)
dev_warn(ctrl->device,
"Identify Descriptors failed (%d)\n", ret);
+ if (ret > 0)
+ ret = 0;
}
return ret;
}
@@ -2852,6 +2854,10 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
* admin connect
*/
if (ctrl->cntlid != le16_to_cpu(id->cntlid)) {
+ dev_err(ctrl->device,
+ "Mismatching cntlid: Connect %u vs Identify "
+ "%u, rejecting\n",
+ ctrl->cntlid, le16_to_cpu(id->cntlid));
ret = -EINVAL;
goto out_free;
}
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 679a721ae229..5a70ac395d53 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -95,7 +95,7 @@ struct nvme_fc_fcp_op {
struct nvme_fcp_op_w_sgl {
struct nvme_fc_fcp_op op;
- struct scatterlist sgl[SG_CHUNK_SIZE];
+ struct scatterlist sgl[NVME_INLINE_SG_CNT];
uint8_t priv[0];
};
@@ -342,7 +342,8 @@ nvme_fc_register_localport(struct nvme_fc_port_info *pinfo,
!template->ls_req || !template->fcp_io ||
!template->ls_abort || !template->fcp_abort ||
!template->max_hw_queues || !template->max_sgl_segments ||
- !template->max_dif_sgl_segments || !template->dma_boundary) {
+ !template->max_dif_sgl_segments || !template->dma_boundary ||
+ !template->module) {
ret = -EINVAL;
goto out_reghost_failed;
}
@@ -2015,6 +2016,7 @@ nvme_fc_ctrl_free(struct kref *ref)
{
struct nvme_fc_ctrl *ctrl =
container_of(ref, struct nvme_fc_ctrl, ref);
+ struct nvme_fc_lport *lport = ctrl->lport;
unsigned long flags;
if (ctrl->ctrl.tagset) {
@@ -2041,6 +2043,7 @@ nvme_fc_ctrl_free(struct kref *ref)
if (ctrl->ctrl.opts)
nvmf_free_options(ctrl->ctrl.opts);
kfree(ctrl);
+ module_put(lport->ops->module);
}
static void
@@ -2141,7 +2144,7 @@ nvme_fc_map_data(struct nvme_fc_ctrl *ctrl, struct request *rq,
freq->sg_table.sgl = freq->first_sgl;
ret = sg_alloc_table_chained(&freq->sg_table,
blk_rq_nr_phys_segments(rq), freq->sg_table.sgl,
- SG_CHUNK_SIZE);
+ NVME_INLINE_SG_CNT);
if (ret)
return -ENOMEM;
@@ -2150,7 +2153,7 @@ nvme_fc_map_data(struct nvme_fc_ctrl *ctrl, struct request *rq,
freq->sg_cnt = fc_dma_map_sg(ctrl->lport->dev, freq->sg_table.sgl,
op->nents, rq_dma_dir(rq));
if (unlikely(freq->sg_cnt <= 0)) {
- sg_free_table_chained(&freq->sg_table, SG_CHUNK_SIZE);
+ sg_free_table_chained(&freq->sg_table, NVME_INLINE_SG_CNT);
freq->sg_cnt = 0;
return -EFAULT;
}
@@ -2173,7 +2176,7 @@ nvme_fc_unmap_data(struct nvme_fc_ctrl *ctrl, struct request *rq,
fc_dma_unmap_sg(ctrl->lport->dev, freq->sg_table.sgl, op->nents,
rq_dma_dir(rq));
- sg_free_table_chained(&freq->sg_table, SG_CHUNK_SIZE);
+ sg_free_table_chained(&freq->sg_table, NVME_INLINE_SG_CNT);
freq->sg_cnt = 0;
}
@@ -2910,10 +2913,22 @@ nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status)
static void
__nvme_fc_terminate_io(struct nvme_fc_ctrl *ctrl)
{
- nvme_stop_keep_alive(&ctrl->ctrl);
+ /*
+ * if state is connecting - the error occurred as part of a
+ * reconnect attempt. The create_association error paths will
+ * clean up any outstanding io.
+ *
+ * if it's a different state - ensure all pending io is
+ * terminated. Given this can delay while waiting for the
+ * aborted io to return, we recheck adapter state below
+ * before changing state.
+ */
+ if (ctrl->ctrl.state != NVME_CTRL_CONNECTING) {
+ nvme_stop_keep_alive(&ctrl->ctrl);
- /* will block will waiting for io to terminate */
- nvme_fc_delete_association(ctrl);
+ /* will block will waiting for io to terminate */
+ nvme_fc_delete_association(ctrl);
+ }
if (ctrl->ctrl.state != NVME_CTRL_CONNECTING &&
!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING))
@@ -3059,10 +3074,15 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
goto out_fail;
}
+ if (!try_module_get(lport->ops->module)) {
+ ret = -EUNATCH;
+ goto out_free_ctrl;
+ }
+
idx = ida_simple_get(&nvme_fc_ctrl_cnt, 0, 0, GFP_KERNEL);
if (idx < 0) {
ret = -ENOSPC;
- goto out_free_ctrl;
+ goto out_mod_put;
}
ctrl->ctrl.opts = opts;
@@ -3215,6 +3235,8 @@ out_free_queues:
out_free_ida:
put_device(ctrl->dev);
ida_simple_remove(&nvme_fc_ctrl_cnt, ctrl->cnum);
+out_mod_put:
+ module_put(lport->ops->module);
out_free_ctrl:
kfree(ctrl);
out_fail:
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 3b9cbe0668fa..1024fec7914c 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -28,6 +28,12 @@ extern unsigned int admin_timeout;
#define NVME_DEFAULT_KATO 5
#define NVME_KATO_GRACE 10
+#ifdef CONFIG_ARCH_NO_SG_CHAIN
+#define NVME_INLINE_SG_CNT 0
+#else
+#define NVME_INLINE_SG_CNT 2
+#endif
+
extern struct workqueue_struct *nvme_wq;
extern struct workqueue_struct *nvme_reset_wq;
extern struct workqueue_struct *nvme_delete_wq;
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index dcaad5831cee..365a2ddbeaa7 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -68,14 +68,14 @@ static int io_queue_depth = 1024;
module_param_cb(io_queue_depth, &io_queue_depth_ops, &io_queue_depth, 0644);
MODULE_PARM_DESC(io_queue_depth, "set io queue depth, should >= 2");
-static int write_queues;
-module_param(write_queues, int, 0644);
+static unsigned int write_queues;
+module_param(write_queues, uint, 0644);
MODULE_PARM_DESC(write_queues,
"Number of queues to use for writes. If not set, reads and writes "
"will share a queue set.");
-static int poll_queues;
-module_param(poll_queues, int, 0644);
+static unsigned int poll_queues;
+module_param(poll_queues, uint, 0644);
MODULE_PARM_DESC(poll_queues, "Number of queues to use for polled IO.");
struct nvme_dev;
@@ -176,7 +176,6 @@ struct nvme_queue {
u16 sq_tail;
u16 last_sq_tail;
u16 cq_head;
- u16 last_cq_head;
u16 qid;
u8 cq_phase;
u8 sqes;
@@ -1026,10 +1025,7 @@ static irqreturn_t nvme_irq(int irq, void *data)
* the irq handler, even if that was on another CPU.
*/
rmb();
- if (nvmeq->cq_head != nvmeq->last_cq_head)
- ret = IRQ_HANDLED;
nvme_process_cq(nvmeq, &start, &end, -1);
- nvmeq->last_cq_head = nvmeq->cq_head;
wmb();
if (start != end) {
@@ -1549,7 +1545,7 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid, bool polled)
result = adapter_alloc_sq(dev, qid, nvmeq);
if (result < 0)
return result;
- else if (result)
+ if (result)
goto release_cq;
nvmeq->cq_vector = vector;
@@ -2058,7 +2054,6 @@ static int nvme_setup_irqs(struct nvme_dev *dev, unsigned int nr_io_queues)
.priv = dev,
};
unsigned int irq_queues, this_p_queues;
- unsigned int nr_cpus = num_possible_cpus();
/*
* Poll queues don't need interrupts, but we need at least one IO
@@ -2069,10 +2064,7 @@ static int nvme_setup_irqs(struct nvme_dev *dev, unsigned int nr_io_queues)
this_p_queues = nr_io_queues - 1;
irq_queues = 1;
} else {
- if (nr_cpus < nr_io_queues - this_p_queues)
- irq_queues = nr_cpus + 1;
- else
- irq_queues = nr_io_queues - this_p_queues + 1;
+ irq_queues = nr_io_queues - this_p_queues + 1;
}
dev->io_queues[HCTX_TYPE_POLL] = this_p_queues;
@@ -3142,6 +3134,9 @@ static int __init nvme_init(void)
BUILD_BUG_ON(sizeof(struct nvme_create_sq) != 64);
BUILD_BUG_ON(sizeof(struct nvme_delete_queue) != 64);
BUILD_BUG_ON(IRQ_AFFINITY_MAX_SETS < 2);
+
+ write_queues = min(write_queues, num_possible_cpus());
+ poll_queues = min(poll_queues, num_possible_cpus());
return pci_register_driver(&nvme_driver);
}
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index dce59459ed41..2a47c6c5007e 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -731,7 +731,7 @@ static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl,
set->reserved_tags = 2; /* connect + keep-alive */
set->numa_node = nctrl->numa_node;
set->cmd_size = sizeof(struct nvme_rdma_request) +
- SG_CHUNK_SIZE * sizeof(struct scatterlist);
+ NVME_INLINE_SG_CNT * sizeof(struct scatterlist);
set->driver_data = ctrl;
set->nr_hw_queues = 1;
set->timeout = ADMIN_TIMEOUT;
@@ -745,7 +745,7 @@ static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl,
set->numa_node = nctrl->numa_node;
set->flags = BLK_MQ_F_SHOULD_MERGE;
set->cmd_size = sizeof(struct nvme_rdma_request) +
- SG_CHUNK_SIZE * sizeof(struct scatterlist);
+ NVME_INLINE_SG_CNT * sizeof(struct scatterlist);
set->driver_data = ctrl;
set->nr_hw_queues = nctrl->queue_count - 1;
set->timeout = NVME_IO_TIMEOUT;
@@ -1160,7 +1160,7 @@ static void nvme_rdma_unmap_data(struct nvme_rdma_queue *queue,
}
ib_dma_unmap_sg(ibdev, req->sg_table.sgl, req->nents, rq_dma_dir(rq));
- sg_free_table_chained(&req->sg_table, SG_CHUNK_SIZE);
+ sg_free_table_chained(&req->sg_table, NVME_INLINE_SG_CNT);
}
static int nvme_rdma_set_sg_null(struct nvme_command *c)
@@ -1276,7 +1276,7 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
req->sg_table.sgl = req->first_sgl;
ret = sg_alloc_table_chained(&req->sg_table,
blk_rq_nr_phys_segments(rq), req->sg_table.sgl,
- SG_CHUNK_SIZE);
+ NVME_INLINE_SG_CNT);
if (ret)
return -ENOMEM;
@@ -1314,7 +1314,7 @@ out:
out_unmap_sg:
ib_dma_unmap_sg(ibdev, req->sg_table.sgl, req->nents, rq_dma_dir(rq));
out_free_table:
- sg_free_table_chained(&req->sg_table, SG_CHUNK_SIZE);
+ sg_free_table_chained(&req->sg_table, NVME_INLINE_SG_CNT);
return ret;
}
diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c
index b50b53db3746..1c50af6219f3 100644
--- a/drivers/nvme/target/fcloop.c
+++ b/drivers/nvme/target/fcloop.c
@@ -850,6 +850,7 @@ fcloop_targetport_delete(struct nvmet_fc_target_port *targetport)
#define FCLOOP_DMABOUND_4G 0xFFFFFFFF
static struct nvme_fc_port_template fctemplate = {
+ .module = THIS_MODULE,
.localport_delete = fcloop_localport_delete,
.remoteport_delete = fcloop_remoteport_delete,
.create_queue = fcloop_create_queue,
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index a758bb3d5dd4..4df4ebde208a 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -76,7 +76,7 @@ static void nvme_loop_complete_rq(struct request *req)
{
struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(req);
- sg_free_table_chained(&iod->sg_table, SG_CHUNK_SIZE);
+ sg_free_table_chained(&iod->sg_table, NVME_INLINE_SG_CNT);
nvme_complete_rq(req);
}
@@ -156,7 +156,7 @@ static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
iod->sg_table.sgl = iod->first_sgl;
if (sg_alloc_table_chained(&iod->sg_table,
blk_rq_nr_phys_segments(req),
- iod->sg_table.sgl, SG_CHUNK_SIZE)) {
+ iod->sg_table.sgl, NVME_INLINE_SG_CNT)) {
nvme_cleanup_cmd(req);
return BLK_STS_RESOURCE;
}
@@ -342,7 +342,7 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl)
ctrl->admin_tag_set.reserved_tags = 2; /* connect + keep-alive */
ctrl->admin_tag_set.numa_node = NUMA_NO_NODE;
ctrl->admin_tag_set.cmd_size = sizeof(struct nvme_loop_iod) +
- SG_CHUNK_SIZE * sizeof(struct scatterlist);
+ NVME_INLINE_SG_CNT * sizeof(struct scatterlist);
ctrl->admin_tag_set.driver_data = ctrl;
ctrl->admin_tag_set.nr_hw_queues = 1;
ctrl->admin_tag_set.timeout = ADMIN_TIMEOUT;
@@ -516,7 +516,7 @@ static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl)
ctrl->tag_set.numa_node = NUMA_NO_NODE;
ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
ctrl->tag_set.cmd_size = sizeof(struct nvme_loop_iod) +
- SG_CHUNK_SIZE * sizeof(struct scatterlist);
+ NVME_INLINE_SG_CNT * sizeof(struct scatterlist);
ctrl->tag_set.driver_data = ctrl;
ctrl->tag_set.nr_hw_queues = ctrl->ctrl.queue_count - 1;
ctrl->tag_set.timeout = NVME_IO_TIMEOUT;
diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c
index c6b87ce2b0cc..f5c2a5487761 100644
--- a/drivers/of/of_mdio.c
+++ b/drivers/of/of_mdio.c
@@ -42,14 +42,37 @@ static int of_get_phy_id(struct device_node *device, u32 *phy_id)
return -EINVAL;
}
+static struct mii_timestamper *of_find_mii_timestamper(struct device_node *node)
+{
+ struct of_phandle_args arg;
+ int err;
+
+ err = of_parse_phandle_with_fixed_args(node, "timestamper", 1, 0, &arg);
+
+ if (err == -ENOENT)
+ return NULL;
+ else if (err)
+ return ERR_PTR(err);
+
+ if (arg.args_count != 1)
+ return ERR_PTR(-EINVAL);
+
+ return register_mii_timestamper(arg.np, arg.args[0]);
+}
+
static int of_mdiobus_register_phy(struct mii_bus *mdio,
struct device_node *child, u32 addr)
{
+ struct mii_timestamper *mii_ts;
struct phy_device *phy;
bool is_c45;
int rc;
u32 phy_id;
+ mii_ts = of_find_mii_timestamper(child);
+ if (IS_ERR(mii_ts))
+ return PTR_ERR(mii_ts);
+
is_c45 = of_device_is_compatible(child,
"ethernet-phy-ieee802.3-c45");
@@ -57,11 +80,14 @@ static int of_mdiobus_register_phy(struct mii_bus *mdio,
phy = phy_device_create(mdio, addr, phy_id, 0, NULL);
else
phy = get_phy_device(mdio, addr, is_c45);
- if (IS_ERR(phy))
+ if (IS_ERR(phy)) {
+ unregister_mii_timestamper(mii_ts);
return PTR_ERR(phy);
+ }
rc = of_irq_get(child, 0);
if (rc == -EPROBE_DEFER) {
+ unregister_mii_timestamper(mii_ts);
phy_device_free(phy);
return rc;
}
@@ -90,10 +116,12 @@ static int of_mdiobus_register_phy(struct mii_bus *mdio,
* register it */
rc = phy_device_register(phy);
if (rc) {
+ unregister_mii_timestamper(mii_ts);
phy_device_free(phy);
of_node_put(child);
return rc;
}
+ phy->mii_ts = mii_ts;
dev_dbg(&mdio->dev, "registered phy %pOFn at address %i\n",
child, addr);
@@ -162,7 +190,7 @@ static const struct of_device_id whitelist_phys[] = {
* A device which is not a phy is expected to have a compatible string
* indicating what sort of device it is.
*/
-static bool of_mdiobus_child_is_phy(struct device_node *child)
+bool of_mdiobus_child_is_phy(struct device_node *child)
{
u32 phy_id;
@@ -187,6 +215,7 @@ static bool of_mdiobus_child_is_phy(struct device_node *child)
return false;
}
+EXPORT_SYMBOL(of_mdiobus_child_is_phy);
/**
* of_mdiobus_register - Register mii_bus and create PHYs from the device tree
diff --git a/drivers/of/platform.c b/drivers/of/platform.c
index d93891a05f60..3371e4a06248 100644
--- a/drivers/of/platform.c
+++ b/drivers/of/platform.c
@@ -518,10 +518,11 @@ static int __init of_platform_default_populate_init(void)
{
struct device_node *node;
+ device_links_supplier_sync_state_pause();
+
if (!of_have_populated_dt())
return -ENODEV;
- device_links_supplier_sync_state_pause();
/*
* Handle certain compatibles explicitly, since we don't want to create
* platform_devices for every node in /reserved-memory with a
@@ -545,8 +546,7 @@ arch_initcall_sync(of_platform_default_populate_init);
static int __init of_platform_sync_state_init(void)
{
- if (of_have_populated_dt())
- device_links_supplier_sync_state_resume();
+ device_links_supplier_sync_state_resume();
return 0;
}
late_initcall_sync(of_platform_sync_state_init);
diff --git a/drivers/pci/controller/pcie-rockchip-host.c b/drivers/pci/controller/pcie-rockchip-host.c
index d9b63bfa5dd7..94af6f5828a3 100644
--- a/drivers/pci/controller/pcie-rockchip-host.c
+++ b/drivers/pci/controller/pcie-rockchip-host.c
@@ -834,10 +834,12 @@ static int rockchip_pcie_cfg_atu(struct rockchip_pcie *rockchip)
if (!entry)
return -ENODEV;
+ /* store the register number offset to program RC io outbound ATU */
+ offset = size >> 20;
+
size = resource_size(entry->res);
pci_addr = entry->res->start - entry->offset;
- offset = size >> 20;
for (reg_no = 0; reg_no < (size >> 20); reg_no++) {
err = rockchip_pcie_prog_ob_atu(rockchip,
reg_no + 1 + offset,
diff --git a/drivers/perf/arm_smmuv3_pmu.c b/drivers/perf/arm_smmuv3_pmu.c
index 773128f411f1..d704eccc548f 100644
--- a/drivers/perf/arm_smmuv3_pmu.c
+++ b/drivers/perf/arm_smmuv3_pmu.c
@@ -814,7 +814,7 @@ static int smmu_pmu_probe(struct platform_device *pdev)
if (err) {
dev_err(dev, "Error %d registering hotplug, PMU @%pa\n",
err, &res_0->start);
- goto out_cpuhp_err;
+ return err;
}
err = perf_pmu_register(&smmu_pmu->pmu, name, -1);
@@ -833,8 +833,6 @@ static int smmu_pmu_probe(struct platform_device *pdev)
out_unregister:
cpuhp_state_remove_instance_nocalls(cpuhp_state_num, &smmu_pmu->node);
-out_cpuhp_err:
- put_cpu();
return err;
}
diff --git a/drivers/phy/marvell/phy-mvebu-cp110-comphy.c b/drivers/phy/marvell/phy-mvebu-cp110-comphy.c
index e3b87c94aaf6..e41367f36ee1 100644
--- a/drivers/phy/marvell/phy-mvebu-cp110-comphy.c
+++ b/drivers/phy/marvell/phy-mvebu-cp110-comphy.c
@@ -221,7 +221,7 @@ static const struct mvebu_comphy_conf mvebu_comphy_cp110_modes[] = {
ETH_CONF(2, 0, PHY_INTERFACE_MODE_SGMII, 0x1, COMPHY_FW_MODE_SGMII),
ETH_CONF(2, 0, PHY_INTERFACE_MODE_2500BASEX, 0x1, COMPHY_FW_MODE_HS_SGMII),
ETH_CONF(2, 0, PHY_INTERFACE_MODE_RXAUI, 0x1, COMPHY_FW_MODE_RXAUI),
- ETH_CONF(2, 0, PHY_INTERFACE_MODE_10GKR, 0x1, COMPHY_FW_MODE_XFI),
+ ETH_CONF(2, 0, PHY_INTERFACE_MODE_10GBASER, 0x1, COMPHY_FW_MODE_XFI),
GEN_CONF(2, 0, PHY_MODE_USB_HOST_SS, COMPHY_FW_MODE_USB3H),
GEN_CONF(2, 0, PHY_MODE_SATA, COMPHY_FW_MODE_SATA),
GEN_CONF(2, 0, PHY_MODE_PCIE, COMPHY_FW_MODE_PCIE),
@@ -235,14 +235,14 @@ static const struct mvebu_comphy_conf mvebu_comphy_cp110_modes[] = {
/* lane 4 */
ETH_CONF(4, 0, PHY_INTERFACE_MODE_SGMII, 0x2, COMPHY_FW_MODE_SGMII),
ETH_CONF(4, 0, PHY_INTERFACE_MODE_2500BASEX, 0x2, COMPHY_FW_MODE_HS_SGMII),
- ETH_CONF(4, 0, PHY_INTERFACE_MODE_10GKR, 0x2, COMPHY_FW_MODE_XFI),
+ ETH_CONF(4, 0, PHY_INTERFACE_MODE_10GBASER, 0x2, COMPHY_FW_MODE_XFI),
ETH_CONF(4, 0, PHY_INTERFACE_MODE_RXAUI, 0x2, COMPHY_FW_MODE_RXAUI),
GEN_CONF(4, 0, PHY_MODE_USB_DEVICE_SS, COMPHY_FW_MODE_USB3D),
GEN_CONF(4, 1, PHY_MODE_USB_HOST_SS, COMPHY_FW_MODE_USB3H),
GEN_CONF(4, 1, PHY_MODE_PCIE, COMPHY_FW_MODE_PCIE),
ETH_CONF(4, 1, PHY_INTERFACE_MODE_SGMII, 0x1, COMPHY_FW_MODE_SGMII),
ETH_CONF(4, 1, PHY_INTERFACE_MODE_2500BASEX, -1, COMPHY_FW_MODE_HS_SGMII),
- ETH_CONF(4, 1, PHY_INTERFACE_MODE_10GKR, -1, COMPHY_FW_MODE_XFI),
+ ETH_CONF(4, 1, PHY_INTERFACE_MODE_10GBASER, -1, COMPHY_FW_MODE_XFI),
/* lane 5 */
ETH_CONF(5, 1, PHY_INTERFACE_MODE_RXAUI, 0x2, COMPHY_FW_MODE_RXAUI),
GEN_CONF(5, 1, PHY_MODE_SATA, COMPHY_FW_MODE_SATA),
@@ -342,7 +342,7 @@ static int mvebu_comphy_ethernet_init_reset(struct mvebu_comphy_lane *lane)
MVEBU_COMPHY_SERDES_CFG0_RXAUI_MODE);
switch (lane->submode) {
- case PHY_INTERFACE_MODE_10GKR:
+ case PHY_INTERFACE_MODE_10GBASER:
val |= MVEBU_COMPHY_SERDES_CFG0_GEN_RX(0xe) |
MVEBU_COMPHY_SERDES_CFG0_GEN_TX(0xe);
break;
@@ -417,7 +417,7 @@ static int mvebu_comphy_ethernet_init_reset(struct mvebu_comphy_lane *lane)
/* refclk selection */
val = readl(priv->base + MVEBU_COMPHY_MISC_CTRL0(lane->id));
val &= ~MVEBU_COMPHY_MISC_CTRL0_REFCLK_SEL;
- if (lane->submode == PHY_INTERFACE_MODE_10GKR)
+ if (lane->submode == PHY_INTERFACE_MODE_10GBASER)
val |= MVEBU_COMPHY_MISC_CTRL0_ICP_FORCE;
writel(val, priv->base + MVEBU_COMPHY_MISC_CTRL0(lane->id));
@@ -564,7 +564,7 @@ static int mvebu_comphy_set_mode_rxaui(struct phy *phy)
return mvebu_comphy_init_plls(lane);
}
-static int mvebu_comphy_set_mode_10gkr(struct phy *phy)
+static int mvebu_comphy_set_mode_10gbaser(struct phy *phy)
{
struct mvebu_comphy_lane *lane = phy_get_drvdata(phy);
struct mvebu_comphy_priv *priv = lane->priv;
@@ -735,8 +735,8 @@ static int mvebu_comphy_power_on_legacy(struct phy *phy)
case PHY_INTERFACE_MODE_RXAUI:
ret = mvebu_comphy_set_mode_rxaui(phy);
break;
- case PHY_INTERFACE_MODE_10GKR:
- ret = mvebu_comphy_set_mode_10gkr(phy);
+ case PHY_INTERFACE_MODE_10GBASER:
+ ret = mvebu_comphy_set_mode_10gbaser(phy);
break;
default:
return -ENOTSUPP;
@@ -782,8 +782,8 @@ static int mvebu_comphy_power_on(struct phy *phy)
lane->id);
fw_speed = COMPHY_FW_SPEED_3125;
break;
- case PHY_INTERFACE_MODE_10GKR:
- dev_dbg(priv->dev, "set lane %d to 10G-KR mode\n",
+ case PHY_INTERFACE_MODE_10GBASER:
+ dev_dbg(priv->dev, "set lane %d to 10GBASE-R mode\n",
lane->id);
fw_speed = COMPHY_FW_SPEED_103125;
break;
diff --git a/drivers/pinctrl/Kconfig b/drivers/pinctrl/Kconfig
index 3bfbf2ff6e2b..df0ef69dd474 100644
--- a/drivers/pinctrl/Kconfig
+++ b/drivers/pinctrl/Kconfig
@@ -422,6 +422,7 @@ config PINCTRL_TB10X
config PINCTRL_EQUILIBRIUM
tristate "Generic pinctrl and GPIO driver for Intel Lightning Mountain SoC"
+ depends on OF && HAS_IOMEM
select PINMUX
select PINCONF
select GPIOLIB
diff --git a/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c b/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c
index c6800d220920..bb07024d22ed 100644
--- a/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c
+++ b/drivers/pinctrl/aspeed/pinctrl-aspeed-g6.c
@@ -1088,60 +1088,52 @@ SSSF_PIN_DECL(AF15, GPIOV7, LPCSMI, SIG_DESC_SET(SCU434, 15));
#define AB7 176
SIG_EXPR_LIST_DECL_SESG(AB7, LAD0, LPC, SIG_DESC_SET(SCU434, 16),
- SIG_DESC_CLEAR(SCU510, 6));
-SIG_EXPR_LIST_DECL_SESG(AB7, ESPID0, ESPI, SIG_DESC_SET(SCU434, 16),
SIG_DESC_SET(SCU510, 6));
+SIG_EXPR_LIST_DECL_SESG(AB7, ESPID0, ESPI, SIG_DESC_SET(SCU434, 16));
PIN_DECL_2(AB7, GPIOW0, LAD0, ESPID0);
#define AB8 177
SIG_EXPR_LIST_DECL_SESG(AB8, LAD1, LPC, SIG_DESC_SET(SCU434, 17),
- SIG_DESC_CLEAR(SCU510, 6));
-SIG_EXPR_LIST_DECL_SESG(AB8, ESPID1, ESPI, SIG_DESC_SET(SCU434, 17),
SIG_DESC_SET(SCU510, 6));
+SIG_EXPR_LIST_DECL_SESG(AB8, ESPID1, ESPI, SIG_DESC_SET(SCU434, 17));
PIN_DECL_2(AB8, GPIOW1, LAD1, ESPID1);
#define AC8 178
SIG_EXPR_LIST_DECL_SESG(AC8, LAD2, LPC, SIG_DESC_SET(SCU434, 18),
- SIG_DESC_CLEAR(SCU510, 6));
-SIG_EXPR_LIST_DECL_SESG(AC8, ESPID2, ESPI, SIG_DESC_SET(SCU434, 18),
SIG_DESC_SET(SCU510, 6));
+SIG_EXPR_LIST_DECL_SESG(AC8, ESPID2, ESPI, SIG_DESC_SET(SCU434, 18));
PIN_DECL_2(AC8, GPIOW2, LAD2, ESPID2);
#define AC7 179
SIG_EXPR_LIST_DECL_SESG(AC7, LAD3, LPC, SIG_DESC_SET(SCU434, 19),
- SIG_DESC_CLEAR(SCU510, 6));
-SIG_EXPR_LIST_DECL_SESG(AC7, ESPID3, ESPI, SIG_DESC_SET(SCU434, 19),
SIG_DESC_SET(SCU510, 6));
+SIG_EXPR_LIST_DECL_SESG(AC7, ESPID3, ESPI, SIG_DESC_SET(SCU434, 19));
PIN_DECL_2(AC7, GPIOW3, LAD3, ESPID3);
#define AE7 180
SIG_EXPR_LIST_DECL_SESG(AE7, LCLK, LPC, SIG_DESC_SET(SCU434, 20),
- SIG_DESC_CLEAR(SCU510, 6));
-SIG_EXPR_LIST_DECL_SESG(AE7, ESPICK, ESPI, SIG_DESC_SET(SCU434, 20),
SIG_DESC_SET(SCU510, 6));
+SIG_EXPR_LIST_DECL_SESG(AE7, ESPICK, ESPI, SIG_DESC_SET(SCU434, 20));
PIN_DECL_2(AE7, GPIOW4, LCLK, ESPICK);
#define AF7 181
SIG_EXPR_LIST_DECL_SESG(AF7, LFRAME, LPC, SIG_DESC_SET(SCU434, 21),
- SIG_DESC_CLEAR(SCU510, 6));
-SIG_EXPR_LIST_DECL_SESG(AF7, ESPICS, ESPI, SIG_DESC_SET(SCU434, 21),
SIG_DESC_SET(SCU510, 6));
+SIG_EXPR_LIST_DECL_SESG(AF7, ESPICS, ESPI, SIG_DESC_SET(SCU434, 21));
PIN_DECL_2(AF7, GPIOW5, LFRAME, ESPICS);
#define AD7 182
SIG_EXPR_LIST_DECL_SESG(AD7, LSIRQ, LSIRQ, SIG_DESC_SET(SCU434, 22),
- SIG_DESC_CLEAR(SCU510, 6));
-SIG_EXPR_LIST_DECL_SESG(AD7, ESPIALT, ESPIALT, SIG_DESC_SET(SCU434, 22),
SIG_DESC_SET(SCU510, 6));
+SIG_EXPR_LIST_DECL_SESG(AD7, ESPIALT, ESPIALT, SIG_DESC_SET(SCU434, 22));
PIN_DECL_2(AD7, GPIOW6, LSIRQ, ESPIALT);
FUNC_GROUP_DECL(LSIRQ, AD7);
FUNC_GROUP_DECL(ESPIALT, AD7);
#define AD8 183
SIG_EXPR_LIST_DECL_SESG(AD8, LPCRST, LPC, SIG_DESC_SET(SCU434, 23),
- SIG_DESC_CLEAR(SCU510, 6));
-SIG_EXPR_LIST_DECL_SESG(AD8, ESPIRST, ESPI, SIG_DESC_SET(SCU434, 23),
SIG_DESC_SET(SCU510, 6));
+SIG_EXPR_LIST_DECL_SESG(AD8, ESPIRST, ESPI, SIG_DESC_SET(SCU434, 23));
PIN_DECL_2(AD8, GPIOW7, LPCRST, ESPIRST);
FUNC_GROUP_DECL(LPC, AB7, AB8, AC8, AC7, AE7, AF7, AD8);
diff --git a/drivers/pinctrl/intel/pinctrl-baytrail.c b/drivers/pinctrl/intel/pinctrl-baytrail.c
index 9ffb22211d2b..55141d5de29e 100644
--- a/drivers/pinctrl/intel/pinctrl-baytrail.c
+++ b/drivers/pinctrl/intel/pinctrl-baytrail.c
@@ -110,7 +110,6 @@ struct byt_gpio {
struct platform_device *pdev;
struct pinctrl_dev *pctl_dev;
struct pinctrl_desc pctl_desc;
- raw_spinlock_t lock;
const struct intel_pinctrl_soc_data *soc_data;
struct intel_community *communities_copy;
struct byt_gpio_pin_context *saved_context;
@@ -494,34 +493,34 @@ static const struct intel_pinctrl_soc_data byt_sus_soc_data = {
};
static const struct pinctrl_pin_desc byt_ncore_pins[] = {
- PINCTRL_PIN(0, "GPIO_NCORE0"),
- PINCTRL_PIN(1, "GPIO_NCORE1"),
- PINCTRL_PIN(2, "GPIO_NCORE2"),
- PINCTRL_PIN(3, "GPIO_NCORE3"),
- PINCTRL_PIN(4, "GPIO_NCORE4"),
- PINCTRL_PIN(5, "GPIO_NCORE5"),
- PINCTRL_PIN(6, "GPIO_NCORE6"),
- PINCTRL_PIN(7, "GPIO_NCORE7"),
- PINCTRL_PIN(8, "GPIO_NCORE8"),
- PINCTRL_PIN(9, "GPIO_NCORE9"),
- PINCTRL_PIN(10, "GPIO_NCORE10"),
- PINCTRL_PIN(11, "GPIO_NCORE11"),
- PINCTRL_PIN(12, "GPIO_NCORE12"),
- PINCTRL_PIN(13, "GPIO_NCORE13"),
- PINCTRL_PIN(14, "GPIO_NCORE14"),
- PINCTRL_PIN(15, "GPIO_NCORE15"),
- PINCTRL_PIN(16, "GPIO_NCORE16"),
- PINCTRL_PIN(17, "GPIO_NCORE17"),
- PINCTRL_PIN(18, "GPIO_NCORE18"),
- PINCTRL_PIN(19, "GPIO_NCORE19"),
- PINCTRL_PIN(20, "GPIO_NCORE20"),
- PINCTRL_PIN(21, "GPIO_NCORE21"),
- PINCTRL_PIN(22, "GPIO_NCORE22"),
- PINCTRL_PIN(23, "GPIO_NCORE23"),
- PINCTRL_PIN(24, "GPIO_NCORE24"),
- PINCTRL_PIN(25, "GPIO_NCORE25"),
- PINCTRL_PIN(26, "GPIO_NCORE26"),
- PINCTRL_PIN(27, "GPIO_NCORE27"),
+ PINCTRL_PIN(0, "HV_DDI0_HPD"),
+ PINCTRL_PIN(1, "HV_DDI0_DDC_SDA"),
+ PINCTRL_PIN(2, "HV_DDI0_DDC_SCL"),
+ PINCTRL_PIN(3, "PANEL0_VDDEN"),
+ PINCTRL_PIN(4, "PANEL0_BKLTEN"),
+ PINCTRL_PIN(5, "PANEL0_BKLTCTL"),
+ PINCTRL_PIN(6, "HV_DDI1_HPD"),
+ PINCTRL_PIN(7, "HV_DDI1_DDC_SDA"),
+ PINCTRL_PIN(8, "HV_DDI1_DDC_SCL"),
+ PINCTRL_PIN(9, "PANEL1_VDDEN"),
+ PINCTRL_PIN(10, "PANEL1_BKLTEN"),
+ PINCTRL_PIN(11, "PANEL1_BKLTCTL"),
+ PINCTRL_PIN(12, "GP_INTD_DSI_TE1"),
+ PINCTRL_PIN(13, "HV_DDI2_DDC_SDA"),
+ PINCTRL_PIN(14, "HV_DDI2_DDC_SCL"),
+ PINCTRL_PIN(15, "GP_CAMERASB00"),
+ PINCTRL_PIN(16, "GP_CAMERASB01"),
+ PINCTRL_PIN(17, "GP_CAMERASB02"),
+ PINCTRL_PIN(18, "GP_CAMERASB03"),
+ PINCTRL_PIN(19, "GP_CAMERASB04"),
+ PINCTRL_PIN(20, "GP_CAMERASB05"),
+ PINCTRL_PIN(21, "GP_CAMERASB06"),
+ PINCTRL_PIN(22, "GP_CAMERASB07"),
+ PINCTRL_PIN(23, "GP_CAMERASB08"),
+ PINCTRL_PIN(24, "GP_CAMERASB09"),
+ PINCTRL_PIN(25, "GP_CAMERASB10"),
+ PINCTRL_PIN(26, "GP_CAMERASB11"),
+ PINCTRL_PIN(27, "GP_INTD_DSI_TE2"),
};
static const unsigned int byt_ncore_pins_map[BYT_NGPIO_NCORE] = {
@@ -549,6 +548,8 @@ static const struct intel_pinctrl_soc_data *byt_soc_data[] = {
NULL
};
+static DEFINE_RAW_SPINLOCK(byt_lock);
+
static struct intel_community *byt_get_community(struct byt_gpio *vg,
unsigned int pin)
{
@@ -658,7 +659,7 @@ static void byt_set_group_simple_mux(struct byt_gpio *vg,
unsigned long flags;
int i;
- raw_spin_lock_irqsave(&vg->lock, flags);
+ raw_spin_lock_irqsave(&byt_lock, flags);
for (i = 0; i < group.npins; i++) {
void __iomem *padcfg0;
@@ -678,7 +679,7 @@ static void byt_set_group_simple_mux(struct byt_gpio *vg,
writel(value, padcfg0);
}
- raw_spin_unlock_irqrestore(&vg->lock, flags);
+ raw_spin_unlock_irqrestore(&byt_lock, flags);
}
static void byt_set_group_mixed_mux(struct byt_gpio *vg,
@@ -688,7 +689,7 @@ static void byt_set_group_mixed_mux(struct byt_gpio *vg,
unsigned long flags;
int i;
- raw_spin_lock_irqsave(&vg->lock, flags);
+ raw_spin_lock_irqsave(&byt_lock, flags);
for (i = 0; i < group.npins; i++) {
void __iomem *padcfg0;
@@ -708,7 +709,7 @@ static void byt_set_group_mixed_mux(struct byt_gpio *vg,
writel(value, padcfg0);
}
- raw_spin_unlock_irqrestore(&vg->lock, flags);
+ raw_spin_unlock_irqrestore(&byt_lock, flags);
}
static int byt_set_mux(struct pinctrl_dev *pctldev, unsigned int func_selector,
@@ -749,11 +750,11 @@ static void byt_gpio_clear_triggering(struct byt_gpio *vg, unsigned int offset)
unsigned long flags;
u32 value;
- raw_spin_lock_irqsave(&vg->lock, flags);
+ raw_spin_lock_irqsave(&byt_lock, flags);
value = readl(reg);
value &= ~(BYT_TRIG_POS | BYT_TRIG_NEG | BYT_TRIG_LVL);
writel(value, reg);
- raw_spin_unlock_irqrestore(&vg->lock, flags);
+ raw_spin_unlock_irqrestore(&byt_lock, flags);
}
static int byt_gpio_request_enable(struct pinctrl_dev *pctl_dev,
@@ -765,7 +766,7 @@ static int byt_gpio_request_enable(struct pinctrl_dev *pctl_dev,
u32 value, gpio_mux;
unsigned long flags;
- raw_spin_lock_irqsave(&vg->lock, flags);
+ raw_spin_lock_irqsave(&byt_lock, flags);
/*
* In most cases, func pin mux 000 means GPIO function.
@@ -787,7 +788,7 @@ static int byt_gpio_request_enable(struct pinctrl_dev *pctl_dev,
"pin %u forcibly re-configured as GPIO\n", offset);
}
- raw_spin_unlock_irqrestore(&vg->lock, flags);
+ raw_spin_unlock_irqrestore(&byt_lock, flags);
pm_runtime_get(&vg->pdev->dev);
@@ -815,7 +816,7 @@ static int byt_gpio_set_direction(struct pinctrl_dev *pctl_dev,
unsigned long flags;
u32 value;
- raw_spin_lock_irqsave(&vg->lock, flags);
+ raw_spin_lock_irqsave(&byt_lock, flags);
value = readl(val_reg);
value &= ~BYT_DIR_MASK;
@@ -832,7 +833,7 @@ static int byt_gpio_set_direction(struct pinctrl_dev *pctl_dev,
"Potential Error: Setting GPIO with direct_irq_en to output");
writel(value, val_reg);
- raw_spin_unlock_irqrestore(&vg->lock, flags);
+ raw_spin_unlock_irqrestore(&byt_lock, flags);
return 0;
}
@@ -901,11 +902,11 @@ static int byt_pin_config_get(struct pinctrl_dev *pctl_dev, unsigned int offset,
u32 conf, pull, val, debounce;
u16 arg = 0;
- raw_spin_lock_irqsave(&vg->lock, flags);
+ raw_spin_lock_irqsave(&byt_lock, flags);
conf = readl(conf_reg);
pull = conf & BYT_PULL_ASSIGN_MASK;
val = readl(val_reg);
- raw_spin_unlock_irqrestore(&vg->lock, flags);
+ raw_spin_unlock_irqrestore(&byt_lock, flags);
switch (param) {
case PIN_CONFIG_BIAS_DISABLE:
@@ -932,9 +933,9 @@ static int byt_pin_config_get(struct pinctrl_dev *pctl_dev, unsigned int offset,
if (!(conf & BYT_DEBOUNCE_EN))
return -EINVAL;
- raw_spin_lock_irqsave(&vg->lock, flags);
+ raw_spin_lock_irqsave(&byt_lock, flags);
debounce = readl(db_reg);
- raw_spin_unlock_irqrestore(&vg->lock, flags);
+ raw_spin_unlock_irqrestore(&byt_lock, flags);
switch (debounce & BYT_DEBOUNCE_PULSE_MASK) {
case BYT_DEBOUNCE_PULSE_375US:
@@ -986,7 +987,7 @@ static int byt_pin_config_set(struct pinctrl_dev *pctl_dev,
u32 conf, val, debounce;
int i, ret = 0;
- raw_spin_lock_irqsave(&vg->lock, flags);
+ raw_spin_lock_irqsave(&byt_lock, flags);
conf = readl(conf_reg);
val = readl(val_reg);
@@ -1094,7 +1095,7 @@ static int byt_pin_config_set(struct pinctrl_dev *pctl_dev,
if (!ret)
writel(conf, conf_reg);
- raw_spin_unlock_irqrestore(&vg->lock, flags);
+ raw_spin_unlock_irqrestore(&byt_lock, flags);
return ret;
}
@@ -1119,9 +1120,9 @@ static int byt_gpio_get(struct gpio_chip *chip, unsigned int offset)
unsigned long flags;
u32 val;
- raw_spin_lock_irqsave(&vg->lock, flags);
+ raw_spin_lock_irqsave(&byt_lock, flags);
val = readl(reg);
- raw_spin_unlock_irqrestore(&vg->lock, flags);
+ raw_spin_unlock_irqrestore(&byt_lock, flags);
return !!(val & BYT_LEVEL);
}
@@ -1136,13 +1137,13 @@ static void byt_gpio_set(struct gpio_chip *chip, unsigned int offset, int value)
if (!reg)
return;
- raw_spin_lock_irqsave(&vg->lock, flags);
+ raw_spin_lock_irqsave(&byt_lock, flags);
old_val = readl(reg);
if (value)
writel(old_val | BYT_LEVEL, reg);
else
writel(old_val & ~BYT_LEVEL, reg);
- raw_spin_unlock_irqrestore(&vg->lock, flags);
+ raw_spin_unlock_irqrestore(&byt_lock, flags);
}
static int byt_gpio_get_direction(struct gpio_chip *chip, unsigned int offset)
@@ -1155,9 +1156,9 @@ static int byt_gpio_get_direction(struct gpio_chip *chip, unsigned int offset)
if (!reg)
return -EINVAL;
- raw_spin_lock_irqsave(&vg->lock, flags);
+ raw_spin_lock_irqsave(&byt_lock, flags);
value = readl(reg);
- raw_spin_unlock_irqrestore(&vg->lock, flags);
+ raw_spin_unlock_irqrestore(&byt_lock, flags);
if (!(value & BYT_OUTPUT_EN))
return 0;
@@ -1200,14 +1201,14 @@ static void byt_gpio_dbg_show(struct seq_file *s, struct gpio_chip *chip)
const char *label;
unsigned int pin;
- raw_spin_lock_irqsave(&vg->lock, flags);
+ raw_spin_lock_irqsave(&byt_lock, flags);
pin = vg->soc_data->pins[i].number;
reg = byt_gpio_reg(vg, pin, BYT_CONF0_REG);
if (!reg) {
seq_printf(s,
"Could not retrieve pin %i conf0 reg\n",
pin);
- raw_spin_unlock_irqrestore(&vg->lock, flags);
+ raw_spin_unlock_irqrestore(&byt_lock, flags);
continue;
}
conf0 = readl(reg);
@@ -1216,11 +1217,11 @@ static void byt_gpio_dbg_show(struct seq_file *s, struct gpio_chip *chip)
if (!reg) {
seq_printf(s,
"Could not retrieve pin %i val reg\n", pin);
- raw_spin_unlock_irqrestore(&vg->lock, flags);
+ raw_spin_unlock_irqrestore(&byt_lock, flags);
continue;
}
val = readl(reg);
- raw_spin_unlock_irqrestore(&vg->lock, flags);
+ raw_spin_unlock_irqrestore(&byt_lock, flags);
comm = byt_get_community(vg, pin);
if (!comm) {
@@ -1304,9 +1305,9 @@ static void byt_irq_ack(struct irq_data *d)
if (!reg)
return;
- raw_spin_lock(&vg->lock);
+ raw_spin_lock(&byt_lock);
writel(BIT(offset % 32), reg);
- raw_spin_unlock(&vg->lock);
+ raw_spin_unlock(&byt_lock);
}
static void byt_irq_mask(struct irq_data *d)
@@ -1330,7 +1331,7 @@ static void byt_irq_unmask(struct irq_data *d)
if (!reg)
return;
- raw_spin_lock_irqsave(&vg->lock, flags);
+ raw_spin_lock_irqsave(&byt_lock, flags);
value = readl(reg);
switch (irqd_get_trigger_type(d)) {
@@ -1353,7 +1354,7 @@ static void byt_irq_unmask(struct irq_data *d)
writel(value, reg);
- raw_spin_unlock_irqrestore(&vg->lock, flags);
+ raw_spin_unlock_irqrestore(&byt_lock, flags);
}
static int byt_irq_type(struct irq_data *d, unsigned int type)
@@ -1367,7 +1368,7 @@ static int byt_irq_type(struct irq_data *d, unsigned int type)
if (!reg || offset >= vg->chip.ngpio)
return -EINVAL;
- raw_spin_lock_irqsave(&vg->lock, flags);
+ raw_spin_lock_irqsave(&byt_lock, flags);
value = readl(reg);
WARN(value & BYT_DIRECT_IRQ_EN,
@@ -1389,7 +1390,7 @@ static int byt_irq_type(struct irq_data *d, unsigned int type)
else if (type & IRQ_TYPE_LEVEL_MASK)
irq_set_handler_locked(d, handle_level_irq);
- raw_spin_unlock_irqrestore(&vg->lock, flags);
+ raw_spin_unlock_irqrestore(&byt_lock, flags);
return 0;
}
@@ -1425,9 +1426,9 @@ static void byt_gpio_irq_handler(struct irq_desc *desc)
continue;
}
- raw_spin_lock(&vg->lock);
+ raw_spin_lock(&byt_lock);
pending = readl(reg);
- raw_spin_unlock(&vg->lock);
+ raw_spin_unlock(&byt_lock);
for_each_set_bit(pin, &pending, 32) {
virq = irq_find_mapping(vg->chip.irq.domain, base + pin);
generic_handle_irq(virq);
@@ -1450,9 +1451,9 @@ static void byt_init_irq_valid_mask(struct gpio_chip *chip,
*/
}
-static void byt_gpio_irq_init_hw(struct byt_gpio *vg)
+static int byt_gpio_irq_init_hw(struct gpio_chip *chip)
{
- struct gpio_chip *gc = &vg->chip;
+ struct byt_gpio *vg = gpiochip_get_data(chip);
struct device *dev = &vg->pdev->dev;
void __iomem *reg;
u32 base, value;
@@ -1476,7 +1477,7 @@ static void byt_gpio_irq_init_hw(struct byt_gpio *vg)
value = readl(reg);
if (value & BYT_DIRECT_IRQ_EN) {
- clear_bit(i, gc->irq.valid_mask);
+ clear_bit(i, chip->irq.valid_mask);
dev_dbg(dev, "excluding GPIO %d from IRQ domain\n", i);
} else if ((value & BYT_PIN_MUX) == byt_get_gpio_mux(vg, i)) {
byt_gpio_clear_triggering(vg, i);
@@ -1504,6 +1505,21 @@ static void byt_gpio_irq_init_hw(struct byt_gpio *vg)
"GPIO interrupt error, pins misconfigured. INT_STAT%u: 0x%08x\n",
base / 32, value);
}
+
+ return 0;
+}
+
+static int byt_gpio_add_pin_ranges(struct gpio_chip *chip)
+{
+ struct byt_gpio *vg = gpiochip_get_data(chip);
+ struct device *dev = &vg->pdev->dev;
+ int ret;
+
+ ret = gpiochip_add_pin_range(chip, dev_name(dev), 0, 0, vg->soc_data->npins);
+ if (ret)
+ dev_err(dev, "failed to add GPIO pin range\n");
+
+ return ret;
}
static int byt_gpio_probe(struct byt_gpio *vg)
@@ -1518,6 +1534,7 @@ static int byt_gpio_probe(struct byt_gpio *vg)
gc->label = dev_name(&vg->pdev->dev);
gc->base = -1;
gc->can_sleep = false;
+ gc->add_pin_ranges = byt_gpio_add_pin_ranges;
gc->parent = &vg->pdev->dev;
gc->ngpio = vg->soc_data->npins;
gc->irq.init_valid_mask = byt_init_irq_valid_mask;
@@ -1528,33 +1545,30 @@ static int byt_gpio_probe(struct byt_gpio *vg)
if (!vg->saved_context)
return -ENOMEM;
#endif
- ret = devm_gpiochip_add_data(&vg->pdev->dev, gc, vg);
- if (ret) {
- dev_err(&vg->pdev->dev, "failed adding byt-gpio chip\n");
- return ret;
- }
-
- ret = gpiochip_add_pin_range(&vg->chip, dev_name(&vg->pdev->dev),
- 0, 0, vg->soc_data->npins);
- if (ret) {
- dev_err(&vg->pdev->dev, "failed to add GPIO pin range\n");
- return ret;
- }
/* set up interrupts */
irq_rc = platform_get_resource(vg->pdev, IORESOURCE_IRQ, 0);
if (irq_rc && irq_rc->start) {
- byt_gpio_irq_init_hw(vg);
- ret = gpiochip_irqchip_add(gc, &byt_irqchip, 0,
- handle_bad_irq, IRQ_TYPE_NONE);
- if (ret) {
- dev_err(&vg->pdev->dev, "failed to add irqchip\n");
- return ret;
- }
+ struct gpio_irq_chip *girq;
+
+ girq = &gc->irq;
+ girq->chip = &byt_irqchip;
+ girq->init_hw = byt_gpio_irq_init_hw;
+ girq->parent_handler = byt_gpio_irq_handler;
+ girq->num_parents = 1;
+ girq->parents = devm_kcalloc(&vg->pdev->dev, girq->num_parents,
+ sizeof(*girq->parents), GFP_KERNEL);
+ if (!girq->parents)
+ return -ENOMEM;
+ girq->parents[0] = (unsigned int)irq_rc->start;
+ girq->default_type = IRQ_TYPE_NONE;
+ girq->handler = handle_bad_irq;
+ }
- gpiochip_set_chained_irqchip(gc, &byt_irqchip,
- (unsigned)irq_rc->start,
- byt_gpio_irq_handler);
+ ret = devm_gpiochip_add_data(&vg->pdev->dev, gc, vg);
+ if (ret) {
+ dev_err(&vg->pdev->dev, "failed adding byt-gpio chip\n");
+ return ret;
}
return ret;
@@ -1638,8 +1652,6 @@ static int byt_pinctrl_probe(struct platform_device *pdev)
return PTR_ERR(vg->pctl_dev);
}
- raw_spin_lock_init(&vg->lock);
-
ret = byt_gpio_probe(vg);
if (ret)
return ret;
@@ -1654,8 +1666,11 @@ static int byt_pinctrl_probe(struct platform_device *pdev)
static int byt_gpio_suspend(struct device *dev)
{
struct byt_gpio *vg = dev_get_drvdata(dev);
+ unsigned long flags;
int i;
+ raw_spin_lock_irqsave(&byt_lock, flags);
+
for (i = 0; i < vg->soc_data->npins; i++) {
void __iomem *reg;
u32 value;
@@ -1676,14 +1691,18 @@ static int byt_gpio_suspend(struct device *dev)
vg->saved_context[i].val = value;
}
+ raw_spin_unlock_irqrestore(&byt_lock, flags);
return 0;
}
static int byt_gpio_resume(struct device *dev)
{
struct byt_gpio *vg = dev_get_drvdata(dev);
+ unsigned long flags;
int i;
+ raw_spin_lock_irqsave(&byt_lock, flags);
+
for (i = 0; i < vg->soc_data->npins; i++) {
void __iomem *reg;
u32 value;
@@ -1721,6 +1740,7 @@ static int byt_gpio_resume(struct device *dev)
}
}
+ raw_spin_unlock_irqrestore(&byt_lock, flags);
return 0;
}
#endif
diff --git a/drivers/pinctrl/intel/pinctrl-cherryview.c b/drivers/pinctrl/intel/pinctrl-cherryview.c
index 582fa8a75559..60527b93a711 100644
--- a/drivers/pinctrl/intel/pinctrl-cherryview.c
+++ b/drivers/pinctrl/intel/pinctrl-cherryview.c
@@ -149,6 +149,7 @@ struct chv_pin_context {
* @chip: GPIO chip in this pin controller
* @irqchip: IRQ chip in this pin controller
* @regs: MMIO registers
+ * @irq: Our parent irq
* @intr_lines: Stores mapping between 16 HW interrupt wires and GPIO
* offset (in GPIO number space)
* @community: Community this pinctrl instance represents
@@ -165,6 +166,7 @@ struct chv_pinctrl {
struct gpio_chip chip;
struct irq_chip irqchip;
void __iomem *regs;
+ unsigned int irq;
unsigned int intr_lines[16];
const struct chv_community *community;
u32 saved_intmask;
@@ -1555,39 +1557,9 @@ static void chv_init_irq_valid_mask(struct gpio_chip *chip,
}
}
-static int chv_gpio_probe(struct chv_pinctrl *pctrl, int irq)
+static int chv_gpio_irq_init_hw(struct gpio_chip *chip)
{
- const struct chv_gpio_pinrange *range;
- struct gpio_chip *chip = &pctrl->chip;
- bool need_valid_mask = !dmi_check_system(chv_no_valid_mask);
- const struct chv_community *community = pctrl->community;
- int ret, i, irq_base;
-
- *chip = chv_gpio_chip;
-
- chip->ngpio = community->pins[community->npins - 1].number + 1;
- chip->label = dev_name(pctrl->dev);
- chip->parent = pctrl->dev;
- chip->base = -1;
- if (need_valid_mask)
- chip->irq.init_valid_mask = chv_init_irq_valid_mask;
-
- ret = devm_gpiochip_add_data(pctrl->dev, chip, pctrl);
- if (ret) {
- dev_err(pctrl->dev, "Failed to register gpiochip\n");
- return ret;
- }
-
- for (i = 0; i < community->ngpio_ranges; i++) {
- range = &community->gpio_ranges[i];
- ret = gpiochip_add_pin_range(chip, dev_name(pctrl->dev),
- range->base, range->base,
- range->npins);
- if (ret) {
- dev_err(pctrl->dev, "failed to add GPIO pin range\n");
- return ret;
- }
- }
+ struct chv_pinctrl *pctrl = gpiochip_get_data(chip);
/*
* The same set of machines in chv_no_valid_mask[] have incorrectly
@@ -1596,7 +1568,7 @@ static int chv_gpio_probe(struct chv_pinctrl *pctrl, int irq)
*
* See also https://bugzilla.kernel.org/show_bug.cgi?id=197953.
*/
- if (!need_valid_mask) {
+ if (!pctrl->chip.irq.init_valid_mask) {
/*
* Mask all interrupts the community is able to generate
* but leave the ones that can only generate GPEs unmasked.
@@ -1608,15 +1580,47 @@ static int chv_gpio_probe(struct chv_pinctrl *pctrl, int irq)
/* Clear all interrupts */
chv_writel(0xffff, pctrl->regs + CHV_INTSTAT);
- if (!need_valid_mask) {
- irq_base = devm_irq_alloc_descs(pctrl->dev, -1, 0,
- community->npins, NUMA_NO_NODE);
- if (irq_base < 0) {
- dev_err(pctrl->dev, "Failed to allocate IRQ numbers\n");
- return irq_base;
+ return 0;
+}
+
+static int chv_gpio_add_pin_ranges(struct gpio_chip *chip)
+{
+ struct chv_pinctrl *pctrl = gpiochip_get_data(chip);
+ const struct chv_community *community = pctrl->community;
+ const struct chv_gpio_pinrange *range;
+ int ret, i;
+
+ for (i = 0; i < community->ngpio_ranges; i++) {
+ range = &community->gpio_ranges[i];
+ ret = gpiochip_add_pin_range(chip, dev_name(pctrl->dev),
+ range->base, range->base,
+ range->npins);
+ if (ret) {
+ dev_err(pctrl->dev, "failed to add GPIO pin range\n");
+ return ret;
}
}
+ return 0;
+}
+
+static int chv_gpio_probe(struct chv_pinctrl *pctrl, int irq)
+{
+ const struct chv_gpio_pinrange *range;
+ struct gpio_chip *chip = &pctrl->chip;
+ bool need_valid_mask = !dmi_check_system(chv_no_valid_mask);
+ const struct chv_community *community = pctrl->community;
+ int ret, i, irq_base;
+
+ *chip = chv_gpio_chip;
+
+ chip->ngpio = community->pins[community->npins - 1].number + 1;
+ chip->label = dev_name(pctrl->dev);
+ chip->add_pin_ranges = chv_gpio_add_pin_ranges;
+ chip->parent = pctrl->dev;
+ chip->base = -1;
+
+ pctrl->irq = irq;
pctrl->irqchip.name = "chv-gpio";
pctrl->irqchip.irq_startup = chv_gpio_irq_startup;
pctrl->irqchip.irq_ack = chv_gpio_irq_ack;
@@ -1625,10 +1629,27 @@ static int chv_gpio_probe(struct chv_pinctrl *pctrl, int irq)
pctrl->irqchip.irq_set_type = chv_gpio_irq_type;
pctrl->irqchip.flags = IRQCHIP_SKIP_SET_WAKE;
- ret = gpiochip_irqchip_add(chip, &pctrl->irqchip, 0,
- handle_bad_irq, IRQ_TYPE_NONE);
+ chip->irq.chip = &pctrl->irqchip;
+ chip->irq.init_hw = chv_gpio_irq_init_hw;
+ chip->irq.parent_handler = chv_gpio_irq_handler;
+ chip->irq.num_parents = 1;
+ chip->irq.parents = &pctrl->irq;
+ chip->irq.default_type = IRQ_TYPE_NONE;
+ chip->irq.handler = handle_bad_irq;
+ if (need_valid_mask) {
+ chip->irq.init_valid_mask = chv_init_irq_valid_mask;
+ } else {
+ irq_base = devm_irq_alloc_descs(pctrl->dev, -1, 0,
+ community->npins, NUMA_NO_NODE);
+ if (irq_base < 0) {
+ dev_err(pctrl->dev, "Failed to allocate IRQ numbers\n");
+ return irq_base;
+ }
+ }
+
+ ret = devm_gpiochip_add_data(pctrl->dev, chip, pctrl);
if (ret) {
- dev_err(pctrl->dev, "failed to add IRQ chip\n");
+ dev_err(pctrl->dev, "Failed to register gpiochip\n");
return ret;
}
@@ -1642,8 +1663,6 @@ static int chv_gpio_probe(struct chv_pinctrl *pctrl, int irq)
}
}
- gpiochip_set_chained_irqchip(chip, &pctrl->irqchip, irq,
- chv_gpio_irq_handler);
return 0;
}
diff --git a/drivers/pinctrl/pinctrl-ingenic.c b/drivers/pinctrl/pinctrl-ingenic.c
index 24e0e2ef47a4..369e04350e3d 100644
--- a/drivers/pinctrl/pinctrl-ingenic.c
+++ b/drivers/pinctrl/pinctrl-ingenic.c
@@ -1809,7 +1809,7 @@ static void ingenic_set_bias(struct ingenic_pinctrl *jzpc,
static void ingenic_set_output_level(struct ingenic_pinctrl *jzpc,
unsigned int pin, bool high)
{
- if (jzpc->version >= ID_JZ4770)
+ if (jzpc->version >= ID_JZ4760)
ingenic_config_pin(jzpc, pin, JZ4760_GPIO_PAT0, high);
else
ingenic_config_pin(jzpc, pin, JZ4740_GPIO_DATA, high);
diff --git a/drivers/pinctrl/pinmux.c b/drivers/pinctrl/pinmux.c
index e914f6efd39e..9503ddf2edc7 100644
--- a/drivers/pinctrl/pinmux.c
+++ b/drivers/pinctrl/pinmux.c
@@ -85,7 +85,7 @@ bool pinmux_can_be_used_for_gpio(struct pinctrl_dev *pctldev, unsigned pin)
const struct pinmux_ops *ops = pctldev->desc->pmxops;
/* Can't inspect pin, assume it can be used */
- if (!desc)
+ if (!desc || !ops)
return true;
if (ops->strict && desc->mux_usecount)
diff --git a/drivers/platform/mellanox/mlxbf-bootctl.c b/drivers/platform/mellanox/mlxbf-bootctl.c
index 61753b648506..5d21c6adf1ab 100644
--- a/drivers/platform/mellanox/mlxbf-bootctl.c
+++ b/drivers/platform/mellanox/mlxbf-bootctl.c
@@ -309,7 +309,7 @@ static struct platform_driver mlxbf_bootctl_driver = {
.probe = mlxbf_bootctl_probe,
.driver = {
.name = "mlxbf-bootctl",
- .groups = mlxbf_bootctl_groups,
+ .dev_groups = mlxbf_bootctl_groups,
.acpi_match_table = mlxbf_bootctl_acpi_ids,
}
};
diff --git a/drivers/platform/mips/Kconfig b/drivers/platform/mips/Kconfig
index f4d0a86c00d0..5e77b0dc5fd6 100644
--- a/drivers/platform/mips/Kconfig
+++ b/drivers/platform/mips/Kconfig
@@ -18,7 +18,7 @@ if MIPS_PLATFORM_DEVICES
config CPU_HWMON
tristate "Loongson-3 CPU HWMon Driver"
- depends on CONFIG_MACH_LOONGSON64
+ depends on MACH_LOONGSON64
select HWMON
default y
help
diff --git a/drivers/platform/x86/hp-wmi.c b/drivers/platform/x86/hp-wmi.c
index 9579a706fc08..a881b709af25 100644
--- a/drivers/platform/x86/hp-wmi.c
+++ b/drivers/platform/x86/hp-wmi.c
@@ -300,7 +300,7 @@ static int __init hp_wmi_bios_2008_later(void)
static int __init hp_wmi_bios_2009_later(void)
{
- int state = 0;
+ u8 state[128];
int ret = hp_wmi_perform_query(HPWMI_FEATURE2_QUERY, HPWMI_READ, &state,
sizeof(state), sizeof(state));
if (!ret)
diff --git a/drivers/platform/x86/pcengines-apuv2.c b/drivers/platform/x86/pcengines-apuv2.c
index 48b112b4f0b0..9b11ef1a401f 100644
--- a/drivers/platform/x86/pcengines-apuv2.c
+++ b/drivers/platform/x86/pcengines-apuv2.c
@@ -2,7 +2,7 @@
/*
* PC-Engines APUv2/APUv3 board platform driver
- * for gpio buttons and LEDs
+ * for GPIO buttons and LEDs
*
* Copyright (C) 2018 metux IT consult
* Author: Enrico Weigelt <[email protected]>
@@ -23,10 +23,10 @@
/*
* NOTE: this driver only supports APUv2/3 - not APUv1, as this one
- * has completely different register layouts
+ * has completely different register layouts.
*/
-/* register mappings */
+/* Register mappings */
#define APU2_GPIO_REG_LED1 AMD_FCH_GPIO_REG_GPIO57
#define APU2_GPIO_REG_LED2 AMD_FCH_GPIO_REG_GPIO58
#define APU2_GPIO_REG_LED3 AMD_FCH_GPIO_REG_GPIO59_DEVSLP1
@@ -35,7 +35,7 @@
#define APU2_GPIO_REG_MPCIE2 AMD_FCH_GPIO_REG_GPIO59_DEVSLP0
#define APU2_GPIO_REG_MPCIE3 AMD_FCH_GPIO_REG_GPIO51
-/* order in which the gpio lines are defined in the register list */
+/* Order in which the GPIO lines are defined in the register list */
#define APU2_GPIO_LINE_LED1 0
#define APU2_GPIO_LINE_LED2 1
#define APU2_GPIO_LINE_LED3 2
@@ -44,7 +44,7 @@
#define APU2_GPIO_LINE_MPCIE2 5
#define APU2_GPIO_LINE_MPCIE3 6
-/* gpio device */
+/* GPIO device */
static int apu2_gpio_regs[] = {
[APU2_GPIO_LINE_LED1] = APU2_GPIO_REG_LED1,
@@ -72,7 +72,7 @@ static const struct amd_fch_gpio_pdata board_apu2 = {
.gpio_names = apu2_gpio_names,
};
-/* gpio leds device */
+/* GPIO LEDs device */
static const struct gpio_led apu2_leds[] = {
{ .name = "apu:green:1" },
@@ -95,12 +95,12 @@ static struct gpiod_lookup_table gpios_led_table = {
NULL, 1, GPIO_ACTIVE_LOW),
GPIO_LOOKUP_IDX(AMD_FCH_GPIO_DRIVER_NAME, APU2_GPIO_LINE_LED3,
NULL, 2, GPIO_ACTIVE_LOW),
- GPIO_LOOKUP_IDX(AMD_FCH_GPIO_DRIVER_NAME, APU2_GPIO_REG_SIMSWAP,
+ GPIO_LOOKUP_IDX(AMD_FCH_GPIO_DRIVER_NAME, APU2_GPIO_LINE_SIMSWAP,
NULL, 3, GPIO_ACTIVE_LOW),
}
};
-/* gpio keyboard device */
+/* GPIO keyboard device */
static struct gpio_keys_button apu2_keys_buttons[] = {
{
@@ -129,12 +129,12 @@ static struct gpiod_lookup_table gpios_key_table = {
}
};
-/* board setup */
+/* Board setup */
-/* note: matching works on string prefix, so "apu2" must come before "apu" */
+/* Note: matching works on string prefix, so "apu2" must come before "apu" */
static const struct dmi_system_id apu_gpio_dmi_table[] __initconst = {
- /* APU2 w/ legacy bios < 4.0.8 */
+ /* APU2 w/ legacy BIOS < 4.0.8 */
{
.ident = "apu2",
.matches = {
@@ -143,7 +143,7 @@ static const struct dmi_system_id apu_gpio_dmi_table[] __initconst = {
},
.driver_data = (void *)&board_apu2,
},
- /* APU2 w/ legacy bios >= 4.0.8 */
+ /* APU2 w/ legacy BIOS >= 4.0.8 */
{
.ident = "apu2",
.matches = {
@@ -152,7 +152,7 @@ static const struct dmi_system_id apu_gpio_dmi_table[] __initconst = {
},
.driver_data = (void *)&board_apu2,
},
- /* APU2 w/ maainline bios */
+ /* APU2 w/ mainline BIOS */
{
.ident = "apu2",
.matches = {
@@ -162,7 +162,7 @@ static const struct dmi_system_id apu_gpio_dmi_table[] __initconst = {
.driver_data = (void *)&board_apu2,
},
- /* APU3 w/ legacy bios < 4.0.8 */
+ /* APU3 w/ legacy BIOS < 4.0.8 */
{
.ident = "apu3",
.matches = {
@@ -171,7 +171,7 @@ static const struct dmi_system_id apu_gpio_dmi_table[] __initconst = {
},
.driver_data = (void *)&board_apu2,
},
- /* APU3 w/ legacy bios >= 4.0.8 */
+ /* APU3 w/ legacy BIOS >= 4.0.8 */
{
.ident = "apu3",
.matches = {
@@ -180,7 +180,7 @@ static const struct dmi_system_id apu_gpio_dmi_table[] __initconst = {
},
.driver_data = (void *)&board_apu2,
},
- /* APU3 w/ mainline bios */
+ /* APU3 w/ mainline BIOS */
{
.ident = "apu3",
.matches = {
@@ -189,6 +189,33 @@ static const struct dmi_system_id apu_gpio_dmi_table[] __initconst = {
},
.driver_data = (void *)&board_apu2,
},
+ /* APU4 w/ legacy BIOS < 4.0.8 */
+ {
+ .ident = "apu4",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "PC Engines"),
+ DMI_MATCH(DMI_BOARD_NAME, "APU4")
+ },
+ .driver_data = (void *)&board_apu2,
+ },
+ /* APU4 w/ legacy BIOS >= 4.0.8 */
+ {
+ .ident = "apu4",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "PC Engines"),
+ DMI_MATCH(DMI_BOARD_NAME, "apu4")
+ },
+ .driver_data = (void *)&board_apu2,
+ },
+ /* APU4 w/ mainline BIOS */
+ {
+ .ident = "apu4",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "PC Engines"),
+ DMI_MATCH(DMI_BOARD_NAME, "PC Engines apu4")
+ },
+ .driver_data = (void *)&board_apu2,
+ },
{}
};
@@ -223,7 +250,7 @@ static int __init apu_board_init(void)
id = dmi_first_match(apu_gpio_dmi_table);
if (!id) {
- pr_err("failed to detect apu board via dmi\n");
+ pr_err("failed to detect APU board via DMI\n");
return -ENODEV;
}
@@ -262,7 +289,7 @@ module_init(apu_board_init);
module_exit(apu_board_exit);
MODULE_AUTHOR("Enrico Weigelt, metux IT consult <[email protected]>");
-MODULE_DESCRIPTION("PC Engines APUv2/APUv3 board GPIO/LED/keys driver");
+MODULE_DESCRIPTION("PC Engines APUv2/APUv3 board GPIO/LEDs/keys driver");
MODULE_LICENSE("GPL");
MODULE_DEVICE_TABLE(dmi, apu_gpio_dmi_table);
MODULE_ALIAS("platform:pcengines-apuv2");
diff --git a/drivers/platform/x86/pmc_atom.c b/drivers/platform/x86/pmc_atom.c
index 07d1b911e72f..52ef1419b671 100644
--- a/drivers/platform/x86/pmc_atom.c
+++ b/drivers/platform/x86/pmc_atom.c
@@ -429,6 +429,14 @@ static const struct dmi_system_id critclk_systems[] = {
DMI_MATCH(DMI_PRODUCT_VERSION, "6AV7882-0"),
},
},
+ {
+ .ident = "CONNECT X300",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "SIEMENS AG"),
+ DMI_MATCH(DMI_PRODUCT_VERSION, "A5E45074588"),
+ },
+ },
+
{ /*sentinel*/ }
};
diff --git a/drivers/ptp/Kconfig b/drivers/ptp/Kconfig
index b45d2b86d8ca..475c60dccaa4 100644
--- a/drivers/ptp/Kconfig
+++ b/drivers/ptp/Kconfig
@@ -56,20 +56,6 @@ config PTP_1588_CLOCK_QORIQ
To compile this driver as a module, choose M here: the module
will be called ptp-qoriq.
-config PTP_1588_CLOCK_IXP46X
- tristate "Intel IXP46x as PTP clock"
- depends on IXP4XX_ETH
- depends on PTP_1588_CLOCK
- default y
- help
- This driver adds support for using the IXP46X as a PTP
- clock. This clock is only useful if your PTP programs are
- getting hardware time stamps on the PTP Ethernet packets
- using the SO_TIMESTAMPING API.
-
- To compile this driver as a module, choose M here: the module
- will be called ptp_ixp46x.
-
comment "Enable PHYLIB and NETWORK_PHY_TIMESTAMPING to see the additional clocks."
depends on PHYLIB=n || NETWORK_PHY_TIMESTAMPING=n
@@ -89,6 +75,16 @@ config DP83640_PHY
In order for this to work, your MAC driver must also
implement the skb_tx_timestamp() function.
+config PTP_1588_CLOCK_INES
+ tristate "ZHAW InES PTP time stamping IP core"
+ depends on NETWORK_PHY_TIMESTAMPING
+ depends on PHYLIB
+ depends on PTP_1588_CLOCK
+ help
+ This driver adds support for using the ZHAW InES 1588 IP
+ core. This clock is only useful if the MII bus of your MAC
+ is wired up to the core.
+
config PTP_1588_CLOCK_PCH
tristate "Intel PCH EG20T as PTP clock"
depends on X86_32 || COMPILE_TEST
@@ -121,7 +117,7 @@ config PTP_1588_CLOCK_KVM
config PTP_1588_CLOCK_IDTCM
tristate "IDT CLOCKMATRIX as PTP clock"
- depends on PTP_1588_CLOCK
+ depends on PTP_1588_CLOCK && I2C
default n
help
This driver adds support for using IDT CLOCKMATRIX(TM) as a PTP
diff --git a/drivers/ptp/Makefile b/drivers/ptp/Makefile
index 69a06f86a450..8c830336f178 100644
--- a/drivers/ptp/Makefile
+++ b/drivers/ptp/Makefile
@@ -6,10 +6,10 @@
ptp-y := ptp_clock.o ptp_chardev.o ptp_sysfs.o
obj-$(CONFIG_PTP_1588_CLOCK) += ptp.o
obj-$(CONFIG_PTP_1588_CLOCK_DTE) += ptp_dte.o
-obj-$(CONFIG_PTP_1588_CLOCK_IXP46X) += ptp_ixp46x.o
+obj-$(CONFIG_PTP_1588_CLOCK_INES) += ptp_ines.o
obj-$(CONFIG_PTP_1588_CLOCK_PCH) += ptp_pch.o
obj-$(CONFIG_PTP_1588_CLOCK_KVM) += ptp_kvm.o
obj-$(CONFIG_PTP_1588_CLOCK_QORIQ) += ptp-qoriq.o
ptp-qoriq-y += ptp_qoriq.o
ptp-qoriq-$(CONFIG_DEBUG_FS) += ptp_qoriq_debugfs.o
-obj-$(CONFIG_PTP_1588_CLOCK_IDTCM) += ptp_clockmatrix.o \ No newline at end of file
+obj-$(CONFIG_PTP_1588_CLOCK_IDTCM) += ptp_clockmatrix.o
diff --git a/drivers/ptp/idt8a340_reg.h b/drivers/ptp/idt8a340_reg.h
index 9263bc33b8f4..69eedda9f731 100644
--- a/drivers/ptp/idt8a340_reg.h
+++ b/drivers/ptp/idt8a340_reg.h
@@ -77,6 +77,8 @@
#define JTAG_DEVICE_ID 0x001c
#define PRODUCT_ID 0x001e
+#define OTP_SCSR_CONFIG_SELECT 0x0022
+
#define STATUS 0xc03c
#define USER_GPIO0_TO_7_STATUS 0x008a
#define USER_GPIO8_TO_15_STATUS 0x008b
diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c
index e60eab7f8a61..da97a5bab26e 100644
--- a/drivers/ptp/ptp_clock.c
+++ b/drivers/ptp/ptp_clock.c
@@ -166,9 +166,9 @@ static struct posix_clock_operations ptp_clock_ops = {
.read = ptp_read,
};
-static void delete_ptp_clock(struct posix_clock *pc)
+static void ptp_clock_release(struct device *dev)
{
- struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock);
+ struct ptp_clock *ptp = container_of(dev, struct ptp_clock, dev);
mutex_destroy(&ptp->tsevq_mux);
mutex_destroy(&ptp->pincfg_mux);
@@ -213,7 +213,6 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info,
}
ptp->clock.ops = ptp_clock_ops;
- ptp->clock.release = delete_ptp_clock;
ptp->info = info;
ptp->devid = MKDEV(major, index);
ptp->index = index;
@@ -236,15 +235,6 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info,
if (err)
goto no_pin_groups;
- /* Create a new device in our class. */
- ptp->dev = device_create_with_groups(ptp_class, parent, ptp->devid,
- ptp, ptp->pin_attr_groups,
- "ptp%d", ptp->index);
- if (IS_ERR(ptp->dev)) {
- err = PTR_ERR(ptp->dev);
- goto no_device;
- }
-
/* Register a new PPS source. */
if (info->pps) {
struct pps_source_info pps;
@@ -260,8 +250,18 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info,
}
}
- /* Create a posix clock. */
- err = posix_clock_register(&ptp->clock, ptp->devid);
+ /* Initialize a new device of our class in our clock structure. */
+ device_initialize(&ptp->dev);
+ ptp->dev.devt = ptp->devid;
+ ptp->dev.class = ptp_class;
+ ptp->dev.parent = parent;
+ ptp->dev.groups = ptp->pin_attr_groups;
+ ptp->dev.release = ptp_clock_release;
+ dev_set_drvdata(&ptp->dev, ptp);
+ dev_set_name(&ptp->dev, "ptp%d", ptp->index);
+
+ /* Create a posix clock and link it to the device. */
+ err = posix_clock_register(&ptp->clock, &ptp->dev);
if (err) {
pr_err("failed to create posix clock\n");
goto no_clock;
@@ -273,8 +273,6 @@ no_clock:
if (ptp->pps_source)
pps_unregister_source(ptp->pps_source);
no_pps:
- device_destroy(ptp_class, ptp->devid);
-no_device:
ptp_cleanup_pin_groups(ptp);
no_pin_groups:
if (ptp->kworker)
@@ -304,7 +302,6 @@ int ptp_clock_unregister(struct ptp_clock *ptp)
if (ptp->pps_source)
pps_unregister_source(ptp->pps_source);
- device_destroy(ptp_class, ptp->devid);
ptp_cleanup_pin_groups(ptp);
posix_clock_unregister(&ptp->clock);
@@ -371,6 +368,12 @@ int ptp_schedule_worker(struct ptp_clock *ptp, unsigned long delay)
}
EXPORT_SYMBOL(ptp_schedule_worker);
+void ptp_cancel_worker_sync(struct ptp_clock *ptp)
+{
+ kthread_cancel_delayed_work_sync(&ptp->aux_work);
+}
+EXPORT_SYMBOL(ptp_cancel_worker_sync);
+
/* module operations */
static void __exit ptp_exit(void)
diff --git a/drivers/ptp/ptp_clockmatrix.c b/drivers/ptp/ptp_clockmatrix.c
index a5110b7b4ece..032e112c3dd9 100644
--- a/drivers/ptp/ptp_clockmatrix.c
+++ b/drivers/ptp/ptp_clockmatrix.c
@@ -405,6 +405,7 @@ static int _idtcm_set_dpll_tod(struct idtcm_channel *channel,
if (wr_trig == HW_TOD_WR_TRIG_SEL_MSB) {
if (idtcm->calculate_overhead_flag) {
+ /* Assumption: I2C @ 400KHz */
total_overhead_ns = ktime_to_ns(ktime_get_raw()
- idtcm->start_time)
+ idtcm->tod_write_overhead_ns
@@ -596,44 +597,7 @@ static int idtcm_state_machine_reset(struct idtcm *idtcm)
static int idtcm_read_hw_rev_id(struct idtcm *idtcm, u8 *hw_rev_id)
{
- return idtcm_read(idtcm,
- GENERAL_STATUS,
- HW_REV_ID,
- hw_rev_id,
- sizeof(u8));
-}
-
-static int idtcm_read_bond_id(struct idtcm *idtcm, u8 *bond_id)
-{
- return idtcm_read(idtcm,
- GENERAL_STATUS,
- BOND_ID,
- bond_id,
- sizeof(u8));
-}
-
-static int idtcm_read_hw_csr_id(struct idtcm *idtcm, u16 *hw_csr_id)
-{
- int err;
- u8 buf[2] = {0};
-
- err = idtcm_read(idtcm, GENERAL_STATUS, HW_CSR_ID, buf, sizeof(buf));
-
- *hw_csr_id = (buf[1] << 8) | buf[0];
-
- return err;
-}
-
-static int idtcm_read_hw_irq_id(struct idtcm *idtcm, u16 *hw_irq_id)
-{
- int err;
- u8 buf[2] = {0};
-
- err = idtcm_read(idtcm, GENERAL_STATUS, HW_IRQ_ID, buf, sizeof(buf));
-
- *hw_irq_id = (buf[1] << 8) | buf[0];
-
- return err;
+ return idtcm_read(idtcm, HW_REVISION, REV_ID, hw_rev_id, sizeof(u8));
}
static int idtcm_read_product_id(struct idtcm *idtcm, u16 *product_id)
@@ -674,20 +638,11 @@ static int idtcm_read_hotfix_release(struct idtcm *idtcm, u8 *hotfix)
sizeof(u8));
}
-static int idtcm_read_pipeline(struct idtcm *idtcm, u32 *pipeline)
+static int idtcm_read_otp_scsr_config_select(struct idtcm *idtcm,
+ u8 *config_select)
{
- int err;
- u8 buf[4] = {0};
-
- err = idtcm_read(idtcm,
- GENERAL_STATUS,
- PIPELINE_ID,
- &buf[0],
- sizeof(buf));
-
- *pipeline = (buf[3] << 24) | (buf[2] << 16) | (buf[1] << 8) | buf[0];
-
- return err;
+ return idtcm_read(idtcm, GENERAL_STATUS, OTP_SCSR_CONFIG_SELECT,
+ config_select, sizeof(u8));
}
static int process_pll_mask(struct idtcm *idtcm, u32 addr, u8 val, u8 *mask)
@@ -1078,31 +1033,25 @@ static void idtcm_display_version_info(struct idtcm *idtcm)
u8 major;
u8 minor;
u8 hotfix;
- u32 pipeline;
u16 product_id;
- u16 csr_id;
- u16 irq_id;
u8 hw_rev_id;
- u8 bond_id;
+ u8 config_select;
+ char *fmt = "%d.%d.%d, Id: 0x%04x HW Rev: %d OTP Config Select: %d\n";
idtcm_read_major_release(idtcm, &major);
idtcm_read_minor_release(idtcm, &minor);
idtcm_read_hotfix_release(idtcm, &hotfix);
- idtcm_read_pipeline(idtcm, &pipeline);
idtcm_read_product_id(idtcm, &product_id);
idtcm_read_hw_rev_id(idtcm, &hw_rev_id);
- idtcm_read_bond_id(idtcm, &bond_id);
- idtcm_read_hw_csr_id(idtcm, &csr_id);
- idtcm_read_hw_irq_id(idtcm, &irq_id);
-
- dev_info(&idtcm->client->dev, "Version: %d.%d.%d, Pipeline %u\t"
- "0x%04x, Rev %d, Bond %d, CSR %d, IRQ %d\n",
- major, minor, hotfix, pipeline,
- product_id, hw_rev_id, bond_id, csr_id, irq_id);
+
+ idtcm_read_otp_scsr_config_select(idtcm, &config_select);
+
+ dev_info(&idtcm->client->dev, fmt, major, minor, hotfix,
+ product_id, hw_rev_id, config_select);
}
-static struct ptp_clock_info idtcm_caps = {
+static const struct ptp_clock_info idtcm_caps = {
.owner = THIS_MODULE,
.max_adj = 244000,
.n_per_out = 1,
diff --git a/drivers/ptp/ptp_ines.c b/drivers/ptp/ptp_ines.c
new file mode 100644
index 000000000000..dfda54cbd866
--- /dev/null
+++ b/drivers/ptp/ptp_ines.c
@@ -0,0 +1,852 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Copyright (C) 2018 MOSER-BAER AG
+//
+
+#define pr_fmt(fmt) "InES_PTP: " fmt
+
+#include <linux/ethtool.h>
+#include <linux/export.h>
+#include <linux/if_vlan.h>
+#include <linux/mii_timestamper.h>
+#include <linux/module.h>
+#include <linux/net_tstamp.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/phy.h>
+#include <linux/platform_device.h>
+#include <linux/ptp_classify.h>
+#include <linux/ptp_clock_kernel.h>
+#include <linux/stddef.h>
+
+MODULE_DESCRIPTION("Driver for the ZHAW InES PTP time stamping IP core");
+MODULE_AUTHOR("Richard Cochran <[email protected]>");
+MODULE_VERSION("1.0");
+MODULE_LICENSE("GPL");
+
+/* GLOBAL register */
+#define MCAST_MAC_SELECT_SHIFT 2
+#define MCAST_MAC_SELECT_MASK 0x3
+#define IO_RESET BIT(1)
+#define PTP_RESET BIT(0)
+
+/* VERSION register */
+#define IF_MAJOR_VER_SHIFT 12
+#define IF_MAJOR_VER_MASK 0xf
+#define IF_MINOR_VER_SHIFT 8
+#define IF_MINOR_VER_MASK 0xf
+#define FPGA_MAJOR_VER_SHIFT 4
+#define FPGA_MAJOR_VER_MASK 0xf
+#define FPGA_MINOR_VER_SHIFT 0
+#define FPGA_MINOR_VER_MASK 0xf
+
+/* INT_STAT register */
+#define RX_INTR_STATUS_3 BIT(5)
+#define RX_INTR_STATUS_2 BIT(4)
+#define RX_INTR_STATUS_1 BIT(3)
+#define TX_INTR_STATUS_3 BIT(2)
+#define TX_INTR_STATUS_2 BIT(1)
+#define TX_INTR_STATUS_1 BIT(0)
+
+/* INT_MSK register */
+#define RX_INTR_MASK_3 BIT(5)
+#define RX_INTR_MASK_2 BIT(4)
+#define RX_INTR_MASK_1 BIT(3)
+#define TX_INTR_MASK_3 BIT(2)
+#define TX_INTR_MASK_2 BIT(1)
+#define TX_INTR_MASK_1 BIT(0)
+
+/* BUF_STAT register */
+#define RX_FIFO_NE_3 BIT(5)
+#define RX_FIFO_NE_2 BIT(4)
+#define RX_FIFO_NE_1 BIT(3)
+#define TX_FIFO_NE_3 BIT(2)
+#define TX_FIFO_NE_2 BIT(1)
+#define TX_FIFO_NE_1 BIT(0)
+
+/* PORT_CONF register */
+#define CM_ONE_STEP BIT(6)
+#define PHY_SPEED_SHIFT 4
+#define PHY_SPEED_MASK 0x3
+#define P2P_DELAY_WR_POS_SHIFT 2
+#define P2P_DELAY_WR_POS_MASK 0x3
+#define PTP_MODE_SHIFT 0
+#define PTP_MODE_MASK 0x3
+
+/* TS_STAT_TX register */
+#define TS_ENABLE BIT(15)
+#define DATA_READ_POS_SHIFT 8
+#define DATA_READ_POS_MASK 0x1f
+#define DISCARDED_EVENTS_SHIFT 4
+#define DISCARDED_EVENTS_MASK 0xf
+
+#define INES_N_PORTS 3
+#define INES_REGISTER_SIZE 0x80
+#define INES_PORT_OFFSET 0x20
+#define INES_PORT_SIZE 0x20
+#define INES_FIFO_DEPTH 90
+#define INES_MAX_EVENTS 100
+
+#define BC_PTP_V1 0
+#define BC_PTP_V2 1
+#define TC_E2E_PTP_V2 2
+#define TC_P2P_PTP_V2 3
+
+#define OFF_PTP_CLOCK_ID 20
+#define OFF_PTP_PORT_NUM 28
+
+#define PHY_SPEED_10 0
+#define PHY_SPEED_100 1
+#define PHY_SPEED_1000 2
+
+#define PORT_CONF \
+ ((PHY_SPEED_1000 << PHY_SPEED_SHIFT) | (BC_PTP_V2 << PTP_MODE_SHIFT))
+
+#define ines_read32(s, r) __raw_readl((void __iomem *)&s->regs->r)
+#define ines_write32(s, v, r) __raw_writel(v, (void __iomem *)&s->regs->r)
+
+#define MESSAGE_TYPE_SYNC 1
+#define MESSAGE_TYPE_P_DELAY_REQ 2
+#define MESSAGE_TYPE_P_DELAY_RESP 3
+#define MESSAGE_TYPE_DELAY_REQ 4
+
+#define SYNC 0x0
+#define DELAY_REQ 0x1
+#define PDELAY_REQ 0x2
+#define PDELAY_RESP 0x3
+
+static LIST_HEAD(ines_clocks);
+static DEFINE_MUTEX(ines_clocks_lock);
+
+struct ines_global_regs {
+ u32 id;
+ u32 test;
+ u32 global;
+ u32 version;
+ u32 test2;
+ u32 int_stat;
+ u32 int_msk;
+ u32 buf_stat;
+};
+
+struct ines_port_registers {
+ u32 port_conf;
+ u32 p_delay;
+ u32 ts_stat_tx;
+ u32 ts_stat_rx;
+ u32 ts_tx;
+ u32 ts_rx;
+};
+
+struct ines_timestamp {
+ struct list_head list;
+ unsigned long tmo;
+ u16 tag;
+ u64 sec;
+ u64 nsec;
+ u64 clkid;
+ u16 portnum;
+ u16 seqid;
+};
+
+struct ines_port {
+ struct ines_port_registers *regs;
+ struct mii_timestamper mii_ts;
+ struct ines_clock *clock;
+ bool rxts_enabled;
+ bool txts_enabled;
+ unsigned int index;
+ struct delayed_work ts_work;
+ /* lock protects event list and tx_skb */
+ spinlock_t lock;
+ struct sk_buff *tx_skb;
+ struct list_head events;
+ struct list_head pool;
+ struct ines_timestamp pool_data[INES_MAX_EVENTS];
+};
+
+struct ines_clock {
+ struct ines_port port[INES_N_PORTS];
+ struct ines_global_regs __iomem *regs;
+ void __iomem *base;
+ struct device_node *node;
+ struct device *dev;
+ struct list_head list;
+};
+
+static bool ines_match(struct sk_buff *skb, unsigned int ptp_class,
+ struct ines_timestamp *ts, struct device *dev);
+static int ines_rxfifo_read(struct ines_port *port);
+static u64 ines_rxts64(struct ines_port *port, unsigned int words);
+static bool ines_timestamp_expired(struct ines_timestamp *ts);
+static u64 ines_txts64(struct ines_port *port, unsigned int words);
+static void ines_txtstamp_work(struct work_struct *work);
+static bool is_sync_pdelay_resp(struct sk_buff *skb, int type);
+static u8 tag_to_msgtype(u8 tag);
+
+static void ines_clock_cleanup(struct ines_clock *clock)
+{
+ struct ines_port *port;
+ int i;
+
+ for (i = 0; i < INES_N_PORTS; i++) {
+ port = &clock->port[i];
+ cancel_delayed_work_sync(&port->ts_work);
+ }
+}
+
+static int ines_clock_init(struct ines_clock *clock, struct device *device,
+ void __iomem *addr)
+{
+ struct device_node *node = device->of_node;
+ unsigned long port_addr;
+ struct ines_port *port;
+ int i, j;
+
+ INIT_LIST_HEAD(&clock->list);
+ clock->node = node;
+ clock->dev = device;
+ clock->base = addr;
+ clock->regs = clock->base;
+
+ for (i = 0; i < INES_N_PORTS; i++) {
+ port = &clock->port[i];
+ port_addr = (unsigned long) clock->base +
+ INES_PORT_OFFSET + i * INES_PORT_SIZE;
+ port->regs = (struct ines_port_registers *) port_addr;
+ port->clock = clock;
+ port->index = i;
+ INIT_DELAYED_WORK(&port->ts_work, ines_txtstamp_work);
+ spin_lock_init(&port->lock);
+ INIT_LIST_HEAD(&port->events);
+ INIT_LIST_HEAD(&port->pool);
+ for (j = 0; j < INES_MAX_EVENTS; j++)
+ list_add(&port->pool_data[j].list, &port->pool);
+ }
+
+ ines_write32(clock, 0xBEEF, test);
+ ines_write32(clock, 0xBEEF, test2);
+
+ dev_dbg(device, "ID 0x%x\n", ines_read32(clock, id));
+ dev_dbg(device, "TEST 0x%x\n", ines_read32(clock, test));
+ dev_dbg(device, "VERSION 0x%x\n", ines_read32(clock, version));
+ dev_dbg(device, "TEST2 0x%x\n", ines_read32(clock, test2));
+
+ for (i = 0; i < INES_N_PORTS; i++) {
+ port = &clock->port[i];
+ ines_write32(port, PORT_CONF, port_conf);
+ }
+
+ return 0;
+}
+
+static struct ines_port *ines_find_port(struct device_node *node, u32 index)
+{
+ struct ines_port *port = NULL;
+ struct ines_clock *clock;
+ struct list_head *this;
+
+ mutex_lock(&ines_clocks_lock);
+ list_for_each(this, &ines_clocks) {
+ clock = list_entry(this, struct ines_clock, list);
+ if (clock->node == node) {
+ port = &clock->port[index];
+ break;
+ }
+ }
+ mutex_unlock(&ines_clocks_lock);
+ return port;
+}
+
+static u64 ines_find_rxts(struct ines_port *port, struct sk_buff *skb, int type)
+{
+ struct list_head *this, *next;
+ struct ines_timestamp *ts;
+ unsigned long flags;
+ u64 ns = 0;
+
+ if (type == PTP_CLASS_NONE)
+ return 0;
+
+ spin_lock_irqsave(&port->lock, flags);
+ ines_rxfifo_read(port);
+ list_for_each_safe(this, next, &port->events) {
+ ts = list_entry(this, struct ines_timestamp, list);
+ if (ines_timestamp_expired(ts)) {
+ list_del_init(&ts->list);
+ list_add(&ts->list, &port->pool);
+ continue;
+ }
+ if (ines_match(skb, type, ts, port->clock->dev)) {
+ ns = ts->sec * 1000000000ULL + ts->nsec;
+ list_del_init(&ts->list);
+ list_add(&ts->list, &port->pool);
+ break;
+ }
+ }
+ spin_unlock_irqrestore(&port->lock, flags);
+
+ return ns;
+}
+
+static u64 ines_find_txts(struct ines_port *port, struct sk_buff *skb)
+{
+ unsigned int class = ptp_classify_raw(skb), i;
+ u32 data_rd_pos, buf_stat, mask, ts_stat_tx;
+ struct ines_timestamp ts;
+ unsigned long flags;
+ u64 ns = 0;
+
+ mask = TX_FIFO_NE_1 << port->index;
+
+ spin_lock_irqsave(&port->lock, flags);
+
+ for (i = 0; i < INES_FIFO_DEPTH; i++) {
+
+ buf_stat = ines_read32(port->clock, buf_stat);
+ if (!(buf_stat & mask)) {
+ dev_dbg(port->clock->dev,
+ "Tx timestamp FIFO unexpectedly empty\n");
+ break;
+ }
+ ts_stat_tx = ines_read32(port, ts_stat_tx);
+ data_rd_pos = (ts_stat_tx >> DATA_READ_POS_SHIFT) &
+ DATA_READ_POS_MASK;
+ if (data_rd_pos) {
+ dev_err(port->clock->dev,
+ "unexpected Tx read pos %u\n", data_rd_pos);
+ break;
+ }
+
+ ts.tag = ines_read32(port, ts_tx);
+ ts.sec = ines_txts64(port, 3);
+ ts.nsec = ines_txts64(port, 2);
+ ts.clkid = ines_txts64(port, 4);
+ ts.portnum = ines_read32(port, ts_tx);
+ ts.seqid = ines_read32(port, ts_tx);
+
+ if (ines_match(skb, class, &ts, port->clock->dev)) {
+ ns = ts.sec * 1000000000ULL + ts.nsec;
+ break;
+ }
+ }
+
+ spin_unlock_irqrestore(&port->lock, flags);
+ return ns;
+}
+
+static int ines_hwtstamp(struct mii_timestamper *mii_ts, struct ifreq *ifr)
+{
+ struct ines_port *port = container_of(mii_ts, struct ines_port, mii_ts);
+ u32 cm_one_step = 0, port_conf, ts_stat_rx, ts_stat_tx;
+ struct hwtstamp_config cfg;
+ unsigned long flags;
+
+ if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg)))
+ return -EFAULT;
+
+ /* reserved for future extensions */
+ if (cfg.flags)
+ return -EINVAL;
+
+ switch (cfg.tx_type) {
+ case HWTSTAMP_TX_OFF:
+ ts_stat_tx = 0;
+ break;
+ case HWTSTAMP_TX_ON:
+ ts_stat_tx = TS_ENABLE;
+ break;
+ case HWTSTAMP_TX_ONESTEP_P2P:
+ ts_stat_tx = TS_ENABLE;
+ cm_one_step = CM_ONE_STEP;
+ break;
+ default:
+ return -ERANGE;
+ }
+
+ switch (cfg.rx_filter) {
+ case HWTSTAMP_FILTER_NONE:
+ ts_stat_rx = 0;
+ break;
+ case HWTSTAMP_FILTER_ALL:
+ case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
+ case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
+ case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
+ return -ERANGE;
+ case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
+ case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
+ case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
+ case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
+ case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
+ case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
+ case HWTSTAMP_FILTER_PTP_V2_EVENT:
+ case HWTSTAMP_FILTER_PTP_V2_SYNC:
+ case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+ ts_stat_rx = TS_ENABLE;
+ cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
+ break;
+ default:
+ return -ERANGE;
+ }
+
+ spin_lock_irqsave(&port->lock, flags);
+
+ port_conf = ines_read32(port, port_conf);
+ port_conf &= ~CM_ONE_STEP;
+ port_conf |= cm_one_step;
+
+ ines_write32(port, port_conf, port_conf);
+ ines_write32(port, ts_stat_rx, ts_stat_rx);
+ ines_write32(port, ts_stat_tx, ts_stat_tx);
+
+ port->rxts_enabled = ts_stat_rx == TS_ENABLE ? true : false;
+ port->txts_enabled = ts_stat_tx == TS_ENABLE ? true : false;
+
+ spin_unlock_irqrestore(&port->lock, flags);
+
+ return copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)) ? -EFAULT : 0;
+}
+
+static void ines_link_state(struct mii_timestamper *mii_ts,
+ struct phy_device *phydev)
+{
+ struct ines_port *port = container_of(mii_ts, struct ines_port, mii_ts);
+ u32 port_conf, speed_conf;
+ unsigned long flags;
+
+ switch (phydev->speed) {
+ case SPEED_10:
+ speed_conf = PHY_SPEED_10 << PHY_SPEED_SHIFT;
+ break;
+ case SPEED_100:
+ speed_conf = PHY_SPEED_100 << PHY_SPEED_SHIFT;
+ break;
+ case SPEED_1000:
+ speed_conf = PHY_SPEED_1000 << PHY_SPEED_SHIFT;
+ break;
+ default:
+ dev_err(port->clock->dev, "bad speed: %d\n", phydev->speed);
+ return;
+ }
+ spin_lock_irqsave(&port->lock, flags);
+
+ port_conf = ines_read32(port, port_conf);
+ port_conf &= ~(0x3 << PHY_SPEED_SHIFT);
+ port_conf |= speed_conf;
+
+ ines_write32(port, port_conf, port_conf);
+
+ spin_unlock_irqrestore(&port->lock, flags);
+}
+
+static bool ines_match(struct sk_buff *skb, unsigned int ptp_class,
+ struct ines_timestamp *ts, struct device *dev)
+{
+ u8 *msgtype, *data = skb_mac_header(skb);
+ unsigned int offset = 0;
+ __be16 *portn, *seqid;
+ __be64 *clkid;
+
+ if (unlikely(ptp_class & PTP_CLASS_V1))
+ return false;
+
+ if (ptp_class & PTP_CLASS_VLAN)
+ offset += VLAN_HLEN;
+
+ switch (ptp_class & PTP_CLASS_PMASK) {
+ case PTP_CLASS_IPV4:
+ offset += ETH_HLEN + IPV4_HLEN(data + offset) + UDP_HLEN;
+ break;
+ case PTP_CLASS_IPV6:
+ offset += ETH_HLEN + IP6_HLEN + UDP_HLEN;
+ break;
+ case PTP_CLASS_L2:
+ offset += ETH_HLEN;
+ break;
+ default:
+ return false;
+ }
+
+ if (skb->len + ETH_HLEN < offset + OFF_PTP_SEQUENCE_ID + sizeof(*seqid))
+ return false;
+
+ msgtype = data + offset;
+ clkid = (__be64 *)(data + offset + OFF_PTP_CLOCK_ID);
+ portn = (__be16 *)(data + offset + OFF_PTP_PORT_NUM);
+ seqid = (__be16 *)(data + offset + OFF_PTP_SEQUENCE_ID);
+
+ if (tag_to_msgtype(ts->tag & 0x7) != (*msgtype & 0xf)) {
+ dev_dbg(dev, "msgtype mismatch ts %hhu != skb %hhu\n",
+ tag_to_msgtype(ts->tag & 0x7), *msgtype & 0xf);
+ return false;
+ }
+ if (cpu_to_be64(ts->clkid) != *clkid) {
+ dev_dbg(dev, "clkid mismatch ts %llx != skb %llx\n",
+ cpu_to_be64(ts->clkid), *clkid);
+ return false;
+ }
+ if (ts->portnum != ntohs(*portn)) {
+ dev_dbg(dev, "portn mismatch ts %hu != skb %hu\n",
+ ts->portnum, ntohs(*portn));
+ return false;
+ }
+ if (ts->seqid != ntohs(*seqid)) {
+ dev_dbg(dev, "seqid mismatch ts %hu != skb %hu\n",
+ ts->seqid, ntohs(*seqid));
+ return false;
+ }
+
+ return true;
+}
+
+static bool ines_rxtstamp(struct mii_timestamper *mii_ts,
+ struct sk_buff *skb, int type)
+{
+ struct ines_port *port = container_of(mii_ts, struct ines_port, mii_ts);
+ struct skb_shared_hwtstamps *ssh;
+ u64 ns;
+
+ if (!port->rxts_enabled)
+ return false;
+
+ ns = ines_find_rxts(port, skb, type);
+ if (!ns)
+ return false;
+
+ ssh = skb_hwtstamps(skb);
+ ssh->hwtstamp = ns_to_ktime(ns);
+ netif_rx(skb);
+
+ return true;
+}
+
+static int ines_rxfifo_read(struct ines_port *port)
+{
+ u32 data_rd_pos, buf_stat, mask, ts_stat_rx;
+ struct ines_timestamp *ts;
+ unsigned int i;
+
+ mask = RX_FIFO_NE_1 << port->index;
+
+ for (i = 0; i < INES_FIFO_DEPTH; i++) {
+ if (list_empty(&port->pool)) {
+ dev_err(port->clock->dev, "event pool is empty\n");
+ return -1;
+ }
+ buf_stat = ines_read32(port->clock, buf_stat);
+ if (!(buf_stat & mask))
+ break;
+
+ ts_stat_rx = ines_read32(port, ts_stat_rx);
+ data_rd_pos = (ts_stat_rx >> DATA_READ_POS_SHIFT) &
+ DATA_READ_POS_MASK;
+ if (data_rd_pos) {
+ dev_err(port->clock->dev, "unexpected Rx read pos %u\n",
+ data_rd_pos);
+ break;
+ }
+
+ ts = list_first_entry(&port->pool, struct ines_timestamp, list);
+ ts->tmo = jiffies + HZ;
+ ts->tag = ines_read32(port, ts_rx);
+ ts->sec = ines_rxts64(port, 3);
+ ts->nsec = ines_rxts64(port, 2);
+ ts->clkid = ines_rxts64(port, 4);
+ ts->portnum = ines_read32(port, ts_rx);
+ ts->seqid = ines_read32(port, ts_rx);
+
+ list_del_init(&ts->list);
+ list_add_tail(&ts->list, &port->events);
+ }
+
+ return 0;
+}
+
+static u64 ines_rxts64(struct ines_port *port, unsigned int words)
+{
+ unsigned int i;
+ u64 result;
+ u16 word;
+
+ word = ines_read32(port, ts_rx);
+ result = word;
+ words--;
+ for (i = 0; i < words; i++) {
+ word = ines_read32(port, ts_rx);
+ result <<= 16;
+ result |= word;
+ }
+ return result;
+}
+
+static bool ines_timestamp_expired(struct ines_timestamp *ts)
+{
+ return time_after(jiffies, ts->tmo);
+}
+
+static int ines_ts_info(struct mii_timestamper *mii_ts,
+ struct ethtool_ts_info *info)
+{
+ info->so_timestamping =
+ SOF_TIMESTAMPING_TX_HARDWARE |
+ SOF_TIMESTAMPING_TX_SOFTWARE |
+ SOF_TIMESTAMPING_RX_HARDWARE |
+ SOF_TIMESTAMPING_RX_SOFTWARE |
+ SOF_TIMESTAMPING_SOFTWARE |
+ SOF_TIMESTAMPING_RAW_HARDWARE;
+
+ info->phc_index = -1;
+
+ info->tx_types =
+ (1 << HWTSTAMP_TX_OFF) |
+ (1 << HWTSTAMP_TX_ON) |
+ (1 << HWTSTAMP_TX_ONESTEP_P2P);
+
+ info->rx_filters =
+ (1 << HWTSTAMP_FILTER_NONE) |
+ (1 << HWTSTAMP_FILTER_PTP_V2_EVENT);
+
+ return 0;
+}
+
+static u64 ines_txts64(struct ines_port *port, unsigned int words)
+{
+ unsigned int i;
+ u64 result;
+ u16 word;
+
+ word = ines_read32(port, ts_tx);
+ result = word;
+ words--;
+ for (i = 0; i < words; i++) {
+ word = ines_read32(port, ts_tx);
+ result <<= 16;
+ result |= word;
+ }
+ return result;
+}
+
+static bool ines_txts_onestep(struct ines_port *port, struct sk_buff *skb, int type)
+{
+ unsigned long flags;
+ u32 port_conf;
+
+ spin_lock_irqsave(&port->lock, flags);
+ port_conf = ines_read32(port, port_conf);
+ spin_unlock_irqrestore(&port->lock, flags);
+
+ if (port_conf & CM_ONE_STEP)
+ return is_sync_pdelay_resp(skb, type);
+
+ return false;
+}
+
+static void ines_txtstamp(struct mii_timestamper *mii_ts,
+ struct sk_buff *skb, int type)
+{
+ struct ines_port *port = container_of(mii_ts, struct ines_port, mii_ts);
+ struct sk_buff *old_skb = NULL;
+ unsigned long flags;
+
+ if (!port->txts_enabled || ines_txts_onestep(port, skb, type)) {
+ kfree_skb(skb);
+ return;
+ }
+
+ spin_lock_irqsave(&port->lock, flags);
+
+ if (port->tx_skb)
+ old_skb = port->tx_skb;
+
+ port->tx_skb = skb;
+
+ spin_unlock_irqrestore(&port->lock, flags);
+
+ if (old_skb)
+ kfree_skb(old_skb);
+
+ schedule_delayed_work(&port->ts_work, 1);
+}
+
+static void ines_txtstamp_work(struct work_struct *work)
+{
+ struct ines_port *port =
+ container_of(work, struct ines_port, ts_work.work);
+ struct skb_shared_hwtstamps ssh;
+ struct sk_buff *skb;
+ unsigned long flags;
+ u64 ns;
+
+ spin_lock_irqsave(&port->lock, flags);
+ skb = port->tx_skb;
+ port->tx_skb = NULL;
+ spin_unlock_irqrestore(&port->lock, flags);
+
+ ns = ines_find_txts(port, skb);
+ if (!ns) {
+ kfree_skb(skb);
+ return;
+ }
+ ssh.hwtstamp = ns_to_ktime(ns);
+ skb_complete_tx_timestamp(skb, &ssh);
+}
+
+static bool is_sync_pdelay_resp(struct sk_buff *skb, int type)
+{
+ u8 *data = skb->data, *msgtype;
+ unsigned int offset = 0;
+
+ if (type & PTP_CLASS_VLAN)
+ offset += VLAN_HLEN;
+
+ switch (type & PTP_CLASS_PMASK) {
+ case PTP_CLASS_IPV4:
+ offset += ETH_HLEN + IPV4_HLEN(data + offset) + UDP_HLEN;
+ break;
+ case PTP_CLASS_IPV6:
+ offset += ETH_HLEN + IP6_HLEN + UDP_HLEN;
+ break;
+ case PTP_CLASS_L2:
+ offset += ETH_HLEN;
+ break;
+ default:
+ return 0;
+ }
+
+ if (type & PTP_CLASS_V1)
+ offset += OFF_PTP_CONTROL;
+
+ if (skb->len < offset + 1)
+ return 0;
+
+ msgtype = data + offset;
+
+ switch ((*msgtype & 0xf)) {
+ case SYNC:
+ case PDELAY_RESP:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static u8 tag_to_msgtype(u8 tag)
+{
+ switch (tag) {
+ case MESSAGE_TYPE_SYNC:
+ return SYNC;
+ case MESSAGE_TYPE_P_DELAY_REQ:
+ return PDELAY_REQ;
+ case MESSAGE_TYPE_P_DELAY_RESP:
+ return PDELAY_RESP;
+ case MESSAGE_TYPE_DELAY_REQ:
+ return DELAY_REQ;
+ }
+ return 0xf;
+}
+
+static struct mii_timestamper *ines_ptp_probe_channel(struct device *device,
+ unsigned int index)
+{
+ struct device_node *node = device->of_node;
+ struct ines_port *port;
+
+ if (index > INES_N_PORTS - 1) {
+ dev_err(device, "bad port index %u\n", index);
+ return ERR_PTR(-EINVAL);
+ }
+ port = ines_find_port(node, index);
+ if (!port) {
+ dev_err(device, "missing port index %u\n", index);
+ return ERR_PTR(-ENODEV);
+ }
+ port->mii_ts.rxtstamp = ines_rxtstamp;
+ port->mii_ts.txtstamp = ines_txtstamp;
+ port->mii_ts.hwtstamp = ines_hwtstamp;
+ port->mii_ts.link_state = ines_link_state;
+ port->mii_ts.ts_info = ines_ts_info;
+
+ return &port->mii_ts;
+}
+
+static void ines_ptp_release_channel(struct device *device,
+ struct mii_timestamper *mii_ts)
+{
+}
+
+static struct mii_timestamping_ctrl ines_ctrl = {
+ .probe_channel = ines_ptp_probe_channel,
+ .release_channel = ines_ptp_release_channel,
+};
+
+static int ines_ptp_ctrl_probe(struct platform_device *pld)
+{
+ struct ines_clock *clock;
+ struct resource *res;
+ void __iomem *addr;
+ int err = 0;
+
+ res = platform_get_resource(pld, IORESOURCE_MEM, 0);
+ if (!res) {
+ dev_err(&pld->dev, "missing memory resource\n");
+ return -EINVAL;
+ }
+ addr = devm_ioremap_resource(&pld->dev, res);
+ if (IS_ERR(addr)) {
+ err = PTR_ERR(addr);
+ goto out;
+ }
+ clock = kzalloc(sizeof(*clock), GFP_KERNEL);
+ if (!clock) {
+ err = -ENOMEM;
+ goto out;
+ }
+ if (ines_clock_init(clock, &pld->dev, addr)) {
+ kfree(clock);
+ err = -ENOMEM;
+ goto out;
+ }
+ err = register_mii_tstamp_controller(&pld->dev, &ines_ctrl);
+ if (err) {
+ kfree(clock);
+ goto out;
+ }
+ mutex_lock(&ines_clocks_lock);
+ list_add_tail(&ines_clocks, &clock->list);
+ mutex_unlock(&ines_clocks_lock);
+
+ dev_set_drvdata(&pld->dev, clock);
+out:
+ return err;
+}
+
+static int ines_ptp_ctrl_remove(struct platform_device *pld)
+{
+ struct ines_clock *clock = dev_get_drvdata(&pld->dev);
+
+ unregister_mii_tstamp_controller(&pld->dev);
+ mutex_lock(&ines_clocks_lock);
+ list_del(&clock->list);
+ mutex_unlock(&ines_clocks_lock);
+ ines_clock_cleanup(clock);
+ kfree(clock);
+ return 0;
+}
+
+static const struct of_device_id ines_ptp_ctrl_of_match[] = {
+ { .compatible = "ines,ptp-ctrl" },
+ { }
+};
+
+MODULE_DEVICE_TABLE(of, ines_ptp_ctrl_of_match);
+
+static struct platform_driver ines_ptp_ctrl_driver = {
+ .probe = ines_ptp_ctrl_probe,
+ .remove = ines_ptp_ctrl_remove,
+ .driver = {
+ .name = "ines_ptp_ctrl",
+ .of_match_table = of_match_ptr(ines_ptp_ctrl_of_match),
+ },
+};
+module_platform_driver(ines_ptp_ctrl_driver);
diff --git a/drivers/ptp/ptp_private.h b/drivers/ptp/ptp_private.h
index 9171d42468fd..6b97155148f1 100644
--- a/drivers/ptp/ptp_private.h
+++ b/drivers/ptp/ptp_private.h
@@ -28,7 +28,7 @@ struct timestamp_event_queue {
struct ptp_clock {
struct posix_clock clock;
- struct device *dev;
+ struct device dev;
struct ptp_clock_info *info;
dev_t devid;
int index; /* index into clocks.map */
diff --git a/drivers/ptp/ptp_qoriq.c b/drivers/ptp/ptp_qoriq.c
index a577218d1ab7..b27c46ebfc8f 100644
--- a/drivers/ptp/ptp_qoriq.c
+++ b/drivers/ptp/ptp_qoriq.c
@@ -74,14 +74,13 @@ static void set_fipers(struct ptp_qoriq *ptp_qoriq)
ptp_qoriq->write(&regs->fiper_regs->tmr_fiper2, ptp_qoriq->tmr_fiper2);
}
-static int extts_clean_up(struct ptp_qoriq *ptp_qoriq, int index,
- bool update_event)
+int extts_clean_up(struct ptp_qoriq *ptp_qoriq, int index, bool update_event)
{
struct ptp_qoriq_registers *regs = &ptp_qoriq->regs;
struct ptp_clock_event event;
void __iomem *reg_etts_l;
void __iomem *reg_etts_h;
- u32 valid, stat, lo, hi;
+ u32 valid, lo, hi;
switch (index) {
case 0:
@@ -101,6 +100,10 @@ static int extts_clean_up(struct ptp_qoriq *ptp_qoriq, int index,
event.type = PTP_CLOCK_EXTTS;
event.index = index;
+ if (ptp_qoriq->extts_fifo_support)
+ if (!(ptp_qoriq->read(&regs->ctrl_regs->tmr_stat) & valid))
+ return 0;
+
do {
lo = ptp_qoriq->read(reg_etts_l);
hi = ptp_qoriq->read(reg_etts_h);
@@ -111,11 +114,13 @@ static int extts_clean_up(struct ptp_qoriq *ptp_qoriq, int index,
ptp_clock_event(ptp_qoriq->clock, &event);
}
- stat = ptp_qoriq->read(&regs->ctrl_regs->tmr_stat);
- } while (ptp_qoriq->extts_fifo_support && (stat & valid));
+ if (!ptp_qoriq->extts_fifo_support)
+ break;
+ } while (ptp_qoriq->read(&regs->ctrl_regs->tmr_stat) & valid);
return 0;
}
+EXPORT_SYMBOL_GPL(extts_clean_up);
/*
* Interrupt service routine
diff --git a/drivers/regulator/axp20x-regulator.c b/drivers/regulator/axp20x-regulator.c
index 989506bd90b1..16f0c8570036 100644
--- a/drivers/regulator/axp20x-regulator.c
+++ b/drivers/regulator/axp20x-regulator.c
@@ -413,10 +413,13 @@ static int axp20x_set_ramp_delay(struct regulator_dev *rdev, int ramp)
int i;
for (i = 0; i < rate_count; i++) {
- if (ramp <= slew_rates[i])
- cfg = AXP20X_DCDC2_LDO3_V_RAMP_LDO3_RATE(i);
- else
+ if (ramp > slew_rates[i])
break;
+
+ if (id == AXP20X_DCDC2)
+ cfg = AXP20X_DCDC2_LDO3_V_RAMP_DCDC2_RATE(i);
+ else
+ cfg = AXP20X_DCDC2_LDO3_V_RAMP_LDO3_RATE(i);
}
if (cfg == 0xff) {
@@ -605,7 +608,7 @@ static const struct regulator_desc axp22x_regulators[] = {
AXP22X_PWR_OUT_CTRL2, AXP22X_PWR_OUT_ELDO1_MASK),
AXP_DESC(AXP22X, ELDO2, "eldo2", "eldoin", 700, 3300, 100,
AXP22X_ELDO2_V_OUT, AXP22X_ELDO2_V_OUT_MASK,
- AXP22X_PWR_OUT_CTRL2, AXP22X_PWR_OUT_ELDO1_MASK),
+ AXP22X_PWR_OUT_CTRL2, AXP22X_PWR_OUT_ELDO2_MASK),
AXP_DESC(AXP22X, ELDO3, "eldo3", "eldoin", 700, 3300, 100,
AXP22X_ELDO3_V_OUT, AXP22X_ELDO3_V_OUT_MASK,
AXP22X_PWR_OUT_CTRL2, AXP22X_PWR_OUT_ELDO3_MASK),
diff --git a/drivers/regulator/bd70528-regulator.c b/drivers/regulator/bd70528-regulator.c
index ec764022621f..5bf8a2dc5fe7 100644
--- a/drivers/regulator/bd70528-regulator.c
+++ b/drivers/regulator/bd70528-regulator.c
@@ -101,7 +101,6 @@ static const struct regulator_ops bd70528_ldo_ops = {
.set_voltage_sel = regulator_set_voltage_sel_regmap,
.get_voltage_sel = regulator_get_voltage_sel_regmap,
.set_voltage_time_sel = regulator_set_voltage_time_sel,
- .set_ramp_delay = bd70528_set_ramp_delay,
};
static const struct regulator_ops bd70528_led_ops = {
diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c
index 679ad3d2ed23..03d79fee2987 100644
--- a/drivers/regulator/core.c
+++ b/drivers/regulator/core.c
@@ -1198,6 +1198,10 @@ static int machine_constraints_voltage(struct regulator_dev *rdev,
return -EINVAL;
}
+ /* no need to loop voltages if range is continuous */
+ if (rdev->desc->continuous_voltage_range)
+ return 0;
+
/* initial: [cmin..cmax] valid, [min_uV..max_uV] not */
for (i = 0; i < count; i++) {
int value;
@@ -1938,8 +1942,8 @@ struct regulator *_regulator_get(struct device *dev, const char *id,
regulator = create_regulator(rdev, dev, id);
if (regulator == NULL) {
regulator = ERR_PTR(-ENOMEM);
- put_device(&rdev->dev);
module_put(rdev->owner);
+ put_device(&rdev->dev);
return regulator;
}
@@ -2063,13 +2067,13 @@ static void _regulator_put(struct regulator *regulator)
rdev->open_count--;
rdev->exclusive = 0;
- put_device(&rdev->dev);
regulator_unlock(rdev);
kfree_const(regulator->supply_name);
kfree(regulator);
module_put(rdev->owner);
+ put_device(&rdev->dev);
}
/**
@@ -5002,6 +5006,7 @@ regulator_register(const struct regulator_desc *regulator_desc,
struct regulator_dev *rdev;
bool dangling_cfg_gpiod = false;
bool dangling_of_gpiod = false;
+ bool reg_device_fail = false;
struct device *dev;
int ret, i;
@@ -5187,7 +5192,7 @@ regulator_register(const struct regulator_desc *regulator_desc,
dev_set_drvdata(&rdev->dev, rdev);
ret = device_register(&rdev->dev);
if (ret != 0) {
- put_device(&rdev->dev);
+ reg_device_fail = true;
goto unset_supplies;
}
@@ -5218,7 +5223,10 @@ wash:
clean:
if (dangling_of_gpiod)
gpiod_put(config->ena_gpiod);
- kfree(rdev);
+ if (reg_device_fail)
+ put_device(&rdev->dev);
+ else
+ kfree(rdev);
kfree(config);
rinse:
if (dangling_cfg_gpiod)
diff --git a/drivers/regulator/max77650-regulator.c b/drivers/regulator/max77650-regulator.c
index e57fc9197d62..ac89a412f665 100644
--- a/drivers/regulator/max77650-regulator.c
+++ b/drivers/regulator/max77650-regulator.c
@@ -386,9 +386,16 @@ static int max77650_regulator_probe(struct platform_device *pdev)
return 0;
}
+static const struct of_device_id max77650_regulator_of_match[] = {
+ { .compatible = "maxim,max77650-regulator" },
+ { }
+};
+MODULE_DEVICE_TABLE(of, max77650_regulator_of_match);
+
static struct platform_driver max77650_regulator_driver = {
.driver = {
.name = "max77650-regulator",
+ .of_match_table = max77650_regulator_of_match,
},
.probe = max77650_regulator_probe,
};
diff --git a/drivers/regulator/rn5t618-regulator.c b/drivers/regulator/rn5t618-regulator.c
index 4a91be0ad5ae..5c12d57be040 100644
--- a/drivers/regulator/rn5t618-regulator.c
+++ b/drivers/regulator/rn5t618-regulator.c
@@ -148,6 +148,7 @@ static struct platform_driver rn5t618_regulator_driver = {
module_platform_driver(rn5t618_regulator_driver);
+MODULE_ALIAS("platform:rn5t618-regulator");
MODULE_AUTHOR("Beniamino Galvani <[email protected]>");
MODULE_DESCRIPTION("RN5T618 regulator driver");
MODULE_LICENSE("GPL v2");
diff --git a/drivers/regulator/s5m8767.c b/drivers/regulator/s5m8767.c
index bdc07739e9a2..12d6b8d2e97e 100644
--- a/drivers/regulator/s5m8767.c
+++ b/drivers/regulator/s5m8767.c
@@ -588,7 +588,7 @@ static int s5m8767_pmic_dt_parse_pdata(struct platform_device *pdev,
if (of_property_read_u32(reg_np, "op_mode",
&rmode->mode)) {
dev_warn(iodev->dev,
- "no op_mode property property at %pOF\n",
+ "no op_mode property at %pOF\n",
reg_np);
rmode->mode = S5M8767_OPMODE_NORMAL_MODE;
diff --git a/drivers/reset/core.c b/drivers/reset/core.c
index ca1d49146f61..7597c70e04d5 100644
--- a/drivers/reset/core.c
+++ b/drivers/reset/core.c
@@ -787,7 +787,7 @@ struct reset_control *__devm_reset_control_get(struct device *dev,
return ERR_PTR(-ENOMEM);
rstc = __reset_control_get(dev, id, index, shared, optional, acquired);
- if (!IS_ERR(rstc)) {
+ if (!IS_ERR_OR_NULL(rstc)) {
*ptr = rstc;
devres_add(dev, ptr);
} else {
@@ -861,8 +861,7 @@ static int of_reset_control_get_count(struct device_node *node)
* @acquired: only one reset control may be acquired for a given controller
* and ID
*
- * Returns pointer to allocated reset_control_array on success or
- * error on failure
+ * Returns pointer to allocated reset_control on success or error on failure
*/
struct reset_control *
of_reset_control_array_get(struct device_node *np, bool shared, bool optional,
@@ -915,8 +914,7 @@ EXPORT_SYMBOL_GPL(of_reset_control_array_get);
* that just have to be asserted or deasserted, without any
* requirements on the order.
*
- * Returns pointer to allocated reset_control_array on success or
- * error on failure
+ * Returns pointer to allocated reset_control on success or error on failure
*/
struct reset_control *
devm_reset_control_array_get(struct device *dev, bool shared, bool optional)
@@ -930,7 +928,7 @@ devm_reset_control_array_get(struct device *dev, bool shared, bool optional)
return ERR_PTR(-ENOMEM);
rstc = of_reset_control_array_get(dev->of_node, shared, optional, true);
- if (IS_ERR(rstc)) {
+ if (IS_ERR_OR_NULL(rstc)) {
devres_free(devres);
return rstc;
}
diff --git a/drivers/reset/reset-brcmstb.c b/drivers/reset/reset-brcmstb.c
index a608f445dad6..f213264c8567 100644
--- a/drivers/reset/reset-brcmstb.c
+++ b/drivers/reset/reset-brcmstb.c
@@ -91,12 +91,6 @@ static int brcmstb_reset_probe(struct platform_device *pdev)
return -ENOMEM;
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- if (!IS_ALIGNED(res->start, SW_INIT_BANK_SIZE) ||
- !IS_ALIGNED(resource_size(res), SW_INIT_BANK_SIZE)) {
- dev_err(kdev, "incorrect register range\n");
- return -EINVAL;
- }
-
priv->base = devm_ioremap_resource(kdev, res);
if (IS_ERR(priv->base))
return PTR_ERR(priv->base);
diff --git a/drivers/rtc/rtc-mc146818-lib.c b/drivers/rtc/rtc-mc146818-lib.c
index df2829dd55ad..2ecd8752b088 100644
--- a/drivers/rtc/rtc-mc146818-lib.c
+++ b/drivers/rtc/rtc-mc146818-lib.c
@@ -172,20 +172,7 @@ int mc146818_set_time(struct rtc_time *time)
save_control = CMOS_READ(RTC_CONTROL);
CMOS_WRITE((save_control|RTC_SET), RTC_CONTROL);
save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
-
-#ifdef CONFIG_X86
- if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
- boot_cpu_data.x86 == 0x17) ||
- boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) {
- CMOS_WRITE((save_freq_select & (~RTC_DIV_RESET2)),
- RTC_FREQ_SELECT);
- save_freq_select &= ~RTC_DIV_RESET2;
- } else
- CMOS_WRITE((save_freq_select | RTC_DIV_RESET2),
- RTC_FREQ_SELECT);
-#else
- CMOS_WRITE((save_freq_select | RTC_DIV_RESET2), RTC_FREQ_SELECT);
-#endif
+ CMOS_WRITE((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT);
#ifdef CONFIG_MACH_DECSTATION
CMOS_WRITE(real_yrs, RTC_DEC_YEAR);
diff --git a/drivers/rtc/rtc-mt6397.c b/drivers/rtc/rtc-mt6397.c
index 5249fc99fd5f..9135e2101752 100644
--- a/drivers/rtc/rtc-mt6397.c
+++ b/drivers/rtc/rtc-mt6397.c
@@ -47,7 +47,7 @@ static irqreturn_t mtk_rtc_irq_handler_thread(int irq, void *data)
irqen = irqsta & ~RTC_IRQ_EN_AL;
mutex_lock(&rtc->lock);
if (regmap_write(rtc->regmap, rtc->addr_base + RTC_IRQ_EN,
- irqen) < 0)
+ irqen) == 0)
mtk_rtc_write_trigger(rtc);
mutex_unlock(&rtc->lock);
@@ -169,12 +169,12 @@ static int mtk_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alm)
alm->pending = !!(pdn2 & RTC_PDN2_PWRON_ALARM);
mutex_unlock(&rtc->lock);
- tm->tm_sec = data[RTC_OFFSET_SEC];
- tm->tm_min = data[RTC_OFFSET_MIN];
- tm->tm_hour = data[RTC_OFFSET_HOUR];
- tm->tm_mday = data[RTC_OFFSET_DOM];
- tm->tm_mon = data[RTC_OFFSET_MTH];
- tm->tm_year = data[RTC_OFFSET_YEAR];
+ tm->tm_sec = data[RTC_OFFSET_SEC] & RTC_AL_SEC_MASK;
+ tm->tm_min = data[RTC_OFFSET_MIN] & RTC_AL_MIN_MASK;
+ tm->tm_hour = data[RTC_OFFSET_HOUR] & RTC_AL_HOU_MASK;
+ tm->tm_mday = data[RTC_OFFSET_DOM] & RTC_AL_DOM_MASK;
+ tm->tm_mon = data[RTC_OFFSET_MTH] & RTC_AL_MTH_MASK;
+ tm->tm_year = data[RTC_OFFSET_YEAR] & RTC_AL_YEA_MASK;
tm->tm_year += RTC_MIN_YEAR_OFFSET;
tm->tm_mon--;
@@ -195,14 +195,25 @@ static int mtk_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alm)
tm->tm_year -= RTC_MIN_YEAR_OFFSET;
tm->tm_mon++;
- data[RTC_OFFSET_SEC] = tm->tm_sec;
- data[RTC_OFFSET_MIN] = tm->tm_min;
- data[RTC_OFFSET_HOUR] = tm->tm_hour;
- data[RTC_OFFSET_DOM] = tm->tm_mday;
- data[RTC_OFFSET_MTH] = tm->tm_mon;
- data[RTC_OFFSET_YEAR] = tm->tm_year;
-
mutex_lock(&rtc->lock);
+ ret = regmap_bulk_read(rtc->regmap, rtc->addr_base + RTC_AL_SEC,
+ data, RTC_OFFSET_COUNT);
+ if (ret < 0)
+ goto exit;
+
+ data[RTC_OFFSET_SEC] = ((data[RTC_OFFSET_SEC] & ~(RTC_AL_SEC_MASK)) |
+ (tm->tm_sec & RTC_AL_SEC_MASK));
+ data[RTC_OFFSET_MIN] = ((data[RTC_OFFSET_MIN] & ~(RTC_AL_MIN_MASK)) |
+ (tm->tm_min & RTC_AL_MIN_MASK));
+ data[RTC_OFFSET_HOUR] = ((data[RTC_OFFSET_HOUR] & ~(RTC_AL_HOU_MASK)) |
+ (tm->tm_hour & RTC_AL_HOU_MASK));
+ data[RTC_OFFSET_DOM] = ((data[RTC_OFFSET_DOM] & ~(RTC_AL_DOM_MASK)) |
+ (tm->tm_mday & RTC_AL_DOM_MASK));
+ data[RTC_OFFSET_MTH] = ((data[RTC_OFFSET_MTH] & ~(RTC_AL_MTH_MASK)) |
+ (tm->tm_mon & RTC_AL_MTH_MASK));
+ data[RTC_OFFSET_YEAR] = ((data[RTC_OFFSET_YEAR] & ~(RTC_AL_YEA_MASK)) |
+ (tm->tm_year & RTC_AL_YEA_MASK));
+
if (alm->enabled) {
ret = regmap_bulk_write(rtc->regmap,
rtc->addr_base + RTC_AL_SEC,
diff --git a/drivers/rtc/rtc-sun6i.c b/drivers/rtc/rtc-sun6i.c
index 8dcd20b34dde..852f5f3b3592 100644
--- a/drivers/rtc/rtc-sun6i.c
+++ b/drivers/rtc/rtc-sun6i.c
@@ -379,6 +379,22 @@ static void __init sun50i_h6_rtc_clk_init(struct device_node *node)
CLK_OF_DECLARE_DRIVER(sun50i_h6_rtc_clk, "allwinner,sun50i-h6-rtc",
sun50i_h6_rtc_clk_init);
+/*
+ * The R40 user manual is self-conflicting on whether the prescaler is
+ * fixed or configurable. The clock diagram shows it as fixed, but there
+ * is also a configurable divider in the RTC block.
+ */
+static const struct sun6i_rtc_clk_data sun8i_r40_rtc_data = {
+ .rc_osc_rate = 16000000,
+ .fixed_prescaler = 512,
+};
+static void __init sun8i_r40_rtc_clk_init(struct device_node *node)
+{
+ sun6i_rtc_clk_init(node, &sun8i_r40_rtc_data);
+}
+CLK_OF_DECLARE_DRIVER(sun8i_r40_rtc_clk, "allwinner,sun8i-r40-rtc",
+ sun8i_r40_rtc_clk_init);
+
static const struct sun6i_rtc_clk_data sun8i_v3_rtc_data = {
.rc_osc_rate = 32000,
.has_out_clk = 1,
diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c
index c94184d080f8..a28b9ff82378 100644
--- a/drivers/s390/block/dasd_eckd.c
+++ b/drivers/s390/block/dasd_eckd.c
@@ -1128,7 +1128,8 @@ static u32 get_fcx_max_data(struct dasd_device *device)
{
struct dasd_eckd_private *private = device->private;
int fcx_in_css, fcx_in_gneq, fcx_in_features;
- int tpm, mdc;
+ unsigned int mdc;
+ int tpm;
if (dasd_nofcx)
return 0;
@@ -1142,7 +1143,7 @@ static u32 get_fcx_max_data(struct dasd_device *device)
return 0;
mdc = ccw_device_get_mdc(device->cdev, 0);
- if (mdc < 0) {
+ if (mdc == 0) {
dev_warn(&device->cdev->dev, "Detecting the maximum supported data size for zHPF requests failed\n");
return 0;
} else {
@@ -1153,12 +1154,12 @@ static u32 get_fcx_max_data(struct dasd_device *device)
static int verify_fcx_max_data(struct dasd_device *device, __u8 lpm)
{
struct dasd_eckd_private *private = device->private;
- int mdc;
+ unsigned int mdc;
u32 fcx_max_data;
if (private->fcx_max_data) {
mdc = ccw_device_get_mdc(device->cdev, lpm);
- if ((mdc < 0)) {
+ if (mdc == 0) {
dev_warn(&device->cdev->dev,
"Detecting the maximum data size for zHPF "
"requests failed (rc=%d) for a new path %x\n",
@@ -2073,7 +2074,7 @@ out_err2:
dasd_free_block(device->block);
device->block = NULL;
out_err1:
- kfree(private->conf_data);
+ dasd_eckd_clear_conf_data(device);
kfree(device->private);
device->private = NULL;
return rc;
@@ -2082,7 +2083,6 @@ out_err1:
static void dasd_eckd_uncheck_device(struct dasd_device *device)
{
struct dasd_eckd_private *private = device->private;
- int i;
if (!private)
return;
@@ -2092,21 +2092,7 @@ static void dasd_eckd_uncheck_device(struct dasd_device *device)
private->sneq = NULL;
private->vdsneq = NULL;
private->gneq = NULL;
- private->conf_len = 0;
- for (i = 0; i < 8; i++) {
- kfree(device->path[i].conf_data);
- if ((__u8 *)device->path[i].conf_data ==
- private->conf_data) {
- private->conf_data = NULL;
- private->conf_len = 0;
- }
- device->path[i].conf_data = NULL;
- device->path[i].cssid = 0;
- device->path[i].ssid = 0;
- device->path[i].chpid = 0;
- }
- kfree(private->conf_data);
- private->conf_data = NULL;
+ dasd_eckd_clear_conf_data(device);
}
static struct dasd_ccw_req *
diff --git a/drivers/s390/block/dasd_fba.h b/drivers/s390/block/dasd_fba.h
index 8f75df06e893..45ddabec4017 100644
--- a/drivers/s390/block/dasd_fba.h
+++ b/drivers/s390/block/dasd_fba.h
@@ -2,7 +2,7 @@
/*
* Author(s)......: Holger Smolinski <[email protected]>
* Bugreports.to..: <[email protected]>
- * Coypright IBM Corp. 1999, 2000
+ * Copyright IBM Corp. 1999, 2000
*
*/
diff --git a/drivers/s390/block/dasd_proc.c b/drivers/s390/block/dasd_proc.c
index 1770b99f607e..8d4d69ea5baf 100644
--- a/drivers/s390/block/dasd_proc.c
+++ b/drivers/s390/block/dasd_proc.c
@@ -5,7 +5,7 @@
* Carsten Otte <[email protected]>
* Martin Schwidefsky <[email protected]>
* Bugreports.to..: <[email protected]>
- * Coypright IBM Corp. 1999, 2002
+ * Copyright IBM Corp. 1999, 2002
*
* /proc interface for the dasd driver.
*
diff --git a/drivers/s390/cio/device_ops.c b/drivers/s390/cio/device_ops.c
index 65841af15748..ccecf6b9504e 100644
--- a/drivers/s390/cio/device_ops.c
+++ b/drivers/s390/cio/device_ops.c
@@ -635,7 +635,7 @@ EXPORT_SYMBOL(ccw_device_tm_start_timeout);
* @mask: mask of paths to use
*
* Return the number of 64K-bytes blocks all paths at least support
- * for a transport command. Return values <= 0 indicate failures.
+ * for a transport command. Return value 0 indicates failure.
*/
int ccw_device_get_mdc(struct ccw_device *cdev, u8 mask)
{
diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h
index 871d44746f5c..7c37e94fb28b 100644
--- a/drivers/s390/net/qeth_core.h
+++ b/drivers/s390/net/qeth_core.h
@@ -125,12 +125,6 @@ struct qeth_routing_info {
enum qeth_routing_types type;
};
-/* IPA stuff */
-struct qeth_ipa_info {
- __u32 supported_funcs;
- __u32 enabled_funcs;
-};
-
/* SETBRIDGEPORT stuff */
enum qeth_sbp_roles {
QETH_SBP_ROLE_NONE = 0,
@@ -169,41 +163,6 @@ struct qeth_vnicc_info {
bool rx_bcast_enabled;
};
-static inline int qeth_is_adp_supported(struct qeth_ipa_info *ipa,
- enum qeth_ipa_setadp_cmd func)
-{
- return (ipa->supported_funcs & func);
-}
-
-static inline int qeth_is_ipa_supported(struct qeth_ipa_info *ipa,
- enum qeth_ipa_funcs func)
-{
- return (ipa->supported_funcs & func);
-}
-
-static inline int qeth_is_ipa_enabled(struct qeth_ipa_info *ipa,
- enum qeth_ipa_funcs func)
-{
- return (ipa->supported_funcs & ipa->enabled_funcs & func);
-}
-
-#define qeth_adp_supported(c, f) \
- qeth_is_adp_supported(&c->options.adp, f)
-#define qeth_is_supported(c, f) \
- qeth_is_ipa_supported(&c->options.ipa4, f)
-#define qeth_is_enabled(c, f) \
- qeth_is_ipa_enabled(&c->options.ipa4, f)
-#define qeth_is_supported6(c, f) \
- qeth_is_ipa_supported(&c->options.ipa6, f)
-#define qeth_is_enabled6(c, f) \
- qeth_is_ipa_enabled(&c->options.ipa6, f)
-#define qeth_is_ipafunc_supported(c, prot, f) \
- ((prot == QETH_PROT_IPV6) ? \
- qeth_is_supported6(c, f) : qeth_is_supported(c, f))
-#define qeth_is_ipafunc_enabled(c, prot, f) \
- ((prot == QETH_PROT_IPV6) ? \
- qeth_is_enabled6(c, f) : qeth_is_enabled(c, f))
-
#define QETH_IDX_FUNC_LEVEL_OSD 0x0101
#define QETH_IDX_FUNC_LEVEL_IQD 0x4108
@@ -262,7 +221,6 @@ static inline int qeth_is_ipa_enabled(struct qeth_ipa_info *ipa,
/* large receive scatter gather copy break */
#define QETH_RX_SG_CB (PAGE_SIZE >> 1)
-#define QETH_RX_PULL_LEN 256
struct qeth_hdr_layer3 {
__u8 id;
@@ -735,11 +693,11 @@ enum qeth_discipline_id {
};
struct qeth_card_options {
+ struct qeth_ipa_caps ipa4;
+ struct qeth_ipa_caps ipa6;
struct qeth_routing_info route4;
- struct qeth_ipa_info ipa4;
- struct qeth_ipa_info adp; /*Adapter parameters*/
struct qeth_routing_info route6;
- struct qeth_ipa_info ipa6;
+ struct qeth_ipa_caps adp; /* Adapter parameters */
struct qeth_sbp_info sbp; /* SETBRIDGEPORT options */
struct qeth_vnicc_info vnicc; /* VNICC options */
int fake_broadcast;
@@ -769,7 +727,6 @@ struct qeth_osn_info {
struct qeth_discipline {
const struct device_type *devtype;
- int (*process_rx_buffer)(struct qeth_card *card, int budget, int *done);
int (*recover)(void *ptr);
int (*setup) (struct ccwgroup_device *);
void (*remove) (struct ccwgroup_device *);
@@ -862,6 +819,13 @@ static inline bool qeth_card_hw_is_reachable(struct qeth_card *card)
return card->state == CARD_STATE_SOFTSETUP;
}
+static inline void qeth_unlock_channel(struct qeth_card *card,
+ struct qeth_channel *channel)
+{
+ atomic_set(&channel->irq_pending, 0);
+ wake_up(&card->wait_q);
+}
+
struct qeth_trap_id {
__u16 lparnr;
char vmname[8];
@@ -957,18 +921,6 @@ static inline struct dst_entry *qeth_dst_check_rcu(struct sk_buff *skb, int ipv)
return dst;
}
-static inline void qeth_rx_csum(struct qeth_card *card, struct sk_buff *skb,
- u8 flags)
-{
- if ((card->dev->features & NETIF_F_RXCSUM) &&
- (flags & QETH_HDR_EXT_CSUM_TRANSP_REQ)) {
- skb->ip_summed = CHECKSUM_UNNECESSARY;
- QETH_CARD_STAT_INC(card, rx_skb_csum);
- } else {
- skb->ip_summed = CHECKSUM_NONE;
- }
-}
-
static inline void qeth_tx_csum(struct sk_buff *skb, u8 *flags, int ipv)
{
*flags |= QETH_HDR_EXT_CSUM_TRANSP_REQ;
@@ -1065,9 +1017,6 @@ struct qeth_cmd_buffer *qeth_get_diag_cmd(struct qeth_card *card,
void qeth_notify_cmd(struct qeth_cmd_buffer *iob, int reason);
void qeth_put_cmd(struct qeth_cmd_buffer *iob);
-struct sk_buff *qeth_core_get_next_skb(struct qeth_card *,
- struct qeth_qdio_buffer *, struct qdio_buffer_element **, int *,
- struct qeth_hdr **);
void qeth_schedule_recovery(struct qeth_card *);
int qeth_poll(struct napi_struct *napi, int budget);
void qeth_clear_ipacmd_list(struct qeth_card *);
@@ -1076,7 +1025,7 @@ void qeth_clear_working_pool_list(struct qeth_card *);
void qeth_drain_output_queues(struct qeth_card *card);
void qeth_setadp_promisc_mode(struct qeth_card *card, bool enable);
int qeth_setadpparms_change_macaddr(struct qeth_card *);
-void qeth_tx_timeout(struct net_device *);
+void qeth_tx_timeout(struct net_device *, unsigned int txqueue);
void qeth_prepare_ipa_cmd(struct qeth_card *card, struct qeth_cmd_buffer *iob,
u16 cmd_length);
int qeth_query_switch_attributes(struct qeth_card *card,
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index b9a2349e4b90..c19cc3978e1f 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -520,11 +520,10 @@ static int __qeth_issue_next_read(struct qeth_card *card)
} else {
QETH_DBF_MESSAGE(2, "error %i on device %x when starting next read ccw!\n",
rc, CARD_DEVID(card));
- atomic_set(&channel->irq_pending, 0);
+ qeth_unlock_channel(card, channel);
qeth_put_cmd(iob);
card->read_or_write_problem = 1;
qeth_schedule_recovery(card);
- wake_up(&card->wait_q);
}
return rc;
}
@@ -655,17 +654,17 @@ static int qeth_check_idx_response(struct qeth_card *card,
unsigned char *buffer)
{
QETH_DBF_HEX(CTRL, 2, buffer, QETH_DBF_CTRL_LEN);
- if ((buffer[2] & 0xc0) == 0xc0) {
+ if ((buffer[2] & QETH_IDX_TERMINATE_MASK) == QETH_IDX_TERMINATE) {
QETH_DBF_MESSAGE(2, "received an IDX TERMINATE with cause code %#04x\n",
buffer[4]);
QETH_CARD_TEXT(card, 2, "ckidxres");
QETH_CARD_TEXT(card, 2, " idxterm");
- QETH_CARD_TEXT_(card, 2, " rc%d", -EIO);
- if (buffer[4] == 0xf6) {
+ QETH_CARD_TEXT_(card, 2, "rc%x", buffer[4]);
+ if (buffer[4] == QETH_IDX_TERM_BAD_TRANSPORT ||
+ buffer[4] == QETH_IDX_TERM_BAD_TRANSPORT_VM) {
dev_err(&card->gdev->dev,
- "The qeth device is not configured "
- "for the OSI layer required by z/VM\n");
- return -EPERM;
+ "The device does not support the configured transport mode\n");
+ return -EPROTONOSUPPORT;
}
return -EIO;
}
@@ -742,10 +741,10 @@ static void qeth_issue_next_read_cb(struct qeth_card *card,
case 0:
break;
case -EIO:
- qeth_clear_ipacmd_list(card);
qeth_schedule_recovery(card);
/* fall through */
default:
+ qeth_clear_ipacmd_list(card);
goto out;
}
@@ -972,8 +971,6 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm,
/* while we hold the ccwdev lock, this stays valid: */
gdev = dev_get_drvdata(&cdev->dev);
card = dev_get_drvdata(&gdev->dev);
- if (!card)
- return;
QETH_CARD_TEXT(card, 5, "irq");
@@ -1003,24 +1000,25 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm,
}
channel->active_cmd = NULL;
+ qeth_unlock_channel(card, channel);
rc = qeth_check_irb_error(card, cdev, irb);
if (rc) {
/* IO was terminated, free its resources. */
if (iob)
qeth_cancel_cmd(iob, rc);
- atomic_set(&channel->irq_pending, 0);
- wake_up(&card->wait_q);
return;
}
- atomic_set(&channel->irq_pending, 0);
-
- if (irb->scsw.cmd.fctl & (SCSW_FCTL_CLEAR_FUNC))
+ if (irb->scsw.cmd.fctl & SCSW_FCTL_CLEAR_FUNC) {
channel->state = CH_STATE_STOPPED;
+ wake_up(&card->wait_q);
+ }
- if (irb->scsw.cmd.fctl & (SCSW_FCTL_HALT_FUNC))
+ if (irb->scsw.cmd.fctl & SCSW_FCTL_HALT_FUNC) {
channel->state = CH_STATE_HALTED;
+ wake_up(&card->wait_q);
+ }
if (iob && (irb->scsw.cmd.fctl & (SCSW_FCTL_CLEAR_FUNC |
SCSW_FCTL_HALT_FUNC))) {
@@ -1054,7 +1052,7 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm,
qeth_cancel_cmd(iob, rc);
qeth_clear_ipacmd_list(card);
qeth_schedule_recovery(card);
- goto out;
+ return;
}
}
@@ -1062,16 +1060,12 @@ static void qeth_irq(struct ccw_device *cdev, unsigned long intparm,
/* sanity check: */
if (irb->scsw.cmd.count > iob->length) {
qeth_cancel_cmd(iob, -EIO);
- goto out;
+ return;
}
if (iob->callback)
iob->callback(card, iob,
iob->length - irb->scsw.cmd.count);
}
-
-out:
- wake_up(&card->wait_q);
- return;
}
static void qeth_notify_skbs(struct qeth_qdio_out_q *q,
@@ -1198,31 +1192,6 @@ static void qeth_free_buffer_pool(struct qeth_card *card)
}
}
-static void qeth_clean_channel(struct qeth_channel *channel)
-{
- struct ccw_device *cdev = channel->ccwdev;
-
- QETH_DBF_TEXT(SETUP, 2, "freech");
-
- spin_lock_irq(get_ccwdev_lock(cdev));
- cdev->handler = NULL;
- spin_unlock_irq(get_ccwdev_lock(cdev));
-}
-
-static void qeth_setup_channel(struct qeth_channel *channel)
-{
- struct ccw_device *cdev = channel->ccwdev;
-
- QETH_DBF_TEXT(SETUP, 2, "setupch");
-
- channel->state = CH_STATE_DOWN;
- atomic_set(&channel->irq_pending, 0);
-
- spin_lock_irq(get_ccwdev_lock(cdev));
- cdev->handler = qeth_irq;
- spin_unlock_irq(get_ccwdev_lock(cdev));
-}
-
static int qeth_osa_set_output_queues(struct qeth_card *card, bool single)
{
unsigned int count = single ? 1 : card->dev->num_tx_queues;
@@ -1395,9 +1364,6 @@ static struct qeth_card *qeth_alloc_card(struct ccwgroup_device *gdev)
if (!card->read_cmd)
goto out_read_cmd;
- qeth_setup_channel(&card->read);
- qeth_setup_channel(&card->write);
- qeth_setup_channel(&card->data);
card->qeth_service_level.seq_print = qeth_core_sl_print;
register_service_level(&card->qeth_service_level);
return card;
@@ -1467,12 +1433,38 @@ int qeth_stop_channel(struct qeth_channel *channel)
channel->active_cmd);
channel->active_cmd = NULL;
}
+ cdev->handler = NULL;
spin_unlock_irq(get_ccwdev_lock(cdev));
return rc;
}
EXPORT_SYMBOL_GPL(qeth_stop_channel);
+static int qeth_start_channel(struct qeth_channel *channel)
+{
+ struct ccw_device *cdev = channel->ccwdev;
+ int rc;
+
+ channel->state = CH_STATE_DOWN;
+ atomic_set(&channel->irq_pending, 0);
+
+ spin_lock_irq(get_ccwdev_lock(cdev));
+ cdev->handler = qeth_irq;
+ spin_unlock_irq(get_ccwdev_lock(cdev));
+
+ rc = ccw_device_set_online(cdev);
+ if (rc)
+ goto err;
+
+ return 0;
+
+err:
+ spin_lock_irq(get_ccwdev_lock(cdev));
+ cdev->handler = NULL;
+ spin_unlock_irq(get_ccwdev_lock(cdev));
+ return rc;
+}
+
static int qeth_halt_channels(struct qeth_card *card)
{
int rc1 = 0, rc2 = 0, rc3 = 0;
@@ -1784,8 +1776,7 @@ static int qeth_send_control_data(struct qeth_card *card,
QETH_CARD_TEXT_(card, 2, " err%d", rc);
qeth_dequeue_cmd(card, iob);
qeth_put_cmd(iob);
- atomic_set(&channel->irq_pending, 0);
- wake_up(&card->wait_q);
+ qeth_unlock_channel(card, channel);
goto out;
}
@@ -2482,50 +2473,46 @@ static int qeth_mpc_initialize(struct qeth_card *card)
rc = qeth_cm_enable(card);
if (rc) {
QETH_CARD_TEXT_(card, 2, "2err%d", rc);
- goto out_qdio;
+ return rc;
}
rc = qeth_cm_setup(card);
if (rc) {
QETH_CARD_TEXT_(card, 2, "3err%d", rc);
- goto out_qdio;
+ return rc;
}
rc = qeth_ulp_enable(card);
if (rc) {
QETH_CARD_TEXT_(card, 2, "4err%d", rc);
- goto out_qdio;
+ return rc;
}
rc = qeth_ulp_setup(card);
if (rc) {
QETH_CARD_TEXT_(card, 2, "5err%d", rc);
- goto out_qdio;
+ return rc;
}
rc = qeth_alloc_qdio_queues(card);
if (rc) {
QETH_CARD_TEXT_(card, 2, "5err%d", rc);
- goto out_qdio;
+ return rc;
}
rc = qeth_qdio_establish(card);
if (rc) {
QETH_CARD_TEXT_(card, 2, "6err%d", rc);
qeth_free_qdio_queues(card);
- goto out_qdio;
+ return rc;
}
rc = qeth_qdio_activate(card);
if (rc) {
QETH_CARD_TEXT_(card, 2, "7err%d", rc);
- goto out_qdio;
+ return rc;
}
rc = qeth_dm_act(card);
if (rc) {
QETH_CARD_TEXT_(card, 2, "8err%d", rc);
- goto out_qdio;
+ return rc;
}
return 0;
-out_qdio:
- qeth_qdio_clear_card(card, !IS_IQD(card));
- qdio_free(CARD_DDEV(card));
- return rc;
}
void qeth_print_status_message(struct qeth_card *card)
@@ -2636,7 +2623,8 @@ static int qeth_init_input_buffer(struct qeth_card *card,
if ((card->options.cq == QETH_CQ_ENABLED) && (!buf->rx_skb)) {
buf->rx_skb = netdev_alloc_skb(card->dev,
- QETH_RX_PULL_LEN + ETH_HLEN);
+ ETH_HLEN +
+ sizeof(struct ipv6hdr));
if (!buf->rx_skb)
return 1;
}
@@ -2871,7 +2859,7 @@ static int qeth_query_setadapterparms_cb(struct qeth_card *card,
cmd->data.setadapterparms.data.query_cmds_supp.lan_type;
QETH_CARD_TEXT_(card, 2, "lnk %d", card->info.link_type);
}
- card->options.adp.supported_funcs =
+ card->options.adp.supported =
cmd->data.setadapterparms.data.query_cmds_supp.supported_cmds;
return 0;
}
@@ -2927,8 +2915,8 @@ static int qeth_query_ipassists_cb(struct qeth_card *card,
case IPA_RC_NOTSUPP:
case IPA_RC_L2_UNSUPPORTED_CMD:
QETH_CARD_TEXT(card, 2, "ipaunsup");
- card->options.ipa4.supported_funcs |= IPA_SETADAPTERPARMS;
- card->options.ipa6.supported_funcs |= IPA_SETADAPTERPARMS;
+ card->options.ipa4.supported |= IPA_SETADAPTERPARMS;
+ card->options.ipa6.supported |= IPA_SETADAPTERPARMS;
return -EOPNOTSUPP;
default:
QETH_DBF_MESSAGE(1, "IPA_CMD_QIPASSIST on device %x: Unhandled rc=%#x\n",
@@ -2936,13 +2924,11 @@ static int qeth_query_ipassists_cb(struct qeth_card *card,
return -EIO;
}
- if (cmd->hdr.prot_version == QETH_PROT_IPV4) {
- card->options.ipa4.supported_funcs = cmd->hdr.ipa_supported;
- card->options.ipa4.enabled_funcs = cmd->hdr.ipa_enabled;
- } else if (cmd->hdr.prot_version == QETH_PROT_IPV6) {
- card->options.ipa6.supported_funcs = cmd->hdr.ipa_supported;
- card->options.ipa6.enabled_funcs = cmd->hdr.ipa_enabled;
- } else
+ if (cmd->hdr.prot_version == QETH_PROT_IPV4)
+ card->options.ipa4 = cmd->hdr.assists;
+ else if (cmd->hdr.prot_version == QETH_PROT_IPV6)
+ card->options.ipa6 = cmd->hdr.assists;
+ else
QETH_DBF_MESSAGE(1, "IPA_CMD_QIPASSIST on device %x: Flawed LIC detected\n",
CARD_DEVID(card));
return 0;
@@ -3413,7 +3399,7 @@ static void qeth_qdio_start_poll(struct ccw_device *ccwdev, int queue,
struct qeth_card *card = (struct qeth_card *)card_ptr;
if (card->dev->flags & IFF_UP)
- napi_schedule(&card->napi);
+ napi_schedule_irqoff(&card->napi);
}
int qeth_configure_cq(struct qeth_card *card, enum qeth_cq cq)
@@ -3429,11 +3415,6 @@ int qeth_configure_cq(struct qeth_card *card, enum qeth_cq cq)
goto out;
}
- if (card->state != CARD_STATE_DOWN) {
- rc = -1;
- goto out;
- }
-
qeth_free_qdio_queues(card);
card->options.cq = cq;
rc = 0;
@@ -4325,7 +4306,7 @@ int qeth_set_access_ctrl_online(struct qeth_card *card, int fallback)
return rc;
}
-void qeth_tx_timeout(struct net_device *dev)
+void qeth_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct qeth_card *card;
@@ -4706,7 +4687,7 @@ static void qeth_determine_capabilities(struct qeth_card *card)
QETH_CARD_TEXT(card, 2, "detcapab");
if (!ddev->online) {
ddev_offline = 1;
- rc = ccw_device_set_online(ddev);
+ rc = qeth_start_channel(channel);
if (rc) {
QETH_CARD_TEXT_(card, 2, "3err%d", rc);
goto out;
@@ -4779,7 +4760,7 @@ static int qeth_qdio_establish(struct qeth_card *card)
QETH_CARD_TEXT(card, 2, "qdioest");
- qib_param_field = kzalloc(FIELD_SIZEOF(struct qib, parm), GFP_KERNEL);
+ qib_param_field = kzalloc(sizeof_field(struct qib, parm), GFP_KERNEL);
if (!qib_param_field) {
rc = -ENOMEM;
goto out_free_nothing;
@@ -4881,9 +4862,6 @@ out_free_nothing:
static void qeth_core_free_card(struct qeth_card *card)
{
QETH_CARD_TEXT(card, 2, "freecrd");
- qeth_clean_channel(&card->read);
- qeth_clean_channel(&card->write);
- qeth_clean_channel(&card->data);
qeth_put_cmd(card->read_cmd);
destroy_workqueue(card->event_wq);
unregister_service_level(&card->qeth_service_level);
@@ -4946,13 +4924,14 @@ retry:
qeth_stop_channel(&card->write);
qeth_stop_channel(&card->read);
qdio_free(CARD_DDEV(card));
- rc = ccw_device_set_online(CARD_RDEV(card));
+
+ rc = qeth_start_channel(&card->read);
if (rc)
goto retriable;
- rc = ccw_device_set_online(CARD_WDEV(card));
+ rc = qeth_start_channel(&card->write);
if (rc)
goto retriable;
- rc = ccw_device_set_online(CARD_DDEV(card));
+ rc = qeth_start_channel(&card->data);
if (rc)
goto retriable;
retriable:
@@ -5013,9 +4992,9 @@ retriable:
*carrier_ok = true;
}
- card->options.ipa4.supported_funcs = 0;
- card->options.ipa6.supported_funcs = 0;
- card->options.adp.supported_funcs = 0;
+ card->options.ipa4.supported = 0;
+ card->options.ipa6.supported = 0;
+ card->options.adp.supported = 0;
card->options.sbp.supported_funcs = 0;
card->info.diagass_support = 0;
rc = qeth_query_ipassists(card, QETH_PROT_IPV4);
@@ -5035,10 +5014,8 @@ retriable:
}
if (qeth_adp_supported(card, IPA_SETADP_SET_DIAG_ASSIST)) {
rc = qeth_query_setdiagass(card);
- if (rc < 0) {
+ if (rc)
QETH_CARD_TEXT_(card, 2, "8err%d", rc);
- goto out;
- }
}
if (!qeth_is_diagass_supported(card, QETH_DIAGS_CMD_TRAP) ||
@@ -5059,6 +5036,121 @@ out:
}
EXPORT_SYMBOL_GPL(qeth_core_hardsetup_card);
+#if IS_ENABLED(CONFIG_QETH_L3)
+static void qeth_l3_rebuild_skb(struct qeth_card *card, struct sk_buff *skb,
+ struct qeth_hdr *hdr)
+{
+ struct af_iucv_trans_hdr *iucv = (struct af_iucv_trans_hdr *) skb->data;
+ struct qeth_hdr_layer3 *l3_hdr = &hdr->hdr.l3;
+ struct net_device *dev = skb->dev;
+
+ if (IS_IQD(card) && iucv->magic == ETH_P_AF_IUCV) {
+ dev_hard_header(skb, dev, ETH_P_AF_IUCV, dev->dev_addr,
+ "FAKELL", skb->len);
+ return;
+ }
+
+ if (!(l3_hdr->flags & QETH_HDR_PASSTHRU)) {
+ u16 prot = (l3_hdr->flags & QETH_HDR_IPV6) ? ETH_P_IPV6 :
+ ETH_P_IP;
+ unsigned char tg_addr[ETH_ALEN];
+
+ skb_reset_network_header(skb);
+ switch (l3_hdr->flags & QETH_HDR_CAST_MASK) {
+ case QETH_CAST_MULTICAST:
+ if (prot == ETH_P_IP)
+ ip_eth_mc_map(ip_hdr(skb)->daddr, tg_addr);
+ else
+ ipv6_eth_mc_map(&ipv6_hdr(skb)->daddr, tg_addr);
+ QETH_CARD_STAT_INC(card, rx_multicast);
+ break;
+ case QETH_CAST_BROADCAST:
+ ether_addr_copy(tg_addr, dev->broadcast);
+ QETH_CARD_STAT_INC(card, rx_multicast);
+ break;
+ default:
+ if (card->options.sniffer)
+ skb->pkt_type = PACKET_OTHERHOST;
+ ether_addr_copy(tg_addr, dev->dev_addr);
+ }
+
+ if (l3_hdr->ext_flags & QETH_HDR_EXT_SRC_MAC_ADDR)
+ dev_hard_header(skb, dev, prot, tg_addr,
+ &l3_hdr->next_hop.rx.src_mac, skb->len);
+ else
+ dev_hard_header(skb, dev, prot, tg_addr, "FAKELL",
+ skb->len);
+ }
+
+ /* copy VLAN tag from hdr into skb */
+ if (!card->options.sniffer &&
+ (l3_hdr->ext_flags & (QETH_HDR_EXT_VLAN_FRAME |
+ QETH_HDR_EXT_INCLUDE_VLAN_TAG))) {
+ u16 tag = (l3_hdr->ext_flags & QETH_HDR_EXT_VLAN_FRAME) ?
+ l3_hdr->vlan_id :
+ l3_hdr->next_hop.rx.vlan_id;
+
+ __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), tag);
+ }
+}
+#endif
+
+static void qeth_receive_skb(struct qeth_card *card, struct sk_buff *skb,
+ struct qeth_hdr *hdr, bool uses_frags)
+{
+ struct napi_struct *napi = &card->napi;
+ bool is_cso;
+
+ switch (hdr->hdr.l2.id) {
+ case QETH_HEADER_TYPE_OSN:
+ skb_push(skb, sizeof(*hdr));
+ skb_copy_to_linear_data(skb, hdr, sizeof(*hdr));
+ QETH_CARD_STAT_ADD(card, rx_bytes, skb->len);
+ QETH_CARD_STAT_INC(card, rx_packets);
+
+ card->osn_info.data_cb(skb);
+ return;
+#if IS_ENABLED(CONFIG_QETH_L3)
+ case QETH_HEADER_TYPE_LAYER3:
+ qeth_l3_rebuild_skb(card, skb, hdr);
+ is_cso = hdr->hdr.l3.ext_flags & QETH_HDR_EXT_CSUM_TRANSP_REQ;
+ break;
+#endif
+ case QETH_HEADER_TYPE_LAYER2:
+ is_cso = hdr->hdr.l2.flags[1] & QETH_HDR_EXT_CSUM_TRANSP_REQ;
+ break;
+ default:
+ /* never happens */
+ if (uses_frags)
+ napi_free_frags(napi);
+ else
+ dev_kfree_skb_any(skb);
+ return;
+ }
+
+ if (is_cso && (card->dev->features & NETIF_F_RXCSUM)) {
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ QETH_CARD_STAT_INC(card, rx_skb_csum);
+ } else {
+ skb->ip_summed = CHECKSUM_NONE;
+ }
+
+ QETH_CARD_STAT_ADD(card, rx_bytes, skb->len);
+ QETH_CARD_STAT_INC(card, rx_packets);
+ if (skb_is_nonlinear(skb)) {
+ QETH_CARD_STAT_INC(card, rx_sg_skbs);
+ QETH_CARD_STAT_ADD(card, rx_sg_frags,
+ skb_shinfo(skb)->nr_frags);
+ }
+
+ if (uses_frags) {
+ napi_gro_frags(napi);
+ } else {
+ skb->protocol = eth_type_trans(skb, skb->dev);
+ napi_gro_receive(napi, skb);
+ }
+}
+
static void qeth_create_skb_frag(struct sk_buff *skb, char *data, int data_len)
{
struct page *page = virt_to_page(data);
@@ -5075,17 +5167,20 @@ static inline int qeth_is_last_sbale(struct qdio_buffer_element *sbale)
return (sbale->eflags & SBAL_EFLAGS_LAST_ENTRY);
}
-struct sk_buff *qeth_core_get_next_skb(struct qeth_card *card,
- struct qeth_qdio_buffer *qethbuffer,
- struct qdio_buffer_element **__element, int *__offset,
- struct qeth_hdr **hdr)
+static int qeth_extract_skb(struct qeth_card *card,
+ struct qeth_qdio_buffer *qethbuffer,
+ struct qdio_buffer_element **__element,
+ int *__offset)
{
struct qdio_buffer_element *element = *__element;
struct qdio_buffer *buffer = qethbuffer->buffer;
+ struct napi_struct *napi = &card->napi;
unsigned int linear_len = 0;
+ bool uses_frags = false;
int offset = *__offset;
bool use_rx_sg = false;
unsigned int headroom;
+ struct qeth_hdr *hdr;
struct sk_buff *skb;
int skb_len = 0;
@@ -5093,42 +5188,42 @@ next_packet:
/* qeth_hdr must not cross element boundaries */
while (element->length < offset + sizeof(struct qeth_hdr)) {
if (qeth_is_last_sbale(element))
- return NULL;
+ return -ENODATA;
element++;
offset = 0;
}
- *hdr = element->addr + offset;
- offset += sizeof(struct qeth_hdr);
+ hdr = element->addr + offset;
+ offset += sizeof(*hdr);
skb = NULL;
- switch ((*hdr)->hdr.l2.id) {
+ switch (hdr->hdr.l2.id) {
case QETH_HEADER_TYPE_LAYER2:
- skb_len = (*hdr)->hdr.l2.pkt_length;
+ skb_len = hdr->hdr.l2.pkt_length;
linear_len = ETH_HLEN;
headroom = 0;
break;
case QETH_HEADER_TYPE_LAYER3:
- skb_len = (*hdr)->hdr.l3.length;
+ skb_len = hdr->hdr.l3.length;
if (!IS_LAYER3(card)) {
QETH_CARD_STAT_INC(card, rx_dropped_notsupp);
goto walk_packet;
}
- if ((*hdr)->hdr.l3.flags & QETH_HDR_PASSTHRU) {
+ if (hdr->hdr.l3.flags & QETH_HDR_PASSTHRU) {
linear_len = ETH_HLEN;
headroom = 0;
break;
}
- if ((*hdr)->hdr.l3.flags & QETH_HDR_IPV6)
+ if (hdr->hdr.l3.flags & QETH_HDR_IPV6)
linear_len = sizeof(struct ipv6hdr);
else
linear_len = sizeof(struct iphdr);
headroom = ETH_HLEN;
break;
case QETH_HEADER_TYPE_OSN:
- skb_len = (*hdr)->hdr.osn.pdu_length;
+ skb_len = hdr->hdr.osn.pdu_length;
if (!IS_OSN(card)) {
QETH_CARD_STAT_INC(card, rx_dropped_notsupp);
goto walk_packet;
@@ -5138,13 +5233,13 @@ next_packet:
headroom = sizeof(struct qeth_hdr);
break;
default:
- if ((*hdr)->hdr.l2.id & QETH_HEADER_MASK_INVAL)
+ if (hdr->hdr.l2.id & QETH_HEADER_MASK_INVAL)
QETH_CARD_STAT_INC(card, rx_frame_errors);
else
QETH_CARD_STAT_INC(card, rx_dropped_notsupp);
/* Can't determine packet length, drop the whole buffer. */
- return NULL;
+ return -EPROTONOSUPPORT;
}
if (skb_len < linear_len) {
@@ -5157,21 +5252,43 @@ next_packet:
!atomic_read(&card->force_alloc_skb) &&
!IS_OSN(card));
- if (use_rx_sg && qethbuffer->rx_skb) {
+ if (use_rx_sg) {
/* QETH_CQ_ENABLED only: */
- skb = qethbuffer->rx_skb;
- qethbuffer->rx_skb = NULL;
- } else {
- if (!use_rx_sg)
- linear_len = skb_len;
- skb = napi_alloc_skb(&card->napi, linear_len + headroom);
+ if (qethbuffer->rx_skb &&
+ skb_tailroom(qethbuffer->rx_skb) >= linear_len + headroom) {
+ skb = qethbuffer->rx_skb;
+ qethbuffer->rx_skb = NULL;
+ goto use_skb;
+ }
+
+ skb = napi_get_frags(napi);
+ if (!skb) {
+ /* -ENOMEM, no point in falling back further. */
+ QETH_CARD_STAT_INC(card, rx_dropped_nomem);
+ goto walk_packet;
+ }
+
+ if (skb_tailroom(skb) >= linear_len + headroom) {
+ uses_frags = true;
+ goto use_skb;
+ }
+
+ netdev_info_once(card->dev,
+ "Insufficient linear space in NAPI frags skb, need %u but have %u\n",
+ linear_len + headroom, skb_tailroom(skb));
+ /* Shouldn't happen. Don't optimize, fall back to linear skb. */
}
- if (!skb)
+ linear_len = skb_len;
+ skb = napi_alloc_skb(napi, linear_len + headroom);
+ if (!skb) {
QETH_CARD_STAT_INC(card, rx_dropped_nomem);
- else if (headroom)
- skb_reserve(skb, headroom);
+ goto walk_packet;
+ }
+use_skb:
+ if (headroom)
+ skb_reserve(skb, headroom);
walk_packet:
while (skb_len) {
int data_len = min(skb_len, (int)(element->length - offset));
@@ -5204,11 +5321,14 @@ walk_packet:
QETH_CARD_TEXT(card, 4, "unexeob");
QETH_CARD_HEX(card, 2, buffer, sizeof(void *));
if (skb) {
- dev_kfree_skb_any(skb);
+ if (uses_frags)
+ napi_free_frags(napi);
+ else
+ dev_kfree_skb_any(skb);
QETH_CARD_STAT_INC(card,
rx_length_errors);
}
- return NULL;
+ return -EMSGSIZE;
}
element++;
offset = 0;
@@ -5221,22 +5341,40 @@ walk_packet:
*__element = element;
*__offset = offset;
- if (use_rx_sg) {
- QETH_CARD_STAT_INC(card, rx_sg_skbs);
- QETH_CARD_STAT_ADD(card, rx_sg_frags,
- skb_shinfo(skb)->nr_frags);
+
+ qeth_receive_skb(card, skb, hdr, uses_frags);
+ return 0;
+}
+
+static int qeth_extract_skbs(struct qeth_card *card, int budget,
+ struct qeth_qdio_buffer *buf, bool *done)
+{
+ int work_done = 0;
+
+ WARN_ON_ONCE(!budget);
+ *done = false;
+
+ while (budget) {
+ if (qeth_extract_skb(card, buf, &card->rx.b_element,
+ &card->rx.e_offset)) {
+ *done = true;
+ break;
+ }
+
+ work_done++;
+ budget--;
}
- return skb;
+
+ return work_done;
}
-EXPORT_SYMBOL_GPL(qeth_core_get_next_skb);
int qeth_poll(struct napi_struct *napi, int budget)
{
struct qeth_card *card = container_of(napi, struct qeth_card, napi);
int work_done = 0;
struct qeth_qdio_buffer *buffer;
- int done;
int new_budget = budget;
+ bool done;
while (1) {
if (!card->rx.b_count) {
@@ -5259,11 +5397,10 @@ int qeth_poll(struct napi_struct *napi, int budget)
if (!(card->rx.qdio_err &&
qeth_check_qdio_errors(card, buffer->buffer,
card->rx.qdio_err, "qinerr")))
- work_done +=
- card->discipline->process_rx_buffer(
- card, new_budget, &done);
+ work_done += qeth_extract_skbs(card, new_budget,
+ buffer, &done);
else
- done = 1;
+ done = true;
if (done) {
QETH_CARD_STAT_INC(card, rx_bufs);
@@ -5432,9 +5569,9 @@ int qeth_setassparms_cb(struct qeth_card *card,
cmd->hdr.return_code = cmd->data.setassparms.hdr.return_code;
if (cmd->hdr.prot_version == QETH_PROT_IPV4)
- card->options.ipa4.enabled_funcs = cmd->hdr.ipa_enabled;
+ card->options.ipa4.enabled = cmd->hdr.assists.enabled;
if (cmd->hdr.prot_version == QETH_PROT_IPV6)
- card->options.ipa6.enabled_funcs = cmd->hdr.ipa_enabled;
+ card->options.ipa6.enabled = cmd->hdr.assists.enabled;
return 0;
}
EXPORT_SYMBOL_GPL(qeth_setassparms_cb);
diff --git a/drivers/s390/net/qeth_core_mpc.h b/drivers/s390/net/qeth_core_mpc.h
index 88f4dc140751..3865f7258449 100644
--- a/drivers/s390/net/qeth_core_mpc.h
+++ b/drivers/s390/net/qeth_core_mpc.h
@@ -53,6 +53,16 @@ static inline bool qeth_ipa_caps_enabled(struct qeth_ipa_caps *caps, u32 mask)
return (caps->enabled & mask) == mask;
}
+#define qeth_adp_supported(c, f) \
+ qeth_ipa_caps_supported(&c->options.adp, f)
+#define qeth_is_supported(c, f) \
+ qeth_ipa_caps_supported(&c->options.ipa4, f)
+#define qeth_is_supported6(c, f) \
+ qeth_ipa_caps_supported(&c->options.ipa6, f)
+#define qeth_is_ipafunc_supported(c, prot, f) \
+ ((prot == QETH_PROT_IPV6) ? qeth_is_supported6(c, f) : \
+ qeth_is_supported(c, f))
+
enum qeth_card_types {
QETH_CARD_TYPE_OSD = 1,
QETH_CARD_TYPE_IQD = 5,
@@ -338,14 +348,14 @@ enum qeth_card_info_port_speed {
/* (SET)DELIP(M) IPA stuff ***************************************************/
struct qeth_ipacmd_setdelip4 {
- __u8 ip_addr[4];
- __u8 mask[4];
+ __be32 addr;
+ __be32 mask;
__u32 flags;
} __attribute__ ((packed));
struct qeth_ipacmd_setdelip6 {
- __u8 ip_addr[16];
- __u8 mask[16];
+ struct in6_addr addr;
+ struct in6_addr prefix;
__u32 flags;
} __attribute__ ((packed));
@@ -421,7 +431,7 @@ struct qeth_ipacmd_setassparms {
} data;
} __attribute__ ((packed));
-#define SETASS_DATA_SIZEOF(field) FIELD_SIZEOF(struct qeth_ipacmd_setassparms,\
+#define SETASS_DATA_SIZEOF(field) sizeof_field(struct qeth_ipacmd_setassparms,\
data.field)
/* SETRTG IPA Command: ****************************************************/
@@ -535,7 +545,7 @@ struct qeth_ipacmd_setadpparms {
} data;
} __attribute__ ((packed));
-#define SETADP_DATA_SIZEOF(field) FIELD_SIZEOF(struct qeth_ipacmd_setadpparms,\
+#define SETADP_DATA_SIZEOF(field) sizeof_field(struct qeth_ipacmd_setadpparms,\
data.field)
/* CREATE_ADDR IPA Command: ***********************************************/
@@ -648,7 +658,7 @@ struct qeth_ipacmd_vnicc {
} data;
};
-#define VNICC_DATA_SIZEOF(field) FIELD_SIZEOF(struct qeth_ipacmd_vnicc,\
+#define VNICC_DATA_SIZEOF(field) sizeof_field(struct qeth_ipacmd_vnicc,\
data.field)
/* SETBRIDGEPORT IPA Command: *********************************************/
@@ -729,7 +739,7 @@ struct qeth_ipacmd_setbridgeport {
} data;
} __packed;
-#define SBP_DATA_SIZEOF(field) FIELD_SIZEOF(struct qeth_ipacmd_setbridgeport,\
+#define SBP_DATA_SIZEOF(field) sizeof_field(struct qeth_ipacmd_setbridgeport,\
data.field)
/* ADDRESS_CHANGE_NOTIFICATION adapter-initiated "command" *******************/
@@ -766,8 +776,7 @@ struct qeth_ipacmd_hdr {
__u8 prim_version_no;
__u8 param_count;
__u16 prot_version;
- __u32 ipa_supported;
- __u32 ipa_enabled;
+ struct qeth_ipa_caps assists;
} __attribute__ ((packed));
/* The IPA command itself */
@@ -790,7 +799,7 @@ struct qeth_ipa_cmd {
} data;
} __attribute__ ((packed));
-#define IPA_DATA_SIZEOF(field) FIELD_SIZEOF(struct qeth_ipa_cmd, data.field)
+#define IPA_DATA_SIZEOF(field) sizeof_field(struct qeth_ipa_cmd, data.field)
/*
* special command for ARP processing.
@@ -899,6 +908,11 @@ extern unsigned char IDX_ACTIVATE_WRITE[];
#define QETH_IDX_ACT_ERR_AUTH 0x1E
#define QETH_IDX_ACT_ERR_AUTH_USER 0x20
+#define QETH_IDX_TERMINATE 0xc0
+#define QETH_IDX_TERMINATE_MASK 0xc0
+#define QETH_IDX_TERM_BAD_TRANSPORT 0x41
+#define QETH_IDX_TERM_BAD_TRANSPORT_VM 0xf6
+
#define PDU_ENCAPSULATION(buffer) \
(buffer + *(buffer + (*(buffer + 0x0b)) + \
*(buffer + *(buffer + 0x0b) + 0x11) + 0x07))
diff --git a/drivers/s390/net/qeth_core_sys.c b/drivers/s390/net/qeth_core_sys.c
index e81170ab6d9a..7bd86027f559 100644
--- a/drivers/s390/net/qeth_core_sys.c
+++ b/drivers/s390/net/qeth_core_sys.c
@@ -207,7 +207,7 @@ static ssize_t qeth_dev_prioqing_store(struct device *dev,
card->qdio.default_out_queue = QETH_DEFAULT_QUEUE;
} else if (sysfs_streq(buf, "prio_queueing_vlan")) {
if (IS_LAYER3(card)) {
- rc = -ENOTSUPP;
+ rc = -EOPNOTSUPP;
goto out;
}
card->qdio.do_prio_queueing = QETH_PRIO_Q_ING_VLAN;
diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index 9086bc04fa6b..7175b5d8a23c 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -287,53 +287,15 @@ static void qeth_l2_stop_card(struct qeth_card *card)
card->state = CARD_STATE_HARDSETUP;
}
if (card->state == CARD_STATE_HARDSETUP) {
- qeth_qdio_clear_card(card, 0);
qeth_drain_output_queues(card);
qeth_clear_working_pool_list(card);
card->state = CARD_STATE_DOWN;
}
+ qeth_qdio_clear_card(card, 0);
flush_workqueue(card->event_wq);
card->info.mac_bits &= ~QETH_LAYER2_MAC_REGISTERED;
-}
-
-static int qeth_l2_process_inbound_buffer(struct qeth_card *card,
- int budget, int *done)
-{
- int work_done = 0;
- struct sk_buff *skb;
- struct qeth_hdr *hdr;
- unsigned int len;
-
- *done = 0;
- WARN_ON_ONCE(!budget);
- while (budget) {
- skb = qeth_core_get_next_skb(card,
- &card->qdio.in_q->bufs[card->rx.b_index],
- &card->rx.b_element, &card->rx.e_offset, &hdr);
- if (!skb) {
- *done = 1;
- break;
- }
-
- if (hdr->hdr.l2.id == QETH_HEADER_TYPE_LAYER2) {
- skb->protocol = eth_type_trans(skb, skb->dev);
- qeth_rx_csum(card, skb, hdr->hdr.l2.flags[1]);
- len = skb->len;
- napi_gro_receive(&card->napi, skb);
- } else {
- skb_push(skb, sizeof(*hdr));
- skb_copy_to_linear_data(skb, hdr, sizeof(*hdr));
- len = skb->len;
- card->osn_info.data_cb(skb);
- }
-
- work_done++;
- budget--;
- QETH_CARD_STAT_INC(card, rx_packets);
- QETH_CARD_STAT_ADD(card, rx_bytes, len);
- }
- return work_done;
+ card->info.promisc_mode = 0;
}
static int qeth_l2_request_initial_mac(struct qeth_card *card)
@@ -960,7 +922,6 @@ static int qeth_l2_control_event(struct qeth_card *card,
struct qeth_discipline qeth_l2_discipline = {
.devtype = &qeth_l2_devtype,
- .process_rx_buffer = qeth_l2_process_inbound_buffer,
.recover = qeth_l2_recover,
.setup = qeth_l2_probe_device,
.remove = qeth_l2_remove_device,
@@ -1951,8 +1912,7 @@ int qeth_l2_vnicc_get_timeout(struct qeth_card *card, u32 *timeout)
/* check if VNICC is currently enabled */
bool qeth_l2_vnicc_is_in_use(struct qeth_card *card)
{
- /* if everything is turned off, VNICC is not active */
- if (!card->options.vnicc.cur_chars)
+ if (!card->options.vnicc.sup_chars)
return false;
/* default values are only OK if rx_bcast was not enabled by user
* or the card is offline.
@@ -2039,8 +1999,9 @@ static void qeth_l2_vnicc_init(struct qeth_card *card)
/* enforce assumed default values and recover settings, if changed */
error |= qeth_l2_vnicc_recover_timeout(card, QETH_VNICC_LEARNING,
timeout);
- chars_tmp = card->options.vnicc.wanted_chars ^ QETH_VNICC_DEFAULT;
- chars_tmp |= QETH_VNICC_BRIDGE_INVISIBLE;
+ /* Change chars, if necessary */
+ chars_tmp = card->options.vnicc.wanted_chars ^
+ card->options.vnicc.cur_chars;
chars_len = sizeof(card->options.vnicc.wanted_chars) * BITS_PER_BYTE;
for_each_set_bit(i, &chars_tmp, chars_len) {
vnicc = BIT(i);
diff --git a/drivers/s390/net/qeth_l2_sys.c b/drivers/s390/net/qeth_l2_sys.c
index f70c7aac2dcc..7fa325cf6f8d 100644
--- a/drivers/s390/net/qeth_l2_sys.c
+++ b/drivers/s390/net/qeth_l2_sys.c
@@ -262,7 +262,8 @@ void qeth_l2_setup_bridgeport_attrs(struct qeth_card *card)
return;
mutex_lock(&card->sbp_lock);
- if (card->options.sbp.role != QETH_SBP_ROLE_NONE) {
+ if (!card->options.sbp.reflect_promisc &&
+ card->options.sbp.role != QETH_SBP_ROLE_NONE) {
/* Conditional to avoid spurious error messages */
qeth_bridgeport_setrole(card, card->options.sbp.role);
/* Let the callback function refresh the stored role value. */
diff --git a/drivers/s390/net/qeth_l3.h b/drivers/s390/net/qeth_l3.h
index 5db04fe472c0..6ccfe2121095 100644
--- a/drivers/s390/net/qeth_l3.h
+++ b/drivers/s390/net/qeth_l3.h
@@ -23,7 +23,6 @@ struct qeth_ipaddr {
struct hlist_node hnode;
enum qeth_ip_types type;
u8 is_multicast:1;
- u8 in_progress:1;
u8 disp_flag:2;
u8 ipato:1; /* ucast only */
@@ -35,7 +34,7 @@ struct qeth_ipaddr {
union {
struct {
__be32 addr;
- unsigned int mask;
+ __be32 mask;
} a4;
struct {
struct in6_addr addr;
@@ -102,7 +101,8 @@ struct qeth_ipato_entry {
extern const struct attribute_group *qeth_l3_attr_groups[];
-void qeth_l3_ipaddr_to_string(enum qeth_prot_versions, const __u8 *, char *);
+int qeth_l3_ipaddr_to_string(enum qeth_prot_versions proto, const u8 *addr,
+ char *buf);
int qeth_l3_create_device_attributes(struct device *);
void qeth_l3_remove_device_attributes(struct device *);
int qeth_l3_setrouting_v4(struct qeth_card *);
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index 27126330a4b0..6c0bffd11138 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -44,23 +44,13 @@ static int qeth_l3_register_addr_entry(struct qeth_card *,
static int qeth_l3_deregister_addr_entry(struct qeth_card *,
struct qeth_ipaddr *);
-static void qeth_l3_ipaddr4_to_string(const __u8 *addr, char *buf)
-{
- sprintf(buf, "%pI4", addr);
-}
-
-static void qeth_l3_ipaddr6_to_string(const __u8 *addr, char *buf)
-{
- sprintf(buf, "%pI6", addr);
-}
-
-void qeth_l3_ipaddr_to_string(enum qeth_prot_versions proto, const __u8 *addr,
- char *buf)
+int qeth_l3_ipaddr_to_string(enum qeth_prot_versions proto, const u8 *addr,
+ char *buf)
{
if (proto == QETH_PROT_IPV4)
- qeth_l3_ipaddr4_to_string(addr, buf);
- else if (proto == QETH_PROT_IPV6)
- qeth_l3_ipaddr6_to_string(addr, buf);
+ return sprintf(buf, "%pI4", addr);
+ else
+ return sprintf(buf, "%pI6", addr);
}
static struct qeth_ipaddr *qeth_l3_find_addr_by_ip(struct qeth_card *card,
@@ -161,8 +151,6 @@ static int qeth_l3_delete_ip(struct qeth_card *card,
addr->ref_counter--;
if (addr->type == QETH_IP_TYPE_NORMAL && addr->ref_counter > 0)
return rc;
- if (addr->in_progress)
- return -EINPROGRESS;
if (qeth_card_hw_is_reachable(card))
rc = qeth_l3_deregister_addr_entry(card, addr);
@@ -223,29 +211,10 @@ static int qeth_l3_add_ip(struct qeth_card *card, struct qeth_ipaddr *tmp_addr)
return 0;
}
- /* qeth_l3_register_addr_entry can go to sleep
- * if we add a IPV4 addr. It is caused by the reason
- * that SETIP ipa cmd starts ARP staff for IPV4 addr.
- * Thus we should unlock spinlock, and make a protection
- * using in_progress variable to indicate that there is
- * an hardware operation with this IPV4 address
- */
- if (addr->proto == QETH_PROT_IPV4) {
- addr->in_progress = 1;
- mutex_unlock(&card->ip_lock);
- rc = qeth_l3_register_addr_entry(card, addr);
- mutex_lock(&card->ip_lock);
- addr->in_progress = 0;
- } else
- rc = qeth_l3_register_addr_entry(card, addr);
+ rc = qeth_l3_register_addr_entry(card, addr);
if (!rc || rc == -EADDRINUSE || rc == -ENETDOWN) {
addr->disp_flag = QETH_DISP_ADDR_DO_NOTHING;
- if (addr->ref_counter < 1) {
- qeth_l3_deregister_addr_entry(card, addr);
- hash_del(&addr->hnode);
- kfree(addr);
- }
} else {
hash_del(&addr->hnode);
kfree(addr);
@@ -313,19 +282,10 @@ static void qeth_l3_recover_ip(struct qeth_card *card)
hash_for_each_safe(card->ip_htable, i, tmp, addr, hnode) {
if (addr->disp_flag == QETH_DISP_ADDR_ADD) {
- if (addr->proto == QETH_PROT_IPV4) {
- addr->in_progress = 1;
- mutex_unlock(&card->ip_lock);
- rc = qeth_l3_register_addr_entry(card, addr);
- mutex_lock(&card->ip_lock);
- addr->in_progress = 0;
- } else
- rc = qeth_l3_register_addr_entry(card, addr);
+ rc = qeth_l3_register_addr_entry(card, addr);
if (!rc) {
addr->disp_flag = QETH_DISP_ADDR_DO_NOTHING;
- if (addr->ref_counter < 1)
- qeth_l3_delete_ip(card, addr);
} else {
hash_del(&addr->hnode);
kfree(addr);
@@ -379,17 +339,16 @@ static int qeth_l3_send_setdelmc(struct qeth_card *card,
return qeth_send_ipa_cmd(card, iob, qeth_l3_setdelip_cb, NULL);
}
-static void qeth_l3_fill_netmask(u8 *netmask, unsigned int len)
+static void qeth_l3_set_ipv6_prefix(struct in6_addr *prefix, unsigned int len)
{
- int i, j;
- for (i = 0; i < 16; i++) {
- j = (len) - (i * 8);
- if (j >= 8)
- netmask[i] = 0xff;
- else if (j > 0)
- netmask[i] = (u8)(0xFF00 >> j);
- else
- netmask[i] = 0;
+ unsigned int i = 0;
+
+ while (len && i < 4) {
+ int mask_len = min_t(int, len, 32);
+
+ prefix->s6_addr32[i] = inet_make_mask(mask_len);
+ len -= mask_len;
+ i++;
}
}
@@ -412,7 +371,6 @@ static int qeth_l3_send_setdelip(struct qeth_card *card,
{
struct qeth_cmd_buffer *iob;
struct qeth_ipa_cmd *cmd;
- __u8 netmask[16];
u32 flags;
QETH_CARD_TEXT(card, 4, "setdelip");
@@ -427,15 +385,13 @@ static int qeth_l3_send_setdelip(struct qeth_card *card,
QETH_CARD_TEXT_(card, 4, "flags%02X", flags);
if (addr->proto == QETH_PROT_IPV6) {
- memcpy(cmd->data.setdelip6.ip_addr, &addr->u.a6.addr,
- sizeof(struct in6_addr));
- qeth_l3_fill_netmask(netmask, addr->u.a6.pfxlen);
- memcpy(cmd->data.setdelip6.mask, netmask,
- sizeof(struct in6_addr));
+ cmd->data.setdelip6.addr = addr->u.a6.addr;
+ qeth_l3_set_ipv6_prefix(&cmd->data.setdelip6.prefix,
+ addr->u.a6.pfxlen);
cmd->data.setdelip6.flags = flags;
} else {
- memcpy(cmd->data.setdelip4.ip_addr, &addr->u.a4.addr, 4);
- memcpy(cmd->data.setdelip4.mask, &addr->u.a4.mask, 4);
+ cmd->data.setdelip4.addr = addr->u.a4.addr;
+ cmd->data.setdelip4.mask = addr->u.a4.mask;
cmd->data.setdelip4.flags = flags;
}
@@ -581,6 +537,7 @@ int qeth_l3_add_ipato_entry(struct qeth_card *card,
QETH_CARD_TEXT(card, 2, "addipato");
+ mutex_lock(&card->conf_mutex);
mutex_lock(&card->ip_lock);
list_for_each_entry(ipatoe, &card->ipato.entries, entry) {
@@ -600,6 +557,7 @@ int qeth_l3_add_ipato_entry(struct qeth_card *card,
}
mutex_unlock(&card->ip_lock);
+ mutex_unlock(&card->conf_mutex);
return rc;
}
@@ -613,6 +571,7 @@ int qeth_l3_del_ipato_entry(struct qeth_card *card,
QETH_CARD_TEXT(card, 2, "delipato");
+ mutex_lock(&card->conf_mutex);
mutex_lock(&card->ip_lock);
list_for_each_entry_safe(ipatoe, tmp, &card->ipato.entries, entry) {
@@ -629,6 +588,8 @@ int qeth_l3_del_ipato_entry(struct qeth_card *card,
}
mutex_unlock(&card->ip_lock);
+ mutex_unlock(&card->conf_mutex);
+
return rc;
}
@@ -637,6 +598,7 @@ int qeth_l3_modify_rxip_vipa(struct qeth_card *card, bool add, const u8 *ip,
enum qeth_prot_versions proto)
{
struct qeth_ipaddr addr;
+ int rc;
qeth_l3_init_ipaddr(&addr, type, proto);
if (proto == QETH_PROT_IPV4)
@@ -644,7 +606,11 @@ int qeth_l3_modify_rxip_vipa(struct qeth_card *card, bool add, const u8 *ip,
else
memcpy(&addr.u.a6.addr, ip, 16);
- return qeth_l3_modify_ip(card, &addr, add);
+ mutex_lock(&card->conf_mutex);
+ rc = qeth_l3_modify_ip(card, &addr, add);
+ mutex_unlock(&card->conf_mutex);
+
+ return rc;
}
int qeth_l3_modify_hsuid(struct qeth_card *card, bool add)
@@ -1198,96 +1164,6 @@ static int qeth_l3_vlan_rx_kill_vid(struct net_device *dev,
return 0;
}
-static void qeth_l3_rebuild_skb(struct qeth_card *card, struct sk_buff *skb,
- struct qeth_hdr *hdr)
-{
- struct af_iucv_trans_hdr *iucv = (struct af_iucv_trans_hdr *) skb->data;
- struct net_device *dev = skb->dev;
-
- if (IS_IQD(card) && iucv->magic == ETH_P_AF_IUCV) {
- dev_hard_header(skb, dev, ETH_P_AF_IUCV, dev->dev_addr,
- "FAKELL", skb->len);
- return;
- }
-
- if (!(hdr->hdr.l3.flags & QETH_HDR_PASSTHRU)) {
- u16 prot = (hdr->hdr.l3.flags & QETH_HDR_IPV6) ? ETH_P_IPV6 :
- ETH_P_IP;
- unsigned char tg_addr[ETH_ALEN];
-
- skb_reset_network_header(skb);
- switch (hdr->hdr.l3.flags & QETH_HDR_CAST_MASK) {
- case QETH_CAST_MULTICAST:
- if (prot == ETH_P_IP)
- ip_eth_mc_map(ip_hdr(skb)->daddr, tg_addr);
- else
- ipv6_eth_mc_map(&ipv6_hdr(skb)->daddr, tg_addr);
- QETH_CARD_STAT_INC(card, rx_multicast);
- break;
- case QETH_CAST_BROADCAST:
- ether_addr_copy(tg_addr, card->dev->broadcast);
- QETH_CARD_STAT_INC(card, rx_multicast);
- break;
- default:
- if (card->options.sniffer)
- skb->pkt_type = PACKET_OTHERHOST;
- ether_addr_copy(tg_addr, card->dev->dev_addr);
- }
-
- if (hdr->hdr.l3.ext_flags & QETH_HDR_EXT_SRC_MAC_ADDR)
- card->dev->header_ops->create(skb, card->dev, prot,
- tg_addr, &hdr->hdr.l3.next_hop.rx.src_mac,
- skb->len);
- else
- card->dev->header_ops->create(skb, card->dev, prot,
- tg_addr, "FAKELL", skb->len);
- }
-
- /* copy VLAN tag from hdr into skb */
- if (!card->options.sniffer &&
- (hdr->hdr.l3.ext_flags & (QETH_HDR_EXT_VLAN_FRAME |
- QETH_HDR_EXT_INCLUDE_VLAN_TAG))) {
- u16 tag = (hdr->hdr.l3.ext_flags & QETH_HDR_EXT_VLAN_FRAME) ?
- hdr->hdr.l3.vlan_id :
- hdr->hdr.l3.next_hop.rx.vlan_id;
- __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), tag);
- }
-
- qeth_rx_csum(card, skb, hdr->hdr.l3.ext_flags);
-}
-
-static int qeth_l3_process_inbound_buffer(struct qeth_card *card,
- int budget, int *done)
-{
- int work_done = 0;
- struct sk_buff *skb;
- struct qeth_hdr *hdr;
-
- *done = 0;
- WARN_ON_ONCE(!budget);
- while (budget) {
- skb = qeth_core_get_next_skb(card,
- &card->qdio.in_q->bufs[card->rx.b_index],
- &card->rx.b_element, &card->rx.e_offset, &hdr);
- if (!skb) {
- *done = 1;
- break;
- }
-
- if (hdr->hdr.l3.id == QETH_HEADER_TYPE_LAYER3)
- qeth_l3_rebuild_skb(card, skb, hdr);
-
- skb->protocol = eth_type_trans(skb, skb->dev);
- QETH_CARD_STAT_INC(card, rx_packets);
- QETH_CARD_STAT_ADD(card, rx_bytes, skb->len);
-
- napi_gro_receive(&card->napi, skb);
- work_done++;
- budget--;
- }
- return work_done;
-}
-
static void qeth_l3_stop_card(struct qeth_card *card)
{
QETH_CARD_TEXT(card, 2, "stopcard");
@@ -1307,13 +1183,14 @@ static void qeth_l3_stop_card(struct qeth_card *card)
card->state = CARD_STATE_HARDSETUP;
}
if (card->state == CARD_STATE_HARDSETUP) {
- qeth_qdio_clear_card(card, 0);
qeth_drain_output_queues(card);
qeth_clear_working_pool_list(card);
card->state = CARD_STATE_DOWN;
}
+ qeth_qdio_clear_card(card, 0);
flush_workqueue(card->event_wq);
+ card->info.promisc_mode = 0;
}
static void qeth_l3_set_promisc_mode(struct qeth_card *card)
@@ -2292,12 +2169,6 @@ static int __qeth_l3_set_offline(struct ccwgroup_device *cgdev,
rtnl_unlock();
qeth_l3_stop_card(card);
- if (card->options.cq == QETH_CQ_ENABLED) {
- rtnl_lock();
- call_netdevice_notifiers(NETDEV_REBOOT, card->dev);
- rtnl_unlock();
- }
-
rc = qeth_stop_channel(&card->data);
rc2 = qeth_stop_channel(&card->write);
rc3 = qeth_stop_channel(&card->read);
@@ -2356,7 +2227,6 @@ static int qeth_l3_control_event(struct qeth_card *card,
struct qeth_discipline qeth_l3_discipline = {
.devtype = &qeth_l3_devtype,
- .process_rx_buffer = qeth_l3_process_inbound_buffer,
.recover = qeth_l3_recover,
.setup = qeth_l3_probe_device,
.remove = qeth_l3_remove_device,
@@ -2436,7 +2306,7 @@ static int qeth_l3_ip_event(struct notifier_block *this,
qeth_l3_init_ipaddr(&addr, QETH_IP_TYPE_NORMAL, QETH_PROT_IPV4);
addr.u.a4.addr = ifa->ifa_address;
- addr.u.a4.mask = be32_to_cpu(ifa->ifa_mask);
+ addr.u.a4.mask = ifa->ifa_mask;
return qeth_l3_handle_ip_event(card, &addr, event);
}
diff --git a/drivers/s390/net/qeth_l3_sys.c b/drivers/s390/net/qeth_l3_sys.c
index f9067ed6c7d3..29f2517d2a31 100644
--- a/drivers/s390/net/qeth_l3_sys.c
+++ b/drivers/s390/net/qeth_l3_sys.c
@@ -242,21 +242,33 @@ static ssize_t qeth_l3_dev_hsuid_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t count)
{
struct qeth_card *card = dev_get_drvdata(dev);
+ int rc = 0;
char *tmp;
- int rc;
if (!IS_IQD(card))
return -EPERM;
- if (card->state != CARD_STATE_DOWN)
- return -EPERM;
- if (card->options.sniffer)
- return -EPERM;
- if (card->options.cq == QETH_CQ_NOTAVAILABLE)
- return -EPERM;
+
+ mutex_lock(&card->conf_mutex);
+ if (card->state != CARD_STATE_DOWN) {
+ rc = -EPERM;
+ goto out;
+ }
+
+ if (card->options.sniffer) {
+ rc = -EPERM;
+ goto out;
+ }
+
+ if (card->options.cq == QETH_CQ_NOTAVAILABLE) {
+ rc = -EPERM;
+ goto out;
+ }
tmp = strsep((char **)&buf, "\n");
- if (strlen(tmp) > 8)
- return -EINVAL;
+ if (strlen(tmp) > 8) {
+ rc = -EINVAL;
+ goto out;
+ }
if (card->options.hsuid[0])
/* delete old ip address */
@@ -267,11 +279,13 @@ static ssize_t qeth_l3_dev_hsuid_store(struct device *dev,
card->options.hsuid[0] = '\0';
memcpy(card->dev->perm_addr, card->options.hsuid, 9);
qeth_configure_cq(card, QETH_CQ_DISABLED);
- return count;
+ goto out;
}
- if (qeth_configure_cq(card, QETH_CQ_ENABLED))
- return -EPERM;
+ if (qeth_configure_cq(card, QETH_CQ_ENABLED)) {
+ rc = -EPERM;
+ goto out;
+ }
snprintf(card->options.hsuid, sizeof(card->options.hsuid),
"%-8s", tmp);
@@ -280,6 +294,8 @@ static ssize_t qeth_l3_dev_hsuid_store(struct device *dev,
rc = qeth_l3_modify_hsuid(card, true);
+out:
+ mutex_unlock(&card->conf_mutex);
return rc ? rc : count;
}
@@ -386,30 +402,35 @@ static ssize_t qeth_l3_dev_ipato_add_show(char *buf, struct qeth_card *card,
enum qeth_prot_versions proto)
{
struct qeth_ipato_entry *ipatoe;
- char addr_str[40];
- int entry_len; /* length of 1 entry string, differs between v4 and v6 */
- int i = 0;
+ int str_len = 0;
- entry_len = (proto == QETH_PROT_IPV4)? 12 : 40;
- /* add strlen for "/<mask>\n" */
- entry_len += (proto == QETH_PROT_IPV4)? 5 : 6;
mutex_lock(&card->ip_lock);
list_for_each_entry(ipatoe, &card->ipato.entries, entry) {
+ char addr_str[40];
+ int entry_len;
+
if (ipatoe->proto != proto)
continue;
- /* String must not be longer than PAGE_SIZE. So we check if
- * string length gets near PAGE_SIZE. Then we can savely display
- * the next IPv6 address (worst case, compared to IPv4) */
- if ((PAGE_SIZE - i) <= entry_len)
+
+ entry_len = qeth_l3_ipaddr_to_string(proto, ipatoe->addr,
+ addr_str);
+ if (entry_len < 0)
+ continue;
+
+ /* Append /%mask to the entry: */
+ entry_len += 1 + ((proto == QETH_PROT_IPV4) ? 2 : 3);
+ /* Enough room to format %entry\n into null terminated page? */
+ if (entry_len + 1 > PAGE_SIZE - str_len - 1)
break;
- qeth_l3_ipaddr_to_string(proto, ipatoe->addr, addr_str);
- i += snprintf(buf + i, PAGE_SIZE - i,
- "%s/%i\n", addr_str, ipatoe->mask_bits);
+
+ entry_len = scnprintf(buf, PAGE_SIZE - str_len,
+ "%s/%i\n", addr_str, ipatoe->mask_bits);
+ str_len += entry_len;
+ buf += entry_len;
}
mutex_unlock(&card->ip_lock);
- i += snprintf(buf + i, PAGE_SIZE - i, "\n");
- return i;
+ return str_len ? str_len : scnprintf(buf, PAGE_SIZE, "\n");
}
static ssize_t qeth_l3_dev_ipato_add4_show(struct device *dev,
@@ -455,16 +476,14 @@ static ssize_t qeth_l3_dev_ipato_add_store(const char *buf, size_t count,
int mask_bits;
int rc = 0;
- mutex_lock(&card->conf_mutex);
rc = qeth_l3_parse_ipatoe(buf, proto, addr, &mask_bits);
if (rc)
- goto out;
+ return rc;
ipatoe = kzalloc(sizeof(struct qeth_ipato_entry), GFP_KERNEL);
- if (!ipatoe) {
- rc = -ENOMEM;
- goto out;
- }
+ if (!ipatoe)
+ return -ENOMEM;
+
ipatoe->proto = proto;
memcpy(ipatoe->addr, addr, (proto == QETH_PROT_IPV4)? 4:16);
ipatoe->mask_bits = mask_bits;
@@ -472,8 +491,7 @@ static ssize_t qeth_l3_dev_ipato_add_store(const char *buf, size_t count,
rc = qeth_l3_add_ipato_entry(card, ipatoe);
if (rc)
kfree(ipatoe);
-out:
- mutex_unlock(&card->conf_mutex);
+
return rc ? rc : count;
}
@@ -496,11 +514,9 @@ static ssize_t qeth_l3_dev_ipato_del_store(const char *buf, size_t count,
int mask_bits;
int rc = 0;
- mutex_lock(&card->conf_mutex);
rc = qeth_l3_parse_ipatoe(buf, proto, addr, &mask_bits);
if (!rc)
rc = qeth_l3_del_ipato_entry(card, proto, addr, mask_bits);
- mutex_unlock(&card->conf_mutex);
return rc ? rc : count;
}
@@ -607,31 +623,34 @@ static ssize_t qeth_l3_dev_ip_add_show(struct device *dev, char *buf,
{
struct qeth_card *card = dev_get_drvdata(dev);
struct qeth_ipaddr *ipaddr;
- char addr_str[40];
int str_len = 0;
- int entry_len; /* length of 1 entry string, differs between v4 and v6 */
int i;
- entry_len = (proto == QETH_PROT_IPV4)? 12 : 40;
- entry_len += 2; /* \n + terminator */
mutex_lock(&card->ip_lock);
hash_for_each(card->ip_htable, i, ipaddr, hnode) {
+ char addr_str[40];
+ int entry_len;
+
if (ipaddr->proto != proto || ipaddr->type != type)
continue;
- /* String must not be longer than PAGE_SIZE. So we check if
- * string length gets near PAGE_SIZE. Then we can savely display
- * the next IPv6 address (worst case, compared to IPv4) */
- if ((PAGE_SIZE - str_len) <= entry_len)
+
+ entry_len = qeth_l3_ipaddr_to_string(proto, (u8 *)&ipaddr->u,
+ addr_str);
+ if (entry_len < 0)
+ continue;
+
+ /* Enough room to format %addr\n into null terminated page? */
+ if (entry_len + 1 > PAGE_SIZE - str_len - 1)
break;
- qeth_l3_ipaddr_to_string(proto, (const u8 *)&ipaddr->u,
- addr_str);
- str_len += snprintf(buf + str_len, PAGE_SIZE - str_len, "%s\n",
- addr_str);
+
+ entry_len = scnprintf(buf, PAGE_SIZE - str_len, "%s\n",
+ addr_str);
+ str_len += entry_len;
+ buf += entry_len;
}
mutex_unlock(&card->ip_lock);
- str_len += snprintf(buf + str_len, PAGE_SIZE - str_len, "\n");
- return str_len;
+ return str_len ? str_len : scnprintf(buf, PAGE_SIZE, "\n");
}
static ssize_t qeth_l3_dev_vipa_add4_show(struct device *dev,
@@ -642,63 +661,34 @@ static ssize_t qeth_l3_dev_vipa_add4_show(struct device *dev,
QETH_IP_TYPE_VIPA);
}
-static int qeth_l3_parse_vipae(const char *buf, enum qeth_prot_versions proto,
- u8 *addr)
-{
- if (qeth_l3_string_to_ipaddr(buf, proto, addr)) {
- return -EINVAL;
- }
- return 0;
-}
-
-static ssize_t qeth_l3_dev_vipa_add_store(const char *buf, size_t count,
- struct qeth_card *card, enum qeth_prot_versions proto)
+static ssize_t qeth_l3_vipa_store(struct device *dev, const char *buf, bool add,
+ size_t count, enum qeth_prot_versions proto)
{
+ struct qeth_card *card = dev_get_drvdata(dev);
u8 addr[16] = {0, };
int rc;
- mutex_lock(&card->conf_mutex);
- rc = qeth_l3_parse_vipae(buf, proto, addr);
+ rc = qeth_l3_string_to_ipaddr(buf, proto, addr);
if (!rc)
- rc = qeth_l3_modify_rxip_vipa(card, true, addr,
+ rc = qeth_l3_modify_rxip_vipa(card, add, addr,
QETH_IP_TYPE_VIPA, proto);
- mutex_unlock(&card->conf_mutex);
return rc ? rc : count;
}
static ssize_t qeth_l3_dev_vipa_add4_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t count)
{
- struct qeth_card *card = dev_get_drvdata(dev);
-
- return qeth_l3_dev_vipa_add_store(buf, count, card, QETH_PROT_IPV4);
+ return qeth_l3_vipa_store(dev, buf, true, count, QETH_PROT_IPV4);
}
static QETH_DEVICE_ATTR(vipa_add4, add4, 0644,
qeth_l3_dev_vipa_add4_show,
qeth_l3_dev_vipa_add4_store);
-static ssize_t qeth_l3_dev_vipa_del_store(const char *buf, size_t count,
- struct qeth_card *card, enum qeth_prot_versions proto)
-{
- u8 addr[16];
- int rc;
-
- mutex_lock(&card->conf_mutex);
- rc = qeth_l3_parse_vipae(buf, proto, addr);
- if (!rc)
- rc = qeth_l3_modify_rxip_vipa(card, false, addr,
- QETH_IP_TYPE_VIPA, proto);
- mutex_unlock(&card->conf_mutex);
- return rc ? rc : count;
-}
-
static ssize_t qeth_l3_dev_vipa_del4_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t count)
{
- struct qeth_card *card = dev_get_drvdata(dev);
-
- return qeth_l3_dev_vipa_del_store(buf, count, card, QETH_PROT_IPV4);
+ return qeth_l3_vipa_store(dev, buf, true, count, QETH_PROT_IPV4);
}
static QETH_DEVICE_ATTR(vipa_del4, del4, 0200, NULL,
@@ -715,9 +705,7 @@ static ssize_t qeth_l3_dev_vipa_add6_show(struct device *dev,
static ssize_t qeth_l3_dev_vipa_add6_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t count)
{
- struct qeth_card *card = dev_get_drvdata(dev);
-
- return qeth_l3_dev_vipa_add_store(buf, count, card, QETH_PROT_IPV6);
+ return qeth_l3_vipa_store(dev, buf, true, count, QETH_PROT_IPV6);
}
static QETH_DEVICE_ATTR(vipa_add6, add6, 0644,
@@ -727,9 +715,7 @@ static QETH_DEVICE_ATTR(vipa_add6, add6, 0644,
static ssize_t qeth_l3_dev_vipa_del6_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t count)
{
- struct qeth_card *card = dev_get_drvdata(dev);
-
- return qeth_l3_dev_vipa_del_store(buf, count, card, QETH_PROT_IPV6);
+ return qeth_l3_vipa_store(dev, buf, false, count, QETH_PROT_IPV6);
}
static QETH_DEVICE_ATTR(vipa_del6, del6, 0200, NULL,
@@ -782,54 +768,34 @@ static int qeth_l3_parse_rxipe(const char *buf, enum qeth_prot_versions proto,
return 0;
}
-static ssize_t qeth_l3_dev_rxip_add_store(const char *buf, size_t count,
- struct qeth_card *card, enum qeth_prot_versions proto)
+static ssize_t qeth_l3_rxip_store(struct device *dev, const char *buf, bool add,
+ size_t count, enum qeth_prot_versions proto)
{
+ struct qeth_card *card = dev_get_drvdata(dev);
u8 addr[16] = {0, };
int rc;
- mutex_lock(&card->conf_mutex);
rc = qeth_l3_parse_rxipe(buf, proto, addr);
if (!rc)
- rc = qeth_l3_modify_rxip_vipa(card, true, addr,
+ rc = qeth_l3_modify_rxip_vipa(card, add, addr,
QETH_IP_TYPE_RXIP, proto);
- mutex_unlock(&card->conf_mutex);
return rc ? rc : count;
}
static ssize_t qeth_l3_dev_rxip_add4_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t count)
{
- struct qeth_card *card = dev_get_drvdata(dev);
-
- return qeth_l3_dev_rxip_add_store(buf, count, card, QETH_PROT_IPV4);
+ return qeth_l3_rxip_store(dev, buf, true, count, QETH_PROT_IPV4);
}
static QETH_DEVICE_ATTR(rxip_add4, add4, 0644,
qeth_l3_dev_rxip_add4_show,
qeth_l3_dev_rxip_add4_store);
-static ssize_t qeth_l3_dev_rxip_del_store(const char *buf, size_t count,
- struct qeth_card *card, enum qeth_prot_versions proto)
-{
- u8 addr[16];
- int rc;
-
- mutex_lock(&card->conf_mutex);
- rc = qeth_l3_parse_rxipe(buf, proto, addr);
- if (!rc)
- rc = qeth_l3_modify_rxip_vipa(card, false, addr,
- QETH_IP_TYPE_RXIP, proto);
- mutex_unlock(&card->conf_mutex);
- return rc ? rc : count;
-}
-
static ssize_t qeth_l3_dev_rxip_del4_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t count)
{
- struct qeth_card *card = dev_get_drvdata(dev);
-
- return qeth_l3_dev_rxip_del_store(buf, count, card, QETH_PROT_IPV4);
+ return qeth_l3_rxip_store(dev, buf, false, count, QETH_PROT_IPV4);
}
static QETH_DEVICE_ATTR(rxip_del4, del4, 0200, NULL,
@@ -846,9 +812,7 @@ static ssize_t qeth_l3_dev_rxip_add6_show(struct device *dev,
static ssize_t qeth_l3_dev_rxip_add6_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t count)
{
- struct qeth_card *card = dev_get_drvdata(dev);
-
- return qeth_l3_dev_rxip_add_store(buf, count, card, QETH_PROT_IPV6);
+ return qeth_l3_rxip_store(dev, buf, true, count, QETH_PROT_IPV6);
}
static QETH_DEVICE_ATTR(rxip_add6, add6, 0644,
@@ -858,9 +822,7 @@ static QETH_DEVICE_ATTR(rxip_add6, add6, 0644,
static ssize_t qeth_l3_dev_rxip_del6_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t count)
{
- struct qeth_card *card = dev_get_drvdata(dev);
-
- return qeth_l3_dev_rxip_del_store(buf, count, card, QETH_PROT_IPV6);
+ return qeth_l3_rxip_store(dev, buf, false, count, QETH_PROT_IPV6);
}
static QETH_DEVICE_ATTR(rxip_del6, del6, 0200, NULL,
diff --git a/drivers/scsi/aacraid/aachba.c b/drivers/scsi/aacraid/aachba.c
index e36608ce937a..33dbc051bff9 100644
--- a/drivers/scsi/aacraid/aachba.c
+++ b/drivers/scsi/aacraid/aachba.c
@@ -535,7 +535,7 @@ static void get_container_name_callback(void *context, struct fib * fibptr)
if ((le32_to_cpu(get_name_reply->status) == CT_OK)
&& (get_name_reply->data[0] != '\0')) {
char *sp = get_name_reply->data;
- int data_size = FIELD_SIZEOF(struct aac_get_name_resp, data);
+ int data_size = sizeof_field(struct aac_get_name_resp, data);
sp[data_size - 1] = '\0';
while (*sp == ' ')
@@ -574,7 +574,7 @@ static int aac_get_container_name(struct scsi_cmnd * scsicmd)
dev = (struct aac_dev *)scsicmd->device->host->hostdata;
- data_size = FIELD_SIZEOF(struct aac_get_name_resp, data);
+ data_size = sizeof_field(struct aac_get_name_resp, data);
cmd_fibcontext = aac_fib_alloc_tag(dev, scsicmd);
diff --git a/drivers/scsi/be2iscsi/be_cmds.h b/drivers/scsi/be2iscsi/be_cmds.h
index 063dccc18f70..5f9f0b18ddf3 100644
--- a/drivers/scsi/be2iscsi/be_cmds.h
+++ b/drivers/scsi/be2iscsi/be_cmds.h
@@ -1300,7 +1300,7 @@ struct be_cmd_get_port_name {
/* Returns the number of items in the field array. */
#define BE_NUMBER_OF_FIELD(_type_, _field_) \
- (FIELD_SIZEOF(_type_, _field_)/sizeof((((_type_ *)0)->_field_[0])))\
+ (sizeof_field(_type_, _field_)/sizeof((((_type_ *)0)->_field_[0])))\
/**
* Different types of iSCSI completions to host driver for both initiator
diff --git a/drivers/scsi/cxgbi/libcxgbi.c b/drivers/scsi/cxgbi/libcxgbi.c
index 0d044c165960..4bc794d2f51c 100644
--- a/drivers/scsi/cxgbi/libcxgbi.c
+++ b/drivers/scsi/cxgbi/libcxgbi.c
@@ -121,7 +121,8 @@ static inline void cxgbi_device_destroy(struct cxgbi_device *cdev)
"cdev 0x%p, p# %u.\n", cdev, cdev->nports);
cxgbi_hbas_remove(cdev);
cxgbi_device_portmap_cleanup(cdev);
- cxgbi_ppm_release(cdev->cdev2ppm(cdev));
+ if (cdev->cdev2ppm)
+ cxgbi_ppm_release(cdev->cdev2ppm(cdev));
if (cdev->pmap.max_connect)
cxgbi_free_big_mem(cdev->pmap.port_csk);
kfree(cdev);
@@ -2746,7 +2747,7 @@ static int __init libcxgbi_init_module(void)
{
pr_info("%s", version);
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, cb) <
+ BUILD_BUG_ON(sizeof_field(struct sk_buff, cb) <
sizeof(struct cxgbi_skb_cb));
return 0;
}
diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index ebd47c0cf9e9..70b99c0e2e67 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -1945,7 +1945,7 @@ enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc)
ISCSI_DBG_EH(session, "scsi cmd %p timedout\n", sc);
- spin_lock(&session->frwd_lock);
+ spin_lock_bh(&session->frwd_lock);
task = (struct iscsi_task *)sc->SCp.ptr;
if (!task) {
/*
@@ -2072,7 +2072,7 @@ enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc)
done:
if (task)
task->last_timeout = jiffies;
- spin_unlock(&session->frwd_lock);
+ spin_unlock_bh(&session->frwd_lock);
ISCSI_DBG_EH(session, "return %s\n", rc == BLK_EH_RESET_TIMER ?
"timer reset" : "shutdown or nh");
return rc;
diff --git a/drivers/scsi/libsas/sas_discover.c b/drivers/scsi/libsas/sas_discover.c
index f47b4b281b14..d7302c2052f9 100644
--- a/drivers/scsi/libsas/sas_discover.c
+++ b/drivers/scsi/libsas/sas_discover.c
@@ -81,12 +81,21 @@ static int sas_get_port_device(struct asd_sas_port *port)
else
dev->dev_type = SAS_SATA_DEV;
dev->tproto = SAS_PROTOCOL_SATA;
- } else {
+ } else if (port->oob_mode == SAS_OOB_MODE) {
struct sas_identify_frame *id =
(struct sas_identify_frame *) dev->frame_rcvd;
dev->dev_type = id->dev_type;
dev->iproto = id->initiator_bits;
dev->tproto = id->target_bits;
+ } else {
+ /* If the oob mode is OOB_NOT_CONNECTED, the port is
+ * disconnected due to race with PHY down. We cannot
+ * continue to discover this port
+ */
+ sas_put_device(dev);
+ pr_warn("Port %016llx is disconnected when discovering\n",
+ SAS_ADDR(port->attached_sas_addr));
+ return -ENODEV;
}
sas_init_dev(dev);
diff --git a/drivers/scsi/lpfc/lpfc_bsg.c b/drivers/scsi/lpfc/lpfc_bsg.c
index d4e1b120cc9e..0ea03ae93d91 100644
--- a/drivers/scsi/lpfc/lpfc_bsg.c
+++ b/drivers/scsi/lpfc/lpfc_bsg.c
@@ -4489,12 +4489,6 @@ lpfc_bsg_write_ebuf_set(struct lpfc_hba *phba, struct bsg_job *job,
phba->mbox_ext_buf_ctx.seqNum++;
nemb_tp = phba->mbox_ext_buf_ctx.nembType;
- dd_data = kmalloc(sizeof(struct bsg_job_data), GFP_KERNEL);
- if (!dd_data) {
- rc = -ENOMEM;
- goto job_error;
- }
-
pbuf = (uint8_t *)dmabuf->virt;
size = job->request_payload.payload_len;
sg_copy_to_buffer(job->request_payload.sg_list,
@@ -4531,6 +4525,13 @@ lpfc_bsg_write_ebuf_set(struct lpfc_hba *phba, struct bsg_job *job,
"2968 SLI_CONFIG ext-buffer wr all %d "
"ebuffers received\n",
phba->mbox_ext_buf_ctx.numBuf);
+
+ dd_data = kmalloc(sizeof(struct bsg_job_data), GFP_KERNEL);
+ if (!dd_data) {
+ rc = -ENOMEM;
+ goto job_error;
+ }
+
/* mailbox command structure for base driver */
pmboxq = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
if (!pmboxq) {
@@ -4579,6 +4580,8 @@ lpfc_bsg_write_ebuf_set(struct lpfc_hba *phba, struct bsg_job *job,
return SLI_CONFIG_HANDLED;
job_error:
+ if (pmboxq)
+ mempool_free(pmboxq, phba->mbox_mem_pool);
lpfc_bsg_dma_page_free(phba, dmabuf);
kfree(dd_data);
diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c
index 2e6a68d9ea4f..a5ecbce4eda2 100644
--- a/drivers/scsi/lpfc/lpfc_debugfs.c
+++ b/drivers/scsi/lpfc/lpfc_debugfs.c
@@ -5385,7 +5385,6 @@ static const struct file_operations lpfc_debugfs_ras_log = {
.read = lpfc_debugfs_read,
.release = lpfc_debugfs_ras_log_release,
};
-#endif
#undef lpfc_debugfs_op_dumpHBASlim
static const struct file_operations lpfc_debugfs_op_dumpHBASlim = {
@@ -5557,7 +5556,7 @@ static const struct file_operations lpfc_idiag_op_extAcc = {
.write = lpfc_idiag_extacc_write,
.release = lpfc_idiag_cmd_release,
};
-
+#endif
/* lpfc_idiag_mbxacc_dump_bsg_mbox - idiag debugfs dump bsg mailbox command
* @phba: Pointer to HBA context object.
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index 6298b1729098..6a04fdb3fbf2 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -5883,7 +5883,7 @@ void lpfc_sli4_async_event_proc(struct lpfc_hba *phba)
break;
default:
lpfc_printf_log(phba, KERN_ERR, LOG_SLI,
- "1804 Invalid asynchrous event code: "
+ "1804 Invalid asynchronous event code: "
"x%x\n", bf_get(lpfc_trailer_code,
&cq_event->cqe.mcqe_cmpl));
break;
diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c
index db4a04a207ec..f6c8963c915d 100644
--- a/drivers/scsi/lpfc/lpfc_nvme.c
+++ b/drivers/scsi/lpfc/lpfc_nvme.c
@@ -1985,6 +1985,8 @@ out_unlock:
/* Declare and initialization an instance of the FC NVME template. */
static struct nvme_fc_port_template lpfc_nvme_template = {
+ .module = THIS_MODULE,
+
/* initiator-based functions */
.localport_delete = lpfc_nvme_localport_delete,
.remoteport_delete = lpfc_nvme_remoteport_delete,
diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index c82b5792da98..625c046ac4ef 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -8555,7 +8555,7 @@ lpfc_sli4_async_mbox_unblock(struct lpfc_hba *phba)
psli->sli_flag &= ~LPFC_SLI_ASYNC_MBX_BLK;
spin_unlock_irq(&phba->hbalock);
- /* wake up worker thread to post asynchronlous mailbox command */
+ /* wake up worker thread to post asynchronous mailbox command */
lpfc_worker_wake_up(phba);
}
@@ -8823,7 +8823,7 @@ lpfc_sli_issue_mbox_s4(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq,
return rc;
}
- /* Now, interrupt mode asynchrous mailbox command */
+ /* Now, interrupt mode asynchronous mailbox command */
rc = lpfc_mbox_cmd_check(phba, mboxq);
if (rc) {
lpfc_printf_log(phba, KERN_ERR, LOG_MBOX | LOG_SLI,
@@ -13112,11 +13112,11 @@ lpfc_cq_event_setup(struct lpfc_hba *phba, void *entry, int size)
}
/**
- * lpfc_sli4_sp_handle_async_event - Handle an asynchroous event
+ * lpfc_sli4_sp_handle_async_event - Handle an asynchronous event
* @phba: Pointer to HBA context object.
* @cqe: Pointer to mailbox completion queue entry.
*
- * This routine process a mailbox completion queue entry with asynchrous
+ * This routine process a mailbox completion queue entry with asynchronous
* event.
*
* Return: true if work posted to worker thread, otherwise false.
@@ -13270,7 +13270,7 @@ out_no_mqe_complete:
* @cqe: Pointer to mailbox completion queue entry.
*
* This routine process a mailbox completion queue entry, it invokes the
- * proper mailbox complete handling or asynchrous event handling routine
+ * proper mailbox complete handling or asynchronous event handling routine
* according to the MCQE's async bit.
*
* Return: true if work posted to worker thread, otherwise false.
diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c
index 848fbec7bda6..45fd8dfb7c40 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_base.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_base.c
@@ -5248,7 +5248,6 @@ _base_allocate_memory_pools(struct MPT3SAS_ADAPTER *ioc)
&ct->chain_buffer_dma);
if (!ct->chain_buffer) {
ioc_err(ioc, "chain_lookup: pci_pool_alloc failed\n");
- _base_release_memory_pools(ioc);
goto out;
}
}
diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c
index ae97e2f310a3..d7e7043f9eab 100644
--- a/drivers/scsi/qla2xxx/qla_attr.c
+++ b/drivers/scsi/qla2xxx/qla_attr.c
@@ -178,6 +178,7 @@ qla2x00_sysfs_read_nvram(struct file *filp, struct kobject *kobj,
faddr = ha->flt_region_nvram;
if (IS_QLA28XX(ha)) {
+ qla28xx_get_aux_images(vha, &active_regions);
if (active_regions.aux.vpd_nvram == QLA27XX_SECONDARY_IMAGE)
faddr = ha->flt_region_nvram_sec;
}
diff --git a/drivers/scsi/qla2xxx/qla_bsg.c b/drivers/scsi/qla2xxx/qla_bsg.c
index 99f0a1a08143..cbaf178fc979 100644
--- a/drivers/scsi/qla2xxx/qla_bsg.c
+++ b/drivers/scsi/qla2xxx/qla_bsg.c
@@ -2399,7 +2399,7 @@ qla2x00_get_flash_image_status(struct bsg_job *bsg_job)
struct qla_active_regions regions = { };
struct active_regions active_regions = { };
- qla28xx_get_aux_images(vha, &active_regions);
+ qla27xx_get_active_image(vha, &active_regions);
regions.global_image = active_regions.global;
if (IS_QLA28XX(ha)) {
diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h
index 460f443f6471..2edd9f7b3074 100644
--- a/drivers/scsi/qla2xxx/qla_def.h
+++ b/drivers/scsi/qla2xxx/qla_def.h
@@ -2401,6 +2401,7 @@ typedef struct fc_port {
unsigned int id_changed:1;
unsigned int scan_needed:1;
unsigned int n2n_flag:1;
+ unsigned int explicit_logout:1;
struct completion nvme_del_done;
uint32_t nvme_prli_service_param;
diff --git a/drivers/scsi/qla2xxx/qla_fw.h b/drivers/scsi/qla2xxx/qla_fw.h
index 59f6903e5abe..9dc09c117416 100644
--- a/drivers/scsi/qla2xxx/qla_fw.h
+++ b/drivers/scsi/qla2xxx/qla_fw.h
@@ -1523,6 +1523,10 @@ struct qla_flt_header {
#define FLT_REG_NVRAM_SEC_28XX_1 0x10F
#define FLT_REG_NVRAM_SEC_28XX_2 0x111
#define FLT_REG_NVRAM_SEC_28XX_3 0x113
+#define FLT_REG_MPI_PRI_28XX 0xD3
+#define FLT_REG_MPI_SEC_28XX 0xF0
+#define FLT_REG_PEP_PRI_28XX 0xD1
+#define FLT_REG_PEP_SEC_28XX 0xF1
struct qla_flt_region {
uint16_t code;
diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index 6c28f38f8021..aa5204163bec 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -533,6 +533,7 @@ static int qla_post_els_plogi_work(struct scsi_qla_host *vha, fc_port_t *fcport)
e->u.fcport.fcport = fcport;
fcport->flags |= FCF_ASYNC_ACTIVE;
+ fcport->disc_state = DSC_LOGIN_PEND;
return qla2x00_post_work(vha, e);
}
@@ -1526,8 +1527,8 @@ int qla24xx_fcport_handle_login(struct scsi_qla_host *vha, fc_port_t *fcport)
}
}
- /* for pure Target Mode. Login will not be initiated */
- if (vha->host->active_mode == MODE_TARGET)
+ /* Target won't initiate port login if fabric is present */
+ if (vha->host->active_mode == MODE_TARGET && !N2N_TOPO(vha->hw))
return 0;
if (fcport->flags & FCF_ASYNC_SENT) {
@@ -1719,6 +1720,10 @@ void qla24xx_handle_relogin_event(scsi_qla_host_t *vha,
void qla_handle_els_plogi_done(scsi_qla_host_t *vha,
struct event_arg *ea)
{
+ /* for pure Target Mode, PRLI will not be initiated */
+ if (vha->host->active_mode == MODE_TARGET)
+ return;
+
ql_dbg(ql_dbg_disc, vha, 0x2118,
"%s %d %8phC post PRLI\n",
__func__, __LINE__, ea->fcport->port_name);
@@ -4852,6 +4857,7 @@ qla2x00_alloc_fcport(scsi_qla_host_t *vha, gfp_t flags)
}
INIT_WORK(&fcport->del_work, qla24xx_delete_sess_fn);
+ INIT_WORK(&fcport->free_work, qlt_free_session_done);
INIT_WORK(&fcport->reg_work, qla_register_fcport_fn);
INIT_LIST_HEAD(&fcport->gnl_entry);
INIT_LIST_HEAD(&fcport->list);
@@ -4930,14 +4936,8 @@ qla2x00_configure_loop(scsi_qla_host_t *vha)
set_bit(RSCN_UPDATE, &flags);
clear_bit(LOCAL_LOOP_UPDATE, &flags);
- } else if (ha->current_topology == ISP_CFG_N) {
- clear_bit(RSCN_UPDATE, &flags);
- if (qla_tgt_mode_enabled(vha)) {
- /* allow the other side to start the login */
- clear_bit(LOCAL_LOOP_UPDATE, &flags);
- set_bit(RELOGIN_NEEDED, &vha->dpc_flags);
- }
- } else if (ha->current_topology == ISP_CFG_NL) {
+ } else if (ha->current_topology == ISP_CFG_NL ||
+ ha->current_topology == ISP_CFG_N) {
clear_bit(RSCN_UPDATE, &flags);
set_bit(LOCAL_LOOP_UPDATE, &flags);
} else if (!vha->flags.online ||
@@ -5054,7 +5054,6 @@ qla2x00_configure_local_loop(scsi_qla_host_t *vha)
memcpy(&ha->plogi_els_payld.data,
(void *)ha->init_cb,
sizeof(ha->plogi_els_payld.data));
- set_bit(RELOGIN_NEEDED, &vha->dpc_flags);
} else {
ql_dbg(ql_dbg_init, vha, 0x00d1,
"PLOGI ELS param read fail.\n");
diff --git a/drivers/scsi/qla2xxx/qla_iocb.c b/drivers/scsi/qla2xxx/qla_iocb.c
index b25f87ff8cde..8b050f0b4333 100644
--- a/drivers/scsi/qla2xxx/qla_iocb.c
+++ b/drivers/scsi/qla2xxx/qla_iocb.c
@@ -2405,11 +2405,19 @@ qla2x00_login_iocb(srb_t *sp, struct mbx_entry *mbx)
static void
qla24xx_logout_iocb(srb_t *sp, struct logio_entry_24xx *logio)
{
+ u16 control_flags = LCF_COMMAND_LOGO;
logio->entry_type = LOGINOUT_PORT_IOCB_TYPE;
- logio->control_flags =
- cpu_to_le16(LCF_COMMAND_LOGO|LCF_IMPL_LOGO);
- if (!sp->fcport->keep_nport_handle)
- logio->control_flags |= cpu_to_le16(LCF_FREE_NPORT);
+
+ if (sp->fcport->explicit_logout) {
+ control_flags |= LCF_EXPL_LOGO|LCF_FREE_NPORT;
+ } else {
+ control_flags |= LCF_IMPL_LOGO;
+
+ if (!sp->fcport->keep_nport_handle)
+ control_flags |= LCF_FREE_NPORT;
+ }
+
+ logio->control_flags = cpu_to_le16(control_flags);
logio->nport_handle = cpu_to_le16(sp->fcport->loop_id);
logio->port_id[0] = sp->fcport->d_id.b.al_pa;
logio->port_id[1] = sp->fcport->d_id.b.area;
@@ -2617,6 +2625,10 @@ qla24xx_els_dcmd_iocb(scsi_qla_host_t *vha, int els_opcode,
memcpy(elsio->u.els_logo.els_logo_pyld, &logo_pyld,
sizeof(struct els_logo_payload));
+ ql_dbg(ql_dbg_disc + ql_dbg_buffer, vha, 0x3075, "LOGO buffer:");
+ ql_dump_buffer(ql_dbg_disc + ql_dbg_buffer, vha, 0x010a,
+ elsio->u.els_logo.els_logo_pyld,
+ sizeof(*elsio->u.els_logo.els_logo_pyld));
rval = qla2x00_start_sp(sp);
if (rval != QLA_SUCCESS) {
@@ -2676,7 +2688,8 @@ qla24xx_els_logo_iocb(srb_t *sp, struct els_entry_24xx *els_iocb)
ql_dbg(ql_dbg_io + ql_dbg_buffer, vha, 0x3073,
"PLOGI ELS IOCB:\n");
ql_dump_buffer(ql_log_info, vha, 0x0109,
- (uint8_t *)els_iocb, 0x70);
+ (uint8_t *)els_iocb,
+ sizeof(*els_iocb));
} else {
els_iocb->control_flags = 1 << 13;
els_iocb->tx_byte_count =
@@ -2688,6 +2701,11 @@ qla24xx_els_logo_iocb(srb_t *sp, struct els_entry_24xx *els_iocb)
els_iocb->rx_byte_count = 0;
els_iocb->rx_address = 0;
els_iocb->rx_len = 0;
+ ql_dbg(ql_dbg_io + ql_dbg_buffer, vha, 0x3076,
+ "LOGO ELS IOCB:");
+ ql_dump_buffer(ql_log_info, vha, 0x010b,
+ els_iocb,
+ sizeof(*els_iocb));
}
sp->vha->qla_stats.control_requests++;
@@ -2934,7 +2952,8 @@ qla24xx_els_dcmd2_iocb(scsi_qla_host_t *vha, int els_opcode,
ql_dbg(ql_dbg_disc + ql_dbg_buffer, vha, 0x3073, "PLOGI buffer:\n");
ql_dump_buffer(ql_dbg_disc + ql_dbg_buffer, vha, 0x0109,
- (uint8_t *)elsio->u.els_plogi.els_plogi_pyld, 0x70);
+ (uint8_t *)elsio->u.els_plogi.els_plogi_pyld,
+ sizeof(*elsio->u.els_plogi.els_plogi_pyld));
rval = qla2x00_start_sp(sp);
if (rval != QLA_SUCCESS) {
diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c
index 2601d7673c37..7b8a6bfcf08d 100644
--- a/drivers/scsi/qla2xxx/qla_isr.c
+++ b/drivers/scsi/qla2xxx/qla_isr.c
@@ -1061,8 +1061,6 @@ global_port_update:
ql_dbg(ql_dbg_async, vha, 0x5011,
"Asynchronous PORT UPDATE ignored %04x/%04x/%04x.\n",
mb[1], mb[2], mb[3]);
-
- qlt_async_event(mb[0], vha, mb);
break;
}
@@ -1079,8 +1077,6 @@ global_port_update:
set_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags);
set_bit(LOCAL_LOOP_UPDATE, &vha->dpc_flags);
set_bit(VP_CONFIG_OK, &vha->vp_flags);
-
- qlt_async_event(mb[0], vha, mb);
break;
case MBA_RSCN_UPDATE: /* State Change Registration */
diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c
index 0cf94f05f008..b7c1108c48e2 100644
--- a/drivers/scsi/qla2xxx/qla_mbx.c
+++ b/drivers/scsi/qla2xxx/qla_mbx.c
@@ -3921,6 +3921,7 @@ qla24xx_report_id_acquisition(scsi_qla_host_t *vha,
vha->d_id.b24 = 0;
vha->d_id.b.al_pa = 1;
ha->flags.n2n_bigger = 1;
+ ha->flags.n2n_ae = 0;
id.b.al_pa = 2;
ql_dbg(ql_dbg_async, vha, 0x5075,
@@ -3931,6 +3932,7 @@ qla24xx_report_id_acquisition(scsi_qla_host_t *vha,
"Format 1: Remote login - Waiting for WWPN %8phC.\n",
rptid_entry->u.f1.port_name);
ha->flags.n2n_bigger = 0;
+ ha->flags.n2n_ae = 1;
}
qla24xx_post_newsess_work(vha, &id,
rptid_entry->u.f1.port_name,
@@ -3942,7 +3944,6 @@ qla24xx_report_id_acquisition(scsi_qla_host_t *vha,
/* if our portname is higher then initiate N2N login */
set_bit(N2N_LOGIN_NEEDED, &vha->dpc_flags);
- ha->flags.n2n_ae = 1;
return;
break;
case TOPO_FL:
diff --git a/drivers/scsi/qla2xxx/qla_nvme.c b/drivers/scsi/qla2xxx/qla_nvme.c
index 941aa53363f5..bfcd02fdf2b8 100644
--- a/drivers/scsi/qla2xxx/qla_nvme.c
+++ b/drivers/scsi/qla2xxx/qla_nvme.c
@@ -610,6 +610,7 @@ static void qla_nvme_remoteport_delete(struct nvme_fc_remote_port *rport)
}
static struct nvme_fc_port_template qla_nvme_fc_transport = {
+ .module = THIS_MODULE,
.localport_delete = qla_nvme_localport_delete,
.remoteport_delete = qla_nvme_remoteport_delete,
.create_queue = qla_nvme_alloc_queue,
diff --git a/drivers/scsi/qla2xxx/qla_sup.c b/drivers/scsi/qla2xxx/qla_sup.c
index f2d5115b2d8d..bbe90354f49b 100644
--- a/drivers/scsi/qla2xxx/qla_sup.c
+++ b/drivers/scsi/qla2xxx/qla_sup.c
@@ -847,15 +847,15 @@ qla2xxx_get_flt_info(scsi_qla_host_t *vha, uint32_t flt_addr)
ha->flt_region_img_status_pri = start;
break;
case FLT_REG_IMG_SEC_27XX:
- if (IS_QLA27XX(ha) && !IS_QLA28XX(ha))
+ if (IS_QLA27XX(ha) || IS_QLA28XX(ha))
ha->flt_region_img_status_sec = start;
break;
case FLT_REG_FW_SEC_27XX:
- if (IS_QLA27XX(ha) && !IS_QLA28XX(ha))
+ if (IS_QLA27XX(ha) || IS_QLA28XX(ha))
ha->flt_region_fw_sec = start;
break;
case FLT_REG_BOOTLOAD_SEC_27XX:
- if (IS_QLA27XX(ha) && !IS_QLA28XX(ha))
+ if (IS_QLA27XX(ha) || IS_QLA28XX(ha))
ha->flt_region_boot_sec = start;
break;
case FLT_REG_AUX_IMG_PRI_28XX:
@@ -2725,8 +2725,11 @@ qla28xx_write_flash_data(scsi_qla_host_t *vha, uint32_t *dwptr, uint32_t faddr,
ql_log(ql_log_warn + ql_dbg_verbose, vha, 0xffff,
"Region %x is secure\n", region.code);
- if (region.code == FLT_REG_FW ||
- region.code == FLT_REG_FW_SEC_27XX) {
+ switch (region.code) {
+ case FLT_REG_FW:
+ case FLT_REG_FW_SEC_27XX:
+ case FLT_REG_MPI_PRI_28XX:
+ case FLT_REG_MPI_SEC_28XX:
fw_array = dwptr;
/* 1st fw array */
@@ -2757,9 +2760,23 @@ qla28xx_write_flash_data(scsi_qla_host_t *vha, uint32_t *dwptr, uint32_t faddr,
buf_size_without_sfub += risc_size;
fw_array += risc_size;
}
- } else {
- ql_log(ql_log_warn + ql_dbg_verbose, vha, 0xffff,
- "Secure region %x not supported\n",
+ break;
+
+ case FLT_REG_PEP_PRI_28XX:
+ case FLT_REG_PEP_SEC_28XX:
+ fw_array = dwptr;
+
+ /* 1st fw array */
+ risc_size = be32_to_cpu(fw_array[3]);
+ risc_attr = be32_to_cpu(fw_array[9]);
+
+ buf_size_without_sfub = risc_size;
+ fw_array += risc_size;
+ break;
+
+ default:
+ ql_log(ql_log_warn + ql_dbg_verbose, vha,
+ 0xffff, "Secure region %x not supported\n",
region.code);
rval = QLA_COMMAND_ERROR;
goto done;
@@ -2880,7 +2897,7 @@ qla28xx_write_flash_data(scsi_qla_host_t *vha, uint32_t *dwptr, uint32_t faddr,
"Sending Secure Flash MB Cmd\n");
rval = qla28xx_secure_flash_update(vha, 0, region.code,
buf_size_without_sfub, sfub_dma,
- sizeof(struct secure_flash_update_block));
+ sizeof(struct secure_flash_update_block) >> 2);
if (rval != QLA_SUCCESS) {
ql_log(ql_log_warn, vha, 0xffff,
"Secure Flash MB Cmd failed %x.", rval);
diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c
index 51b275a575a5..68c14143e50e 100644
--- a/drivers/scsi/qla2xxx/qla_target.c
+++ b/drivers/scsi/qla2xxx/qla_target.c
@@ -1104,6 +1104,7 @@ void qlt_free_session_done(struct work_struct *work)
}
}
+ sess->explicit_logout = 0;
spin_unlock_irqrestore(&ha->tgt.sess_lock, flags);
sess->free_pending = 0;
@@ -1160,7 +1161,6 @@ void qlt_unreg_sess(struct fc_port *sess)
sess->last_rscn_gen = sess->rscn_gen;
sess->last_login_gen = sess->login_gen;
- INIT_WORK(&sess->free_work, qlt_free_session_done);
queue_work(sess->vha->hw->wq, &sess->free_work);
}
EXPORT_SYMBOL(qlt_unreg_sess);
@@ -1265,7 +1265,6 @@ void qlt_schedule_sess_for_deletion(struct fc_port *sess)
"Scheduling sess %p for deletion %8phC\n",
sess, sess->port_name);
- INIT_WORK(&sess->del_work, qla24xx_delete_sess_fn);
WARN_ON(!queue_work(sess->vha->hw->wq, &sess->del_work));
}
@@ -4804,6 +4803,7 @@ static int qlt_handle_login(struct scsi_qla_host *vha,
switch (sess->disc_state) {
case DSC_DELETED:
+ case DSC_LOGIN_PEND:
qlt_plogi_ack_unref(vha, pla);
break;
diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c
index 042a24314edc..abe7f79bb789 100644
--- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c
+++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c
@@ -246,6 +246,8 @@ static void tcm_qla2xxx_complete_mcmd(struct work_struct *work)
*/
static void tcm_qla2xxx_free_mcmd(struct qla_tgt_mgmt_cmd *mcmd)
{
+ if (!mcmd)
+ return;
INIT_WORK(&mcmd->free_work, tcm_qla2xxx_complete_mcmd);
queue_work(tcm_qla2xxx_free_wq, &mcmd->free_work);
}
@@ -348,6 +350,7 @@ static void tcm_qla2xxx_close_session(struct se_session *se_sess)
target_sess_cmd_list_set_waiting(se_sess);
spin_unlock_irqrestore(&vha->hw->tgt.sess_lock, flags);
+ sess->explicit_logout = 1;
tcm_qla2xxx_put_sess(sess);
}
diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c
index 8c674eca09f1..2323432a0edb 100644
--- a/drivers/scsi/qla4xxx/ql4_os.c
+++ b/drivers/scsi/qla4xxx/ql4_os.c
@@ -4275,7 +4275,6 @@ static int qla4xxx_mem_alloc(struct scsi_qla_host *ha)
return QLA_SUCCESS;
mem_alloc_error_exit:
- qla4xxx_mem_free(ha);
return QLA_ERROR;
}
diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c
index 417b868d8735..ed8d9709b9b9 100644
--- a/drivers/scsi/scsi_transport_iscsi.c
+++ b/drivers/scsi/scsi_transport_iscsi.c
@@ -24,6 +24,8 @@
#define ISCSI_TRANSPORT_VERSION "2.0-870"
+#define ISCSI_SEND_MAX_ALLOWED 10
+
#define CREATE_TRACE_POINTS
#include <trace/events/iscsi.h>
@@ -3682,6 +3684,7 @@ iscsi_if_rx(struct sk_buff *skb)
struct nlmsghdr *nlh;
struct iscsi_uevent *ev;
uint32_t group;
+ int retries = ISCSI_SEND_MAX_ALLOWED;
nlh = nlmsg_hdr(skb);
if (nlh->nlmsg_len < sizeof(*nlh) + sizeof(*ev) ||
@@ -3712,6 +3715,10 @@ iscsi_if_rx(struct sk_buff *skb)
break;
err = iscsi_if_send_reply(portid, nlh->nlmsg_type,
ev, sizeof(*ev));
+ if (err == -EAGAIN && --retries < 0) {
+ printk(KERN_WARNING "Send reply failed, error %d\n", err);
+ break;
+ }
} while (err < 0 && err != -ECONNREFUSED && err != -ESRCH);
skb_pull(skb, rlen);
}
diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c
index 7b7ef3acb504..412ac56ecd60 100644
--- a/drivers/scsi/smartpqi/smartpqi_init.c
+++ b/drivers/scsi/smartpqi/smartpqi_init.c
@@ -8689,11 +8689,11 @@ static void __attribute__((unused)) verify_structures(void)
BUILD_BUG_ON(offsetof(struct pqi_general_admin_request,
data.delete_operational_queue.queue_id) != 12);
BUILD_BUG_ON(sizeof(struct pqi_general_admin_request) != 64);
- BUILD_BUG_ON(FIELD_SIZEOF(struct pqi_general_admin_request,
+ BUILD_BUG_ON(sizeof_field(struct pqi_general_admin_request,
data.create_operational_iq) != 64 - 11);
- BUILD_BUG_ON(FIELD_SIZEOF(struct pqi_general_admin_request,
+ BUILD_BUG_ON(sizeof_field(struct pqi_general_admin_request,
data.create_operational_oq) != 64 - 11);
- BUILD_BUG_ON(FIELD_SIZEOF(struct pqi_general_admin_request,
+ BUILD_BUG_ON(sizeof_field(struct pqi_general_admin_request,
data.delete_operational_queue) != 64 - 11);
BUILD_BUG_ON(offsetof(struct pqi_general_admin_response,
diff --git a/drivers/scsi/ufs/cdns-pltfrm.c b/drivers/scsi/ufs/cdns-pltfrm.c
index b2af04c57a39..6feeb0faf123 100644
--- a/drivers/scsi/ufs/cdns-pltfrm.c
+++ b/drivers/scsi/ufs/cdns-pltfrm.c
@@ -99,6 +99,12 @@ static int cdns_ufs_link_startup_notify(struct ufs_hba *hba,
*/
ufshcd_dme_set(hba, UIC_ARG_MIB(PA_LOCAL_TX_LCC_ENABLE), 0);
+ /*
+ * Disabling Autohibern8 feature in cadence UFS
+ * to mask unexpected interrupt trigger.
+ */
+ hba->ahit = 0;
+
return 0;
}
diff --git a/drivers/scsi/ufs/ufs_bsg.c b/drivers/scsi/ufs/ufs_bsg.c
index baeecee35d1e..53dd87628cbe 100644
--- a/drivers/scsi/ufs/ufs_bsg.c
+++ b/drivers/scsi/ufs/ufs_bsg.c
@@ -203,7 +203,7 @@ int ufs_bsg_probe(struct ufs_hba *hba)
bsg_dev->parent = get_device(parent);
bsg_dev->release = ufs_bsg_node_release;
- dev_set_name(bsg_dev, "ufs-bsg");
+ dev_set_name(bsg_dev, "ufs-bsg%u", shost->host_no);
ret = device_add(bsg_dev);
if (ret)
diff --git a/drivers/soc/Kconfig b/drivers/soc/Kconfig
index 833e04a7835c..1778f8c62861 100644
--- a/drivers/soc/Kconfig
+++ b/drivers/soc/Kconfig
@@ -14,6 +14,7 @@ source "drivers/soc/qcom/Kconfig"
source "drivers/soc/renesas/Kconfig"
source "drivers/soc/rockchip/Kconfig"
source "drivers/soc/samsung/Kconfig"
+source "drivers/soc/sifive/Kconfig"
source "drivers/soc/sunxi/Kconfig"
source "drivers/soc/tegra/Kconfig"
source "drivers/soc/ti/Kconfig"
diff --git a/drivers/soc/Makefile b/drivers/soc/Makefile
index 2ec355003524..8b49d782a1ab 100644
--- a/drivers/soc/Makefile
+++ b/drivers/soc/Makefile
@@ -20,6 +20,7 @@ obj-y += qcom/
obj-y += renesas/
obj-$(CONFIG_ARCH_ROCKCHIP) += rockchip/
obj-$(CONFIG_SOC_SAMSUNG) += samsung/
+obj-$(CONFIG_SOC_SIFIVE) += sifive/
obj-y += sunxi/
obj-$(CONFIG_ARCH_TEGRA) += tegra/
obj-y += ti/
diff --git a/drivers/soc/sifive/Kconfig b/drivers/soc/sifive/Kconfig
new file mode 100644
index 000000000000..58cf8c40d08d
--- /dev/null
+++ b/drivers/soc/sifive/Kconfig
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0
+
+if SOC_SIFIVE
+
+config SIFIVE_L2
+ bool "Sifive L2 Cache controller"
+ help
+ Support for the L2 cache controller on SiFive platforms.
+
+endif
diff --git a/drivers/soc/sifive/Makefile b/drivers/soc/sifive/Makefile
new file mode 100644
index 000000000000..b5caff77938f
--- /dev/null
+++ b/drivers/soc/sifive/Makefile
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0
+
+obj-$(CONFIG_SIFIVE_L2) += sifive_l2_cache.o
diff --git a/arch/riscv/mm/sifive_l2_cache.c b/drivers/soc/sifive/sifive_l2_cache.c
index a9ffff3277c7..a9ffff3277c7 100644
--- a/arch/riscv/mm/sifive_l2_cache.c
+++ b/drivers/soc/sifive/sifive_l2_cache.c
diff --git a/drivers/spi/spi-cadence.c b/drivers/spi/spi-cadence.c
index c36587b42e95..82a0ee09cbe1 100644
--- a/drivers/spi/spi-cadence.c
+++ b/drivers/spi/spi-cadence.c
@@ -168,16 +168,16 @@ static void cdns_spi_init_hw(struct cdns_spi *xspi)
/**
* cdns_spi_chipselect - Select or deselect the chip select line
* @spi: Pointer to the spi_device structure
- * @enable: Select (1) or deselect (0) the chip select line
+ * @is_high: Select(0) or deselect (1) the chip select line
*/
-static void cdns_spi_chipselect(struct spi_device *spi, bool enable)
+static void cdns_spi_chipselect(struct spi_device *spi, bool is_high)
{
struct cdns_spi *xspi = spi_master_get_devdata(spi->master);
u32 ctrl_reg;
ctrl_reg = cdns_spi_read(xspi, CDNS_SPI_CR);
- if (!enable) {
+ if (is_high) {
/* Deselect the slave */
ctrl_reg |= CDNS_SPI_CR_SSCTRL;
} else {
diff --git a/drivers/spi/spi-cavium-thunderx.c b/drivers/spi/spi-cavium-thunderx.c
index d12e149f1a41..fd6b9caffaf0 100644
--- a/drivers/spi/spi-cavium-thunderx.c
+++ b/drivers/spi/spi-cavium-thunderx.c
@@ -82,6 +82,7 @@ static int thunderx_spi_probe(struct pci_dev *pdev,
error:
clk_disable_unprepare(p->clk);
+ pci_release_regions(pdev);
spi_master_put(master);
return ret;
}
@@ -96,6 +97,7 @@ static void thunderx_spi_remove(struct pci_dev *pdev)
return;
clk_disable_unprepare(p->clk);
+ pci_release_regions(pdev);
/* Put everything in a known state. */
writeq(0, p->register_base + OCTEON_SPI_CFG(p));
}
diff --git a/drivers/spi/spi-dw.c b/drivers/spi/spi-dw.c
index a92aa5cd4fbe..5a25da377119 100644
--- a/drivers/spi/spi-dw.c
+++ b/drivers/spi/spi-dw.c
@@ -129,10 +129,11 @@ void dw_spi_set_cs(struct spi_device *spi, bool enable)
struct dw_spi *dws = spi_controller_get_devdata(spi->controller);
struct chip_data *chip = spi_get_ctldata(spi);
+ /* Chip select logic is inverted from spi_set_cs() */
if (chip && chip->cs_control)
- chip->cs_control(enable);
+ chip->cs_control(!enable);
- if (enable)
+ if (!enable)
dw_writel(dws, DW_SPI_SER, BIT(spi->chip_select));
else if (dws->cs_override)
dw_writel(dws, DW_SPI_SER, 0);
@@ -171,9 +172,11 @@ static inline u32 rx_max(struct dw_spi *dws)
static void dw_writer(struct dw_spi *dws)
{
- u32 max = tx_max(dws);
+ u32 max;
u16 txw = 0;
+ spin_lock(&dws->buf_lock);
+ max = tx_max(dws);
while (max--) {
/* Set the tx word if the transfer's original "tx" is not null */
if (dws->tx_end - dws->len) {
@@ -185,13 +188,16 @@ static void dw_writer(struct dw_spi *dws)
dw_write_io_reg(dws, DW_SPI_DR, txw);
dws->tx += dws->n_bytes;
}
+ spin_unlock(&dws->buf_lock);
}
static void dw_reader(struct dw_spi *dws)
{
- u32 max = rx_max(dws);
+ u32 max;
u16 rxw;
+ spin_lock(&dws->buf_lock);
+ max = rx_max(dws);
while (max--) {
rxw = dw_read_io_reg(dws, DW_SPI_DR);
/* Care rx only if the transfer's original "rx" is not null */
@@ -203,6 +209,7 @@ static void dw_reader(struct dw_spi *dws)
}
dws->rx += dws->n_bytes;
}
+ spin_unlock(&dws->buf_lock);
}
static void int_error_stop(struct dw_spi *dws, const char *msg)
@@ -275,18 +282,20 @@ static int dw_spi_transfer_one(struct spi_controller *master,
{
struct dw_spi *dws = spi_controller_get_devdata(master);
struct chip_data *chip = spi_get_ctldata(spi);
+ unsigned long flags;
u8 imask = 0;
u16 txlevel = 0;
u32 cr0;
int ret;
dws->dma_mapped = 0;
-
+ spin_lock_irqsave(&dws->buf_lock, flags);
dws->tx = (void *)transfer->tx_buf;
dws->tx_end = dws->tx + transfer->len;
dws->rx = transfer->rx_buf;
dws->rx_end = dws->rx + transfer->len;
dws->len = transfer->len;
+ spin_unlock_irqrestore(&dws->buf_lock, flags);
spi_enable_chip(dws, 0);
@@ -470,6 +479,7 @@ int dw_spi_add_host(struct device *dev, struct dw_spi *dws)
dws->type = SSI_MOTO_SPI;
dws->dma_inited = 0;
dws->dma_addr = (dma_addr_t)(dws->paddr + DW_SPI_DR);
+ spin_lock_init(&dws->buf_lock);
spi_controller_set_devdata(master, dws);
diff --git a/drivers/spi/spi-dw.h b/drivers/spi/spi-dw.h
index 38c7de1f0aa9..1bf5713e047d 100644
--- a/drivers/spi/spi-dw.h
+++ b/drivers/spi/spi-dw.h
@@ -119,6 +119,7 @@ struct dw_spi {
size_t len;
void *tx;
void *tx_end;
+ spinlock_t buf_lock;
void *rx;
void *rx_end;
int dma_mapped;
diff --git a/drivers/spi/spi-fsl-dspi.c b/drivers/spi/spi-fsl-dspi.c
index 442cff71a0d2..8428b69c858b 100644
--- a/drivers/spi/spi-fsl-dspi.c
+++ b/drivers/spi/spi-fsl-dspi.c
@@ -185,6 +185,7 @@ struct fsl_dspi {
struct spi_transfer *cur_transfer;
struct spi_message *cur_msg;
struct chip_data *cur_chip;
+ size_t progress;
size_t len;
const void *tx;
void *rx;
@@ -586,21 +587,14 @@ static void dspi_tcfq_write(struct fsl_dspi *dspi)
dspi->tx_cmd |= SPI_PUSHR_CMD_CTCNT;
if (dspi->devtype_data->xspi_mode && dspi->bits_per_word > 16) {
- /* Write two TX FIFO entries first, and then the corresponding
- * CMD FIFO entry.
+ /* Write the CMD FIFO entry first, and then the two
+ * corresponding TX FIFO entries.
*/
u32 data = dspi_pop_tx(dspi);
- if (dspi->cur_chip->ctar_val & SPI_CTAR_LSBFE) {
- /* LSB */
- tx_fifo_write(dspi, data & 0xFFFF);
- tx_fifo_write(dspi, data >> 16);
- } else {
- /* MSB */
- tx_fifo_write(dspi, data >> 16);
- tx_fifo_write(dspi, data & 0xFFFF);
- }
cmd_fifo_write(dspi);
+ tx_fifo_write(dspi, data & 0xFFFF);
+ tx_fifo_write(dspi, data >> 16);
} else {
/* Write one entry to both TX FIFO and CMD FIFO
* simultaneously.
@@ -658,7 +652,7 @@ static int dspi_rxtx(struct fsl_dspi *dspi)
u32 spi_tcr;
spi_take_timestamp_post(dspi->ctlr, dspi->cur_transfer,
- dspi->tx - dspi->bytes_per_word, !dspi->irq);
+ dspi->progress, !dspi->irq);
/* Get transfer counter (in number of SPI transfers). It was
* reset to 0 when transfer(s) were started.
@@ -667,6 +661,7 @@ static int dspi_rxtx(struct fsl_dspi *dspi)
spi_tcnt = SPI_TCR_GET_TCNT(spi_tcr);
/* Update total number of bytes that were transferred */
msg->actual_length += spi_tcnt * dspi->bytes_per_word;
+ dspi->progress += spi_tcnt;
trans_mode = dspi->devtype_data->trans_mode;
if (trans_mode == DSPI_EOQ_MODE)
@@ -679,7 +674,7 @@ static int dspi_rxtx(struct fsl_dspi *dspi)
return 0;
spi_take_timestamp_pre(dspi->ctlr, dspi->cur_transfer,
- dspi->tx, !dspi->irq);
+ dspi->progress, !dspi->irq);
if (trans_mode == DSPI_EOQ_MODE)
dspi_eoq_write(dspi);
@@ -768,6 +763,7 @@ static int dspi_transfer_one_message(struct spi_controller *ctlr,
dspi->rx = transfer->rx_buf;
dspi->rx_end = dspi->rx + transfer->len;
dspi->len = transfer->len;
+ dspi->progress = 0;
/* Validated transfer specific frame size (defaults applied) */
dspi->bits_per_word = transfer->bits_per_word;
if (transfer->bits_per_word <= 8)
@@ -789,7 +785,7 @@ static int dspi_transfer_one_message(struct spi_controller *ctlr,
SPI_CTARE_DTCP(1));
spi_take_timestamp_pre(dspi->ctlr, dspi->cur_transfer,
- dspi->tx, !dspi->irq);
+ dspi->progress, !dspi->irq);
trans_mode = dspi->devtype_data->trans_mode;
switch (trans_mode) {
diff --git a/drivers/spi/spi-fsl-spi.c b/drivers/spi/spi-fsl-spi.c
index 114801a32371..fb4159ad6bf6 100644
--- a/drivers/spi/spi-fsl-spi.c
+++ b/drivers/spi/spi-fsl-spi.c
@@ -611,6 +611,7 @@ static struct spi_master * fsl_spi_probe(struct device *dev,
master->setup = fsl_spi_setup;
master->cleanup = fsl_spi_cleanup;
master->transfer_one_message = fsl_spi_do_one_msg;
+ master->use_gpio_descriptors = true;
mpc8xxx_spi = spi_master_get_devdata(master);
mpc8xxx_spi->max_bits_per_word = 32;
@@ -727,17 +728,27 @@ static int of_fsl_spi_probe(struct platform_device *ofdev)
}
}
#endif
-
- pdata->cs_control = fsl_spi_cs_control;
+ /*
+ * Handle the case where we have one hardwired (always selected)
+ * device on the first "chipselect". Else we let the core code
+ * handle any GPIOs or native chip selects and assign the
+ * appropriate callback for dealing with the CS lines. This isn't
+ * supported on the GRLIB variant.
+ */
+ ret = gpiod_count(dev, "cs");
+ if (ret <= 0)
+ pdata->max_chipselect = 1;
+ else
+ pdata->cs_control = fsl_spi_cs_control;
}
ret = of_address_to_resource(np, 0, &mem);
if (ret)
goto err;
- irq = irq_of_parse_and_map(np, 0);
- if (!irq) {
- ret = -EINVAL;
+ irq = platform_get_irq(ofdev, 0);
+ if (irq < 0) {
+ ret = irq;
goto err;
}
@@ -750,7 +761,6 @@ static int of_fsl_spi_probe(struct platform_device *ofdev)
return 0;
err:
- irq_dispose_mapping(irq);
return ret;
}
diff --git a/drivers/spi/spi-nxp-fspi.c b/drivers/spi/spi-nxp-fspi.c
index c36bb1bb464e..8c5084a3a617 100644
--- a/drivers/spi/spi-nxp-fspi.c
+++ b/drivers/spi/spi-nxp-fspi.c
@@ -439,7 +439,7 @@ static bool nxp_fspi_supports_op(struct spi_mem *mem,
op->data.nbytes > f->devtype_data->txfifo)
return false;
- return true;
+ return spi_mem_default_supports_op(mem, op);
}
/* Instead of busy looping invoke readl_poll_timeout functionality. */
diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c
index 16b6b2ad4e7c..9071333ebdd8 100644
--- a/drivers/spi/spi-pxa2xx.c
+++ b/drivers/spi/spi-pxa2xx.c
@@ -1443,6 +1443,10 @@ static const struct pci_device_id pxa2xx_spi_pci_compound_match[] = {
{ PCI_VDEVICE(INTEL, 0x4b2a), LPSS_BXT_SSP },
{ PCI_VDEVICE(INTEL, 0x4b2b), LPSS_BXT_SSP },
{ PCI_VDEVICE(INTEL, 0x4b37), LPSS_BXT_SSP },
+ /* JSL */
+ { PCI_VDEVICE(INTEL, 0x4daa), LPSS_CNL_SSP },
+ { PCI_VDEVICE(INTEL, 0x4dab), LPSS_CNL_SSP },
+ { PCI_VDEVICE(INTEL, 0x4dfb), LPSS_CNL_SSP },
/* APL */
{ PCI_VDEVICE(INTEL, 0x5ac2), LPSS_BXT_SSP },
{ PCI_VDEVICE(INTEL, 0x5ac4), LPSS_BXT_SSP },
diff --git a/drivers/spi/spi-sprd.c b/drivers/spi/spi-sprd.c
index 2ee1feb41681..6678f1cbc566 100644
--- a/drivers/spi/spi-sprd.c
+++ b/drivers/spi/spi-sprd.c
@@ -678,7 +678,7 @@ static int sprd_spi_init_hw(struct sprd_spi *ss, struct spi_transfer *t)
if (d->unit != SPI_DELAY_UNIT_SCK)
return -EINVAL;
- val = readl_relaxed(ss->base + SPRD_SPI_CTL7);
+ val = readl_relaxed(ss->base + SPRD_SPI_CTL0);
val &= ~(SPRD_SPI_SCK_REV | SPRD_SPI_NG_TX | SPRD_SPI_NG_RX);
/* Set default chip selection, clock phase and clock polarity */
val |= ss->hw_mode & SPI_CPHA ? SPRD_SPI_NG_RX : SPRD_SPI_NG_TX;
diff --git a/drivers/spi/spi-ti-qspi.c b/drivers/spi/spi-ti-qspi.c
index 3cb65371ae3b..66dcb6128539 100644
--- a/drivers/spi/spi-ti-qspi.c
+++ b/drivers/spi/spi-ti-qspi.c
@@ -62,6 +62,7 @@ struct ti_qspi {
u32 dc;
bool mmap_enabled;
+ int current_cs;
};
#define QSPI_PID (0x0)
@@ -487,6 +488,7 @@ static void ti_qspi_enable_memory_map(struct spi_device *spi)
MEM_CS_EN(spi->chip_select));
}
qspi->mmap_enabled = true;
+ qspi->current_cs = spi->chip_select;
}
static void ti_qspi_disable_memory_map(struct spi_device *spi)
@@ -498,6 +500,7 @@ static void ti_qspi_disable_memory_map(struct spi_device *spi)
regmap_update_bits(qspi->ctrl_base, qspi->ctrl_reg,
MEM_CS_MASK, 0);
qspi->mmap_enabled = false;
+ qspi->current_cs = -1;
}
static void ti_qspi_setup_mmap_read(struct spi_device *spi, u8 opcode,
@@ -543,7 +546,7 @@ static int ti_qspi_exec_mem_op(struct spi_mem *mem,
mutex_lock(&qspi->list_lock);
- if (!qspi->mmap_enabled)
+ if (!qspi->mmap_enabled || qspi->current_cs != mem->spi->chip_select)
ti_qspi_enable_memory_map(mem->spi);
ti_qspi_setup_mmap_read(mem->spi, op->cmd.opcode, op->data.buswidth,
op->addr.nbytes, op->dummy.nbytes);
@@ -799,6 +802,7 @@ no_dma:
}
}
qspi->mmap_enabled = false;
+ qspi->current_cs = -1;
ret = devm_spi_register_master(&pdev->dev, master);
if (!ret)
diff --git a/drivers/spi/spi-uniphier.c b/drivers/spi/spi-uniphier.c
index 47cde1864630..ce9b30112e26 100644
--- a/drivers/spi/spi-uniphier.c
+++ b/drivers/spi/spi-uniphier.c
@@ -290,25 +290,32 @@ static void uniphier_spi_recv(struct uniphier_spi_priv *priv)
}
}
-static void uniphier_spi_fill_tx_fifo(struct uniphier_spi_priv *priv)
+static void uniphier_spi_set_fifo_threshold(struct uniphier_spi_priv *priv,
+ unsigned int threshold)
{
- unsigned int fifo_threshold, fill_bytes;
u32 val;
- fifo_threshold = DIV_ROUND_UP(priv->rx_bytes,
- bytes_per_word(priv->bits_per_word));
- fifo_threshold = min(fifo_threshold, SSI_FIFO_DEPTH);
-
- fill_bytes = fifo_threshold - (priv->rx_bytes - priv->tx_bytes);
-
- /* set fifo threshold */
val = readl(priv->base + SSI_FC);
val &= ~(SSI_FC_TXFTH_MASK | SSI_FC_RXFTH_MASK);
- val |= FIELD_PREP(SSI_FC_TXFTH_MASK, fifo_threshold);
- val |= FIELD_PREP(SSI_FC_RXFTH_MASK, fifo_threshold);
+ val |= FIELD_PREP(SSI_FC_TXFTH_MASK, SSI_FIFO_DEPTH - threshold);
+ val |= FIELD_PREP(SSI_FC_RXFTH_MASK, threshold);
writel(val, priv->base + SSI_FC);
+}
+
+static void uniphier_spi_fill_tx_fifo(struct uniphier_spi_priv *priv)
+{
+ unsigned int fifo_threshold, fill_words;
+ unsigned int bpw = bytes_per_word(priv->bits_per_word);
+
+ fifo_threshold = DIV_ROUND_UP(priv->rx_bytes, bpw);
+ fifo_threshold = min(fifo_threshold, SSI_FIFO_DEPTH);
+
+ uniphier_spi_set_fifo_threshold(priv, fifo_threshold);
+
+ fill_words = fifo_threshold -
+ DIV_ROUND_UP(priv->rx_bytes - priv->tx_bytes, bpw);
- while (fill_bytes--)
+ while (fill_words--)
uniphier_spi_send(priv);
}
diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c
index 5e4c4532f7f3..8994545367a2 100644
--- a/drivers/spi/spi.c
+++ b/drivers/spi/spi.c
@@ -1499,8 +1499,7 @@ static void spi_pump_messages(struct kthread_work *work)
* advances its @tx buffer pointer monotonically.
* @ctlr: Pointer to the spi_controller structure of the driver
* @xfer: Pointer to the transfer being timestamped
- * @tx: Pointer to the current word within the xfer->tx_buf that the driver is
- * preparing to transmit right now.
+ * @progress: How many words (not bytes) have been transferred so far
* @irqs_off: If true, will disable IRQs and preemption for the duration of the
* transfer, for less jitter in time measurement. Only compatible
* with PIO drivers. If true, must follow up with
@@ -1510,21 +1509,19 @@ static void spi_pump_messages(struct kthread_work *work)
*/
void spi_take_timestamp_pre(struct spi_controller *ctlr,
struct spi_transfer *xfer,
- const void *tx, bool irqs_off)
+ size_t progress, bool irqs_off)
{
- u8 bytes_per_word = DIV_ROUND_UP(xfer->bits_per_word, 8);
-
if (!xfer->ptp_sts)
return;
if (xfer->timestamped_pre)
return;
- if (tx < (xfer->tx_buf + xfer->ptp_sts_word_pre * bytes_per_word))
+ if (progress < xfer->ptp_sts_word_pre)
return;
/* Capture the resolution of the timestamp */
- xfer->ptp_sts_word_pre = (tx - xfer->tx_buf) / bytes_per_word;
+ xfer->ptp_sts_word_pre = progress;
xfer->timestamped_pre = true;
@@ -1546,23 +1543,20 @@ EXPORT_SYMBOL_GPL(spi_take_timestamp_pre);
* timestamped.
* @ctlr: Pointer to the spi_controller structure of the driver
* @xfer: Pointer to the transfer being timestamped
- * @tx: Pointer to the current word within the xfer->tx_buf that the driver has
- * just transmitted.
+ * @progress: How many words (not bytes) have been transferred so far
* @irqs_off: If true, will re-enable IRQs and preemption for the local CPU.
*/
void spi_take_timestamp_post(struct spi_controller *ctlr,
struct spi_transfer *xfer,
- const void *tx, bool irqs_off)
+ size_t progress, bool irqs_off)
{
- u8 bytes_per_word = DIV_ROUND_UP(xfer->bits_per_word, 8);
-
if (!xfer->ptp_sts)
return;
if (xfer->timestamped_post)
return;
- if (tx < (xfer->tx_buf + xfer->ptp_sts_word_post * bytes_per_word))
+ if (progress < xfer->ptp_sts_word_post)
return;
ptp_read_system_postts(xfer->ptp_sts);
@@ -1573,7 +1567,7 @@ void spi_take_timestamp_post(struct spi_controller *ctlr,
}
/* Capture the resolution of the timestamp */
- xfer->ptp_sts_word_post = (tx - xfer->tx_buf) / bytes_per_word;
+ xfer->ptp_sts_word_post = progress;
xfer->timestamped_post = true;
}
diff --git a/drivers/staging/axis-fifo/Kconfig b/drivers/staging/axis-fifo/Kconfig
index 3fffe4d6f327..f180a8e9f58a 100644
--- a/drivers/staging/axis-fifo/Kconfig
+++ b/drivers/staging/axis-fifo/Kconfig
@@ -4,7 +4,7 @@
#
config XIL_AXIS_FIFO
tristate "Xilinx AXI-Stream FIFO IP core driver"
- depends on OF
+ depends on OF && HAS_IOMEM
help
This adds support for the Xilinx AXI-Stream FIFO IP core driver.
The AXI Streaming FIFO allows memory mapped access to a AXI Streaming
diff --git a/drivers/staging/comedi/drivers/gsc_hpdi.c b/drivers/staging/comedi/drivers/gsc_hpdi.c
index 4bdf44d82879..dc62db1ee1dd 100644
--- a/drivers/staging/comedi/drivers/gsc_hpdi.c
+++ b/drivers/staging/comedi/drivers/gsc_hpdi.c
@@ -623,6 +623,11 @@ static int gsc_hpdi_auto_attach(struct comedi_device *dev,
dma_alloc_coherent(&pcidev->dev, DMA_BUFFER_SIZE,
&devpriv->dio_buffer_phys_addr[i],
GFP_KERNEL);
+ if (!devpriv->dio_buffer[i]) {
+ dev_warn(dev->class_dev,
+ "failed to allocate DMA buffer\n");
+ return -ENOMEM;
+ }
}
/* allocate dma descriptors */
devpriv->dma_desc = dma_alloc_coherent(&pcidev->dev,
@@ -630,6 +635,11 @@ static int gsc_hpdi_auto_attach(struct comedi_device *dev,
NUM_DMA_DESCRIPTORS,
&devpriv->dma_desc_phys_addr,
GFP_KERNEL);
+ if (!devpriv->dma_desc) {
+ dev_warn(dev->class_dev,
+ "failed to allocate DMA descriptors\n");
+ return -ENOMEM;
+ }
if (devpriv->dma_desc_phys_addr & 0xf) {
dev_warn(dev->class_dev,
" dma descriptors not quad-word aligned (bug)\n");
diff --git a/drivers/staging/exfat/exfat.h b/drivers/staging/exfat/exfat.h
index 2aac1e000977..51c665a924b7 100644
--- a/drivers/staging/exfat/exfat.h
+++ b/drivers/staging/exfat/exfat.h
@@ -805,8 +805,8 @@ s32 create_dir(struct inode *inode, struct chain_t *p_dir,
s32 create_file(struct inode *inode, struct chain_t *p_dir,
struct uni_name_t *p_uniname, u8 mode, struct file_id_t *fid);
void remove_file(struct inode *inode, struct chain_t *p_dir, s32 entry);
-s32 rename_file(struct inode *inode, struct chain_t *p_dir, s32 old_entry,
- struct uni_name_t *p_uniname, struct file_id_t *fid);
+s32 exfat_rename_file(struct inode *inode, struct chain_t *p_dir, s32 old_entry,
+ struct uni_name_t *p_uniname, struct file_id_t *fid);
s32 move_file(struct inode *inode, struct chain_t *p_olddir, s32 oldentry,
struct chain_t *p_newdir, struct uni_name_t *p_uniname,
struct file_id_t *fid);
diff --git a/drivers/staging/exfat/exfat_core.c b/drivers/staging/exfat/exfat_core.c
index d2d3447083c7..794000e7bc6f 100644
--- a/drivers/staging/exfat/exfat_core.c
+++ b/drivers/staging/exfat/exfat_core.c
@@ -192,8 +192,6 @@ static s32 clr_alloc_bitmap(struct super_block *sb, u32 clu)
exfat_bitmap_clear((u8 *)p_fs->vol_amap[i]->b_data, b);
- return sector_write(sb, sector, p_fs->vol_amap[i], 0);
-
#ifdef CONFIG_EXFAT_DISCARD
if (opts->discard) {
ret = sb_issue_discard(sb, START_SECTOR(clu),
@@ -202,9 +200,13 @@ static s32 clr_alloc_bitmap(struct super_block *sb, u32 clu)
if (ret == -EOPNOTSUPP) {
pr_warn("discard not supported by device, disabling");
opts->discard = 0;
+ } else {
+ return ret;
}
}
#endif /* CONFIG_EXFAT_DISCARD */
+
+ return sector_write(sb, sector, p_fs->vol_amap[i], 0);
}
static u32 test_alloc_bitmap(struct super_block *sb, u32 clu)
@@ -2322,8 +2324,8 @@ void remove_file(struct inode *inode, struct chain_t *p_dir, s32 entry)
fs_func->delete_dir_entry(sb, p_dir, entry, 0, num_entries);
}
-s32 rename_file(struct inode *inode, struct chain_t *p_dir, s32 oldentry,
- struct uni_name_t *p_uniname, struct file_id_t *fid)
+s32 exfat_rename_file(struct inode *inode, struct chain_t *p_dir, s32 oldentry,
+ struct uni_name_t *p_uniname, struct file_id_t *fid)
{
s32 ret, newentry = -1, num_old_entries, num_new_entries;
sector_t sector_old, sector_new;
diff --git a/drivers/staging/exfat/exfat_super.c b/drivers/staging/exfat/exfat_super.c
index 6e481908c59f..9f91853b189b 100644
--- a/drivers/staging/exfat/exfat_super.c
+++ b/drivers/staging/exfat/exfat_super.c
@@ -1262,8 +1262,8 @@ static int ffsMoveFile(struct inode *old_parent_inode, struct file_id_t *fid,
fs_set_vol_flags(sb, VOL_DIRTY);
if (olddir.dir == newdir.dir)
- ret = rename_file(new_parent_inode, &olddir, dentry, &uni_name,
- fid);
+ ret = exfat_rename_file(new_parent_inode, &olddir, dentry,
+ &uni_name, fid);
else
ret = move_file(new_parent_inode, &olddir, dentry, &newdir,
&uni_name, fid);
diff --git a/drivers/staging/fbtft/fb_uc1611.c b/drivers/staging/fbtft/fb_uc1611.c
index e763205e9e4f..f61e373c75e9 100644
--- a/drivers/staging/fbtft/fb_uc1611.c
+++ b/drivers/staging/fbtft/fb_uc1611.c
@@ -63,11 +63,17 @@ static int init_display(struct fbtft_par *par)
{
int ret;
- /* Set CS active high */
- par->spi->mode |= SPI_CS_HIGH;
+ /*
+ * Set CS active inverse polarity: just setting SPI_CS_HIGH does not
+ * work with GPIO based chip selects that are logically active high
+ * but inverted inside the GPIO library, so enforce inverted
+ * semantics.
+ */
+ par->spi->mode ^= SPI_CS_HIGH;
ret = spi_setup(par->spi);
if (ret) {
- dev_err(par->info->device, "Could not set SPI_CS_HIGH\n");
+ dev_err(par->info->device,
+ "Could not set inverse CS polarity\n");
return ret;
}
diff --git a/drivers/staging/fbtft/fb_watterott.c b/drivers/staging/fbtft/fb_watterott.c
index 27cc8eabcbe9..76b25df376b8 100644
--- a/drivers/staging/fbtft/fb_watterott.c
+++ b/drivers/staging/fbtft/fb_watterott.c
@@ -150,10 +150,17 @@ static int init_display(struct fbtft_par *par)
/* enable SPI interface by having CS and MOSI low during reset */
save_mode = par->spi->mode;
- par->spi->mode |= SPI_CS_HIGH;
- ret = spi_setup(par->spi); /* set CS inactive low */
+ /*
+ * Set CS active inverse polarity: just setting SPI_CS_HIGH does not
+ * work with GPIO based chip selects that are logically active high
+ * but inverted inside the GPIO library, so enforce inverted
+ * semantics.
+ */
+ par->spi->mode ^= SPI_CS_HIGH;
+ ret = spi_setup(par->spi);
if (ret) {
- dev_err(par->info->device, "Could not set SPI_CS_HIGH\n");
+ dev_err(par->info->device,
+ "Could not set inverse CS polarity\n");
return ret;
}
write_reg(par, 0x00); /* make sure mode is set */
diff --git a/drivers/staging/fbtft/fbtft-core.c b/drivers/staging/fbtft/fbtft-core.c
index ffb84987dd86..d3e098b41b1a 100644
--- a/drivers/staging/fbtft/fbtft-core.c
+++ b/drivers/staging/fbtft/fbtft-core.c
@@ -913,7 +913,7 @@ static int fbtft_init_display_from_property(struct fbtft_par *par)
if (count == 0)
return -EINVAL;
- values = kmalloc_array(count, sizeof(*values), GFP_KERNEL);
+ values = kmalloc_array(count + 1, sizeof(*values), GFP_KERNEL);
if (!values)
return -ENOMEM;
@@ -926,9 +926,9 @@ static int fbtft_init_display_from_property(struct fbtft_par *par)
gpiod_set_value(par->gpio.cs, 0); /* Activate chip */
index = -1;
- while (index < count) {
- val = values[++index];
+ val = values[++index];
+ while (index < count) {
if (val & FBTFT_OF_INIT_CMD) {
val &= 0xFFFF;
i = 0;
diff --git a/drivers/staging/hp/Kconfig b/drivers/staging/hp/Kconfig
index fb395cfe6b92..f20ab21a6b2a 100644
--- a/drivers/staging/hp/Kconfig
+++ b/drivers/staging/hp/Kconfig
@@ -6,6 +6,7 @@
config NET_VENDOR_HP
bool "HP devices"
default y
+ depends on ETHERNET
depends on ISA || EISA || PCI
---help---
If you have a network (Ethernet) card belonging to this class, say Y.
diff --git a/drivers/staging/isdn/gigaset/usb-gigaset.c b/drivers/staging/isdn/gigaset/usb-gigaset.c
index 1b9b43659bdf..a20c0bfa68f3 100644
--- a/drivers/staging/isdn/gigaset/usb-gigaset.c
+++ b/drivers/staging/isdn/gigaset/usb-gigaset.c
@@ -571,8 +571,7 @@ static int gigaset_initcshw(struct cardstate *cs)
{
struct usb_cardstate *ucs;
- cs->hw.usb = ucs =
- kmalloc(sizeof(struct usb_cardstate), GFP_KERNEL);
+ cs->hw.usb = ucs = kzalloc(sizeof(struct usb_cardstate), GFP_KERNEL);
if (!ucs) {
pr_err("out of memory\n");
return -ENOMEM;
@@ -584,9 +583,6 @@ static int gigaset_initcshw(struct cardstate *cs)
ucs->bchars[3] = 0;
ucs->bchars[4] = 0x11;
ucs->bchars[5] = 0x13;
- ucs->bulk_out_buffer = NULL;
- ucs->bulk_out_urb = NULL;
- ucs->read_urb = NULL;
tasklet_init(&cs->write_tasklet,
gigaset_modem_fill, (unsigned long) cs);
@@ -685,6 +681,11 @@ static int gigaset_probe(struct usb_interface *interface,
return -ENODEV;
}
+ if (hostif->desc.bNumEndpoints < 2) {
+ dev_err(&interface->dev, "missing endpoints\n");
+ return -ENODEV;
+ }
+
dev_info(&udev->dev, "%s: Device matched ... !\n", __func__);
/* allocate memory for our device state and initialize it */
@@ -704,6 +705,12 @@ static int gigaset_probe(struct usb_interface *interface,
endpoint = &hostif->endpoint[0].desc;
+ if (!usb_endpoint_is_bulk_out(endpoint)) {
+ dev_err(&interface->dev, "missing bulk-out endpoint\n");
+ retval = -ENODEV;
+ goto error;
+ }
+
buffer_size = le16_to_cpu(endpoint->wMaxPacketSize);
ucs->bulk_out_size = buffer_size;
ucs->bulk_out_epnum = usb_endpoint_num(endpoint);
@@ -723,6 +730,12 @@ static int gigaset_probe(struct usb_interface *interface,
endpoint = &hostif->endpoint[1].desc;
+ if (!usb_endpoint_is_int_in(endpoint)) {
+ dev_err(&interface->dev, "missing int-in endpoint\n");
+ retval = -ENODEV;
+ goto error;
+ }
+
ucs->busy = 0;
ucs->read_urb = usb_alloc_urb(0, GFP_KERNEL);
diff --git a/drivers/staging/ks7010/ks_wlan_net.c b/drivers/staging/ks7010/ks_wlan_net.c
index 3cffc8be6656..211dd4a11cac 100644
--- a/drivers/staging/ks7010/ks_wlan_net.c
+++ b/drivers/staging/ks7010/ks_wlan_net.c
@@ -45,7 +45,7 @@ struct wep_key {
* function prototypes
*/
static int ks_wlan_open(struct net_device *dev);
-static void ks_wlan_tx_timeout(struct net_device *dev);
+static void ks_wlan_tx_timeout(struct net_device *dev, unsigned int txqueue);
static int ks_wlan_start_xmit(struct sk_buff *skb, struct net_device *dev);
static int ks_wlan_close(struct net_device *dev);
static void ks_wlan_set_rx_mode(struct net_device *dev);
@@ -2498,7 +2498,7 @@ int ks_wlan_set_mac_address(struct net_device *dev, void *addr)
}
static
-void ks_wlan_tx_timeout(struct net_device *dev)
+void ks_wlan_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct ks_wlan_private *priv = netdev_priv(dev);
diff --git a/drivers/staging/media/ipu3/include/intel-ipu3.h b/drivers/staging/media/ipu3/include/intel-ipu3.h
index 08eaa0bad0de..1c9c3ba4d518 100644
--- a/drivers/staging/media/ipu3/include/intel-ipu3.h
+++ b/drivers/staging/media/ipu3/include/intel-ipu3.h
@@ -449,7 +449,7 @@ struct ipu3_uapi_awb_fr_config_s {
__u16 reserved1;
__u32 bayer_sign;
__u8 bayer_nf;
- __u8 reserved2[3];
+ __u8 reserved2[7];
} __attribute__((aligned(32))) __packed;
/**
diff --git a/drivers/staging/octeon/Kconfig b/drivers/staging/octeon/Kconfig
index 5319909eb2f6..e7f4ddcc1361 100644
--- a/drivers/staging/octeon/Kconfig
+++ b/drivers/staging/octeon/Kconfig
@@ -3,6 +3,7 @@ config OCTEON_ETHERNET
tristate "Cavium Networks Octeon Ethernet support"
depends on CAVIUM_OCTEON_SOC || COMPILE_TEST
depends on NETDEVICES
+ depends on BROKEN
select PHYLIB
select MDIO_OCTEON
help
diff --git a/drivers/staging/qlge/qlge_ethtool.c b/drivers/staging/qlge/qlge_ethtool.c
index a6886cc5654c..56d116d79e56 100644
--- a/drivers/staging/qlge/qlge_ethtool.c
+++ b/drivers/staging/qlge/qlge_ethtool.c
@@ -41,7 +41,7 @@ struct ql_stats {
int stat_offset;
};
-#define QL_SIZEOF(m) FIELD_SIZEOF(struct ql_adapter, m)
+#define QL_SIZEOF(m) sizeof_field(struct ql_adapter, m)
#define QL_OFF(m) offsetof(struct ql_adapter, m)
static const struct ql_stats ql_gstrings_stats[] = {
diff --git a/drivers/staging/qlge/qlge_main.c b/drivers/staging/qlge/qlge_main.c
index 6ad4515311f7..24d20f000435 100644
--- a/drivers/staging/qlge/qlge_main.c
+++ b/drivers/staging/qlge/qlge_main.c
@@ -4274,7 +4274,7 @@ static int qlge_set_mac_address(struct net_device *ndev, void *p)
return status;
}
-static void qlge_tx_timeout(struct net_device *ndev)
+static void qlge_tx_timeout(struct net_device *ndev, unsigned int txqueue)
{
struct ql_adapter *qdev = netdev_priv(ndev);
ql_queue_asic_error(qdev);
diff --git a/drivers/staging/rtl8188eu/os_dep/usb_intf.c b/drivers/staging/rtl8188eu/os_dep/usb_intf.c
index 4fac9dca798e..a7cac0719b8b 100644
--- a/drivers/staging/rtl8188eu/os_dep/usb_intf.c
+++ b/drivers/staging/rtl8188eu/os_dep/usb_intf.c
@@ -70,7 +70,7 @@ static struct dvobj_priv *usb_dvobj_init(struct usb_interface *usb_intf)
phost_conf = pusbd->actconfig;
pconf_desc = &phost_conf->desc;
- phost_iface = &usb_intf->altsetting[0];
+ phost_iface = usb_intf->cur_altsetting;
piface_desc = &phost_iface->desc;
pdvobjpriv->NumInterfaces = pconf_desc->bNumInterfaces;
diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
index dace81a7d1ba..a51d627284d1 100644
--- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
+++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
@@ -267,7 +267,7 @@ static short _rtl92e_check_nic_enough_desc(struct net_device *dev, int prio)
return 0;
}
-static void _rtl92e_tx_timeout(struct net_device *dev)
+static void _rtl92e_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct r8192_priv *priv = rtllib_priv(dev);
diff --git a/drivers/staging/rtl8192u/r8192U_core.c b/drivers/staging/rtl8192u/r8192U_core.c
index 7e2cabd16e88..482382a887f8 100644
--- a/drivers/staging/rtl8192u/r8192U_core.c
+++ b/drivers/staging/rtl8192u/r8192U_core.c
@@ -640,7 +640,7 @@ short check_nic_enough_desc(struct net_device *dev, int queue_index)
return (used < MAX_TX_URB);
}
-static void tx_timeout(struct net_device *dev)
+static void tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct r8192_priv *priv = ieee80211_priv(dev);
diff --git a/drivers/staging/rtl8712/usb_intf.c b/drivers/staging/rtl8712/usb_intf.c
index ba1288297ee4..a87562f632a7 100644
--- a/drivers/staging/rtl8712/usb_intf.c
+++ b/drivers/staging/rtl8712/usb_intf.c
@@ -247,7 +247,7 @@ static uint r8712_usb_dvobj_init(struct _adapter *padapter)
pdvobjpriv->padapter = padapter;
padapter->eeprom_address_size = 6;
- phost_iface = &pintf->altsetting[0];
+ phost_iface = pintf->cur_altsetting;
piface_desc = &phost_iface->desc;
pdvobjpriv->nr_endpoint = piface_desc->bNumEndpoints;
if (pusbd->speed == USB_SPEED_HIGH) {
diff --git a/drivers/staging/unisys/visornic/visornic_main.c b/drivers/staging/unisys/visornic/visornic_main.c
index 1d1440d43002..0433536930a9 100644
--- a/drivers/staging/unisys/visornic/visornic_main.c
+++ b/drivers/staging/unisys/visornic/visornic_main.c
@@ -1078,7 +1078,7 @@ out_save_flags:
* Queue the work and return. Make sure we have not already been informed that
* the IO Partition is gone; if so, we will have already timed-out the xmits.
*/
-static void visornic_xmit_timeout(struct net_device *netdev)
+static void visornic_xmit_timeout(struct net_device *netdev, unsigned int txqueue)
{
struct visornic_devdata *devdata = netdev_priv(netdev);
unsigned long flags;
diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c
index 02148a24818a..4458c1e60fa3 100644
--- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c
+++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c
@@ -3309,7 +3309,7 @@ static int __init vchiq_driver_init(void)
return 0;
region_unregister:
- platform_driver_unregister(&vchiq_driver);
+ unregister_chrdev_region(vchiq_devid, 1);
class_destroy:
class_destroy(vchiq_class);
diff --git a/drivers/staging/wfx/data_tx.c b/drivers/staging/wfx/data_tx.c
index b722e9773232..b13d7341f8bb 100644
--- a/drivers/staging/wfx/data_tx.c
+++ b/drivers/staging/wfx/data_tx.c
@@ -16,7 +16,7 @@
#include "traces.h"
#include "hif_tx_mib.h"
-#define WFX_INVALID_RATE_ID (0xFF)
+#define WFX_INVALID_RATE_ID 15
#define WFX_LINK_ID_NO_ASSOC 15
#define WFX_LINK_ID_GC_TIMEOUT ((unsigned long)(10 * HZ))
@@ -184,7 +184,7 @@ static int wfx_tx_policy_get(struct wfx_vif *wvif,
*/
entry = list_entry(cache->free.prev, struct tx_policy, link);
memcpy(entry->rates, wanted.rates, sizeof(entry->rates));
- entry->uploaded = 0;
+ entry->uploaded = false;
entry->usage_count = 0;
idx = entry - cache->cache;
}
@@ -202,6 +202,8 @@ static void wfx_tx_policy_put(struct wfx_vif *wvif, int idx)
int usage, locked;
struct tx_policy_cache *cache = &wvif->tx_policy_cache;
+ if (idx == WFX_INVALID_RATE_ID)
+ return;
spin_lock_bh(&cache->lock);
locked = list_empty(&cache->free);
usage = wfx_tx_policy_release(cache, &cache->cache[idx]);
@@ -239,7 +241,7 @@ static int wfx_tx_policy_upload(struct wfx_vif *wvif)
dst->terminate = 1;
dst->count_init = 1;
memcpy(&dst->rates, src->rates, sizeof(src->rates));
- src->uploaded = 1;
+ src->uploaded = true;
arg->num_tx_rate_policies++;
}
}
@@ -249,7 +251,7 @@ static int wfx_tx_policy_upload(struct wfx_vif *wvif)
return 0;
}
-static void wfx_tx_policy_upload_work(struct work_struct *work)
+void wfx_tx_policy_upload_work(struct work_struct *work)
{
struct wfx_vif *wvif =
container_of(work, struct wfx_vif, tx_policy_upload_work);
@@ -270,7 +272,6 @@ void wfx_tx_policy_init(struct wfx_vif *wvif)
spin_lock_init(&cache->lock);
INIT_LIST_HEAD(&cache->used);
INIT_LIST_HEAD(&cache->free);
- INIT_WORK(&wvif->tx_policy_upload_work, wfx_tx_policy_upload_work);
for (i = 0; i < HIF_MIB_NUM_TX_RATE_RETRY_POLICIES; ++i)
list_add(&cache->cache[i].link, &cache->free);
@@ -523,9 +524,9 @@ static void wfx_tx_fixup_rates(struct ieee80211_tx_rate *rates)
for (i = 0; i < IEEE80211_TX_MAX_RATES - 1; i++) {
if (rates[i + 1].idx == rates[i].idx &&
rates[i].idx != -1) {
- rates[i].count =
- max_t(int, rates[i].count,
- rates[i + 1].count);
+ rates[i].count += rates[i + 1].count;
+ if (rates[i].count > 15)
+ rates[i].count = 15;
rates[i + 1].idx = -1;
rates[i + 1].count = 0;
@@ -537,6 +538,17 @@ static void wfx_tx_fixup_rates(struct ieee80211_tx_rate *rates)
}
}
} while (!finished);
+ // Ensure that MCS0 or 1Mbps is present at the end of the retry list
+ for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) {
+ if (rates[i].idx == 0)
+ break;
+ if (rates[i].idx == -1) {
+ rates[i].idx = 0;
+ rates[i].count = 8; // == hw->max_rate_tries
+ rates[i].flags = rates[i - 1].flags & IEEE80211_TX_RC_MCS;
+ break;
+ }
+ }
// All retries use long GI
for (i = 1; i < IEEE80211_TX_MAX_RATES; i++)
rates[i].flags &= ~IEEE80211_TX_RC_SHORT_GI;
@@ -550,7 +562,8 @@ static u8 wfx_tx_get_rate_id(struct wfx_vif *wvif,
rate_id = wfx_tx_policy_get(wvif,
tx_info->driver_rates, &tx_policy_renew);
- WARN(rate_id == WFX_INVALID_RATE_ID, "unable to get a valid Tx policy");
+ if (rate_id == WFX_INVALID_RATE_ID)
+ dev_warn(wvif->wdev->dev, "unable to get a valid Tx policy");
if (tx_policy_renew) {
/* FIXME: It's not so optimal to stop TX queues every now and
@@ -679,7 +692,7 @@ void wfx_tx(struct ieee80211_hw *hw, struct ieee80211_tx_control *control,
struct ieee80211_sta *sta = control ? control->sta : NULL;
struct ieee80211_tx_info *tx_info = IEEE80211_SKB_CB(skb);
struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
- size_t driver_data_room = FIELD_SIZEOF(struct ieee80211_tx_info,
+ size_t driver_data_room = sizeof_field(struct ieee80211_tx_info,
rate_driver_data);
compiletime_assert(sizeof(struct wfx_tx_priv) <= driver_data_room,
@@ -735,7 +748,9 @@ void wfx_tx_confirm_cb(struct wfx_vif *wvif, struct hif_cnf_tx *arg)
rate = &tx_info->status.rates[i];
if (rate->idx < 0)
break;
- if (tx_count < rate->count && arg->status && arg->ack_failures)
+ if (tx_count < rate->count &&
+ arg->status == HIF_STATUS_RETRY_EXCEEDED &&
+ arg->ack_failures)
dev_dbg(wvif->wdev->dev, "all retries were not consumed: %d != %d\n",
rate->count, tx_count);
if (tx_count <= rate->count && tx_count &&
diff --git a/drivers/staging/wfx/data_tx.h b/drivers/staging/wfx/data_tx.h
index 29faa5640516..0fc388db62e0 100644
--- a/drivers/staging/wfx/data_tx.h
+++ b/drivers/staging/wfx/data_tx.h
@@ -39,9 +39,9 @@ struct wfx_link_entry {
struct tx_policy {
struct list_head link;
+ int usage_count;
u8 rates[12];
- u8 usage_count;
- u8 uploaded;
+ bool uploaded;
};
struct tx_policy_cache {
@@ -61,6 +61,7 @@ struct wfx_tx_priv {
} __packed;
void wfx_tx_policy_init(struct wfx_vif *wvif);
+void wfx_tx_policy_upload_work(struct work_struct *work);
void wfx_tx(struct ieee80211_hw *hw, struct ieee80211_tx_control *control,
struct sk_buff *skb);
diff --git a/drivers/staging/wfx/hif_tx_mib.h b/drivers/staging/wfx/hif_tx_mib.h
index bb091e395ff5..9be74881c56c 100644
--- a/drivers/staging/wfx/hif_tx_mib.h
+++ b/drivers/staging/wfx/hif_tx_mib.h
@@ -147,7 +147,6 @@ static inline int hif_set_mfp(struct wfx_vif *wvif, bool capable, bool required)
}
if (!required)
val.unpmf_allowed = 1;
- cpu_to_le32s((u32 *) &val);
return hif_write_mib(wvif->wdev, wvif->id,
HIF_MIB_ID_PROTECTED_MGMT_POLICY,
&val, sizeof(val));
diff --git a/drivers/staging/wfx/main.c b/drivers/staging/wfx/main.c
index 986a2ef678b9..3b47b6c21ea1 100644
--- a/drivers/staging/wfx/main.c
+++ b/drivers/staging/wfx/main.c
@@ -289,7 +289,7 @@ struct wfx_dev *wfx_init_common(struct device *dev,
hw->sta_data_size = sizeof(struct wfx_sta_priv);
hw->queues = 4;
hw->max_rates = 8;
- hw->max_rate_tries = 15;
+ hw->max_rate_tries = 8;
hw->extra_tx_headroom = sizeof(struct hif_sl_msg_hdr) +
sizeof(struct hif_msg)
+ sizeof(struct hif_req_tx)
diff --git a/drivers/staging/wfx/queue.c b/drivers/staging/wfx/queue.c
index c7ee90888f69..680fed31cefb 100644
--- a/drivers/staging/wfx/queue.c
+++ b/drivers/staging/wfx/queue.c
@@ -422,6 +422,7 @@ static bool hif_handle_tx_data(struct wfx_vif *wvif, struct sk_buff *skb,
break;
case do_wep:
wfx_tx_lock(wvif->wdev);
+ WARN_ON(wvif->wep_pending_skb);
wvif->wep_default_key_id = tx_priv->hw_key->keyidx;
wvif->wep_pending_skb = skb;
if (!schedule_work(&wvif->wep_key_work))
diff --git a/drivers/staging/wfx/sta.c b/drivers/staging/wfx/sta.c
index 29848a202ab4..471dd15b227f 100644
--- a/drivers/staging/wfx/sta.c
+++ b/drivers/staging/wfx/sta.c
@@ -592,6 +592,7 @@ static void wfx_do_unjoin(struct wfx_vif *wvif)
wfx_tx_flush(wvif->wdev);
hif_keep_alive_period(wvif, 0);
hif_reset(wvif, false);
+ wfx_tx_policy_init(wvif);
hif_set_output_power(wvif, wvif->wdev->output_power * 10);
wvif->dtim_period = 0;
hif_set_macaddr(wvif, wvif->vif->addr);
@@ -880,8 +881,10 @@ static int wfx_update_beaconing(struct wfx_vif *wvif)
if (wvif->state != WFX_STATE_AP ||
wvif->beacon_int != conf->beacon_int) {
wfx_tx_lock_flush(wvif->wdev);
- if (wvif->state != WFX_STATE_PASSIVE)
+ if (wvif->state != WFX_STATE_PASSIVE) {
hif_reset(wvif, false);
+ wfx_tx_policy_init(wvif);
+ }
wvif->state = WFX_STATE_PASSIVE;
wfx_start_ap(wvif);
wfx_tx_unlock(wvif->wdev);
@@ -1567,6 +1570,7 @@ int wfx_add_interface(struct ieee80211_hw *hw, struct ieee80211_vif *vif)
INIT_WORK(&wvif->set_cts_work, wfx_set_cts_work);
INIT_WORK(&wvif->unjoin_work, wfx_unjoin_work);
+ INIT_WORK(&wvif->tx_policy_upload_work, wfx_tx_policy_upload_work);
mutex_unlock(&wdev->conf_mutex);
hif_set_macaddr(wvif, vif->addr);
diff --git a/drivers/staging/wlan-ng/Kconfig b/drivers/staging/wlan-ng/Kconfig
index ac136663fa8e..082c16a31616 100644
--- a/drivers/staging/wlan-ng/Kconfig
+++ b/drivers/staging/wlan-ng/Kconfig
@@ -4,6 +4,7 @@ config PRISM2_USB
depends on WLAN && USB && CFG80211
select WIRELESS_EXT
select WEXT_PRIV
+ select CRC32
help
This is the wlan-ng prism 2.5/3 USB driver for a wide range of
old USB wireless devices.
diff --git a/drivers/staging/wlan-ng/p80211netdev.c b/drivers/staging/wlan-ng/p80211netdev.c
index a70fb84f38f1..b809c0015c0c 100644
--- a/drivers/staging/wlan-ng/p80211netdev.c
+++ b/drivers/staging/wlan-ng/p80211netdev.c
@@ -101,7 +101,7 @@ static void p80211knetdev_set_multicast_list(struct net_device *dev);
static int p80211knetdev_do_ioctl(struct net_device *dev, struct ifreq *ifr,
int cmd);
static int p80211knetdev_set_mac_address(struct net_device *dev, void *addr);
-static void p80211knetdev_tx_timeout(struct net_device *netdev);
+static void p80211knetdev_tx_timeout(struct net_device *netdev, unsigned int txqueue);
static int p80211_rx_typedrop(struct wlandevice *wlandev, u16 fc);
int wlan_watchdog = 5000;
@@ -1074,7 +1074,7 @@ static int p80211_rx_typedrop(struct wlandevice *wlandev, u16 fc)
return drop;
}
-static void p80211knetdev_tx_timeout(struct net_device *netdev)
+static void p80211knetdev_tx_timeout(struct net_device *netdev, unsigned int txqueue)
{
struct wlandevice *wlandev = netdev->ml_priv;
diff --git a/drivers/target/iscsi/cxgbit/cxgbit_main.c b/drivers/target/iscsi/cxgbit/cxgbit_main.c
index e877b917c15f..30ea37e1a3f5 100644
--- a/drivers/target/iscsi/cxgbit/cxgbit_main.c
+++ b/drivers/target/iscsi/cxgbit/cxgbit_main.c
@@ -708,7 +708,7 @@ static int __init cxgbit_init(void)
pr_info("%s dcb enabled.\n", DRV_NAME);
register_dcbevent_notifier(&cxgbit_dcbevent_nb);
#endif
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, cb) <
+ BUILD_BUG_ON(sizeof_field(struct sk_buff, cb) <
sizeof(union cxgbit_skb_cb));
return 0;
}
diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c
index 6949ea8bc387..51ffd5c002de 100644
--- a/drivers/target/target_core_iblock.c
+++ b/drivers/target/target_core_iblock.c
@@ -646,7 +646,9 @@ iblock_alloc_bip(struct se_cmd *cmd, struct bio *bio,
}
bip->bip_iter.bi_size = bio_integrity_bytes(bi, bio_sectors(bio));
- bip_set_seed(bip, bio->bi_iter.bi_sector);
+ /* virtual start sector must be in integrity interval units */
+ bip_set_seed(bip, bio->bi_iter.bi_sector >>
+ (bi->interval_exp - SECTOR_SHIFT));
pr_debug("IBLOCK BIP Size: %u Sector: %llu\n", bip->bip_iter.bi_size,
(unsigned long long)bip->bip_iter.bi_sector);
diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig
index 59b79fc48266..79b27865c6f4 100644
--- a/drivers/thermal/Kconfig
+++ b/drivers/thermal/Kconfig
@@ -108,7 +108,7 @@ config THERMAL_DEFAULT_GOV_USER_SPACE
config THERMAL_DEFAULT_GOV_POWER_ALLOCATOR
bool "power_allocator"
- select THERMAL_GOV_POWER_ALLOCATOR
+ depends on THERMAL_GOV_POWER_ALLOCATOR
help
Select this if you want to control temperature based on
system and device power allocation. This governor can only
diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c
index 36a3eb4ad4c5..f1c90fa2978e 100644
--- a/drivers/tty/n_gsm.c
+++ b/drivers/tty/n_gsm.c
@@ -2704,7 +2704,7 @@ static netdev_tx_t gsm_mux_net_start_xmit(struct sk_buff *skb,
}
/* called when a packet did not ack after watchdogtimeout */
-static void gsm_mux_net_tx_timeout(struct net_device *net)
+static void gsm_mux_net_tx_timeout(struct net_device *net, unsigned int txqueue)
{
/* Tell syslog we are hosed. */
dev_dbg(&net->dev, "Tx timed out.\n");
diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c
index a8dc8af83f39..1ba9bc667e13 100644
--- a/drivers/tty/serial/atmel_serial.c
+++ b/drivers/tty/serial/atmel_serial.c
@@ -2270,27 +2270,6 @@ static void atmel_set_termios(struct uart_port *port, struct ktermios *termios,
mode |= ATMEL_US_USMODE_NORMAL;
}
- /* set the mode, clock divisor, parity, stop bits and data size */
- atmel_uart_writel(port, ATMEL_US_MR, mode);
-
- /*
- * when switching the mode, set the RTS line state according to the
- * new mode, otherwise keep the former state
- */
- if ((old_mode & ATMEL_US_USMODE) != (mode & ATMEL_US_USMODE)) {
- unsigned int rts_state;
-
- if ((mode & ATMEL_US_USMODE) == ATMEL_US_USMODE_HWHS) {
- /* let the hardware control the RTS line */
- rts_state = ATMEL_US_RTSDIS;
- } else {
- /* force RTS line to low level */
- rts_state = ATMEL_US_RTSEN;
- }
-
- atmel_uart_writel(port, ATMEL_US_CR, rts_state);
- }
-
/*
* Set the baud rate:
* Fractional baudrate allows to setup output frequency more
@@ -2317,6 +2296,28 @@ static void atmel_set_termios(struct uart_port *port, struct ktermios *termios,
if (!(port->iso7816.flags & SER_ISO7816_ENABLED))
atmel_uart_writel(port, ATMEL_US_BRGR, quot);
+
+ /* set the mode, clock divisor, parity, stop bits and data size */
+ atmel_uart_writel(port, ATMEL_US_MR, mode);
+
+ /*
+ * when switching the mode, set the RTS line state according to the
+ * new mode, otherwise keep the former state
+ */
+ if ((old_mode & ATMEL_US_USMODE) != (mode & ATMEL_US_USMODE)) {
+ unsigned int rts_state;
+
+ if ((mode & ATMEL_US_USMODE) == ATMEL_US_USMODE_HWHS) {
+ /* let the hardware control the RTS line */
+ rts_state = ATMEL_US_RTSDIS;
+ } else {
+ /* force RTS line to low level */
+ rts_state = ATMEL_US_RTSEN;
+ }
+
+ atmel_uart_writel(port, ATMEL_US_CR, rts_state);
+ }
+
atmel_uart_writel(port, ATMEL_US_CR, ATMEL_US_RSTSTA | ATMEL_US_RSTRX);
atmel_uart_writel(port, ATMEL_US_CR, ATMEL_US_TXEN | ATMEL_US_RXEN);
atmel_port->tx_stopped = false;
diff --git a/drivers/tty/serial/msm_serial.c b/drivers/tty/serial/msm_serial.c
index 1cbae0768b1f..f6c45a796433 100644
--- a/drivers/tty/serial/msm_serial.c
+++ b/drivers/tty/serial/msm_serial.c
@@ -1580,6 +1580,7 @@ static void __msm_console_write(struct uart_port *port, const char *s,
int num_newlines = 0;
bool replaced = false;
void __iomem *tf;
+ int locked = 1;
if (is_uartdm)
tf = port->membase + UARTDM_TF;
@@ -1592,7 +1593,13 @@ static void __msm_console_write(struct uart_port *port, const char *s,
num_newlines++;
count += num_newlines;
- spin_lock(&port->lock);
+ if (port->sysrq)
+ locked = 0;
+ else if (oops_in_progress)
+ locked = spin_trylock(&port->lock);
+ else
+ spin_lock(&port->lock);
+
if (is_uartdm)
msm_reset_dm_count(port, count);
@@ -1628,7 +1635,9 @@ static void __msm_console_write(struct uart_port *port, const char *s,
iowrite32_rep(tf, buf, 1);
i += num_chars;
}
- spin_unlock(&port->lock);
+
+ if (locked)
+ spin_unlock(&port->lock);
}
static void msm_console_write(struct console *co, const char *s,
diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c
index b0a6eb106edb..7c2782785736 100644
--- a/drivers/tty/serial/serial_core.c
+++ b/drivers/tty/serial/serial_core.c
@@ -2834,6 +2834,7 @@ int uart_add_one_port(struct uart_driver *drv, struct uart_port *uport)
if (uport->cons && uport->dev)
of_console_check(uport->dev->of_node, uport->cons->name, uport->line);
+ tty_port_link_device(port, drv->tty_driver, uport->line);
uart_configure_port(drv, state, uport);
port->console = uart_console(uport);
diff --git a/drivers/tty/serial/sprd_serial.c b/drivers/tty/serial/sprd_serial.c
index 31df23502562..f60a59d9bf27 100644
--- a/drivers/tty/serial/sprd_serial.c
+++ b/drivers/tty/serial/sprd_serial.c
@@ -679,6 +679,9 @@ static irqreturn_t sprd_handle_irq(int irq, void *dev_id)
if (ims & SPRD_IMSR_TIMEOUT)
serial_out(port, SPRD_ICLR, SPRD_ICLR_TIMEOUT);
+ if (ims & SPRD_IMSR_BREAK_DETECT)
+ serial_out(port, SPRD_ICLR, SPRD_IMSR_BREAK_DETECT);
+
if (ims & (SPRD_IMSR_RX_FIFO_FULL | SPRD_IMSR_BREAK_DETECT |
SPRD_IMSR_TIMEOUT))
sprd_rx(port);
diff --git a/drivers/tty/synclink.c b/drivers/tty/synclink.c
index 84f26e43b229..61dc6b4a43d0 100644
--- a/drivers/tty/synclink.c
+++ b/drivers/tty/synclink.c
@@ -7837,7 +7837,7 @@ static int hdlcdev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
*
* dev pointer to network device structure
*/
-static void hdlcdev_tx_timeout(struct net_device *dev)
+static void hdlcdev_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct mgsl_struct *info = dev_to_port(dev);
unsigned long flags;
diff --git a/drivers/tty/synclink_gt.c b/drivers/tty/synclink_gt.c
index e8a9047de451..5d59e2369c8a 100644
--- a/drivers/tty/synclink_gt.c
+++ b/drivers/tty/synclink_gt.c
@@ -1682,7 +1682,7 @@ static int hdlcdev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
*
* dev pointer to network device structure
*/
-static void hdlcdev_tx_timeout(struct net_device *dev)
+static void hdlcdev_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
struct slgt_info *info = dev_to_port(dev);
unsigned long flags;
diff --git a/drivers/tty/synclinkmp.c b/drivers/tty/synclinkmp.c
index fcb91bf7a15b..33181fa6eb18 100644
--- a/drivers/tty/synclinkmp.c
+++ b/drivers/tty/synclinkmp.c
@@ -1807,7 +1807,7 @@ static int hdlcdev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
*
* dev pointer to network device structure
*/
-static void hdlcdev_tx_timeout(struct net_device *dev)
+static void hdlcdev_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
SLMP_INFO *info = dev_to_port(dev);
unsigned long flags;
diff --git a/drivers/tty/tty_port.c b/drivers/tty/tty_port.c
index 044c3cbdcfa4..5023c85ebc6e 100644
--- a/drivers/tty/tty_port.c
+++ b/drivers/tty/tty_port.c
@@ -89,7 +89,8 @@ void tty_port_link_device(struct tty_port *port,
{
if (WARN_ON(index >= driver->num))
return;
- driver->ports[index] = port;
+ if (!driver->ports[index])
+ driver->ports[index] = port;
}
EXPORT_SYMBOL_GPL(tty_port_link_device);
diff --git a/drivers/usb/atm/ueagle-atm.c b/drivers/usb/atm/ueagle-atm.c
index 8b0ea8c70d73..635cf0466b59 100644
--- a/drivers/usb/atm/ueagle-atm.c
+++ b/drivers/usb/atm/ueagle-atm.c
@@ -2124,10 +2124,11 @@ resubmit:
/*
* Start the modem : init the data and start kernel thread
*/
-static int uea_boot(struct uea_softc *sc)
+static int uea_boot(struct uea_softc *sc, struct usb_interface *intf)
{
- int ret, size;
struct intr_pkt *intr;
+ int ret = -ENOMEM;
+ int size;
uea_enters(INS_TO_USBDEV(sc));
@@ -2152,6 +2153,11 @@ static int uea_boot(struct uea_softc *sc)
if (UEA_CHIP_VERSION(sc) == ADI930)
load_XILINX_firmware(sc);
+ if (intf->cur_altsetting->desc.bNumEndpoints < 1) {
+ ret = -ENODEV;
+ goto err0;
+ }
+
intr = kmalloc(size, GFP_KERNEL);
if (!intr)
goto err0;
@@ -2163,8 +2169,7 @@ static int uea_boot(struct uea_softc *sc)
usb_fill_int_urb(sc->urb_int, sc->usb_dev,
usb_rcvintpipe(sc->usb_dev, UEA_INTR_PIPE),
intr, size, uea_intr, sc,
- sc->usb_dev->actconfig->interface[0]->altsetting[0].
- endpoint[0].desc.bInterval);
+ intf->cur_altsetting->endpoint[0].desc.bInterval);
ret = usb_submit_urb(sc->urb_int, GFP_KERNEL);
if (ret < 0) {
@@ -2179,6 +2184,7 @@ static int uea_boot(struct uea_softc *sc)
sc->kthread = kthread_create(uea_kthread, sc, "ueagle-atm");
if (IS_ERR(sc->kthread)) {
uea_err(INS_TO_USBDEV(sc), "failed to create thread\n");
+ ret = PTR_ERR(sc->kthread);
goto err2;
}
@@ -2193,7 +2199,7 @@ err1:
kfree(intr);
err0:
uea_leaves(INS_TO_USBDEV(sc));
- return -ENOMEM;
+ return ret;
}
/*
@@ -2548,7 +2554,7 @@ static int uea_bind(struct usbatm_data *usbatm, struct usb_interface *intf,
}
}
- ret = uea_boot(sc);
+ ret = uea_boot(sc, intf);
if (ret < 0)
goto error;
diff --git a/drivers/usb/atm/usbatm.c b/drivers/usb/atm/usbatm.c
index dbea28495e1d..4e12a32ca392 100644
--- a/drivers/usb/atm/usbatm.c
+++ b/drivers/usb/atm/usbatm.c
@@ -1275,7 +1275,7 @@ EXPORT_SYMBOL_GPL(usbatm_usb_disconnect);
static int __init usbatm_usb_init(void)
{
- if (sizeof(struct usbatm_control) > FIELD_SIZEOF(struct sk_buff, cb)) {
+ if (sizeof(struct usbatm_control) > sizeof_field(struct sk_buff, cb)) {
printk(KERN_ERR "%s unusable with this kernel!\n", usbatm_driver_name);
return -EIO;
}
diff --git a/drivers/usb/common/usb-conn-gpio.c b/drivers/usb/common/usb-conn-gpio.c
index 87338f9eb5be..ed204cbb63ea 100644
--- a/drivers/usb/common/usb-conn-gpio.c
+++ b/drivers/usb/common/usb-conn-gpio.c
@@ -156,7 +156,8 @@ static int usb_conn_probe(struct platform_device *pdev)
info->vbus = devm_regulator_get(dev, "vbus");
if (IS_ERR(info->vbus)) {
- dev_err(dev, "failed to get vbus\n");
+ if (PTR_ERR(info->vbus) != -EPROBE_DEFER)
+ dev_err(dev, "failed to get vbus\n");
return PTR_ERR(info->vbus);
}
diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
index 281568d464f9..aa45840d8273 100644
--- a/drivers/usb/core/hcd.c
+++ b/drivers/usb/core/hcd.c
@@ -1409,7 +1409,17 @@ int usb_hcd_map_urb_for_dma(struct usb_hcd *hcd, struct urb *urb,
if (usb_endpoint_xfer_control(&urb->ep->desc)) {
if (hcd->self.uses_pio_for_control)
return ret;
- if (hcd_uses_dma(hcd)) {
+ if (hcd->localmem_pool) {
+ ret = hcd_alloc_coherent(
+ urb->dev->bus, mem_flags,
+ &urb->setup_dma,
+ (void **)&urb->setup_packet,
+ sizeof(struct usb_ctrlrequest),
+ DMA_TO_DEVICE);
+ if (ret)
+ return ret;
+ urb->transfer_flags |= URB_SETUP_MAP_LOCAL;
+ } else if (hcd_uses_dma(hcd)) {
if (object_is_on_stack(urb->setup_packet)) {
WARN_ONCE(1, "setup packet is on stack\n");
return -EAGAIN;
@@ -1424,23 +1434,22 @@ int usb_hcd_map_urb_for_dma(struct usb_hcd *hcd, struct urb *urb,
urb->setup_dma))
return -EAGAIN;
urb->transfer_flags |= URB_SETUP_MAP_SINGLE;
- } else if (hcd->localmem_pool) {
- ret = hcd_alloc_coherent(
- urb->dev->bus, mem_flags,
- &urb->setup_dma,
- (void **)&urb->setup_packet,
- sizeof(struct usb_ctrlrequest),
- DMA_TO_DEVICE);
- if (ret)
- return ret;
- urb->transfer_flags |= URB_SETUP_MAP_LOCAL;
}
}
dir = usb_urb_dir_in(urb) ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
if (urb->transfer_buffer_length != 0
&& !(urb->transfer_flags & URB_NO_TRANSFER_DMA_MAP)) {
- if (hcd_uses_dma(hcd)) {
+ if (hcd->localmem_pool) {
+ ret = hcd_alloc_coherent(
+ urb->dev->bus, mem_flags,
+ &urb->transfer_dma,
+ &urb->transfer_buffer,
+ urb->transfer_buffer_length,
+ dir);
+ if (ret == 0)
+ urb->transfer_flags |= URB_MAP_LOCAL;
+ } else if (hcd_uses_dma(hcd)) {
if (urb->num_sgs) {
int n;
@@ -1491,15 +1500,6 @@ int usb_hcd_map_urb_for_dma(struct usb_hcd *hcd, struct urb *urb,
else
urb->transfer_flags |= URB_DMA_MAP_SINGLE;
}
- } else if (hcd->localmem_pool) {
- ret = hcd_alloc_coherent(
- urb->dev->bus, mem_flags,
- &urb->transfer_dma,
- &urb->transfer_buffer,
- urb->transfer_buffer_length,
- dir);
- if (ret == 0)
- urb->transfer_flags |= URB_MAP_LOCAL;
}
if (ret && (urb->transfer_flags & (URB_SETUP_MAP_SINGLE |
URB_SETUP_MAP_LOCAL)))
diff --git a/drivers/usb/core/urb.c b/drivers/usb/core/urb.c
index 0eab79f82ce4..da923ec17612 100644
--- a/drivers/usb/core/urb.c
+++ b/drivers/usb/core/urb.c
@@ -45,6 +45,7 @@ void usb_init_urb(struct urb *urb)
if (urb) {
memset(urb, 0, sizeof(*urb));
kref_init(&urb->kref);
+ INIT_LIST_HEAD(&urb->urb_list);
INIT_LIST_HEAD(&urb->anchor_list);
}
}
diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c
index 023f0357efd7..294276f7deb9 100644
--- a/drivers/usb/dwc3/dwc3-pci.c
+++ b/drivers/usb/dwc3/dwc3-pci.c
@@ -29,7 +29,8 @@
#define PCI_DEVICE_ID_INTEL_BXT_M 0x1aaa
#define PCI_DEVICE_ID_INTEL_APL 0x5aaa
#define PCI_DEVICE_ID_INTEL_KBP 0xa2b0
-#define PCI_DEVICE_ID_INTEL_CMLH 0x02ee
+#define PCI_DEVICE_ID_INTEL_CMLLP 0x02ee
+#define PCI_DEVICE_ID_INTEL_CMLH 0x06ee
#define PCI_DEVICE_ID_INTEL_GLK 0x31aa
#define PCI_DEVICE_ID_INTEL_CNPLP 0x9dee
#define PCI_DEVICE_ID_INTEL_CNPH 0xa36e
@@ -308,6 +309,9 @@ static const struct pci_device_id dwc3_pci_id_table[] = {
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_MRFLD),
(kernel_ulong_t) &dwc3_pci_mrfld_properties, },
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_CMLLP),
+ (kernel_ulong_t) &dwc3_pci_intel_properties, },
+
{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_CMLH),
(kernel_ulong_t) &dwc3_pci_intel_properties, },
diff --git a/drivers/usb/dwc3/ep0.c b/drivers/usb/dwc3/ep0.c
index 3996b9c4ff8d..fd1b100d2927 100644
--- a/drivers/usb/dwc3/ep0.c
+++ b/drivers/usb/dwc3/ep0.c
@@ -1117,6 +1117,9 @@ static void dwc3_ep0_xfernotready(struct dwc3 *dwc,
void dwc3_ep0_interrupt(struct dwc3 *dwc,
const struct dwc3_event_depevt *event)
{
+ struct dwc3_ep *dep = dwc->eps[event->endpoint_number];
+ u8 cmd;
+
switch (event->endpoint_event) {
case DWC3_DEPEVT_XFERCOMPLETE:
dwc3_ep0_xfer_complete(dwc, event);
@@ -1129,7 +1132,12 @@ void dwc3_ep0_interrupt(struct dwc3 *dwc,
case DWC3_DEPEVT_XFERINPROGRESS:
case DWC3_DEPEVT_RXTXFIFOEVT:
case DWC3_DEPEVT_STREAMEVT:
+ break;
case DWC3_DEPEVT_EPCMDCMPLT:
+ cmd = DEPEVT_PARAMETER_CMD(event->parameters);
+
+ if (cmd == DWC3_DEPCMD_ENDTRANSFER)
+ dep->flags &= ~DWC3_EP_TRANSFER_STARTED;
break;
}
}
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index a9aba716bf80..0c960a97ea02 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -2491,7 +2491,7 @@ static int dwc3_gadget_ep_cleanup_completed_request(struct dwc3_ep *dep,
req->request.actual = req->request.length - req->remaining;
- if (!dwc3_gadget_ep_request_completed(req) &&
+ if (!dwc3_gadget_ep_request_completed(req) ||
req->num_pending_sgs) {
__dwc3_gadget_kick_transfer(dep);
goto out;
@@ -2719,6 +2719,9 @@ static void dwc3_stop_active_transfer(struct dwc3_ep *dep, bool force,
WARN_ON_ONCE(ret);
dep->resource_index = 0;
+ if (!interrupt)
+ dep->flags &= ~DWC3_EP_TRANSFER_STARTED;
+
if (dwc3_is_usb31(dwc) || dwc->revision < DWC3_REVISION_310A)
udelay(100);
}
diff --git a/drivers/usb/gadget/function/f_ecm.c b/drivers/usb/gadget/function/f_ecm.c
index 6ce044008cf6..460d5d7c984f 100644
--- a/drivers/usb/gadget/function/f_ecm.c
+++ b/drivers/usb/gadget/function/f_ecm.c
@@ -621,8 +621,12 @@ static void ecm_disable(struct usb_function *f)
DBG(cdev, "ecm deactivated\n");
- if (ecm->port.in_ep->enabled)
+ if (ecm->port.in_ep->enabled) {
gether_disconnect(&ecm->port);
+ } else {
+ ecm->port.in_ep->desc = NULL;
+ ecm->port.out_ep->desc = NULL;
+ }
usb_ep_disable(ecm->notify);
ecm->notify->desc = NULL;
diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c
index ce1d0235969c..0bbccac94d6c 100644
--- a/drivers/usb/gadget/function/f_fs.c
+++ b/drivers/usb/gadget/function/f_fs.c
@@ -3509,7 +3509,7 @@ static void ffs_free_inst(struct usb_function_instance *f)
static int ffs_set_inst_name(struct usb_function_instance *fi, const char *name)
{
- if (strlen(name) >= FIELD_SIZEOF(struct ffs_dev, name))
+ if (strlen(name) >= sizeof_field(struct ffs_dev, name))
return -ENAMETOOLONG;
return ffs_name_dev(to_f_fs_opts(fi)->dev, name);
}
diff --git a/drivers/usb/gadget/function/f_rndis.c b/drivers/usb/gadget/function/f_rndis.c
index d48df36622b7..0d8e4a364ca6 100644
--- a/drivers/usb/gadget/function/f_rndis.c
+++ b/drivers/usb/gadget/function/f_rndis.c
@@ -618,6 +618,7 @@ static void rndis_disable(struct usb_function *f)
gether_disconnect(&rndis->port);
usb_ep_disable(rndis->notify);
+ rndis->notify->desc = NULL;
}
/*-------------------------------------------------------------------------*/
diff --git a/drivers/usb/host/ehci-q.c b/drivers/usb/host/ehci-q.c
index aa2f77f1506d..8a5c9b3ebe1e 100644
--- a/drivers/usb/host/ehci-q.c
+++ b/drivers/usb/host/ehci-q.c
@@ -27,6 +27,10 @@
/*-------------------------------------------------------------------------*/
+/* PID Codes that are used here, from EHCI specification, Table 3-16. */
+#define PID_CODE_IN 1
+#define PID_CODE_SETUP 2
+
/* fill a qtd, returning how much of the buffer we were able to queue up */
static int
@@ -190,7 +194,7 @@ static int qtd_copy_status (
int status = -EINPROGRESS;
/* count IN/OUT bytes, not SETUP (even short packets) */
- if (likely (QTD_PID (token) != 2))
+ if (likely(QTD_PID(token) != PID_CODE_SETUP))
urb->actual_length += length - QTD_LENGTH (token);
/* don't modify error codes */
@@ -206,6 +210,13 @@ static int qtd_copy_status (
if (token & QTD_STS_BABBLE) {
/* FIXME "must" disable babbling device's port too */
status = -EOVERFLOW;
+ /*
+ * When MMF is active and PID Code is IN, queue is halted.
+ * EHCI Specification, Table 4-13.
+ */
+ } else if ((token & QTD_STS_MMF) &&
+ (QTD_PID(token) == PID_CODE_IN)) {
+ status = -EPROTO;
/* CERR nonzero + halt --> stall */
} else if (QTD_CERR(token)) {
status = -EPIPE;
diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c
index b7d23c438756..7a3a29e5e9d2 100644
--- a/drivers/usb/host/xhci-hub.c
+++ b/drivers/usb/host/xhci-hub.c
@@ -806,7 +806,7 @@ static void xhci_del_comp_mod_timer(struct xhci_hcd *xhci, u32 status,
static int xhci_handle_usb2_port_link_resume(struct xhci_port *port,
u32 *status, u32 portsc,
- unsigned long flags)
+ unsigned long *flags)
{
struct xhci_bus_state *bus_state;
struct xhci_hcd *xhci;
@@ -860,11 +860,11 @@ static int xhci_handle_usb2_port_link_resume(struct xhci_port *port,
xhci_test_and_clear_bit(xhci, port, PORT_PLC);
xhci_set_link_state(xhci, port, XDEV_U0);
- spin_unlock_irqrestore(&xhci->lock, flags);
+ spin_unlock_irqrestore(&xhci->lock, *flags);
time_left = wait_for_completion_timeout(
&bus_state->rexit_done[wIndex],
msecs_to_jiffies(XHCI_MAX_REXIT_TIMEOUT_MS));
- spin_lock_irqsave(&xhci->lock, flags);
+ spin_lock_irqsave(&xhci->lock, *flags);
if (time_left) {
slot_id = xhci_find_slot_id_by_port(hcd, xhci,
@@ -920,11 +920,13 @@ static void xhci_get_usb3_port_status(struct xhci_port *port, u32 *status,
{
struct xhci_bus_state *bus_state;
struct xhci_hcd *xhci;
+ struct usb_hcd *hcd;
u32 link_state;
u32 portnum;
bus_state = &port->rhub->bus_state;
xhci = hcd_to_xhci(port->rhub->hcd);
+ hcd = port->rhub->hcd;
link_state = portsc & PORT_PLS_MASK;
portnum = port->hcd_portnum;
@@ -952,12 +954,20 @@ static void xhci_get_usb3_port_status(struct xhci_port *port, u32 *status,
bus_state->suspended_ports &= ~(1 << portnum);
}
+ /* remote wake resume signaling complete */
+ if (bus_state->port_remote_wakeup & (1 << portnum) &&
+ link_state != XDEV_RESUME &&
+ link_state != XDEV_RECOVERY) {
+ bus_state->port_remote_wakeup &= ~(1 << portnum);
+ usb_hcd_end_port_resume(&hcd->self, portnum);
+ }
+
xhci_hub_report_usb3_link_state(xhci, status, portsc);
xhci_del_comp_mod_timer(xhci, portsc, portnum);
}
static void xhci_get_usb2_port_status(struct xhci_port *port, u32 *status,
- u32 portsc, unsigned long flags)
+ u32 portsc, unsigned long *flags)
{
struct xhci_bus_state *bus_state;
u32 link_state;
@@ -1007,7 +1017,7 @@ static void xhci_get_usb2_port_status(struct xhci_port *port, u32 *status,
static u32 xhci_get_port_status(struct usb_hcd *hcd,
struct xhci_bus_state *bus_state,
u16 wIndex, u32 raw_port_status,
- unsigned long flags)
+ unsigned long *flags)
__releases(&xhci->lock)
__acquires(&xhci->lock)
{
@@ -1130,7 +1140,7 @@ int xhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue,
}
trace_xhci_get_port_status(wIndex, temp);
status = xhci_get_port_status(hcd, bus_state, wIndex, temp,
- flags);
+ &flags);
if (status == 0xffffffff)
goto error;
diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c
index e16eda6e2b8b..3b1388fa2f36 100644
--- a/drivers/usb/host/xhci-mem.c
+++ b/drivers/usb/host/xhci-mem.c
@@ -1909,13 +1909,17 @@ no_bw:
xhci->usb3_rhub.num_ports = 0;
xhci->num_active_eps = 0;
kfree(xhci->usb2_rhub.ports);
+ kfree(xhci->usb2_rhub.psi);
kfree(xhci->usb3_rhub.ports);
+ kfree(xhci->usb3_rhub.psi);
kfree(xhci->hw_ports);
kfree(xhci->rh_bw);
kfree(xhci->ext_caps);
xhci->usb2_rhub.ports = NULL;
+ xhci->usb2_rhub.psi = NULL;
xhci->usb3_rhub.ports = NULL;
+ xhci->usb3_rhub.psi = NULL;
xhci->hw_ports = NULL;
xhci->rh_bw = NULL;
xhci->ext_caps = NULL;
diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
index a0025d23b257..4917c5b033fa 100644
--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c
@@ -519,6 +519,18 @@ static int xhci_pci_resume(struct usb_hcd *hcd, bool hibernated)
retval = xhci_resume(xhci, hibernated);
return retval;
}
+
+static void xhci_pci_shutdown(struct usb_hcd *hcd)
+{
+ struct xhci_hcd *xhci = hcd_to_xhci(hcd);
+ struct pci_dev *pdev = to_pci_dev(hcd->self.controller);
+
+ xhci_shutdown(hcd);
+
+ /* Yet another workaround for spurious wakeups at shutdown with HSW */
+ if (xhci->quirks & XHCI_SPURIOUS_WAKEUP)
+ pci_set_power_state(pdev, PCI_D3hot);
+}
#endif /* CONFIG_PM */
/*-------------------------------------------------------------------------*/
@@ -556,6 +568,7 @@ static int __init xhci_pci_init(void)
#ifdef CONFIG_PM
xhci_pci_hc_driver.pci_suspend = xhci_pci_suspend;
xhci_pci_hc_driver.pci_resume = xhci_pci_resume;
+ xhci_pci_hc_driver.shutdown = xhci_pci_shutdown;
#endif
return pci_register_driver(&xhci_pci_driver);
}
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index 6475c3d3b43b..d23f7408c81f 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -1628,7 +1628,6 @@ static void handle_port_status(struct xhci_hcd *xhci,
slot_id = xhci_find_slot_id_by_port(hcd, xhci, hcd_portnum + 1);
if (slot_id && xhci->devs[slot_id])
xhci->devs[slot_id]->flags |= VDEV_PORT_ERROR;
- bus_state->port_remote_wakeup &= ~(1 << hcd_portnum);
}
if ((portsc & PORT_PLC) && (portsc & PORT_PLS_MASK) == XDEV_RESUME) {
@@ -1648,6 +1647,7 @@ static void handle_port_status(struct xhci_hcd *xhci,
*/
bus_state->port_remote_wakeup |= 1 << hcd_portnum;
xhci_test_and_clear_bit(xhci, port, PORT_PLC);
+ usb_hcd_start_port_resume(&hcd->self, hcd_portnum);
xhci_set_link_state(xhci, port, XDEV_U0);
/* Need to wait until the next link state change
* indicates the device is actually in U0.
@@ -1688,7 +1688,6 @@ static void handle_port_status(struct xhci_hcd *xhci,
if (slot_id && xhci->devs[slot_id])
xhci_ring_device(xhci, slot_id);
if (bus_state->port_remote_wakeup & (1 << hcd_portnum)) {
- bus_state->port_remote_wakeup &= ~(1 << hcd_portnum);
xhci_test_and_clear_bit(xhci, port, PORT_PLC);
usb_wakeup_notification(hcd->self.root_hub,
hcd_portnum + 1);
@@ -2382,7 +2381,8 @@ static int handle_tx_event(struct xhci_hcd *xhci,
case COMP_SUCCESS:
if (EVENT_TRB_LEN(le32_to_cpu(event->transfer_len)) == 0)
break;
- if (xhci->quirks & XHCI_TRUST_TX_LENGTH)
+ if (xhci->quirks & XHCI_TRUST_TX_LENGTH ||
+ ep_ring->last_td_was_short)
trb_comp_code = COMP_SHORT_PACKET;
else
xhci_warn_ratelimited(xhci,
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index 6721d059f58a..dbac0fa9748d 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -770,7 +770,7 @@ static void xhci_stop(struct usb_hcd *hcd)
*
* This will only ever be called with the main usb_hcd (the USB3 roothub).
*/
-static void xhci_shutdown(struct usb_hcd *hcd)
+void xhci_shutdown(struct usb_hcd *hcd)
{
struct xhci_hcd *xhci = hcd_to_xhci(hcd);
@@ -789,11 +789,8 @@ static void xhci_shutdown(struct usb_hcd *hcd)
xhci_dbg_trace(xhci, trace_xhci_dbg_init,
"xhci_shutdown completed - status = %x",
readl(&xhci->op_regs->status));
-
- /* Yet another workaround for spurious wakeups at shutdown with HSW */
- if (xhci->quirks & XHCI_SPURIOUS_WAKEUP)
- pci_set_power_state(to_pci_dev(hcd->self.sysdev), PCI_D3hot);
}
+EXPORT_SYMBOL_GPL(xhci_shutdown);
#ifdef CONFIG_PM
static void xhci_save_registers(struct xhci_hcd *xhci)
@@ -973,7 +970,7 @@ static bool xhci_pending_portevent(struct xhci_hcd *xhci)
int xhci_suspend(struct xhci_hcd *xhci, bool do_wakeup)
{
int rc = 0;
- unsigned int delay = XHCI_MAX_HALT_USEC;
+ unsigned int delay = XHCI_MAX_HALT_USEC * 2;
struct usb_hcd *hcd = xhci_to_hcd(xhci);
u32 command;
u32 res;
diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
index dc6f62a4b197..13d8838cd552 100644
--- a/drivers/usb/host/xhci.h
+++ b/drivers/usb/host/xhci.h
@@ -2050,6 +2050,7 @@ int xhci_start(struct xhci_hcd *xhci);
int xhci_reset(struct xhci_hcd *xhci);
int xhci_run(struct usb_hcd *hcd);
int xhci_gen_setup(struct usb_hcd *hcd, xhci_get_quirks_t get_quirks);
+void xhci_shutdown(struct usb_hcd *hcd);
void xhci_init_driver(struct hc_driver *drv,
const struct xhci_driver_overrides *over);
int xhci_disable_slot(struct xhci_hcd *xhci, u32 slot_id);
diff --git a/drivers/usb/misc/adutux.c b/drivers/usb/misc/adutux.c
index 6f5edb9fc61e..d8d157c4c271 100644
--- a/drivers/usb/misc/adutux.c
+++ b/drivers/usb/misc/adutux.c
@@ -669,7 +669,7 @@ static int adu_probe(struct usb_interface *interface,
init_waitqueue_head(&dev->read_wait);
init_waitqueue_head(&dev->write_wait);
- res = usb_find_common_endpoints_reverse(&interface->altsetting[0],
+ res = usb_find_common_endpoints_reverse(interface->cur_altsetting,
NULL, NULL,
&dev->interrupt_in_endpoint,
&dev->interrupt_out_endpoint);
diff --git a/drivers/usb/misc/idmouse.c b/drivers/usb/misc/idmouse.c
index 4afb5ddfd361..e9437a176518 100644
--- a/drivers/usb/misc/idmouse.c
+++ b/drivers/usb/misc/idmouse.c
@@ -322,7 +322,7 @@ static int idmouse_probe(struct usb_interface *interface,
int result;
/* check if we have gotten the data or the hid interface */
- iface_desc = &interface->altsetting[0];
+ iface_desc = interface->cur_altsetting;
if (iface_desc->desc.bInterfaceClass != 0x0A)
return -ENODEV;
diff --git a/drivers/usb/mon/mon_bin.c b/drivers/usb/mon/mon_bin.c
index ac2b4fcc265f..f48a23adbc35 100644
--- a/drivers/usb/mon/mon_bin.c
+++ b/drivers/usb/mon/mon_bin.c
@@ -1039,12 +1039,18 @@ static long mon_bin_ioctl(struct file *file, unsigned int cmd, unsigned long arg
mutex_lock(&rp->fetch_lock);
spin_lock_irqsave(&rp->b_lock, flags);
- mon_free_buff(rp->b_vec, rp->b_size/CHUNK_SIZE);
- kfree(rp->b_vec);
- rp->b_vec = vec;
- rp->b_size = size;
- rp->b_read = rp->b_in = rp->b_out = rp->b_cnt = 0;
- rp->cnt_lost = 0;
+ if (rp->mmap_active) {
+ mon_free_buff(vec, size/CHUNK_SIZE);
+ kfree(vec);
+ ret = -EBUSY;
+ } else {
+ mon_free_buff(rp->b_vec, rp->b_size/CHUNK_SIZE);
+ kfree(rp->b_vec);
+ rp->b_vec = vec;
+ rp->b_size = size;
+ rp->b_read = rp->b_in = rp->b_out = rp->b_cnt = 0;
+ rp->cnt_lost = 0;
+ }
spin_unlock_irqrestore(&rp->b_lock, flags);
mutex_unlock(&rp->fetch_lock);
}
@@ -1216,13 +1222,21 @@ mon_bin_poll(struct file *file, struct poll_table_struct *wait)
static void mon_bin_vma_open(struct vm_area_struct *vma)
{
struct mon_reader_bin *rp = vma->vm_private_data;
+ unsigned long flags;
+
+ spin_lock_irqsave(&rp->b_lock, flags);
rp->mmap_active++;
+ spin_unlock_irqrestore(&rp->b_lock, flags);
}
static void mon_bin_vma_close(struct vm_area_struct *vma)
{
+ unsigned long flags;
+
struct mon_reader_bin *rp = vma->vm_private_data;
+ spin_lock_irqsave(&rp->b_lock, flags);
rp->mmap_active--;
+ spin_unlock_irqrestore(&rp->b_lock, flags);
}
/*
@@ -1234,16 +1248,12 @@ static vm_fault_t mon_bin_vma_fault(struct vm_fault *vmf)
unsigned long offset, chunk_idx;
struct page *pageptr;
- mutex_lock(&rp->fetch_lock);
offset = vmf->pgoff << PAGE_SHIFT;
- if (offset >= rp->b_size) {
- mutex_unlock(&rp->fetch_lock);
+ if (offset >= rp->b_size)
return VM_FAULT_SIGBUS;
- }
chunk_idx = offset / CHUNK_SIZE;
pageptr = rp->b_vec[chunk_idx].pg;
get_page(pageptr);
- mutex_unlock(&rp->fetch_lock);
vmf->page = pageptr;
return 0;
}
diff --git a/drivers/usb/roles/class.c b/drivers/usb/roles/class.c
index 8273126ffdf4..63a00ff26655 100644
--- a/drivers/usb/roles/class.c
+++ b/drivers/usb/roles/class.c
@@ -169,8 +169,8 @@ EXPORT_SYMBOL_GPL(fwnode_usb_role_switch_get);
void usb_role_switch_put(struct usb_role_switch *sw)
{
if (!IS_ERR_OR_NULL(sw)) {
- put_device(&sw->dev);
module_put(sw->dev.parent->driver->owner);
+ put_device(&sw->dev);
}
}
EXPORT_SYMBOL_GPL(usb_role_switch_put);
diff --git a/drivers/usb/serial/io_edgeport.c b/drivers/usb/serial/io_edgeport.c
index 48a439298a68..9690a5f4b9d6 100644
--- a/drivers/usb/serial/io_edgeport.c
+++ b/drivers/usb/serial/io_edgeport.c
@@ -2901,16 +2901,18 @@ static int edge_startup(struct usb_serial *serial)
response = 0;
if (edge_serial->is_epic) {
+ struct usb_host_interface *alt;
+
+ alt = serial->interface->cur_altsetting;
+
/* EPIC thing, set up our interrupt polling now and our read
* urb, so that the device knows it really is connected. */
interrupt_in_found = bulk_in_found = bulk_out_found = false;
- for (i = 0; i < serial->interface->altsetting[0]
- .desc.bNumEndpoints; ++i) {
+ for (i = 0; i < alt->desc.bNumEndpoints; ++i) {
struct usb_endpoint_descriptor *endpoint;
int buffer_size;
- endpoint = &serial->interface->altsetting[0].
- endpoint[i].desc;
+ endpoint = &alt->endpoint[i].desc;
buffer_size = usb_endpoint_maxp(endpoint);
if (!interrupt_in_found &&
(usb_endpoint_is_int_in(endpoint))) {
diff --git a/drivers/usb/storage/scsiglue.c b/drivers/usb/storage/scsiglue.c
index 66a4dcbbb1fc..f4c2359abb1b 100644
--- a/drivers/usb/storage/scsiglue.c
+++ b/drivers/usb/storage/scsiglue.c
@@ -135,7 +135,8 @@ static int slave_configure(struct scsi_device *sdev)
* For such controllers we need to make sure the block layer sets
* up bounce buffers in addressable memory.
*/
- if (!hcd_uses_dma(bus_to_hcd(us->pusb_dev->bus)))
+ if (!hcd_uses_dma(bus_to_hcd(us->pusb_dev->bus)) ||
+ (bus_to_hcd(us->pusb_dev->bus)->localmem_pool != NULL))
blk_queue_bounce_limit(sdev->request_queue, BLK_BOUNCE_HIGH);
/*
diff --git a/drivers/usb/typec/class.c b/drivers/usb/typec/class.c
index 7ece6ca6e690..91d62276b56f 100644
--- a/drivers/usb/typec/class.c
+++ b/drivers/usb/typec/class.c
@@ -1612,14 +1612,16 @@ struct typec_port *typec_register_port(struct device *parent,
port->sw = typec_switch_get(&port->dev);
if (IS_ERR(port->sw)) {
+ ret = PTR_ERR(port->sw);
put_device(&port->dev);
- return ERR_CAST(port->sw);
+ return ERR_PTR(ret);
}
port->mux = typec_mux_get(&port->dev, NULL);
if (IS_ERR(port->mux)) {
+ ret = PTR_ERR(port->mux);
put_device(&port->dev);
- return ERR_CAST(port->mux);
+ return ERR_PTR(ret);
}
ret = device_add(&port->dev);
diff --git a/drivers/usb/typec/tcpm/Kconfig b/drivers/usb/typec/tcpm/Kconfig
index 72481bbb2af3..5b986d6c801d 100644
--- a/drivers/usb/typec/tcpm/Kconfig
+++ b/drivers/usb/typec/tcpm/Kconfig
@@ -32,6 +32,7 @@ endif # TYPEC_TCPCI
config TYPEC_FUSB302
tristate "Fairchild FUSB302 Type-C chip driver"
depends on I2C
+ depends on EXTCON || !EXTCON
help
The Fairchild FUSB302 Type-C chip driver that works with
Type-C Port Controller Manager to provide USB PD and USB
diff --git a/drivers/usb/usbip/usbip_common.c b/drivers/usb/usbip/usbip_common.c
index 6532d68e8808..e4b96674c405 100644
--- a/drivers/usb/usbip/usbip_common.c
+++ b/drivers/usb/usbip/usbip_common.c
@@ -727,6 +727,9 @@ int usbip_recv_xbuff(struct usbip_device *ud, struct urb *urb)
copy -= recv;
ret += recv;
+
+ if (!copy)
+ break;
}
if (ret != size)
diff --git a/drivers/usb/usbip/vhci_rx.c b/drivers/usb/usbip/vhci_rx.c
index 33f8972ba842..00fc98741c5d 100644
--- a/drivers/usb/usbip/vhci_rx.c
+++ b/drivers/usb/usbip/vhci_rx.c
@@ -77,16 +77,21 @@ static void vhci_recv_ret_submit(struct vhci_device *vdev,
usbip_pack_pdu(pdu, urb, USBIP_RET_SUBMIT, 0);
/* recv transfer buffer */
- if (usbip_recv_xbuff(ud, urb) < 0)
- return;
+ if (usbip_recv_xbuff(ud, urb) < 0) {
+ urb->status = -EPROTO;
+ goto error;
+ }
/* recv iso_packet_descriptor */
- if (usbip_recv_iso(ud, urb) < 0)
- return;
+ if (usbip_recv_iso(ud, urb) < 0) {
+ urb->status = -EPROTO;
+ goto error;
+ }
/* restore the padding in iso packets */
usbip_pad_iso(ud, urb);
+error:
if (usbip_dbg_flag_vhci_rx)
usbip_dump_urb(urb);
diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index e05679c478e2..93f995f6cf36 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -32,10 +32,11 @@
#define VIRTIO_BALLOON_FREE_PAGE_ALLOC_FLAG (__GFP_NORETRY | __GFP_NOWARN | \
__GFP_NOMEMALLOC)
/* The order of free page blocks to report to host */
-#define VIRTIO_BALLOON_FREE_PAGE_ORDER (MAX_ORDER - 1)
+#define VIRTIO_BALLOON_HINT_BLOCK_ORDER (MAX_ORDER - 1)
/* The size of a free page block in bytes */
-#define VIRTIO_BALLOON_FREE_PAGE_SIZE \
- (1 << (VIRTIO_BALLOON_FREE_PAGE_ORDER + PAGE_SHIFT))
+#define VIRTIO_BALLOON_HINT_BLOCK_BYTES \
+ (1 << (VIRTIO_BALLOON_HINT_BLOCK_ORDER + PAGE_SHIFT))
+#define VIRTIO_BALLOON_HINT_BLOCK_PAGES (1 << VIRTIO_BALLOON_HINT_BLOCK_ORDER)
#ifdef CONFIG_BALLOON_COMPACTION
static struct vfsmount *balloon_mnt;
@@ -380,7 +381,7 @@ static unsigned long return_free_pages_to_mm(struct virtio_balloon *vb,
if (!page)
break;
free_pages((unsigned long)page_address(page),
- VIRTIO_BALLOON_FREE_PAGE_ORDER);
+ VIRTIO_BALLOON_HINT_BLOCK_ORDER);
}
vb->num_free_page_blocks -= num_returned;
spin_unlock_irq(&vb->free_page_list_lock);
@@ -582,7 +583,7 @@ static int get_free_page_and_send(struct virtio_balloon *vb)
;
page = alloc_pages(VIRTIO_BALLOON_FREE_PAGE_ALLOC_FLAG,
- VIRTIO_BALLOON_FREE_PAGE_ORDER);
+ VIRTIO_BALLOON_HINT_BLOCK_ORDER);
/*
* When the allocation returns NULL, it indicates that we have got all
* the possible free pages, so return -EINTR to stop.
@@ -591,13 +592,13 @@ static int get_free_page_and_send(struct virtio_balloon *vb)
return -EINTR;
p = page_address(page);
- sg_init_one(&sg, p, VIRTIO_BALLOON_FREE_PAGE_SIZE);
+ sg_init_one(&sg, p, VIRTIO_BALLOON_HINT_BLOCK_BYTES);
/* There is always 1 entry reserved for the cmd id to use. */
if (vq->num_free > 1) {
err = virtqueue_add_inbuf(vq, &sg, 1, p, GFP_KERNEL);
if (unlikely(err)) {
free_pages((unsigned long)p,
- VIRTIO_BALLOON_FREE_PAGE_ORDER);
+ VIRTIO_BALLOON_HINT_BLOCK_ORDER);
return err;
}
virtqueue_kick(vq);
@@ -610,7 +611,7 @@ static int get_free_page_and_send(struct virtio_balloon *vb)
* The vq has no available entry to add this page block, so
* just free it.
*/
- free_pages((unsigned long)p, VIRTIO_BALLOON_FREE_PAGE_ORDER);
+ free_pages((unsigned long)p, VIRTIO_BALLOON_HINT_BLOCK_ORDER);
}
return 0;
@@ -721,6 +722,17 @@ static int virtballoon_migratepage(struct balloon_dev_info *vb_dev_info,
get_page(newpage); /* balloon reference */
+ /*
+ * When we migrate a page to a different zone and adjusted the
+ * managed page count when inflating, we have to fixup the count of
+ * both involved zones.
+ */
+ if (!virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM) &&
+ page_zone(page) != page_zone(newpage)) {
+ adjust_managed_page_count(page, 1);
+ adjust_managed_page_count(newpage, -1);
+ }
+
/* balloon's page migration 1st step -- inflate "newpage" */
spin_lock_irqsave(&vb_dev_info->pages_lock, flags);
balloon_page_insert(vb_dev_info, newpage);
@@ -765,11 +777,11 @@ static unsigned long shrink_free_pages(struct virtio_balloon *vb,
unsigned long blocks_to_free, blocks_freed;
pages_to_free = round_up(pages_to_free,
- 1 << VIRTIO_BALLOON_FREE_PAGE_ORDER);
- blocks_to_free = pages_to_free >> VIRTIO_BALLOON_FREE_PAGE_ORDER;
+ VIRTIO_BALLOON_HINT_BLOCK_PAGES);
+ blocks_to_free = pages_to_free / VIRTIO_BALLOON_HINT_BLOCK_PAGES;
blocks_freed = return_free_pages_to_mm(vb, blocks_to_free);
- return blocks_freed << VIRTIO_BALLOON_FREE_PAGE_ORDER;
+ return blocks_freed * VIRTIO_BALLOON_HINT_BLOCK_PAGES;
}
static unsigned long leak_balloon_pages(struct virtio_balloon *vb,
@@ -826,7 +838,7 @@ static unsigned long virtio_balloon_shrinker_count(struct shrinker *shrinker,
unsigned long count;
count = vb->num_pages / VIRTIO_BALLOON_PAGES_PER_PAGE;
- count += vb->num_free_page_blocks << VIRTIO_BALLOON_FREE_PAGE_ORDER;
+ count += vb->num_free_page_blocks * VIRTIO_BALLOON_HINT_BLOCK_PAGES;
return count;
}
diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
index 1679e0dc869b..cec868f8db3f 100644
--- a/drivers/watchdog/Kconfig
+++ b/drivers/watchdog/Kconfig
@@ -687,6 +687,7 @@ config MAX63XX_WATCHDOG
config MAX77620_WATCHDOG
tristate "Maxim Max77620 Watchdog Timer"
depends on MFD_MAX77620 || COMPILE_TEST
+ select WATCHDOG_CORE
help
This is the driver for the Max77620 watchdog timer.
Say 'Y' here to enable the watchdog timer support for
@@ -1444,6 +1445,7 @@ config SMSC37B787_WDT
config TQMX86_WDT
tristate "TQ-Systems TQMX86 Watchdog Timer"
depends on X86
+ select WATCHDOG_CORE
help
This is the driver for the hardware watchdog timer in the TQMX86 IO
controller found on some of their ComExpress Modules.
diff --git a/drivers/watchdog/imx7ulp_wdt.c b/drivers/watchdog/imx7ulp_wdt.c
index 0a87c6f4bab2..11b9e7c6b7f5 100644
--- a/drivers/watchdog/imx7ulp_wdt.c
+++ b/drivers/watchdog/imx7ulp_wdt.c
@@ -112,7 +112,7 @@ static int imx7ulp_wdt_restart(struct watchdog_device *wdog,
{
struct imx7ulp_wdt_device *wdt = watchdog_get_drvdata(wdog);
- imx7ulp_wdt_enable(wdt->base, true);
+ imx7ulp_wdt_enable(wdog, true);
imx7ulp_wdt_set_timeout(&wdt->wdd, 1);
/* wait for wdog to fire */
diff --git a/drivers/watchdog/orion_wdt.c b/drivers/watchdog/orion_wdt.c
index 1cccf8eb1c5d..8e6dfe76f9c9 100644
--- a/drivers/watchdog/orion_wdt.c
+++ b/drivers/watchdog/orion_wdt.c
@@ -602,7 +602,7 @@ static int orion_wdt_probe(struct platform_device *pdev)
set_bit(WDOG_HW_RUNNING, &dev->wdt.status);
/* Request the IRQ only after the watchdog is disabled */
- irq = platform_get_irq(pdev, 0);
+ irq = platform_get_irq_optional(pdev, 0);
if (irq > 0) {
/*
* Not all supported platforms specify an interrupt for the
@@ -617,7 +617,7 @@ static int orion_wdt_probe(struct platform_device *pdev)
}
/* Optional 2nd interrupt for pretimeout */
- irq = platform_get_irq(pdev, 1);
+ irq = platform_get_irq_optional(pdev, 1);
if (irq > 0) {
orion_wdt_info.options |= WDIOF_PRETIMEOUT;
ret = devm_request_irq(&pdev->dev, irq, orion_wdt_pre_irq,
diff --git a/drivers/watchdog/rn5t618_wdt.c b/drivers/watchdog/rn5t618_wdt.c
index 234876047431..6e524c8e26a8 100644
--- a/drivers/watchdog/rn5t618_wdt.c
+++ b/drivers/watchdog/rn5t618_wdt.c
@@ -188,6 +188,7 @@ static struct platform_driver rn5t618_wdt_driver = {
module_platform_driver(rn5t618_wdt_driver);
+MODULE_ALIAS("platform:rn5t618-wdt");
MODULE_AUTHOR("Beniamino Galvani <[email protected]>");
MODULE_DESCRIPTION("RN5T618 watchdog driver");
MODULE_LICENSE("GPL v2");
diff --git a/drivers/watchdog/w83627hf_wdt.c b/drivers/watchdog/w83627hf_wdt.c
index fdf533fe0bb2..56a4a4030ca9 100644
--- a/drivers/watchdog/w83627hf_wdt.c
+++ b/drivers/watchdog/w83627hf_wdt.c
@@ -420,7 +420,7 @@ static int wdt_find(int addr)
cr_wdt_csr = NCT6102D_WDT_CSR;
break;
case NCT6116_ID:
- ret = nct6102;
+ ret = nct6116;
cr_wdt_timeout = NCT6102D_WDT_TIMEOUT;
cr_wdt_control = NCT6102D_WDT_CONTROL;
cr_wdt_csr = NCT6102D_WDT_CSR;
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index 4f2e78a5e4db..0c142bcab79d 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -394,7 +394,8 @@ static struct notifier_block xen_memory_nb = {
#else
static enum bp_state reserve_additional_memory(void)
{
- balloon_stats.target_pages = balloon_stats.current_pages;
+ balloon_stats.target_pages = balloon_stats.current_pages +
+ balloon_stats.target_unpopulated;
return BP_ECANCELED;
}
#endif /* CONFIG_XEN_BALLOON_MEMORY_HOTPLUG */
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index 49b381e104ef..7b36b51cdb9f 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -664,7 +664,6 @@ static int grow_gnttab_list(unsigned int more_frames)
unsigned int nr_glist_frames, new_nr_glist_frames;
unsigned int grefs_per_frame;
- BUG_ON(gnttab_interface == NULL);
grefs_per_frame = gnttab_interface->grefs_per_grant_frame;
new_nr_grant_frames = nr_grant_frames + more_frames;
@@ -1160,7 +1159,6 @@ EXPORT_SYMBOL_GPL(gnttab_unmap_refs_sync);
static unsigned int nr_status_frames(unsigned int nr_grant_frames)
{
- BUG_ON(gnttab_interface == NULL);
return gnttab_frames(nr_grant_frames, SPP);
}
@@ -1388,7 +1386,6 @@ static int gnttab_expand(unsigned int req_entries)
int rc;
unsigned int cur, extra;
- BUG_ON(gnttab_interface == NULL);
cur = nr_grant_frames;
extra = ((req_entries + gnttab_interface->grefs_per_grant_frame - 1) /
gnttab_interface->grefs_per_grant_frame);
@@ -1423,7 +1420,6 @@ int gnttab_init(void)
/* Determine the maximum number of frames required for the
* grant reference free list on the current hypervisor.
*/
- BUG_ON(gnttab_interface == NULL);
max_nr_glist_frames = (max_nr_grant_frames *
gnttab_interface->grefs_per_grant_frame / RPP);
diff --git a/drivers/xen/xenbus/xenbus.h b/drivers/xen/xenbus/xenbus.h
index d75a2385b37c..5f5b8a7d5b80 100644
--- a/drivers/xen/xenbus/xenbus.h
+++ b/drivers/xen/xenbus/xenbus.h
@@ -116,8 +116,6 @@ int xenbus_probe_devices(struct xen_bus_type *bus);
void xenbus_dev_changed(const char *node, struct xen_bus_type *bus);
-void xenbus_dev_shutdown(struct device *_dev);
-
int xenbus_dev_suspend(struct device *dev);
int xenbus_dev_resume(struct device *dev);
int xenbus_dev_cancel(struct device *dev);
diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c
index c21be6e9d38a..378486b79f96 100644
--- a/drivers/xen/xenbus/xenbus_probe.c
+++ b/drivers/xen/xenbus/xenbus_probe.c
@@ -255,7 +255,6 @@ fail_put:
module_put(drv->driver.owner);
fail:
xenbus_dev_error(dev, err, "xenbus_dev_probe on %s", dev->nodename);
- xenbus_switch_state(dev, XenbusStateClosed);
return err;
}
EXPORT_SYMBOL_GPL(xenbus_dev_probe);
@@ -276,34 +275,20 @@ int xenbus_dev_remove(struct device *_dev)
free_otherend_details(dev);
- xenbus_switch_state(dev, XenbusStateClosed);
+ /*
+ * If the toolstack has forced the device state to closing then set
+ * the state to closed now to allow it to be cleaned up.
+ * Similarly, if the driver does not support re-bind, set the
+ * closed.
+ */
+ if (!drv->allow_rebind ||
+ xenbus_read_driver_state(dev->nodename) == XenbusStateClosing)
+ xenbus_switch_state(dev, XenbusStateClosed);
+
return 0;
}
EXPORT_SYMBOL_GPL(xenbus_dev_remove);
-void xenbus_dev_shutdown(struct device *_dev)
-{
- struct xenbus_device *dev = to_xenbus_device(_dev);
- unsigned long timeout = 5*HZ;
-
- DPRINTK("%s", dev->nodename);
-
- get_device(&dev->dev);
- if (dev->state != XenbusStateConnected) {
- pr_info("%s: %s: %s != Connected, skipping\n",
- __func__, dev->nodename, xenbus_strstate(dev->state));
- goto out;
- }
- xenbus_switch_state(dev, XenbusStateClosing);
- timeout = wait_for_completion_timeout(&dev->down, timeout);
- if (!timeout)
- pr_info("%s: %s timeout closing device\n",
- __func__, dev->nodename);
- out:
- put_device(&dev->dev);
-}
-EXPORT_SYMBOL_GPL(xenbus_dev_shutdown);
-
int xenbus_register_driver_common(struct xenbus_driver *drv,
struct xen_bus_type *bus,
struct module *owner, const char *mod_name)
diff --git a/drivers/xen/xenbus/xenbus_probe_backend.c b/drivers/xen/xenbus/xenbus_probe_backend.c
index b0bed4faf44c..14876faff3b0 100644
--- a/drivers/xen/xenbus/xenbus_probe_backend.c
+++ b/drivers/xen/xenbus/xenbus_probe_backend.c
@@ -198,7 +198,6 @@ static struct xen_bus_type xenbus_backend = {
.uevent = xenbus_uevent_backend,
.probe = xenbus_dev_probe,
.remove = xenbus_dev_remove,
- .shutdown = xenbus_dev_shutdown,
.dev_groups = xenbus_dev_groups,
},
};
diff --git a/drivers/xen/xenbus/xenbus_probe_frontend.c b/drivers/xen/xenbus/xenbus_probe_frontend.c
index a7d90a719cea..8a1650bbe18f 100644
--- a/drivers/xen/xenbus/xenbus_probe_frontend.c
+++ b/drivers/xen/xenbus/xenbus_probe_frontend.c
@@ -126,6 +126,28 @@ static int xenbus_frontend_dev_probe(struct device *dev)
return xenbus_dev_probe(dev);
}
+static void xenbus_frontend_dev_shutdown(struct device *_dev)
+{
+ struct xenbus_device *dev = to_xenbus_device(_dev);
+ unsigned long timeout = 5*HZ;
+
+ DPRINTK("%s", dev->nodename);
+
+ get_device(&dev->dev);
+ if (dev->state != XenbusStateConnected) {
+ pr_info("%s: %s: %s != Connected, skipping\n",
+ __func__, dev->nodename, xenbus_strstate(dev->state));
+ goto out;
+ }
+ xenbus_switch_state(dev, XenbusStateClosing);
+ timeout = wait_for_completion_timeout(&dev->down, timeout);
+ if (!timeout)
+ pr_info("%s: %s timeout closing device\n",
+ __func__, dev->nodename);
+ out:
+ put_device(&dev->dev);
+}
+
static const struct dev_pm_ops xenbus_pm_ops = {
.suspend = xenbus_dev_suspend,
.resume = xenbus_frontend_dev_resume,
@@ -146,7 +168,7 @@ static struct xen_bus_type xenbus_frontend = {
.uevent = xenbus_uevent_frontend,
.probe = xenbus_frontend_dev_probe,
.remove = xenbus_dev_remove,
- .shutdown = xenbus_dev_shutdown,
+ .shutdown = xenbus_frontend_dev_shutdown,
.dev_groups = xenbus_dev_groups,
.pm = &xenbus_pm_ops,
diff --git a/fs/afs/dynroot.c b/fs/afs/dynroot.c
index 4150280509ff..7503899c0a1b 100644
--- a/fs/afs/dynroot.c
+++ b/fs/afs/dynroot.c
@@ -136,6 +136,9 @@ static struct dentry *afs_dynroot_lookup(struct inode *dir, struct dentry *dentr
ASSERTCMP(d_inode(dentry), ==, NULL);
+ if (flags & LOOKUP_CREATE)
+ return ERR_PTR(-EOPNOTSUPP);
+
if (dentry->d_name.len >= AFSNAMEMAX) {
_leave(" = -ENAMETOOLONG");
return ERR_PTR(-ENAMETOOLONG);
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index f532d6d3bd28..79bc5f1338ed 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -126,7 +126,7 @@ static int afs_mntpt_set_params(struct fs_context *fc, struct dentry *mntpt)
if (src_as->cell)
ctx->cell = afs_get_cell(src_as->cell);
- if (size > PAGE_SIZE - 1)
+ if (size < 2 || size > PAGE_SIZE - 1)
return -EINVAL;
page = read_mapping_page(d_inode(mntpt)->i_mapping, 0, NULL);
@@ -140,7 +140,9 @@ static int afs_mntpt_set_params(struct fs_context *fc, struct dentry *mntpt)
}
buf = kmap(page);
- ret = vfs_parse_fs_string(fc, "source", buf, size);
+ ret = -EINVAL;
+ if (buf[size - 1] == '.')
+ ret = vfs_parse_fs_string(fc, "source", buf, size - 1);
kunmap(page);
put_page(page);
if (ret < 0)
diff --git a/fs/afs/proc.c b/fs/afs/proc.c
index fba2ec3a3a9c..468e1713bce1 100644
--- a/fs/afs/proc.c
+++ b/fs/afs/proc.c
@@ -213,13 +213,14 @@ static int afs_proc_cell_volumes_show(struct seq_file *m, void *v)
/* Display header on line 1 */
if (v == &cell->proc_volumes) {
- seq_puts(m, "USE VID TY\n");
+ seq_puts(m, "USE VID TY NAME\n");
return 0;
}
- seq_printf(m, "%3d %08llx %s\n",
+ seq_printf(m, "%3d %08llx %s %s\n",
atomic_read(&vol->usage), vol->vid,
- afs_vol_types[vol->type]);
+ afs_vol_types[vol->type],
+ vol->name);
return 0;
}
diff --git a/fs/afs/server.c b/fs/afs/server.c
index 1686bf188ccd..b7f3cb2130ca 100644
--- a/fs/afs/server.c
+++ b/fs/afs/server.c
@@ -32,18 +32,11 @@ static void afs_dec_servers_outstanding(struct afs_net *net)
struct afs_server *afs_find_server(struct afs_net *net,
const struct sockaddr_rxrpc *srx)
{
- const struct sockaddr_in6 *a = &srx->transport.sin6, *b;
const struct afs_addr_list *alist;
struct afs_server *server = NULL;
unsigned int i;
- bool ipv6 = true;
int seq = 0, diff;
- if (srx->transport.sin6.sin6_addr.s6_addr32[0] == 0 ||
- srx->transport.sin6.sin6_addr.s6_addr32[1] == 0 ||
- srx->transport.sin6.sin6_addr.s6_addr32[2] == htonl(0xffff))
- ipv6 = false;
-
rcu_read_lock();
do {
@@ -52,7 +45,8 @@ struct afs_server *afs_find_server(struct afs_net *net,
server = NULL;
read_seqbegin_or_lock(&net->fs_addr_lock, &seq);
- if (ipv6) {
+ if (srx->transport.family == AF_INET6) {
+ const struct sockaddr_in6 *a = &srx->transport.sin6, *b;
hlist_for_each_entry_rcu(server, &net->fs_addresses6, addr6_link) {
alist = rcu_dereference(server->addresses);
for (i = alist->nr_ipv4; i < alist->nr_addrs; i++) {
@@ -68,15 +62,16 @@ struct afs_server *afs_find_server(struct afs_net *net,
}
}
} else {
+ const struct sockaddr_in *a = &srx->transport.sin, *b;
hlist_for_each_entry_rcu(server, &net->fs_addresses4, addr4_link) {
alist = rcu_dereference(server->addresses);
for (i = 0; i < alist->nr_ipv4; i++) {
- b = &alist->addrs[i].transport.sin6;
- diff = ((u16 __force)a->sin6_port -
- (u16 __force)b->sin6_port);
+ b = &alist->addrs[i].transport.sin;
+ diff = ((u16 __force)a->sin_port -
+ (u16 __force)b->sin_port);
if (diff == 0)
- diff = ((u32 __force)a->sin6_addr.s6_addr32[3] -
- (u32 __force)b->sin6_addr.s6_addr32[3]);
+ diff = ((u32 __force)a->sin_addr.s_addr -
+ (u32 __force)b->sin_addr.s_addr);
if (diff == 0)
goto found;
}
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 488641b1a418..7f8a9b3137bf 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -404,6 +404,7 @@ static int afs_test_super(struct super_block *sb, struct fs_context *fc)
return (as->net_ns == fc->net_ns &&
as->volume &&
as->volume->vid == ctx->volume->vid &&
+ as->cell == ctx->cell &&
!as->dyn_root);
}
@@ -448,7 +449,6 @@ static int afs_fill_super(struct super_block *sb, struct afs_fs_context *ctx)
/* allocate the root inode and dentry */
if (as->dyn_root) {
inode = afs_iget_pseudo_dir(sb, true);
- sb->s_flags |= SB_RDONLY;
} else {
sprintf(sb->s_id, "%llu", as->volume->vid);
afs_activate_volume(as->volume);
diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig
index 75b6d10c9845..575636f6491e 100644
--- a/fs/btrfs/Kconfig
+++ b/fs/btrfs/Kconfig
@@ -7,6 +7,7 @@ config BTRFS_FS
select LIBCRC32C
select CRYPTO_XXHASH
select CRYPTO_SHA256
+ select CRYPTO_BLAKE2B
select ZLIB_INFLATE
select ZLIB_DEFLATE
select LZO_COMPRESS
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index ee834ef7beb4..43e1660f450f 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -447,7 +447,7 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
if (blkcg_css) {
bio->bi_opf |= REQ_CGROUP_PUNT;
- bio_associate_blkg_from_css(bio, blkcg_css);
+ kthread_associate_blkcg(blkcg_css);
}
refcount_set(&cb->pending_bios, 1);
@@ -491,6 +491,8 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
bio->bi_opf = REQ_OP_WRITE | write_flags;
bio->bi_private = cb;
bio->bi_end_io = end_compressed_bio_write;
+ if (blkcg_css)
+ bio->bi_opf |= REQ_CGROUP_PUNT;
bio_add_page(bio, page, PAGE_SIZE, 0);
}
if (bytes_left < PAGE_SIZE) {
@@ -517,6 +519,9 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start,
bio_endio(bio);
}
+ if (blkcg_css)
+ kthread_associate_blkcg(NULL);
+
return 0;
}
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 5b6e86aaf2e1..24658b5a5787 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -379,7 +379,7 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
for (node = rb_first(tm_root); node; node = next) {
next = rb_next(node);
tm = rb_entry(node, struct tree_mod_elem, node);
- if (tm->seq > min_seq)
+ if (tm->seq >= min_seq)
continue;
rb_erase(node, tm_root);
kfree(tm);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index b2e8fd8a8e59..54efb21c2727 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2787,7 +2787,7 @@ struct btrfs_inode_extref *btrfs_find_name_in_ext_backref(
/* file-item.c */
struct btrfs_dio_private;
int btrfs_del_csums(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 bytenr, u64 len);
+ struct btrfs_root *root, u64 bytenr, u64 len);
blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio,
u8 *dst);
blk_status_t btrfs_lookup_bio_sums_dio(struct inode *inode, struct bio *bio,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 153f71a5bba9..274318e9114e 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1869,8 +1869,8 @@ static int cleanup_ref_head(struct btrfs_trans_handle *trans,
btrfs_pin_extent(fs_info, head->bytenr,
head->num_bytes, 1);
if (head->is_data) {
- ret = btrfs_del_csums(trans, fs_info, head->bytenr,
- head->num_bytes);
+ ret = btrfs_del_csums(trans, fs_info->csum_root,
+ head->bytenr, head->num_bytes);
}
}
@@ -3175,7 +3175,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
btrfs_release_path(path);
if (is_data) {
- ret = btrfs_del_csums(trans, info, bytenr, num_bytes);
+ ret = btrfs_del_csums(trans, info->csum_root, bytenr,
+ num_bytes);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto out;
@@ -3799,6 +3800,7 @@ static noinline int find_free_extent(struct btrfs_fs_info *fs_info,
u64 flags, int delalloc)
{
int ret = 0;
+ int cache_block_group_error = 0;
struct btrfs_free_cluster *last_ptr = NULL;
struct btrfs_block_group *block_group = NULL;
struct find_free_extent_ctl ffe_ctl = {0};
@@ -3958,7 +3960,20 @@ have_block_group:
if (unlikely(!ffe_ctl.cached)) {
ffe_ctl.have_caching_bg = true;
ret = btrfs_cache_block_group(block_group, 0);
- BUG_ON(ret < 0);
+
+ /*
+ * If we get ENOMEM here or something else we want to
+ * try other block groups, because it may not be fatal.
+ * However if we can't find anything else we need to
+ * save our return here so that we return the actual
+ * error that caused problems, not ENOSPC.
+ */
+ if (ret < 0) {
+ if (!cache_block_group_error)
+ cache_block_group_error = ret;
+ ret = 0;
+ goto loop;
+ }
ret = 0;
}
@@ -4045,7 +4060,7 @@ loop:
if (ret > 0)
goto search;
- if (ret == -ENOSPC) {
+ if (ret == -ENOSPC && !cache_block_group_error) {
/*
* Use ffe_ctl->total_free_space as fallback if we can't find
* any contiguous hole.
@@ -4056,6 +4071,8 @@ loop:
space_info->max_extent_size = ffe_ctl.max_extent_size;
spin_unlock(&space_info->lock);
ins->offset = ffe_ctl.max_extent_size;
+ } else if (ret == -ENOSPC) {
+ ret = cache_block_group_error;
}
return ret;
}
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index eb8bd0258360..2f4802f405a2 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -5074,12 +5074,14 @@ struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info,
return eb;
eb = alloc_dummy_extent_buffer(fs_info, start);
if (!eb)
- return NULL;
+ return ERR_PTR(-ENOMEM);
eb->fs_info = fs_info;
again:
ret = radix_tree_preload(GFP_NOFS);
- if (ret)
+ if (ret) {
+ exists = ERR_PTR(ret);
goto free_eb;
+ }
spin_lock(&fs_info->buffer_lock);
ret = radix_tree_insert(&fs_info->buffer_radix,
start >> PAGE_SHIFT, eb);
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 3270a40b0777..b1bfdc5c1387 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -590,9 +590,9 @@ static noinline void truncate_one_csum(struct btrfs_fs_info *fs_info,
* range of bytes.
*/
int btrfs_del_csums(struct btrfs_trans_handle *trans,
- struct btrfs_fs_info *fs_info, u64 bytenr, u64 len)
+ struct btrfs_root *root, u64 bytenr, u64 len)
{
- struct btrfs_root *root = fs_info->csum_root;
+ struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_path *path;
struct btrfs_key key;
u64 end_byte = bytenr + len;
@@ -602,6 +602,9 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans,
u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
int blocksize_bits = fs_info->sb->s_blocksize_bits;
+ ASSERT(root == fs_info->csum_root ||
+ root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID);
+
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 0cb43b682789..8d47c76b7bd1 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -2599,8 +2599,8 @@ int btrfs_punch_hole_range(struct inode *inode, struct btrfs_path *path,
}
}
- if (clone_info) {
- u64 clone_len = drop_end - cur_offset;
+ if (clone_info && drop_end > clone_info->file_offset) {
+ u64 clone_len = drop_end - clone_info->file_offset;
ret = btrfs_insert_clone_extent(trans, inode, path,
clone_info, clone_len);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 56032c518b26..5509c41a4f43 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1479,10 +1479,10 @@ next_slot:
disk_num_bytes =
btrfs_file_extent_disk_num_bytes(leaf, fi);
/*
- * If extent we got ends before our range starts, skip
- * to next extent
+ * If the extent we got ends before our current offset,
+ * skip to the next extent.
*/
- if (extent_end <= start) {
+ if (extent_end <= cur_offset) {
path->slots[0]++;
goto next_slot;
}
@@ -5728,7 +5728,6 @@ static void inode_tree_add(struct inode *inode)
static void inode_tree_del(struct inode *inode)
{
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_root *root = BTRFS_I(inode)->root;
int empty = 0;
@@ -5741,7 +5740,6 @@ static void inode_tree_del(struct inode *inode)
spin_unlock(&root->inode_lock);
if (empty && btrfs_root_refs(&root->root_item) == 0) {
- synchronize_srcu(&fs_info->subvol_srcu);
spin_lock(&root->inode_lock);
empty = RB_EMPTY_ROOT(&root->inode_tree);
spin_unlock(&root->inode_lock);
@@ -9556,9 +9554,8 @@ static int btrfs_rename_exchange(struct inode *old_dir,
btrfs_init_log_ctx(&ctx_dest, new_inode);
/* close the race window with snapshot create/destroy ioctl */
- if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
- down_read(&fs_info->subvol_sem);
- if (new_ino == BTRFS_FIRST_FREE_OBJECTID)
+ if (old_ino == BTRFS_FIRST_FREE_OBJECTID ||
+ new_ino == BTRFS_FIRST_FREE_OBJECTID)
down_read(&fs_info->subvol_sem);
/*
@@ -9792,9 +9789,8 @@ out_fail:
ret = ret ? ret : ret2;
}
out_notrans:
- if (new_ino == BTRFS_FIRST_FREE_OBJECTID)
- up_read(&fs_info->subvol_sem);
- if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
+ if (new_ino == BTRFS_FIRST_FREE_OBJECTID ||
+ old_ino == BTRFS_FIRST_FREE_OBJECTID)
up_read(&fs_info->subvol_sem);
ASSERT(list_empty(&ctx_root.list));
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index a1ee0b775e65..18e328ce4b54 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -704,11 +704,17 @@ static noinline int create_subvol(struct inode *dir,
btrfs_i_size_write(BTRFS_I(dir), dir->i_size + namelen * 2);
ret = btrfs_update_inode(trans, root, dir);
- BUG_ON(ret);
+ if (ret) {
+ btrfs_abort_transaction(trans, ret);
+ goto fail;
+ }
ret = btrfs_add_root_ref(trans, objectid, root->root_key.objectid,
btrfs_ino(BTRFS_I(dir)), index, name, namelen);
- BUG_ON(ret);
+ if (ret) {
+ btrfs_abort_transaction(trans, ret);
+ goto fail;
+ }
ret = btrfs_uuid_tree_add(trans, root_item->uuid,
BTRFS_UUID_KEY_SUBVOL, objectid);
@@ -3720,24 +3726,18 @@ process_slot:
ret = 0;
if (last_dest_end < destoff + len) {
- struct btrfs_clone_extent_info clone_info = { 0 };
/*
- * We have an implicit hole (NO_HOLES feature is enabled) that
- * fully or partially overlaps our cloning range at its end.
+ * We have an implicit hole that fully or partially overlaps our
+ * cloning range at its end. This means that we either have the
+ * NO_HOLES feature enabled or the implicit hole happened due to
+ * mixing buffered and direct IO writes against this file.
*/
btrfs_release_path(path);
path->leave_spinning = 0;
- /*
- * We are dealing with a hole and our clone_info already has a
- * disk_offset of 0, we only need to fill the data length and
- * file offset.
- */
- clone_info.data_len = destoff + len - last_dest_end;
- clone_info.file_offset = last_dest_end;
ret = btrfs_punch_hole_range(inode, path,
last_dest_end, destoff + len - 1,
- &clone_info, &trans);
+ NULL, &trans);
if (ret)
goto out;
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 93aeb2e539a4..d4282e12f2a6 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -3232,12 +3232,12 @@ qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
if (!(fs_info->qgroup_flags &
BTRFS_QGROUP_STATUS_FLAG_RESCAN)) {
btrfs_warn(fs_info,
- "qgroup rescan init failed, qgroup is not enabled");
+ "qgroup rescan init failed, qgroup rescan is not queued");
ret = -EINVAL;
} else if (!(fs_info->qgroup_flags &
BTRFS_QGROUP_STATUS_FLAG_ON)) {
btrfs_warn(fs_info,
- "qgroup rescan init failed, qgroup rescan is not queued");
+ "qgroup rescan init failed, qgroup is not enabled");
ret = -EINVAL;
}
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index d897a8e5e430..c58245797f30 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -4552,6 +4552,7 @@ int btrfs_recover_relocation(struct btrfs_root *root)
fs_root = read_fs_root(fs_info, reloc_root->root_key.offset);
if (IS_ERR(fs_root)) {
err = PTR_ERR(fs_root);
+ list_add_tail(&reloc_root->root_list, &reloc_roots);
goto out_free;
}
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index ae2db5eb1549..091e5bc8c7ea 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -7084,12 +7084,6 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg)
spin_unlock(&send_root->root_item_lock);
/*
- * This is done when we lookup the root, it should already be complete
- * by the time we get here.
- */
- WARN_ON(send_root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE);
-
- /*
* Userspace tools do the checks and warn the user if it's
* not RO.
*/
diff --git a/fs/btrfs/tests/free-space-tree-tests.c b/fs/btrfs/tests/free-space-tree-tests.c
index 1a846bf6e197..914eea5ba6a7 100644
--- a/fs/btrfs/tests/free-space-tree-tests.c
+++ b/fs/btrfs/tests/free-space-tree-tests.c
@@ -452,9 +452,9 @@ static int run_test(test_func_t test_func, int bitmaps, u32 sectorsize,
root->fs_info->tree_root = root;
root->node = alloc_test_extent_buffer(root->fs_info, nodesize);
- if (!root->node) {
+ if (IS_ERR(root->node)) {
test_std_err(TEST_ALLOC_EXTENT_BUFFER);
- ret = -ENOMEM;
+ ret = PTR_ERR(root->node);
goto out;
}
btrfs_set_header_level(root->node, 0);
diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c
index 09aaca1efd62..ac035a6fa003 100644
--- a/fs/btrfs/tests/qgroup-tests.c
+++ b/fs/btrfs/tests/qgroup-tests.c
@@ -484,9 +484,9 @@ int btrfs_test_qgroups(u32 sectorsize, u32 nodesize)
* *cough*backref walking code*cough*
*/
root->node = alloc_test_extent_buffer(root->fs_info, nodesize);
- if (!root->node) {
+ if (IS_ERR(root->node)) {
test_err("couldn't allocate dummy buffer");
- ret = -ENOMEM;
+ ret = PTR_ERR(root->node);
goto out;
}
btrfs_set_header_level(root->node, 0);
diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
index 493d4d9e0f79..97f3520b8d98 100644
--- a/fs/btrfs/tree-checker.c
+++ b/fs/btrfs/tree-checker.c
@@ -227,7 +227,7 @@ static int check_extent_data_item(struct extent_buffer *leaf,
*/
if (item_size < BTRFS_FILE_EXTENT_INLINE_DATA_START) {
file_extent_err(leaf, slot,
- "invalid item size, have %u expect [%lu, %u)",
+ "invalid item size, have %u expect [%zu, %u)",
item_size, BTRFS_FILE_EXTENT_INLINE_DATA_START,
SZ_4K);
return -EUCLEAN;
@@ -332,7 +332,7 @@ static int check_extent_data_item(struct extent_buffer *leaf,
}
static int check_csum_item(struct extent_buffer *leaf, struct btrfs_key *key,
- int slot)
+ int slot, struct btrfs_key *prev_key)
{
struct btrfs_fs_info *fs_info = leaf->fs_info;
u32 sectorsize = fs_info->sectorsize;
@@ -356,6 +356,20 @@ static int check_csum_item(struct extent_buffer *leaf, struct btrfs_key *key,
btrfs_item_size_nr(leaf, slot), csumsize);
return -EUCLEAN;
}
+ if (slot > 0 && prev_key->type == BTRFS_EXTENT_CSUM_KEY) {
+ u64 prev_csum_end;
+ u32 prev_item_size;
+
+ prev_item_size = btrfs_item_size_nr(leaf, slot - 1);
+ prev_csum_end = (prev_item_size / csumsize) * sectorsize;
+ prev_csum_end += prev_key->offset;
+ if (prev_csum_end > key->offset) {
+ generic_err(leaf, slot - 1,
+"csum end range (%llu) goes beyond the start range (%llu) of the next csum item",
+ prev_csum_end, key->offset);
+ return -EUCLEAN;
+ }
+ }
return 0;
}
@@ -1355,7 +1369,7 @@ static int check_leaf_item(struct extent_buffer *leaf,
ret = check_extent_data_item(leaf, key, slot, prev_key);
break;
case BTRFS_EXTENT_CSUM_KEY:
- ret = check_csum_item(leaf, key, slot);
+ ret = check_csum_item(leaf, key, slot, prev_key);
break;
case BTRFS_DIR_ITEM_KEY:
case BTRFS_DIR_INDEX_KEY:
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 6f757361db53..d3f115909ff0 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -808,7 +808,8 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
struct btrfs_ordered_sum,
list);
if (!ret)
- ret = btrfs_del_csums(trans, fs_info,
+ ret = btrfs_del_csums(trans,
+ fs_info->csum_root,
sums->bytenr,
sums->len);
if (!ret)
@@ -3909,6 +3910,28 @@ static int log_inode_item(struct btrfs_trans_handle *trans,
return 0;
}
+static int log_csums(struct btrfs_trans_handle *trans,
+ struct btrfs_root *log_root,
+ struct btrfs_ordered_sum *sums)
+{
+ int ret;
+
+ /*
+ * Due to extent cloning, we might have logged a csum item that covers a
+ * subrange of a cloned extent, and later we can end up logging a csum
+ * item for a larger subrange of the same extent or the entire range.
+ * This would leave csum items in the log tree that cover the same range
+ * and break the searches for checksums in the log tree, resulting in
+ * some checksums missing in the fs/subvolume tree. So just delete (or
+ * trim and adjust) any existing csum items in the log for this range.
+ */
+ ret = btrfs_del_csums(trans, log_root, sums->bytenr, sums->len);
+ if (ret)
+ return ret;
+
+ return btrfs_csum_file_blocks(trans, log_root, sums);
+}
+
static noinline int copy_items(struct btrfs_trans_handle *trans,
struct btrfs_inode *inode,
struct btrfs_path *dst_path,
@@ -4054,7 +4077,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
struct btrfs_ordered_sum,
list);
if (!ret)
- ret = btrfs_csum_file_blocks(trans, log, sums);
+ ret = log_csums(trans, log, sums);
list_del(&sums->list);
kfree(sums);
}
@@ -4274,7 +4297,7 @@ static int log_extent_csums(struct btrfs_trans_handle *trans,
struct btrfs_ordered_sum,
list);
if (!ret)
- ret = btrfs_csum_file_blocks(trans, log_root, sums);
+ ret = log_csums(trans, log_root, sums);
list_del(&sums->list);
kfree(sums);
}
@@ -6294,9 +6317,28 @@ again:
wc.replay_dest = btrfs_read_fs_root_no_name(fs_info, &tmp_key);
if (IS_ERR(wc.replay_dest)) {
ret = PTR_ERR(wc.replay_dest);
+
+ /*
+ * We didn't find the subvol, likely because it was
+ * deleted. This is ok, simply skip this log and go to
+ * the next one.
+ *
+ * We need to exclude the root because we can't have
+ * other log replays overwriting this log as we'll read
+ * it back in a few more times. This will keep our
+ * block from being modified, and we'll just bail for
+ * each subsequent pass.
+ */
+ if (ret == -ENOENT)
+ ret = btrfs_pin_extent_for_log_replay(fs_info,
+ log->node->start,
+ log->node->len);
free_extent_buffer(log->node);
free_extent_buffer(log->commit_root);
kfree(log);
+
+ if (!ret)
+ goto next;
btrfs_handle_fs_error(fs_info, ret,
"Couldn't read target root for tree log recovery.");
goto error;
@@ -6328,7 +6370,6 @@ again:
&root->highest_objectid);
}
- key.offset = found_key.offset - 1;
wc.replay_dest->log_root = NULL;
free_extent_buffer(log->node);
free_extent_buffer(log->commit_root);
@@ -6336,9 +6377,10 @@ again:
if (ret)
goto error;
-
+next:
if (found_key.offset == 0)
break;
+ key.offset = found_key.offset - 1;
}
btrfs_release_path(path);
diff --git a/fs/btrfs/uuid-tree.c b/fs/btrfs/uuid-tree.c
index 91caab63bdf5..76b84f2397b1 100644
--- a/fs/btrfs/uuid-tree.c
+++ b/fs/btrfs/uuid-tree.c
@@ -324,6 +324,8 @@ again_search_slot:
}
if (ret < 0 && ret != -ENOENT)
goto out;
+ key.offset++;
+ goto again_search_slot;
}
item_size -= sizeof(subid_le);
offset += sizeof(subid_le);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index d8e5560db285..a6d3f08bfff3 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -61,7 +61,7 @@ const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
[BTRFS_RAID_RAID1C3] = {
.sub_stripes = 1,
.dev_stripes = 1,
- .devs_max = 0,
+ .devs_max = 3,
.devs_min = 3,
.tolerated_failures = 2,
.devs_increment = 3,
@@ -73,7 +73,7 @@ const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
[BTRFS_RAID_RAID1C4] = {
.sub_stripes = 1,
.dev_stripes = 1,
- .devs_max = 0,
+ .devs_max = 4,
.devs_min = 4,
.tolerated_failures = 3,
.devs_increment = 4,
diff --git a/fs/buffer.c b/fs/buffer.c
index d8c7242426bb..e94a6619464c 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -3034,8 +3034,6 @@ static void end_bio_bh_io_sync(struct bio *bio)
void guard_bio_eod(int op, struct bio *bio)
{
sector_t maxsector;
- struct bio_vec *bvec = bio_last_bvec_all(bio);
- unsigned truncated_bytes;
struct hd_struct *part;
rcu_read_lock();
@@ -3061,28 +3059,7 @@ void guard_bio_eod(int op, struct bio *bio)
if (likely((bio->bi_iter.bi_size >> 9) <= maxsector))
return;
- /* Uhhuh. We've got a bio that straddles the device size! */
- truncated_bytes = bio->bi_iter.bi_size - (maxsector << 9);
-
- /*
- * The bio contains more than one segment which spans EOD, just return
- * and let IO layer turn it into an EIO
- */
- if (truncated_bytes > bvec->bv_len)
- return;
-
- /* Truncate the bio.. */
- bio->bi_iter.bi_size -= truncated_bytes;
- bvec->bv_len -= truncated_bytes;
-
- /* ..and clear the end of the buffer for reads */
- if (op == REQ_OP_READ) {
- struct bio_vec bv;
-
- mp_bvec_last_segment(bvec, &bv);
- zero_user(bv.bv_page, bv.bv_offset + bv.bv_len,
- truncated_bytes);
- }
+ bio_truncate(bio, maxsector << 9);
}
static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index f5a38910a82b..9d09bb53c1ab 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1011,18 +1011,13 @@ static int __ceph_is_single_caps(struct ceph_inode_info *ci)
return rb_first(&ci->i_caps) == rb_last(&ci->i_caps);
}
-static int __ceph_is_any_caps(struct ceph_inode_info *ci)
-{
- return !RB_EMPTY_ROOT(&ci->i_caps);
-}
-
int ceph_is_any_caps(struct inode *inode)
{
struct ceph_inode_info *ci = ceph_inode(inode);
int ret;
spin_lock(&ci->i_ceph_lock);
- ret = __ceph_is_any_caps(ci);
+ ret = __ceph_is_any_real_caps(ci);
spin_unlock(&ci->i_ceph_lock);
return ret;
@@ -1099,15 +1094,16 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
if (removed)
ceph_put_cap(mdsc, cap);
- /* when reconnect denied, we remove session caps forcibly,
- * i_wr_ref can be non-zero. If there are ongoing write,
- * keep i_snap_realm.
- */
- if (!__ceph_is_any_caps(ci) && ci->i_wr_ref == 0 && ci->i_snap_realm)
- drop_inode_snap_realm(ci);
+ if (!__ceph_is_any_real_caps(ci)) {
+ /* when reconnect denied, we remove session caps forcibly,
+ * i_wr_ref can be non-zero. If there are ongoing write,
+ * keep i_snap_realm.
+ */
+ if (ci->i_wr_ref == 0 && ci->i_snap_realm)
+ drop_inode_snap_realm(ci);
- if (!__ceph_is_any_real_caps(ci))
__cap_delay_cancel(mdsc, ci);
+ }
}
struct cap_msg_args {
@@ -2764,7 +2760,19 @@ int ceph_get_caps(struct file *filp, int need, int want,
if (ret == -EAGAIN)
continue;
if (!ret) {
+ struct ceph_mds_client *mdsc = fsc->mdsc;
+ struct cap_wait cw;
DEFINE_WAIT_FUNC(wait, woken_wake_function);
+
+ cw.ino = inode->i_ino;
+ cw.tgid = current->tgid;
+ cw.need = need;
+ cw.want = want;
+
+ spin_lock(&mdsc->caps_list_lock);
+ list_add(&cw.list, &mdsc->cap_wait_list);
+ spin_unlock(&mdsc->caps_list_lock);
+
add_wait_queue(&ci->i_cap_wq, &wait);
flags |= NON_BLOCKING;
@@ -2778,6 +2786,11 @@ int ceph_get_caps(struct file *filp, int need, int want,
}
remove_wait_queue(&ci->i_cap_wq, &wait);
+
+ spin_lock(&mdsc->caps_list_lock);
+ list_del(&cw.list);
+ spin_unlock(&mdsc->caps_list_lock);
+
if (ret == -EAGAIN)
continue;
}
@@ -2928,7 +2941,7 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
ci->i_head_snapc = NULL;
}
/* see comment in __ceph_remove_cap() */
- if (!__ceph_is_any_caps(ci) && ci->i_snap_realm)
+ if (!__ceph_is_any_real_caps(ci) && ci->i_snap_realm)
drop_inode_snap_realm(ci);
}
spin_unlock(&ci->i_ceph_lock);
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index facb387c2735..c281f32b54f7 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -139,6 +139,7 @@ static int caps_show(struct seq_file *s, void *p)
struct ceph_fs_client *fsc = s->private;
struct ceph_mds_client *mdsc = fsc->mdsc;
int total, avail, used, reserved, min, i;
+ struct cap_wait *cw;
ceph_reservation_status(fsc, &total, &avail, &used, &reserved, &min);
seq_printf(s, "total\t\t%d\n"
@@ -166,6 +167,18 @@ static int caps_show(struct seq_file *s, void *p)
}
mutex_unlock(&mdsc->mutex);
+ seq_printf(s, "\n\nWaiters:\n--------\n");
+ seq_printf(s, "tgid ino need want\n");
+ seq_printf(s, "-----------------------------------------------------\n");
+
+ spin_lock(&mdsc->caps_list_lock);
+ list_for_each_entry(cw, &mdsc->cap_wait_list, list) {
+ seq_printf(s, "%-13d0x%-17lx%-17s%-17s\n", cw->tgid, cw->ino,
+ ceph_cap_string(cw->need),
+ ceph_cap_string(cw->want));
+ }
+ spin_unlock(&mdsc->caps_list_lock);
+
return 0;
}
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 068b029cf073..374db1bd57d1 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -2015,7 +2015,7 @@ void ceph_reclaim_caps_nr(struct ceph_mds_client *mdsc, int nr)
if (!nr)
return;
val = atomic_add_return(nr, &mdsc->cap_reclaim_pending);
- if (!(val % CEPH_CAPS_PER_RELEASE)) {
+ if ((val % CEPH_CAPS_PER_RELEASE) < nr) {
atomic_set(&mdsc->cap_reclaim_pending, 0);
ceph_queue_cap_reclaim_work(mdsc);
}
@@ -2032,12 +2032,13 @@ int ceph_alloc_readdir_reply_buffer(struct ceph_mds_request *req,
struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info;
struct ceph_mount_options *opt = req->r_mdsc->fsc->mount_options;
size_t size = sizeof(struct ceph_mds_reply_dir_entry);
- int order, num_entries;
+ unsigned int num_entries;
+ int order;
spin_lock(&ci->i_ceph_lock);
num_entries = ci->i_files + ci->i_subdirs;
spin_unlock(&ci->i_ceph_lock);
- num_entries = max(num_entries, 1);
+ num_entries = max(num_entries, 1U);
num_entries = min(num_entries, opt->max_readdir);
order = get_order(size * num_entries);
@@ -4168,6 +4169,7 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
INIT_DELAYED_WORK(&mdsc->delayed_work, delayed_work);
mdsc->last_renew_caps = jiffies;
INIT_LIST_HEAD(&mdsc->cap_delay_list);
+ INIT_LIST_HEAD(&mdsc->cap_wait_list);
spin_lock_init(&mdsc->cap_delay_lock);
INIT_LIST_HEAD(&mdsc->snap_flush_list);
spin_lock_init(&mdsc->snap_flush_lock);
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 5cd131b41d84..14c7e8c49970 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -340,6 +340,14 @@ struct ceph_quotarealm_inode {
struct inode *inode;
};
+struct cap_wait {
+ struct list_head list;
+ unsigned long ino;
+ pid_t tgid;
+ int need;
+ int want;
+};
+
/*
* mds client state
*/
@@ -416,6 +424,7 @@ struct ceph_mds_client {
spinlock_t caps_list_lock;
struct list_head caps_list; /* unused (reserved or
unreserved) */
+ struct list_head cap_wait_list;
int caps_total_count; /* total caps allocated */
int caps_use_count; /* in use */
int caps_use_max; /* max used caps */
diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c
index aeec1d6e3769..471bac335fae 100644
--- a/fs/ceph/mdsmap.c
+++ b/fs/ceph/mdsmap.c
@@ -158,6 +158,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
void *pexport_targets = NULL;
struct ceph_timespec laggy_since;
struct ceph_mds_info *info;
+ bool laggy;
ceph_decode_need(p, end, sizeof(u64) + 1, bad);
global_id = ceph_decode_64(p);
@@ -190,6 +191,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
if (err)
goto corrupt;
ceph_decode_copy(p, &laggy_since, sizeof(laggy_since));
+ laggy = laggy_since.tv_sec != 0 || laggy_since.tv_nsec != 0;
*p += sizeof(u32);
ceph_decode_32_safe(p, end, namelen, bad);
*p += namelen;
@@ -207,10 +209,11 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
*p = info_end;
}
- dout("mdsmap_decode %d/%d %lld mds%d.%d %s %s\n",
+ dout("mdsmap_decode %d/%d %lld mds%d.%d %s %s%s\n",
i+1, n, global_id, mds, inc,
ceph_pr_addr(&addr),
- ceph_mds_state_name(state));
+ ceph_mds_state_name(state),
+ laggy ? "(laggy)" : "");
if (mds < 0 || state <= 0)
continue;
@@ -230,8 +233,7 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
info->global_id = global_id;
info->state = state;
info->addr = addr;
- info->laggy = (laggy_since.tv_sec != 0 ||
- laggy_since.tv_nsec != 0);
+ info->laggy = laggy;
info->num_export_targets = num_export_targets;
if (num_export_targets) {
info->export_targets = kcalloc(num_export_targets,
@@ -355,6 +357,8 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
m->m_damaged = false;
}
bad_ext:
+ dout("mdsmap_decode m_enabled: %d, m_damaged: %d, m_num_laggy: %d\n",
+ !!m->m_enabled, !!m->m_damaged, m->m_num_laggy);
*p = end;
dout("mdsmap_decode success epoch %u\n", m->m_epoch);
return m;
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 9c9a7c68eea3..29a795f975df 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -172,10 +172,10 @@ static const struct fs_parameter_enum ceph_mount_param_enums[] = {
static const struct fs_parameter_spec ceph_mount_param_specs[] = {
fsparam_flag_no ("acl", Opt_acl),
fsparam_flag_no ("asyncreaddir", Opt_asyncreaddir),
- fsparam_u32 ("caps_max", Opt_caps_max),
+ fsparam_s32 ("caps_max", Opt_caps_max),
fsparam_u32 ("caps_wanted_delay_max", Opt_caps_wanted_delay_max),
fsparam_u32 ("caps_wanted_delay_min", Opt_caps_wanted_delay_min),
- fsparam_s32 ("write_congestion_kb", Opt_congestion_kb),
+ fsparam_u32 ("write_congestion_kb", Opt_congestion_kb),
fsparam_flag_no ("copyfrom", Opt_copyfrom),
fsparam_flag_no ("dcache", Opt_dcache),
fsparam_flag_no ("dirstat", Opt_dirstat),
@@ -187,8 +187,8 @@ static const struct fs_parameter_spec ceph_mount_param_specs[] = {
fsparam_flag_no ("quotadf", Opt_quotadf),
fsparam_u32 ("rasize", Opt_rasize),
fsparam_flag_no ("rbytes", Opt_rbytes),
- fsparam_s32 ("readdir_max_bytes", Opt_readdir_max_bytes),
- fsparam_s32 ("readdir_max_entries", Opt_readdir_max_entries),
+ fsparam_u32 ("readdir_max_bytes", Opt_readdir_max_bytes),
+ fsparam_u32 ("readdir_max_entries", Opt_readdir_max_entries),
fsparam_enum ("recover_session", Opt_recover_session),
fsparam_flag_no ("require_active_mds", Opt_require_active_mds),
fsparam_u32 ("rsize", Opt_rsize),
@@ -328,7 +328,9 @@ static int ceph_parse_mount_param(struct fs_context *fc,
fsopt->caps_wanted_delay_max = result.uint_32;
break;
case Opt_caps_max:
- fsopt->caps_max = result.uint_32;
+ if (result.int_32 < 0)
+ goto out_of_range;
+ fsopt->caps_max = result.int_32;
break;
case Opt_readdir_max_entries:
if (result.uint_32 < 1)
@@ -547,25 +549,25 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)
seq_show_option(m, "recover_session", "clean");
if (fsopt->wsize != CEPH_MAX_WRITE_SIZE)
- seq_printf(m, ",wsize=%d", fsopt->wsize);
+ seq_printf(m, ",wsize=%u", fsopt->wsize);
if (fsopt->rsize != CEPH_MAX_READ_SIZE)
- seq_printf(m, ",rsize=%d", fsopt->rsize);
+ seq_printf(m, ",rsize=%u", fsopt->rsize);
if (fsopt->rasize != CEPH_RASIZE_DEFAULT)
- seq_printf(m, ",rasize=%d", fsopt->rasize);
+ seq_printf(m, ",rasize=%u", fsopt->rasize);
if (fsopt->congestion_kb != default_congestion_kb())
- seq_printf(m, ",write_congestion_kb=%d", fsopt->congestion_kb);
+ seq_printf(m, ",write_congestion_kb=%u", fsopt->congestion_kb);
if (fsopt->caps_max)
seq_printf(m, ",caps_max=%d", fsopt->caps_max);
if (fsopt->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT)
- seq_printf(m, ",caps_wanted_delay_min=%d",
+ seq_printf(m, ",caps_wanted_delay_min=%u",
fsopt->caps_wanted_delay_min);
if (fsopt->caps_wanted_delay_max != CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT)
- seq_printf(m, ",caps_wanted_delay_max=%d",
+ seq_printf(m, ",caps_wanted_delay_max=%u",
fsopt->caps_wanted_delay_max);
if (fsopt->max_readdir != CEPH_MAX_READDIR_DEFAULT)
- seq_printf(m, ",readdir_max_entries=%d", fsopt->max_readdir);
+ seq_printf(m, ",readdir_max_entries=%u", fsopt->max_readdir);
if (fsopt->max_readdir_bytes != CEPH_MAX_READDIR_BYTES_DEFAULT)
- seq_printf(m, ",readdir_max_bytes=%d", fsopt->max_readdir_bytes);
+ seq_printf(m, ",readdir_max_bytes=%u", fsopt->max_readdir_bytes);
if (strcmp(fsopt->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT))
seq_show_option(m, "snapdirname", fsopt->snapdir_name);
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index f0f9cb7447ac..3bf1a01cd536 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -73,16 +73,16 @@
#define CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT 60 /* cap release delay */
struct ceph_mount_options {
- int flags;
+ unsigned int flags;
- int wsize; /* max write size */
- int rsize; /* max read size */
- int rasize; /* max readahead */
- int congestion_kb; /* max writeback in flight */
- int caps_wanted_delay_min, caps_wanted_delay_max;
+ unsigned int wsize; /* max write size */
+ unsigned int rsize; /* max read size */
+ unsigned int rasize; /* max readahead */
+ unsigned int congestion_kb; /* max writeback in flight */
+ unsigned int caps_wanted_delay_min, caps_wanted_delay_max;
int caps_max;
- int max_readdir; /* max readdir result (entires) */
- int max_readdir_bytes; /* max readdir result (bytes) */
+ unsigned int max_readdir; /* max readdir result (entries) */
+ unsigned int max_readdir_bytes; /* max readdir result (bytes) */
/*
* everything above this point can be memcmp'd; everything below
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index fd0262ce5ad5..40705e862451 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -1061,7 +1061,7 @@ cap_unix(struct cifs_ses *ses)
struct cached_fid {
bool is_valid:1; /* Do we have a useable root fid */
bool file_all_info_is_valid:1;
-
+ bool has_lease:1;
struct kref refcount;
struct cifs_fid *fid;
struct mutex fid_mutex;
@@ -1693,6 +1693,7 @@ struct cifs_fattr {
struct timespec64 cf_atime;
struct timespec64 cf_mtime;
struct timespec64 cf_ctime;
+ u32 cf_cifstag;
};
static inline void free_dfs_info_param(struct dfs_info3_param *param)
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 4f554f019a98..cc86a67225d1 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -42,6 +42,7 @@
#include "cifsproto.h"
#include "cifs_unicode.h"
#include "cifs_debug.h"
+#include "smb2proto.h"
#include "fscache.h"
#include "smbdirect.h"
#ifdef CONFIG_CIFS_DFS_UPCALL
@@ -112,6 +113,8 @@ cifs_mark_open_files_invalid(struct cifs_tcon *tcon)
mutex_lock(&tcon->crfid.fid_mutex);
tcon->crfid.is_valid = false;
+ /* cached handle is not valid, so SMB2_CLOSE won't be sent below */
+ close_shroot_lease_locked(&tcon->crfid);
memset(tcon->crfid.fid, 0, sizeof(struct cifs_fid));
mutex_unlock(&tcon->crfid.fid_mutex);
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 3925a7bfc74d..d17587c2c4ab 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -139,6 +139,28 @@ retry:
dput(dentry);
}
+static bool reparse_file_needs_reval(const struct cifs_fattr *fattr)
+{
+ if (!(fattr->cf_cifsattrs & ATTR_REPARSE))
+ return false;
+ /*
+ * The DFS tags should be only intepreted by server side as per
+ * MS-FSCC 2.1.2.1, but let's include them anyway.
+ *
+ * Besides, if cf_cifstag is unset (0), then we still need it to be
+ * revalidated to know exactly what reparse point it is.
+ */
+ switch (fattr->cf_cifstag) {
+ case IO_REPARSE_TAG_DFS:
+ case IO_REPARSE_TAG_DFSR:
+ case IO_REPARSE_TAG_SYMLINK:
+ case IO_REPARSE_TAG_NFS:
+ case 0:
+ return true;
+ }
+ return false;
+}
+
static void
cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb)
{
@@ -158,7 +180,7 @@ cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb)
* is a symbolic link, DFS referral or a reparse point with a direct
* access like junctions, deduplicated files, NFS symlinks.
*/
- if (fattr->cf_cifsattrs & ATTR_REPARSE)
+ if (reparse_file_needs_reval(fattr))
fattr->cf_flags |= CIFS_FATTR_NEED_REVAL;
/* non-unix readdir doesn't provide nlink */
@@ -194,19 +216,37 @@ cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb)
}
}
+static void __dir_info_to_fattr(struct cifs_fattr *fattr, const void *info)
+{
+ const FILE_DIRECTORY_INFO *fi = info;
+
+ memset(fattr, 0, sizeof(*fattr));
+ fattr->cf_cifsattrs = le32_to_cpu(fi->ExtFileAttributes);
+ fattr->cf_eof = le64_to_cpu(fi->EndOfFile);
+ fattr->cf_bytes = le64_to_cpu(fi->AllocationSize);
+ fattr->cf_createtime = le64_to_cpu(fi->CreationTime);
+ fattr->cf_atime = cifs_NTtimeToUnix(fi->LastAccessTime);
+ fattr->cf_ctime = cifs_NTtimeToUnix(fi->ChangeTime);
+ fattr->cf_mtime = cifs_NTtimeToUnix(fi->LastWriteTime);
+}
+
void
cifs_dir_info_to_fattr(struct cifs_fattr *fattr, FILE_DIRECTORY_INFO *info,
struct cifs_sb_info *cifs_sb)
{
- memset(fattr, 0, sizeof(*fattr));
- fattr->cf_cifsattrs = le32_to_cpu(info->ExtFileAttributes);
- fattr->cf_eof = le64_to_cpu(info->EndOfFile);
- fattr->cf_bytes = le64_to_cpu(info->AllocationSize);
- fattr->cf_createtime = le64_to_cpu(info->CreationTime);
- fattr->cf_atime = cifs_NTtimeToUnix(info->LastAccessTime);
- fattr->cf_ctime = cifs_NTtimeToUnix(info->ChangeTime);
- fattr->cf_mtime = cifs_NTtimeToUnix(info->LastWriteTime);
+ __dir_info_to_fattr(fattr, info);
+ cifs_fill_common_info(fattr, cifs_sb);
+}
+static void cifs_fulldir_info_to_fattr(struct cifs_fattr *fattr,
+ SEARCH_ID_FULL_DIR_INFO *info,
+ struct cifs_sb_info *cifs_sb)
+{
+ __dir_info_to_fattr(fattr, info);
+
+ /* See MS-FSCC 2.4.18 FileIdFullDirectoryInformation */
+ if (fattr->cf_cifsattrs & ATTR_REPARSE)
+ fattr->cf_cifstag = le32_to_cpu(info->EaSize);
cifs_fill_common_info(fattr, cifs_sb);
}
@@ -755,6 +795,11 @@ static int cifs_filldir(char *find_entry, struct file *file,
(FIND_FILE_STANDARD_INFO *)find_entry,
cifs_sb);
break;
+ case SMB_FIND_FILE_ID_FULL_DIR_INFO:
+ cifs_fulldir_info_to_fattr(&fattr,
+ (SEARCH_ID_FULL_DIR_INFO *)find_entry,
+ cifs_sb);
+ break;
default:
cifs_dir_info_to_fattr(&fattr,
(FILE_DIRECTORY_INFO *)find_entry,
diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c
index 8b0b512c5792..afe1f03aabe3 100644
--- a/fs/cifs/smb2file.c
+++ b/fs/cifs/smb2file.c
@@ -67,7 +67,7 @@ smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms,
goto out;
- if (oparms->tcon->use_resilient) {
+ if (oparms->tcon->use_resilient) {
/* default timeout is 0, servers pick default (120 seconds) */
nr_ioctl_req.Timeout =
cpu_to_le32(oparms->tcon->handle_timeout);
diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c
index 18c7a33adceb..5ef5e97a6d13 100644
--- a/fs/cifs/smb2inode.c
+++ b/fs/cifs/smb2inode.c
@@ -95,6 +95,7 @@ smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon,
goto finished;
}
+ memset(&oparms, 0, sizeof(struct cifs_open_parms));
oparms.tcon = tcon;
oparms.desired_access = desired_access;
oparms.disposition = create_disposition;
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index a5c96bc522cb..6250370c1170 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -616,6 +616,7 @@ smb2_close_cached_fid(struct kref *ref)
cfid->fid->volatile_fid);
cfid->is_valid = false;
cfid->file_all_info_is_valid = false;
+ cfid->has_lease = false;
}
}
@@ -626,13 +627,28 @@ void close_shroot(struct cached_fid *cfid)
mutex_unlock(&cfid->fid_mutex);
}
+void close_shroot_lease_locked(struct cached_fid *cfid)
+{
+ if (cfid->has_lease) {
+ cfid->has_lease = false;
+ kref_put(&cfid->refcount, smb2_close_cached_fid);
+ }
+}
+
+void close_shroot_lease(struct cached_fid *cfid)
+{
+ mutex_lock(&cfid->fid_mutex);
+ close_shroot_lease_locked(cfid);
+ mutex_unlock(&cfid->fid_mutex);
+}
+
void
smb2_cached_lease_break(struct work_struct *work)
{
struct cached_fid *cfid = container_of(work,
struct cached_fid, lease_break);
- close_shroot(cfid);
+ close_shroot_lease(cfid);
}
/*
@@ -773,6 +789,7 @@ int open_shroot(unsigned int xid, struct cifs_tcon *tcon, struct cifs_fid *pfid)
/* BB TBD check to see if oplock level check can be removed below */
if (o_rsp->OplockLevel == SMB2_OPLOCK_LEVEL_LEASE) {
kref_get(&tcon->crfid.refcount);
+ tcon->crfid.has_lease = true;
smb2_parse_contexts(server, o_rsp,
&oparms.fid->epoch,
oparms.fid->lease_key, &oplock, NULL);
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 0ab6b1200288..9434f6dd8df3 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -1847,7 +1847,7 @@ SMB2_tdis(const unsigned int xid, struct cifs_tcon *tcon)
if ((tcon->need_reconnect) || (tcon->ses->need_reconnect))
return 0;
- close_shroot(&tcon->crfid);
+ close_shroot_lease(&tcon->crfid);
rc = smb2_plain_req_init(SMB2_TREE_DISCONNECT, tcon, (void **) &req,
&total_len);
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index a18272c987fe..27d29f2eb6c8 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -70,6 +70,8 @@ extern int smb3_handle_read_data(struct TCP_Server_Info *server,
extern int open_shroot(unsigned int xid, struct cifs_tcon *tcon,
struct cifs_fid *pfid);
extern void close_shroot(struct cached_fid *cfid);
+extern void close_shroot_lease(struct cached_fid *cfid);
+extern void close_shroot_lease_locked(struct cached_fid *cfid);
extern void move_smb2_info_to_cifs(FILE_ALL_INFO *dst,
struct smb2_file_all_info *src);
extern int smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
diff --git a/fs/crypto/keyring.c b/fs/crypto/keyring.c
index 040df1f5e1c8..40cca351273f 100644
--- a/fs/crypto/keyring.c
+++ b/fs/crypto/keyring.c
@@ -151,7 +151,7 @@ static struct key *search_fscrypt_keyring(struct key *keyring,
}
#define FSCRYPT_FS_KEYRING_DESCRIPTION_SIZE \
- (CONST_STRLEN("fscrypt-") + FIELD_SIZEOF(struct super_block, s_id))
+ (CONST_STRLEN("fscrypt-") + sizeof_field(struct super_block, s_id))
#define FSCRYPT_MK_DESCRIPTION_SIZE (2 * FSCRYPT_KEY_IDENTIFIER_SIZE + 1)
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 0ec4f270139f..00b4d15bb811 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -39,6 +39,8 @@
#include <linux/atomic.h>
#include <linux/prefetch.h>
+#include "internal.h"
+
/*
* How many user pages to map in one call to get_user_pages(). This determines
* the size of a structure in the slab cache
diff --git a/fs/drop_caches.c b/fs/drop_caches.c
index d31b6c72b476..dc1a1d5d825b 100644
--- a/fs/drop_caches.c
+++ b/fs/drop_caches.c
@@ -35,11 +35,11 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused)
spin_unlock(&inode->i_lock);
spin_unlock(&sb->s_inode_list_lock);
- cond_resched();
invalidate_mapping_pages(inode->i_mapping, 0, -1);
iput(toput_inode);
toput_inode = inode;
+ cond_resched();
spin_lock(&sb->s_inode_list_lock);
}
spin_unlock(&sb->s_inode_list_lock);
diff --git a/fs/erofs/xattr.c b/fs/erofs/xattr.c
index a13a78725c57..b766c3ee5fa8 100644
--- a/fs/erofs/xattr.c
+++ b/fs/erofs/xattr.c
@@ -649,6 +649,8 @@ ssize_t erofs_listxattr(struct dentry *dentry,
struct listxattr_iter it;
ret = init_inode_xattrs(d_inode(dentry));
+ if (ret == -ENOATTR)
+ return 0;
if (ret)
return ret;
diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c
index d4d4fdfac1a6..1ee04e76bbe0 100644
--- a/fs/ext4/block_validity.c
+++ b/fs/ext4/block_validity.c
@@ -133,10 +133,13 @@ static void debug_print_tree(struct ext4_sb_info *sbi)
{
struct rb_node *node;
struct ext4_system_zone *entry;
+ struct ext4_system_blocks *system_blks;
int first = 1;
printk(KERN_INFO "System zones: ");
- node = rb_first(&sbi->system_blks->root);
+ rcu_read_lock();
+ system_blks = rcu_dereference(sbi->system_blks);
+ node = rb_first(&system_blks->root);
while (node) {
entry = rb_entry(node, struct ext4_system_zone, node);
printk(KERN_CONT "%s%llu-%llu", first ? "" : ", ",
@@ -144,6 +147,7 @@ static void debug_print_tree(struct ext4_sb_info *sbi)
first = 0;
node = rb_next(node);
}
+ rcu_read_unlock();
printk(KERN_CONT "\n");
}
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 9fdd2b269d61..9f00fc0bf21d 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -72,6 +72,7 @@ int __ext4_check_dir_entry(const char *function, unsigned int line,
const char *error_msg = NULL;
const int rlen = ext4_rec_len_from_disk(de->rec_len,
dir->i_sb->s_blocksize);
+ const int next_offset = ((char *) de - buf) + rlen;
if (unlikely(rlen < EXT4_DIR_REC_LEN(1)))
error_msg = "rec_len is smaller than minimal";
@@ -79,8 +80,11 @@ int __ext4_check_dir_entry(const char *function, unsigned int line,
error_msg = "rec_len % 4 != 0";
else if (unlikely(rlen < EXT4_DIR_REC_LEN(de->name_len)))
error_msg = "rec_len is too small for name_len";
- else if (unlikely(((char *) de - buf) + rlen > size))
+ else if (unlikely(next_offset > size))
error_msg = "directory entry overrun";
+ else if (unlikely(next_offset > size - EXT4_DIR_REC_LEN(1) &&
+ next_offset != size))
+ error_msg = "directory entry too close to block end";
else if (unlikely(le32_to_cpu(de->inode) >
le32_to_cpu(EXT4_SB(dir->i_sb)->s_es->s_inodes_count)))
error_msg = "inode out of bounds";
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index dc333e8e51e8..8ca4a23129aa 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -921,8 +921,8 @@ repeat_in_this_group:
if (!handle) {
BUG_ON(nblocks <= 0);
handle = __ext4_journal_start_sb(dir->i_sb, line_no,
- handle_type, nblocks,
- 0, 0);
+ handle_type, nblocks, 0,
+ ext4_trans_default_revoke_credits(sb));
if (IS_ERR(handle)) {
err = PTR_ERR(handle);
ext4_std_error(sb, err);
diff --git a/fs/ext4/inode-test.c b/fs/ext4/inode-test.c
index 92a9da1774aa..bbce1c328d85 100644
--- a/fs/ext4/inode-test.c
+++ b/fs/ext4/inode-test.c
@@ -25,7 +25,7 @@
* For constructing the negative timestamp lower bound value.
* binary: 10000000 00000000 00000000 00000000
*/
-#define LOWER_MSB_1 (-0x80000000L)
+#define LOWER_MSB_1 (-(UPPER_MSB_0) - 1L) /* avoid overflow */
/*
* For constructing the negative timestamp upper bound value.
* binary: 11111111 11111111 11111111 11111111
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 28f28de0c1b6..629a25d999f0 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -5692,7 +5692,7 @@ int ext4_expand_extra_isize(struct inode *inode,
error = ext4_journal_get_write_access(handle, iloc->bh);
if (error) {
brelse(iloc->bh);
- goto out_stop;
+ goto out_unlock;
}
error = __ext4_expand_extra_isize(inode, new_extra_isize, iloc,
@@ -5702,8 +5702,8 @@ int ext4_expand_extra_isize(struct inode *inode,
if (!error)
error = rc;
+out_unlock:
ext4_write_unlock_xattr(inode, &no_expand);
-out_stop:
ext4_journal_stop(handle);
return error;
}
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index a856997d87b5..1cb42d940784 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -2164,7 +2164,9 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
struct buffer_head *bh = NULL;
struct ext4_dir_entry_2 *de;
struct super_block *sb;
+#ifdef CONFIG_UNICODE
struct ext4_sb_info *sbi;
+#endif
struct ext4_filename fname;
int retval;
int dx_fallback=0;
@@ -2176,12 +2178,12 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
csum_size = sizeof(struct ext4_dir_entry_tail);
sb = dir->i_sb;
- sbi = EXT4_SB(sb);
blocksize = sb->s_blocksize;
if (!dentry->d_name.len)
return -EINVAL;
#ifdef CONFIG_UNICODE
+ sbi = EXT4_SB(sb);
if (ext4_has_strict_mode(sbi) && IS_CASEFOLDED(dir) &&
sbi->s_encoding && utf8_validate(sbi->s_encoding, &dentry->d_name))
return -EINVAL;
@@ -2822,7 +2824,7 @@ bool ext4_empty_dir(struct inode *inode)
{
unsigned int offset;
struct buffer_head *bh;
- struct ext4_dir_entry_2 *de, *de1;
+ struct ext4_dir_entry_2 *de;
struct super_block *sb;
if (ext4_has_inline_data(inode)) {
@@ -2847,19 +2849,25 @@ bool ext4_empty_dir(struct inode *inode)
return true;
de = (struct ext4_dir_entry_2 *) bh->b_data;
- de1 = ext4_next_entry(de, sb->s_blocksize);
- if (le32_to_cpu(de->inode) != inode->i_ino ||
- le32_to_cpu(de1->inode) == 0 ||
- strcmp(".", de->name) || strcmp("..", de1->name)) {
- ext4_warning_inode(inode, "directory missing '.' and/or '..'");
+ if (ext4_check_dir_entry(inode, NULL, de, bh, bh->b_data, bh->b_size,
+ 0) ||
+ le32_to_cpu(de->inode) != inode->i_ino || strcmp(".", de->name)) {
+ ext4_warning_inode(inode, "directory missing '.'");
+ brelse(bh);
+ return true;
+ }
+ offset = ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize);
+ de = ext4_next_entry(de, sb->s_blocksize);
+ if (ext4_check_dir_entry(inode, NULL, de, bh, bh->b_data, bh->b_size,
+ offset) ||
+ le32_to_cpu(de->inode) == 0 || strcmp("..", de->name)) {
+ ext4_warning_inode(inode, "directory missing '..'");
brelse(bh);
return true;
}
- offset = ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize) +
- ext4_rec_len_from_disk(de1->rec_len, sb->s_blocksize);
- de = ext4_next_entry(de1, sb->s_blocksize);
+ offset += ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize);
while (offset < inode->i_size) {
- if ((void *) de >= (void *) (bh->b_data+sb->s_blocksize)) {
+ if (!(offset & (sb->s_blocksize - 1))) {
unsigned int lblock;
brelse(bh);
lblock = offset >> EXT4_BLOCK_SIZE_BITS(sb);
@@ -2870,12 +2878,11 @@ bool ext4_empty_dir(struct inode *inode)
}
if (IS_ERR(bh))
return true;
- de = (struct ext4_dir_entry_2 *) bh->b_data;
}
+ de = (struct ext4_dir_entry_2 *) (bh->b_data +
+ (offset & (sb->s_blocksize - 1)));
if (ext4_check_dir_entry(inode, NULL, de, bh,
bh->b_data, bh->b_size, offset)) {
- de = (struct ext4_dir_entry_2 *)(bh->b_data +
- sb->s_blocksize);
offset = (offset | (sb->s_blocksize - 1)) + 1;
continue;
}
@@ -2884,7 +2891,6 @@ bool ext4_empty_dir(struct inode *inode)
return false;
}
offset += ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize);
- de = ext4_next_entry(de, sb->s_blocksize);
}
brelse(bh);
return true;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 1d82b56d9b11..2937a8873fe1 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1900,6 +1900,13 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
}
sbi->s_commit_interval = HZ * arg;
} else if (token == Opt_debug_want_extra_isize) {
+ if ((arg & 1) ||
+ (arg < 4) ||
+ (arg > (sbi->s_inode_size - EXT4_GOOD_OLD_INODE_SIZE))) {
+ ext4_msg(sb, KERN_ERR,
+ "Invalid want_extra_isize %d", arg);
+ return -1;
+ }
sbi->s_want_extra_isize = arg;
} else if (token == Opt_max_batch_time) {
sbi->s_max_batch_time = arg;
@@ -3554,40 +3561,6 @@ int ext4_calculate_overhead(struct super_block *sb)
return 0;
}
-static void ext4_clamp_want_extra_isize(struct super_block *sb)
-{
- struct ext4_sb_info *sbi = EXT4_SB(sb);
- struct ext4_super_block *es = sbi->s_es;
- unsigned def_extra_isize = sizeof(struct ext4_inode) -
- EXT4_GOOD_OLD_INODE_SIZE;
-
- if (sbi->s_inode_size == EXT4_GOOD_OLD_INODE_SIZE) {
- sbi->s_want_extra_isize = 0;
- return;
- }
- if (sbi->s_want_extra_isize < 4) {
- sbi->s_want_extra_isize = def_extra_isize;
- if (ext4_has_feature_extra_isize(sb)) {
- if (sbi->s_want_extra_isize <
- le16_to_cpu(es->s_want_extra_isize))
- sbi->s_want_extra_isize =
- le16_to_cpu(es->s_want_extra_isize);
- if (sbi->s_want_extra_isize <
- le16_to_cpu(es->s_min_extra_isize))
- sbi->s_want_extra_isize =
- le16_to_cpu(es->s_min_extra_isize);
- }
- }
- /* Check if enough inode space is available */
- if ((sbi->s_want_extra_isize > sbi->s_inode_size) ||
- (EXT4_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize >
- sbi->s_inode_size)) {
- sbi->s_want_extra_isize = def_extra_isize;
- ext4_msg(sb, KERN_INFO,
- "required extra inode space not available");
- }
-}
-
static void ext4_set_resv_clusters(struct super_block *sb)
{
ext4_fsblk_t resv_clusters;
@@ -3795,6 +3768,68 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
*/
sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT;
+ if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) {
+ sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE;
+ sbi->s_first_ino = EXT4_GOOD_OLD_FIRST_INO;
+ } else {
+ sbi->s_inode_size = le16_to_cpu(es->s_inode_size);
+ sbi->s_first_ino = le32_to_cpu(es->s_first_ino);
+ if (sbi->s_first_ino < EXT4_GOOD_OLD_FIRST_INO) {
+ ext4_msg(sb, KERN_ERR, "invalid first ino: %u",
+ sbi->s_first_ino);
+ goto failed_mount;
+ }
+ if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) ||
+ (!is_power_of_2(sbi->s_inode_size)) ||
+ (sbi->s_inode_size > blocksize)) {
+ ext4_msg(sb, KERN_ERR,
+ "unsupported inode size: %d",
+ sbi->s_inode_size);
+ goto failed_mount;
+ }
+ /*
+ * i_atime_extra is the last extra field available for
+ * [acm]times in struct ext4_inode. Checking for that
+ * field should suffice to ensure we have extra space
+ * for all three.
+ */
+ if (sbi->s_inode_size >= offsetof(struct ext4_inode, i_atime_extra) +
+ sizeof(((struct ext4_inode *)0)->i_atime_extra)) {
+ sb->s_time_gran = 1;
+ sb->s_time_max = EXT4_EXTRA_TIMESTAMP_MAX;
+ } else {
+ sb->s_time_gran = NSEC_PER_SEC;
+ sb->s_time_max = EXT4_NON_EXTRA_TIMESTAMP_MAX;
+ }
+ sb->s_time_min = EXT4_TIMESTAMP_MIN;
+ }
+ if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) {
+ sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
+ EXT4_GOOD_OLD_INODE_SIZE;
+ if (ext4_has_feature_extra_isize(sb)) {
+ unsigned v, max = (sbi->s_inode_size -
+ EXT4_GOOD_OLD_INODE_SIZE);
+
+ v = le16_to_cpu(es->s_want_extra_isize);
+ if (v > max) {
+ ext4_msg(sb, KERN_ERR,
+ "bad s_want_extra_isize: %d", v);
+ goto failed_mount;
+ }
+ if (sbi->s_want_extra_isize < v)
+ sbi->s_want_extra_isize = v;
+
+ v = le16_to_cpu(es->s_min_extra_isize);
+ if (v > max) {
+ ext4_msg(sb, KERN_ERR,
+ "bad s_min_extra_isize: %d", v);
+ goto failed_mount;
+ }
+ if (sbi->s_want_extra_isize < v)
+ sbi->s_want_extra_isize = v;
+ }
+ }
+
if (sbi->s_es->s_mount_opts[0]) {
char *s_mount_opts = kstrndup(sbi->s_es->s_mount_opts,
sizeof(sbi->s_es->s_mount_opts),
@@ -4033,42 +4068,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
has_huge_files);
sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files);
- if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) {
- sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE;
- sbi->s_first_ino = EXT4_GOOD_OLD_FIRST_INO;
- } else {
- sbi->s_inode_size = le16_to_cpu(es->s_inode_size);
- sbi->s_first_ino = le32_to_cpu(es->s_first_ino);
- if (sbi->s_first_ino < EXT4_GOOD_OLD_FIRST_INO) {
- ext4_msg(sb, KERN_ERR, "invalid first ino: %u",
- sbi->s_first_ino);
- goto failed_mount;
- }
- if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) ||
- (!is_power_of_2(sbi->s_inode_size)) ||
- (sbi->s_inode_size > blocksize)) {
- ext4_msg(sb, KERN_ERR,
- "unsupported inode size: %d",
- sbi->s_inode_size);
- goto failed_mount;
- }
- /*
- * i_atime_extra is the last extra field available for [acm]times in
- * struct ext4_inode. Checking for that field should suffice to ensure
- * we have extra space for all three.
- */
- if (sbi->s_inode_size >= offsetof(struct ext4_inode, i_atime_extra) +
- sizeof(((struct ext4_inode *)0)->i_atime_extra)) {
- sb->s_time_gran = 1;
- sb->s_time_max = EXT4_EXTRA_TIMESTAMP_MAX;
- } else {
- sb->s_time_gran = NSEC_PER_SEC;
- sb->s_time_max = EXT4_NON_EXTRA_TIMESTAMP_MAX;
- }
-
- sb->s_time_min = EXT4_TIMESTAMP_MIN;
- }
-
sbi->s_desc_size = le16_to_cpu(es->s_desc_size);
if (ext4_has_feature_64bit(sb)) {
if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT ||
@@ -4517,8 +4516,6 @@ no_journal:
} else if (ret)
goto failed_mount4a;
- ext4_clamp_want_extra_isize(sb);
-
ext4_set_resv_clusters(sb);
err = ext4_setup_system_zone(sb);
@@ -5306,8 +5303,6 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
goto restore_opts;
}
- ext4_clamp_want_extra_isize(sb);
-
if ((old_opts.s_mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) ^
test_opt(sb, JOURNAL_CHECKSUM)) {
ext4_msg(sb, KERN_ERR, "changing journal_checksum "
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index d5c2a3158610..a66e425884d1 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -1498,8 +1498,10 @@ static int __init init_hugetlbfs_fs(void)
/* other hstates are optional */
i = 0;
for_each_hstate(h) {
- if (i == default_hstate_idx)
+ if (i == default_hstate_idx) {
+ i++;
continue;
+ }
mnt = mount_one_hugetlbfs(h);
if (IS_ERR(mnt))
diff --git a/fs/inode.c b/fs/inode.c
index fef457a42882..96d62d97694e 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -676,6 +676,7 @@ int invalidate_inodes(struct super_block *sb, bool kill_dirty)
struct inode *inode, *next;
LIST_HEAD(dispose);
+again:
spin_lock(&sb->s_inode_list_lock);
list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
spin_lock(&inode->i_lock);
@@ -698,6 +699,12 @@ int invalidate_inodes(struct super_block *sb, bool kill_dirty)
inode_lru_list_del(inode);
spin_unlock(&inode->i_lock);
list_add(&inode->i_lru, &dispose);
+ if (need_resched()) {
+ spin_unlock(&sb->s_inode_list_lock);
+ cond_resched();
+ dispose_list(&dispose);
+ goto again;
+ }
}
spin_unlock(&sb->s_inode_list_lock);
diff --git a/fs/io-wq.c b/fs/io-wq.c
index 74b40506c5d9..541c8a3e0bbb 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -49,7 +49,6 @@ struct io_worker {
struct hlist_nulls_node nulls_node;
struct list_head all_list;
struct task_struct *task;
- wait_queue_head_t wait;
struct io_wqe *wqe;
struct io_wq_work *cur_work;
@@ -93,7 +92,6 @@ struct io_wqe {
struct io_wqe_acct acct[2];
struct hlist_nulls_head free_list;
- struct hlist_nulls_head busy_list;
struct list_head all_list;
struct io_wq *wq;
@@ -258,7 +256,7 @@ static bool io_wqe_activate_free_worker(struct io_wqe *wqe)
worker = hlist_nulls_entry(n, struct io_worker, nulls_node);
if (io_worker_get(worker)) {
- wake_up(&worker->wait);
+ wake_up_process(worker->task);
io_worker_release(worker);
return true;
}
@@ -328,7 +326,6 @@ static void __io_worker_busy(struct io_wqe *wqe, struct io_worker *worker,
if (worker->flags & IO_WORKER_F_FREE) {
worker->flags &= ~IO_WORKER_F_FREE;
hlist_nulls_del_init_rcu(&worker->nulls_node);
- hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->busy_list);
}
/*
@@ -366,7 +363,6 @@ static bool __io_worker_idle(struct io_wqe *wqe, struct io_worker *worker)
{
if (!(worker->flags & IO_WORKER_F_FREE)) {
worker->flags |= IO_WORKER_F_FREE;
- hlist_nulls_del_init_rcu(&worker->nulls_node);
hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list);
}
@@ -433,6 +429,8 @@ next:
if (signal_pending(current))
flush_signals(current);
+ cond_resched();
+
spin_lock_irq(&worker->lock);
worker->cur_work = work;
spin_unlock_irq(&worker->lock);
@@ -492,28 +490,46 @@ next:
} while (1);
}
+static inline void io_worker_spin_for_work(struct io_wqe *wqe)
+{
+ int i = 0;
+
+ while (++i < 1000) {
+ if (io_wqe_run_queue(wqe))
+ break;
+ if (need_resched())
+ break;
+ cpu_relax();
+ }
+}
+
static int io_wqe_worker(void *data)
{
struct io_worker *worker = data;
struct io_wqe *wqe = worker->wqe;
struct io_wq *wq = wqe->wq;
- DEFINE_WAIT(wait);
+ bool did_work;
io_worker_start(wqe, worker);
+ did_work = false;
while (!test_bit(IO_WQ_BIT_EXIT, &wq->state)) {
- prepare_to_wait(&worker->wait, &wait, TASK_INTERRUPTIBLE);
-
+ set_current_state(TASK_INTERRUPTIBLE);
+loop:
+ if (did_work)
+ io_worker_spin_for_work(wqe);
spin_lock_irq(&wqe->lock);
if (io_wqe_run_queue(wqe)) {
__set_current_state(TASK_RUNNING);
io_worker_handle_work(worker);
- continue;
+ did_work = true;
+ goto loop;
}
+ did_work = false;
/* drops the lock on success, retry */
if (__io_worker_idle(wqe, worker)) {
__release(&wqe->lock);
- continue;
+ goto loop;
}
spin_unlock_irq(&wqe->lock);
if (signal_pending(current))
@@ -526,8 +542,6 @@ static int io_wqe_worker(void *data)
break;
}
- finish_wait(&worker->wait, &wait);
-
if (test_bit(IO_WQ_BIT_EXIT, &wq->state)) {
spin_lock_irq(&wqe->lock);
if (!wq_list_empty(&wqe->work_list))
@@ -589,7 +603,6 @@ static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
refcount_set(&worker->ref, 1);
worker->nulls_node.pprev = NULL;
- init_waitqueue_head(&worker->wait);
worker->wqe = wqe;
spin_lock_init(&worker->lock);
@@ -784,10 +797,6 @@ void io_wq_cancel_all(struct io_wq *wq)
set_bit(IO_WQ_BIT_CANCEL, &wq->state);
- /*
- * Browse both lists, as there's a gap between handing work off
- * to a worker and the worker putting itself on the busy_list
- */
rcu_read_lock();
for_each_node(node) {
struct io_wqe *wqe = wq->wqes[node];
@@ -934,7 +943,7 @@ static enum io_wq_cancel io_wqe_cancel_work(struct io_wqe *wqe,
/*
* Now check if a free (going busy) or busy worker has the work
* currently running. If we find it there, we'll return CANCEL_RUNNING
- * as an indication that we attempte to signal cancellation. The
+ * as an indication that we attempt to signal cancellation. The
* completion will run normally in this case.
*/
rcu_read_lock();
@@ -1035,7 +1044,6 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
spin_lock_init(&wqe->lock);
INIT_WQ_LIST(&wqe->work_list);
INIT_HLIST_NULLS_HEAD(&wqe->free_list, 0);
- INIT_HLIST_NULLS_HEAD(&wqe->busy_list, 1);
INIT_LIST_HEAD(&wqe->all_list);
}
diff --git a/fs/io-wq.h b/fs/io-wq.h
index 7c333a28e2a7..3f5e356de980 100644
--- a/fs/io-wq.h
+++ b/fs/io-wq.h
@@ -35,7 +35,8 @@ static inline void wq_list_add_tail(struct io_wq_work_node *node,
struct io_wq_work_list *list)
{
if (!list->first) {
- list->first = list->last = node;
+ list->last = node;
+ WRITE_ONCE(list->first, node);
} else {
list->last->next = node;
list->last = node;
@@ -47,7 +48,7 @@ static inline void wq_node_del(struct io_wq_work_list *list,
struct io_wq_work_node *prev)
{
if (node == list->first)
- list->first = node->next;
+ WRITE_ONCE(list->first, node->next);
if (node == list->last)
list->last = prev;
if (prev)
@@ -58,7 +59,7 @@ static inline void wq_node_del(struct io_wq_work_list *list,
#define wq_list_for_each(pos, prv, head) \
for (pos = (head)->first, prv = NULL; pos; prv = pos, pos = (pos)->next)
-#define wq_list_empty(list) ((list)->first == NULL)
+#define wq_list_empty(list) (READ_ONCE((list)->first) == NULL)
#define INIT_WQ_LIST(list) do { \
(list)->first = NULL; \
(list)->last = NULL; \
@@ -119,6 +120,10 @@ static inline void io_wq_worker_sleeping(struct task_struct *tsk)
static inline void io_wq_worker_running(struct task_struct *tsk)
{
}
-#endif /* CONFIG_IO_WQ */
+#endif
-#endif /* INTERNAL_IO_WQ_H */
+static inline bool io_wq_current_is_worker(void)
+{
+ return in_task() && (current->flags & PF_IO_WORKER);
+}
+#endif
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 405be10da73d..562e3a1a1bf9 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -289,11 +289,14 @@ struct io_ring_ctx {
*/
struct io_poll_iocb {
struct file *file;
- struct wait_queue_head *head;
+ union {
+ struct wait_queue_head *head;
+ u64 addr;
+ };
__poll_t events;
bool done;
bool canceled;
- struct wait_queue_entry *wait;
+ struct wait_queue_entry wait;
};
struct io_timeout_data {
@@ -304,6 +307,51 @@ struct io_timeout_data {
u32 seq_offset;
};
+struct io_accept {
+ struct file *file;
+ struct sockaddr __user *addr;
+ int __user *addr_len;
+ int flags;
+};
+
+struct io_sync {
+ struct file *file;
+ loff_t len;
+ loff_t off;
+ int flags;
+};
+
+struct io_cancel {
+ struct file *file;
+ u64 addr;
+};
+
+struct io_timeout {
+ struct file *file;
+ u64 addr;
+ int flags;
+ unsigned count;
+};
+
+struct io_rw {
+ /* NOTE: kiocb has the file as the first member, so don't do it here */
+ struct kiocb kiocb;
+ u64 addr;
+ u64 len;
+};
+
+struct io_connect {
+ struct file *file;
+ struct sockaddr __user *addr;
+ int addr_len;
+};
+
+struct io_sr_msg {
+ struct file *file;
+ struct user_msghdr __user *msg;
+ int msg_flags;
+};
+
struct io_async_connect {
struct sockaddr_storage address;
};
@@ -323,7 +371,6 @@ struct io_async_rw {
};
struct io_async_ctx {
- struct io_uring_sqe sqe;
union {
struct io_async_rw rw;
struct io_async_msghdr msg;
@@ -341,17 +388,23 @@ struct io_async_ctx {
struct io_kiocb {
union {
struct file *file;
- struct kiocb rw;
+ struct io_rw rw;
struct io_poll_iocb poll;
+ struct io_accept accept;
+ struct io_sync sync;
+ struct io_cancel cancel;
+ struct io_timeout timeout;
+ struct io_connect connect;
+ struct io_sr_msg sr_msg;
};
- const struct io_uring_sqe *sqe;
struct io_async_ctx *io;
struct file *ring_file;
int ring_fd;
bool has_user;
bool in_async;
bool needs_fixed_file;
+ u8 opcode;
struct io_ring_ctx *ctx;
union {
@@ -377,6 +430,7 @@ struct io_kiocb {
#define REQ_F_TIMEOUT_NOSEQ 8192 /* no timeout sequence */
#define REQ_F_INFLIGHT 16384 /* on inflight list */
#define REQ_F_COMP_LOCKED 32768 /* completion under lock */
+#define REQ_F_HARDLINK 65536 /* doesn't sever on completion < 0 */
u64 user_data;
u32 result;
u32 sequence;
@@ -563,12 +617,10 @@ static void __io_commit_cqring(struct io_ring_ctx *ctx)
}
}
-static inline bool io_sqe_needs_user(const struct io_uring_sqe *sqe)
+static inline bool io_req_needs_user(struct io_kiocb *req)
{
- u8 opcode = READ_ONCE(sqe->opcode);
-
- return !(opcode == IORING_OP_READ_FIXED ||
- opcode == IORING_OP_WRITE_FIXED);
+ return !(req->opcode == IORING_OP_READ_FIXED ||
+ req->opcode == IORING_OP_WRITE_FIXED);
}
static inline bool io_prep_async_work(struct io_kiocb *req,
@@ -576,31 +628,31 @@ static inline bool io_prep_async_work(struct io_kiocb *req,
{
bool do_hashed = false;
- if (req->sqe) {
- switch (req->sqe->opcode) {
- case IORING_OP_WRITEV:
- case IORING_OP_WRITE_FIXED:
+ switch (req->opcode) {
+ case IORING_OP_WRITEV:
+ case IORING_OP_WRITE_FIXED:
+ /* only regular files should be hashed for writes */
+ if (req->flags & REQ_F_ISREG)
do_hashed = true;
- /* fall-through */
- case IORING_OP_READV:
- case IORING_OP_READ_FIXED:
- case IORING_OP_SENDMSG:
- case IORING_OP_RECVMSG:
- case IORING_OP_ACCEPT:
- case IORING_OP_POLL_ADD:
- case IORING_OP_CONNECT:
- /*
- * We know REQ_F_ISREG is not set on some of these
- * opcodes, but this enables us to keep the check in
- * just one place.
- */
- if (!(req->flags & REQ_F_ISREG))
- req->work.flags |= IO_WQ_WORK_UNBOUND;
- break;
- }
- if (io_sqe_needs_user(req->sqe))
- req->work.flags |= IO_WQ_WORK_NEEDS_USER;
+ /* fall-through */
+ case IORING_OP_READV:
+ case IORING_OP_READ_FIXED:
+ case IORING_OP_SENDMSG:
+ case IORING_OP_RECVMSG:
+ case IORING_OP_ACCEPT:
+ case IORING_OP_POLL_ADD:
+ case IORING_OP_CONNECT:
+ /*
+ * We know REQ_F_ISREG is not set on some of these
+ * opcodes, but this enables us to keep the check in
+ * just one place.
+ */
+ if (!(req->flags & REQ_F_ISREG))
+ req->work.flags |= IO_WQ_WORK_UNBOUND;
+ break;
}
+ if (io_req_needs_user(req))
+ req->work.flags |= IO_WQ_WORK_NEEDS_USER;
*link = io_prep_linked_timeout(req);
return do_hashed;
@@ -969,7 +1021,7 @@ static void io_fail_links(struct io_kiocb *req)
trace_io_uring_fail_link(req, link);
if ((req->flags & REQ_F_LINK_TIMEOUT) &&
- link->sqe->opcode == IORING_OP_LINK_TIMEOUT) {
+ link->opcode == IORING_OP_LINK_TIMEOUT) {
io_link_cancel_timeout(link);
} else {
io_cqring_fill_event(link, -ECANCELED);
@@ -1145,7 +1197,7 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events,
ret = 0;
list_for_each_entry_safe(req, tmp, &ctx->poll_list, list) {
- struct kiocb *kiocb = &req->rw;
+ struct kiocb *kiocb = &req->rw.kiocb;
/*
* Move completed entries to our local list. If we find a
@@ -1175,7 +1227,7 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events,
}
/*
- * Poll for a mininum of 'min' events. Note that if min == 0 we consider that a
+ * Poll for a minimum of 'min' events. Note that if min == 0 we consider that a
* non-spinning poll check - we'll still enter the driver poll loop, but only
* as a non-spinning completion check.
*/
@@ -1292,21 +1344,27 @@ static void kiocb_end_write(struct io_kiocb *req)
file_end_write(req->file);
}
+static inline void req_set_fail_links(struct io_kiocb *req)
+{
+ if ((req->flags & (REQ_F_LINK | REQ_F_HARDLINK)) == REQ_F_LINK)
+ req->flags |= REQ_F_FAIL_LINK;
+}
+
static void io_complete_rw_common(struct kiocb *kiocb, long res)
{
- struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw);
+ struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb);
if (kiocb->ki_flags & IOCB_WRITE)
kiocb_end_write(req);
- if ((req->flags & REQ_F_LINK) && res != req->result)
- req->flags |= REQ_F_FAIL_LINK;
+ if (res != req->result)
+ req_set_fail_links(req);
io_cqring_add_event(req, res);
}
static void io_complete_rw(struct kiocb *kiocb, long res, long res2)
{
- struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw);
+ struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb);
io_complete_rw_common(kiocb, res);
io_put_req(req);
@@ -1314,7 +1372,7 @@ static void io_complete_rw(struct kiocb *kiocb, long res, long res2)
static struct io_kiocb *__io_complete_rw(struct kiocb *kiocb, long res)
{
- struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw);
+ struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb);
struct io_kiocb *nxt = NULL;
io_complete_rw_common(kiocb, res);
@@ -1325,13 +1383,13 @@ static struct io_kiocb *__io_complete_rw(struct kiocb *kiocb, long res)
static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2)
{
- struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw);
+ struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb);
if (kiocb->ki_flags & IOCB_WRITE)
kiocb_end_write(req);
- if ((req->flags & REQ_F_LINK) && res != req->result)
- req->flags |= REQ_F_FAIL_LINK;
+ if (res != req->result)
+ req_set_fail_links(req);
req->result = res;
if (res != -EAGAIN)
req->flags |= REQ_F_IOPOLL_COMPLETED;
@@ -1359,7 +1417,7 @@ static void io_iopoll_req_issued(struct io_kiocb *req)
list_req = list_first_entry(&ctx->poll_list, struct io_kiocb,
list);
- if (list_req->rw.ki_filp != req->rw.ki_filp)
+ if (list_req->file != req->file)
ctx->poll_multi_file = true;
}
@@ -1422,7 +1480,7 @@ static bool io_file_supports_async(struct file *file)
{
umode_t mode = file_inode(file)->i_mode;
- if (S_ISBLK(mode) || S_ISCHR(mode))
+ if (S_ISBLK(mode) || S_ISCHR(mode) || S_ISSOCK(mode))
return true;
if (S_ISREG(mode) && file->f_op != &io_uring_fops)
return true;
@@ -1430,11 +1488,11 @@ static bool io_file_supports_async(struct file *file)
return false;
}
-static int io_prep_rw(struct io_kiocb *req, bool force_nonblock)
+static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
+ bool force_nonblock)
{
- const struct io_uring_sqe *sqe = req->sqe;
struct io_ring_ctx *ctx = req->ctx;
- struct kiocb *kiocb = &req->rw;
+ struct kiocb *kiocb = &req->rw.kiocb;
unsigned ioprio;
int ret;
@@ -1483,6 +1541,12 @@ static int io_prep_rw(struct io_kiocb *req, bool force_nonblock)
return -EINVAL;
kiocb->ki_complete = io_complete_rw;
}
+
+ req->rw.addr = READ_ONCE(sqe->addr);
+ req->rw.len = READ_ONCE(sqe->len);
+ /* we own ->private, reuse it for the buffer index */
+ req->rw.kiocb.private = (void *) (unsigned long)
+ READ_ONCE(sqe->buf_index);
return 0;
}
@@ -1516,11 +1580,11 @@ static void kiocb_done(struct kiocb *kiocb, ssize_t ret, struct io_kiocb **nxt,
io_rw_done(kiocb, ret);
}
-static ssize_t io_import_fixed(struct io_ring_ctx *ctx, int rw,
- const struct io_uring_sqe *sqe,
+static ssize_t io_import_fixed(struct io_kiocb *req, int rw,
struct iov_iter *iter)
{
- size_t len = READ_ONCE(sqe->len);
+ struct io_ring_ctx *ctx = req->ctx;
+ size_t len = req->rw.len;
struct io_mapped_ubuf *imu;
unsigned index, buf_index;
size_t offset;
@@ -1530,13 +1594,13 @@ static ssize_t io_import_fixed(struct io_ring_ctx *ctx, int rw,
if (unlikely(!ctx->user_bufs))
return -EFAULT;
- buf_index = READ_ONCE(sqe->buf_index);
+ buf_index = (unsigned long) req->rw.kiocb.private;
if (unlikely(buf_index >= ctx->nr_user_bufs))
return -EFAULT;
index = array_index_nospec(buf_index, ctx->nr_user_bufs);
imu = &ctx->user_bufs[index];
- buf_addr = READ_ONCE(sqe->addr);
+ buf_addr = req->rw.addr;
/* overflow */
if (buf_addr + len < buf_addr)
@@ -1593,25 +1657,20 @@ static ssize_t io_import_fixed(struct io_ring_ctx *ctx, int rw,
static ssize_t io_import_iovec(int rw, struct io_kiocb *req,
struct iovec **iovec, struct iov_iter *iter)
{
- const struct io_uring_sqe *sqe = req->sqe;
- void __user *buf = u64_to_user_ptr(READ_ONCE(sqe->addr));
- size_t sqe_len = READ_ONCE(sqe->len);
+ void __user *buf = u64_to_user_ptr(req->rw.addr);
+ size_t sqe_len = req->rw.len;
u8 opcode;
- /*
- * We're reading ->opcode for the second time, but the first read
- * doesn't care whether it's _FIXED or not, so it doesn't matter
- * whether ->opcode changes concurrently. The first read does care
- * about whether it is a READ or a WRITE, so we don't trust this read
- * for that purpose and instead let the caller pass in the read/write
- * flag.
- */
- opcode = READ_ONCE(sqe->opcode);
+ opcode = req->opcode;
if (opcode == IORING_OP_READ_FIXED || opcode == IORING_OP_WRITE_FIXED) {
*iovec = NULL;
- return io_import_fixed(req->ctx, rw, sqe, iter);
+ return io_import_fixed(req, rw, iter);
}
+ /* buffer index only valid with fixed read/write */
+ if (req->rw.kiocb.private)
+ return -EINVAL;
+
if (req->io) {
struct io_async_rw *iorw = &req->io->rw;
@@ -1692,7 +1751,7 @@ static ssize_t loop_rw_iter(int rw, struct file *file, struct kiocb *kiocb,
return ret;
}
-static void io_req_map_io(struct io_kiocb *req, ssize_t io_size,
+static void io_req_map_rw(struct io_kiocb *req, ssize_t io_size,
struct iovec *iovec, struct iovec *fast_iov,
struct iov_iter *iter)
{
@@ -1706,57 +1765,81 @@ static void io_req_map_io(struct io_kiocb *req, ssize_t io_size,
}
}
-static int io_setup_async_io(struct io_kiocb *req, ssize_t io_size,
+static int io_alloc_async_ctx(struct io_kiocb *req)
+{
+ req->io = kmalloc(sizeof(*req->io), GFP_KERNEL);
+ return req->io == NULL;
+}
+
+static void io_rw_async(struct io_wq_work **workptr)
+{
+ struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work);
+ struct iovec *iov = NULL;
+
+ if (req->io->rw.iov != req->io->rw.fast_iov)
+ iov = req->io->rw.iov;
+ io_wq_submit_work(workptr);
+ kfree(iov);
+}
+
+static int io_setup_async_rw(struct io_kiocb *req, ssize_t io_size,
struct iovec *iovec, struct iovec *fast_iov,
struct iov_iter *iter)
{
- req->io = kmalloc(sizeof(*req->io), GFP_KERNEL);
- if (req->io) {
- io_req_map_io(req, io_size, iovec, fast_iov, iter);
- memcpy(&req->io->sqe, req->sqe, sizeof(req->io->sqe));
- req->sqe = &req->io->sqe;
- return 0;
- }
+ if (!req->io && io_alloc_async_ctx(req))
+ return -ENOMEM;
- return -ENOMEM;
+ io_req_map_rw(req, io_size, iovec, fast_iov, iter);
+ req->work.func = io_rw_async;
+ return 0;
}
-static int io_read_prep(struct io_kiocb *req, struct iovec **iovec,
- struct iov_iter *iter, bool force_nonblock)
+static int io_read_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe,
+ bool force_nonblock)
{
+ struct io_async_ctx *io;
+ struct iov_iter iter;
ssize_t ret;
- ret = io_prep_rw(req, force_nonblock);
+ ret = io_prep_rw(req, sqe, force_nonblock);
if (ret)
return ret;
if (unlikely(!(req->file->f_mode & FMODE_READ)))
return -EBADF;
- return io_import_iovec(READ, req, iovec, iter);
+ if (!req->io)
+ return 0;
+
+ io = req->io;
+ io->rw.iov = io->rw.fast_iov;
+ req->io = NULL;
+ ret = io_import_iovec(READ, req, &io->rw.iov, &iter);
+ req->io = io;
+ if (ret < 0)
+ return ret;
+
+ io_req_map_rw(req, ret, io->rw.iov, io->rw.fast_iov, &iter);
+ return 0;
}
static int io_read(struct io_kiocb *req, struct io_kiocb **nxt,
bool force_nonblock)
{
struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
- struct kiocb *kiocb = &req->rw;
+ struct kiocb *kiocb = &req->rw.kiocb;
struct iov_iter iter;
- struct file *file;
size_t iov_count;
ssize_t io_size, ret;
- if (!req->io) {
- ret = io_read_prep(req, &iovec, &iter, force_nonblock);
- if (ret < 0)
- return ret;
- } else {
- ret = io_import_iovec(READ, req, &iovec, &iter);
- if (ret < 0)
- return ret;
- }
+ ret = io_import_iovec(READ, req, &iovec, &iter);
+ if (ret < 0)
+ return ret;
+
+ /* Ensure we clear previously set non-block flag */
+ if (!force_nonblock)
+ req->rw.kiocb.ki_flags &= ~IOCB_NOWAIT;
- file = req->file;
io_size = ret;
if (req->flags & REQ_F_LINK)
req->result = io_size;
@@ -1765,20 +1848,20 @@ static int io_read(struct io_kiocb *req, struct io_kiocb **nxt,
* If the file doesn't support async, mark it as REQ_F_MUST_PUNT so
* we know to async punt it even if it was opened O_NONBLOCK
*/
- if (force_nonblock && !io_file_supports_async(file)) {
+ if (force_nonblock && !io_file_supports_async(req->file)) {
req->flags |= REQ_F_MUST_PUNT;
goto copy_iov;
}
iov_count = iov_iter_count(&iter);
- ret = rw_verify_area(READ, file, &kiocb->ki_pos, iov_count);
+ ret = rw_verify_area(READ, req->file, &kiocb->ki_pos, iov_count);
if (!ret) {
ssize_t ret2;
- if (file->f_op->read_iter)
- ret2 = call_read_iter(file, kiocb, &iter);
+ if (req->file->f_op->read_iter)
+ ret2 = call_read_iter(req->file, kiocb, &iter);
else
- ret2 = loop_rw_iter(READ, file, kiocb, &iter);
+ ret2 = loop_rw_iter(READ, req->file, kiocb, &iter);
/*
* In case of a short read, punt to async. This can happen
@@ -1797,7 +1880,7 @@ static int io_read(struct io_kiocb *req, struct io_kiocb **nxt,
kiocb_done(kiocb, ret2, nxt, req->in_async);
} else {
copy_iov:
- ret = io_setup_async_io(req, io_size, iovec,
+ ret = io_setup_async_rw(req, io_size, iovec,
inline_vecs, &iter);
if (ret)
goto out_free;
@@ -1805,46 +1888,57 @@ copy_iov:
}
}
out_free:
- kfree(iovec);
+ if (!io_wq_current_is_worker())
+ kfree(iovec);
return ret;
}
-static int io_write_prep(struct io_kiocb *req, struct iovec **iovec,
- struct iov_iter *iter, bool force_nonblock)
+static int io_write_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe,
+ bool force_nonblock)
{
+ struct io_async_ctx *io;
+ struct iov_iter iter;
ssize_t ret;
- ret = io_prep_rw(req, force_nonblock);
+ ret = io_prep_rw(req, sqe, force_nonblock);
if (ret)
return ret;
if (unlikely(!(req->file->f_mode & FMODE_WRITE)))
return -EBADF;
- return io_import_iovec(WRITE, req, iovec, iter);
+ if (!req->io)
+ return 0;
+
+ io = req->io;
+ io->rw.iov = io->rw.fast_iov;
+ req->io = NULL;
+ ret = io_import_iovec(WRITE, req, &io->rw.iov, &iter);
+ req->io = io;
+ if (ret < 0)
+ return ret;
+
+ io_req_map_rw(req, ret, io->rw.iov, io->rw.fast_iov, &iter);
+ return 0;
}
static int io_write(struct io_kiocb *req, struct io_kiocb **nxt,
bool force_nonblock)
{
struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
- struct kiocb *kiocb = &req->rw;
+ struct kiocb *kiocb = &req->rw.kiocb;
struct iov_iter iter;
- struct file *file;
size_t iov_count;
ssize_t ret, io_size;
- if (!req->io) {
- ret = io_write_prep(req, &iovec, &iter, force_nonblock);
- if (ret < 0)
- return ret;
- } else {
- ret = io_import_iovec(WRITE, req, &iovec, &iter);
- if (ret < 0)
- return ret;
- }
+ ret = io_import_iovec(WRITE, req, &iovec, &iter);
+ if (ret < 0)
+ return ret;
+
+ /* Ensure we clear previously set non-block flag */
+ if (!force_nonblock)
+ req->rw.kiocb.ki_flags &= ~IOCB_NOWAIT;
- file = kiocb->ki_filp;
io_size = ret;
if (req->flags & REQ_F_LINK)
req->result = io_size;
@@ -1858,11 +1952,13 @@ static int io_write(struct io_kiocb *req, struct io_kiocb **nxt,
goto copy_iov;
}
- if (force_nonblock && !(kiocb->ki_flags & IOCB_DIRECT))
+ /* file path doesn't support NOWAIT for non-direct_IO */
+ if (force_nonblock && !(kiocb->ki_flags & IOCB_DIRECT) &&
+ (req->flags & REQ_F_ISREG))
goto copy_iov;
iov_count = iov_iter_count(&iter);
- ret = rw_verify_area(WRITE, file, &kiocb->ki_pos, iov_count);
+ ret = rw_verify_area(WRITE, req->file, &kiocb->ki_pos, iov_count);
if (!ret) {
ssize_t ret2;
@@ -1874,22 +1970,22 @@ static int io_write(struct io_kiocb *req, struct io_kiocb **nxt,
* we return to userspace.
*/
if (req->flags & REQ_F_ISREG) {
- __sb_start_write(file_inode(file)->i_sb,
+ __sb_start_write(file_inode(req->file)->i_sb,
SB_FREEZE_WRITE, true);
- __sb_writers_release(file_inode(file)->i_sb,
+ __sb_writers_release(file_inode(req->file)->i_sb,
SB_FREEZE_WRITE);
}
kiocb->ki_flags |= IOCB_WRITE;
- if (file->f_op->write_iter)
- ret2 = call_write_iter(file, kiocb, &iter);
+ if (req->file->f_op->write_iter)
+ ret2 = call_write_iter(req->file, kiocb, &iter);
else
- ret2 = loop_rw_iter(WRITE, file, kiocb, &iter);
+ ret2 = loop_rw_iter(WRITE, req->file, kiocb, &iter);
if (!force_nonblock || ret2 != -EAGAIN) {
kiocb_done(kiocb, ret2, nxt, req->in_async);
} else {
copy_iov:
- ret = io_setup_async_io(req, io_size, iovec,
+ ret = io_setup_async_rw(req, io_size, iovec,
inline_vecs, &iter);
if (ret)
goto out_free;
@@ -1897,7 +1993,8 @@ copy_iov:
}
}
out_free:
- kfree(iovec);
+ if (!io_wq_current_is_worker())
+ kfree(iovec);
return ret;
}
@@ -1928,45 +2025,70 @@ static int io_prep_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe)
if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index))
return -EINVAL;
+ req->sync.flags = READ_ONCE(sqe->fsync_flags);
+ if (unlikely(req->sync.flags & ~IORING_FSYNC_DATASYNC))
+ return -EINVAL;
+
+ req->sync.off = READ_ONCE(sqe->off);
+ req->sync.len = READ_ONCE(sqe->len);
return 0;
}
-static int io_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe,
- struct io_kiocb **nxt, bool force_nonblock)
+static bool io_req_cancelled(struct io_kiocb *req)
{
- loff_t sqe_off = READ_ONCE(sqe->off);
- loff_t sqe_len = READ_ONCE(sqe->len);
- loff_t end = sqe_off + sqe_len;
- unsigned fsync_flags;
+ if (req->work.flags & IO_WQ_WORK_CANCEL) {
+ req_set_fail_links(req);
+ io_cqring_add_event(req, -ECANCELED);
+ io_put_req(req);
+ return true;
+ }
+
+ return false;
+}
+
+static void io_fsync_finish(struct io_wq_work **workptr)
+{
+ struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work);
+ loff_t end = req->sync.off + req->sync.len;
+ struct io_kiocb *nxt = NULL;
int ret;
- fsync_flags = READ_ONCE(sqe->fsync_flags);
- if (unlikely(fsync_flags & ~IORING_FSYNC_DATASYNC))
- return -EINVAL;
+ if (io_req_cancelled(req))
+ return;
- ret = io_prep_fsync(req, sqe);
- if (ret)
- return ret;
+ ret = vfs_fsync_range(req->file, req->sync.off,
+ end > 0 ? end : LLONG_MAX,
+ req->sync.flags & IORING_FSYNC_DATASYNC);
+ if (ret < 0)
+ req_set_fail_links(req);
+ io_cqring_add_event(req, ret);
+ io_put_req_find_next(req, &nxt);
+ if (nxt)
+ *workptr = &nxt->work;
+}
+
+static int io_fsync(struct io_kiocb *req, struct io_kiocb **nxt,
+ bool force_nonblock)
+{
+ struct io_wq_work *work, *old_work;
/* fsync always requires a blocking context */
- if (force_nonblock)
+ if (force_nonblock) {
+ io_put_req(req);
+ req->work.func = io_fsync_finish;
return -EAGAIN;
+ }
- ret = vfs_fsync_range(req->rw.ki_filp, sqe_off,
- end > 0 ? end : LLONG_MAX,
- fsync_flags & IORING_FSYNC_DATASYNC);
-
- if (ret < 0 && (req->flags & REQ_F_LINK))
- req->flags |= REQ_F_FAIL_LINK;
- io_cqring_add_event(req, ret);
- io_put_req_find_next(req, nxt);
+ work = old_work = &req->work;
+ io_fsync_finish(&work);
+ if (work && work != old_work)
+ *nxt = container_of(work, struct io_kiocb, work);
return 0;
}
static int io_prep_sfr(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
struct io_ring_ctx *ctx = req->ctx;
- int ret = 0;
if (!req->file)
return -EBADF;
@@ -1976,59 +2098,88 @@ static int io_prep_sfr(struct io_kiocb *req, const struct io_uring_sqe *sqe)
if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index))
return -EINVAL;
- return ret;
+ req->sync.off = READ_ONCE(sqe->off);
+ req->sync.len = READ_ONCE(sqe->len);
+ req->sync.flags = READ_ONCE(sqe->sync_range_flags);
+ return 0;
}
-static int io_sync_file_range(struct io_kiocb *req,
- const struct io_uring_sqe *sqe,
- struct io_kiocb **nxt,
- bool force_nonblock)
+static void io_sync_file_range_finish(struct io_wq_work **workptr)
{
- loff_t sqe_off;
- loff_t sqe_len;
- unsigned flags;
+ struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work);
+ struct io_kiocb *nxt = NULL;
int ret;
- ret = io_prep_sfr(req, sqe);
- if (ret)
- return ret;
+ if (io_req_cancelled(req))
+ return;
+
+ ret = sync_file_range(req->file, req->sync.off, req->sync.len,
+ req->sync.flags);
+ if (ret < 0)
+ req_set_fail_links(req);
+ io_cqring_add_event(req, ret);
+ io_put_req_find_next(req, &nxt);
+ if (nxt)
+ *workptr = &nxt->work;
+}
+
+static int io_sync_file_range(struct io_kiocb *req, struct io_kiocb **nxt,
+ bool force_nonblock)
+{
+ struct io_wq_work *work, *old_work;
/* sync_file_range always requires a blocking context */
- if (force_nonblock)
+ if (force_nonblock) {
+ io_put_req(req);
+ req->work.func = io_sync_file_range_finish;
return -EAGAIN;
+ }
- sqe_off = READ_ONCE(sqe->off);
- sqe_len = READ_ONCE(sqe->len);
- flags = READ_ONCE(sqe->sync_range_flags);
+ work = old_work = &req->work;
+ io_sync_file_range_finish(&work);
+ if (work && work != old_work)
+ *nxt = container_of(work, struct io_kiocb, work);
+ return 0;
+}
- ret = sync_file_range(req->rw.ki_filp, sqe_off, sqe_len, flags);
+#if defined(CONFIG_NET)
+static void io_sendrecv_async(struct io_wq_work **workptr)
+{
+ struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work);
+ struct iovec *iov = NULL;
- if (ret < 0 && (req->flags & REQ_F_LINK))
- req->flags |= REQ_F_FAIL_LINK;
- io_cqring_add_event(req, ret);
- io_put_req_find_next(req, nxt);
- return 0;
+ if (req->io->rw.iov != req->io->rw.fast_iov)
+ iov = req->io->msg.iov;
+ io_wq_submit_work(workptr);
+ kfree(iov);
}
+#endif
-static int io_sendmsg_prep(struct io_kiocb *req, struct io_async_ctx *io)
+static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
#if defined(CONFIG_NET)
- const struct io_uring_sqe *sqe = req->sqe;
- struct user_msghdr __user *msg;
- unsigned flags;
+ struct io_sr_msg *sr = &req->sr_msg;
+ struct io_async_ctx *io = req->io;
- flags = READ_ONCE(sqe->msg_flags);
- msg = (struct user_msghdr __user *)(unsigned long) READ_ONCE(sqe->addr);
- return sendmsg_copy_msghdr(&io->msg.msg, msg, flags, &io->msg.iov);
+ sr->msg_flags = READ_ONCE(sqe->msg_flags);
+ sr->msg = u64_to_user_ptr(READ_ONCE(sqe->addr));
+
+ if (!io)
+ return 0;
+
+ io->msg.iov = io->msg.fast_iov;
+ return sendmsg_copy_msghdr(&io->msg.msg, sr->msg, sr->msg_flags,
+ &io->msg.iov);
#else
- return 0;
+ return -EOPNOTSUPP;
#endif
}
-static int io_sendmsg(struct io_kiocb *req, const struct io_uring_sqe *sqe,
- struct io_kiocb **nxt, bool force_nonblock)
+static int io_sendmsg(struct io_kiocb *req, struct io_kiocb **nxt,
+ bool force_nonblock)
{
#if defined(CONFIG_NET)
+ struct io_async_msghdr *kmsg = NULL;
struct socket *sock;
int ret;
@@ -2037,50 +2188,55 @@ static int io_sendmsg(struct io_kiocb *req, const struct io_uring_sqe *sqe,
sock = sock_from_file(req->file, &ret);
if (sock) {
- struct io_async_ctx io, *copy;
+ struct io_async_ctx io;
struct sockaddr_storage addr;
- struct msghdr *kmsg;
unsigned flags;
- flags = READ_ONCE(sqe->msg_flags);
- if (flags & MSG_DONTWAIT)
- req->flags |= REQ_F_NOWAIT;
- else if (force_nonblock)
- flags |= MSG_DONTWAIT;
-
if (req->io) {
- kmsg = &req->io->msg.msg;
- kmsg->msg_name = &addr;
+ kmsg = &req->io->msg;
+ kmsg->msg.msg_name = &addr;
+ /* if iov is set, it's allocated already */
+ if (!kmsg->iov)
+ kmsg->iov = kmsg->fast_iov;
+ kmsg->msg.msg_iter.iov = kmsg->iov;
} else {
- kmsg = &io.msg.msg;
- kmsg->msg_name = &addr;
+ struct io_sr_msg *sr = &req->sr_msg;
+
+ kmsg = &io.msg;
+ kmsg->msg.msg_name = &addr;
+
io.msg.iov = io.msg.fast_iov;
- ret = io_sendmsg_prep(req, &io);
+ ret = sendmsg_copy_msghdr(&io.msg.msg, sr->msg,
+ sr->msg_flags, &io.msg.iov);
if (ret)
- goto out;
+ return ret;
}
- ret = __sys_sendmsg_sock(sock, kmsg, flags);
+ flags = req->sr_msg.msg_flags;
+ if (flags & MSG_DONTWAIT)
+ req->flags |= REQ_F_NOWAIT;
+ else if (force_nonblock)
+ flags |= MSG_DONTWAIT;
+
+ ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);
if (force_nonblock && ret == -EAGAIN) {
- copy = kmalloc(sizeof(*copy), GFP_KERNEL);
- if (!copy) {
- ret = -ENOMEM;
- goto out;
- }
- memcpy(&copy->msg, &io.msg, sizeof(copy->msg));
- req->io = copy;
- memcpy(&req->io->sqe, req->sqe, sizeof(*req->sqe));
- req->sqe = &req->io->sqe;
- return ret;
+ if (req->io)
+ return -EAGAIN;
+ if (io_alloc_async_ctx(req))
+ return -ENOMEM;
+ memcpy(&req->io->msg, &io.msg, sizeof(io.msg));
+ req->work.func = io_sendrecv_async;
+ return -EAGAIN;
}
if (ret == -ERESTARTSYS)
ret = -EINTR;
}
-out:
+ if (!io_wq_current_is_worker() && kmsg && kmsg->iov != kmsg->fast_iov)
+ kfree(kmsg->iov);
io_cqring_add_event(req, ret);
- if (ret < 0 && (req->flags & REQ_F_LINK))
- req->flags |= REQ_F_FAIL_LINK;
+ if (ret < 0)
+ req_set_fail_links(req);
io_put_req_find_next(req, nxt);
return 0;
#else
@@ -2088,26 +2244,32 @@ out:
#endif
}
-static int io_recvmsg_prep(struct io_kiocb *req, struct io_async_ctx *io)
+static int io_recvmsg_prep(struct io_kiocb *req,
+ const struct io_uring_sqe *sqe)
{
#if defined(CONFIG_NET)
- const struct io_uring_sqe *sqe = req->sqe;
- struct user_msghdr __user *msg;
- unsigned flags;
+ struct io_sr_msg *sr = &req->sr_msg;
+ struct io_async_ctx *io = req->io;
- flags = READ_ONCE(sqe->msg_flags);
- msg = (struct user_msghdr __user *)(unsigned long) READ_ONCE(sqe->addr);
- return recvmsg_copy_msghdr(&io->msg.msg, msg, flags, &io->msg.uaddr,
- &io->msg.iov);
+ sr->msg_flags = READ_ONCE(sqe->msg_flags);
+ sr->msg = u64_to_user_ptr(READ_ONCE(sqe->addr));
+
+ if (!io)
+ return 0;
+
+ io->msg.iov = io->msg.fast_iov;
+ return recvmsg_copy_msghdr(&io->msg.msg, sr->msg, sr->msg_flags,
+ &io->msg.uaddr, &io->msg.iov);
#else
- return 0;
+ return -EOPNOTSUPP;
#endif
}
-static int io_recvmsg(struct io_kiocb *req, const struct io_uring_sqe *sqe,
- struct io_kiocb **nxt, bool force_nonblock)
+static int io_recvmsg(struct io_kiocb *req, struct io_kiocb **nxt,
+ bool force_nonblock)
{
#if defined(CONFIG_NET)
+ struct io_async_msghdr *kmsg = NULL;
struct socket *sock;
int ret;
@@ -2116,53 +2278,57 @@ static int io_recvmsg(struct io_kiocb *req, const struct io_uring_sqe *sqe,
sock = sock_from_file(req->file, &ret);
if (sock) {
- struct user_msghdr __user *msg;
- struct io_async_ctx io, *copy;
+ struct io_async_ctx io;
struct sockaddr_storage addr;
- struct msghdr *kmsg;
unsigned flags;
- flags = READ_ONCE(sqe->msg_flags);
- if (flags & MSG_DONTWAIT)
- req->flags |= REQ_F_NOWAIT;
- else if (force_nonblock)
- flags |= MSG_DONTWAIT;
-
- msg = (struct user_msghdr __user *) (unsigned long)
- READ_ONCE(sqe->addr);
if (req->io) {
- kmsg = &req->io->msg.msg;
- kmsg->msg_name = &addr;
+ kmsg = &req->io->msg;
+ kmsg->msg.msg_name = &addr;
+ /* if iov is set, it's allocated already */
+ if (!kmsg->iov)
+ kmsg->iov = kmsg->fast_iov;
+ kmsg->msg.msg_iter.iov = kmsg->iov;
} else {
- kmsg = &io.msg.msg;
- kmsg->msg_name = &addr;
+ struct io_sr_msg *sr = &req->sr_msg;
+
+ kmsg = &io.msg;
+ kmsg->msg.msg_name = &addr;
+
io.msg.iov = io.msg.fast_iov;
- ret = io_recvmsg_prep(req, &io);
+ ret = recvmsg_copy_msghdr(&io.msg.msg, sr->msg,
+ sr->msg_flags, &io.msg.uaddr,
+ &io.msg.iov);
if (ret)
- goto out;
+ return ret;
}
- ret = __sys_recvmsg_sock(sock, kmsg, msg, io.msg.uaddr, flags);
+ flags = req->sr_msg.msg_flags;
+ if (flags & MSG_DONTWAIT)
+ req->flags |= REQ_F_NOWAIT;
+ else if (force_nonblock)
+ flags |= MSG_DONTWAIT;
+
+ ret = __sys_recvmsg_sock(sock, &kmsg->msg, req->sr_msg.msg,
+ kmsg->uaddr, flags);
if (force_nonblock && ret == -EAGAIN) {
- copy = kmalloc(sizeof(*copy), GFP_KERNEL);
- if (!copy) {
- ret = -ENOMEM;
- goto out;
- }
- memcpy(copy, &io, sizeof(*copy));
- req->io = copy;
- memcpy(&req->io->sqe, req->sqe, sizeof(*req->sqe));
- req->sqe = &req->io->sqe;
- return ret;
+ if (req->io)
+ return -EAGAIN;
+ if (io_alloc_async_ctx(req))
+ return -ENOMEM;
+ memcpy(&req->io->msg, &io.msg, sizeof(io.msg));
+ req->work.func = io_sendrecv_async;
+ return -EAGAIN;
}
if (ret == -ERESTARTSYS)
ret = -EINTR;
}
-out:
+ if (!io_wq_current_is_worker() && kmsg && kmsg->iov != kmsg->fast_iov)
+ kfree(kmsg->iov);
io_cqring_add_event(req, ret);
- if (ret < 0 && (req->flags & REQ_F_LINK))
- req->flags |= REQ_F_FAIL_LINK;
+ if (ret < 0)
+ req_set_fail_links(req);
io_put_req_find_next(req, nxt);
return 0;
#else
@@ -2170,101 +2336,141 @@ out:
#endif
}
-static int io_accept(struct io_kiocb *req, const struct io_uring_sqe *sqe,
- struct io_kiocb **nxt, bool force_nonblock)
+static int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
#if defined(CONFIG_NET)
- struct sockaddr __user *addr;
- int __user *addr_len;
- unsigned file_flags;
- int flags, ret;
+ struct io_accept *accept = &req->accept;
if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_SQPOLL)))
return -EINVAL;
if (sqe->ioprio || sqe->len || sqe->buf_index)
return -EINVAL;
- addr = (struct sockaddr __user *) (unsigned long) READ_ONCE(sqe->addr);
- addr_len = (int __user *) (unsigned long) READ_ONCE(sqe->addr2);
- flags = READ_ONCE(sqe->accept_flags);
- file_flags = force_nonblock ? O_NONBLOCK : 0;
+ accept->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
+ accept->addr_len = u64_to_user_ptr(READ_ONCE(sqe->addr2));
+ accept->flags = READ_ONCE(sqe->accept_flags);
+ return 0;
+#else
+ return -EOPNOTSUPP;
+#endif
+}
- ret = __sys_accept4_file(req->file, file_flags, addr, addr_len, flags);
- if (ret == -EAGAIN && force_nonblock) {
- req->work.flags |= IO_WQ_WORK_NEEDS_FILES;
+#if defined(CONFIG_NET)
+static int __io_accept(struct io_kiocb *req, struct io_kiocb **nxt,
+ bool force_nonblock)
+{
+ struct io_accept *accept = &req->accept;
+ unsigned file_flags;
+ int ret;
+
+ file_flags = force_nonblock ? O_NONBLOCK : 0;
+ ret = __sys_accept4_file(req->file, file_flags, accept->addr,
+ accept->addr_len, accept->flags);
+ if (ret == -EAGAIN && force_nonblock)
return -EAGAIN;
- }
if (ret == -ERESTARTSYS)
ret = -EINTR;
- if (ret < 0 && (req->flags & REQ_F_LINK))
- req->flags |= REQ_F_FAIL_LINK;
+ if (ret < 0)
+ req_set_fail_links(req);
io_cqring_add_event(req, ret);
io_put_req_find_next(req, nxt);
return 0;
-#else
- return -EOPNOTSUPP;
-#endif
}
-static int io_connect_prep(struct io_kiocb *req, struct io_async_ctx *io)
+static void io_accept_finish(struct io_wq_work **workptr)
+{
+ struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work);
+ struct io_kiocb *nxt = NULL;
+
+ if (io_req_cancelled(req))
+ return;
+ __io_accept(req, &nxt, false);
+ if (nxt)
+ *workptr = &nxt->work;
+}
+#endif
+
+static int io_accept(struct io_kiocb *req, struct io_kiocb **nxt,
+ bool force_nonblock)
{
#if defined(CONFIG_NET)
- const struct io_uring_sqe *sqe = req->sqe;
- struct sockaddr __user *addr;
- int addr_len;
+ int ret;
- addr = (struct sockaddr __user *) (unsigned long) READ_ONCE(sqe->addr);
- addr_len = READ_ONCE(sqe->addr2);
- return move_addr_to_kernel(addr, addr_len, &io->connect.address);
-#else
+ ret = __io_accept(req, nxt, force_nonblock);
+ if (ret == -EAGAIN && force_nonblock) {
+ req->work.func = io_accept_finish;
+ req->work.flags |= IO_WQ_WORK_NEEDS_FILES;
+ io_put_req(req);
+ return -EAGAIN;
+ }
return 0;
+#else
+ return -EOPNOTSUPP;
#endif
}
-static int io_connect(struct io_kiocb *req, const struct io_uring_sqe *sqe,
- struct io_kiocb **nxt, bool force_nonblock)
+static int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
#if defined(CONFIG_NET)
- struct io_async_ctx __io, *io;
- unsigned file_flags;
- int addr_len, ret;
+ struct io_connect *conn = &req->connect;
+ struct io_async_ctx *io = req->io;
if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_SQPOLL)))
return -EINVAL;
if (sqe->ioprio || sqe->len || sqe->buf_index || sqe->rw_flags)
return -EINVAL;
- addr_len = READ_ONCE(sqe->addr2);
- file_flags = force_nonblock ? O_NONBLOCK : 0;
+ conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
+ conn->addr_len = READ_ONCE(sqe->addr2);
+
+ if (!io)
+ return 0;
+
+ return move_addr_to_kernel(conn->addr, conn->addr_len,
+ &io->connect.address);
+#else
+ return -EOPNOTSUPP;
+#endif
+}
+
+static int io_connect(struct io_kiocb *req, struct io_kiocb **nxt,
+ bool force_nonblock)
+{
+#if defined(CONFIG_NET)
+ struct io_async_ctx __io, *io;
+ unsigned file_flags;
+ int ret;
if (req->io) {
io = req->io;
} else {
- ret = io_connect_prep(req, &__io);
+ ret = move_addr_to_kernel(req->connect.addr,
+ req->connect.addr_len,
+ &__io.connect.address);
if (ret)
goto out;
io = &__io;
}
- ret = __sys_connect_file(req->file, &io->connect.address, addr_len,
- file_flags);
+ file_flags = force_nonblock ? O_NONBLOCK : 0;
+
+ ret = __sys_connect_file(req->file, &io->connect.address,
+ req->connect.addr_len, file_flags);
if ((ret == -EAGAIN || ret == -EINPROGRESS) && force_nonblock) {
- io = kmalloc(sizeof(*io), GFP_KERNEL);
- if (!io) {
+ if (req->io)
+ return -EAGAIN;
+ if (io_alloc_async_ctx(req)) {
ret = -ENOMEM;
goto out;
}
- memcpy(&io->connect, &__io.connect, sizeof(io->connect));
- req->io = io;
- memcpy(&io->sqe, req->sqe, sizeof(*req->sqe));
- req->sqe = &io->sqe;
+ memcpy(&req->io->connect, &__io.connect, sizeof(__io.connect));
return -EAGAIN;
}
if (ret == -ERESTARTSYS)
ret = -EINTR;
out:
- if (ret < 0 && (req->flags & REQ_F_LINK))
- req->flags |= REQ_F_FAIL_LINK;
+ if (ret < 0)
+ req_set_fail_links(req);
io_cqring_add_event(req, ret);
io_put_req_find_next(req, nxt);
return 0;
@@ -2279,8 +2485,8 @@ static void io_poll_remove_one(struct io_kiocb *req)
spin_lock(&poll->head->lock);
WRITE_ONCE(poll->canceled, true);
- if (!list_empty(&poll->wait->entry)) {
- list_del_init(&poll->wait->entry);
+ if (!list_empty(&poll->wait.entry)) {
+ list_del_init(&poll->wait.entry);
io_queue_async_work(req);
}
spin_unlock(&poll->head->lock);
@@ -2320,28 +2526,37 @@ static int io_poll_cancel(struct io_ring_ctx *ctx, __u64 sqe_addr)
return -ENOENT;
}
+static int io_poll_remove_prep(struct io_kiocb *req,
+ const struct io_uring_sqe *sqe)
+{
+ if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+ return -EINVAL;
+ if (sqe->ioprio || sqe->off || sqe->len || sqe->buf_index ||
+ sqe->poll_events)
+ return -EINVAL;
+
+ req->poll.addr = READ_ONCE(sqe->addr);
+ return 0;
+}
+
/*
* Find a running poll command that matches one specified in sqe->addr,
* and remove it if found.
*/
-static int io_poll_remove(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+static int io_poll_remove(struct io_kiocb *req)
{
struct io_ring_ctx *ctx = req->ctx;
+ u64 addr;
int ret;
- if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
- return -EINVAL;
- if (sqe->ioprio || sqe->off || sqe->len || sqe->buf_index ||
- sqe->poll_events)
- return -EINVAL;
-
+ addr = req->poll.addr;
spin_lock_irq(&ctx->completion_lock);
- ret = io_poll_cancel(ctx, READ_ONCE(sqe->addr));
+ ret = io_poll_cancel(ctx, addr);
spin_unlock_irq(&ctx->completion_lock);
io_cqring_add_event(req, ret);
- if (ret < 0 && (req->flags & REQ_F_LINK))
- req->flags |= REQ_F_FAIL_LINK;
+ if (ret < 0)
+ req_set_fail_links(req);
io_put_req(req);
return 0;
}
@@ -2351,7 +2566,6 @@ static void io_poll_complete(struct io_kiocb *req, __poll_t mask, int error)
struct io_ring_ctx *ctx = req->ctx;
req->poll.done = true;
- kfree(req->poll.wait);
if (error)
io_cqring_fill_event(req, error);
else
@@ -2389,7 +2603,7 @@ static void io_poll_complete_work(struct io_wq_work **workptr)
*/
spin_lock_irq(&ctx->completion_lock);
if (!mask && ret != -ECANCELED) {
- add_wait_queue(poll->head, poll->wait);
+ add_wait_queue(poll->head, &poll->wait);
spin_unlock_irq(&ctx->completion_lock);
return;
}
@@ -2399,8 +2613,8 @@ static void io_poll_complete_work(struct io_wq_work **workptr)
io_cqring_ev_posted(ctx);
- if (ret < 0 && req->flags & REQ_F_LINK)
- req->flags |= REQ_F_FAIL_LINK;
+ if (ret < 0)
+ req_set_fail_links(req);
io_put_req_find_next(req, &nxt);
if (nxt)
*workptr = &nxt->work;
@@ -2419,7 +2633,7 @@ static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
if (mask && !(mask & poll->events))
return 0;
- list_del_init(&poll->wait->entry);
+ list_del_init(&poll->wait.entry);
/*
* Run completion inline if we can. We're using trylock here because
@@ -2460,7 +2674,7 @@ static void io_poll_queue_proc(struct file *file, struct wait_queue_head *head,
pt->error = 0;
pt->req->poll.head = head;
- add_wait_queue(head, pt->req->poll.wait);
+ add_wait_queue(head, &pt->req->poll.wait);
}
static void io_poll_req_insert(struct io_kiocb *req)
@@ -2472,14 +2686,9 @@ static void io_poll_req_insert(struct io_kiocb *req)
hlist_add_head(&req->hash_node, list);
}
-static int io_poll_add(struct io_kiocb *req, const struct io_uring_sqe *sqe,
- struct io_kiocb **nxt)
+static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
struct io_poll_iocb *poll = &req->poll;
- struct io_ring_ctx *ctx = req->ctx;
- struct io_poll_table ipt;
- bool cancel = false;
- __poll_t mask;
u16 events;
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
@@ -2489,14 +2698,20 @@ static int io_poll_add(struct io_kiocb *req, const struct io_uring_sqe *sqe,
if (!poll->file)
return -EBADF;
- poll->wait = kmalloc(sizeof(*poll->wait), GFP_KERNEL);
- if (!poll->wait)
- return -ENOMEM;
-
- req->io = NULL;
- INIT_IO_WORK(&req->work, io_poll_complete_work);
events = READ_ONCE(sqe->poll_events);
poll->events = demangle_poll(events) | EPOLLERR | EPOLLHUP;
+ return 0;
+}
+
+static int io_poll_add(struct io_kiocb *req, struct io_kiocb **nxt)
+{
+ struct io_poll_iocb *poll = &req->poll;
+ struct io_ring_ctx *ctx = req->ctx;
+ struct io_poll_table ipt;
+ bool cancel = false;
+ __poll_t mask;
+
+ INIT_IO_WORK(&req->work, io_poll_complete_work);
INIT_HLIST_NODE(&req->hash_node);
poll->head = NULL;
@@ -2509,9 +2724,9 @@ static int io_poll_add(struct io_kiocb *req, const struct io_uring_sqe *sqe,
ipt.error = -EINVAL; /* same as no support for IOCB_CMD_POLL */
/* initialized the list so that we can do list_empty checks */
- INIT_LIST_HEAD(&poll->wait->entry);
- init_waitqueue_func_entry(poll->wait, io_poll_wake);
- poll->wait->private = poll;
+ INIT_LIST_HEAD(&poll->wait.entry);
+ init_waitqueue_func_entry(&poll->wait, io_poll_wake);
+ poll->wait.private = poll;
INIT_LIST_HEAD(&req->list);
@@ -2520,14 +2735,14 @@ static int io_poll_add(struct io_kiocb *req, const struct io_uring_sqe *sqe,
spin_lock_irq(&ctx->completion_lock);
if (likely(poll->head)) {
spin_lock(&poll->head->lock);
- if (unlikely(list_empty(&poll->wait->entry))) {
+ if (unlikely(list_empty(&poll->wait.entry))) {
if (ipt.error)
cancel = true;
ipt.error = 0;
mask = 0;
}
if (mask || ipt.error)
- list_del_init(&poll->wait->entry);
+ list_del_init(&poll->wait.entry);
else if (cancel)
WRITE_ONCE(poll->canceled, true);
else if (!poll->done) /* actually waiting for an event */
@@ -2567,7 +2782,7 @@ static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer)
/*
* Adjust the reqs sequence before the current one because it
- * will consume a slot in the cq_ring and the the cq_tail
+ * will consume a slot in the cq_ring and the cq_tail
* pointer will be increased, otherwise other timeout reqs may
* return in advance without waiting for enough wait_nr.
*/
@@ -2582,8 +2797,7 @@ static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer)
spin_unlock_irqrestore(&ctx->completion_lock, flags);
io_cqring_ev_posted(ctx);
- if (req->flags & REQ_F_LINK)
- req->flags |= REQ_F_FAIL_LINK;
+ req_set_fail_links(req);
io_put_req(req);
return HRTIMER_NORESTART;
}
@@ -2608,48 +2822,52 @@ static int io_timeout_cancel(struct io_ring_ctx *ctx, __u64 user_data)
if (ret == -1)
return -EALREADY;
- if (req->flags & REQ_F_LINK)
- req->flags |= REQ_F_FAIL_LINK;
+ req_set_fail_links(req);
io_cqring_fill_event(req, -ECANCELED);
io_put_req(req);
return 0;
}
+static int io_timeout_remove_prep(struct io_kiocb *req,
+ const struct io_uring_sqe *sqe)
+{
+ if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+ return -EINVAL;
+ if (sqe->flags || sqe->ioprio || sqe->buf_index || sqe->len)
+ return -EINVAL;
+
+ req->timeout.addr = READ_ONCE(sqe->addr);
+ req->timeout.flags = READ_ONCE(sqe->timeout_flags);
+ if (req->timeout.flags)
+ return -EINVAL;
+
+ return 0;
+}
+
/*
* Remove or update an existing timeout command
*/
-static int io_timeout_remove(struct io_kiocb *req,
- const struct io_uring_sqe *sqe)
+static int io_timeout_remove(struct io_kiocb *req)
{
struct io_ring_ctx *ctx = req->ctx;
- unsigned flags;
int ret;
- if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
- return -EINVAL;
- if (sqe->flags || sqe->ioprio || sqe->buf_index || sqe->len)
- return -EINVAL;
- flags = READ_ONCE(sqe->timeout_flags);
- if (flags)
- return -EINVAL;
-
spin_lock_irq(&ctx->completion_lock);
- ret = io_timeout_cancel(ctx, READ_ONCE(sqe->addr));
+ ret = io_timeout_cancel(ctx, req->timeout.addr);
io_cqring_fill_event(req, ret);
io_commit_cqring(ctx);
spin_unlock_irq(&ctx->completion_lock);
io_cqring_ev_posted(ctx);
- if (ret < 0 && req->flags & REQ_F_LINK)
- req->flags |= REQ_F_FAIL_LINK;
+ if (ret < 0)
+ req_set_fail_links(req);
io_put_req(req);
return 0;
}
-static int io_timeout_prep(struct io_kiocb *req, struct io_async_ctx *io,
+static int io_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe,
bool is_timeout_link)
{
- const struct io_uring_sqe *sqe = req->sqe;
struct io_timeout_data *data;
unsigned flags;
@@ -2663,7 +2881,12 @@ static int io_timeout_prep(struct io_kiocb *req, struct io_async_ctx *io,
if (flags & ~IORING_TIMEOUT_ABS)
return -EINVAL;
- data = &io->timeout;
+ req->timeout.count = READ_ONCE(sqe->off);
+
+ if (!req->io && io_alloc_async_ctx(req))
+ return -ENOMEM;
+
+ data = &req->io->timeout;
data->req = req;
req->flags |= REQ_F_TIMEOUT;
@@ -2676,32 +2899,17 @@ static int io_timeout_prep(struct io_kiocb *req, struct io_async_ctx *io,
data->mode = HRTIMER_MODE_REL;
hrtimer_init(&data->timer, CLOCK_MONOTONIC, data->mode);
- req->io = io;
return 0;
}
-static int io_timeout(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+static int io_timeout(struct io_kiocb *req)
{
unsigned count;
struct io_ring_ctx *ctx = req->ctx;
struct io_timeout_data *data;
- struct io_async_ctx *io;
struct list_head *entry;
unsigned span = 0;
- io = req->io;
- if (!io) {
- int ret;
-
- io = kmalloc(sizeof(*io), GFP_KERNEL);
- if (!io)
- return -ENOMEM;
- ret = io_timeout_prep(req, io, false);
- if (ret) {
- kfree(io);
- return ret;
- }
- }
data = &req->io->timeout;
/*
@@ -2709,7 +2917,7 @@ static int io_timeout(struct io_kiocb *req, const struct io_uring_sqe *sqe)
* timeout event to be satisfied. If it isn't set, then this is
* a pure timeout request, sequence isn't used.
*/
- count = READ_ONCE(sqe->off);
+ count = req->timeout.count;
if (!count) {
req->flags |= REQ_F_TIMEOUT_NOSEQ;
spin_lock_irq(&ctx->completion_lock);
@@ -2822,89 +3030,109 @@ done:
spin_unlock_irqrestore(&ctx->completion_lock, flags);
io_cqring_ev_posted(ctx);
- if (ret < 0 && (req->flags & REQ_F_LINK))
- req->flags |= REQ_F_FAIL_LINK;
+ if (ret < 0)
+ req_set_fail_links(req);
io_put_req_find_next(req, nxt);
}
-static int io_async_cancel(struct io_kiocb *req, const struct io_uring_sqe *sqe,
- struct io_kiocb **nxt)
+static int io_async_cancel_prep(struct io_kiocb *req,
+ const struct io_uring_sqe *sqe)
{
- struct io_ring_ctx *ctx = req->ctx;
-
- if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
+ if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
if (sqe->flags || sqe->ioprio || sqe->off || sqe->len ||
sqe->cancel_flags)
return -EINVAL;
- io_async_find_and_cancel(ctx, req, READ_ONCE(sqe->addr), nxt, 0);
+ req->cancel.addr = READ_ONCE(sqe->addr);
return 0;
}
-static int io_req_defer_prep(struct io_kiocb *req, struct io_async_ctx *io)
+static int io_async_cancel(struct io_kiocb *req, struct io_kiocb **nxt)
{
- struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
- struct iov_iter iter;
- ssize_t ret;
+ struct io_ring_ctx *ctx = req->ctx;
+
+ io_async_find_and_cancel(ctx, req, req->cancel.addr, nxt, 0);
+ return 0;
+}
- memcpy(&io->sqe, req->sqe, sizeof(io->sqe));
- req->sqe = &io->sqe;
+static int io_req_defer_prep(struct io_kiocb *req,
+ const struct io_uring_sqe *sqe)
+{
+ ssize_t ret = 0;
- switch (io->sqe.opcode) {
+ switch (req->opcode) {
+ case IORING_OP_NOP:
+ break;
case IORING_OP_READV:
case IORING_OP_READ_FIXED:
- ret = io_read_prep(req, &iovec, &iter, true);
+ ret = io_read_prep(req, sqe, true);
break;
case IORING_OP_WRITEV:
case IORING_OP_WRITE_FIXED:
- ret = io_write_prep(req, &iovec, &iter, true);
+ ret = io_write_prep(req, sqe, true);
+ break;
+ case IORING_OP_POLL_ADD:
+ ret = io_poll_add_prep(req, sqe);
+ break;
+ case IORING_OP_POLL_REMOVE:
+ ret = io_poll_remove_prep(req, sqe);
+ break;
+ case IORING_OP_FSYNC:
+ ret = io_prep_fsync(req, sqe);
+ break;
+ case IORING_OP_SYNC_FILE_RANGE:
+ ret = io_prep_sfr(req, sqe);
break;
case IORING_OP_SENDMSG:
- ret = io_sendmsg_prep(req, io);
+ ret = io_sendmsg_prep(req, sqe);
break;
case IORING_OP_RECVMSG:
- ret = io_recvmsg_prep(req, io);
+ ret = io_recvmsg_prep(req, sqe);
break;
case IORING_OP_CONNECT:
- ret = io_connect_prep(req, io);
+ ret = io_connect_prep(req, sqe);
break;
case IORING_OP_TIMEOUT:
- return io_timeout_prep(req, io, false);
+ ret = io_timeout_prep(req, sqe, false);
+ break;
+ case IORING_OP_TIMEOUT_REMOVE:
+ ret = io_timeout_remove_prep(req, sqe);
+ break;
+ case IORING_OP_ASYNC_CANCEL:
+ ret = io_async_cancel_prep(req, sqe);
+ break;
case IORING_OP_LINK_TIMEOUT:
- return io_timeout_prep(req, io, true);
+ ret = io_timeout_prep(req, sqe, true);
+ break;
+ case IORING_OP_ACCEPT:
+ ret = io_accept_prep(req, sqe);
+ break;
default:
- req->io = io;
- return 0;
+ printk_once(KERN_WARNING "io_uring: unhandled opcode %d\n",
+ req->opcode);
+ ret = -EINVAL;
+ break;
}
- if (ret < 0)
- return ret;
-
- req->io = io;
- io_req_map_io(req, ret, iovec, inline_vecs, &iter);
- return 0;
+ return ret;
}
-static int io_req_defer(struct io_kiocb *req)
+static int io_req_defer(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
struct io_ring_ctx *ctx = req->ctx;
- struct io_async_ctx *io;
int ret;
/* Still need defer if there is pending req in defer list. */
if (!req_need_defer(req) && list_empty(&ctx->defer_list))
return 0;
- io = kmalloc(sizeof(*io), GFP_KERNEL);
- if (!io)
+ if (!req->io && io_alloc_async_ctx(req))
return -EAGAIN;
- ret = io_req_defer_prep(req, io);
- if (ret < 0) {
- kfree(io);
+ ret = io_req_defer_prep(req, sqe);
+ if (ret < 0)
return ret;
- }
spin_lock_irq(&ctx->completion_lock);
if (!req_need_defer(req) && list_empty(&ctx->defer_list)) {
@@ -2918,66 +3146,121 @@ static int io_req_defer(struct io_kiocb *req)
return -EIOCBQUEUED;
}
-__attribute__((nonnull))
-static int io_issue_sqe(struct io_kiocb *req, struct io_kiocb **nxt,
- bool force_nonblock)
+static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
+ struct io_kiocb **nxt, bool force_nonblock)
{
- int ret, opcode;
struct io_ring_ctx *ctx = req->ctx;
+ int ret;
- opcode = READ_ONCE(req->sqe->opcode);
- switch (opcode) {
+ switch (req->opcode) {
case IORING_OP_NOP:
ret = io_nop(req);
break;
case IORING_OP_READV:
- if (unlikely(req->sqe->buf_index))
- return -EINVAL;
- ret = io_read(req, nxt, force_nonblock);
- break;
- case IORING_OP_WRITEV:
- if (unlikely(req->sqe->buf_index))
- return -EINVAL;
- ret = io_write(req, nxt, force_nonblock);
- break;
case IORING_OP_READ_FIXED:
+ if (sqe) {
+ ret = io_read_prep(req, sqe, force_nonblock);
+ if (ret < 0)
+ break;
+ }
ret = io_read(req, nxt, force_nonblock);
break;
+ case IORING_OP_WRITEV:
case IORING_OP_WRITE_FIXED:
+ if (sqe) {
+ ret = io_write_prep(req, sqe, force_nonblock);
+ if (ret < 0)
+ break;
+ }
ret = io_write(req, nxt, force_nonblock);
break;
case IORING_OP_FSYNC:
- ret = io_fsync(req, req->sqe, nxt, force_nonblock);
+ if (sqe) {
+ ret = io_prep_fsync(req, sqe);
+ if (ret < 0)
+ break;
+ }
+ ret = io_fsync(req, nxt, force_nonblock);
break;
case IORING_OP_POLL_ADD:
- ret = io_poll_add(req, req->sqe, nxt);
+ if (sqe) {
+ ret = io_poll_add_prep(req, sqe);
+ if (ret)
+ break;
+ }
+ ret = io_poll_add(req, nxt);
break;
case IORING_OP_POLL_REMOVE:
- ret = io_poll_remove(req, req->sqe);
+ if (sqe) {
+ ret = io_poll_remove_prep(req, sqe);
+ if (ret < 0)
+ break;
+ }
+ ret = io_poll_remove(req);
break;
case IORING_OP_SYNC_FILE_RANGE:
- ret = io_sync_file_range(req, req->sqe, nxt, force_nonblock);
+ if (sqe) {
+ ret = io_prep_sfr(req, sqe);
+ if (ret < 0)
+ break;
+ }
+ ret = io_sync_file_range(req, nxt, force_nonblock);
break;
case IORING_OP_SENDMSG:
- ret = io_sendmsg(req, req->sqe, nxt, force_nonblock);
+ if (sqe) {
+ ret = io_sendmsg_prep(req, sqe);
+ if (ret < 0)
+ break;
+ }
+ ret = io_sendmsg(req, nxt, force_nonblock);
break;
case IORING_OP_RECVMSG:
- ret = io_recvmsg(req, req->sqe, nxt, force_nonblock);
+ if (sqe) {
+ ret = io_recvmsg_prep(req, sqe);
+ if (ret)
+ break;
+ }
+ ret = io_recvmsg(req, nxt, force_nonblock);
break;
case IORING_OP_TIMEOUT:
- ret = io_timeout(req, req->sqe);
+ if (sqe) {
+ ret = io_timeout_prep(req, sqe, false);
+ if (ret)
+ break;
+ }
+ ret = io_timeout(req);
break;
case IORING_OP_TIMEOUT_REMOVE:
- ret = io_timeout_remove(req, req->sqe);
+ if (sqe) {
+ ret = io_timeout_remove_prep(req, sqe);
+ if (ret)
+ break;
+ }
+ ret = io_timeout_remove(req);
break;
case IORING_OP_ACCEPT:
- ret = io_accept(req, req->sqe, nxt, force_nonblock);
+ if (sqe) {
+ ret = io_accept_prep(req, sqe);
+ if (ret)
+ break;
+ }
+ ret = io_accept(req, nxt, force_nonblock);
break;
case IORING_OP_CONNECT:
- ret = io_connect(req, req->sqe, nxt, force_nonblock);
+ if (sqe) {
+ ret = io_connect_prep(req, sqe);
+ if (ret)
+ break;
+ }
+ ret = io_connect(req, nxt, force_nonblock);
break;
case IORING_OP_ASYNC_CANCEL:
- ret = io_async_cancel(req, req->sqe, nxt);
+ if (sqe) {
+ ret = io_async_cancel_prep(req, sqe);
+ if (ret)
+ break;
+ }
+ ret = io_async_cancel(req, nxt);
break;
default:
ret = -EINVAL;
@@ -2991,12 +3274,7 @@ static int io_issue_sqe(struct io_kiocb *req, struct io_kiocb **nxt,
if (req->result == -EAGAIN)
return -EAGAIN;
- /* workqueue context doesn't hold uring_lock, grab it now */
- if (req->in_async)
- mutex_lock(&ctx->uring_lock);
io_iopoll_req_issued(req);
- if (req->in_async)
- mutex_unlock(&ctx->uring_lock);
}
return 0;
@@ -3018,9 +3296,6 @@ static void io_wq_submit_work(struct io_wq_work **workptr)
struct io_kiocb *nxt = NULL;
int ret = 0;
- /* Ensure we clear previously set non-block flag */
- req->rw.ki_flags &= ~IOCB_NOWAIT;
-
if (work->flags & IO_WQ_WORK_CANCEL)
ret = -ECANCELED;
@@ -3028,7 +3303,7 @@ static void io_wq_submit_work(struct io_wq_work **workptr)
req->has_user = (work->flags & IO_WQ_WORK_HAS_MM) != 0;
req->in_async = true;
do {
- ret = io_issue_sqe(req, &nxt, false);
+ ret = io_issue_sqe(req, NULL, &nxt, false);
/*
* We can get EAGAIN for polled IO even though we're
* forcing a sync submission from here, since we can't
@@ -3044,8 +3319,7 @@ static void io_wq_submit_work(struct io_wq_work **workptr)
io_put_req(req);
if (ret) {
- if (req->flags & REQ_F_LINK)
- req->flags |= REQ_F_FAIL_LINK;
+ req_set_fail_links(req);
io_cqring_add_event(req, ret);
io_put_req(req);
}
@@ -3064,20 +3338,25 @@ static void io_wq_submit_work(struct io_wq_work **workptr)
}
}
-static bool io_op_needs_file(const struct io_uring_sqe *sqe)
+static bool io_req_op_valid(int op)
{
- int op = READ_ONCE(sqe->opcode);
+ return op >= IORING_OP_NOP && op < IORING_OP_LAST;
+}
- switch (op) {
+static int io_req_needs_file(struct io_kiocb *req)
+{
+ switch (req->opcode) {
case IORING_OP_NOP:
case IORING_OP_POLL_REMOVE:
case IORING_OP_TIMEOUT:
case IORING_OP_TIMEOUT_REMOVE:
case IORING_OP_ASYNC_CANCEL:
case IORING_OP_LINK_TIMEOUT:
- return false;
+ return 0;
default:
- return true;
+ if (io_req_op_valid(req->opcode))
+ return 1;
+ return -EINVAL;
}
}
@@ -3090,20 +3369,22 @@ static inline struct file *io_file_from_index(struct io_ring_ctx *ctx,
return table->files[index & IORING_FILE_TABLE_MASK];
}
-static int io_req_set_file(struct io_submit_state *state, struct io_kiocb *req)
+static int io_req_set_file(struct io_submit_state *state, struct io_kiocb *req,
+ const struct io_uring_sqe *sqe)
{
struct io_ring_ctx *ctx = req->ctx;
unsigned flags;
- int fd;
+ int fd, ret;
- flags = READ_ONCE(req->sqe->flags);
- fd = READ_ONCE(req->sqe->fd);
+ flags = READ_ONCE(sqe->flags);
+ fd = READ_ONCE(sqe->fd);
if (flags & IOSQE_IO_DRAIN)
req->flags |= REQ_F_IO_DRAIN;
- if (!io_op_needs_file(req->sqe))
- return 0;
+ ret = io_req_needs_file(req);
+ if (ret <= 0)
+ return ret;
if (flags & IOSQE_FIXED_FILE) {
if (unlikely(!ctx->file_table ||
@@ -3179,8 +3460,7 @@ static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer)
spin_unlock_irqrestore(&ctx->completion_lock, flags);
if (prev) {
- if (prev->flags & REQ_F_LINK)
- prev->flags |= REQ_F_FAIL_LINK;
+ req_set_fail_links(prev);
io_async_find_and_cancel(ctx, req, prev->user_data, NULL,
-ETIME);
io_put_req(prev);
@@ -3222,22 +3502,23 @@ static struct io_kiocb *io_prep_linked_timeout(struct io_kiocb *req)
nxt = list_first_entry_or_null(&req->link_list, struct io_kiocb,
link_list);
- if (!nxt || nxt->sqe->opcode != IORING_OP_LINK_TIMEOUT)
+ if (!nxt || nxt->opcode != IORING_OP_LINK_TIMEOUT)
return NULL;
req->flags |= REQ_F_LINK_TIMEOUT;
return nxt;
}
-static void __io_queue_sqe(struct io_kiocb *req)
+static void __io_queue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
- struct io_kiocb *linked_timeout = io_prep_linked_timeout(req);
+ struct io_kiocb *linked_timeout;
struct io_kiocb *nxt = NULL;
int ret;
- ret = io_issue_sqe(req, &nxt, true);
- if (nxt)
- io_queue_async_work(nxt);
+again:
+ linked_timeout = io_prep_linked_timeout(req);
+
+ ret = io_issue_sqe(req, sqe, &nxt, true);
/*
* We async punt it if the file wasn't marked NOWAIT, or if the file
@@ -3256,7 +3537,7 @@ static void __io_queue_sqe(struct io_kiocb *req)
* submit reference when the iocb is actually submitted.
*/
io_queue_async_work(req);
- return;
+ goto done_req;
}
err:
@@ -3273,13 +3554,18 @@ err:
/* and drop final reference, if we failed */
if (ret) {
io_cqring_add_event(req, ret);
- if (req->flags & REQ_F_LINK)
- req->flags |= REQ_F_FAIL_LINK;
+ req_set_fail_links(req);
io_put_req(req);
}
+done_req:
+ if (nxt) {
+ req = nxt;
+ nxt = NULL;
+ goto again;
+ }
}
-static void io_queue_sqe(struct io_kiocb *req)
+static void io_queue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
int ret;
@@ -3289,16 +3575,15 @@ static void io_queue_sqe(struct io_kiocb *req)
}
req->ctx->drain_next = (req->flags & REQ_F_DRAIN_LINK);
- ret = io_req_defer(req);
+ ret = io_req_defer(req, sqe);
if (ret) {
if (ret != -EIOCBQUEUED) {
io_cqring_add_event(req, ret);
- if (req->flags & REQ_F_LINK)
- req->flags |= REQ_F_FAIL_LINK;
+ req_set_fail_links(req);
io_double_put_req(req);
}
} else
- __io_queue_sqe(req);
+ __io_queue_sqe(req, sqe);
}
static inline void io_queue_link_head(struct io_kiocb *req)
@@ -3307,27 +3592,25 @@ static inline void io_queue_link_head(struct io_kiocb *req)
io_cqring_add_event(req, -ECANCELED);
io_double_put_req(req);
} else
- io_queue_sqe(req);
+ io_queue_sqe(req, NULL);
}
+#define SQE_VALID_FLAGS (IOSQE_FIXED_FILE|IOSQE_IO_DRAIN|IOSQE_IO_LINK| \
+ IOSQE_IO_HARDLINK)
-#define SQE_VALID_FLAGS (IOSQE_FIXED_FILE|IOSQE_IO_DRAIN|IOSQE_IO_LINK)
-
-static bool io_submit_sqe(struct io_kiocb *req, struct io_submit_state *state,
- struct io_kiocb **link)
+static bool io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
+ struct io_submit_state *state, struct io_kiocb **link)
{
struct io_ring_ctx *ctx = req->ctx;
int ret;
- req->user_data = req->sqe->user_data;
-
/* enforce forwards compatibility on users */
- if (unlikely(req->sqe->flags & ~SQE_VALID_FLAGS)) {
+ if (unlikely(sqe->flags & ~SQE_VALID_FLAGS)) {
ret = -EINVAL;
goto err_req;
}
- ret = io_req_set_file(state, req);
+ ret = io_req_set_file(state, req, sqe);
if (unlikely(ret)) {
err_req:
io_cqring_add_event(req, ret);
@@ -3344,32 +3627,38 @@ err_req:
*/
if (*link) {
struct io_kiocb *prev = *link;
- struct io_async_ctx *io;
- if (req->sqe->flags & IOSQE_IO_DRAIN)
+ if (sqe->flags & IOSQE_IO_DRAIN)
(*link)->flags |= REQ_F_DRAIN_LINK | REQ_F_IO_DRAIN;
- io = kmalloc(sizeof(*io), GFP_KERNEL);
- if (!io) {
+ if (sqe->flags & IOSQE_IO_HARDLINK)
+ req->flags |= REQ_F_HARDLINK;
+
+ if (io_alloc_async_ctx(req)) {
ret = -EAGAIN;
goto err_req;
}
- ret = io_req_defer_prep(req, io);
+ ret = io_req_defer_prep(req, sqe);
if (ret) {
- kfree(io);
+ /* fail even hard links since we don't submit */
prev->flags |= REQ_F_FAIL_LINK;
goto err_req;
}
trace_io_uring_link(ctx, req, prev);
list_add_tail(&req->link_list, &prev->link_list);
- } else if (req->sqe->flags & IOSQE_IO_LINK) {
+ } else if (sqe->flags & (IOSQE_IO_LINK|IOSQE_IO_HARDLINK)) {
req->flags |= REQ_F_LINK;
+ if (sqe->flags & IOSQE_IO_HARDLINK)
+ req->flags |= REQ_F_HARDLINK;
INIT_LIST_HEAD(&req->link_list);
+ ret = io_req_defer_prep(req, sqe);
+ if (ret)
+ req->flags |= REQ_F_FAIL_LINK;
*link = req;
} else {
- io_queue_sqe(req);
+ io_queue_sqe(req, sqe);
}
return true;
@@ -3414,14 +3703,15 @@ static void io_commit_sqring(struct io_ring_ctx *ctx)
}
/*
- * Fetch an sqe, if one is available. Note that s->sqe will point to memory
+ * Fetch an sqe, if one is available. Note that sqe_ptr will point to memory
* that is mapped by userspace. This means that care needs to be taken to
* ensure that reads are stable, as we cannot rely on userspace always
* being a good citizen. If members of the sqe are validated and then later
* used, it's important that those reads are done through READ_ONCE() to
* prevent a re-load down the line.
*/
-static bool io_get_sqring(struct io_ring_ctx *ctx, struct io_kiocb *req)
+static bool io_get_sqring(struct io_ring_ctx *ctx, struct io_kiocb *req,
+ const struct io_uring_sqe **sqe_ptr)
{
struct io_rings *rings = ctx->rings;
u32 *sq_array = ctx->sq_array;
@@ -3448,7 +3738,9 @@ static bool io_get_sqring(struct io_ring_ctx *ctx, struct io_kiocb *req)
* link list.
*/
req->sequence = ctx->cached_sq_head;
- req->sqe = &ctx->sq_sqes[head];
+ *sqe_ptr = &ctx->sq_sqes[head];
+ req->opcode = READ_ONCE((*sqe_ptr)->opcode);
+ req->user_data = READ_ONCE((*sqe_ptr)->user_data);
ctx->cached_sq_head++;
return true;
}
@@ -3480,6 +3772,7 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
}
for (i = 0; i < nr; i++) {
+ const struct io_uring_sqe *sqe;
struct io_kiocb *req;
unsigned int sqe_flags;
@@ -3489,12 +3782,12 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
submitted = -EAGAIN;
break;
}
- if (!io_get_sqring(ctx, req)) {
+ if (!io_get_sqring(ctx, req, &sqe)) {
__io_free_req(req);
break;
}
- if (io_sqe_needs_user(req->sqe) && !*mm) {
+ if (io_req_needs_user(req) && !*mm) {
mm_fault = mm_fault || !mmget_not_zero(ctx->sqo_mm);
if (!mm_fault) {
use_mm(ctx->sqo_mm);
@@ -3503,22 +3796,21 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
}
submitted++;
- sqe_flags = req->sqe->flags;
+ sqe_flags = sqe->flags;
req->ring_file = ring_file;
req->ring_fd = ring_fd;
req->has_user = *mm != NULL;
req->in_async = async;
req->needs_fixed_file = async;
- trace_io_uring_submit_sqe(ctx, req->sqe->user_data,
- true, async);
- if (!io_submit_sqe(req, statep, &link))
+ trace_io_uring_submit_sqe(ctx, req->user_data, true, async);
+ if (!io_submit_sqe(req, sqe, statep, &link))
break;
/*
* If previous wasn't linked and we have a linked command,
* that's the end of the chain. Submit the previous link.
*/
- if (!(sqe_flags & IOSQE_IO_LINK) && link) {
+ if (!(sqe_flags & (IOSQE_IO_LINK|IOSQE_IO_HARDLINK)) && link) {
io_queue_link_head(link);
link = NULL;
}
@@ -3647,7 +3939,9 @@ static int io_sq_thread(void *data)
}
to_submit = min(to_submit, ctx->sq_entries);
+ mutex_lock(&ctx->uring_lock);
ret = io_submit_sqes(ctx, to_submit, NULL, -1, &cur_mm, true);
+ mutex_unlock(&ctx->uring_lock);
if (ret > 0)
inflight += ret;
}
@@ -3676,7 +3970,7 @@ static inline bool io_should_wake(struct io_wait_queue *iowq, bool noflush)
struct io_ring_ctx *ctx = iowq->ctx;
/*
- * Wake up if we have enough events, or if a timeout occured since we
+ * Wake up if we have enough events, or if a timeout occurred since we
* started waiting. For timeouts, we always want to return to userspace,
* regardless of event count.
*/
@@ -4428,7 +4722,7 @@ static int io_copy_iov(struct io_ring_ctx *ctx, struct iovec *dst,
if (copy_from_user(&ciov, &ciovs[index], sizeof(ciov)))
return -EFAULT;
- dst->iov_base = (void __user *) (unsigned long) ciov.iov_base;
+ dst->iov_base = u64_to_user_ptr((u64)ciov.iov_base);
dst->iov_len = ciov.iov_len;
return 0;
}
@@ -4866,6 +5160,9 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
submitted = io_submit_sqes(ctx, to_submit, f.file, fd,
&cur_mm, false);
mutex_unlock(&ctx->uring_lock);
+
+ if (submitted != to_submit)
+ goto out;
}
if (flags & IORING_ENTER_GETEVENTS) {
unsigned nr_events = 0;
@@ -4879,6 +5176,7 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
}
}
+out:
percpu_ref_put(&ctx->refs);
out_fput:
fdput(f);
diff --git a/fs/locks.c b/fs/locks.c
index 6970f55daf54..44b6da032842 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -2853,7 +2853,7 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl,
}
if (inode) {
/* userspace relies on this representation of dev_t */
- seq_printf(f, "%d %02x:%02x:%ld ", fl_pid,
+ seq_printf(f, "%d %02x:%02x:%lu ", fl_pid,
MAJOR(inode->i_sb->s_dev),
MINOR(inode->i_sb->s_dev), inode->i_ino);
} else {
diff --git a/fs/namespace.c b/fs/namespace.c
index 2fd0c8bcb8c1..5e1bf611a9eb 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1728,7 +1728,7 @@ static bool is_mnt_ns_file(struct dentry *dentry)
dentry->d_fsdata == &mntns_operations;
}
-struct mnt_namespace *to_mnt_ns(struct ns_common *ns)
+static struct mnt_namespace *to_mnt_ns(struct ns_common *ns)
{
return container_of(ns, struct mnt_namespace, ns);
}
@@ -3325,8 +3325,8 @@ struct dentry *mount_subtree(struct vfsmount *m, const char *name)
}
EXPORT_SYMBOL(mount_subtree);
-int ksys_mount(const char __user *dev_name, const char __user *dir_name,
- const char __user *type, unsigned long flags, void __user *data)
+SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
+ char __user *, type, unsigned long, flags, void __user *, data)
{
int ret;
char *kernel_type;
@@ -3359,12 +3359,6 @@ out_type:
return ret;
}
-SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
- char __user *, type, unsigned long, flags, void __user *, data)
-{
- return ksys_mount(dev_name, dir_name, type, flags, data);
-}
-
/*
* Create a kernel mount representation for a new, prepared superblock
* (specified by fs_fd) and attach to an open_tree-like file descriptor.
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 3e77b728a22b..46f225580009 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -57,6 +57,9 @@ static void fsnotify_unmount_inodes(struct super_block *sb)
* doing an __iget/iput with SB_ACTIVE clear would actually
* evict all inodes with zero i_count from icache which is
* unnecessarily violent and may in fact be illegal to do.
+ * However, we should have been called /after/ evict_inodes
+ * removed all zero refcount inodes, in any case. Test to
+ * be sure.
*/
if (!atomic_read(&inode->i_count)) {
spin_unlock(&inode->i_lock);
@@ -77,6 +80,7 @@ static void fsnotify_unmount_inodes(struct super_block *sb)
iput_inode = inode;
+ cond_resched();
spin_lock(&sb->s_inode_list_lock);
}
spin_unlock(&sb->s_inode_list_lock);
diff --git a/fs/nsfs.c b/fs/nsfs.c
index a0431642c6b5..f75767bd623a 100644
--- a/fs/nsfs.c
+++ b/fs/nsfs.c
@@ -3,6 +3,7 @@
#include <linux/pseudo_fs.h>
#include <linux/file.h>
#include <linux/fs.h>
+#include <linux/proc_fs.h>
#include <linux/proc_ns.h>
#include <linux/magic.h>
#include <linux/ktime.h>
@@ -11,6 +12,8 @@
#include <linux/nsfs.h>
#include <linux/uaccess.h>
+#include "internal.h"
+
static struct vfsmount *nsfs_mnt;
static long ns_ioctl(struct file *filp, unsigned int ioctl,
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 1c4c51f3df60..cda1027d0819 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -3282,6 +3282,7 @@ static void ocfs2_dlm_init_debug(struct ocfs2_super *osb)
debugfs_create_u32("locking_filter", 0600, osb->osb_debug_root,
&dlm_debug->d_filter_secs);
+ ocfs2_get_dlm_debug(dlm_debug);
}
static void ocfs2_dlm_shutdown_debug(struct ocfs2_super *osb)
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 1afe57f425a0..68ba354cf361 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -1066,6 +1066,14 @@ int ocfs2_journal_load(struct ocfs2_journal *journal, int local, int replayed)
ocfs2_clear_journal_error(osb->sb, journal->j_journal, osb->slot_num);
+ if (replayed) {
+ jbd2_journal_lock_updates(journal->j_journal);
+ status = jbd2_journal_flush(journal->j_journal);
+ jbd2_journal_unlock_updates(journal->j_journal);
+ if (status < 0)
+ mlog_errno(status);
+ }
+
status = ocfs2_journal_toggle_dirty(osb, 1, replayed);
if (status < 0) {
mlog_errno(status);
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index b801c6353100..6220642fe113 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -227,13 +227,17 @@ int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat)
struct ovl_fh *ovl_encode_real_fh(struct dentry *real, bool is_upper)
{
struct ovl_fh *fh;
- int fh_type, fh_len, dwords;
- void *buf;
+ int fh_type, dwords;
int buflen = MAX_HANDLE_SZ;
uuid_t *uuid = &real->d_sb->s_uuid;
+ int err;
- buf = kmalloc(buflen, GFP_KERNEL);
- if (!buf)
+ /* Make sure the real fid stays 32bit aligned */
+ BUILD_BUG_ON(OVL_FH_FID_OFFSET % 4);
+ BUILD_BUG_ON(MAX_HANDLE_SZ + OVL_FH_FID_OFFSET > 255);
+
+ fh = kzalloc(buflen + OVL_FH_FID_OFFSET, GFP_KERNEL);
+ if (!fh)
return ERR_PTR(-ENOMEM);
/*
@@ -242,27 +246,19 @@ struct ovl_fh *ovl_encode_real_fh(struct dentry *real, bool is_upper)
* the price or reconnecting the dentry.
*/
dwords = buflen >> 2;
- fh_type = exportfs_encode_fh(real, buf, &dwords, 0);
+ fh_type = exportfs_encode_fh(real, (void *)fh->fb.fid, &dwords, 0);
buflen = (dwords << 2);
- fh = ERR_PTR(-EIO);
+ err = -EIO;
if (WARN_ON(fh_type < 0) ||
WARN_ON(buflen > MAX_HANDLE_SZ) ||
WARN_ON(fh_type == FILEID_INVALID))
- goto out;
+ goto out_err;
- BUILD_BUG_ON(MAX_HANDLE_SZ + offsetof(struct ovl_fh, fid) > 255);
- fh_len = offsetof(struct ovl_fh, fid) + buflen;
- fh = kmalloc(fh_len, GFP_KERNEL);
- if (!fh) {
- fh = ERR_PTR(-ENOMEM);
- goto out;
- }
-
- fh->version = OVL_FH_VERSION;
- fh->magic = OVL_FH_MAGIC;
- fh->type = fh_type;
- fh->flags = OVL_FH_FLAG_CPU_ENDIAN;
+ fh->fb.version = OVL_FH_VERSION;
+ fh->fb.magic = OVL_FH_MAGIC;
+ fh->fb.type = fh_type;
+ fh->fb.flags = OVL_FH_FLAG_CPU_ENDIAN;
/*
* When we will want to decode an overlay dentry from this handle
* and all layers are on the same fs, if we get a disconncted real
@@ -270,14 +266,15 @@ struct ovl_fh *ovl_encode_real_fh(struct dentry *real, bool is_upper)
* it to upperdentry or to lowerstack is by checking this flag.
*/
if (is_upper)
- fh->flags |= OVL_FH_FLAG_PATH_UPPER;
- fh->len = fh_len;
- fh->uuid = *uuid;
- memcpy(fh->fid, buf, buflen);
+ fh->fb.flags |= OVL_FH_FLAG_PATH_UPPER;
+ fh->fb.len = sizeof(fh->fb) + buflen;
+ fh->fb.uuid = *uuid;
-out:
- kfree(buf);
return fh;
+
+out_err:
+ kfree(fh);
+ return ERR_PTR(err);
}
int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
@@ -300,8 +297,8 @@ int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
/*
* Do not fail when upper doesn't support xattrs.
*/
- err = ovl_check_setxattr(dentry, upper, OVL_XATTR_ORIGIN, fh,
- fh ? fh->len : 0, 0);
+ err = ovl_check_setxattr(dentry, upper, OVL_XATTR_ORIGIN, fh->buf,
+ fh ? fh->fb.len : 0, 0);
kfree(fh);
return err;
@@ -317,7 +314,7 @@ static int ovl_set_upper_fh(struct dentry *upper, struct dentry *index)
if (IS_ERR(fh))
return PTR_ERR(fh);
- err = ovl_do_setxattr(index, OVL_XATTR_UPPER, fh, fh->len, 0);
+ err = ovl_do_setxattr(index, OVL_XATTR_UPPER, fh->buf, fh->fb.len, 0);
kfree(fh);
return err;
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index 702aa63f6774..29abdb1d3b5c 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -1170,7 +1170,7 @@ static int ovl_rename(struct inode *olddir, struct dentry *old,
if (newdentry == trap)
goto out_dput;
- if (WARN_ON(olddentry->d_inode == newdentry->d_inode))
+ if (olddentry->d_inode == newdentry->d_inode)
goto out_dput;
err = 0;
diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c
index 73c9775215b3..70e55588aedc 100644
--- a/fs/overlayfs/export.c
+++ b/fs/overlayfs/export.c
@@ -211,10 +211,11 @@ static int ovl_check_encode_origin(struct dentry *dentry)
return 1;
}
-static int ovl_d_to_fh(struct dentry *dentry, char *buf, int buflen)
+static int ovl_dentry_to_fid(struct dentry *dentry, u32 *fid, int buflen)
{
struct ovl_fh *fh = NULL;
int err, enc_lower;
+ int len;
/*
* Check if we should encode a lower or upper file handle and maybe
@@ -231,11 +232,12 @@ static int ovl_d_to_fh(struct dentry *dentry, char *buf, int buflen)
return PTR_ERR(fh);
err = -EOVERFLOW;
- if (fh->len > buflen)
+ len = OVL_FH_LEN(fh);
+ if (len > buflen)
goto fail;
- memcpy(buf, (char *)fh, fh->len);
- err = fh->len;
+ memcpy(fid, fh, len);
+ err = len;
out:
kfree(fh);
@@ -243,31 +245,16 @@ out:
fail:
pr_warn_ratelimited("overlayfs: failed to encode file handle (%pd2, err=%i, buflen=%d, len=%d, type=%d)\n",
- dentry, err, buflen, fh ? (int)fh->len : 0,
- fh ? fh->type : 0);
+ dentry, err, buflen, fh ? (int)fh->fb.len : 0,
+ fh ? fh->fb.type : 0);
goto out;
}
-static int ovl_dentry_to_fh(struct dentry *dentry, u32 *fid, int *max_len)
-{
- int res, len = *max_len << 2;
-
- res = ovl_d_to_fh(dentry, (char *)fid, len);
- if (res <= 0)
- return FILEID_INVALID;
-
- len = res;
-
- /* Round up to dwords */
- *max_len = (len + 3) >> 2;
- return OVL_FILEID;
-}
-
static int ovl_encode_fh(struct inode *inode, u32 *fid, int *max_len,
struct inode *parent)
{
struct dentry *dentry;
- int type;
+ int bytes = *max_len << 2;
/* TODO: encode connectable file handles */
if (parent)
@@ -277,10 +264,14 @@ static int ovl_encode_fh(struct inode *inode, u32 *fid, int *max_len,
if (WARN_ON(!dentry))
return FILEID_INVALID;
- type = ovl_dentry_to_fh(dentry, fid, max_len);
-
+ bytes = ovl_dentry_to_fid(dentry, fid, bytes);
dput(dentry);
- return type;
+ if (bytes <= 0)
+ return FILEID_INVALID;
+
+ *max_len = bytes >> 2;
+
+ return OVL_FILEID_V1;
}
/*
@@ -777,24 +768,45 @@ out_err:
goto out;
}
+static struct ovl_fh *ovl_fid_to_fh(struct fid *fid, int buflen, int fh_type)
+{
+ struct ovl_fh *fh;
+
+ /* If on-wire inner fid is aligned - nothing to do */
+ if (fh_type == OVL_FILEID_V1)
+ return (struct ovl_fh *)fid;
+
+ if (fh_type != OVL_FILEID_V0)
+ return ERR_PTR(-EINVAL);
+
+ fh = kzalloc(buflen, GFP_KERNEL);
+ if (!fh)
+ return ERR_PTR(-ENOMEM);
+
+ /* Copy unaligned inner fh into aligned buffer */
+ memcpy(&fh->fb, fid, buflen - OVL_FH_WIRE_OFFSET);
+ return fh;
+}
+
static struct dentry *ovl_fh_to_dentry(struct super_block *sb, struct fid *fid,
int fh_len, int fh_type)
{
struct dentry *dentry = NULL;
- struct ovl_fh *fh = (struct ovl_fh *) fid;
+ struct ovl_fh *fh = NULL;
int len = fh_len << 2;
unsigned int flags = 0;
int err;
- err = -EINVAL;
- if (fh_type != OVL_FILEID)
+ fh = ovl_fid_to_fh(fid, len, fh_type);
+ err = PTR_ERR(fh);
+ if (IS_ERR(fh))
goto out_err;
err = ovl_check_fh_len(fh, len);
if (err)
goto out_err;
- flags = fh->flags;
+ flags = fh->fb.flags;
dentry = (flags & OVL_FH_FLAG_PATH_UPPER) ?
ovl_upper_fh_to_d(sb, fh) :
ovl_lower_fh_to_d(sb, fh);
@@ -802,12 +814,18 @@ static struct dentry *ovl_fh_to_dentry(struct super_block *sb, struct fid *fid,
if (IS_ERR(dentry) && err != -ESTALE)
goto out_err;
+out:
+ /* We may have needed to re-align OVL_FILEID_V0 */
+ if (!IS_ERR_OR_NULL(fh) && fh != (void *)fid)
+ kfree(fh);
+
return dentry;
out_err:
pr_warn_ratelimited("overlayfs: failed to decode file handle (len=%d, type=%d, flags=%x, err=%i)\n",
- len, fh_type, flags, err);
- return ERR_PTR(err);
+ fh_len, fh_type, flags, err);
+ dentry = ERR_PTR(err);
+ goto out;
}
static struct dentry *ovl_fh_to_parent(struct super_block *sb, struct fid *fid,
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index bc14781886bf..b045cf1826fc 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -200,8 +200,14 @@ int ovl_getattr(const struct path *path, struct kstat *stat,
if (ovl_test_flag(OVL_INDEX, d_inode(dentry)) ||
(!ovl_verify_lower(dentry->d_sb) &&
(is_dir || lowerstat.nlink == 1))) {
- stat->ino = lowerstat.ino;
lower_layer = ovl_layer_lower(dentry);
+ /*
+ * Cannot use origin st_dev;st_ino because
+ * origin inode content may differ from overlay
+ * inode content.
+ */
+ if (samefs || lower_layer->fsid)
+ stat->ino = lowerstat.ino;
}
/*
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index c269d6033525..76ff66339173 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -84,21 +84,21 @@ static int ovl_acceptable(void *ctx, struct dentry *dentry)
* Return -ENODATA for "origin unknown".
* Return <0 for an invalid file handle.
*/
-int ovl_check_fh_len(struct ovl_fh *fh, int fh_len)
+int ovl_check_fb_len(struct ovl_fb *fb, int fb_len)
{
- if (fh_len < sizeof(struct ovl_fh) || fh_len < fh->len)
+ if (fb_len < sizeof(struct ovl_fb) || fb_len < fb->len)
return -EINVAL;
- if (fh->magic != OVL_FH_MAGIC)
+ if (fb->magic != OVL_FH_MAGIC)
return -EINVAL;
/* Treat larger version and unknown flags as "origin unknown" */
- if (fh->version > OVL_FH_VERSION || fh->flags & ~OVL_FH_FLAG_ALL)
+ if (fb->version > OVL_FH_VERSION || fb->flags & ~OVL_FH_FLAG_ALL)
return -ENODATA;
/* Treat endianness mismatch as "origin unknown" */
- if (!(fh->flags & OVL_FH_FLAG_ANY_ENDIAN) &&
- (fh->flags & OVL_FH_FLAG_BIG_ENDIAN) != OVL_FH_FLAG_CPU_ENDIAN)
+ if (!(fb->flags & OVL_FH_FLAG_ANY_ENDIAN) &&
+ (fb->flags & OVL_FH_FLAG_BIG_ENDIAN) != OVL_FH_FLAG_CPU_ENDIAN)
return -ENODATA;
return 0;
@@ -119,15 +119,15 @@ static struct ovl_fh *ovl_get_fh(struct dentry *dentry, const char *name)
if (res == 0)
return NULL;
- fh = kzalloc(res, GFP_KERNEL);
+ fh = kzalloc(res + OVL_FH_WIRE_OFFSET, GFP_KERNEL);
if (!fh)
return ERR_PTR(-ENOMEM);
- res = vfs_getxattr(dentry, name, fh, res);
+ res = vfs_getxattr(dentry, name, fh->buf, res);
if (res < 0)
goto fail;
- err = ovl_check_fh_len(fh, res);
+ err = ovl_check_fb_len(&fh->fb, res);
if (err < 0) {
if (err == -ENODATA)
goto out;
@@ -158,12 +158,12 @@ struct dentry *ovl_decode_real_fh(struct ovl_fh *fh, struct vfsmount *mnt,
* Make sure that the stored uuid matches the uuid of the lower
* layer where file handle will be decoded.
*/
- if (!uuid_equal(&fh->uuid, &mnt->mnt_sb->s_uuid))
+ if (!uuid_equal(&fh->fb.uuid, &mnt->mnt_sb->s_uuid))
return NULL;
- bytes = (fh->len - offsetof(struct ovl_fh, fid));
- real = exportfs_decode_fh(mnt, (struct fid *)fh->fid,
- bytes >> 2, (int)fh->type,
+ bytes = (fh->fb.len - offsetof(struct ovl_fb, fid));
+ real = exportfs_decode_fh(mnt, (struct fid *)fh->fb.fid,
+ bytes >> 2, (int)fh->fb.type,
connected ? ovl_acceptable : NULL, mnt);
if (IS_ERR(real)) {
/*
@@ -173,7 +173,7 @@ struct dentry *ovl_decode_real_fh(struct ovl_fh *fh, struct vfsmount *mnt,
* index entries correctly.
*/
if (real == ERR_PTR(-ESTALE) &&
- !(fh->flags & OVL_FH_FLAG_PATH_UPPER))
+ !(fh->fb.flags & OVL_FH_FLAG_PATH_UPPER))
real = NULL;
return real;
}
@@ -323,6 +323,14 @@ int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, bool connected,
int i;
for (i = 0; i < ofs->numlower; i++) {
+ /*
+ * If lower fs uuid is not unique among lower fs we cannot match
+ * fh->uuid to layer.
+ */
+ if (ofs->lower_layers[i].fsid &&
+ ofs->lower_layers[i].fs->bad_uuid)
+ continue;
+
origin = ovl_decode_real_fh(fh, ofs->lower_layers[i].mnt,
connected);
if (origin)
@@ -400,7 +408,7 @@ static int ovl_verify_fh(struct dentry *dentry, const char *name,
if (IS_ERR(ofh))
return PTR_ERR(ofh);
- if (fh->len != ofh->len || memcmp(fh, ofh, fh->len))
+ if (fh->fb.len != ofh->fb.len || memcmp(&fh->fb, &ofh->fb, fh->fb.len))
err = -ESTALE;
kfree(ofh);
@@ -431,7 +439,7 @@ int ovl_verify_set_fh(struct dentry *dentry, const char *name,
err = ovl_verify_fh(dentry, name, fh);
if (set && err == -ENODATA)
- err = ovl_do_setxattr(dentry, name, fh, fh->len, 0);
+ err = ovl_do_setxattr(dentry, name, fh->buf, fh->fb.len, 0);
if (err)
goto fail;
@@ -505,20 +513,20 @@ int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index)
goto fail;
err = -EINVAL;
- if (index->d_name.len < sizeof(struct ovl_fh)*2)
+ if (index->d_name.len < sizeof(struct ovl_fb)*2)
goto fail;
err = -ENOMEM;
len = index->d_name.len / 2;
- fh = kzalloc(len, GFP_KERNEL);
+ fh = kzalloc(len + OVL_FH_WIRE_OFFSET, GFP_KERNEL);
if (!fh)
goto fail;
err = -EINVAL;
- if (hex2bin((u8 *)fh, index->d_name.name, len))
+ if (hex2bin(fh->buf, index->d_name.name, len))
goto fail;
- err = ovl_check_fh_len(fh, len);
+ err = ovl_check_fb_len(&fh->fb, len);
if (err)
goto fail;
@@ -597,11 +605,11 @@ static int ovl_get_index_name_fh(struct ovl_fh *fh, struct qstr *name)
{
char *n, *s;
- n = kcalloc(fh->len, 2, GFP_KERNEL);
+ n = kcalloc(fh->fb.len, 2, GFP_KERNEL);
if (!n)
return -ENOMEM;
- s = bin2hex(n, fh, fh->len);
+ s = bin2hex(n, fh->buf, fh->fb.len);
*name = (struct qstr) QSTR_INIT(n, s - n);
return 0;
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 6934bcf030f0..f283b1d69a9e 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -71,20 +71,36 @@ enum ovl_entry_flag {
#error Endianness not defined
#endif
-/* The type returned by overlay exportfs ops when encoding an ovl_fh handle */
-#define OVL_FILEID 0xfb
+/* The type used to be returned by overlay exportfs for misaligned fid */
+#define OVL_FILEID_V0 0xfb
+/* The type returned by overlay exportfs for 32bit aligned fid */
+#define OVL_FILEID_V1 0xf8
-/* On-disk and in-memeory format for redirect by file handle */
-struct ovl_fh {
+/* On-disk format for "origin" file handle */
+struct ovl_fb {
u8 version; /* 0 */
u8 magic; /* 0xfb */
u8 len; /* size of this header + size of fid */
u8 flags; /* OVL_FH_FLAG_* */
u8 type; /* fid_type of fid */
uuid_t uuid; /* uuid of filesystem */
- u8 fid[0]; /* file identifier */
+ u32 fid[0]; /* file identifier should be 32bit aligned in-memory */
} __packed;
+/* In-memory and on-wire format for overlay file handle */
+struct ovl_fh {
+ u8 padding[3]; /* make sure fb.fid is 32bit aligned */
+ union {
+ struct ovl_fb fb;
+ u8 buf[0];
+ };
+} __packed;
+
+#define OVL_FH_WIRE_OFFSET offsetof(struct ovl_fh, fb)
+#define OVL_FH_LEN(fh) (OVL_FH_WIRE_OFFSET + (fh)->fb.len)
+#define OVL_FH_FID_OFFSET (OVL_FH_WIRE_OFFSET + \
+ offsetof(struct ovl_fb, fid))
+
static inline int ovl_do_rmdir(struct inode *dir, struct dentry *dentry)
{
int err = vfs_rmdir(dir, dentry);
@@ -302,7 +318,13 @@ static inline void ovl_inode_unlock(struct inode *inode)
/* namei.c */
-int ovl_check_fh_len(struct ovl_fh *fh, int fh_len);
+int ovl_check_fb_len(struct ovl_fb *fb, int fb_len);
+
+static inline int ovl_check_fh_len(struct ovl_fh *fh, int fh_len)
+{
+ return ovl_check_fb_len(&fh->fb, fh_len - OVL_FH_WIRE_OFFSET);
+}
+
struct dentry *ovl_decode_real_fh(struct ovl_fh *fh, struct vfsmount *mnt,
bool connected);
int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, bool connected,
diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
index a8279280e88d..28348c44ea5b 100644
--- a/fs/overlayfs/ovl_entry.h
+++ b/fs/overlayfs/ovl_entry.h
@@ -22,6 +22,8 @@ struct ovl_config {
struct ovl_sb {
struct super_block *sb;
dev_t pseudo_dev;
+ /* Unusable (conflicting) uuid */
+ bool bad_uuid;
};
struct ovl_layer {
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index afbcb116a7f1..7621ff176d15 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -1255,7 +1255,7 @@ static bool ovl_lower_uuid_ok(struct ovl_fs *ofs, const uuid_t *uuid)
{
unsigned int i;
- if (!ofs->config.nfs_export && !(ofs->config.index && ofs->upper_mnt))
+ if (!ofs->config.nfs_export && !ofs->upper_mnt)
return true;
for (i = 0; i < ofs->numlowerfs; i++) {
@@ -1263,9 +1263,13 @@ static bool ovl_lower_uuid_ok(struct ovl_fs *ofs, const uuid_t *uuid)
* We use uuid to associate an overlay lower file handle with a
* lower layer, so we can accept lower fs with null uuid as long
* as all lower layers with null uuid are on the same fs.
+ * if we detect multiple lower fs with the same uuid, we
+ * disable lower file handle decoding on all of them.
*/
- if (uuid_equal(&ofs->lower_fs[i].sb->s_uuid, uuid))
+ if (uuid_equal(&ofs->lower_fs[i].sb->s_uuid, uuid)) {
+ ofs->lower_fs[i].bad_uuid = true;
return false;
+ }
}
return true;
}
@@ -1277,6 +1281,7 @@ static int ovl_get_fsid(struct ovl_fs *ofs, const struct path *path)
unsigned int i;
dev_t dev;
int err;
+ bool bad_uuid = false;
/* fsid 0 is reserved for upper fs even with non upper overlay */
if (ofs->upper_mnt && ofs->upper_mnt->mnt_sb == sb)
@@ -1288,11 +1293,15 @@ static int ovl_get_fsid(struct ovl_fs *ofs, const struct path *path)
}
if (!ovl_lower_uuid_ok(ofs, &sb->s_uuid)) {
- ofs->config.index = false;
- ofs->config.nfs_export = false;
- pr_warn("overlayfs: %s uuid detected in lower fs '%pd2', falling back to index=off,nfs_export=off.\n",
- uuid_is_null(&sb->s_uuid) ? "null" : "conflicting",
- path->dentry);
+ bad_uuid = true;
+ if (ofs->config.index || ofs->config.nfs_export) {
+ ofs->config.index = false;
+ ofs->config.nfs_export = false;
+ pr_warn("overlayfs: %s uuid detected in lower fs '%pd2', falling back to index=off,nfs_export=off.\n",
+ uuid_is_null(&sb->s_uuid) ? "null" :
+ "conflicting",
+ path->dentry);
+ }
}
err = get_anon_bdev(&dev);
@@ -1303,6 +1312,7 @@ static int ovl_get_fsid(struct ovl_fs *ofs, const struct path *path)
ofs->lower_fs[ofs->numlowerfs].sb = sb;
ofs->lower_fs[ofs->numlowerfs].pseudo_dev = dev;
+ ofs->lower_fs[ofs->numlowerfs].bad_uuid = bad_uuid;
ofs->numlowerfs++;
return ofs->numlowerfs;
diff --git a/fs/pipe.c b/fs/pipe.c
index 87109e761fa5..57502c3c0fba 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -364,17 +364,39 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
ret = -EAGAIN;
break;
}
- if (signal_pending(current)) {
- if (!ret)
- ret = -ERESTARTSYS;
- break;
- }
__pipe_unlock(pipe);
- if (was_full) {
+
+ /*
+ * We only get here if we didn't actually read anything.
+ *
+ * However, we could have seen (and removed) a zero-sized
+ * pipe buffer, and might have made space in the buffers
+ * that way.
+ *
+ * You can't make zero-sized pipe buffers by doing an empty
+ * write (not even in packet mode), but they can happen if
+ * the writer gets an EFAULT when trying to fill a buffer
+ * that already got allocated and inserted in the buffer
+ * array.
+ *
+ * So we still need to wake up any pending writers in the
+ * _very_ unlikely case that the pipe was full, but we got
+ * no data.
+ */
+ if (unlikely(was_full)) {
wake_up_interruptible_sync_poll(&pipe->wait, EPOLLOUT | EPOLLWRNORM);
kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
}
- wait_event_interruptible(pipe->wait, pipe_readable(pipe));
+
+ /*
+ * But because we didn't read anything, at this point we can
+ * just return directly with -ERESTARTSYS if we're interrupted,
+ * since we've done any required wakeups and there's no need
+ * to mark anything accessed. And we've dropped the lock.
+ */
+ if (wait_event_interruptible(pipe->wait, pipe_readable(pipe)) < 0)
+ return -ERESTARTSYS;
+
__pipe_lock(pipe);
was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage);
}
@@ -559,7 +581,7 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
}
wait_event_interruptible(pipe->wait, pipe_writable(pipe));
__pipe_lock(pipe);
- was_empty = pipe_empty(head, pipe->tail);
+ was_empty = pipe_empty(pipe->head, pipe->tail);
}
out:
__pipe_unlock(pipe);
diff --git a/fs/posix_acl.c b/fs/posix_acl.c
index 84ad1c90d535..249672bf54fe 100644
--- a/fs/posix_acl.c
+++ b/fs/posix_acl.c
@@ -631,12 +631,15 @@ EXPORT_SYMBOL_GPL(posix_acl_create);
/**
* posix_acl_update_mode - update mode in set_acl
+ * @inode: target inode
+ * @mode_p: mode (pointer) for update
+ * @acl: acl pointer
*
* Update the file mode when setting an ACL: compute the new file permission
* bits based on the ACL. In addition, if the ACL is equivalent to the new
- * file mode, set *acl to NULL to indicate that no ACL should be set.
+ * file mode, set *@acl to NULL to indicate that no ACL should be set.
*
- * As with chmod, clear the setgit bit if the caller is not in the owning group
+ * As with chmod, clear the setgid bit if the caller is not in the owning group
* or capable of CAP_FSETID (see inode_change_ok).
*
* Called from set_acl inode operations.
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index 37bdbec5b402..fd931d3e77be 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -134,7 +134,7 @@ static int show_stat(struct seq_file *p, void *v)
softirq += cpustat[CPUTIME_SOFTIRQ];
steal += cpustat[CPUTIME_STEAL];
guest += cpustat[CPUTIME_GUEST];
- guest_nice += cpustat[CPUTIME_USER];
+ guest_nice += cpustat[CPUTIME_GUEST_NICE];
sum += kstat_cpu_irqs_sum(i);
sum += arch_irq_stat_cpu(i);
@@ -175,7 +175,7 @@ static int show_stat(struct seq_file *p, void *v)
softirq = cpustat[CPUTIME_SOFTIRQ];
steal = cpustat[CPUTIME_STEAL];
guest = cpustat[CPUTIME_GUEST];
- guest_nice = cpustat[CPUTIME_USER];
+ guest_nice = cpustat[CPUTIME_GUEST_NICE];
seq_printf(p, "cpu%d", i);
seq_put_decimal_ull(p, " ", nsec_to_clock_t(user));
seq_put_decimal_ull(p, " ", nsec_to_clock_t(nice));
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index 8caff834f002..487ee39b438a 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -407,6 +407,17 @@ static int notrace ramoops_pstore_write(struct pstore_record *record)
prz = cxt->dprzs[cxt->dump_write_cnt];
+ /*
+ * Since this is a new crash dump, we need to reset the buffer in
+ * case it still has an old dump present. Without this, the new dump
+ * will get appended, which would seriously confuse anything trying
+ * to check dump file contents. Specifically, ramoops_read_kmsg_hdr()
+ * expects to find a dump header in the beginning of buffer data, so
+ * we must to reset the buffer values, in order to ensure that the
+ * header will be written to the beginning of the buffer.
+ */
+ persistent_ram_zap(prz);
+
/* Build header and append record contents. */
hlen = ramoops_write_kmsg_hdr(prz, record);
if (!hlen)
@@ -577,6 +588,7 @@ static int ramoops_init_przs(const char *name,
dev_err(dev, "failed to request %s mem region (0x%zx@0x%llx): %d\n",
name, record_size,
(unsigned long long)*paddr, err);
+ kfree(label);
while (i > 0) {
i--;
@@ -622,6 +634,7 @@ static int ramoops_init_prz(const char *name,
dev_err(dev, "failed to request %s mem region (0x%zx@0x%llx): %d\n",
name, sz, (unsigned long long)*paddr, err);
+ kfree(label);
return err;
}
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index b0688c02dc90..b6a4f692d345 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -984,6 +984,7 @@ static int add_dquot_ref(struct super_block *sb, int type)
* later.
*/
old_inode = inode;
+ cond_resched();
spin_lock(&sb->s_inode_list_lock);
}
spin_unlock(&sb->s_inode_list_lock);
diff --git a/fs/super.c b/fs/super.c
index cfadab2cbf35..cd352530eca9 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -448,10 +448,12 @@ void generic_shutdown_super(struct super_block *sb)
sync_filesystem(sb);
sb->s_flags &= ~SB_ACTIVE;
- fsnotify_sb_delete(sb);
cgroup_writeback_umount();
+ /* evict all inodes with zero refcount */
evict_inodes(sb);
+ /* only nonzero refcount inodes can have marks */
+ fsnotify_sb_delete(sb);
if (sb->s_dio_done_wq) {
destroy_workqueue(sb->s_dio_done_wq);
diff --git a/fs/verity/enable.c b/fs/verity/enable.c
index eabc6ac19906..b79e3fd19d11 100644
--- a/fs/verity/enable.c
+++ b/fs/verity/enable.c
@@ -315,7 +315,7 @@ int fsverity_ioctl_enable(struct file *filp, const void __user *uarg)
if (arg.block_size != PAGE_SIZE)
return -EINVAL;
- if (arg.salt_size > FIELD_SIZEOF(struct fsverity_descriptor, salt))
+ if (arg.salt_size > sizeof_field(struct fsverity_descriptor, salt))
return -EMSGSIZE;
if (arg.sig_size > FS_VERITY_MAX_SIGNATURE_SIZE)
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index c284e10af491..fc93fd88ec89 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -2248,24 +2248,32 @@ xfs_alloc_longest_free_extent(
return pag->pagf_flcount > 0 || pag->pagf_longest > 0;
}
+/*
+ * Compute the minimum length of the AGFL in the given AG. If @pag is NULL,
+ * return the largest possible minimum length.
+ */
unsigned int
xfs_alloc_min_freelist(
struct xfs_mount *mp,
struct xfs_perag *pag)
{
+ /* AG btrees have at least 1 level. */
+ static const uint8_t fake_levels[XFS_BTNUM_AGF] = {1, 1, 1};
+ const uint8_t *levels = pag ? pag->pagf_levels : fake_levels;
unsigned int min_free;
+ ASSERT(mp->m_ag_maxlevels > 0);
+
/* space needed by-bno freespace btree */
- min_free = min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_BNOi] + 1,
+ min_free = min_t(unsigned int, levels[XFS_BTNUM_BNOi] + 1,
mp->m_ag_maxlevels);
/* space needed by-size freespace btree */
- min_free += min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_CNTi] + 1,
+ min_free += min_t(unsigned int, levels[XFS_BTNUM_CNTi] + 1,
mp->m_ag_maxlevels);
/* space needed reverse mapping used space btree */
if (xfs_sb_version_hasrmapbt(&mp->m_sb))
- min_free += min_t(unsigned int,
- pag->pagf_levels[XFS_BTNUM_RMAPi] + 1,
- mp->m_rmap_maxlevels);
+ min_free += min_t(unsigned int, levels[XFS_BTNUM_RMAPi] + 1,
+ mp->m_rmap_maxlevels);
return min_free;
}
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index a9ad1f991ba3..4c2e046fbfad 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -4561,7 +4561,7 @@ xfs_bmapi_convert_delalloc(
struct xfs_mount *mp = ip->i_mount;
xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset);
struct xfs_bmalloca bma = { NULL };
- u16 flags = 0;
+ uint16_t flags = 0;
struct xfs_trans *tp;
int error;
@@ -5972,8 +5972,7 @@ xfs_bmap_insert_extents(
goto del_cursor;
}
- if (XFS_IS_CORRUPT(mp,
- stop_fsb >= got.br_startoff + got.br_blockcount)) {
+ if (XFS_IS_CORRUPT(mp, stop_fsb > got.br_startoff)) {
error = -EFSCORRUPTED;
goto del_cursor;
}
diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c
index 0aa87cbde49e..dd6fcaaea318 100644
--- a/fs/xfs/libxfs/xfs_dir2.c
+++ b/fs/xfs/libxfs/xfs_dir2.c
@@ -724,3 +724,24 @@ xfs_dir2_namecheck(
/* There shouldn't be any slashes or nulls here */
return !memchr(name, '/', length) && !memchr(name, 0, length);
}
+
+xfs_dahash_t
+xfs_dir2_hashname(
+ struct xfs_mount *mp,
+ struct xfs_name *name)
+{
+ if (unlikely(xfs_sb_version_hasasciici(&mp->m_sb)))
+ return xfs_ascii_ci_hashname(name);
+ return xfs_da_hashname(name->name, name->len);
+}
+
+enum xfs_dacmp
+xfs_dir2_compname(
+ struct xfs_da_args *args,
+ const unsigned char *name,
+ int len)
+{
+ if (unlikely(xfs_sb_version_hasasciici(&args->dp->i_mount->m_sb)))
+ return xfs_ascii_ci_compname(args, name, len);
+ return xfs_da_compname(args, name, len);
+}
diff --git a/fs/xfs/libxfs/xfs_dir2_priv.h b/fs/xfs/libxfs/xfs_dir2_priv.h
index c031c53d0f0d..01ee0b926572 100644
--- a/fs/xfs/libxfs/xfs_dir2_priv.h
+++ b/fs/xfs/libxfs/xfs_dir2_priv.h
@@ -175,6 +175,12 @@ extern int xfs_dir2_sf_lookup(struct xfs_da_args *args);
extern int xfs_dir2_sf_removename(struct xfs_da_args *args);
extern int xfs_dir2_sf_replace(struct xfs_da_args *args);
extern xfs_failaddr_t xfs_dir2_sf_verify(struct xfs_inode *ip);
+int xfs_dir2_sf_entsize(struct xfs_mount *mp,
+ struct xfs_dir2_sf_hdr *hdr, int len);
+void xfs_dir2_sf_put_ino(struct xfs_mount *mp, struct xfs_dir2_sf_hdr *hdr,
+ struct xfs_dir2_sf_entry *sfep, xfs_ino_t ino);
+void xfs_dir2_sf_put_ftype(struct xfs_mount *mp,
+ struct xfs_dir2_sf_entry *sfep, uint8_t ftype);
/* xfs_dir2_readdir.c */
extern int xfs_readdir(struct xfs_trans *tp, struct xfs_inode *dp,
@@ -194,25 +200,8 @@ xfs_dir2_data_entsize(
return round_up(len, XFS_DIR2_DATA_ALIGN);
}
-static inline xfs_dahash_t
-xfs_dir2_hashname(
- struct xfs_mount *mp,
- struct xfs_name *name)
-{
- if (unlikely(xfs_sb_version_hasasciici(&mp->m_sb)))
- return xfs_ascii_ci_hashname(name);
- return xfs_da_hashname(name->name, name->len);
-}
-
-static inline enum xfs_dacmp
-xfs_dir2_compname(
- struct xfs_da_args *args,
- const unsigned char *name,
- int len)
-{
- if (unlikely(xfs_sb_version_hasasciici(&args->dp->i_mount->m_sb)))
- return xfs_ascii_ci_compname(args, name, len);
- return xfs_da_compname(args, name, len);
-}
+xfs_dahash_t xfs_dir2_hashname(struct xfs_mount *mp, struct xfs_name *name);
+enum xfs_dacmp xfs_dir2_compname(struct xfs_da_args *args,
+ const unsigned char *name, int len);
#endif /* __XFS_DIR2_PRIV_H__ */
diff --git a/fs/xfs/libxfs/xfs_dir2_sf.c b/fs/xfs/libxfs/xfs_dir2_sf.c
index 8b94d33d232f..7b7f6fb2ea3b 100644
--- a/fs/xfs/libxfs/xfs_dir2_sf.c
+++ b/fs/xfs/libxfs/xfs_dir2_sf.c
@@ -37,7 +37,7 @@ static void xfs_dir2_sf_check(xfs_da_args_t *args);
static void xfs_dir2_sf_toino4(xfs_da_args_t *args);
static void xfs_dir2_sf_toino8(xfs_da_args_t *args);
-static int
+int
xfs_dir2_sf_entsize(
struct xfs_mount *mp,
struct xfs_dir2_sf_hdr *hdr,
@@ -84,7 +84,7 @@ xfs_dir2_sf_get_ino(
return get_unaligned_be64(from) & XFS_MAXINUMBER;
}
-static void
+void
xfs_dir2_sf_put_ino(
struct xfs_mount *mp,
struct xfs_dir2_sf_hdr *hdr,
@@ -145,7 +145,7 @@ xfs_dir2_sf_get_ftype(
return XFS_DIR3_FT_UNKNOWN;
}
-static void
+void
xfs_dir2_sf_put_ftype(
struct xfs_mount *mp,
struct xfs_dir2_sf_entry *sfep,
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index 988cde7744e6..5b759af4d165 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -2909,3 +2909,67 @@ xfs_ialloc_setup_geometry(
else
igeo->ialloc_align = 0;
}
+
+/* Compute the location of the root directory inode that is laid out by mkfs. */
+xfs_ino_t
+xfs_ialloc_calc_rootino(
+ struct xfs_mount *mp,
+ int sunit)
+{
+ struct xfs_ino_geometry *igeo = M_IGEO(mp);
+ xfs_agblock_t first_bno;
+
+ /*
+ * Pre-calculate the geometry of AG 0. We know what it looks like
+ * because libxfs knows how to create allocation groups now.
+ *
+ * first_bno is the first block in which mkfs could possibly have
+ * allocated the root directory inode, once we factor in the metadata
+ * that mkfs formats before it. Namely, the four AG headers...
+ */
+ first_bno = howmany(4 * mp->m_sb.sb_sectsize, mp->m_sb.sb_blocksize);
+
+ /* ...the two free space btree roots... */
+ first_bno += 2;
+
+ /* ...the inode btree root... */
+ first_bno += 1;
+
+ /* ...the initial AGFL... */
+ first_bno += xfs_alloc_min_freelist(mp, NULL);
+
+ /* ...the free inode btree root... */
+ if (xfs_sb_version_hasfinobt(&mp->m_sb))
+ first_bno++;
+
+ /* ...the reverse mapping btree root... */
+ if (xfs_sb_version_hasrmapbt(&mp->m_sb))
+ first_bno++;
+
+ /* ...the reference count btree... */
+ if (xfs_sb_version_hasreflink(&mp->m_sb))
+ first_bno++;
+
+ /*
+ * ...and the log, if it is allocated in the first allocation group.
+ *
+ * This can happen with filesystems that only have a single
+ * allocation group, or very odd geometries created by old mkfs
+ * versions on very small filesystems.
+ */
+ if (mp->m_sb.sb_logstart &&
+ XFS_FSB_TO_AGNO(mp, mp->m_sb.sb_logstart) == 0)
+ first_bno += mp->m_sb.sb_logblocks;
+
+ /*
+ * Now round first_bno up to whatever allocation alignment is given
+ * by the filesystem or was passed in.
+ */
+ if (xfs_sb_version_hasdalign(&mp->m_sb) && igeo->ialloc_align > 0)
+ first_bno = roundup(first_bno, sunit);
+ else if (xfs_sb_version_hasalign(&mp->m_sb) &&
+ mp->m_sb.sb_inoalignmt > 1)
+ first_bno = roundup(first_bno, mp->m_sb.sb_inoalignmt);
+
+ return XFS_AGINO_TO_INO(mp, 0, XFS_AGB_TO_AGINO(mp, first_bno));
+}
diff --git a/fs/xfs/libxfs/xfs_ialloc.h b/fs/xfs/libxfs/xfs_ialloc.h
index 323592d563d5..72b3468b97b1 100644
--- a/fs/xfs/libxfs/xfs_ialloc.h
+++ b/fs/xfs/libxfs/xfs_ialloc.h
@@ -152,5 +152,6 @@ int xfs_inobt_insert_rec(struct xfs_btree_cur *cur, uint16_t holemask,
int xfs_ialloc_cluster_alignment(struct xfs_mount *mp);
void xfs_ialloc_setup_geometry(struct xfs_mount *mp);
+xfs_ino_t xfs_ialloc_calc_rootino(struct xfs_mount *mp, int sunit);
#endif /* __XFS_IALLOC_H__ */
diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c
index c55cd9a3dec9..7a9c04920505 100644
--- a/fs/xfs/libxfs/xfs_trans_resv.c
+++ b/fs/xfs/libxfs/xfs_trans_resv.c
@@ -197,6 +197,24 @@ xfs_calc_inode_chunk_res(
}
/*
+ * Per-extent log reservation for the btree changes involved in freeing or
+ * allocating a realtime extent. We have to be able to log as many rtbitmap
+ * blocks as needed to mark inuse MAXEXTLEN blocks' worth of realtime extents,
+ * as well as the realtime summary block.
+ */
+static unsigned int
+xfs_rtalloc_log_count(
+ struct xfs_mount *mp,
+ unsigned int num_ops)
+{
+ unsigned int blksz = XFS_FSB_TO_B(mp, 1);
+ unsigned int rtbmp_bytes;
+
+ rtbmp_bytes = (MAXEXTLEN / mp->m_sb.sb_rextsize) / NBBY;
+ return (howmany(rtbmp_bytes, blksz) + 1) * num_ops;
+}
+
+/*
* Various log reservation values.
*
* These are based on the size of the file system block because that is what
@@ -218,13 +236,21 @@ xfs_calc_inode_chunk_res(
/*
* In a write transaction we can allocate a maximum of 2
- * extents. This gives:
+ * extents. This gives (t1):
* the inode getting the new extents: inode size
* the inode's bmap btree: max depth * block size
* the agfs of the ags from which the extents are allocated: 2 * sector
* the superblock free block counter: sector size
* the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
- * And the bmap_finish transaction can free bmap blocks in a join:
+ * Or, if we're writing to a realtime file (t2):
+ * the inode getting the new extents: inode size
+ * the inode's bmap btree: max depth * block size
+ * the agfs of the ags from which the extents are allocated: 2 * sector
+ * the superblock free block counter: sector size
+ * the realtime bitmap: ((MAXEXTLEN / rtextsize) / NBBY) bytes
+ * the realtime summary: 1 block
+ * the allocation btrees: 2 trees * (2 * max depth - 1) * block size
+ * And the bmap_finish transaction can free bmap blocks in a join (t3):
* the agfs of the ags containing the blocks: 2 * sector size
* the agfls of the ags containing the blocks: 2 * sector size
* the super block free block counter: sector size
@@ -234,40 +260,72 @@ STATIC uint
xfs_calc_write_reservation(
struct xfs_mount *mp)
{
- return XFS_DQUOT_LOGRES(mp) +
- max((xfs_calc_inode_res(mp, 1) +
+ unsigned int t1, t2, t3;
+ unsigned int blksz = XFS_FSB_TO_B(mp, 1);
+
+ t1 = xfs_calc_inode_res(mp, 1) +
+ xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK), blksz) +
+ xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
+ xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2), blksz);
+
+ if (xfs_sb_version_hasrealtime(&mp->m_sb)) {
+ t2 = xfs_calc_inode_res(mp, 1) +
xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK),
- XFS_FSB_TO_B(mp, 1)) +
+ blksz) +
xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
- xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2),
- XFS_FSB_TO_B(mp, 1))),
- (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
- xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2),
- XFS_FSB_TO_B(mp, 1))));
+ xfs_calc_buf_res(xfs_rtalloc_log_count(mp, 1), blksz) +
+ xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), blksz);
+ } else {
+ t2 = 0;
+ }
+
+ t3 = xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
+ xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2), blksz);
+
+ return XFS_DQUOT_LOGRES(mp) + max3(t1, t2, t3);
}
/*
- * In truncating a file we free up to two extents at once. We can modify:
+ * In truncating a file we free up to two extents at once. We can modify (t1):
* the inode being truncated: inode size
* the inode's bmap btree: (max depth + 1) * block size
- * And the bmap_finish transaction can free the blocks and bmap blocks:
+ * And the bmap_finish transaction can free the blocks and bmap blocks (t2):
* the agf for each of the ags: 4 * sector size
* the agfl for each of the ags: 4 * sector size
* the super block to reflect the freed blocks: sector size
* worst case split in allocation btrees per extent assuming 4 extents:
* 4 exts * 2 trees * (2 * max depth - 1) * block size
+ * Or, if it's a realtime file (t3):
+ * the agf for each of the ags: 2 * sector size
+ * the agfl for each of the ags: 2 * sector size
+ * the super block to reflect the freed blocks: sector size
+ * the realtime bitmap: 2 exts * ((MAXEXTLEN / rtextsize) / NBBY) bytes
+ * the realtime summary: 2 exts * 1 block
+ * worst case split in allocation btrees per extent assuming 2 extents:
+ * 2 exts * 2 trees * (2 * max depth - 1) * block size
*/
STATIC uint
xfs_calc_itruncate_reservation(
struct xfs_mount *mp)
{
- return XFS_DQUOT_LOGRES(mp) +
- max((xfs_calc_inode_res(mp, 1) +
- xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1,
- XFS_FSB_TO_B(mp, 1))),
- (xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) +
- xfs_calc_buf_res(xfs_allocfree_log_count(mp, 4),
- XFS_FSB_TO_B(mp, 1))));
+ unsigned int t1, t2, t3;
+ unsigned int blksz = XFS_FSB_TO_B(mp, 1);
+
+ t1 = xfs_calc_inode_res(mp, 1) +
+ xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1, blksz);
+
+ t2 = xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) +
+ xfs_calc_buf_res(xfs_allocfree_log_count(mp, 4), blksz);
+
+ if (xfs_sb_version_hasrealtime(&mp->m_sb)) {
+ t3 = xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
+ xfs_calc_buf_res(xfs_rtalloc_log_count(mp, 2), blksz) +
+ xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2), blksz);
+ } else {
+ t3 = 0;
+ }
+
+ return XFS_DQUOT_LOGRES(mp) + max3(t1, t2, t3);
}
/*
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 2efd78a9719e..e62fb5216341 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -992,6 +992,7 @@ xfs_prepare_shift(
struct xfs_inode *ip,
loff_t offset)
{
+ struct xfs_mount *mp = ip->i_mount;
int error;
/*
@@ -1005,6 +1006,17 @@ xfs_prepare_shift(
}
/*
+ * Shift operations must stabilize the start block offset boundary along
+ * with the full range of the operation. If we don't, a COW writeback
+ * completion could race with an insert, front merge with the start
+ * extent (after split) during the shift and corrupt the file. Start
+ * with the block just prior to the start to stabilize the boundary.
+ */
+ offset = round_down(offset, 1 << mp->m_sb.sb_blocklog);
+ if (offset)
+ offset -= (1 << mp->m_sb.sb_blocklog);
+
+ /*
* Writeback and invalidate cache for the remainder of the file as we're
* about to shift down every extent from offset to EOF.
*/
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 3458a1264a3f..3984779e5911 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -956,7 +956,7 @@ xfs_buf_item_relse(
struct xfs_buf_log_item *bip = bp->b_log_item;
trace_xfs_buf_item_relse(bp, _RET_IP_);
- ASSERT(!(bip->bli_item.li_flags & XFS_LI_IN_AIL));
+ ASSERT(!test_bit(XFS_LI_IN_AIL, &bip->bli_item.li_flags));
bp->b_log_item = NULL;
if (list_empty(&bp->b_li_list))
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index fca65109cf24..56efe140c923 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -31,7 +31,7 @@
#include "xfs_reflink.h"
#include "xfs_extent_busy.h"
#include "xfs_health.h"
-
+#include "xfs_trace.h"
static DEFINE_MUTEX(xfs_uuid_table_mutex);
static int xfs_uuid_table_size;
@@ -360,66 +360,119 @@ release_buf:
}
/*
- * Update alignment values based on mount options and sb values
+ * If the sunit/swidth change would move the precomputed root inode value, we
+ * must reject the ondisk change because repair will stumble over that.
+ * However, we allow the mount to proceed because we never rejected this
+ * combination before. Returns true to update the sb, false otherwise.
+ */
+static inline int
+xfs_check_new_dalign(
+ struct xfs_mount *mp,
+ int new_dalign,
+ bool *update_sb)
+{
+ struct xfs_sb *sbp = &mp->m_sb;
+ xfs_ino_t calc_ino;
+
+ calc_ino = xfs_ialloc_calc_rootino(mp, new_dalign);
+ trace_xfs_check_new_dalign(mp, new_dalign, calc_ino);
+
+ if (sbp->sb_rootino == calc_ino) {
+ *update_sb = true;
+ return 0;
+ }
+
+ xfs_warn(mp,
+"Cannot change stripe alignment; would require moving root inode.");
+
+ /*
+ * XXX: Next time we add a new incompat feature, this should start
+ * returning -EINVAL to fail the mount. Until then, spit out a warning
+ * that we're ignoring the administrator's instructions.
+ */
+ xfs_warn(mp, "Skipping superblock stripe alignment update.");
+ *update_sb = false;
+ return 0;
+}
+
+/*
+ * If we were provided with new sunit/swidth values as mount options, make sure
+ * that they pass basic alignment and superblock feature checks, and convert
+ * them into the same units (FSB) that everything else expects. This step
+ * /must/ be done before computing the inode geometry.
*/
STATIC int
-xfs_update_alignment(xfs_mount_t *mp)
+xfs_validate_new_dalign(
+ struct xfs_mount *mp)
{
- xfs_sb_t *sbp = &(mp->m_sb);
+ if (mp->m_dalign == 0)
+ return 0;
- if (mp->m_dalign) {
+ /*
+ * If stripe unit and stripe width are not multiples
+ * of the fs blocksize turn off alignment.
+ */
+ if ((BBTOB(mp->m_dalign) & mp->m_blockmask) ||
+ (BBTOB(mp->m_swidth) & mp->m_blockmask)) {
+ xfs_warn(mp,
+ "alignment check failed: sunit/swidth vs. blocksize(%d)",
+ mp->m_sb.sb_blocksize);
+ return -EINVAL;
+ } else {
/*
- * If stripe unit and stripe width are not multiples
- * of the fs blocksize turn off alignment.
+ * Convert the stripe unit and width to FSBs.
*/
- if ((BBTOB(mp->m_dalign) & mp->m_blockmask) ||
- (BBTOB(mp->m_swidth) & mp->m_blockmask)) {
+ mp->m_dalign = XFS_BB_TO_FSBT(mp, mp->m_dalign);
+ if (mp->m_dalign && (mp->m_sb.sb_agblocks % mp->m_dalign)) {
xfs_warn(mp,
- "alignment check failed: sunit/swidth vs. blocksize(%d)",
- sbp->sb_blocksize);
+ "alignment check failed: sunit/swidth vs. agsize(%d)",
+ mp->m_sb.sb_agblocks);
return -EINVAL;
- } else {
- /*
- * Convert the stripe unit and width to FSBs.
- */
- mp->m_dalign = XFS_BB_TO_FSBT(mp, mp->m_dalign);
- if (mp->m_dalign && (sbp->sb_agblocks % mp->m_dalign)) {
- xfs_warn(mp,
- "alignment check failed: sunit/swidth vs. agsize(%d)",
- sbp->sb_agblocks);
- return -EINVAL;
- } else if (mp->m_dalign) {
- mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth);
- } else {
- xfs_warn(mp,
- "alignment check failed: sunit(%d) less than bsize(%d)",
- mp->m_dalign, sbp->sb_blocksize);
- return -EINVAL;
- }
- }
-
- /*
- * Update superblock with new values
- * and log changes
- */
- if (xfs_sb_version_hasdalign(sbp)) {
- if (sbp->sb_unit != mp->m_dalign) {
- sbp->sb_unit = mp->m_dalign;
- mp->m_update_sb = true;
- }
- if (sbp->sb_width != mp->m_swidth) {
- sbp->sb_width = mp->m_swidth;
- mp->m_update_sb = true;
- }
+ } else if (mp->m_dalign) {
+ mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth);
} else {
xfs_warn(mp,
- "cannot change alignment: superblock does not support data alignment");
+ "alignment check failed: sunit(%d) less than bsize(%d)",
+ mp->m_dalign, mp->m_sb.sb_blocksize);
return -EINVAL;
}
+ }
+
+ if (!xfs_sb_version_hasdalign(&mp->m_sb)) {
+ xfs_warn(mp,
+"cannot change alignment: superblock does not support data alignment");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/* Update alignment values based on mount options and sb values. */
+STATIC int
+xfs_update_alignment(
+ struct xfs_mount *mp)
+{
+ struct xfs_sb *sbp = &mp->m_sb;
+
+ if (mp->m_dalign) {
+ bool update_sb;
+ int error;
+
+ if (sbp->sb_unit == mp->m_dalign &&
+ sbp->sb_width == mp->m_swidth)
+ return 0;
+
+ error = xfs_check_new_dalign(mp, mp->m_dalign, &update_sb);
+ if (error || !update_sb)
+ return error;
+
+ sbp->sb_unit = mp->m_dalign;
+ sbp->sb_width = mp->m_swidth;
+ mp->m_update_sb = true;
} else if ((mp->m_flags & XFS_MOUNT_NOALIGN) != XFS_MOUNT_NOALIGN &&
xfs_sb_version_hasdalign(&mp->m_sb)) {
- mp->m_dalign = sbp->sb_unit;
- mp->m_swidth = sbp->sb_width;
+ mp->m_dalign = sbp->sb_unit;
+ mp->m_swidth = sbp->sb_width;
}
return 0;
@@ -648,12 +701,12 @@ xfs_mountfs(
}
/*
- * Check if sb_agblocks is aligned at stripe boundary
- * If sb_agblocks is NOT aligned turn off m_dalign since
- * allocator alignment is within an ag, therefore ag has
- * to be aligned at stripe boundary.
+ * If we were given new sunit/swidth options, do some basic validation
+ * checks and convert the incore dalign and swidth values to the
+ * same units (FSB) that everything else uses. This /must/ happen
+ * before computing the inode geometry.
*/
- error = xfs_update_alignment(mp);
+ error = xfs_validate_new_dalign(mp);
if (error)
goto out;
@@ -664,6 +717,17 @@ xfs_mountfs(
xfs_rmapbt_compute_maxlevels(mp);
xfs_refcountbt_compute_maxlevels(mp);
+ /*
+ * Check if sb_agblocks is aligned at stripe boundary. If sb_agblocks
+ * is NOT aligned turn off m_dalign since allocator alignment is within
+ * an ag, therefore ag has to be aligned at stripe boundary. Note that
+ * we must compute the free space and rmap btree geometry before doing
+ * this.
+ */
+ error = xfs_update_alignment(mp);
+ if (error)
+ goto out;
+
/* enable fail_at_unmount as default */
mp->m_fail_unmount = true;
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index c13bb3655e48..a86be7f807ee 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -3573,6 +3573,27 @@ DEFINE_KMEM_EVENT(kmem_alloc_large);
DEFINE_KMEM_EVENT(kmem_realloc);
DEFINE_KMEM_EVENT(kmem_zone_alloc);
+TRACE_EVENT(xfs_check_new_dalign,
+ TP_PROTO(struct xfs_mount *mp, int new_dalign, xfs_ino_t calc_rootino),
+ TP_ARGS(mp, new_dalign, calc_rootino),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(int, new_dalign)
+ __field(xfs_ino_t, sb_rootino)
+ __field(xfs_ino_t, calc_rootino)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_super->s_dev;
+ __entry->new_dalign = new_dalign;
+ __entry->sb_rootino = mp->m_sb.sb_rootino;
+ __entry->calc_rootino = calc_rootino;
+ ),
+ TP_printk("dev %d:%d new_dalign %d sb_rootino %llu calc_rootino %llu",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->new_dalign, __entry->sb_rootino,
+ __entry->calc_rootino)
+)
+
#endif /* _TRACE_XFS_H */
#undef TRACE_INCLUDE_PATH
diff --git a/include/linux/ahci_platform.h b/include/linux/ahci_platform.h
index 6782f0d45ebe..49e5383d4222 100644
--- a/include/linux/ahci_platform.h
+++ b/include/linux/ahci_platform.h
@@ -19,6 +19,8 @@ struct ahci_host_priv;
struct platform_device;
struct scsi_host_template;
+int ahci_platform_enable_phys(struct ahci_host_priv *hpriv);
+void ahci_platform_disable_phys(struct ahci_host_priv *hpriv);
int ahci_platform_enable_clks(struct ahci_host_priv *hpriv);
void ahci_platform_disable_clks(struct ahci_host_priv *hpriv);
int ahci_platform_enable_regulators(struct ahci_host_priv *hpriv);
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 3cdb84cdc488..853d92ceee64 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -470,6 +470,7 @@ extern struct bio *bio_copy_user_iov(struct request_queue *,
gfp_t);
extern int bio_uncopy_user(struct bio *);
void zero_fill_bio_iter(struct bio *bio, struct bvec_iter iter);
+void bio_truncate(struct bio *bio, unsigned new_size);
static inline void zero_fill_bio(struct bio *bio)
{
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index 19394c77ed99..e4a6949fd171 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -188,7 +188,6 @@ struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg,
struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
struct request_queue *q);
int blkcg_init_queue(struct request_queue *q);
-void blkcg_drain_queue(struct request_queue *q);
void blkcg_exit_queue(struct request_queue *q);
/* Blkio controller policy registration */
@@ -720,7 +719,6 @@ static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { ret
static inline struct blkcg_gq *blk_queue_root_blkg(struct request_queue *q)
{ return NULL; }
static inline int blkcg_init_queue(struct request_queue *q) { return 0; }
-static inline void blkcg_drain_queue(struct request_queue *q) { }
static inline void blkcg_exit_queue(struct request_queue *q) { }
static inline int blkcg_policy_register(struct blkcg_policy *pol) { return 0; }
static inline void blkcg_policy_unregister(struct blkcg_policy *pol) { }
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 169fd25f6bc2..a11d5b7dbbf3 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -85,6 +85,7 @@ int cgroup_bpf_inherit(struct cgroup *cgrp);
void cgroup_bpf_offline(struct cgroup *cgrp);
int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
+ struct bpf_prog *replace_prog,
enum bpf_attach_type type, u32 flags);
int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
enum bpf_attach_type type);
@@ -93,7 +94,8 @@ int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
/* Wrapper for __cgroup_bpf_*() protected by cgroup_mutex */
int cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
- enum bpf_attach_type type, u32 flags);
+ struct bpf_prog *replace_prog, enum bpf_attach_type type,
+ u32 flags);
int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
enum bpf_attach_type type, u32 flags);
int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
@@ -157,8 +159,8 @@ void bpf_cgroup_storage_link(struct bpf_cgroup_storage *storage,
struct cgroup *cgroup,
enum bpf_attach_type type);
void bpf_cgroup_storage_unlink(struct bpf_cgroup_storage *storage);
-int bpf_cgroup_storage_assign(struct bpf_prog *prog, struct bpf_map *map);
-void bpf_cgroup_storage_release(struct bpf_prog *prog, struct bpf_map *map);
+int bpf_cgroup_storage_assign(struct bpf_prog_aux *aux, struct bpf_map *map);
+void bpf_cgroup_storage_release(struct bpf_prog_aux *aux, struct bpf_map *map);
int bpf_percpu_cgroup_storage_copy(struct bpf_map *map, void *key, void *value);
int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
@@ -360,9 +362,9 @@ static inline int cgroup_bpf_prog_query(const union bpf_attr *attr,
static inline void bpf_cgroup_storage_set(
struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]) {}
-static inline int bpf_cgroup_storage_assign(struct bpf_prog *prog,
+static inline int bpf_cgroup_storage_assign(struct bpf_prog_aux *aux,
struct bpf_map *map) { return 0; }
-static inline void bpf_cgroup_storage_release(struct bpf_prog *prog,
+static inline void bpf_cgroup_storage_release(struct bpf_prog_aux *aux,
struct bpf_map *map) {}
static inline struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(
struct bpf_prog *prog, enum bpf_cgroup_storage_type stype) { return NULL; }
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 35903f148be5..b14e51d56a82 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -461,6 +461,7 @@ struct bpf_trampoline {
struct {
struct btf_func_model model;
void *addr;
+ bool ftrace_managed;
} func;
/* list of BPF programs using this trampoline */
struct hlist_head progs_hlist[BPF_TRAMP_MAX];
@@ -470,11 +471,69 @@ struct bpf_trampoline {
void *image;
u64 selector;
};
+
+#define BPF_DISPATCHER_MAX 48 /* Fits in 2048B */
+
+struct bpf_dispatcher_prog {
+ struct bpf_prog *prog;
+ refcount_t users;
+};
+
+struct bpf_dispatcher {
+ /* dispatcher mutex */
+ struct mutex mutex;
+ void *func;
+ struct bpf_dispatcher_prog progs[BPF_DISPATCHER_MAX];
+ int num_progs;
+ void *image;
+ u32 image_off;
+};
+
+static __always_inline unsigned int bpf_dispatcher_nopfunc(
+ const void *ctx,
+ const struct bpf_insn *insnsi,
+ unsigned int (*bpf_func)(const void *,
+ const struct bpf_insn *))
+{
+ return bpf_func(ctx, insnsi);
+}
#ifdef CONFIG_BPF_JIT
struct bpf_trampoline *bpf_trampoline_lookup(u64 key);
int bpf_trampoline_link_prog(struct bpf_prog *prog);
int bpf_trampoline_unlink_prog(struct bpf_prog *prog);
void bpf_trampoline_put(struct bpf_trampoline *tr);
+void *bpf_jit_alloc_exec_page(void);
+#define BPF_DISPATCHER_INIT(name) { \
+ .mutex = __MUTEX_INITIALIZER(name.mutex), \
+ .func = &name##func, \
+ .progs = {}, \
+ .num_progs = 0, \
+ .image = NULL, \
+ .image_off = 0 \
+}
+
+#define DEFINE_BPF_DISPATCHER(name) \
+ noinline unsigned int name##func( \
+ const void *ctx, \
+ const struct bpf_insn *insnsi, \
+ unsigned int (*bpf_func)(const void *, \
+ const struct bpf_insn *)) \
+ { \
+ return bpf_func(ctx, insnsi); \
+ } \
+ EXPORT_SYMBOL(name##func); \
+ struct bpf_dispatcher name = BPF_DISPATCHER_INIT(name);
+#define DECLARE_BPF_DISPATCHER(name) \
+ unsigned int name##func( \
+ const void *ctx, \
+ const struct bpf_insn *insnsi, \
+ unsigned int (*bpf_func)(const void *, \
+ const struct bpf_insn *)); \
+ extern struct bpf_dispatcher name;
+#define BPF_DISPATCHER_FUNC(name) name##func
+#define BPF_DISPATCHER_PTR(name) (&name)
+void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from,
+ struct bpf_prog *to);
#else
static inline struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
{
@@ -489,6 +548,13 @@ static inline int bpf_trampoline_unlink_prog(struct bpf_prog *prog)
return -ENOTSUPP;
}
static inline void bpf_trampoline_put(struct bpf_trampoline *tr) {}
+#define DEFINE_BPF_DISPATCHER(name)
+#define DECLARE_BPF_DISPATCHER(name)
+#define BPF_DISPATCHER_FUNC(name) bpf_dispatcher_nopfunc
+#define BPF_DISPATCHER_PTR(name) NULL
+static inline void bpf_dispatcher_change_prog(struct bpf_dispatcher *d,
+ struct bpf_prog *from,
+ struct bpf_prog *to) {}
#endif
struct bpf_func_info_aux {
@@ -817,6 +883,8 @@ struct bpf_prog * __must_check bpf_prog_inc_not_zero(struct bpf_prog *prog);
void bpf_prog_put(struct bpf_prog *prog);
int __bpf_prog_charge(struct user_struct *user, u32 pages);
void __bpf_prog_uncharge(struct user_struct *user, u32 pages);
+void __bpf_free_used_maps(struct bpf_prog_aux *aux,
+ struct bpf_map **used_maps, u32 len);
void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock);
void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock);
@@ -894,14 +962,14 @@ struct sk_buff;
struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key);
struct bpf_dtab_netdev *__dev_map_hash_lookup_elem(struct bpf_map *map, u32 key);
-void __dev_map_flush(struct bpf_map *map);
+void __dev_map_flush(void);
int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
struct net_device *dev_rx);
int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
struct bpf_prog *xdp_prog);
struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key);
-void __cpu_map_flush(struct bpf_map *map);
+void __cpu_map_flush(void);
int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp,
struct net_device *dev_rx);
@@ -940,6 +1008,8 @@ int btf_distill_func_proto(struct bpf_verifier_log *log,
int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog);
+struct bpf_prog *bpf_prog_by_id(u32 id);
+
#else /* !CONFIG_BPF_SYSCALL */
static inline struct bpf_prog *bpf_prog_get(u32 ufd)
{
@@ -1001,7 +1071,7 @@ static inline struct net_device *__dev_map_hash_lookup_elem(struct bpf_map *map
return NULL;
}
-static inline void __dev_map_flush(struct bpf_map *map)
+static inline void __dev_map_flush(void)
{
}
@@ -1030,7 +1100,7 @@ struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key)
return NULL;
}
-static inline void __cpu_map_flush(struct bpf_map *map)
+static inline void __cpu_map_flush(void)
{
}
@@ -1071,6 +1141,11 @@ static inline int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
static inline void bpf_map_put(struct bpf_map *map)
{
}
+
+static inline struct bpf_prog *bpf_prog_by_id(u32 id)
+{
+ return ERR_PTR(-ENOTSUPP);
+}
#endif /* CONFIG_BPF_SYSCALL */
static inline struct bpf_prog *bpf_prog_get_type(u32 ufd,
diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h
index 9b3c720a31b1..5e3d45525bd3 100644
--- a/include/linux/can/dev.h
+++ b/include/linux/can/dev.h
@@ -18,6 +18,7 @@
#include <linux/can/error.h>
#include <linux/can/led.h>
#include <linux/can/netlink.h>
+#include <linux/can/skb.h>
#include <linux/netdevice.h>
/*
@@ -91,6 +92,36 @@ struct can_priv {
#define get_can_dlc(i) (min_t(__u8, (i), CAN_MAX_DLC))
#define get_canfd_dlc(i) (min_t(__u8, (i), CANFD_MAX_DLC))
+/* Check for outgoing skbs that have not been created by the CAN subsystem */
+static inline bool can_skb_headroom_valid(struct net_device *dev,
+ struct sk_buff *skb)
+{
+ /* af_packet creates a headroom of HH_DATA_MOD bytes which is fine */
+ if (WARN_ON_ONCE(skb_headroom(skb) < sizeof(struct can_skb_priv)))
+ return false;
+
+ /* af_packet does not apply CAN skb specific settings */
+ if (skb->ip_summed == CHECKSUM_NONE) {
+ /* init headroom */
+ can_skb_prv(skb)->ifindex = dev->ifindex;
+ can_skb_prv(skb)->skbcnt = 0;
+
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+ /* preform proper loopback on capable devices */
+ if (dev->flags & IFF_ECHO)
+ skb->pkt_type = PACKET_LOOPBACK;
+ else
+ skb->pkt_type = PACKET_HOST;
+
+ skb_reset_mac_header(skb);
+ skb_reset_network_header(skb);
+ skb_reset_transport_header(skb);
+ }
+
+ return true;
+}
+
/* Drop a given socketbuffer if it does not contain a valid CAN frame. */
static inline bool can_dropped_invalid_skb(struct net_device *dev,
struct sk_buff *skb)
@@ -108,6 +139,9 @@ static inline bool can_dropped_invalid_skb(struct net_device *dev,
} else
goto inval_skb;
+ if (!can_skb_headroom_valid(dev, skb))
+ goto inval_skb;
+
return false;
inval_skb:
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 92d5fdc8154e..31b1b0e03df8 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -595,17 +595,6 @@ struct governor_attr {
size_t count);
};
-static inline bool cpufreq_this_cpu_can_update(struct cpufreq_policy *policy)
-{
- /*
- * Allow remote callbacks if:
- * - dvfs_possible_from_any_cpu flag is set
- * - the local and remote CPUs share cpufreq policy
- */
- return policy->dvfs_possible_from_any_cpu ||
- cpumask_test_cpu(smp_processor_id(), policy->cpus);
-}
-
/*********************************************************************
* FREQUENCY TABLE HELPERS *
*********************************************************************/
diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h
index 2bae9ed3c783..fb376b5b7281 100644
--- a/include/linux/devfreq.h
+++ b/include/linux/devfreq.h
@@ -13,6 +13,7 @@
#include <linux/device.h>
#include <linux/notifier.h>
#include <linux/pm_opp.h>
+#include <linux/pm_qos.h>
#define DEVFREQ_NAME_LEN 16
@@ -123,8 +124,8 @@ struct devfreq_dev_profile {
* @previous_freq: previously configured frequency value.
* @data: Private data of the governor. The devfreq framework does not
* touch this.
- * @min_freq: Limit minimum frequency requested by user (0: none)
- * @max_freq: Limit maximum frequency requested by user (0: none)
+ * @user_min_freq_req: PM QoS minimum frequency request from user (via sysfs)
+ * @user_max_freq_req: PM QoS maximum frequency request from user (via sysfs)
* @scaling_min_freq: Limit minimum frequency requested by OPP interface
* @scaling_max_freq: Limit maximum frequency requested by OPP interface
* @stop_polling: devfreq polling status of a device.
@@ -136,6 +137,8 @@ struct devfreq_dev_profile {
* @time_in_state: Statistics of devfreq states
* @last_stat_updated: The last time stat updated
* @transition_notifier_list: list head of DEVFREQ_TRANSITION_NOTIFIER notifier
+ * @nb_min: Notifier block for DEV_PM_QOS_MIN_FREQUENCY
+ * @nb_max: Notifier block for DEV_PM_QOS_MAX_FREQUENCY
*
* This structure stores the devfreq information for a give device.
*
@@ -161,8 +164,8 @@ struct devfreq {
void *data; /* private data for governors */
- unsigned long min_freq;
- unsigned long max_freq;
+ struct dev_pm_qos_request user_min_freq_req;
+ struct dev_pm_qos_request user_max_freq_req;
unsigned long scaling_min_freq;
unsigned long scaling_max_freq;
bool stop_polling;
@@ -178,6 +181,9 @@ struct devfreq {
unsigned long last_stat_updated;
struct srcu_notifier_head transition_notifier_list;
+
+ struct notifier_block nb_min;
+ struct notifier_block nb_max;
};
struct devfreq_freqs {
diff --git a/include/linux/device.h b/include/linux/device.h
index e226030c1df3..96ff76731e93 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -1666,11 +1666,11 @@ extern bool kill_device(struct device *dev);
#ifdef CONFIG_DEVTMPFS
extern int devtmpfs_create_node(struct device *dev);
extern int devtmpfs_delete_node(struct device *dev);
-extern int devtmpfs_mount(const char *mntdir);
+extern int devtmpfs_mount(void);
#else
static inline int devtmpfs_create_node(struct device *dev) { return 0; }
static inline int devtmpfs_delete_node(struct device *dev) { return 0; }
-static inline int devtmpfs_mount(const char *mountpoint) { return 0; }
+static inline int devtmpfs_mount(void) { return 0; }
#endif
/* drivers/base/power/shutdown.c */
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index 8fcdee1c0cf9..dad4a68fa009 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -1364,8 +1364,11 @@ static inline int dma_get_slave_caps(struct dma_chan *chan,
static inline int dmaengine_desc_set_reuse(struct dma_async_tx_descriptor *tx)
{
struct dma_slave_caps caps;
+ int ret;
- dma_get_slave_caps(tx->chan, &caps);
+ ret = dma_get_slave_caps(tx->chan, &caps);
+ if (ret)
+ return ret;
if (caps.descriptor_reuse) {
tx->flags |= DMA_CTRL_REUSE;
diff --git a/include/linux/dsa/sja1105.h b/include/linux/dsa/sja1105.h
index 897e799dbcb9..fa5735c353cd 100644
--- a/include/linux/dsa/sja1105.h
+++ b/include/linux/dsa/sja1105.h
@@ -37,8 +37,6 @@
* the structure defined in struct sja1105_private.
*/
struct sja1105_tagger_data {
- struct sk_buff_head skb_rxtstamp_queue;
- struct work_struct rxtstamp_work;
struct sk_buff *stampable_skb;
/* Protects concurrent access to the meta state machine
* from taggers running on multiple ports on SMP systems
@@ -55,10 +53,12 @@ struct sja1105_skb_cb {
((struct sja1105_skb_cb *)DSA_SKB_CB_PRIV(skb))
struct sja1105_port {
+ struct kthread_worker *xmit_worker;
+ struct kthread_work xmit_work;
+ struct sk_buff_head xmit_queue;
struct sja1105_tagger_data *data;
struct dsa_port *dp;
bool hwts_tx_en;
- int mgmt_slot;
};
#endif /* _NET_DSA_SJA1105_H */
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 99dfea595c8c..aa54586db7a5 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -824,7 +824,7 @@ typedef struct {
__aligned_u64 image_size;
unsigned int image_code_type;
unsigned int image_data_type;
- unsigned long unload;
+ u32 unload;
} efi_loaded_image_32_t;
typedef struct {
@@ -840,14 +840,14 @@ typedef struct {
__aligned_u64 image_size;
unsigned int image_code_type;
unsigned int image_data_type;
- unsigned long unload;
+ u64 unload;
} efi_loaded_image_64_t;
typedef struct {
u32 revision;
- void *parent_handle;
+ efi_handle_t parent_handle;
efi_system_table_t *system_table;
- void *device_handle;
+ efi_handle_t device_handle;
void *file_path;
void *reserved;
u32 load_options_size;
@@ -856,7 +856,7 @@ typedef struct {
__aligned_u64 image_size;
unsigned int image_code_type;
unsigned int image_data_type;
- unsigned long unload;
+ efi_status_t (*unload)(efi_handle_t image_handle);
} efi_loaded_image_t;
diff --git a/include/linux/ethtool_netlink.h b/include/linux/ethtool_netlink.h
new file mode 100644
index 000000000000..d01b77887f82
--- /dev/null
+++ b/include/linux/ethtool_netlink.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef _LINUX_ETHTOOL_NETLINK_H_
+#define _LINUX_ETHTOOL_NETLINK_H_
+
+#include <uapi/linux/ethtool_netlink.h>
+#include <linux/ethtool.h>
+#include <linux/netdevice.h>
+
+#define __ETHTOOL_LINK_MODE_MASK_NWORDS \
+ DIV_ROUND_UP(__ETHTOOL_LINK_MODE_MASK_NBITS, 32)
+
+enum ethtool_multicast_groups {
+ ETHNL_MCGRP_MONITOR,
+};
+
+#endif /* _LINUX_ETHTOOL_NETLINK_H_ */
diff --git a/include/linux/filter.h b/include/linux/filter.h
index a141cb07e76a..70e6dd960bca 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -420,7 +420,7 @@ static inline bool insn_is_zext(const struct bpf_insn *insn)
#define BPF_FIELD_SIZEOF(type, field) \
({ \
- const int __size = bytes_to_bpf_size(FIELD_SIZEOF(type, field)); \
+ const int __size = bytes_to_bpf_size(sizeof_field(type, field)); \
BUILD_BUG_ON(__size < 0); \
__size; \
})
@@ -497,7 +497,7 @@ static inline bool insn_is_zext(const struct bpf_insn *insn)
#define bpf_target_off(TYPE, MEMBER, SIZE, PTR_SIZE) \
({ \
- BUILD_BUG_ON(FIELD_SIZEOF(TYPE, MEMBER) != (SIZE)); \
+ BUILD_BUG_ON(sizeof_field(TYPE, MEMBER) != (SIZE)); \
*(PTR_SIZE) = (SIZE); \
offsetof(TYPE, MEMBER); \
})
@@ -559,23 +559,26 @@ struct sk_filter {
DECLARE_STATIC_KEY_FALSE(bpf_stats_enabled_key);
-#define BPF_PROG_RUN(prog, ctx) ({ \
- u32 ret; \
- cant_sleep(); \
- if (static_branch_unlikely(&bpf_stats_enabled_key)) { \
- struct bpf_prog_stats *stats; \
- u64 start = sched_clock(); \
- ret = (*(prog)->bpf_func)(ctx, (prog)->insnsi); \
- stats = this_cpu_ptr(prog->aux->stats); \
- u64_stats_update_begin(&stats->syncp); \
- stats->cnt++; \
- stats->nsecs += sched_clock() - start; \
- u64_stats_update_end(&stats->syncp); \
- } else { \
- ret = (*(prog)->bpf_func)(ctx, (prog)->insnsi); \
- } \
+#define __BPF_PROG_RUN(prog, ctx, dfunc) ({ \
+ u32 ret; \
+ cant_sleep(); \
+ if (static_branch_unlikely(&bpf_stats_enabled_key)) { \
+ struct bpf_prog_stats *stats; \
+ u64 start = sched_clock(); \
+ ret = dfunc(ctx, (prog)->insnsi, (prog)->bpf_func); \
+ stats = this_cpu_ptr(prog->aux->stats); \
+ u64_stats_update_begin(&stats->syncp); \
+ stats->cnt++; \
+ stats->nsecs += sched_clock() - start; \
+ u64_stats_update_end(&stats->syncp); \
+ } else { \
+ ret = dfunc(ctx, (prog)->insnsi, (prog)->bpf_func); \
+ } \
ret; })
+#define BPF_PROG_RUN(prog, ctx) __BPF_PROG_RUN(prog, ctx, \
+ bpf_dispatcher_nopfunc)
+
#define BPF_SKB_CB_LEN QDISC_CB_PRIV_LEN
struct bpf_skb_data_end {
@@ -589,7 +592,6 @@ struct bpf_redirect_info {
u32 tgt_index;
void *tgt_value;
struct bpf_map *map;
- struct bpf_map *map_to_flush;
u32 kern_flags;
};
@@ -608,7 +610,7 @@ static inline void bpf_compute_data_pointers(struct sk_buff *skb)
{
struct bpf_skb_data_end *cb = (struct bpf_skb_data_end *)skb->cb;
- BUILD_BUG_ON(sizeof(*cb) > FIELD_SIZEOF(struct sk_buff, cb));
+ BUILD_BUG_ON(sizeof(*cb) > sizeof_field(struct sk_buff, cb));
cb->data_meta = skb->data - skb_metadata_len(skb);
cb->data_end = skb->data + skb_headlen(skb);
}
@@ -646,9 +648,9 @@ static inline u8 *bpf_skb_cb(struct sk_buff *skb)
* attached to sockets, we need to clear the bpf_skb_cb() area
* to not leak previous contents to user space.
*/
- BUILD_BUG_ON(FIELD_SIZEOF(struct __sk_buff, cb) != BPF_SKB_CB_LEN);
- BUILD_BUG_ON(FIELD_SIZEOF(struct __sk_buff, cb) !=
- FIELD_SIZEOF(struct qdisc_skb_cb, data));
+ BUILD_BUG_ON(sizeof_field(struct __sk_buff, cb) != BPF_SKB_CB_LEN);
+ BUILD_BUG_ON(sizeof_field(struct __sk_buff, cb) !=
+ sizeof_field(struct qdisc_skb_cb, data));
return qdisc_skb_cb(skb)->data;
}
@@ -699,6 +701,8 @@ static inline u32 bpf_prog_run_clear_cb(const struct bpf_prog *prog,
return res;
}
+DECLARE_BPF_DISPATCHER(bpf_dispatcher_xdp)
+
static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog,
struct xdp_buff *xdp)
{
@@ -708,9 +712,12 @@ static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog,
* already takes rcu_read_lock() when fetching the program, so
* it's not necessary here anymore.
*/
- return BPF_PROG_RUN(prog, xdp);
+ return __BPF_PROG_RUN(prog, xdp,
+ BPF_DISPATCHER_FUNC(bpf_dispatcher_xdp));
}
+void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog);
+
static inline u32 bpf_prog_insn_size(const struct bpf_prog *prog)
{
return prog->len * sizeof(struct bpf_insn);
diff --git a/include/linux/fsl/enetc_mdio.h b/include/linux/fsl/enetc_mdio.h
new file mode 100644
index 000000000000..4875dd38af7e
--- /dev/null
+++ b/include/linux/fsl/enetc_mdio.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
+/* Copyright 2019 NXP */
+
+#ifndef _FSL_ENETC_MDIO_H_
+#define _FSL_ENETC_MDIO_H_
+
+#include <linux/phy.h>
+
+/* PCS registers */
+#define ENETC_PCS_LINK_TIMER1 0x12
+#define ENETC_PCS_LINK_TIMER1_VAL 0x06a0
+#define ENETC_PCS_LINK_TIMER2 0x13
+#define ENETC_PCS_LINK_TIMER2_VAL 0x0003
+#define ENETC_PCS_IF_MODE 0x14
+#define ENETC_PCS_IF_MODE_SGMII_EN BIT(0)
+#define ENETC_PCS_IF_MODE_USE_SGMII_AN BIT(1)
+#define ENETC_PCS_IF_MODE_SGMII_SPEED(x) (((x) << 2) & GENMASK(3, 2))
+
+/* Not a mistake, the SerDes PLL needs to be set at 3.125 GHz by Reset
+ * Configuration Word (RCW, outside Linux control) for 2.5G SGMII mode. The PCS
+ * still thinks it's at gigabit.
+ */
+enum enetc_pcs_speed {
+ ENETC_PCS_SPEED_10 = 0,
+ ENETC_PCS_SPEED_100 = 1,
+ ENETC_PCS_SPEED_1000 = 2,
+ ENETC_PCS_SPEED_2500 = 2,
+};
+
+struct enetc_hw;
+
+struct enetc_mdio_priv {
+ struct enetc_hw *hw;
+ int mdio_base;
+};
+
+#if IS_REACHABLE(CONFIG_FSL_ENETC_MDIO)
+
+int enetc_mdio_read(struct mii_bus *bus, int phy_id, int regnum);
+int enetc_mdio_write(struct mii_bus *bus, int phy_id, int regnum, u16 value);
+struct enetc_hw *enetc_hw_alloc(struct device *dev, void __iomem *port_regs);
+
+#else
+
+static inline int enetc_mdio_read(struct mii_bus *bus, int phy_id, int regnum)
+{ return -EINVAL; }
+static inline int enetc_mdio_write(struct mii_bus *bus, int phy_id, int regnum,
+ u16 value)
+{ return -EINVAL; }
+struct enetc_hw *enetc_hw_alloc(struct device *dev, void __iomem *port_regs)
+{ return ERR_PTR(-EINVAL); }
+
+#endif
+
+#endif
diff --git a/include/linux/fsl/ptp_qoriq.h b/include/linux/fsl/ptp_qoriq.h
index 992bf9fa1729..b0b743563f43 100644
--- a/include/linux/fsl/ptp_qoriq.h
+++ b/include/linux/fsl/ptp_qoriq.h
@@ -192,6 +192,7 @@ int ptp_qoriq_settime(struct ptp_clock_info *ptp,
const struct timespec64 *ts);
int ptp_qoriq_enable(struct ptp_clock_info *ptp,
struct ptp_clock_request *rq, int on);
+int extts_clean_up(struct ptp_qoriq *ptp_qoriq, int index, bool update_event);
#ifdef CONFIG_DEBUG_FS
void ptp_qoriq_create_debugfs(struct ptp_qoriq *ptp_qoriq);
void ptp_qoriq_remove_debugfs(struct ptp_qoriq *ptp_qoriq);
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 7247d35c3d16..db95244a62d4 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -264,6 +264,7 @@ int ftrace_modify_direct_caller(struct ftrace_func_entry *entry,
struct dyn_ftrace *rec,
unsigned long old_addr,
unsigned long new_addr);
+unsigned long ftrace_find_rec_direct(unsigned long ip);
#else
# define ftrace_direct_func_count 0
static inline int register_ftrace_direct(unsigned long ip, unsigned long addr)
@@ -290,6 +291,10 @@ static inline int ftrace_modify_direct_caller(struct ftrace_func_entry *entry,
{
return -ENODEV;
}
+static inline unsigned long ftrace_find_rec_direct(unsigned long ip)
+{
+ return 0;
+}
#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */
#ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index d2f786706657..582ef05ec07e 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -300,6 +300,7 @@ struct i2c_driver {
* generic enough to hide second-sourcing and compatible revisions.
* @adapter: manages the bus segment hosting this I2C device
* @dev: Driver model device node for the slave.
+ * @init_irq: IRQ that was set at initialization
* @irq: indicates the IRQ generated by this device (if any)
* @detected: member of an i2c_driver.clients list or i2c-core's
* userspace_devices list
@@ -466,12 +467,6 @@ i2c_new_probed_device(struct i2c_adapter *adap,
/* Common custom probe functions */
extern int i2c_probe_func_quick_read(struct i2c_adapter *adap, unsigned short addr);
-/* For devices that use several addresses, use i2c_new_dummy() to make
- * client handles for the extra addresses.
- */
-extern struct i2c_client *
-i2c_new_dummy(struct i2c_adapter *adap, u16 address);
-
extern struct i2c_client *
i2c_new_dummy_device(struct i2c_adapter *adapter, u16 address);
@@ -856,6 +851,11 @@ extern void i2c_del_driver(struct i2c_driver *driver);
#define i2c_add_driver(driver) \
i2c_register_driver(THIS_MODULE, driver)
+static inline bool i2c_client_has_driver(struct i2c_client *client)
+{
+ return !IS_ERR_OR_NULL(client) && client->dev.driver;
+}
+
/* call the i2c_client->command() of all attached clients with
* the given arguments */
extern void i2c_clients_command(struct i2c_adapter *adap,
diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h
index 76cf11e905e1..8a9792a6427a 100644
--- a/include/linux/if_ether.h
+++ b/include/linux/if_ether.h
@@ -24,6 +24,14 @@ static inline struct ethhdr *eth_hdr(const struct sk_buff *skb)
return (struct ethhdr *)skb_mac_header(skb);
}
+/* Prefer this version in TX path, instead of
+ * skb_reset_mac_header() + eth_hdr()
+ */
+static inline struct ethhdr *skb_eth_hdr(const struct sk_buff *skb)
+{
+ return (struct ethhdr *)skb->data;
+}
+
static inline struct ethhdr *inner_eth_hdr(const struct sk_buff *skb)
{
return (struct ethhdr *)skb_inner_mac_header(skb);
diff --git a/include/linux/initrd.h b/include/linux/initrd.h
index d77fe34fb00a..aa5914355728 100644
--- a/include/linux/initrd.h
+++ b/include/linux/initrd.h
@@ -28,3 +28,5 @@ extern unsigned int real_root_dev;
extern char __initramfs_start[];
extern unsigned long __initramfs_size;
+
+void console_on_rootfs(void);
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 29dce6ff6bae..ce44b687d02b 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -457,7 +457,7 @@ struct jbd2_revoke_table_s;
* @h_journal: Which journal handle belongs to - used iff h_reserved set.
* @h_rsv_handle: Handle reserved for finishing the logical operation.
* @h_total_credits: Number of remaining buffers we are allowed to add to
- journal. These are dirty buffers and revoke descriptor blocks.
+ * journal. These are dirty buffers and revoke descriptor blocks.
* @h_revoke_credits: Number of remaining revoke records available for handle
* @h_ref: Reference count on this handle.
* @h_err: Field for caller's use to track errors through large fs operations.
diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index 4f404c565db1..e18fe54969e9 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -205,20 +205,23 @@ static inline void *kasan_reset_tag(const void *addr)
#endif /* CONFIG_KASAN_SW_TAGS */
#ifdef CONFIG_KASAN_VMALLOC
-int kasan_populate_vmalloc(unsigned long requested_size,
- struct vm_struct *area);
-void kasan_poison_vmalloc(void *start, unsigned long size);
+int kasan_populate_vmalloc(unsigned long addr, unsigned long size);
+void kasan_poison_vmalloc(const void *start, unsigned long size);
+void kasan_unpoison_vmalloc(const void *start, unsigned long size);
void kasan_release_vmalloc(unsigned long start, unsigned long end,
unsigned long free_region_start,
unsigned long free_region_end);
#else
-static inline int kasan_populate_vmalloc(unsigned long requested_size,
- struct vm_struct *area)
+static inline int kasan_populate_vmalloc(unsigned long start,
+ unsigned long size)
{
return 0;
}
-static inline void kasan_poison_vmalloc(void *start, unsigned long size) {}
+static inline void kasan_poison_vmalloc(const void *start, unsigned long size)
+{ }
+static inline void kasan_unpoison_vmalloc(const void *start, unsigned long size)
+{ }
static inline void kasan_release_vmalloc(unsigned long start,
unsigned long end,
unsigned long free_region_start,
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 3adcb39fa6f5..0d9db2a14f44 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -79,15 +79,6 @@
*/
#define round_down(x, y) ((x) & ~__round_mask(x, y))
-/**
- * FIELD_SIZEOF - get the size of a struct's field
- * @t: the target struct
- * @f: the target struct's field
- * Return: the size of @f in the struct definition without having a
- * declared instance of @t.
- */
-#define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f))
-
#define typeof_member(T, m) typeof(((T*)0)->m)
#define DIV_ROUND_UP __KERNEL_DIV_ROUND_UP
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 7ed1e2f8641e..538c25e778c0 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -149,7 +149,7 @@ static inline bool is_error_page(struct page *page)
#define KVM_REQUEST_ARCH_BASE 8
#define KVM_ARCH_REQ_FLAGS(nr, flags) ({ \
- BUILD_BUG_ON((unsigned)(nr) >= (FIELD_SIZEOF(struct kvm_vcpu, requests) * 8) - KVM_REQUEST_ARCH_BASE); \
+ BUILD_BUG_ON((unsigned)(nr) >= (sizeof_field(struct kvm_vcpu, requests) * 8) - KVM_REQUEST_ARCH_BASE); \
(unsigned)(((nr) + KVM_REQUEST_ARCH_BASE) | (flags)); \
})
#define KVM_ARCH_REQ(nr) KVM_ARCH_REQ_FLAGS(nr, 0)
diff --git a/include/linux/libata.h b/include/linux/libata.h
index d3bbfddf616a..2dbde119721d 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -1175,6 +1175,7 @@ extern unsigned int ata_do_dev_read_id(struct ata_device *dev,
struct ata_taskfile *tf, u16 *id);
extern void ata_qc_complete(struct ata_queued_cmd *qc);
extern int ata_qc_complete_multiple(struct ata_port *ap, u64 qc_active);
+extern u64 ata_qc_get_active(struct ata_port *ap);
extern void ata_scsi_simulate(struct ata_device *dev, struct scsi_cmnd *cmd);
extern int ata_std_bios_param(struct scsi_device *sdev,
struct block_device *bdev,
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 3a08ecdfca11..ba0dca6aac6e 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -122,8 +122,8 @@ static inline bool movable_node_is_enabled(void)
extern void arch_remove_memory(int nid, u64 start, u64 size,
struct vmem_altmap *altmap);
-extern void __remove_pages(struct zone *zone, unsigned long start_pfn,
- unsigned long nr_pages, struct vmem_altmap *altmap);
+extern void __remove_pages(unsigned long start_pfn, unsigned long nr_pages,
+ struct vmem_altmap *altmap);
/* reasonably generic interface to expand the physical pages */
extern int __add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages,
@@ -342,6 +342,9 @@ extern int add_memory(int nid, u64 start, u64 size);
extern int add_memory_resource(int nid, struct resource *resource);
extern void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
unsigned long nr_pages, struct vmem_altmap *altmap);
+extern void remove_pfn_range_from_zone(struct zone *zone,
+ unsigned long start_pfn,
+ unsigned long nr_pages);
extern bool is_memblock_offlined(struct memory_block *mem);
extern int sparse_add_section(int nid, unsigned long pfn,
unsigned long nr_pages, struct vmem_altmap *altmap);
diff --git a/include/linux/mfd/mt6397/rtc.h b/include/linux/mfd/mt6397/rtc.h
index f84b9163c0ee..7dfb63b81373 100644
--- a/include/linux/mfd/mt6397/rtc.h
+++ b/include/linux/mfd/mt6397/rtc.h
@@ -46,6 +46,14 @@
#define RTC_AL_SEC 0x0018
+#define RTC_AL_SEC_MASK 0x003f
+#define RTC_AL_MIN_MASK 0x003f
+#define RTC_AL_HOU_MASK 0x001f
+#define RTC_AL_DOM_MASK 0x001f
+#define RTC_AL_DOW_MASK 0x0007
+#define RTC_AL_MTH_MASK 0x000f
+#define RTC_AL_YEA_MASK 0x007f
+
#define RTC_PDN2 0x002e
#define RTC_PDN2_PWRON_ALARM BIT(4)
diff --git a/include/linux/mii.h b/include/linux/mii.h
index 4ce8901a1af6..18c6208f56fc 100644
--- a/include/linux/mii.h
+++ b/include/linux/mii.h
@@ -373,6 +373,56 @@ static inline u32 mii_lpa_to_ethtool_lpa_x(u32 lpa)
}
/**
+ * mii_lpa_mod_linkmode_adv_sgmii
+ * @lp_advertising: pointer to destination link mode.
+ * @lpa: value of the MII_LPA register
+ *
+ * A small helper function that translates MII_LPA bits to
+ * linkmode advertisement settings for SGMII.
+ * Leaves other bits unchanged.
+ */
+static inline void
+mii_lpa_mod_linkmode_lpa_sgmii(unsigned long *lp_advertising, u32 lpa)
+{
+ u32 speed_duplex = lpa & LPA_SGMII_DPX_SPD_MASK;
+
+ linkmode_mod_bit(ETHTOOL_LINK_MODE_1000baseT_Half_BIT, lp_advertising,
+ speed_duplex == LPA_SGMII_1000HALF);
+
+ linkmode_mod_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT, lp_advertising,
+ speed_duplex == LPA_SGMII_1000FULL);
+
+ linkmode_mod_bit(ETHTOOL_LINK_MODE_100baseT_Half_BIT, lp_advertising,
+ speed_duplex == LPA_SGMII_100HALF);
+
+ linkmode_mod_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT, lp_advertising,
+ speed_duplex == LPA_SGMII_100FULL);
+
+ linkmode_mod_bit(ETHTOOL_LINK_MODE_10baseT_Half_BIT, lp_advertising,
+ speed_duplex == LPA_SGMII_10HALF);
+
+ linkmode_mod_bit(ETHTOOL_LINK_MODE_10baseT_Full_BIT, lp_advertising,
+ speed_duplex == LPA_SGMII_10FULL);
+}
+
+/**
+ * mii_lpa_to_linkmode_adv_sgmii
+ * @advertising: pointer to destination link mode.
+ * @lpa: value of the MII_LPA register
+ *
+ * A small helper function that translates MII_ADVERTISE bits
+ * to linkmode advertisement settings when in SGMII mode.
+ * Clears the old value of advertising.
+ */
+static inline void mii_lpa_to_linkmode_lpa_sgmii(unsigned long *lp_advertising,
+ u32 lpa)
+{
+ linkmode_zero(lp_advertising);
+
+ mii_lpa_mod_linkmode_lpa_sgmii(lp_advertising, lpa);
+}
+
+/**
* mii_adv_mod_linkmode_adv_t
* @advertising:pointer to destination link mode.
* @adv: value of the MII_ADVERTISE register
diff --git a/include/linux/mii_timestamper.h b/include/linux/mii_timestamper.h
new file mode 100644
index 000000000000..fa940bbaf8ae
--- /dev/null
+++ b/include/linux/mii_timestamper.h
@@ -0,0 +1,121 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Support for generic time stamping devices on MII buses.
+ * Copyright (C) 2018 Richard Cochran <[email protected]>
+ */
+#ifndef _LINUX_MII_TIMESTAMPER_H
+#define _LINUX_MII_TIMESTAMPER_H
+
+#include <linux/device.h>
+#include <linux/ethtool.h>
+#include <linux/skbuff.h>
+
+struct phy_device;
+
+/**
+ * struct mii_timestamper - Callback interface to MII time stamping devices.
+ *
+ * @rxtstamp: Requests a Rx timestamp for 'skb'. If the skb is accepted,
+ * the MII time stamping device promises to deliver it using
+ * netif_rx() as soon as a timestamp becomes available. One of
+ * the PTP_CLASS_ values is passed in 'type'. The function
+ * must return true if the skb is accepted for delivery.
+ *
+ * @txtstamp: Requests a Tx timestamp for 'skb'. The MII time stamping
+ * device promises to deliver it using skb_complete_tx_timestamp()
+ * as soon as a timestamp becomes available. One of the PTP_CLASS_
+ * values is passed in 'type'.
+ *
+ * @hwtstamp: Handles SIOCSHWTSTAMP ioctl for hardware time stamping.
+ *
+ * @link_state: Allows the device to respond to changes in the link
+ * state. The caller invokes this function while holding
+ * the phy_device mutex.
+ *
+ * @ts_info: Handles ethtool queries for hardware time stamping.
+ * @device: Remembers the device to which the instance belongs.
+ *
+ * Drivers for PHY time stamping devices should embed their
+ * mii_timestamper within a private structure, obtaining a reference
+ * to it using container_of().
+ *
+ * Drivers for non-PHY time stamping devices should return a pointer
+ * to a mii_timestamper from the probe_channel() callback of their
+ * mii_timestamping_ctrl interface.
+ */
+struct mii_timestamper {
+ bool (*rxtstamp)(struct mii_timestamper *mii_ts,
+ struct sk_buff *skb, int type);
+
+ void (*txtstamp)(struct mii_timestamper *mii_ts,
+ struct sk_buff *skb, int type);
+
+ int (*hwtstamp)(struct mii_timestamper *mii_ts,
+ struct ifreq *ifreq);
+
+ void (*link_state)(struct mii_timestamper *mii_ts,
+ struct phy_device *phydev);
+
+ int (*ts_info)(struct mii_timestamper *mii_ts,
+ struct ethtool_ts_info *ts_info);
+
+ struct device *device;
+};
+
+/**
+ * struct mii_timestamping_ctrl - MII time stamping controller interface.
+ *
+ * @probe_channel: Callback into the controller driver announcing the
+ * presence of the 'port' channel. The 'device' field
+ * had been passed to register_mii_tstamp_controller().
+ * The driver must return either a pointer to a valid
+ * MII timestamper instance or PTR_ERR.
+ *
+ * @release_channel: Releases an instance obtained via .probe_channel.
+ */
+struct mii_timestamping_ctrl {
+ struct mii_timestamper *(*probe_channel)(struct device *device,
+ unsigned int port);
+ void (*release_channel)(struct device *device,
+ struct mii_timestamper *mii_ts);
+};
+
+#ifdef CONFIG_NETWORK_PHY_TIMESTAMPING
+
+int register_mii_tstamp_controller(struct device *device,
+ struct mii_timestamping_ctrl *ctrl);
+
+void unregister_mii_tstamp_controller(struct device *device);
+
+struct mii_timestamper *register_mii_timestamper(struct device_node *node,
+ unsigned int port);
+
+void unregister_mii_timestamper(struct mii_timestamper *mii_ts);
+
+#else
+
+static inline
+int register_mii_tstamp_controller(struct device *device,
+ struct mii_timestamping_ctrl *ctrl)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline void unregister_mii_tstamp_controller(struct device *device)
+{
+}
+
+static inline
+struct mii_timestamper *register_mii_timestamper(struct device_node *node,
+ unsigned int port)
+{
+ return NULL;
+}
+
+static inline void unregister_mii_timestamper(struct mii_timestamper *mii_ts)
+{
+}
+
+#endif
+
+#endif
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 36e412c3d657..20372de0b587 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -47,7 +47,7 @@
#define DEFAULT_UAR_PAGE_SHIFT 12
#define MAX_MSIX_P_PORT 17
-#define MAX_MSIX 64
+#define MAX_MSIX 128
#define MIN_MSIX_P_PORT 5
#define MLX4_IS_LEGACY_EQ_MODE(dev_cap) ((dev_cap).num_comp_vectors < \
(dev_cap).num_ports * MIN_MSIX_P_PORT)
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 27200dea0297..59cff380f41a 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -928,8 +928,6 @@ void mlx5_start_health_poll(struct mlx5_core_dev *dev);
void mlx5_stop_health_poll(struct mlx5_core_dev *dev, bool disable_health);
void mlx5_drain_health_wq(struct mlx5_core_dev *dev);
void mlx5_trigger_health_work(struct mlx5_core_dev *dev);
-int mlx5_buf_alloc_node(struct mlx5_core_dev *dev, int size,
- struct mlx5_frag_buf *buf, int node);
int mlx5_buf_alloc(struct mlx5_core_dev *dev,
int size, struct mlx5_frag_buf *buf);
void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index c97ea3b694e6..80a9162b406c 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2621,6 +2621,9 @@ static inline int vm_fault_to_errno(vm_fault_t vm_fault, int foll_flags)
typedef int (*pte_fn_t)(pte_t *pte, unsigned long addr, void *data);
extern int apply_to_page_range(struct mm_struct *mm, unsigned long address,
unsigned long size, pte_fn_t fn, void *data);
+extern int apply_to_existing_page_range(struct mm_struct *mm,
+ unsigned long address, unsigned long size,
+ pte_fn_t fn, void *data);
#ifdef CONFIG_PAGE_POISONING
extern bool page_poisoning_enabled(void);
diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h
index 5714fd35a83c..e3596db077dc 100644
--- a/include/linux/mod_devicetable.h
+++ b/include/linux/mod_devicetable.h
@@ -587,9 +587,9 @@ struct platform_device_id {
#define MDIO_NAME_SIZE 32
#define MDIO_MODULE_PREFIX "mdio:"
-#define MDIO_ID_FMT "%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d"
+#define MDIO_ID_FMT "%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u%u"
#define MDIO_ID_ARGS(_id) \
- (_id)>>31, ((_id)>>30) & 1, ((_id)>>29) & 1, ((_id)>>28) & 1, \
+ ((_id)>>31) & 1, ((_id)>>30) & 1, ((_id)>>29) & 1, ((_id)>>28) & 1, \
((_id)>>27) & 1, ((_id)>>26) & 1, ((_id)>>25) & 1, ((_id)>>24) & 1, \
((_id)>>23) & 1, ((_id)>>22) & 1, ((_id)>>21) & 1, ((_id)>>20) & 1, \
((_id)>>19) & 1, ((_id)>>18) & 1, ((_id)>>17) & 1, ((_id)>>16) & 1, \
diff --git a/include/linux/net.h b/include/linux/net.h
index 9cafb5f353a9..6451425e828f 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -171,6 +171,7 @@ struct proto_ops {
int (*compat_getsockopt)(struct socket *sock, int level,
int optname, char __user *optval, int __user *optlen);
#endif
+ void (*show_fdinfo)(struct seq_file *m, struct socket *sock);
int (*sendmsg) (struct socket *sock, struct msghdr *m,
size_t total_len);
/* Notes for implementing recvmsg:
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 9ef20389622d..2741aa35bec6 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -849,6 +849,7 @@ enum tc_setup_type {
TC_SETUP_QDISC_GRED,
TC_SETUP_QDISC_TAPRIO,
TC_SETUP_FT,
+ TC_SETUP_QDISC_ETS,
};
/* These structures hold the attributes of bpf state that are being passed
@@ -1014,7 +1015,7 @@ int netdev_name_node_alt_destroy(struct net_device *dev, const char *name);
* Called when a user wants to change the Maximum Transfer Unit
* of a device.
*
- * void (*ndo_tx_timeout)(struct net_device *dev);
+ * void (*ndo_tx_timeout)(struct net_device *dev, unsigned int txqueue);
* Callback used when the transmitter has not made any progress
* for dev->watchdog ticks.
*
@@ -1281,7 +1282,8 @@ struct net_device_ops {
int new_mtu);
int (*ndo_neigh_setup)(struct net_device *dev,
struct neigh_parms *);
- void (*ndo_tx_timeout) (struct net_device *dev);
+ void (*ndo_tx_timeout) (struct net_device *dev,
+ unsigned int txqueue);
void (*ndo_get_stats64)(struct net_device *dev,
struct rtnl_link_stats64 *storage);
@@ -1707,6 +1709,7 @@ enum netdev_priv_flags {
* @miniq_ingress: ingress/clsact qdisc specific data for
* ingress processing
* @ingress_queue: XXX: need comments on this one
+ * @nf_hooks_ingress: netfilter hooks executed for ingress packets
* @broadcast: hw bcast address
*
* @rx_cpu_rmap: CPU reverse-mapping for RX completion interrupts,
@@ -1775,7 +1778,7 @@ enum netdev_priv_flags {
* for hardware timestamping
* @sfp_bus: attached &struct sfp_bus structure.
* @qdisc_tx_busylock_key: lockdep class annotating Qdisc->busylock
- spinlock
+ * spinlock
* @qdisc_running_key: lockdep class annotating Qdisc->running seqcount
* @qdisc_xmit_lock_key: lockdep class annotating
* netdev_queue->_xmit_lock spinlock
@@ -2823,16 +2826,16 @@ static inline bool __skb_gro_checksum_convert_check(struct sk_buff *skb)
}
static inline void __skb_gro_checksum_convert(struct sk_buff *skb,
- __sum16 check, __wsum pseudo)
+ __wsum pseudo)
{
NAPI_GRO_CB(skb)->csum = ~pseudo;
NAPI_GRO_CB(skb)->csum_valid = 1;
}
-#define skb_gro_checksum_try_convert(skb, proto, check, compute_pseudo) \
+#define skb_gro_checksum_try_convert(skb, proto, compute_pseudo) \
do { \
if (__skb_gro_checksum_convert_check(skb)) \
- __skb_gro_checksum_convert(skb, check, \
+ __skb_gro_checksum_convert(skb, \
compute_pseudo(skb, proto)); \
} while (0)
@@ -4391,6 +4394,15 @@ struct netdev_notifier_bonding_info {
void netdev_bonding_info_change(struct net_device *dev,
struct netdev_bonding_info *bonding_info);
+#if IS_ENABLED(CONFIG_ETHTOOL_NETLINK)
+void ethtool_notify(struct net_device *dev, unsigned int cmd, const void *data);
+#else
+static inline void ethtool_notify(struct net_device *dev, unsigned int cmd,
+ const void *data)
+{
+}
+#endif
+
static inline
struct sk_buff *skb_gso_segment(struct sk_buff *skb, netdev_features_t features)
{
diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h
index 10f81629b9ce..6d0d70f3219c 100644
--- a/include/linux/nvme-fc-driver.h
+++ b/include/linux/nvme-fc-driver.h
@@ -270,6 +270,8 @@ struct nvme_fc_remote_port {
*
* Host/Initiator Transport Entrypoints/Parameters:
*
+ * @module: The LLDD module using the interface
+ *
* @localport_delete: The LLDD initiates deletion of a localport via
* nvme_fc_deregister_localport(). However, the teardown is
* asynchronous. This routine is called upon the completion of the
@@ -383,6 +385,8 @@ struct nvme_fc_remote_port {
* Value is Mandatory. Allowed to be zero.
*/
struct nvme_fc_port_template {
+ struct module *module;
+
/* initiator-based functions */
void (*localport_delete)(struct nvme_fc_local_port *);
void (*remoteport_delete)(struct nvme_fc_remote_port *);
diff --git a/include/linux/of_mdio.h b/include/linux/of_mdio.h
index 99cefe6f5edb..491a2b7e77c1 100644
--- a/include/linux/of_mdio.h
+++ b/include/linux/of_mdio.h
@@ -12,6 +12,7 @@
#include <linux/of.h>
#if IS_ENABLED(CONFIG_OF_MDIO)
+extern bool of_mdiobus_child_is_phy(struct device_node *child);
extern int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np);
extern struct phy_device *of_phy_find_device(struct device_node *phy_np);
extern struct phy_device *of_phy_connect(struct net_device *dev,
@@ -54,6 +55,11 @@ static inline int of_mdio_parse_addr(struct device *dev,
}
#else /* CONFIG_OF_MDIO */
+static inline bool of_mdiobus_child_is_phy(struct device_node *child)
+{
+ return false;
+}
+
static inline int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np)
{
/*
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 5032d453ac66..5932bb8e9c35 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -17,6 +17,7 @@
#include <linux/linkmode.h>
#include <linux/mdio.h>
#include <linux/mii.h>
+#include <linux/mii_timestamper.h>
#include <linux/module.h>
#include <linux/timer.h>
#include <linux/workqueue.h>
@@ -99,9 +100,11 @@ typedef enum {
PHY_INTERFACE_MODE_2500BASEX,
PHY_INTERFACE_MODE_RXAUI,
PHY_INTERFACE_MODE_XAUI,
- /* 10GBASE-KR, XFI, SFI - single lane 10G Serdes */
- PHY_INTERFACE_MODE_10GKR,
+ /* 10GBASE-R, XFI, SFI - single lane 10G Serdes */
+ PHY_INTERFACE_MODE_10GBASER,
PHY_INTERFACE_MODE_USXGMII,
+ /* 10GBASE-KR - with Clause 73 AN */
+ PHY_INTERFACE_MODE_10GKR,
PHY_INTERFACE_MODE_MAX,
} phy_interface_t;
@@ -175,10 +178,12 @@ static inline const char *phy_modes(phy_interface_t interface)
return "rxaui";
case PHY_INTERFACE_MODE_XAUI:
return "xaui";
- case PHY_INTERFACE_MODE_10GKR:
- return "10gbase-kr";
+ case PHY_INTERFACE_MODE_10GBASER:
+ return "10gbase-r";
case PHY_INTERFACE_MODE_USXGMII:
return "usxgmii";
+ case PHY_INTERFACE_MODE_10GKR:
+ return "10gbase-kr";
default:
return "unknown";
}
@@ -441,6 +446,7 @@ struct phy_device {
struct sfp_bus *sfp_bus;
struct phylink *phylink;
struct net_device *attached_dev;
+ struct mii_timestamper *mii_ts;
u8 mdix;
u8 mdix_ctrl;
@@ -546,29 +552,6 @@ struct phy_driver {
*/
int (*match_phy_device)(struct phy_device *phydev);
- /* Handles ethtool queries for hardware time stamping. */
- int (*ts_info)(struct phy_device *phydev, struct ethtool_ts_info *ti);
-
- /* Handles SIOCSHWTSTAMP ioctl for hardware time stamping. */
- int (*hwtstamp)(struct phy_device *phydev, struct ifreq *ifr);
-
- /*
- * Requests a Rx timestamp for 'skb'. If the skb is accepted,
- * the phy driver promises to deliver it using netif_rx() as
- * soon as a timestamp becomes available. One of the
- * PTP_CLASS_ values is passed in 'type'. The function must
- * return true if the skb is accepted for delivery.
- */
- bool (*rxtstamp)(struct phy_device *dev, struct sk_buff *skb, int type);
-
- /*
- * Requests a Tx timestamp for 'skb'. The phy driver promises
- * to deliver it using skb_complete_tx_timestamp() as soon as a
- * timestamp becomes available. One of the PTP_CLASS_ values
- * is passed in 'type'.
- */
- void (*txtstamp)(struct phy_device *dev, struct sk_buff *skb, int type);
-
/* Some devices (e.g. qnap TS-119P II) require PHY register changes to
* enable Wake on LAN, so set_wol is provided to be called in the
* ethernet driver's set_wol function. */
@@ -937,6 +920,66 @@ static inline bool phy_polling_mode(struct phy_device *phydev)
}
/**
+ * phy_has_hwtstamp - Tests whether a PHY time stamp configuration.
+ * @phydev: the phy_device struct
+ */
+static inline bool phy_has_hwtstamp(struct phy_device *phydev)
+{
+ return phydev && phydev->mii_ts && phydev->mii_ts->hwtstamp;
+}
+
+/**
+ * phy_has_rxtstamp - Tests whether a PHY supports receive time stamping.
+ * @phydev: the phy_device struct
+ */
+static inline bool phy_has_rxtstamp(struct phy_device *phydev)
+{
+ return phydev && phydev->mii_ts && phydev->mii_ts->rxtstamp;
+}
+
+/**
+ * phy_has_tsinfo - Tests whether a PHY reports time stamping and/or
+ * PTP hardware clock capabilities.
+ * @phydev: the phy_device struct
+ */
+static inline bool phy_has_tsinfo(struct phy_device *phydev)
+{
+ return phydev && phydev->mii_ts && phydev->mii_ts->ts_info;
+}
+
+/**
+ * phy_has_txtstamp - Tests whether a PHY supports transmit time stamping.
+ * @phydev: the phy_device struct
+ */
+static inline bool phy_has_txtstamp(struct phy_device *phydev)
+{
+ return phydev && phydev->mii_ts && phydev->mii_ts->txtstamp;
+}
+
+static inline int phy_hwtstamp(struct phy_device *phydev, struct ifreq *ifr)
+{
+ return phydev->mii_ts->hwtstamp(phydev->mii_ts, ifr);
+}
+
+static inline bool phy_rxtstamp(struct phy_device *phydev, struct sk_buff *skb,
+ int type)
+{
+ return phydev->mii_ts->rxtstamp(phydev->mii_ts, skb, type);
+}
+
+static inline int phy_ts_info(struct phy_device *phydev,
+ struct ethtool_ts_info *tsinfo)
+{
+ return phydev->mii_ts->ts_info(phydev->mii_ts, tsinfo);
+}
+
+static inline void phy_txtstamp(struct phy_device *phydev, struct sk_buff *skb,
+ int type)
+{
+ phydev->mii_ts->txtstamp(phydev->mii_ts, skb, type);
+}
+
+/**
* phy_is_internal - Convenience function for testing if a PHY is internal
* @phydev: the phy_device struct
*/
@@ -1000,7 +1043,7 @@ int phy_modify_paged_changed(struct phy_device *phydev, int page, u32 regnum,
int phy_modify_paged(struct phy_device *phydev, int page, u32 regnum,
u16 mask, u16 set);
-struct phy_device *phy_device_create(struct mii_bus *bus, int addr, int phy_id,
+struct phy_device *phy_device_create(struct mii_bus *bus, int addr, u32 phy_id,
bool is_c45,
struct phy_c45_device_ids *c45_ids);
#if IS_ENABLED(CONFIG_PHYLIB)
@@ -1094,11 +1137,13 @@ void phy_attached_info(struct phy_device *phydev);
int genphy_read_abilities(struct phy_device *phydev);
int genphy_setup_forced(struct phy_device *phydev);
int genphy_restart_aneg(struct phy_device *phydev);
+int genphy_check_and_restart_aneg(struct phy_device *phydev, bool restart);
int genphy_config_eee_advert(struct phy_device *phydev);
int __genphy_config_aneg(struct phy_device *phydev, bool changed);
int genphy_aneg_done(struct phy_device *phydev);
int genphy_update_link(struct phy_device *phydev);
int genphy_read_lpa(struct phy_device *phydev);
+int genphy_read_status_fixed(struct phy_device *phydev);
int genphy_read_status(struct phy_device *phydev);
int genphy_suspend(struct phy_device *phydev);
int genphy_resume(struct phy_device *phydev);
diff --git a/include/linux/phy_led_triggers.h b/include/linux/phy_led_triggers.h
index 3d507a8a6989..5c4d7a755101 100644
--- a/include/linux/phy_led_triggers.h
+++ b/include/linux/phy_led_triggers.h
@@ -14,7 +14,7 @@ struct phy_device;
#define PHY_LED_TRIGGER_SPEED_SUFFIX_SIZE 11
#define PHY_LINK_LED_TRIGGER_NAME_SIZE (MII_BUS_ID_SIZE + \
- FIELD_SIZEOF(struct mdio_device, addr)+\
+ sizeof_field(struct mdio_device, addr)+\
PHY_LED_TRIGGER_SPEED_SUFFIX_SIZE)
struct phy_led_trigger {
diff --git a/include/linux/phylink.h b/include/linux/phylink.h
index fed5488e3c75..523209e70947 100644
--- a/include/linux/phylink.h
+++ b/include/linux/phylink.h
@@ -63,10 +63,12 @@ enum phylink_op_type {
* struct phylink_config - PHYLINK configuration structure
* @dev: a pointer to a struct device associated with the MAC
* @type: operation type of PHYLINK instance
+ * @pcs_poll: MAC PCS cannot provide link change interrupt
*/
struct phylink_config {
struct device *dev;
enum phylink_op_type type;
+ bool pcs_poll;
};
/**
diff --git a/include/linux/platform_data/eth_ixp4xx.h b/include/linux/platform_data/eth_ixp4xx.h
new file mode 100644
index 000000000000..6f652ea0c6ae
--- /dev/null
+++ b/include/linux/platform_data/eth_ixp4xx.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PLATFORM_DATA_ETH_IXP4XX
+#define __PLATFORM_DATA_ETH_IXP4XX
+
+#include <linux/types.h>
+
+#define IXP4XX_ETH_NPEA 0x00
+#define IXP4XX_ETH_NPEB 0x10
+#define IXP4XX_ETH_NPEC 0x20
+
+/* Information about built-in Ethernet MAC interfaces */
+struct eth_plat_info {
+ u8 phy; /* MII PHY ID, 0 - 31 */
+ u8 rxq; /* configurable, currently 0 - 31 only */
+ u8 txreadyq;
+ u8 hwaddr[6];
+};
+
+#endif
diff --git a/include/linux/platform_data/ti-sysc.h b/include/linux/platform_data/ti-sysc.h
index 0b9380475144..8cfe570fdece 100644
--- a/include/linux/platform_data/ti-sysc.h
+++ b/include/linux/platform_data/ti-sysc.h
@@ -49,6 +49,7 @@ struct sysc_regbits {
s8 emufree_shift;
};
+#define SYSC_QUIRK_FORCE_MSTANDBY BIT(20)
#define SYSC_MODULE_QUIRK_AESS BIT(19)
#define SYSC_MODULE_QUIRK_SGX BIT(18)
#define SYSC_MODULE_QUIRK_HDQ1W BIT(17)
diff --git a/include/linux/platform_data/wan_ixp4xx_hss.h b/include/linux/platform_data/wan_ixp4xx_hss.h
new file mode 100644
index 000000000000..d525a0feb9e1
--- /dev/null
+++ b/include/linux/platform_data/wan_ixp4xx_hss.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PLATFORM_DATA_WAN_IXP4XX_HSS_H
+#define __PLATFORM_DATA_WAN_IXP4XX_HSS_H
+
+#include <linux/types.h>
+
+/* Information about built-in HSS (synchronous serial) interfaces */
+struct hss_plat_info {
+ int (*set_clock)(int port, unsigned int clock_type);
+ int (*open)(int port, void *pdev,
+ void (*set_carrier_cb)(void *pdev, int carrier));
+ void (*close)(int port, void *pdev);
+ u8 txreadyq;
+ u32 timer_freq;
+};
+
+#endif
diff --git a/include/linux/posix-clock.h b/include/linux/posix-clock.h
index fe6cfdcfbc26..468328b1e1dd 100644
--- a/include/linux/posix-clock.h
+++ b/include/linux/posix-clock.h
@@ -69,29 +69,32 @@ struct posix_clock_operations {
*
* @ops: Functional interface to the clock
* @cdev: Character device instance for this clock
- * @kref: Reference count.
+ * @dev: Pointer to the clock's device.
* @rwsem: Protects the 'zombie' field from concurrent access.
* @zombie: If 'zombie' is true, then the hardware has disappeared.
- * @release: A function to free the structure when the reference count reaches
- * zero. May be NULL if structure is statically allocated.
*
* Drivers should embed their struct posix_clock within a private
* structure, obtaining a reference to it during callbacks using
* container_of().
+ *
+ * Drivers should supply an initialized but not exposed struct device
+ * to posix_clock_register(). It is used to manage lifetime of the
+ * driver's private structure. It's 'release' field should be set to
+ * a release function for this private structure.
*/
struct posix_clock {
struct posix_clock_operations ops;
struct cdev cdev;
- struct kref kref;
+ struct device *dev;
struct rw_semaphore rwsem;
bool zombie;
- void (*release)(struct posix_clock *clk);
};
/**
* posix_clock_register() - register a new clock
- * @clk: Pointer to the clock. Caller must provide 'ops' and 'release'
- * @devid: Allocated device id
+ * @clk: Pointer to the clock. Caller must provide 'ops' field
+ * @dev: Pointer to the initialized device. Caller must provide
+ * 'release' field
*
* A clock driver calls this function to register itself with the
* clock device subsystem. If 'clk' points to dynamically allocated
@@ -100,7 +103,7 @@ struct posix_clock {
*
* Returns zero on success, non-zero otherwise.
*/
-int posix_clock_register(struct posix_clock *clk, dev_t devid);
+int posix_clock_register(struct posix_clock *clk, struct device *dev);
/**
* posix_clock_unregister() - unregister a clock
diff --git a/include/linux/printk.h b/include/linux/printk.h
index c09d67edda3a..1e6108b8d15f 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -302,9 +302,8 @@ extern int kptr_restrict;
printk(KERN_CRIT pr_fmt(fmt), ##__VA_ARGS__)
#define pr_err(fmt, ...) \
printk(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__)
-#define pr_warning(fmt, ...) \
+#define pr_warn(fmt, ...) \
printk(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__)
-#define pr_warn pr_warning
#define pr_notice(fmt, ...) \
printk(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__)
#define pr_info(fmt, ...) \
diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h
index 93cc4f1d444a..c64a1ef87240 100644
--- a/include/linux/ptp_clock_kernel.h
+++ b/include/linux/ptp_clock_kernel.h
@@ -243,6 +243,13 @@ int ptp_find_pin(struct ptp_clock *ptp,
int ptp_schedule_worker(struct ptp_clock *ptp, unsigned long delay);
+/**
+ * ptp_cancel_worker_sync() - cancel ptp auxiliary clock
+ *
+ * @ptp: The clock obtained from ptp_clock_register().
+ */
+void ptp_cancel_worker_sync(struct ptp_clock *ptp);
+
#else
static inline struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info,
struct device *parent)
@@ -260,6 +267,8 @@ static inline int ptp_find_pin(struct ptp_clock *ptp,
static inline int ptp_schedule_worker(struct ptp_clock *ptp,
unsigned long delay)
{ return -EOPNOTSUPP; }
+static inline void ptp_cancel_worker_sync(struct ptp_clock *ptp)
+{ }
#endif
diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h
index bc8206a8f30e..61974c4c566b 100644
--- a/include/linux/rculist_nulls.h
+++ b/include/linux/rculist_nulls.h
@@ -101,6 +101,43 @@ static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n,
}
/**
+ * hlist_nulls_add_tail_rcu
+ * @n: the element to add to the hash list.
+ * @h: the list to add to.
+ *
+ * Description:
+ * Adds the specified element to the specified hlist_nulls,
+ * while permitting racing traversals.
+ *
+ * The caller must take whatever precautions are necessary
+ * (such as holding appropriate locks) to avoid racing
+ * with another list-mutation primitive, such as hlist_nulls_add_head_rcu()
+ * or hlist_nulls_del_rcu(), running on this same list.
+ * However, it is perfectly legal to run concurrently with
+ * the _rcu list-traversal primitives, such as
+ * hlist_nulls_for_each_entry_rcu(), used to prevent memory-consistency
+ * problems on Alpha CPUs. Regardless of the type of CPU, the
+ * list-traversal primitive must be guarded by rcu_read_lock().
+ */
+static inline void hlist_nulls_add_tail_rcu(struct hlist_nulls_node *n,
+ struct hlist_nulls_head *h)
+{
+ struct hlist_nulls_node *i, *last = NULL;
+
+ /* Note: write side code, so rcu accessors are not needed. */
+ for (i = h->first; !is_a_nulls(i); i = i->next)
+ last = i;
+
+ if (last) {
+ n->next = last->next;
+ n->pprev = &last->next;
+ rcu_assign_pointer(hlist_next_rcu(last), n);
+ } else {
+ hlist_nulls_add_head_rcu(n, h);
+ }
+}
+
+/**
* hlist_nulls_for_each_entry_rcu - iterate over rcu list of given type
* @tpos: the type * to use as a loop cursor.
* @pos: the &struct hlist_nulls_node to use as a loop cursor.
diff --git a/include/linux/sched/cpufreq.h b/include/linux/sched/cpufreq.h
index afa940cd50dc..cc6bcc1e96bc 100644
--- a/include/linux/sched/cpufreq.h
+++ b/include/linux/sched/cpufreq.h
@@ -12,6 +12,8 @@
#define SCHED_CPUFREQ_MIGRATION (1U << 1)
#ifdef CONFIG_CPU_FREQ
+struct cpufreq_policy;
+
struct update_util_data {
void (*func)(struct update_util_data *data, u64 time, unsigned int flags);
};
@@ -20,6 +22,7 @@ void cpufreq_add_update_util_hook(int cpu, struct update_util_data *data,
void (*func)(struct update_util_data *data, u64 time,
unsigned int flags));
void cpufreq_remove_update_util_hook(int cpu);
+bool cpufreq_this_cpu_can_update(struct cpufreq_policy *policy);
static inline unsigned long map_util_freq(unsigned long util,
unsigned long freq, unsigned long cap)
diff --git a/include/linux/sfp.h b/include/linux/sfp.h
index 487fd9412d10..38893e4dd0f0 100644
--- a/include/linux/sfp.h
+++ b/include/linux/sfp.h
@@ -275,6 +275,61 @@ struct sfp_diag {
__be16 cal_v_offset;
} __packed;
+/* SFF8024 defined constants */
+enum {
+ SFF8024_ID_UNK = 0x00,
+ SFF8024_ID_SFF_8472 = 0x02,
+ SFF8024_ID_SFP = 0x03,
+ SFF8024_ID_DWDM_SFP = 0x0b,
+ SFF8024_ID_QSFP_8438 = 0x0c,
+ SFF8024_ID_QSFP_8436_8636 = 0x0d,
+ SFF8024_ID_QSFP28_8636 = 0x11,
+
+ SFF8024_ENCODING_UNSPEC = 0x00,
+ SFF8024_ENCODING_8B10B = 0x01,
+ SFF8024_ENCODING_4B5B = 0x02,
+ SFF8024_ENCODING_NRZ = 0x03,
+ SFF8024_ENCODING_8472_MANCHESTER= 0x04,
+ SFF8024_ENCODING_8472_SONET = 0x05,
+ SFF8024_ENCODING_8472_64B66B = 0x06,
+ SFF8024_ENCODING_8436_MANCHESTER= 0x06,
+ SFF8024_ENCODING_8436_SONET = 0x04,
+ SFF8024_ENCODING_8436_64B66B = 0x05,
+ SFF8024_ENCODING_256B257B = 0x07,
+ SFF8024_ENCODING_PAM4 = 0x08,
+
+ SFF8024_CONNECTOR_UNSPEC = 0x00,
+ /* codes 01-05 not supportable on SFP, but some modules have single SC */
+ SFF8024_CONNECTOR_SC = 0x01,
+ SFF8024_CONNECTOR_FIBERJACK = 0x06,
+ SFF8024_CONNECTOR_LC = 0x07,
+ SFF8024_CONNECTOR_MT_RJ = 0x08,
+ SFF8024_CONNECTOR_MU = 0x09,
+ SFF8024_CONNECTOR_SG = 0x0a,
+ SFF8024_CONNECTOR_OPTICAL_PIGTAIL= 0x0b,
+ SFF8024_CONNECTOR_MPO_1X12 = 0x0c,
+ SFF8024_CONNECTOR_MPO_2X16 = 0x0d,
+ SFF8024_CONNECTOR_HSSDC_II = 0x20,
+ SFF8024_CONNECTOR_COPPER_PIGTAIL= 0x21,
+ SFF8024_CONNECTOR_RJ45 = 0x22,
+ SFF8024_CONNECTOR_NOSEPARATE = 0x23,
+ SFF8024_CONNECTOR_MXC_2X16 = 0x24,
+
+ SFF8024_ECC_UNSPEC = 0x00,
+ SFF8024_ECC_100G_25GAUI_C2M_AOC = 0x01,
+ SFF8024_ECC_100GBASE_SR4_25GBASE_SR = 0x02,
+ SFF8024_ECC_100GBASE_LR4_25GBASE_LR = 0x03,
+ SFF8024_ECC_100GBASE_ER4_25GBASE_ER = 0x04,
+ SFF8024_ECC_100GBASE_SR10 = 0x05,
+ SFF8024_ECC_100GBASE_CR4 = 0x0b,
+ SFF8024_ECC_25GBASE_CR_S = 0x0c,
+ SFF8024_ECC_25GBASE_CR_N = 0x0d,
+ SFF8024_ECC_10GBASE_T_SFI = 0x16,
+ SFF8024_ECC_10GBASE_T_SR = 0x1c,
+ SFF8024_ECC_5GBASE_T = 0x1d,
+ SFF8024_ECC_2_5GBASE_T = 0x1e,
+};
+
/* SFP EEPROM registers */
enum {
SFP_PHYS_ID = 0x00,
@@ -309,34 +364,7 @@ enum {
SFP_SFF8472_COMPLIANCE = 0x5e,
SFP_CC_EXT = 0x5f,
- SFP_PHYS_ID_SFF = 0x02,
- SFP_PHYS_ID_SFP = 0x03,
SFP_PHYS_EXT_ID_SFP = 0x04,
- SFP_CONNECTOR_UNSPEC = 0x00,
- /* codes 01-05 not supportable on SFP, but some modules have single SC */
- SFP_CONNECTOR_SC = 0x01,
- SFP_CONNECTOR_FIBERJACK = 0x06,
- SFP_CONNECTOR_LC = 0x07,
- SFP_CONNECTOR_MT_RJ = 0x08,
- SFP_CONNECTOR_MU = 0x09,
- SFP_CONNECTOR_SG = 0x0a,
- SFP_CONNECTOR_OPTICAL_PIGTAIL = 0x0b,
- SFP_CONNECTOR_MPO_1X12 = 0x0c,
- SFP_CONNECTOR_MPO_2X16 = 0x0d,
- SFP_CONNECTOR_HSSDC_II = 0x20,
- SFP_CONNECTOR_COPPER_PIGTAIL = 0x21,
- SFP_CONNECTOR_RJ45 = 0x22,
- SFP_CONNECTOR_NOSEPARATE = 0x23,
- SFP_CONNECTOR_MXC_2X16 = 0x24,
- SFP_ENCODING_UNSPEC = 0x00,
- SFP_ENCODING_8B10B = 0x01,
- SFP_ENCODING_4B5B = 0x02,
- SFP_ENCODING_NRZ = 0x03,
- SFP_ENCODING_8472_MANCHESTER = 0x04,
- SFP_ENCODING_8472_SONET = 0x05,
- SFP_ENCODING_8472_64B66B = 0x06,
- SFP_ENCODING_256B257B = 0x07,
- SFP_ENCODING_PAM4 = 0x08,
SFP_OPTIONS_HIGH_POWER_LEVEL = BIT(13),
SFP_OPTIONS_PAGING_A2 = BIT(12),
SFP_OPTIONS_RETIMER = BIT(11),
@@ -479,6 +507,8 @@ struct sfp_bus;
* @module_insert: called after a module has been detected to determine
* whether the module is supported for the upstream device.
* @module_remove: called after the module has been removed.
+ * @module_start: called after the PHY probe step
+ * @module_stop: called before the PHY is removed
* @link_down: called when the link is non-operational for whatever
* reason.
* @link_up: called when the link is operational.
@@ -492,6 +522,8 @@ struct sfp_upstream_ops {
void (*detach)(void *priv, struct sfp_bus *bus);
int (*module_insert)(void *priv, const struct sfp_eeprom_id *id);
void (*module_remove)(void *priv);
+ int (*module_start)(void *priv);
+ void (*module_stop)(void *priv);
void (*link_down)(void *priv);
void (*link_up)(void *priv);
int (*connect_phy)(void *priv, struct phy_device *);
@@ -501,10 +533,10 @@ struct sfp_upstream_ops {
#if IS_ENABLED(CONFIG_SFP)
int sfp_parse_port(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
unsigned long *support);
+bool sfp_may_have_phy(struct sfp_bus *bus, const struct sfp_eeprom_id *id);
void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
unsigned long *support);
phy_interface_t sfp_select_interface(struct sfp_bus *bus,
- const struct sfp_eeprom_id *id,
unsigned long *link_modes);
int sfp_get_module_info(struct sfp_bus *bus, struct ethtool_modinfo *modinfo);
@@ -525,6 +557,12 @@ static inline int sfp_parse_port(struct sfp_bus *bus,
return PORT_OTHER;
}
+static inline bool sfp_may_have_phy(struct sfp_bus *bus,
+ const struct sfp_eeprom_id *id)
+{
+ return false;
+}
+
static inline void sfp_parse_support(struct sfp_bus *bus,
const struct sfp_eeprom_id *id,
unsigned long *support)
@@ -532,7 +570,6 @@ static inline void sfp_parse_support(struct sfp_bus *bus,
}
static inline phy_interface_t sfp_select_interface(struct sfp_bus *bus,
- const struct sfp_eeprom_id *id,
unsigned long *link_modes)
{
return PHY_INTERFACE_MODE_NA;
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index e9133bcf0544..016b3c4ab99a 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1478,6 +1478,11 @@ static inline void skb_mark_not_on_list(struct sk_buff *skb)
skb->next = NULL;
}
+/* Iterate through singly-linked GSO fragments of an skb. */
+#define skb_list_walk_safe(first, skb, next) \
+ for ((skb) = (first), (next) = (skb) ? (skb)->next : NULL; (skb); \
+ (skb) = (next), (next) = (skb) ? (skb)->next : NULL)
+
static inline void skb_list_del_init(struct sk_buff *skb)
{
__list_del_entry(&skb->list);
@@ -4092,6 +4097,9 @@ enum skb_ext_id {
#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
TC_SKB_EXT,
#endif
+#if IS_ENABLED(CONFIG_MPTCP)
+ SKB_EXT_MPTCP,
+#endif
SKB_EXT_NUM, /* must be last */
};
@@ -4112,6 +4120,9 @@ struct skb_ext {
char data[0] __aligned(8);
};
+struct skb_ext *__skb_ext_alloc(void);
+void *__skb_ext_set(struct sk_buff *skb, enum skb_ext_id id,
+ struct skb_ext *ext);
void *skb_ext_add(struct sk_buff *skb, enum skb_ext_id id);
void __skb_ext_del(struct sk_buff *skb, enum skb_ext_id id);
void __skb_ext_put(struct skb_ext *ext);
diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index 98fe8663033a..3a67a7e45633 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -689,10 +689,10 @@ extern void spi_finalize_current_transfer(struct spi_controller *ctlr);
/* Helper calls for driver to timestamp transfer */
void spi_take_timestamp_pre(struct spi_controller *ctlr,
struct spi_transfer *xfer,
- const void *tx, bool irqs_off);
+ size_t progress, bool irqs_off);
void spi_take_timestamp_post(struct spi_controller *ctlr,
struct spi_transfer *xfer,
- const void *tx, bool irqs_off);
+ size_t progress, bool irqs_off);
/* the spi driver core manages memory for the spi_controller classdev */
extern struct spi_controller *__spi_alloc_controller(struct device *host,
diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index d4bcd9387136..0531afa9b21e 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -109,6 +109,18 @@ struct stmmac_axi {
bool axi_rb;
};
+#define EST_GCL 1024
+struct stmmac_est {
+ int enable;
+ u32 btr_offset[2];
+ u32 btr[2];
+ u32 ctr[2];
+ u32 ter;
+ u32 gcl_unaligned[EST_GCL];
+ u32 gcl[EST_GCL];
+ u32 gcl_size;
+};
+
struct stmmac_rxq_cfg {
u8 mode_to_use;
u32 chan;
@@ -139,6 +151,7 @@ struct plat_stmmacenet_data {
struct device_node *phylink_node;
struct device_node *mdio_node;
struct stmmac_dma_cfg *dma_cfg;
+ struct stmmac_est *est;
int clk_csr;
int has_gmac;
int enh_desc;
diff --git a/include/linux/sxgbe_platform.h b/include/linux/sxgbe_platform.h
index 85ec745767bd..966146f7267a 100644
--- a/include/linux/sxgbe_platform.h
+++ b/include/linux/sxgbe_platform.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * 10G controller driver for Samsung EXYNOS SoCs
+ * 10G controller driver for Samsung Exynos SoCs
*
* Copyright (C) 2013 Samsung Electronics Co., Ltd.
* http://www.samsung.com
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index d0391cc2dae9..5262b7a76d39 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -1231,8 +1231,6 @@ asmlinkage long sys_ni_syscall(void);
* the ksys_xyzyyz() functions prototyped below.
*/
-int ksys_mount(const char __user *dev_name, const char __user *dir_name,
- const char __user *type, unsigned long flags, void __user *data);
int ksys_umount(char __user *name, int flags);
int ksys_dup(unsigned int fildes);
int ksys_chroot(const char __user *filename);
diff --git a/include/linux/tpm.h b/include/linux/tpm.h
index 0d6e949ba315..03e9b184411b 100644
--- a/include/linux/tpm.h
+++ b/include/linux/tpm.h
@@ -403,6 +403,7 @@ extern int tpm_pcr_extend(struct tpm_chip *chip, u32 pcr_idx,
extern int tpm_send(struct tpm_chip *chip, void *cmd, size_t buflen);
extern int tpm_get_random(struct tpm_chip *chip, u8 *data, size_t max);
extern struct tpm_chip *tpm_default_chip(void);
+void tpm2_flush_context(struct tpm_chip *chip, u32 handle);
#else
static inline int tpm_is_tpm2(struct tpm_chip *chip)
{
diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h
index d8860f2d0976..b0bff3083278 100644
--- a/include/linux/usb/usbnet.h
+++ b/include/linux/usb/usbnet.h
@@ -253,7 +253,7 @@ extern int usbnet_open(struct net_device *net);
extern int usbnet_stop(struct net_device *net);
extern netdev_tx_t usbnet_start_xmit(struct sk_buff *skb,
struct net_device *net);
-extern void usbnet_tx_timeout(struct net_device *net);
+extern void usbnet_tx_timeout(struct net_device *net, unsigned int txqueue);
extern int usbnet_change_mtu(struct net_device *net, int new_mtu);
extern int usbnet_get_endpoints(struct usbnet *, struct usb_interface *);
diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 1bab88184d3c..a088349dd94f 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -437,7 +437,7 @@ static inline void addrconf_addr_solict_mult(const struct in6_addr *addr,
static inline bool ipv6_addr_is_ll_all_nodes(const struct in6_addr *addr)
{
#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64
- __be64 *p = (__be64 *)addr;
+ __be64 *p = (__force __be64 *)addr;
return ((p[0] ^ cpu_to_be64(0xff02000000000000UL)) | (p[1] ^ cpu_to_be64(1))) == 0UL;
#else
return ((addr->s6_addr32[0] ^ htonl(0xff020000)) |
@@ -449,7 +449,7 @@ static inline bool ipv6_addr_is_ll_all_nodes(const struct in6_addr *addr)
static inline bool ipv6_addr_is_ll_all_routers(const struct in6_addr *addr)
{
#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64
- __be64 *p = (__be64 *)addr;
+ __be64 *p = (__force __be64 *)addr;
return ((p[0] ^ cpu_to_be64(0xff02000000000000UL)) | (p[1] ^ cpu_to_be64(2))) == 0UL;
#else
return ((addr->s6_addr32[0] ^ htonl(0xff020000)) |
@@ -466,7 +466,7 @@ static inline bool ipv6_addr_is_isatap(const struct in6_addr *addr)
static inline bool ipv6_addr_is_solict_mult(const struct in6_addr *addr)
{
#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64
- __be64 *p = (__be64 *)addr;
+ __be64 *p = (__force __be64 *)addr;
return ((p[0] ^ cpu_to_be64(0xff02000000000000UL)) |
((p[1] ^ cpu_to_be64(0x00000001ff000000UL)) &
cpu_to_be64(0xffffffffff000000UL))) == 0UL;
@@ -481,7 +481,7 @@ static inline bool ipv6_addr_is_solict_mult(const struct in6_addr *addr)
static inline bool ipv6_addr_is_all_snoopers(const struct in6_addr *addr)
{
#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64
- __be64 *p = (__be64 *)addr;
+ __be64 *p = (__force __be64 *)addr;
return ((p[0] ^ cpu_to_be64(0xff02000000000000UL)) |
(p[1] ^ cpu_to_be64(0x6a))) == 0UL;
diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index 3426d6dacc45..17e10fba2152 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -41,6 +41,10 @@ struct unix_skb_parms {
u32 consumed;
} __randomize_layout;
+struct scm_stat {
+ u32 nr_fds;
+};
+
#define UNIXCB(skb) (*(struct unix_skb_parms *)&((skb)->cb))
#define unix_state_lock(s) spin_lock(&unix_sk(s)->lock)
@@ -65,6 +69,7 @@ struct unix_sock {
#define UNIX_GC_MAYBE_CYCLE 1
struct socket_wq peer_wq;
wait_queue_entry_t peer_wake;
+ struct scm_stat scm_stat;
};
static inline struct unix_sock *unix_sk(const struct sock *sk)
diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
index 4206dc6d813f..b1c717286993 100644
--- a/include/net/af_vsock.h
+++ b/include/net/af_vsock.h
@@ -98,6 +98,8 @@ struct vsock_transport_send_notify_data {
#define VSOCK_TRANSPORT_F_G2H 0x00000002
/* Transport provides DGRAM communication */
#define VSOCK_TRANSPORT_F_DGRAM 0x00000004
+/* Transport provides local (loopback) communication */
+#define VSOCK_TRANSPORT_F_LOCAL 0x00000008
struct vsock_transport {
struct module *module;
diff --git a/include/net/devlink.h b/include/net/devlink.h
index 47f87b2fcf63..a6856f1d5d1f 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -485,6 +485,8 @@ enum devlink_param_generic_id {
#define DEVLINK_INFO_VERSION_GENERIC_FW_UNDI "fw.undi"
/* NCSI support/handler version */
#define DEVLINK_INFO_VERSION_GENERIC_FW_NCSI "fw.ncsi"
+/* FW parameter set id */
+#define DEVLINK_INFO_VERSION_GENERIC_FW_PSID "fw.psid"
struct devlink_region;
struct devlink_info_req;
@@ -562,7 +564,7 @@ struct devlink_trap {
};
/* All traps must be documented in
- * Documentation/networking/devlink-trap.rst
+ * Documentation/networking/devlink/devlink-trap.rst
*/
enum devlink_trap_generic_id {
DEVLINK_TRAP_GENERIC_ID_SMAC_MC,
@@ -596,7 +598,7 @@ enum devlink_trap_generic_id {
};
/* All trap groups must be documented in
- * Documentation/networking/devlink-trap.rst
+ * Documentation/networking/devlink/devlink-trap.rst
*/
enum devlink_trap_group_generic_id {
DEVLINK_TRAP_GROUP_GENERIC_ID_L2_DROPS,
@@ -1000,6 +1002,8 @@ int devlink_health_report(struct devlink_health_reporter *reporter,
void
devlink_health_reporter_state_update(struct devlink_health_reporter *reporter,
enum devlink_health_reporter_state state);
+void
+devlink_health_reporter_recovery_done(struct devlink_health_reporter *reporter);
bool devlink_is_reload_failed(const struct devlink *devlink);
diff --git a/include/net/dsa.h b/include/net/dsa.h
index 6767dc3f66c0..63495e3443ac 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -43,6 +43,7 @@ struct phylink_link_state;
#define DSA_TAG_PROTO_SJA1105_VALUE 13
#define DSA_TAG_PROTO_KSZ8795_VALUE 14
#define DSA_TAG_PROTO_OCELOT_VALUE 15
+#define DSA_TAG_PROTO_AR9331_VALUE 16
enum dsa_tag_protocol {
DSA_TAG_PROTO_NONE = DSA_TAG_PROTO_NONE_VALUE,
@@ -61,6 +62,7 @@ enum dsa_tag_protocol {
DSA_TAG_PROTO_SJA1105 = DSA_TAG_PROTO_SJA1105_VALUE,
DSA_TAG_PROTO_KSZ8795 = DSA_TAG_PROTO_KSZ8795_VALUE,
DSA_TAG_PROTO_OCELOT = DSA_TAG_PROTO_OCELOT_VALUE,
+ DSA_TAG_PROTO_AR9331 = DSA_TAG_PROTO_AR9331_VALUE,
};
struct packet_type;
@@ -88,7 +90,6 @@ struct dsa_device_ops {
struct dsa_skb_cb {
struct sk_buff *clone;
- bool deferred_xmit;
};
struct __dsa_skb_cb {
@@ -190,9 +191,6 @@ struct dsa_port {
struct phylink *pl;
struct phylink_config pl_config;
- struct work_struct xmit_work;
- struct sk_buff_head xmit_queue;
-
struct list_head list;
/*
@@ -281,6 +279,11 @@ struct dsa_switch {
*/
bool vlan_filtering;
+ /* MAC PCS does not provide link state change interrupt, and requires
+ * polling. Flag passed on to PHYLINK.
+ */
+ bool pcs_poll;
+
size_t num_ports;
};
@@ -377,7 +380,8 @@ typedef int dsa_fdb_dump_cb_t(const unsigned char *addr, u16 vid,
bool is_static, void *data);
struct dsa_switch_ops {
enum dsa_tag_protocol (*get_tag_protocol)(struct dsa_switch *ds,
- int port);
+ int port,
+ enum dsa_tag_protocol mprot);
int (*setup)(struct dsa_switch *ds);
void (*teardown)(struct dsa_switch *ds);
@@ -562,11 +566,6 @@ struct dsa_switch_ops {
bool (*port_rxtstamp)(struct dsa_switch *ds, int port,
struct sk_buff *skb, unsigned int type);
- /*
- * Deferred frame Tx
- */
- netdev_tx_t (*port_deferred_xmit)(struct dsa_switch *ds, int port,
- struct sk_buff *skb);
/* Devlink parameters */
int (*devlink_param_get)(struct dsa_switch *ds, u32 id,
struct devlink_param_gset_ctx *ctx);
diff --git a/include/net/dsfield.h b/include/net/dsfield.h
index 1a245ee10c95..a59a57ffc546 100644
--- a/include/net/dsfield.h
+++ b/include/net/dsfield.h
@@ -21,7 +21,7 @@ static inline __u8 ipv4_get_dsfield(const struct iphdr *iph)
static inline __u8 ipv6_get_dsfield(const struct ipv6hdr *ipv6h)
{
- return ntohs(*(const __be16 *)ipv6h) >> 4;
+ return ntohs(*(__force const __be16 *)ipv6h) >> 4;
}
diff --git a/include/net/dst.h b/include/net/dst.h
index fe62fe2eb781..3448cf865ede 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -82,7 +82,7 @@ struct dst_entry {
struct dst_metrics {
u32 metrics[RTAX_MAX];
refcount_t refcnt;
-};
+} __aligned(4); /* Low pointer bits contain DST_METRICS_FLAGS */
extern const struct dst_metrics dst_default_metrics;
u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old);
@@ -516,7 +516,16 @@ static inline void skb_dst_update_pmtu(struct sk_buff *skb, u32 mtu)
struct dst_entry *dst = skb_dst(skb);
if (dst && dst->ops->update_pmtu)
- dst->ops->update_pmtu(dst, NULL, skb, mtu);
+ dst->ops->update_pmtu(dst, NULL, skb, mtu, true);
+}
+
+/* update dst pmtu but not do neighbor confirm */
+static inline void skb_dst_update_pmtu_no_confirm(struct sk_buff *skb, u32 mtu)
+{
+ struct dst_entry *dst = skb_dst(skb);
+
+ if (dst && dst->ops->update_pmtu)
+ dst->ops->update_pmtu(dst, NULL, skb, mtu, false);
}
static inline void skb_tunnel_check_pmtu(struct sk_buff *skb,
@@ -526,7 +535,7 @@ static inline void skb_tunnel_check_pmtu(struct sk_buff *skb,
u32 encap_mtu = dst_mtu(encap_dst);
if (skb->len > encap_mtu - headroom)
- skb_dst_update_pmtu(skb, encap_mtu - headroom);
+ skb_dst_update_pmtu_no_confirm(skb, encap_mtu - headroom);
}
#endif /* _NET_DST_H */
diff --git a/include/net/dst_ops.h b/include/net/dst_ops.h
index 5ec645f27ee3..443863c7b8da 100644
--- a/include/net/dst_ops.h
+++ b/include/net/dst_ops.h
@@ -27,7 +27,8 @@ struct dst_ops {
struct dst_entry * (*negative_advice)(struct dst_entry *);
void (*link_failure)(struct sk_buff *);
void (*update_pmtu)(struct dst_entry *dst, struct sock *sk,
- struct sk_buff *skb, u32 mtu);
+ struct sk_buff *skb, u32 mtu,
+ bool confirm_neigh);
void (*redirect)(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb);
int (*local_out)(struct net *net, struct sock *sk, struct sk_buff *skb);
diff --git a/include/net/garp.h b/include/net/garp.h
index c41833bd4590..4d9a0c6a2e5f 100644
--- a/include/net/garp.h
+++ b/include/net/garp.h
@@ -37,7 +37,7 @@ struct garp_skb_cb {
static inline struct garp_skb_cb *garp_cb(struct sk_buff *skb)
{
BUILD_BUG_ON(sizeof(struct garp_skb_cb) >
- FIELD_SIZEOF(struct sk_buff, cb));
+ sizeof_field(struct sk_buff, cb));
return (struct garp_skb_cb *)skb->cb;
}
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index af2b4c065a04..d0019d3395cf 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -103,13 +103,19 @@ struct inet_bind_hashbucket {
struct hlist_head chain;
};
-/*
- * Sockets can be hashed in established or listening table
+/* Sockets can be hashed in established or listening table.
+ * We must use different 'nulls' end-of-chain value for all hash buckets :
+ * A socket might transition from ESTABLISH to LISTEN state without
+ * RCU grace period. A lookup in ehash table needs to handle this case.
*/
+#define LISTENING_NULLS_BASE (1U << 29)
struct inet_listen_hashbucket {
spinlock_t lock;
unsigned int count;
- struct hlist_head head;
+ union {
+ struct hlist_head head;
+ struct hlist_nulls_head nulls_head;
+ };
};
/* This is for listening sockets, thus all sockets which possess wildcards. */
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index f1535f172935..b579faea41e9 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -487,6 +487,7 @@ int call_fib6_multipath_entry_notifiers(struct net *net,
struct fib6_info *rt,
unsigned int nsiblings,
struct netlink_ext_ack *extack);
+int call_fib6_entry_notifiers_replace(struct net *net, struct fib6_info *rt);
void fib6_rt_update(struct net *net, struct fib6_info *rt,
struct nl_info *info);
void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index af645604f328..236503a50759 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -33,8 +33,8 @@
/* Used to memset ipv4 address padding. */
#define IP_TUNNEL_KEY_IPV4_PAD offsetofend(struct ip_tunnel_key, u.ipv4.dst)
#define IP_TUNNEL_KEY_IPV4_PAD_LEN \
- (FIELD_SIZEOF(struct ip_tunnel_key, u) - \
- FIELD_SIZEOF(struct ip_tunnel_key, u.ipv4))
+ (sizeof_field(struct ip_tunnel_key, u) - \
+ sizeof_field(struct ip_tunnel_key, u.ipv4))
struct ip_tunnel_key {
__be64 tun_id;
@@ -63,7 +63,7 @@ struct ip_tunnel_key {
/* Maximum tunnel options length. */
#define IP_TUNNEL_OPTS_MAX \
- GENMASK((FIELD_SIZEOF(struct ip_tunnel_info, \
+ GENMASK((sizeof_field(struct ip_tunnel_info, \
options_len) * BITS_PER_BYTE) - 1, 0)
struct ip_tunnel_info {
diff --git a/include/net/mptcp.h b/include/net/mptcp.h
new file mode 100644
index 000000000000..0573ae75c3db
--- /dev/null
+++ b/include/net/mptcp.h
@@ -0,0 +1,85 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Multipath TCP
+ *
+ * Copyright (c) 2017 - 2019, Intel Corporation.
+ */
+
+#ifndef __NET_MPTCP_H
+#define __NET_MPTCP_H
+
+#include <linux/skbuff.h>
+#include <linux/types.h>
+
+/* MPTCP sk_buff extension data */
+struct mptcp_ext {
+ u64 data_ack;
+ u64 data_seq;
+ u32 subflow_seq;
+ u16 data_len;
+ u8 use_map:1,
+ dsn64:1,
+ data_fin:1,
+ use_ack:1,
+ ack64:1,
+ __unused:3;
+ /* one byte hole */
+};
+
+#ifdef CONFIG_MPTCP
+
+/* move the skb extension owership, with the assumption that 'to' is
+ * newly allocated
+ */
+static inline void mptcp_skb_ext_move(struct sk_buff *to,
+ struct sk_buff *from)
+{
+ if (!skb_ext_exist(from, SKB_EXT_MPTCP))
+ return;
+
+ if (WARN_ON_ONCE(to->active_extensions))
+ skb_ext_put(to);
+
+ to->active_extensions = from->active_extensions;
+ to->extensions = from->extensions;
+ from->active_extensions = 0;
+}
+
+static inline bool mptcp_ext_matches(const struct mptcp_ext *to_ext,
+ const struct mptcp_ext *from_ext)
+{
+ /* MPTCP always clears the ext when adding it to the skb, so
+ * holes do not bother us here
+ */
+ return !from_ext ||
+ (to_ext && from_ext &&
+ !memcmp(from_ext, to_ext, sizeof(struct mptcp_ext)));
+}
+
+/* check if skbs can be collapsed.
+ * MPTCP collapse is allowed if neither @to or @from carry an mptcp data
+ * mapping, or if the extension of @to is the same as @from.
+ * Collapsing is not possible if @to lacks an extension, but @from carries one.
+ */
+static inline bool mptcp_skb_can_collapse(const struct sk_buff *to,
+ const struct sk_buff *from)
+{
+ return mptcp_ext_matches(skb_ext_find(to, SKB_EXT_MPTCP),
+ skb_ext_find(from, SKB_EXT_MPTCP));
+}
+
+#else
+
+static inline void mptcp_skb_ext_move(struct sk_buff *to,
+ const struct sk_buff *from)
+{
+}
+
+static inline bool mptcp_skb_can_collapse(const struct sk_buff *to,
+ const struct sk_buff *from)
+{
+ return true;
+}
+
+#endif /* CONFIG_MPTCP */
+#endif /* __NET_MPTCP_H */
diff --git a/include/net/mrp.h b/include/net/mrp.h
index ef58b4a07190..1c308c034e1a 100644
--- a/include/net/mrp.h
+++ b/include/net/mrp.h
@@ -39,7 +39,7 @@ struct mrp_skb_cb {
static inline struct mrp_skb_cb *mrp_cb(struct sk_buff *skb)
{
BUILD_BUG_ON(sizeof(struct mrp_skb_cb) >
- FIELD_SIZEOF(struct sk_buff, cb));
+ sizeof_field(struct sk_buff, cb));
return (struct mrp_skb_cb *)skb->cb;
}
diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 6ad9ad47a9c5..8ec77bfdc1a4 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -72,7 +72,6 @@ struct neigh_parms {
struct net_device *dev;
struct list_head list;
int (*neigh_setup)(struct neighbour *);
- void (*neigh_cleanup)(struct neighbour *);
struct neigh_table *tbl;
void *sysctl_table;
diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h
index 44b5a00a9c64..37f0fbefb060 100644
--- a/include/net/netfilter/nf_conntrack_helper.h
+++ b/include/net/netfilter/nf_conntrack_helper.h
@@ -81,7 +81,7 @@ struct nf_conn_help {
};
#define NF_CT_HELPER_BUILD_BUG_ON(structsize) \
- BUILD_BUG_ON((structsize) > FIELD_SIZEOF(struct nf_conn_help, data))
+ BUILD_BUG_ON((structsize) > sizeof_field(struct nf_conn_help, data))
struct nf_conntrack_helper *__nf_conntrack_helper_find(const char *name,
u16 l3num, u8 protonum);
diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
index f0897b3c97fb..415b8f49d150 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -106,6 +106,12 @@ struct flow_offload {
};
#define NF_FLOW_TIMEOUT (30 * HZ)
+#define nf_flowtable_time_stamp (u32)jiffies
+
+static inline __s32 nf_flow_timeout_delta(unsigned int timeout)
+{
+ return (__s32)(timeout - nf_flowtable_time_stamp);
+}
struct nf_flow_route {
struct {
diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h
index 7281895fa6d9..2656155b4069 100644
--- a/include/net/netfilter/nf_tables_core.h
+++ b/include/net/netfilter/nf_tables_core.h
@@ -41,7 +41,7 @@ struct nft_immediate_expr {
*/
static inline u32 nft_cmp_fast_mask(unsigned int len)
{
- return cpu_to_le32(~0U >> (FIELD_SIZEOF(struct nft_cmp_fast_expr,
+ return cpu_to_le32(~0U >> (sizeof_field(struct nft_cmp_fast_expr,
data) * BITS_PER_BYTE - len));
}
diff --git a/include/net/netlink.h b/include/net/netlink.h
index b140c8f1be22..56c365dc6dc7 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -1735,7 +1735,7 @@ static inline void nla_nest_cancel(struct sk_buff *skb, struct nlattr *start)
}
/**
- * nla_validate_nested - Validate a stream of nested attributes
+ * __nla_validate_nested - Validate a stream of nested attributes
* @start: container attribute
* @maxtype: maximum attribute type to be expected
* @policy: validation policy
@@ -1758,9 +1758,9 @@ static inline int __nla_validate_nested(const struct nlattr *start, int maxtype,
}
static inline int
-nl80211_validate_nested(const struct nlattr *start, int maxtype,
- const struct nla_policy *policy,
- struct netlink_ext_ack *extack)
+nla_validate_nested(const struct nlattr *start, int maxtype,
+ const struct nla_policy *policy,
+ struct netlink_ext_ack *extack)
{
return __nla_validate_nested(start, maxtype, policy,
NL_VALIDATE_STRICT, extack);
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index c0c0791b1912..08b98414d94e 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -154,6 +154,7 @@ struct netns_ipv4 {
int sysctl_tcp_adv_win_scale;
int sysctl_tcp_frto;
int sysctl_tcp_nometrics_save;
+ int sysctl_tcp_no_ssthresh_metrics_save;
int sysctl_tcp_moderate_rcvbuf;
int sysctl_tcp_tso_win_divisor;
int sysctl_tcp_workaround_signed_windows;
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index e553fc80eb23..47b115e2012a 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -791,9 +791,8 @@ enum tc_prio_command {
struct tc_prio_qopt_offload_params {
int bands;
u8 priomap[TC_PRIO_MAX + 1];
- /* In case that a prio qdisc is offloaded and now is changed to a
- * non-offloadedable config, it needs to update the backlog & qlen
- * values to negate the HW backlog & qlen values (and only them).
+ /* At the point of un-offloading the Qdisc, the reported backlog and
+ * qlen need to be reduced by the portion that is in HW.
*/
struct gnet_stats_queue *qstats;
};
@@ -824,4 +823,35 @@ struct tc_root_qopt_offload {
bool ingress;
};
+enum tc_ets_command {
+ TC_ETS_REPLACE,
+ TC_ETS_DESTROY,
+ TC_ETS_STATS,
+ TC_ETS_GRAFT,
+};
+
+struct tc_ets_qopt_offload_replace_params {
+ unsigned int bands;
+ u8 priomap[TC_PRIO_MAX + 1];
+ unsigned int quanta[TCQ_ETS_MAX_BANDS]; /* 0 for strict bands. */
+ unsigned int weights[TCQ_ETS_MAX_BANDS];
+ struct gnet_stats_queue *qstats;
+};
+
+struct tc_ets_qopt_offload_graft_params {
+ u8 band;
+ u32 child_handle;
+};
+
+struct tc_ets_qopt_offload {
+ enum tc_ets_command command;
+ u32 handle;
+ u32 parent;
+ union {
+ struct tc_ets_qopt_offload_replace_params replace_params;
+ struct tc_qopt_offload_stats stats;
+ struct tc_ets_qopt_offload_graft_params graft_params;
+ };
+};
+
#endif
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 144f264ea394..fceddf89592a 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -308,6 +308,7 @@ struct tcf_proto_ops {
int (*delete)(struct tcf_proto *tp, void *arg,
bool *last, bool rtnl_held,
struct netlink_ext_ack *);
+ bool (*delete_empty)(struct tcf_proto *tp);
void (*walk)(struct tcf_proto *tp,
struct tcf_walker *arg, bool rtnl_held);
int (*reoffload)(struct tcf_proto *tp, bool add,
@@ -336,6 +337,10 @@ struct tcf_proto_ops {
int flags;
};
+/* Classifiers setting TCF_PROTO_OPS_DOIT_UNLOCKED in tcf_proto_ops->flags
+ * are expected to implement tcf_proto_ops->delete_empty(), otherwise race
+ * conditions can occur when filters are inserted/deleted simultaneously.
+ */
enum tcf_proto_ops_flags {
TCF_PROTO_OPS_DOIT_UNLOCKED = 1,
};
diff --git a/include/net/sock.h b/include/net/sock.h
index 87d54ef57f00..432ff73d20f3 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -436,31 +436,15 @@ struct sock {
* Because of non atomicity rules, all
* changes are protected by socket lock.
*/
- unsigned int __sk_flags_offset[0];
-#ifdef __BIG_ENDIAN_BITFIELD
-#define SK_FL_PROTO_SHIFT 16
-#define SK_FL_PROTO_MASK 0x00ff0000
-
-#define SK_FL_TYPE_SHIFT 0
-#define SK_FL_TYPE_MASK 0x0000ffff
-#else
-#define SK_FL_PROTO_SHIFT 8
-#define SK_FL_PROTO_MASK 0x0000ff00
-
-#define SK_FL_TYPE_SHIFT 16
-#define SK_FL_TYPE_MASK 0xffff0000
-#endif
-
- unsigned int sk_padding : 1,
+ u8 sk_padding : 1,
sk_kern_sock : 1,
sk_no_check_tx : 1,
sk_no_check_rx : 1,
- sk_userlocks : 4,
- sk_protocol : 8,
- sk_type : 16;
-#define SK_PROTOCOL_MAX U8_MAX
- u16 sk_gso_max_segs;
+ sk_userlocks : 4;
u8 sk_pacing_shift;
+ u16 sk_type;
+ u16 sk_protocol;
+ u16 sk_gso_max_segs;
unsigned long sk_lingertime;
struct proto *sk_prot_creator;
rwlock_t sk_callback_lock;
@@ -722,6 +706,11 @@ static inline void __sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_h
hlist_nulls_add_head_rcu(&sk->sk_nulls_node, list);
}
+static inline void __sk_nulls_add_node_tail_rcu(struct sock *sk, struct hlist_nulls_head *list)
+{
+ hlist_nulls_add_tail_rcu(&sk->sk_nulls_node, list);
+}
+
static inline void sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list)
{
sock_hold(sk);
@@ -1475,6 +1464,7 @@ static inline void sk_wmem_free_skb(struct sock *sk, struct sk_buff *skb)
sk_mem_uncharge(sk, skb->truesize);
if (static_branch_unlikely(&tcp_tx_skb_cache_key) &&
!sk->sk_tx_skb_cache && !skb_cloned(skb)) {
+ skb_ext_reset(skb);
skb_zcopy_clear(skb, true);
sk->sk_tx_skb_cache = skb;
return;
@@ -2305,7 +2295,7 @@ struct sock_skb_cb {
* using skb->cb[] would keep using it directly and utilize its
* alignement guarantee.
*/
-#define SOCK_SKB_CB_OFFSET ((FIELD_SIZEOF(struct sk_buff, cb) - \
+#define SOCK_SKB_CB_OFFSET ((sizeof_field(struct sk_buff, cb) - \
sizeof(struct sock_skb_cb)))
#define SOCK_SKB_CB(__skb) ((struct sock_skb_cb *)((__skb)->cb + \
@@ -2583,9 +2573,9 @@ static inline int sk_get_rmem0(const struct sock *sk, const struct proto *proto)
*/
static inline void sk_pacing_shift_update(struct sock *sk, int val)
{
- if (!sk || !sk_fullsock(sk) || sk->sk_pacing_shift == val)
+ if (!sk || !sk_fullsock(sk) || READ_ONCE(sk->sk_pacing_shift) == val)
return;
- sk->sk_pacing_shift = val;
+ WRITE_ONCE(sk->sk_pacing_shift, val);
}
/* if a socket is bound to a device, check that the given device
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 86b9a8766648..5e4133d09b9d 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -39,6 +39,7 @@
#include <net/tcp_states.h>
#include <net/inet_ecn.h>
#include <net/dst.h>
+#include <net/mptcp.h>
#include <linux/seq_file.h>
#include <linux/memcontrol.h>
@@ -182,6 +183,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
#define TCPOPT_SACK 5 /* SACK Block */
#define TCPOPT_TIMESTAMP 8 /* Better RTT estimations/PAWS */
#define TCPOPT_MD5SIG 19 /* MD5 Signature (RFC2385) */
+#define TCPOPT_MPTCP 30 /* Multipath TCP (RFC6824) */
#define TCPOPT_FASTOPEN 34 /* Fast open (RFC7413) */
#define TCPOPT_EXP 254 /* Experimental */
/* Magic number to be after the option value for sharing TCP
@@ -328,6 +330,9 @@ int tcp_sendpage_locked(struct sock *sk, struct page *page, int offset,
size_t size, int flags);
ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset,
size_t size, int flags);
+int tcp_send_mss(struct sock *sk, int *size_goal, int flags);
+void tcp_push(struct sock *sk, int flags, int mss_now, int nonagle,
+ int size_goal);
void tcp_release_cb(struct sock *sk);
void tcp_wfree(struct sk_buff *skb);
void tcp_write_timer_handler(struct sock *sk);
@@ -977,6 +982,13 @@ static inline bool tcp_skb_can_collapse_to(const struct sk_buff *skb)
return likely(!TCP_SKB_CB(skb)->eor);
}
+static inline bool tcp_skb_can_collapse(const struct sk_buff *to,
+ const struct sk_buff *from)
+{
+ return likely(tcp_skb_can_collapse_to(to) &&
+ mptcp_skb_can_collapse(to, from));
+}
+
/* Events passed to congestion control interface */
enum tcp_ca_event {
CA_EVENT_TX_START, /* first transmit when no packets in flight */
@@ -1532,8 +1544,9 @@ struct tcp_md5sig_key {
struct hlist_node node;
u8 keylen;
u8 family; /* AF_INET or AF_INET6 */
- union tcp_md5_addr addr;
u8 prefixlen;
+ union tcp_md5_addr addr;
+ int l3index; /* set if key added with L3 scope */
u8 key[TCP_MD5SIG_MAXKEYLEN];
struct rcu_head rcu;
};
@@ -1577,34 +1590,33 @@ struct tcp_md5sig_pool {
int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
const struct sock *sk, const struct sk_buff *skb);
int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
- int family, u8 prefixlen, const u8 *newkey, u8 newkeylen,
- gfp_t gfp);
+ int family, u8 prefixlen, int l3index,
+ const u8 *newkey, u8 newkeylen, gfp_t gfp);
int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr,
- int family, u8 prefixlen);
+ int family, u8 prefixlen, int l3index);
struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk,
const struct sock *addr_sk);
#ifdef CONFIG_TCP_MD5SIG
#include <linux/jump_label.h>
extern struct static_key_false tcp_md5_needed;
-struct tcp_md5sig_key *__tcp_md5_do_lookup(const struct sock *sk,
+struct tcp_md5sig_key *__tcp_md5_do_lookup(const struct sock *sk, int l3index,
const union tcp_md5_addr *addr,
int family);
static inline struct tcp_md5sig_key *
-tcp_md5_do_lookup(const struct sock *sk,
- const union tcp_md5_addr *addr,
- int family)
+tcp_md5_do_lookup(const struct sock *sk, int l3index,
+ const union tcp_md5_addr *addr, int family)
{
if (!static_branch_unlikely(&tcp_md5_needed))
return NULL;
- return __tcp_md5_do_lookup(sk, addr, family);
+ return __tcp_md5_do_lookup(sk, l3index, addr, family);
}
#define tcp_twsk_md5_key(twsk) ((twsk)->tw_md5_key)
#else
-static inline struct tcp_md5sig_key *tcp_md5_do_lookup(const struct sock *sk,
- const union tcp_md5_addr *addr,
- int family)
+static inline struct tcp_md5sig_key *
+tcp_md5_do_lookup(const struct sock *sk, int l3index,
+ const union tcp_md5_addr *addr, int family)
{
return NULL;
}
@@ -1766,9 +1778,18 @@ static inline bool tcp_skb_is_last(const struct sock *sk,
return skb_queue_is_last(&sk->sk_write_queue, skb);
}
+/**
+ * tcp_write_queue_empty - test if any payload (or FIN) is available in write queue
+ * @sk: socket
+ *
+ * Since the write queue can have a temporary empty skb in it,
+ * we must not use "return skb_queue_empty(&sk->sk_write_queue)"
+ */
static inline bool tcp_write_queue_empty(const struct sock *sk)
{
- return skb_queue_empty(&sk->sk_write_queue);
+ const struct tcp_sock *tp = tcp_sk(sk);
+
+ return tp->write_seq == tp->snd_nxt;
}
static inline bool tcp_rtx_queue_empty(const struct sock *sk)
@@ -1993,6 +2014,11 @@ struct tcp_request_sock_ops {
enum tcp_synack_type synack_type);
};
+extern const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops;
+#if IS_ENABLED(CONFIG_IPV6)
+extern const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops;
+#endif
+
#ifdef CONFIG_SYN_COOKIES
static inline __u32 cookie_init_sequence(const struct tcp_request_sock_ops *ops,
const struct sock *sk, struct sk_buff *skb,
@@ -2144,6 +2170,9 @@ struct tcp_ulp_ops {
/* diagnostic */
int (*get_info)(const struct sock *sk, struct sk_buff *skb);
size_t (*get_info_size)(const struct sock *sk);
+ /* clone ulp */
+ void (*clone)(const struct request_sock *req, struct sock *newsk,
+ const gfp_t priority);
char name[TCP_ULP_NAME_MAX];
struct module *owner;
diff --git a/include/net/tls.h b/include/net/tls.h
index df630f5fc723..bf9eb4823933 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -641,6 +641,7 @@ int tls_sw_fallback_init(struct sock *sk,
#ifdef CONFIG_TLS_DEVICE
void tls_device_init(void);
void tls_device_cleanup(void);
+void tls_device_sk_destruct(struct sock *sk);
int tls_set_device_offload(struct sock *sk, struct tls_context *ctx);
void tls_device_free_resources_tx(struct sock *sk);
int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx);
@@ -649,6 +650,14 @@ void tls_device_rx_resync_new_rec(struct sock *sk, u32 rcd_len, u32 seq);
void tls_offload_tx_resync_request(struct sock *sk, u32 got_seq, u32 exp_seq);
int tls_device_decrypted(struct sock *sk, struct tls_context *tls_ctx,
struct sk_buff *skb, struct strp_msg *rxm);
+
+static inline bool tls_is_sk_rx_device_offloaded(struct sock *sk)
+{
+ if (!sk_fullsock(sk) ||
+ smp_load_acquire(&sk->sk_destruct) != tls_device_sk_destruct)
+ return false;
+ return tls_get_ctx(sk)->rx_conf == TLS_HW;
+}
#else
static inline void tls_device_init(void) {}
static inline void tls_device_cleanup(void) {}
diff --git a/include/net/x25.h b/include/net/x25.h
index ed1acc3044ac..d7d6c2b4ffa7 100644
--- a/include/net/x25.h
+++ b/include/net/x25.h
@@ -62,7 +62,8 @@ enum {
X25_STATE_1, /* Awaiting Call Accepted */
X25_STATE_2, /* Awaiting Clear Confirmation */
X25_STATE_3, /* Data Transfer */
- X25_STATE_4 /* Awaiting Reset Confirmation */
+ X25_STATE_4, /* Awaiting Reset Confirmation */
+ X25_STATE_5 /* Call Accepted / Call Connected pending */
};
enum {
diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index e3780e4b74e1..e86ec48ef627 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -72,7 +72,6 @@ struct xdp_umem {
struct xsk_map {
struct bpf_map map;
- struct list_head __percpu *flush_list;
spinlock_t lock; /* Synchronize map updates */
struct xdp_sock *xsk_map[];
};
@@ -119,8 +118,8 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp);
bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs);
/* Used from netdev driver */
bool xsk_umem_has_addrs(struct xdp_umem *umem, u32 cnt);
-u64 *xsk_umem_peek_addr(struct xdp_umem *umem, u64 *addr);
-void xsk_umem_discard_addr(struct xdp_umem *umem);
+bool xsk_umem_peek_addr(struct xdp_umem *umem, u64 *addr);
+void xsk_umem_release_addr(struct xdp_umem *umem);
void xsk_umem_complete_tx(struct xdp_umem *umem, u32 nb_entries);
bool xsk_umem_consume_tx(struct xdp_umem *umem, struct xdp_desc *desc);
void xsk_umem_consume_tx_done(struct xdp_umem *umem);
@@ -139,9 +138,8 @@ void xsk_map_try_sock_delete(struct xsk_map *map, struct xdp_sock *xs,
struct xdp_sock **map_entry);
int xsk_map_inc(struct xsk_map *map);
void xsk_map_put(struct xsk_map *map);
-int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp,
- struct xdp_sock *xs);
-void __xsk_map_flush(struct bpf_map *map);
+int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp);
+void __xsk_map_flush(void);
static inline struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map,
u32 key)
@@ -199,7 +197,7 @@ static inline bool xsk_umem_has_addrs_rq(struct xdp_umem *umem, u32 cnt)
return xsk_umem_has_addrs(umem, cnt - rq->length);
}
-static inline u64 *xsk_umem_peek_addr_rq(struct xdp_umem *umem, u64 *addr)
+static inline bool xsk_umem_peek_addr_rq(struct xdp_umem *umem, u64 *addr)
{
struct xdp_umem_fq_reuse *rq = umem->fq_reuse;
@@ -210,12 +208,12 @@ static inline u64 *xsk_umem_peek_addr_rq(struct xdp_umem *umem, u64 *addr)
return addr;
}
-static inline void xsk_umem_discard_addr_rq(struct xdp_umem *umem)
+static inline void xsk_umem_release_addr_rq(struct xdp_umem *umem)
{
struct xdp_umem_fq_reuse *rq = umem->fq_reuse;
if (!rq->length)
- xsk_umem_discard_addr(umem);
+ xsk_umem_release_addr(umem);
else
rq->length--;
}
@@ -260,7 +258,7 @@ static inline u64 *xsk_umem_peek_addr(struct xdp_umem *umem, u64 *addr)
return NULL;
}
-static inline void xsk_umem_discard_addr(struct xdp_umem *umem)
+static inline void xsk_umem_release_addr(struct xdp_umem *umem)
{
}
@@ -334,7 +332,7 @@ static inline u64 *xsk_umem_peek_addr_rq(struct xdp_umem *umem, u64 *addr)
return NULL;
}
-static inline void xsk_umem_discard_addr_rq(struct xdp_umem *umem)
+static inline void xsk_umem_release_addr_rq(struct xdp_umem *umem)
{
}
@@ -369,13 +367,12 @@ static inline u64 xsk_umem_adjust_offset(struct xdp_umem *umem, u64 handle,
return 0;
}
-static inline int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp,
- struct xdp_sock *xs)
+static inline int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp)
{
return -EOPNOTSUPP;
}
-static inline void __xsk_map_flush(struct bpf_map *map)
+static inline void __xsk_map_flush(void)
{
}
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index cacb48faf670..5608e14e3aad 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -2832,6 +2832,11 @@ int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma,
int rdma_user_mmap_entry_insert(struct ib_ucontext *ucontext,
struct rdma_user_mmap_entry *entry,
size_t length);
+int rdma_user_mmap_entry_insert_range(struct ib_ucontext *ucontext,
+ struct rdma_user_mmap_entry *entry,
+ size_t length, u32 min_pgoff,
+ u32 max_pgoff);
+
struct rdma_user_mmap_entry *
rdma_user_mmap_entry_get_pgoff(struct ib_ucontext *ucontext,
unsigned long pgoff);
diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h
index 64cbbbe74a36..068f96b1a83e 100644
--- a/include/soc/mscc/ocelot.h
+++ b/include/soc/mscc/ocelot.h
@@ -420,6 +420,8 @@ struct ocelot_port {
u8 ptp_cmd;
struct sk_buff_head tx_skbs;
u8 ts_id;
+
+ phy_interface_t phy_mode;
};
struct ocelot {
diff --git a/drivers/net/ethernet/mscc/ocelot_ana.h b/include/soc/mscc/ocelot_ana.h
index 841c6ec22b64..841c6ec22b64 100644
--- a/drivers/net/ethernet/mscc/ocelot_ana.h
+++ b/include/soc/mscc/ocelot_ana.h
diff --git a/drivers/net/ethernet/mscc/ocelot_dev.h b/include/soc/mscc/ocelot_dev.h
index 0a50d53bbd3f..0a50d53bbd3f 100644
--- a/drivers/net/ethernet/mscc/ocelot_dev.h
+++ b/include/soc/mscc/ocelot_dev.h
diff --git a/drivers/net/ethernet/mscc/ocelot_qsys.h b/include/soc/mscc/ocelot_qsys.h
index d8c63aa761be..d8c63aa761be 100644
--- a/drivers/net/ethernet/mscc/ocelot_qsys.h
+++ b/include/soc/mscc/ocelot_qsys.h
diff --git a/include/sound/soc.h b/include/sound/soc.h
index c28a1ed5e8df..262896799826 100644
--- a/include/sound/soc.h
+++ b/include/sound/soc.h
@@ -1150,6 +1150,7 @@ struct snd_soc_pcm_runtime {
unsigned int num_codecs;
struct delayed_work delayed_work;
+ void (*close_delayed_work_func)(struct snd_soc_pcm_runtime *rtd);
#ifdef CONFIG_DEBUG_FS
struct dentry *debugfs_dpcm_root;
#endif
diff --git a/include/trace/events/preemptirq.h b/include/trace/events/preemptirq.h
index 95fba0471e5b..3f249e150c0c 100644
--- a/include/trace/events/preemptirq.h
+++ b/include/trace/events/preemptirq.h
@@ -18,13 +18,13 @@ DECLARE_EVENT_CLASS(preemptirq_template,
TP_ARGS(ip, parent_ip),
TP_STRUCT__entry(
- __field(u32, caller_offs)
- __field(u32, parent_offs)
+ __field(s32, caller_offs)
+ __field(s32, parent_offs)
),
TP_fast_assign(
- __entry->caller_offs = (u32)(ip - (unsigned long)_stext);
- __entry->parent_offs = (u32)(parent_ip - (unsigned long)_stext);
+ __entry->caller_offs = (s32)(ip - (unsigned long)_stext);
+ __entry->parent_offs = (s32)(parent_ip - (unsigned long)_stext);
),
TP_printk("caller=%pS parent=%pS",
diff --git a/include/trace/events/sctp.h b/include/trace/events/sctp.h
index 7475c7be165a..d4aac3436595 100644
--- a/include/trace/events/sctp.h
+++ b/include/trace/events/sctp.h
@@ -75,15 +75,6 @@ TRACE_EVENT(sctp_probe,
__entry->pathmtu = asoc->pathmtu;
__entry->rwnd = asoc->peer.rwnd;
__entry->unack_data = asoc->unack_data;
-
- if (trace_sctp_probe_path_enabled()) {
- struct sctp_transport *sp;
-
- list_for_each_entry(sp, &asoc->peer.transport_addr_list,
- transports) {
- trace_sctp_probe_path(sp, asoc);
- }
- }
),
TP_printk("asoc=%#llx mark=%#x bind_port=%d peer_port=%d pathmtu=%d "
diff --git a/include/trace/events/sock.h b/include/trace/events/sock.h
index 51fe9f6719eb..a966d4b5ab37 100644
--- a/include/trace/events/sock.h
+++ b/include/trace/events/sock.h
@@ -19,7 +19,8 @@
#define inet_protocol_names \
EM(IPPROTO_TCP) \
EM(IPPROTO_DCCP) \
- EMe(IPPROTO_SCTP)
+ EM(IPPROTO_SCTP) \
+ EMe(IPPROTO_MPTCP)
#define tcp_state_names \
EM(TCP_ESTABLISHED) \
@@ -147,7 +148,7 @@ TRACE_EVENT(inet_sock_set_state,
__field(__u16, sport)
__field(__u16, dport)
__field(__u16, family)
- __field(__u8, protocol)
+ __field(__u16, protocol)
__array(__u8, saddr, 4)
__array(__u8, daddr, 4)
__array(__u8, saddr_v6, 16)
diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
index 3ad935527177..a534d71e689a 100644
--- a/include/uapi/linux/audit.h
+++ b/include/uapi/linux/audit.h
@@ -116,6 +116,7 @@
#define AUDIT_FANOTIFY 1331 /* Fanotify access decision */
#define AUDIT_TIME_INJOFFSET 1332 /* Timekeeping offset injected */
#define AUDIT_TIME_ADJNTPVAL 1333 /* NTP value adjustment */
+#define AUDIT_BPF 1334 /* BPF subsystem */
#define AUDIT_AVC 1400 /* SE Linux avc denial or grant */
#define AUDIT_SELINUX_ERR 1401 /* Internal SE Linux Errors */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index dbbcf0b02970..7df436da542d 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -231,6 +231,11 @@ enum bpf_attach_type {
* When children program makes decision (like picking TCP CA or sock bind)
* parent program has a chance to override it.
*
+ * With BPF_F_ALLOW_MULTI a new program is added to the end of the list of
+ * programs for a cgroup. Though it's possible to replace an old program at
+ * any position by also specifying BPF_F_REPLACE flag and position itself in
+ * replace_bpf_fd attribute. Old program at this position will be released.
+ *
* A cgroup with MULTI or OVERRIDE flag allows any attach flags in sub-cgroups.
* A cgroup with NONE doesn't allow any programs in sub-cgroups.
* Ex1:
@@ -249,6 +254,7 @@ enum bpf_attach_type {
*/
#define BPF_F_ALLOW_OVERRIDE (1U << 0)
#define BPF_F_ALLOW_MULTI (1U << 1)
+#define BPF_F_REPLACE (1U << 2)
/* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the
* verifier will perform strict alignment checking as if the kernel
@@ -442,6 +448,10 @@ union bpf_attr {
__u32 attach_bpf_fd; /* eBPF program to attach */
__u32 attach_type;
__u32 attach_flags;
+ __u32 replace_bpf_fd; /* previously attached eBPF
+ * program to replace if
+ * BPF_F_REPLACE is used
+ */
};
struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */
diff --git a/include/uapi/linux/btf.h b/include/uapi/linux/btf.h
index c02dec97e1ce..1a2898c482ee 100644
--- a/include/uapi/linux/btf.h
+++ b/include/uapi/linux/btf.h
@@ -142,7 +142,8 @@ struct btf_param {
enum {
BTF_VAR_STATIC = 0,
- BTF_VAR_GLOBAL_ALLOCATED,
+ BTF_VAR_GLOBAL_ALLOCATED = 1,
+ BTF_VAR_GLOBAL_EXTERN = 2,
};
/* BTF_KIND_VAR is followed by a single "struct btf_var" to describe
diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index d4591792f0b4..116bcbf09c74 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -593,6 +593,7 @@ struct ethtool_pauseparam {
* @ETH_SS_RSS_HASH_FUNCS: RSS hush function names
* @ETH_SS_PHY_STATS: Statistic names, for use with %ETHTOOL_GPHYSTATS
* @ETH_SS_PHY_TUNABLES: PHY tunable names
+ * @ETH_SS_LINK_MODES: link mode names
*/
enum ethtool_stringset {
ETH_SS_TEST = 0,
@@ -604,6 +605,10 @@ enum ethtool_stringset {
ETH_SS_TUNABLES,
ETH_SS_PHY_STATS,
ETH_SS_PHY_TUNABLES,
+ ETH_SS_LINK_MODES,
+
+ /* add new constants above here */
+ ETH_SS_COUNT
};
/**
diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h
new file mode 100644
index 000000000000..02f82f42a889
--- /dev/null
+++ b/include/uapi/linux/ethtool_netlink.h
@@ -0,0 +1,204 @@
+/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */
+/*
+ * include/uapi/linux/ethtool_netlink.h - netlink interface for ethtool
+ *
+ * See Documentation/networking/ethtool-netlink.txt in kernel source tree for
+ * doucumentation of the interface.
+ */
+
+#ifndef _UAPI_LINUX_ETHTOOL_NETLINK_H_
+#define _UAPI_LINUX_ETHTOOL_NETLINK_H_
+
+#include <linux/ethtool.h>
+
+/* message types - userspace to kernel */
+enum {
+ ETHTOOL_MSG_USER_NONE,
+ ETHTOOL_MSG_STRSET_GET,
+ ETHTOOL_MSG_LINKINFO_GET,
+ ETHTOOL_MSG_LINKINFO_SET,
+ ETHTOOL_MSG_LINKMODES_GET,
+ ETHTOOL_MSG_LINKMODES_SET,
+ ETHTOOL_MSG_LINKSTATE_GET,
+
+ /* add new constants above here */
+ __ETHTOOL_MSG_USER_CNT,
+ ETHTOOL_MSG_USER_MAX = __ETHTOOL_MSG_USER_CNT - 1
+};
+
+/* message types - kernel to userspace */
+enum {
+ ETHTOOL_MSG_KERNEL_NONE,
+ ETHTOOL_MSG_STRSET_GET_REPLY,
+ ETHTOOL_MSG_LINKINFO_GET_REPLY,
+ ETHTOOL_MSG_LINKINFO_NTF,
+ ETHTOOL_MSG_LINKMODES_GET_REPLY,
+ ETHTOOL_MSG_LINKMODES_NTF,
+ ETHTOOL_MSG_LINKSTATE_GET_REPLY,
+
+ /* add new constants above here */
+ __ETHTOOL_MSG_KERNEL_CNT,
+ ETHTOOL_MSG_KERNEL_MAX = __ETHTOOL_MSG_KERNEL_CNT - 1
+};
+
+/* request header */
+
+/* use compact bitsets in reply */
+#define ETHTOOL_FLAG_COMPACT_BITSETS (1 << 0)
+/* provide optional reply for SET or ACT requests */
+#define ETHTOOL_FLAG_OMIT_REPLY (1 << 1)
+
+#define ETHTOOL_FLAG_ALL (ETHTOOL_FLAG_COMPACT_BITSETS | \
+ ETHTOOL_FLAG_OMIT_REPLY)
+
+enum {
+ ETHTOOL_A_HEADER_UNSPEC,
+ ETHTOOL_A_HEADER_DEV_INDEX, /* u32 */
+ ETHTOOL_A_HEADER_DEV_NAME, /* string */
+ ETHTOOL_A_HEADER_FLAGS, /* u32 - ETHTOOL_FLAG_* */
+
+ /* add new constants above here */
+ __ETHTOOL_A_HEADER_CNT,
+ ETHTOOL_A_HEADER_MAX = __ETHTOOL_A_HEADER_CNT - 1
+};
+
+/* bit sets */
+
+enum {
+ ETHTOOL_A_BITSET_BIT_UNSPEC,
+ ETHTOOL_A_BITSET_BIT_INDEX, /* u32 */
+ ETHTOOL_A_BITSET_BIT_NAME, /* string */
+ ETHTOOL_A_BITSET_BIT_VALUE, /* flag */
+
+ /* add new constants above here */
+ __ETHTOOL_A_BITSET_BIT_CNT,
+ ETHTOOL_A_BITSET_BIT_MAX = __ETHTOOL_A_BITSET_BIT_CNT - 1
+};
+
+enum {
+ ETHTOOL_A_BITSET_BITS_UNSPEC,
+ ETHTOOL_A_BITSET_BITS_BIT, /* nest - _A_BITSET_BIT_* */
+
+ /* add new constants above here */
+ __ETHTOOL_A_BITSET_BITS_CNT,
+ ETHTOOL_A_BITSET_BITS_MAX = __ETHTOOL_A_BITSET_BITS_CNT - 1
+};
+
+enum {
+ ETHTOOL_A_BITSET_UNSPEC,
+ ETHTOOL_A_BITSET_NOMASK, /* flag */
+ ETHTOOL_A_BITSET_SIZE, /* u32 */
+ ETHTOOL_A_BITSET_BITS, /* nest - _A_BITSET_BITS_* */
+ ETHTOOL_A_BITSET_VALUE, /* binary */
+ ETHTOOL_A_BITSET_MASK, /* binary */
+
+ /* add new constants above here */
+ __ETHTOOL_A_BITSET_CNT,
+ ETHTOOL_A_BITSET_MAX = __ETHTOOL_A_BITSET_CNT - 1
+};
+
+/* string sets */
+
+enum {
+ ETHTOOL_A_STRING_UNSPEC,
+ ETHTOOL_A_STRING_INDEX, /* u32 */
+ ETHTOOL_A_STRING_VALUE, /* string */
+
+ /* add new constants above here */
+ __ETHTOOL_A_STRING_CNT,
+ ETHTOOL_A_STRING_MAX = __ETHTOOL_A_STRING_CNT - 1
+};
+
+enum {
+ ETHTOOL_A_STRINGS_UNSPEC,
+ ETHTOOL_A_STRINGS_STRING, /* nest - _A_STRINGS_* */
+
+ /* add new constants above here */
+ __ETHTOOL_A_STRINGS_CNT,
+ ETHTOOL_A_STRINGS_MAX = __ETHTOOL_A_STRINGS_CNT - 1
+};
+
+enum {
+ ETHTOOL_A_STRINGSET_UNSPEC,
+ ETHTOOL_A_STRINGSET_ID, /* u32 */
+ ETHTOOL_A_STRINGSET_COUNT, /* u32 */
+ ETHTOOL_A_STRINGSET_STRINGS, /* nest - _A_STRINGS_* */
+
+ /* add new constants above here */
+ __ETHTOOL_A_STRINGSET_CNT,
+ ETHTOOL_A_STRINGSET_MAX = __ETHTOOL_A_STRINGSET_CNT - 1
+};
+
+enum {
+ ETHTOOL_A_STRINGSETS_UNSPEC,
+ ETHTOOL_A_STRINGSETS_STRINGSET, /* nest - _A_STRINGSET_* */
+
+ /* add new constants above here */
+ __ETHTOOL_A_STRINGSETS_CNT,
+ ETHTOOL_A_STRINGSETS_MAX = __ETHTOOL_A_STRINGSETS_CNT - 1
+};
+
+/* STRSET */
+
+enum {
+ ETHTOOL_A_STRSET_UNSPEC,
+ ETHTOOL_A_STRSET_HEADER, /* nest - _A_HEADER_* */
+ ETHTOOL_A_STRSET_STRINGSETS, /* nest - _A_STRINGSETS_* */
+ ETHTOOL_A_STRSET_COUNTS_ONLY, /* flag */
+
+ /* add new constants above here */
+ __ETHTOOL_A_STRSET_CNT,
+ ETHTOOL_A_STRSET_MAX = __ETHTOOL_A_STRSET_CNT - 1
+};
+
+/* LINKINFO */
+
+enum {
+ ETHTOOL_A_LINKINFO_UNSPEC,
+ ETHTOOL_A_LINKINFO_HEADER, /* nest - _A_HEADER_* */
+ ETHTOOL_A_LINKINFO_PORT, /* u8 */
+ ETHTOOL_A_LINKINFO_PHYADDR, /* u8 */
+ ETHTOOL_A_LINKINFO_TP_MDIX, /* u8 */
+ ETHTOOL_A_LINKINFO_TP_MDIX_CTRL, /* u8 */
+ ETHTOOL_A_LINKINFO_TRANSCEIVER, /* u8 */
+
+ /* add new constants above here */
+ __ETHTOOL_A_LINKINFO_CNT,
+ ETHTOOL_A_LINKINFO_MAX = __ETHTOOL_A_LINKINFO_CNT - 1
+};
+
+/* LINKMODES */
+
+enum {
+ ETHTOOL_A_LINKMODES_UNSPEC,
+ ETHTOOL_A_LINKMODES_HEADER, /* nest - _A_HEADER_* */
+ ETHTOOL_A_LINKMODES_AUTONEG, /* u8 */
+ ETHTOOL_A_LINKMODES_OURS, /* bitset */
+ ETHTOOL_A_LINKMODES_PEER, /* bitset */
+ ETHTOOL_A_LINKMODES_SPEED, /* u32 */
+ ETHTOOL_A_LINKMODES_DUPLEX, /* u8 */
+
+ /* add new constants above here */
+ __ETHTOOL_A_LINKMODES_CNT,
+ ETHTOOL_A_LINKMODES_MAX = __ETHTOOL_A_LINKMODES_CNT - 1
+};
+
+/* LINKSTATE */
+
+enum {
+ ETHTOOL_A_LINKSTATE_UNSPEC,
+ ETHTOOL_A_LINKSTATE_HEADER, /* nest - _A_HEADER_* */
+ ETHTOOL_A_LINKSTATE_LINK, /* u8 */
+
+ /* add new constants above here */
+ __ETHTOOL_A_LINKSTATE_CNT,
+ ETHTOOL_A_LINKSTATE_MAX = __ETHTOOL_A_LINKSTATE_CNT - 1
+};
+
+/* generic netlink info */
+#define ETHTOOL_GENL_NAME "ethtool"
+#define ETHTOOL_GENL_VERSION 1
+
+#define ETHTOOL_MCGRP_MONITOR_NAME "monitor"
+
+#endif /* _UAPI_LINUX_ETHTOOL_NETLINK_H_ */
diff --git a/include/uapi/linux/if_bonding.h b/include/uapi/linux/if_bonding.h
index 790585f0e61b..45f3750aa861 100644
--- a/include/uapi/linux/if_bonding.h
+++ b/include/uapi/linux/if_bonding.h
@@ -95,6 +95,16 @@
#define BOND_XMIT_POLICY_ENCAP23 3 /* encapsulated layer 2+3 */
#define BOND_XMIT_POLICY_ENCAP34 4 /* encapsulated layer 3+4 */
+/* 802.3ad port state definitions (43.4.2.2 in the 802.3ad standard) */
+#define LACP_STATE_LACP_ACTIVITY 0x1
+#define LACP_STATE_LACP_TIMEOUT 0x2
+#define LACP_STATE_AGGREGATION 0x4
+#define LACP_STATE_SYNCHRONIZATION 0x8
+#define LACP_STATE_COLLECTING 0x10
+#define LACP_STATE_DISTRIBUTING 0x20
+#define LACP_STATE_DEFAULTED 0x40
+#define LACP_STATE_EXPIRED 0x80
+
typedef struct ifbond {
__s32 bond_mode;
__s32 num_slaves;
diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h
index 1b3c2b643a02..4a58e3d7de46 100644
--- a/include/uapi/linux/if_bridge.h
+++ b/include/uapi/linux/if_bridge.h
@@ -156,6 +156,15 @@ struct bridge_vlan_xstats {
__u32 pad2;
};
+struct bridge_stp_xstats {
+ __u64 transition_blk;
+ __u64 transition_fwd;
+ __u64 rx_bpdu;
+ __u64 tx_bpdu;
+ __u64 rx_tcn;
+ __u64 tx_tcn;
+};
+
/* Bridge multicast database attributes
* [MDBA_MDB] = {
* [MDBA_MDB_ENTRY] = {
@@ -262,6 +271,7 @@ enum {
BRIDGE_XSTATS_VLAN,
BRIDGE_XSTATS_MCAST,
BRIDGE_XSTATS_PAD,
+ BRIDGE_XSTATS_STP,
__BRIDGE_XSTATS_MAX
};
#define BRIDGE_XSTATS_MAX (__BRIDGE_XSTATS_MAX - 1)
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 8aec8769d944..1d69f637c5d6 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -169,6 +169,7 @@ enum {
IFLA_MAX_MTU,
IFLA_PROP_LIST,
IFLA_ALT_IFNAME, /* Alternative ifname */
+ IFLA_PERM_ADDRESS,
__IFLA_MAX
};
diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h
index e7ad9d350a28..1521073b6348 100644
--- a/include/uapi/linux/in.h
+++ b/include/uapi/linux/in.h
@@ -76,6 +76,8 @@ enum {
#define IPPROTO_MPLS IPPROTO_MPLS
IPPROTO_RAW = 255, /* Raw IP packets */
#define IPPROTO_RAW IPPROTO_RAW
+ IPPROTO_MPTCP = 262, /* Multipath TCP connection */
+#define IPPROTO_MPTCP IPPROTO_MPTCP
IPPROTO_MAX
};
#endif
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index eabccb46edd1..a3300e1b9a01 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -48,6 +48,7 @@ struct io_uring_sqe {
#define IOSQE_FIXED_FILE (1U << 0) /* use fixed fileset */
#define IOSQE_IO_DRAIN (1U << 1) /* issue after inflight IO */
#define IOSQE_IO_LINK (1U << 2) /* links next sqe */
+#define IOSQE_IO_HARDLINK (1U << 3) /* like LINK, but stronger */
/*
* io_uring_setup() flags
@@ -57,23 +58,28 @@ struct io_uring_sqe {
#define IORING_SETUP_SQ_AFF (1U << 2) /* sq_thread_cpu is valid */
#define IORING_SETUP_CQSIZE (1U << 3) /* app defines CQ size */
-#define IORING_OP_NOP 0
-#define IORING_OP_READV 1
-#define IORING_OP_WRITEV 2
-#define IORING_OP_FSYNC 3
-#define IORING_OP_READ_FIXED 4
-#define IORING_OP_WRITE_FIXED 5
-#define IORING_OP_POLL_ADD 6
-#define IORING_OP_POLL_REMOVE 7
-#define IORING_OP_SYNC_FILE_RANGE 8
-#define IORING_OP_SENDMSG 9
-#define IORING_OP_RECVMSG 10
-#define IORING_OP_TIMEOUT 11
-#define IORING_OP_TIMEOUT_REMOVE 12
-#define IORING_OP_ACCEPT 13
-#define IORING_OP_ASYNC_CANCEL 14
-#define IORING_OP_LINK_TIMEOUT 15
-#define IORING_OP_CONNECT 16
+enum {
+ IORING_OP_NOP,
+ IORING_OP_READV,
+ IORING_OP_WRITEV,
+ IORING_OP_FSYNC,
+ IORING_OP_READ_FIXED,
+ IORING_OP_WRITE_FIXED,
+ IORING_OP_POLL_ADD,
+ IORING_OP_POLL_REMOVE,
+ IORING_OP_SYNC_FILE_RANGE,
+ IORING_OP_SENDMSG,
+ IORING_OP_RECVMSG,
+ IORING_OP_TIMEOUT,
+ IORING_OP_TIMEOUT_REMOVE,
+ IORING_OP_ACCEPT,
+ IORING_OP_ASYNC_CANCEL,
+ IORING_OP_LINK_TIMEOUT,
+ IORING_OP_CONNECT,
+
+ /* this goes last, obviously */
+ IORING_OP_LAST,
+};
/*
* sqe->fsync_flags
diff --git a/include/uapi/linux/kcov.h b/include/uapi/linux/kcov.h
index 409d3ad1e6e2..1d0350e44ae3 100644
--- a/include/uapi/linux/kcov.h
+++ b/include/uapi/linux/kcov.h
@@ -9,11 +9,11 @@
* and the comment before kcov_remote_start() for usage details.
*/
struct kcov_remote_arg {
- unsigned int trace_mode; /* KCOV_TRACE_PC or KCOV_TRACE_CMP */
- unsigned int area_size; /* Length of coverage buffer in words */
- unsigned int num_handles; /* Size of handles array */
- __u64 common_handle;
- __u64 handles[0];
+ __u32 trace_mode; /* KCOV_TRACE_PC or KCOV_TRACE_CMP */
+ __u32 area_size; /* Length of coverage buffer in words */
+ __u32 num_handles; /* Size of handles array */
+ __aligned_u64 common_handle;
+ __aligned_u64 handles[0];
};
#define KCOV_REMOTE_MAX_HANDLES 0x100
diff --git a/include/uapi/linux/mii.h b/include/uapi/linux/mii.h
index 51b48e4be1f2..0b9c3beda345 100644
--- a/include/uapi/linux/mii.h
+++ b/include/uapi/linux/mii.h
@@ -131,6 +131,18 @@
#define NWAYTEST_LOOPBACK 0x0100 /* Enable loopback for N-way */
#define NWAYTEST_RESV2 0xfe00 /* Unused... */
+/* MAC and PHY tx_config_Reg[15:0] for SGMII in-band auto-negotiation.*/
+#define ADVERTISE_SGMII 0x0001 /* MAC can do SGMII */
+#define LPA_SGMII 0x0001 /* PHY can do SGMII */
+#define LPA_SGMII_DPX_SPD_MASK 0x1C00 /* SGMII duplex and speed bits */
+#define LPA_SGMII_10HALF 0x0000 /* Can do 10mbps half-duplex */
+#define LPA_SGMII_10FULL 0x1000 /* Can do 10mbps full-duplex */
+#define LPA_SGMII_100HALF 0x0400 /* Can do 100mbps half-duplex */
+#define LPA_SGMII_100FULL 0x1400 /* Can do 100mbps full-duplex */
+#define LPA_SGMII_1000HALF 0x0800 /* Can do 1000mbps half-duplex */
+#define LPA_SGMII_1000FULL 0x1800 /* Can do 1000mbps full-duplex */
+#define LPA_SGMII_LINK 0x8000 /* PHY link with copper-side partner */
+
/* 1000BASE-T Control register */
#define ADVERTISE_1000FULL 0x0200 /* Advertise 1000BASE-T full duplex */
#define ADVERTISE_1000HALF 0x0100 /* Advertise 1000BASE-T half duplex */
diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h
index e5b39721c6e4..f96e650d0af9 100644
--- a/include/uapi/linux/net_tstamp.h
+++ b/include/uapi/linux/net_tstamp.h
@@ -90,6 +90,14 @@ enum hwtstamp_tx_types {
* queue.
*/
HWTSTAMP_TX_ONESTEP_SYNC,
+
+ /*
+ * Same as HWTSTAMP_TX_ONESTEP_SYNC, but also enables time
+ * stamp insertion directly into PDelay_Resp packets. In this
+ * case, neither transmitted Sync nor PDelay_Resp packets will
+ * receive a time stamp via the socket error queue.
+ */
+ HWTSTAMP_TX_ONESTEP_P2P,
};
/* possible values for hwtstamp_config->rx_filter */
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index bb9b049310df..e237ecbdcd8a 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -805,6 +805,8 @@ enum nft_exthdr_attributes {
* @NFT_META_TIME_NS: time since epoch (in nanoseconds)
* @NFT_META_TIME_DAY: day of week (from 0 = Sunday to 6 = Saturday)
* @NFT_META_TIME_HOUR: hour of day (in seconds)
+ * @NFT_META_SDIF: slave device interface index
+ * @NFT_META_SDIFNAME: slave device interface name
*/
enum nft_meta_keys {
NFT_META_LEN,
@@ -840,6 +842,8 @@ enum nft_meta_keys {
NFT_META_TIME_NS,
NFT_META_TIME_DAY,
NFT_META_TIME_HOUR,
+ NFT_META_SDIF,
+ NFT_META_SDIFNAME,
};
/**
diff --git a/include/uapi/linux/netfilter/xt_sctp.h b/include/uapi/linux/netfilter/xt_sctp.h
index 4bc6d1a08781..b4d804a9fccb 100644
--- a/include/uapi/linux/netfilter/xt_sctp.h
+++ b/include/uapi/linux/netfilter/xt_sctp.h
@@ -41,19 +41,19 @@ struct xt_sctp_info {
#define SCTP_CHUNKMAP_SET(chunkmap, type) \
do { \
(chunkmap)[type / bytes(__u32)] |= \
- 1 << (type % bytes(__u32)); \
+ 1u << (type % bytes(__u32)); \
} while (0)
#define SCTP_CHUNKMAP_CLEAR(chunkmap, type) \
do { \
(chunkmap)[type / bytes(__u32)] &= \
- ~(1 << (type % bytes(__u32))); \
+ ~(1u << (type % bytes(__u32))); \
} while (0)
#define SCTP_CHUNKMAP_IS_SET(chunkmap, type) \
({ \
((chunkmap)[type / bytes (__u32)] & \
- (1 << (type % bytes (__u32)))) ? 1: 0; \
+ (1u << (type % bytes (__u32)))) ? 1: 0; \
})
#define SCTP_CHUNKMAP_RESET(chunkmap) \
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 341e0e8cae46..5eab191607f8 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -5517,6 +5517,10 @@ enum nl80211_feature_flags {
* with VLAN tagged frames and separate VLAN-specific netdevs added using
* vconfig similarly to the Ethernet case.
*
+ * @NL80211_EXT_FEATURE_AQL: The driver supports the Airtime Queue Limit (AQL)
+ * feature, which prevents bufferbloat by using the expected transmission
+ * time to limit the amount of data buffered in the hardware.
+ *
* @NUM_NL80211_EXT_FEATURES: number of extended features.
* @MAX_NL80211_EXT_FEATURES: highest extended feature index.
*/
@@ -5563,6 +5567,7 @@ enum nl80211_ext_feature_index {
NL80211_EXT_FEATURE_STA_TX_PWR,
NL80211_EXT_FEATURE_SAE_OFFLOAD,
NL80211_EXT_FEATURE_VLAN_OFFLOAD,
+ NL80211_EXT_FEATURE_AQL,
/* add new features before the definition below */
NUM_NL80211_EXT_FEATURES,
diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
index a87b44cd5590..ae2bff14e7e1 100644
--- a/include/uapi/linux/openvswitch.h
+++ b/include/uapi/linux/openvswitch.h
@@ -673,6 +673,32 @@ struct ovs_action_push_mpls {
};
/**
+ * struct ovs_action_add_mpls - %OVS_ACTION_ATTR_ADD_MPLS action
+ * argument.
+ * @mpls_lse: MPLS label stack entry to push.
+ * @mpls_ethertype: Ethertype to set in the encapsulating ethernet frame.
+ * @tun_flags: MPLS tunnel attributes.
+ *
+ * The only values @mpls_ethertype should ever be given are %ETH_P_MPLS_UC and
+ * %ETH_P_MPLS_MC, indicating MPLS unicast or multicast. Other are rejected.
+ */
+struct ovs_action_add_mpls {
+ __be32 mpls_lse;
+ __be16 mpls_ethertype; /* Either %ETH_P_MPLS_UC or %ETH_P_MPLS_MC */
+ __u16 tun_flags;
+};
+
+#define OVS_MPLS_L3_TUNNEL_FLAG_MASK (1 << 0) /* Flag to specify the place of
+ * insertion of MPLS header.
+ * When false, the MPLS header
+ * will be inserted at the start
+ * of the packet.
+ * When true, the MPLS header
+ * will be inserted at the start
+ * of the l3 header.
+ */
+
+/**
* struct ovs_action_push_vlan - %OVS_ACTION_ATTR_PUSH_VLAN action argument.
* @vlan_tpid: Tag protocol identifier (TPID) to push.
* @vlan_tci: Tag control identifier (TCI) to push. The CFI bit must be set
@@ -892,6 +918,10 @@ struct check_pkt_len_arg {
* @OVS_ACTION_ATTR_CHECK_PKT_LEN: Check the packet length and execute a set
* of actions if greater than the specified packet length, else execute
* another set of actions.
+ * @OVS_ACTION_ATTR_ADD_MPLS: Push a new MPLS label stack entry at the
+ * start of the packet or at the start of the l3 header depending on the value
+ * of l3 tunnel flag in the tun_flags field of OVS_ACTION_ATTR_ADD_MPLS
+ * argument.
*
* Only a single header can be set with a single %OVS_ACTION_ATTR_SET. Not all
* fields within a header are modifiable, e.g. the IPv4 protocol and fragment
@@ -927,6 +957,7 @@ enum ovs_action_attr {
OVS_ACTION_ATTR_METER, /* u32 meter ID. */
OVS_ACTION_ATTR_CLONE, /* Nested OVS_CLONE_ATTR_*. */
OVS_ACTION_ATTR_CHECK_PKT_LEN, /* Nested OVS_CHECK_PKT_LEN_ATTR_*. */
+ OVS_ACTION_ATTR_ADD_MPLS, /* struct ovs_action_add_mpls. */
__OVS_ACTION_ATTR_MAX, /* Nothing past this will be accepted
* from userspace. */
diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
index 9f1a72876212..bf5a5b1dfb0b 100644
--- a/include/uapi/linux/pkt_sched.h
+++ b/include/uapi/linux/pkt_sched.h
@@ -1187,4 +1187,21 @@ enum {
#define TCA_TAPRIO_ATTR_MAX (__TCA_TAPRIO_ATTR_MAX - 1)
+/* ETS */
+
+#define TCQ_ETS_MAX_BANDS 16
+
+enum {
+ TCA_ETS_UNSPEC,
+ TCA_ETS_NBANDS, /* u8 */
+ TCA_ETS_NSTRICT, /* u8 */
+ TCA_ETS_QUANTA, /* nested TCA_ETS_QUANTA_BAND */
+ TCA_ETS_QUANTA_BAND, /* u32 */
+ TCA_ETS_PRIOMAP, /* nested TCA_ETS_PRIOMAP_BAND */
+ TCA_ETS_PRIOMAP_BAND, /* u8 */
+ __TCA_ETS_MAX,
+};
+
+#define TCA_ETS_MAX (__TCA_ETS_MAX - 1)
+
#endif
diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
index 74af1f759cee..d87184e673ca 100644
--- a/include/uapi/linux/tcp.h
+++ b/include/uapi/linux/tcp.h
@@ -317,14 +317,15 @@ enum {
#define TCP_MD5SIG_MAXKEYLEN 80
/* tcp_md5sig extension flags for TCP_MD5SIG_EXT */
-#define TCP_MD5SIG_FLAG_PREFIX 1 /* address prefix length */
+#define TCP_MD5SIG_FLAG_PREFIX 0x1 /* address prefix length */
+#define TCP_MD5SIG_FLAG_IFINDEX 0x2 /* ifindex set */
struct tcp_md5sig {
struct __kernel_sockaddr_storage tcpm_addr; /* address associated */
__u8 tcpm_flags; /* extension flags */
__u8 tcpm_prefixlen; /* address prefix */
__u16 tcpm_keylen; /* key length */
- __u32 __tcpm_pad; /* zero */
+ int tcpm_ifindex; /* device index for scope */
__u8 tcpm_key[TCP_MD5SIG_MAXKEYLEN]; /* key (binary) */
};
diff --git a/include/uapi/linux/tipc_netlink.h b/include/uapi/linux/tipc_netlink.h
index 6c2194ab745b..dc0d23a50e69 100644
--- a/include/uapi/linux/tipc_netlink.h
+++ b/include/uapi/linux/tipc_netlink.h
@@ -65,6 +65,7 @@ enum {
TIPC_NL_UDP_GET_REMOTEIP,
TIPC_NL_KEY_SET,
TIPC_NL_KEY_FLUSH,
+ TIPC_NL_ADDR_LEGACY_GET,
__TIPC_NL_CMD_MAX,
TIPC_NL_CMD_MAX = __TIPC_NL_CMD_MAX - 1
@@ -176,6 +177,7 @@ enum {
TIPC_NLA_NET_ADDR, /* u32 */
TIPC_NLA_NET_NODEID, /* u64 */
TIPC_NLA_NET_NODEID_W1, /* u64 */
+ TIPC_NLA_NET_ADDR_LEGACY, /* flag */
__TIPC_NLA_NET_MAX,
TIPC_NLA_NET_MAX = __TIPC_NLA_NET_MAX - 1
diff --git a/include/uapi/linux/vm_sockets.h b/include/uapi/linux/vm_sockets.h
index 68d57c5e99bc..fd0ed7221645 100644
--- a/include/uapi/linux/vm_sockets.h
+++ b/include/uapi/linux/vm_sockets.h
@@ -99,11 +99,13 @@
#define VMADDR_CID_HYPERVISOR 0
-/* This CID is specific to VMCI and can be considered reserved (even VMCI
- * doesn't use it anymore, it's a legacy value from an older release).
+/* Use this as the destination CID in an address when referring to the
+ * local communication (loopback).
+ * (This was VMADDR_CID_RESERVED, but even VMCI doesn't use it anymore,
+ * it was a legacy value from an older release).
*/
-#define VMADDR_CID_RESERVED 1
+#define VMADDR_CID_LOCAL 1
/* Use this as the destination CID in an address when referring to the host
* (any process other than the hypervisor). VMCI relies on it being 2, but
diff --git a/include/uapi/linux/wireguard.h b/include/uapi/linux/wireguard.h
new file mode 100644
index 000000000000..ae88be14c947
--- /dev/null
+++ b/include/uapi/linux/wireguard.h
@@ -0,0 +1,196 @@
+/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR MIT */
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <[email protected]>. All Rights Reserved.
+ *
+ * Documentation
+ * =============
+ *
+ * The below enums and macros are for interfacing with WireGuard, using generic
+ * netlink, with family WG_GENL_NAME and version WG_GENL_VERSION. It defines two
+ * methods: get and set. Note that while they share many common attributes,
+ * these two functions actually accept a slightly different set of inputs and
+ * outputs.
+ *
+ * WG_CMD_GET_DEVICE
+ * -----------------
+ *
+ * May only be called via NLM_F_REQUEST | NLM_F_DUMP. The command should contain
+ * one but not both of:
+ *
+ * WGDEVICE_A_IFINDEX: NLA_U32
+ * WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMSIZ - 1
+ *
+ * The kernel will then return several messages (NLM_F_MULTI) containing the
+ * following tree of nested items:
+ *
+ * WGDEVICE_A_IFINDEX: NLA_U32
+ * WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMSIZ - 1
+ * WGDEVICE_A_PRIVATE_KEY: NLA_EXACT_LEN, len WG_KEY_LEN
+ * WGDEVICE_A_PUBLIC_KEY: NLA_EXACT_LEN, len WG_KEY_LEN
+ * WGDEVICE_A_LISTEN_PORT: NLA_U16
+ * WGDEVICE_A_FWMARK: NLA_U32
+ * WGDEVICE_A_PEERS: NLA_NESTED
+ * 0: NLA_NESTED
+ * WGPEER_A_PUBLIC_KEY: NLA_EXACT_LEN, len WG_KEY_LEN
+ * WGPEER_A_PRESHARED_KEY: NLA_EXACT_LEN, len WG_KEY_LEN
+ * WGPEER_A_ENDPOINT: NLA_MIN_LEN(struct sockaddr), struct sockaddr_in or struct sockaddr_in6
+ * WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL: NLA_U16
+ * WGPEER_A_LAST_HANDSHAKE_TIME: NLA_EXACT_LEN, struct __kernel_timespec
+ * WGPEER_A_RX_BYTES: NLA_U64
+ * WGPEER_A_TX_BYTES: NLA_U64
+ * WGPEER_A_ALLOWEDIPS: NLA_NESTED
+ * 0: NLA_NESTED
+ * WGALLOWEDIP_A_FAMILY: NLA_U16
+ * WGALLOWEDIP_A_IPADDR: NLA_MIN_LEN(struct in_addr), struct in_addr or struct in6_addr
+ * WGALLOWEDIP_A_CIDR_MASK: NLA_U8
+ * 0: NLA_NESTED
+ * ...
+ * 0: NLA_NESTED
+ * ...
+ * ...
+ * WGPEER_A_PROTOCOL_VERSION: NLA_U32
+ * 0: NLA_NESTED
+ * ...
+ * ...
+ *
+ * It is possible that all of the allowed IPs of a single peer will not
+ * fit within a single netlink message. In that case, the same peer will
+ * be written in the following message, except it will only contain
+ * WGPEER_A_PUBLIC_KEY and WGPEER_A_ALLOWEDIPS. This may occur several
+ * times in a row for the same peer. It is then up to the receiver to
+ * coalesce adjacent peers. Likewise, it is possible that all peers will
+ * not fit within a single message. So, subsequent peers will be sent
+ * in following messages, except those will only contain WGDEVICE_A_IFNAME
+ * and WGDEVICE_A_PEERS. It is then up to the receiver to coalesce these
+ * messages to form the complete list of peers.
+ *
+ * Since this is an NLA_F_DUMP command, the final message will always be
+ * NLMSG_DONE, even if an error occurs. However, this NLMSG_DONE message
+ * contains an integer error code. It is either zero or a negative error
+ * code corresponding to the errno.
+ *
+ * WG_CMD_SET_DEVICE
+ * -----------------
+ *
+ * May only be called via NLM_F_REQUEST. The command should contain the
+ * following tree of nested items, containing one but not both of
+ * WGDEVICE_A_IFINDEX and WGDEVICE_A_IFNAME:
+ *
+ * WGDEVICE_A_IFINDEX: NLA_U32
+ * WGDEVICE_A_IFNAME: NLA_NUL_STRING, maxlen IFNAMSIZ - 1
+ * WGDEVICE_A_FLAGS: NLA_U32, 0 or WGDEVICE_F_REPLACE_PEERS if all current
+ * peers should be removed prior to adding the list below.
+ * WGDEVICE_A_PRIVATE_KEY: len WG_KEY_LEN, all zeros to remove
+ * WGDEVICE_A_LISTEN_PORT: NLA_U16, 0 to choose randomly
+ * WGDEVICE_A_FWMARK: NLA_U32, 0 to disable
+ * WGDEVICE_A_PEERS: NLA_NESTED
+ * 0: NLA_NESTED
+ * WGPEER_A_PUBLIC_KEY: len WG_KEY_LEN
+ * WGPEER_A_FLAGS: NLA_U32, 0 and/or WGPEER_F_REMOVE_ME if the
+ * specified peer should not exist at the end of the
+ * operation, rather than added/updated and/or
+ * WGPEER_F_REPLACE_ALLOWEDIPS if all current allowed
+ * IPs of this peer should be removed prior to adding
+ * the list below and/or WGPEER_F_UPDATE_ONLY if the
+ * peer should only be set if it already exists.
+ * WGPEER_A_PRESHARED_KEY: len WG_KEY_LEN, all zeros to remove
+ * WGPEER_A_ENDPOINT: struct sockaddr_in or struct sockaddr_in6
+ * WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL: NLA_U16, 0 to disable
+ * WGPEER_A_ALLOWEDIPS: NLA_NESTED
+ * 0: NLA_NESTED
+ * WGALLOWEDIP_A_FAMILY: NLA_U16
+ * WGALLOWEDIP_A_IPADDR: struct in_addr or struct in6_addr
+ * WGALLOWEDIP_A_CIDR_MASK: NLA_U8
+ * 0: NLA_NESTED
+ * ...
+ * 0: NLA_NESTED
+ * ...
+ * ...
+ * WGPEER_A_PROTOCOL_VERSION: NLA_U32, should not be set or used at
+ * all by most users of this API, as the
+ * most recent protocol will be used when
+ * this is unset. Otherwise, must be set
+ * to 1.
+ * 0: NLA_NESTED
+ * ...
+ * ...
+ *
+ * It is possible that the amount of configuration data exceeds that of
+ * the maximum message length accepted by the kernel. In that case, several
+ * messages should be sent one after another, with each successive one
+ * filling in information not contained in the prior. Note that if
+ * WGDEVICE_F_REPLACE_PEERS is specified in the first message, it probably
+ * should not be specified in fragments that come after, so that the list
+ * of peers is only cleared the first time but appended after. Likewise for
+ * peers, if WGPEER_F_REPLACE_ALLOWEDIPS is specified in the first message
+ * of a peer, it likely should not be specified in subsequent fragments.
+ *
+ * If an error occurs, NLMSG_ERROR will reply containing an errno.
+ */
+
+#ifndef _WG_UAPI_WIREGUARD_H
+#define _WG_UAPI_WIREGUARD_H
+
+#define WG_GENL_NAME "wireguard"
+#define WG_GENL_VERSION 1
+
+#define WG_KEY_LEN 32
+
+enum wg_cmd {
+ WG_CMD_GET_DEVICE,
+ WG_CMD_SET_DEVICE,
+ __WG_CMD_MAX
+};
+#define WG_CMD_MAX (__WG_CMD_MAX - 1)
+
+enum wgdevice_flag {
+ WGDEVICE_F_REPLACE_PEERS = 1U << 0,
+ __WGDEVICE_F_ALL = WGDEVICE_F_REPLACE_PEERS
+};
+enum wgdevice_attribute {
+ WGDEVICE_A_UNSPEC,
+ WGDEVICE_A_IFINDEX,
+ WGDEVICE_A_IFNAME,
+ WGDEVICE_A_PRIVATE_KEY,
+ WGDEVICE_A_PUBLIC_KEY,
+ WGDEVICE_A_FLAGS,
+ WGDEVICE_A_LISTEN_PORT,
+ WGDEVICE_A_FWMARK,
+ WGDEVICE_A_PEERS,
+ __WGDEVICE_A_LAST
+};
+#define WGDEVICE_A_MAX (__WGDEVICE_A_LAST - 1)
+
+enum wgpeer_flag {
+ WGPEER_F_REMOVE_ME = 1U << 0,
+ WGPEER_F_REPLACE_ALLOWEDIPS = 1U << 1,
+ WGPEER_F_UPDATE_ONLY = 1U << 2,
+ __WGPEER_F_ALL = WGPEER_F_REMOVE_ME | WGPEER_F_REPLACE_ALLOWEDIPS |
+ WGPEER_F_UPDATE_ONLY
+};
+enum wgpeer_attribute {
+ WGPEER_A_UNSPEC,
+ WGPEER_A_PUBLIC_KEY,
+ WGPEER_A_PRESHARED_KEY,
+ WGPEER_A_FLAGS,
+ WGPEER_A_ENDPOINT,
+ WGPEER_A_PERSISTENT_KEEPALIVE_INTERVAL,
+ WGPEER_A_LAST_HANDSHAKE_TIME,
+ WGPEER_A_RX_BYTES,
+ WGPEER_A_TX_BYTES,
+ WGPEER_A_ALLOWEDIPS,
+ WGPEER_A_PROTOCOL_VERSION,
+ __WGPEER_A_LAST
+};
+#define WGPEER_A_MAX (__WGPEER_A_LAST - 1)
+
+enum wgallowedip_attribute {
+ WGALLOWEDIP_A_UNSPEC,
+ WGALLOWEDIP_A_FAMILY,
+ WGALLOWEDIP_A_IPADDR,
+ WGALLOWEDIP_A_CIDR_MASK,
+ __WGALLOWEDIP_A_LAST
+};
+#define WGALLOWEDIP_A_MAX (__WGALLOWEDIP_A_LAST - 1)
+
+#endif /* _WG_UAPI_WIREGUARD_H */
diff --git a/include/xen/interface/io/ring.h b/include/xen/interface/io/ring.h
index 3f40501fc60b..2af7a1cd6658 100644
--- a/include/xen/interface/io/ring.h
+++ b/include/xen/interface/io/ring.h
@@ -125,35 +125,24 @@ struct __name##_back_ring { \
memset((_s)->pad, 0, sizeof((_s)->pad)); \
} while(0)
-#define FRONT_RING_INIT(_r, _s, __size) do { \
- (_r)->req_prod_pvt = 0; \
- (_r)->rsp_cons = 0; \
+#define FRONT_RING_ATTACH(_r, _s, _i, __size) do { \
+ (_r)->req_prod_pvt = (_i); \
+ (_r)->rsp_cons = (_i); \
(_r)->nr_ents = __RING_SIZE(_s, __size); \
(_r)->sring = (_s); \
} while (0)
-#define BACK_RING_INIT(_r, _s, __size) do { \
- (_r)->rsp_prod_pvt = 0; \
- (_r)->req_cons = 0; \
- (_r)->nr_ents = __RING_SIZE(_s, __size); \
- (_r)->sring = (_s); \
-} while (0)
+#define FRONT_RING_INIT(_r, _s, __size) FRONT_RING_ATTACH(_r, _s, 0, __size)
-/* Initialize to existing shared indexes -- for recovery */
-#define FRONT_RING_ATTACH(_r, _s, __size) do { \
- (_r)->sring = (_s); \
- (_r)->req_prod_pvt = (_s)->req_prod; \
- (_r)->rsp_cons = (_s)->rsp_prod; \
+#define BACK_RING_ATTACH(_r, _s, _i, __size) do { \
+ (_r)->rsp_prod_pvt = (_i); \
+ (_r)->req_cons = (_i); \
(_r)->nr_ents = __RING_SIZE(_s, __size); \
-} while (0)
-
-#define BACK_RING_ATTACH(_r, _s, __size) do { \
(_r)->sring = (_s); \
- (_r)->rsp_prod_pvt = (_s)->rsp_prod; \
- (_r)->req_cons = (_s)->req_prod; \
- (_r)->nr_ents = __RING_SIZE(_s, __size); \
} while (0)
+#define BACK_RING_INIT(_r, _s, __size) BACK_RING_ATTACH(_r, _s, 0, __size)
+
/* How big is this ring? */
#define RING_SIZE(_r) \
((_r)->nr_ents)
diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h
index 869c816d5f8c..24228a102141 100644
--- a/include/xen/xenbus.h
+++ b/include/xen/xenbus.h
@@ -93,6 +93,7 @@ struct xenbus_device_id
struct xenbus_driver {
const char *name; /* defaults to ids[0].devicetype */
const struct xenbus_device_id *ids;
+ bool allow_rebind; /* avoid setting xenstore closed during remove */
int (*probe)(struct xenbus_device *dev,
const struct xenbus_device_id *id);
void (*otherend_changed)(struct xenbus_device *dev,
diff --git a/init/Kconfig b/init/Kconfig
index a34064a031a5..890aaa62efde 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1604,6 +1604,9 @@ config BPF_SYSCALL
Enable the bpf() system call that allows to manipulate eBPF
programs and maps via file descriptors.
+config ARCH_WANT_DEFAULT_BPF_JIT
+ bool
+
config BPF_JIT_ALWAYS_ON
bool "Permanently enable BPF JIT and remove BPF interpreter"
depends on BPF_SYSCALL && HAVE_EBPF_JIT && BPF_JIT
@@ -1611,6 +1614,10 @@ config BPF_JIT_ALWAYS_ON
Enables BPF JIT and removes BPF interpreter to avoid
speculative execution of BPF instructions by the interpreter
+config BPF_JIT_DEFAULT_ON
+ def_bool ARCH_WANT_DEFAULT_BPF_JIT || BPF_JIT_ALWAYS_ON
+ depends on HAVE_EBPF_JIT && BPF_JIT
+
config USERFAULTFD
bool "Enable userfaultfd() system call"
depends on MMU
diff --git a/init/do_mounts.c b/init/do_mounts.c
index af9cda887a23..0ae9cc22f2ae 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -387,12 +387,27 @@ static void __init get_fs_names(char *page)
*s = '\0';
}
-static int __init do_mount_root(char *name, char *fs, int flags, void *data)
+static int __init do_mount_root(const char *name, const char *fs,
+ const int flags, const void *data)
{
struct super_block *s;
- int err = ksys_mount(name, "/root", fs, flags, data);
- if (err)
- return err;
+ struct page *p = NULL;
+ char *data_page = NULL;
+ int ret;
+
+ if (data) {
+ /* do_mount() requires a full page as fifth argument */
+ p = alloc_page(GFP_KERNEL);
+ if (!p)
+ return -ENOMEM;
+ data_page = page_address(p);
+ /* zero-pad. do_mount() will make sure it's terminated */
+ strncpy(data_page, data, PAGE_SIZE);
+ }
+
+ ret = do_mount(name, "/root", fs, flags, data_page);
+ if (ret)
+ goto out;
ksys_chdir("/root");
s = current->fs->pwd.dentry->d_sb;
@@ -402,7 +417,11 @@ static int __init do_mount_root(char *name, char *fs, int flags, void *data)
s->s_type->name,
sb_rdonly(s) ? " readonly" : "",
MAJOR(ROOT_DEV), MINOR(ROOT_DEV));
- return 0;
+
+out:
+ if (p)
+ put_page(p);
+ return ret;
}
void __init mount_block_root(char *name, int flags)
@@ -670,8 +689,8 @@ void __init prepare_namespace(void)
mount_root();
out:
- devtmpfs_mount("dev");
- ksys_mount(".", "/", NULL, MS_MOVE, NULL);
+ devtmpfs_mount();
+ do_mount(".", "/", NULL, MS_MOVE, NULL);
ksys_chroot(".");
}
diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c
index a9c6cc56f505..dab8b1151b56 100644
--- a/init/do_mounts_initrd.c
+++ b/init/do_mounts_initrd.c
@@ -48,13 +48,10 @@ early_param("initrd", early_initrd);
static int init_linuxrc(struct subprocess_info *info, struct cred *new)
{
ksys_unshare(CLONE_FS | CLONE_FILES);
- /* stdin/stdout/stderr for /linuxrc */
- ksys_open("/dev/console", O_RDWR, 0);
- ksys_dup(0);
- ksys_dup(0);
+ console_on_rootfs();
/* move initrd over / and chdir/chroot in initrd root */
ksys_chdir("/root");
- ksys_mount(".", "/", NULL, MS_MOVE, NULL);
+ do_mount(".", "/", NULL, MS_MOVE, NULL);
ksys_chroot(".");
ksys_setsid();
return 0;
@@ -89,7 +86,7 @@ static void __init handle_initrd(void)
current->flags &= ~PF_FREEZER_SKIP;
/* move initrd to rootfs' /old */
- ksys_mount("..", ".", NULL, MS_MOVE, NULL);
+ do_mount("..", ".", NULL, MS_MOVE, NULL);
/* switch root and cwd back to / of rootfs */
ksys_chroot("..");
@@ -103,7 +100,7 @@ static void __init handle_initrd(void)
mount_root();
printk(KERN_NOTICE "Trying to move old root to /initrd ... ");
- error = ksys_mount("/old", "/root/initrd", NULL, MS_MOVE, NULL);
+ error = do_mount("/old", "/root/initrd", NULL, MS_MOVE, NULL);
if (!error)
printk("okay\n");
else {
diff --git a/init/main.c b/init/main.c
index 91f6ebb30ef0..2cd736059416 100644
--- a/init/main.c
+++ b/init/main.c
@@ -1155,6 +1155,17 @@ static int __ref kernel_init(void *unused)
"See Linux Documentation/admin-guide/init.rst for guidance.");
}
+void console_on_rootfs(void)
+{
+ /* Open the /dev/console as stdin, this should never fail */
+ if (ksys_open((const char __user *) "/dev/console", O_RDWR, 0) < 0)
+ pr_err("Warning: unable to open an initial console.\n");
+
+ /* create stdout/stderr */
+ (void) ksys_dup(0);
+ (void) ksys_dup(0);
+}
+
static noinline void __init kernel_init_freeable(void)
{
/*
@@ -1190,12 +1201,8 @@ static noinline void __init kernel_init_freeable(void)
do_basic_setup();
- /* Open the /dev/console on the rootfs, this should never fail */
- if (ksys_open((const char __user *) "/dev/console", O_RDWR, 0) < 0)
- pr_err("Warning: unable to open an initial console.\n");
+ console_on_rootfs();
- (void) ksys_dup(0);
- (void) ksys_dup(0);
/*
* check if there is an early userspace init. If yes, let it do all
* the work
diff --git a/ipc/util.c b/ipc/util.c
index d126d156efc6..915eacb9c059 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -100,7 +100,7 @@ device_initcall(ipc_init);
static const struct rhashtable_params ipc_kht_params = {
.head_offset = offsetof(struct kern_ipc_perm, khtnode),
.key_offset = offsetof(struct kern_ipc_perm, key),
- .key_len = FIELD_SIZEOF(struct kern_ipc_perm, key),
+ .key_len = sizeof_field(struct kern_ipc_perm, key),
.automatic_shrinking = true,
};
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index 3f671bf617e8..d4f330351f87 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -8,6 +8,7 @@ obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o
obj-$(CONFIG_BPF_SYSCALL) += disasm.o
obj-$(CONFIG_BPF_JIT) += trampoline.o
obj-$(CONFIG_BPF_SYSCALL) += btf.o
+obj-$(CONFIG_BPF_JIT) += dispatcher.o
ifeq ($(CONFIG_NET),y)
obj-$(CONFIG_BPF_SYSCALL) += devmap.o
obj-$(CONFIG_BPF_SYSCALL) += cpumap.o
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 7d40da240891..ed2075884724 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -3470,6 +3470,7 @@ static u8 bpf_ctx_convert_map[] = {
[_id] = __ctx_convert##_id,
#include <linux/bpf_types.h>
#undef BPF_PROG_TYPE
+ 0, /* avoid empty array */
};
#undef BPF_MAP_TYPE
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 9f90d3c92bda..9a500fadbef5 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -35,8 +35,8 @@ void cgroup_bpf_offline(struct cgroup *cgrp)
*/
static void cgroup_bpf_release(struct work_struct *work)
{
- struct cgroup *cgrp = container_of(work, struct cgroup,
- bpf.release_work);
+ struct cgroup *p, *cgrp = container_of(work, struct cgroup,
+ bpf.release_work);
enum bpf_cgroup_storage_type stype;
struct bpf_prog_array *old_array;
unsigned int type;
@@ -65,6 +65,9 @@ static void cgroup_bpf_release(struct work_struct *work)
mutex_unlock(&cgroup_mutex);
+ for (p = cgroup_parent(cgrp); p; p = cgroup_parent(p))
+ cgroup_bpf_put(p);
+
percpu_ref_exit(&cgrp->bpf.refcnt);
cgroup_put(cgrp);
}
@@ -103,8 +106,7 @@ static u32 prog_list_length(struct list_head *head)
* if parent has overridable or multi-prog, allow attaching
*/
static bool hierarchy_allows_attach(struct cgroup *cgrp,
- enum bpf_attach_type type,
- u32 new_flags)
+ enum bpf_attach_type type)
{
struct cgroup *p;
@@ -199,6 +201,7 @@ int cgroup_bpf_inherit(struct cgroup *cgrp)
*/
#define NR ARRAY_SIZE(cgrp->bpf.effective)
struct bpf_prog_array *arrays[NR] = {};
+ struct cgroup *p;
int ret, i;
ret = percpu_ref_init(&cgrp->bpf.refcnt, cgroup_bpf_release_fn, 0,
@@ -206,6 +209,9 @@ int cgroup_bpf_inherit(struct cgroup *cgrp)
if (ret)
return ret;
+ for (p = cgroup_parent(cgrp); p; p = cgroup_parent(p))
+ cgroup_bpf_get(p);
+
for (i = 0; i < NR; i++)
INIT_LIST_HEAD(&cgrp->bpf.progs[i]);
@@ -283,31 +289,34 @@ cleanup:
* propagate the change to descendants
* @cgrp: The cgroup which descendants to traverse
* @prog: A program to attach
+ * @replace_prog: Previously attached program to replace if BPF_F_REPLACE is set
* @type: Type of attach operation
* @flags: Option flags
*
* Must be called with cgroup_mutex held.
*/
int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
+ struct bpf_prog *replace_prog,
enum bpf_attach_type type, u32 flags)
{
+ u32 saved_flags = (flags & (BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI));
struct list_head *progs = &cgrp->bpf.progs[type];
struct bpf_prog *old_prog = NULL;
struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE],
*old_storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {NULL};
+ struct bpf_prog_list *pl, *replace_pl = NULL;
enum bpf_cgroup_storage_type stype;
- struct bpf_prog_list *pl;
- bool pl_was_allocated;
int err;
- if ((flags & BPF_F_ALLOW_OVERRIDE) && (flags & BPF_F_ALLOW_MULTI))
+ if (((flags & BPF_F_ALLOW_OVERRIDE) && (flags & BPF_F_ALLOW_MULTI)) ||
+ ((flags & BPF_F_REPLACE) && !(flags & BPF_F_ALLOW_MULTI)))
/* invalid combination */
return -EINVAL;
- if (!hierarchy_allows_attach(cgrp, type, flags))
+ if (!hierarchy_allows_attach(cgrp, type))
return -EPERM;
- if (!list_empty(progs) && cgrp->bpf.flags[type] != flags)
+ if (!list_empty(progs) && cgrp->bpf.flags[type] != saved_flags)
/* Disallow attaching non-overridable on top
* of existing overridable in this cgroup.
* Disallow attaching multi-prog if overridable or none
@@ -317,6 +326,21 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
if (prog_list_length(progs) >= BPF_CGROUP_MAX_PROGS)
return -E2BIG;
+ if (flags & BPF_F_ALLOW_MULTI) {
+ list_for_each_entry(pl, progs, node) {
+ if (pl->prog == prog)
+ /* disallow attaching the same prog twice */
+ return -EINVAL;
+ if (pl->prog == replace_prog)
+ replace_pl = pl;
+ }
+ if ((flags & BPF_F_REPLACE) && !replace_pl)
+ /* prog to replace not found for cgroup */
+ return -ENOENT;
+ } else if (!list_empty(progs)) {
+ replace_pl = list_first_entry(progs, typeof(*pl), node);
+ }
+
for_each_cgroup_storage_type(stype) {
storage[stype] = bpf_cgroup_storage_alloc(prog, stype);
if (IS_ERR(storage[stype])) {
@@ -327,53 +351,28 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
}
}
- if (flags & BPF_F_ALLOW_MULTI) {
- list_for_each_entry(pl, progs, node) {
- if (pl->prog == prog) {
- /* disallow attaching the same prog twice */
- for_each_cgroup_storage_type(stype)
- bpf_cgroup_storage_free(storage[stype]);
- return -EINVAL;
- }
+ if (replace_pl) {
+ pl = replace_pl;
+ old_prog = pl->prog;
+ for_each_cgroup_storage_type(stype) {
+ old_storage[stype] = pl->storage[stype];
+ bpf_cgroup_storage_unlink(old_storage[stype]);
}
-
+ } else {
pl = kmalloc(sizeof(*pl), GFP_KERNEL);
if (!pl) {
for_each_cgroup_storage_type(stype)
bpf_cgroup_storage_free(storage[stype]);
return -ENOMEM;
}
-
- pl_was_allocated = true;
- pl->prog = prog;
- for_each_cgroup_storage_type(stype)
- pl->storage[stype] = storage[stype];
list_add_tail(&pl->node, progs);
- } else {
- if (list_empty(progs)) {
- pl = kmalloc(sizeof(*pl), GFP_KERNEL);
- if (!pl) {
- for_each_cgroup_storage_type(stype)
- bpf_cgroup_storage_free(storage[stype]);
- return -ENOMEM;
- }
- pl_was_allocated = true;
- list_add_tail(&pl->node, progs);
- } else {
- pl = list_first_entry(progs, typeof(*pl), node);
- old_prog = pl->prog;
- for_each_cgroup_storage_type(stype) {
- old_storage[stype] = pl->storage[stype];
- bpf_cgroup_storage_unlink(old_storage[stype]);
- }
- pl_was_allocated = false;
- }
- pl->prog = prog;
- for_each_cgroup_storage_type(stype)
- pl->storage[stype] = storage[stype];
}
- cgrp->bpf.flags[type] = flags;
+ pl->prog = prog;
+ for_each_cgroup_storage_type(stype)
+ pl->storage[stype] = storage[stype];
+
+ cgrp->bpf.flags[type] = saved_flags;
err = update_effective_progs(cgrp, type);
if (err)
@@ -401,7 +400,7 @@ cleanup:
pl->storage[stype] = old_storage[stype];
bpf_cgroup_storage_link(old_storage[stype], cgrp, type);
}
- if (pl_was_allocated) {
+ if (!replace_pl) {
list_del(&pl->node);
kfree(pl);
}
@@ -539,6 +538,7 @@ int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
int cgroup_bpf_prog_attach(const union bpf_attr *attr,
enum bpf_prog_type ptype, struct bpf_prog *prog)
{
+ struct bpf_prog *replace_prog = NULL;
struct cgroup *cgrp;
int ret;
@@ -546,8 +546,20 @@ int cgroup_bpf_prog_attach(const union bpf_attr *attr,
if (IS_ERR(cgrp))
return PTR_ERR(cgrp);
- ret = cgroup_bpf_attach(cgrp, prog, attr->attach_type,
+ if ((attr->attach_flags & BPF_F_ALLOW_MULTI) &&
+ (attr->attach_flags & BPF_F_REPLACE)) {
+ replace_prog = bpf_prog_get_type(attr->replace_bpf_fd, ptype);
+ if (IS_ERR(replace_prog)) {
+ cgroup_put(cgrp);
+ return PTR_ERR(replace_prog);
+ }
+ }
+
+ ret = cgroup_bpf_attach(cgrp, prog, replace_prog, attr->attach_type,
attr->attach_flags);
+
+ if (replace_prog)
+ bpf_prog_put(replace_prog);
cgroup_put(cgrp);
return ret;
}
@@ -1341,7 +1353,7 @@ static u32 sysctl_convert_ctx_access(enum bpf_access_type type,
*insn++ = BPF_LDX_MEM(
BPF_SIZE(si->code), si->dst_reg, si->src_reg,
bpf_target_off(struct bpf_sysctl_kern, write,
- FIELD_SIZEOF(struct bpf_sysctl_kern,
+ sizeof_field(struct bpf_sysctl_kern,
write),
target_size));
break;
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 49e32acad7d8..29d47aae0dd1 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -222,8 +222,6 @@ struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size,
u32 pages, delta;
int ret;
- BUG_ON(fp_old == NULL);
-
size = round_up(size, PAGE_SIZE);
pages = size / PAGE_SIZE;
if (pages <= fp_old->pages)
@@ -520,9 +518,9 @@ void bpf_prog_kallsyms_del_all(struct bpf_prog *fp)
#ifdef CONFIG_BPF_JIT
/* All BPF JIT sysctl knobs here. */
-int bpf_jit_enable __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_ALWAYS_ON);
+int bpf_jit_enable __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_DEFAULT_ON);
+int bpf_jit_kallsyms __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_DEFAULT_ON);
int bpf_jit_harden __read_mostly;
-int bpf_jit_kallsyms __read_mostly;
long bpf_jit_limit __read_mostly;
static __always_inline void
@@ -2043,23 +2041,28 @@ static void bpf_free_cgroup_storage(struct bpf_prog_aux *aux)
for_each_cgroup_storage_type(stype) {
if (!aux->cgroup_storage[stype])
continue;
- bpf_cgroup_storage_release(aux->prog,
- aux->cgroup_storage[stype]);
+ bpf_cgroup_storage_release(aux, aux->cgroup_storage[stype]);
}
}
-static void bpf_free_used_maps(struct bpf_prog_aux *aux)
+void __bpf_free_used_maps(struct bpf_prog_aux *aux,
+ struct bpf_map **used_maps, u32 len)
{
struct bpf_map *map;
- int i;
+ u32 i;
bpf_free_cgroup_storage(aux);
- for (i = 0; i < aux->used_map_cnt; i++) {
- map = aux->used_maps[i];
+ for (i = 0; i < len; i++) {
+ map = used_maps[i];
if (map->ops->map_poke_untrack)
map->ops->map_poke_untrack(map, aux);
bpf_map_put(map);
}
+}
+
+static void bpf_free_used_maps(struct bpf_prog_aux *aux)
+{
+ __bpf_free_used_maps(aux, aux->used_maps, aux->used_map_cnt);
kfree(aux->used_maps);
}
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index ef49e17ae47c..70f71b154fa5 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -72,17 +72,18 @@ struct bpf_cpu_map {
struct bpf_map map;
/* Below members specific for map type */
struct bpf_cpu_map_entry **cpu_map;
- struct list_head __percpu *flush_list;
};
-static int bq_flush_to_queue(struct xdp_bulk_queue *bq, bool in_napi_ctx);
+static DEFINE_PER_CPU(struct list_head, cpu_map_flush_list);
+
+static int bq_flush_to_queue(struct xdp_bulk_queue *bq);
static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
{
struct bpf_cpu_map *cmap;
int err = -ENOMEM;
- int ret, cpu;
u64 cost;
+ int ret;
if (!capable(CAP_SYS_ADMIN))
return ERR_PTR(-EPERM);
@@ -106,7 +107,6 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
/* make sure page count doesn't overflow */
cost = (u64) cmap->map.max_entries * sizeof(struct bpf_cpu_map_entry *);
- cost += sizeof(struct list_head) * num_possible_cpus();
/* Notice returns -EPERM on if map size is larger than memlock limit */
ret = bpf_map_charge_init(&cmap->map.memory, cost);
@@ -115,23 +115,14 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
goto free_cmap;
}
- cmap->flush_list = alloc_percpu(struct list_head);
- if (!cmap->flush_list)
- goto free_charge;
-
- for_each_possible_cpu(cpu)
- INIT_LIST_HEAD(per_cpu_ptr(cmap->flush_list, cpu));
-
/* Alloc array for possible remote "destination" CPUs */
cmap->cpu_map = bpf_map_area_alloc(cmap->map.max_entries *
sizeof(struct bpf_cpu_map_entry *),
cmap->map.numa_node);
if (!cmap->cpu_map)
- goto free_percpu;
+ goto free_charge;
return &cmap->map;
-free_percpu:
- free_percpu(cmap->flush_list);
free_charge:
bpf_map_charge_finish(&cmap->map.memory);
free_cmap:
@@ -399,22 +390,14 @@ free_rcu:
static void __cpu_map_entry_free(struct rcu_head *rcu)
{
struct bpf_cpu_map_entry *rcpu;
- int cpu;
/* This cpu_map_entry have been disconnected from map and one
- * RCU graze-period have elapsed. Thus, XDP cannot queue any
+ * RCU grace-period have elapsed. Thus, XDP cannot queue any
* new packets and cannot change/set flush_needed that can
* find this entry.
*/
rcpu = container_of(rcu, struct bpf_cpu_map_entry, rcu);
- /* Flush remaining packets in percpu bulkq */
- for_each_online_cpu(cpu) {
- struct xdp_bulk_queue *bq = per_cpu_ptr(rcpu->bulkq, cpu);
-
- /* No concurrent bq_enqueue can run at this point */
- bq_flush_to_queue(bq, false);
- }
free_percpu(rcpu->bulkq);
/* Cannot kthread_stop() here, last put free rcpu resources */
put_cpu_map_entry(rcpu);
@@ -436,7 +419,7 @@ static void __cpu_map_entry_free(struct rcu_head *rcu)
* percpu bulkq to queue. Due to caller map_delete_elem() disable
* preemption, cannot call kthread_stop() to make sure queue is empty.
* Instead a work_queue is started for stopping kthread,
- * cpu_map_kthread_stop, which waits for an RCU graze period before
+ * cpu_map_kthread_stop, which waits for an RCU grace period before
* stopping kthread, emptying the queue.
*/
static void __cpu_map_entry_replace(struct bpf_cpu_map *cmap,
@@ -507,7 +490,6 @@ static int cpu_map_update_elem(struct bpf_map *map, void *key, void *value,
static void cpu_map_free(struct bpf_map *map)
{
struct bpf_cpu_map *cmap = container_of(map, struct bpf_cpu_map, map);
- int cpu;
u32 i;
/* At this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0,
@@ -522,18 +504,6 @@ static void cpu_map_free(struct bpf_map *map)
bpf_clear_redirect_map(map);
synchronize_rcu();
- /* To ensure all pending flush operations have completed wait for flush
- * list be empty on _all_ cpus. Because the above synchronize_rcu()
- * ensures the map is disconnected from the program we can assume no new
- * items will be added to the list.
- */
- for_each_online_cpu(cpu) {
- struct list_head *flush_list = per_cpu_ptr(cmap->flush_list, cpu);
-
- while (!list_empty(flush_list))
- cond_resched();
- }
-
/* For cpu_map the remote CPUs can still be using the entries
* (struct bpf_cpu_map_entry).
*/
@@ -544,10 +514,9 @@ static void cpu_map_free(struct bpf_map *map)
if (!rcpu)
continue;
- /* bq flush and cleanup happens after RCU graze-period */
+ /* bq flush and cleanup happens after RCU grace-period */
__cpu_map_entry_replace(cmap, i, NULL); /* call_rcu */
}
- free_percpu(cmap->flush_list);
bpf_map_area_free(cmap->cpu_map);
kfree(cmap);
}
@@ -599,7 +568,7 @@ const struct bpf_map_ops cpu_map_ops = {
.map_check_btf = map_check_no_btf,
};
-static int bq_flush_to_queue(struct xdp_bulk_queue *bq, bool in_napi_ctx)
+static int bq_flush_to_queue(struct xdp_bulk_queue *bq)
{
struct bpf_cpu_map_entry *rcpu = bq->obj;
unsigned int processed = 0, drops = 0;
@@ -620,10 +589,7 @@ static int bq_flush_to_queue(struct xdp_bulk_queue *bq, bool in_napi_ctx)
err = __ptr_ring_produce(q, xdpf);
if (err) {
drops++;
- if (likely(in_napi_ctx))
- xdp_return_frame_rx_napi(xdpf);
- else
- xdp_return_frame(xdpf);
+ xdp_return_frame_rx_napi(xdpf);
}
processed++;
}
@@ -642,11 +608,11 @@ static int bq_flush_to_queue(struct xdp_bulk_queue *bq, bool in_napi_ctx)
*/
static int bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf)
{
- struct list_head *flush_list = this_cpu_ptr(rcpu->cmap->flush_list);
+ struct list_head *flush_list = this_cpu_ptr(&cpu_map_flush_list);
struct xdp_bulk_queue *bq = this_cpu_ptr(rcpu->bulkq);
if (unlikely(bq->count == CPU_MAP_BULK_SIZE))
- bq_flush_to_queue(bq, true);
+ bq_flush_to_queue(bq);
/* Notice, xdp_buff/page MUST be queued here, long enough for
* driver to code invoking us to finished, due to driver
@@ -681,16 +647,26 @@ int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp,
return 0;
}
-void __cpu_map_flush(struct bpf_map *map)
+void __cpu_map_flush(void)
{
- struct bpf_cpu_map *cmap = container_of(map, struct bpf_cpu_map, map);
- struct list_head *flush_list = this_cpu_ptr(cmap->flush_list);
+ struct list_head *flush_list = this_cpu_ptr(&cpu_map_flush_list);
struct xdp_bulk_queue *bq, *tmp;
list_for_each_entry_safe(bq, tmp, flush_list, flush_node) {
- bq_flush_to_queue(bq, true);
+ bq_flush_to_queue(bq);
/* If already running, costs spin_lock_irqsave + smb_mb */
wake_up_process(bq->obj->kthread);
}
}
+
+static int __init cpu_map_init(void)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu)
+ INIT_LIST_HEAD(&per_cpu(cpu_map_flush_list, cpu));
+ return 0;
+}
+
+subsys_initcall(cpu_map_init);
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index 3d3d61b5985b..da9c832fc5c8 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -75,7 +75,6 @@ struct bpf_dtab_netdev {
struct bpf_dtab {
struct bpf_map map;
struct bpf_dtab_netdev **netdev_map; /* DEVMAP type only */
- struct list_head __percpu *flush_list;
struct list_head list;
/* these are only used for DEVMAP_HASH type maps */
@@ -85,6 +84,7 @@ struct bpf_dtab {
u32 n_buckets;
};
+static DEFINE_PER_CPU(struct list_head, dev_map_flush_list);
static DEFINE_SPINLOCK(dev_map_lock);
static LIST_HEAD(dev_map_list);
@@ -109,8 +109,8 @@ static inline struct hlist_head *dev_map_index_hash(struct bpf_dtab *dtab,
static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr)
{
- int err, cpu;
- u64 cost;
+ u64 cost = 0;
+ int err;
/* check sanity of attributes */
if (attr->max_entries == 0 || attr->key_size != 4 ||
@@ -125,9 +125,6 @@ static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr)
bpf_map_init_from_attr(&dtab->map, attr);
- /* make sure page count doesn't overflow */
- cost = (u64) sizeof(struct list_head) * num_possible_cpus();
-
if (attr->map_type == BPF_MAP_TYPE_DEVMAP_HASH) {
dtab->n_buckets = roundup_pow_of_two(dtab->map.max_entries);
@@ -143,17 +140,10 @@ static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr)
if (err)
return -EINVAL;
- dtab->flush_list = alloc_percpu(struct list_head);
- if (!dtab->flush_list)
- goto free_charge;
-
- for_each_possible_cpu(cpu)
- INIT_LIST_HEAD(per_cpu_ptr(dtab->flush_list, cpu));
-
if (attr->map_type == BPF_MAP_TYPE_DEVMAP_HASH) {
dtab->dev_index_head = dev_map_create_hash(dtab->n_buckets);
if (!dtab->dev_index_head)
- goto free_percpu;
+ goto free_charge;
spin_lock_init(&dtab->index_lock);
} else {
@@ -161,13 +151,11 @@ static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr)
sizeof(struct bpf_dtab_netdev *),
dtab->map.numa_node);
if (!dtab->netdev_map)
- goto free_percpu;
+ goto free_charge;
}
return 0;
-free_percpu:
- free_percpu(dtab->flush_list);
free_charge:
bpf_map_charge_finish(&dtab->map.memory);
return -ENOMEM;
@@ -201,7 +189,7 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
static void dev_map_free(struct bpf_map *map)
{
struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
- int i, cpu;
+ int i;
/* At this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0,
* so the programs (can be more than one that used this map) were
@@ -221,18 +209,6 @@ static void dev_map_free(struct bpf_map *map)
/* Make sure prior __dev_map_entry_free() have completed. */
rcu_barrier();
- /* To ensure all pending flush operations have completed wait for flush
- * list to empty on _all_ cpus.
- * Because the above synchronize_rcu() ensures the map is disconnected
- * from the program we can assume no new items will be added.
- */
- for_each_online_cpu(cpu) {
- struct list_head *flush_list = per_cpu_ptr(dtab->flush_list, cpu);
-
- while (!list_empty(flush_list))
- cond_resched();
- }
-
if (dtab->map.map_type == BPF_MAP_TYPE_DEVMAP_HASH) {
for (i = 0; i < dtab->n_buckets; i++) {
struct bpf_dtab_netdev *dev;
@@ -266,7 +242,6 @@ static void dev_map_free(struct bpf_map *map)
bpf_map_area_free(dtab->netdev_map);
}
- free_percpu(dtab->flush_list);
kfree(dtab);
}
@@ -345,8 +320,7 @@ static int dev_map_hash_get_next_key(struct bpf_map *map, void *key,
return -ENOENT;
}
-static int bq_xmit_all(struct xdp_bulk_queue *bq, u32 flags,
- bool in_napi_ctx)
+static int bq_xmit_all(struct xdp_bulk_queue *bq, u32 flags)
{
struct bpf_dtab_netdev *obj = bq->obj;
struct net_device *dev = obj->dev;
@@ -384,11 +358,7 @@ error:
for (i = 0; i < bq->count; i++) {
struct xdp_frame *xdpf = bq->q[i];
- /* RX path under NAPI protection, can return frames faster */
- if (likely(in_napi_ctx))
- xdp_return_frame_rx_napi(xdpf);
- else
- xdp_return_frame(xdpf);
+ xdp_return_frame_rx_napi(xdpf);
drops++;
}
goto out;
@@ -401,15 +371,14 @@ error:
* net device can be torn down. On devmap tear down we ensure the flush list
* is empty before completing to ensure all flush operations have completed.
*/
-void __dev_map_flush(struct bpf_map *map)
+void __dev_map_flush(void)
{
- struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
- struct list_head *flush_list = this_cpu_ptr(dtab->flush_list);
+ struct list_head *flush_list = this_cpu_ptr(&dev_map_flush_list);
struct xdp_bulk_queue *bq, *tmp;
rcu_read_lock();
list_for_each_entry_safe(bq, tmp, flush_list, flush_node)
- bq_xmit_all(bq, XDP_XMIT_FLUSH, true);
+ bq_xmit_all(bq, XDP_XMIT_FLUSH);
rcu_read_unlock();
}
@@ -436,11 +405,11 @@ static int bq_enqueue(struct bpf_dtab_netdev *obj, struct xdp_frame *xdpf,
struct net_device *dev_rx)
{
- struct list_head *flush_list = this_cpu_ptr(obj->dtab->flush_list);
+ struct list_head *flush_list = this_cpu_ptr(&dev_map_flush_list);
struct xdp_bulk_queue *bq = this_cpu_ptr(obj->bulkq);
if (unlikely(bq->count == DEV_MAP_BULK_SIZE))
- bq_xmit_all(bq, 0, true);
+ bq_xmit_all(bq, 0);
/* Ingress dev_rx will be the same for all xdp_frame's in
* bulk_queue, because bq stored per-CPU and must be flushed
@@ -509,27 +478,11 @@ static void *dev_map_hash_lookup_elem(struct bpf_map *map, void *key)
return dev ? &dev->ifindex : NULL;
}
-static void dev_map_flush_old(struct bpf_dtab_netdev *dev)
-{
- if (dev->dev->netdev_ops->ndo_xdp_xmit) {
- struct xdp_bulk_queue *bq;
- int cpu;
-
- rcu_read_lock();
- for_each_online_cpu(cpu) {
- bq = per_cpu_ptr(dev->bulkq, cpu);
- bq_xmit_all(bq, XDP_XMIT_FLUSH, false);
- }
- rcu_read_unlock();
- }
-}
-
static void __dev_map_entry_free(struct rcu_head *rcu)
{
struct bpf_dtab_netdev *dev;
dev = container_of(rcu, struct bpf_dtab_netdev, rcu);
- dev_map_flush_old(dev);
free_percpu(dev->bulkq);
dev_put(dev->dev);
kfree(dev);
@@ -810,10 +763,15 @@ static struct notifier_block dev_map_notifier = {
static int __init dev_map_init(void)
{
+ int cpu;
+
/* Assure tracepoint shadow struct _bpf_dtab_netdev is in sync */
BUILD_BUG_ON(offsetof(struct bpf_dtab_netdev, dev) !=
offsetof(struct _bpf_dtab_netdev, dev));
register_netdevice_notifier(&dev_map_notifier);
+
+ for_each_possible_cpu(cpu)
+ INIT_LIST_HEAD(&per_cpu(dev_map_flush_list, cpu));
return 0;
}
diff --git a/kernel/bpf/dispatcher.c b/kernel/bpf/dispatcher.c
new file mode 100644
index 000000000000..204ee61a3904
--- /dev/null
+++ b/kernel/bpf/dispatcher.c
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2019 Intel Corporation. */
+
+#include <linux/hash.h>
+#include <linux/bpf.h>
+#include <linux/filter.h>
+
+/* The BPF dispatcher is a multiway branch code generator. The
+ * dispatcher is a mechanism to avoid the performance penalty of an
+ * indirect call, which is expensive when retpolines are enabled. A
+ * dispatch client registers a BPF program into the dispatcher, and if
+ * there is available room in the dispatcher a direct call to the BPF
+ * program will be generated. All calls to the BPF programs called via
+ * the dispatcher will then be a direct call, instead of an
+ * indirect. The dispatcher hijacks a trampoline function it via the
+ * __fentry__ of the trampoline. The trampoline function has the
+ * following signature:
+ *
+ * unsigned int trampoline(const void *ctx, const struct bpf_insn *insnsi,
+ * unsigned int (*bpf_func)(const void *,
+ * const struct bpf_insn *));
+ */
+
+static struct bpf_dispatcher_prog *bpf_dispatcher_find_prog(
+ struct bpf_dispatcher *d, struct bpf_prog *prog)
+{
+ int i;
+
+ for (i = 0; i < BPF_DISPATCHER_MAX; i++) {
+ if (prog == d->progs[i].prog)
+ return &d->progs[i];
+ }
+ return NULL;
+}
+
+static struct bpf_dispatcher_prog *bpf_dispatcher_find_free(
+ struct bpf_dispatcher *d)
+{
+ return bpf_dispatcher_find_prog(d, NULL);
+}
+
+static bool bpf_dispatcher_add_prog(struct bpf_dispatcher *d,
+ struct bpf_prog *prog)
+{
+ struct bpf_dispatcher_prog *entry;
+
+ if (!prog)
+ return false;
+
+ entry = bpf_dispatcher_find_prog(d, prog);
+ if (entry) {
+ refcount_inc(&entry->users);
+ return false;
+ }
+
+ entry = bpf_dispatcher_find_free(d);
+ if (!entry)
+ return false;
+
+ bpf_prog_inc(prog);
+ entry->prog = prog;
+ refcount_set(&entry->users, 1);
+ d->num_progs++;
+ return true;
+}
+
+static bool bpf_dispatcher_remove_prog(struct bpf_dispatcher *d,
+ struct bpf_prog *prog)
+{
+ struct bpf_dispatcher_prog *entry;
+
+ if (!prog)
+ return false;
+
+ entry = bpf_dispatcher_find_prog(d, prog);
+ if (!entry)
+ return false;
+
+ if (refcount_dec_and_test(&entry->users)) {
+ entry->prog = NULL;
+ bpf_prog_put(prog);
+ d->num_progs--;
+ return true;
+ }
+ return false;
+}
+
+int __weak arch_prepare_bpf_dispatcher(void *image, s64 *funcs, int num_funcs)
+{
+ return -ENOTSUPP;
+}
+
+static int bpf_dispatcher_prepare(struct bpf_dispatcher *d, void *image)
+{
+ s64 ips[BPF_DISPATCHER_MAX] = {}, *ipsp = &ips[0];
+ int i;
+
+ for (i = 0; i < BPF_DISPATCHER_MAX; i++) {
+ if (d->progs[i].prog)
+ *ipsp++ = (s64)(uintptr_t)d->progs[i].prog->bpf_func;
+ }
+ return arch_prepare_bpf_dispatcher(image, &ips[0], d->num_progs);
+}
+
+static void bpf_dispatcher_update(struct bpf_dispatcher *d, int prev_num_progs)
+{
+ void *old, *new;
+ u32 noff;
+ int err;
+
+ if (!prev_num_progs) {
+ old = NULL;
+ noff = 0;
+ } else {
+ old = d->image + d->image_off;
+ noff = d->image_off ^ (PAGE_SIZE / 2);
+ }
+
+ new = d->num_progs ? d->image + noff : NULL;
+ if (new) {
+ if (bpf_dispatcher_prepare(d, new))
+ return;
+ }
+
+ err = bpf_arch_text_poke(d->func, BPF_MOD_JUMP, old, new);
+ if (err || !new)
+ return;
+
+ d->image_off = noff;
+}
+
+void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from,
+ struct bpf_prog *to)
+{
+ bool changed = false;
+ int prev_num_progs;
+
+ if (from == to)
+ return;
+
+ mutex_lock(&d->mutex);
+ if (!d->image) {
+ d->image = bpf_jit_alloc_exec_page();
+ if (!d->image)
+ goto out;
+ }
+
+ prev_num_progs = d->num_progs;
+ changed |= bpf_dispatcher_remove_prog(d, from);
+ changed |= bpf_dispatcher_add_prog(d, to);
+
+ if (!changed)
+ goto out;
+
+ bpf_dispatcher_update(d, prev_num_progs);
+out:
+ mutex_unlock(&d->mutex);
+}
diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c
index 2ba750725cb2..33d01866bcc2 100644
--- a/kernel/bpf/local_storage.c
+++ b/kernel/bpf/local_storage.c
@@ -20,7 +20,7 @@ struct bpf_cgroup_storage_map {
struct bpf_map map;
spinlock_t lock;
- struct bpf_prog *prog;
+ struct bpf_prog_aux *aux;
struct rb_root root;
struct list_head list;
};
@@ -357,7 +357,7 @@ static int cgroup_storage_check_btf(const struct bpf_map *map,
* The first field must be a 64 bit integer at 0 offset.
*/
m = (struct btf_member *)(key_type + 1);
- size = FIELD_SIZEOF(struct bpf_cgroup_storage_key, cgroup_inode_id);
+ size = sizeof_field(struct bpf_cgroup_storage_key, cgroup_inode_id);
if (!btf_member_is_reg_int(btf, key_type, m, 0, size))
return -EINVAL;
@@ -366,7 +366,7 @@ static int cgroup_storage_check_btf(const struct bpf_map *map,
*/
m++;
offset = offsetof(struct bpf_cgroup_storage_key, attach_type);
- size = FIELD_SIZEOF(struct bpf_cgroup_storage_key, attach_type);
+ size = sizeof_field(struct bpf_cgroup_storage_key, attach_type);
if (!btf_member_is_reg_int(btf, key_type, m, offset, size))
return -EINVAL;
@@ -420,7 +420,7 @@ const struct bpf_map_ops cgroup_storage_map_ops = {
.map_seq_show_elem = cgroup_storage_seq_show_elem,
};
-int bpf_cgroup_storage_assign(struct bpf_prog *prog, struct bpf_map *_map)
+int bpf_cgroup_storage_assign(struct bpf_prog_aux *aux, struct bpf_map *_map)
{
enum bpf_cgroup_storage_type stype = cgroup_storage_type(_map);
struct bpf_cgroup_storage_map *map = map_to_storage(_map);
@@ -428,14 +428,14 @@ int bpf_cgroup_storage_assign(struct bpf_prog *prog, struct bpf_map *_map)
spin_lock_bh(&map->lock);
- if (map->prog && map->prog != prog)
+ if (map->aux && map->aux != aux)
goto unlock;
- if (prog->aux->cgroup_storage[stype] &&
- prog->aux->cgroup_storage[stype] != _map)
+ if (aux->cgroup_storage[stype] &&
+ aux->cgroup_storage[stype] != _map)
goto unlock;
- map->prog = prog;
- prog->aux->cgroup_storage[stype] = _map;
+ map->aux = aux;
+ aux->cgroup_storage[stype] = _map;
ret = 0;
unlock:
spin_unlock_bh(&map->lock);
@@ -443,16 +443,16 @@ unlock:
return ret;
}
-void bpf_cgroup_storage_release(struct bpf_prog *prog, struct bpf_map *_map)
+void bpf_cgroup_storage_release(struct bpf_prog_aux *aux, struct bpf_map *_map)
{
enum bpf_cgroup_storage_type stype = cgroup_storage_type(_map);
struct bpf_cgroup_storage_map *map = map_to_storage(_map);
spin_lock_bh(&map->lock);
- if (map->prog == prog) {
- WARN_ON(prog->aux->cgroup_storage[stype] != _map);
- map->prog = NULL;
- prog->aux->cgroup_storage[stype] = NULL;
+ if (map->aux == aux) {
+ WARN_ON(aux->cgroup_storage[stype] != _map);
+ map->aux = NULL;
+ aux->cgroup_storage[stype] = NULL;
}
spin_unlock_bh(&map->lock);
}
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index e3461ec59570..81ee8595dfee 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -23,6 +23,7 @@
#include <linux/timekeeping.h>
#include <linux/ctype.h>
#include <linux/nospec.h>
+#include <linux/audit.h>
#include <uapi/linux/btf.h>
#define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
@@ -1306,6 +1307,36 @@ static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)
return 0;
}
+enum bpf_audit {
+ BPF_AUDIT_LOAD,
+ BPF_AUDIT_UNLOAD,
+ BPF_AUDIT_MAX,
+};
+
+static const char * const bpf_audit_str[BPF_AUDIT_MAX] = {
+ [BPF_AUDIT_LOAD] = "LOAD",
+ [BPF_AUDIT_UNLOAD] = "UNLOAD",
+};
+
+static void bpf_audit_prog(const struct bpf_prog *prog, unsigned int op)
+{
+ struct audit_context *ctx = NULL;
+ struct audit_buffer *ab;
+
+ if (WARN_ON_ONCE(op >= BPF_AUDIT_MAX))
+ return;
+ if (audit_enabled == AUDIT_OFF)
+ return;
+ if (op == BPF_AUDIT_LOAD)
+ ctx = audit_context();
+ ab = audit_log_start(ctx, GFP_ATOMIC, AUDIT_BPF);
+ if (unlikely(!ab))
+ return;
+ audit_log_format(ab, "prog-id=%u op=%s",
+ prog->aux->id, bpf_audit_str[op]);
+ audit_log_end(ab);
+}
+
int __bpf_prog_charge(struct user_struct *user, u32 pages)
{
unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
@@ -1421,6 +1452,7 @@ static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock)
{
if (atomic64_dec_and_test(&prog->aux->refcnt)) {
perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_UNLOAD, 0);
+ bpf_audit_prog(prog, BPF_AUDIT_UNLOAD);
/* bpf_prog_free_id() must be called first */
bpf_prog_free_id(prog, do_idr_lock);
__bpf_prog_put_noref(prog, true);
@@ -1830,6 +1862,7 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
*/
bpf_prog_kallsyms_add(prog);
perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_LOAD, 0);
+ bpf_audit_prog(prog, BPF_AUDIT_LOAD);
err = bpf_prog_new_fd(prog);
if (err < 0)
@@ -2040,10 +2073,10 @@ static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog,
}
}
-#define BPF_PROG_ATTACH_LAST_FIELD attach_flags
+#define BPF_PROG_ATTACH_LAST_FIELD replace_bpf_fd
#define BPF_F_ATTACH_MASK \
- (BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI)
+ (BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI | BPF_F_REPLACE)
static int bpf_prog_attach(const union bpf_attr *attr)
{
@@ -2305,17 +2338,12 @@ static int bpf_obj_get_next_id(const union bpf_attr *attr,
#define BPF_PROG_GET_FD_BY_ID_LAST_FIELD prog_id
-static int bpf_prog_get_fd_by_id(const union bpf_attr *attr)
+struct bpf_prog *bpf_prog_by_id(u32 id)
{
struct bpf_prog *prog;
- u32 id = attr->prog_id;
- int fd;
-
- if (CHECK_ATTR(BPF_PROG_GET_FD_BY_ID))
- return -EINVAL;
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
+ if (!id)
+ return ERR_PTR(-ENOENT);
spin_lock_bh(&prog_idr_lock);
prog = idr_find(&prog_idr, id);
@@ -2324,7 +2352,22 @@ static int bpf_prog_get_fd_by_id(const union bpf_attr *attr)
else
prog = ERR_PTR(-ENOENT);
spin_unlock_bh(&prog_idr_lock);
+ return prog;
+}
+
+static int bpf_prog_get_fd_by_id(const union bpf_attr *attr)
+{
+ struct bpf_prog *prog;
+ u32 id = attr->prog_id;
+ int fd;
+
+ if (CHECK_ATTR(BPF_PROG_GET_FD_BY_ID))
+ return -EINVAL;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ prog = bpf_prog_by_id(id);
if (IS_ERR(prog))
return PTR_ERR(prog);
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index 7e89f1f49d77..505f4e4b31d2 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -3,6 +3,7 @@
#include <linux/hash.h>
#include <linux/bpf.h>
#include <linux/filter.h>
+#include <linux/ftrace.h>
/* btf_vmlinux has ~22k attachable functions. 1k htab is enough. */
#define TRAMPOLINE_HASH_BITS 10
@@ -13,6 +14,22 @@ static struct hlist_head trampoline_table[TRAMPOLINE_TABLE_SIZE];
/* serializes access to trampoline_table */
static DEFINE_MUTEX(trampoline_mutex);
+void *bpf_jit_alloc_exec_page(void)
+{
+ void *image;
+
+ image = bpf_jit_alloc_exec(PAGE_SIZE);
+ if (!image)
+ return NULL;
+
+ set_vm_flush_reset_perms(image);
+ /* Keep image as writeable. The alternative is to keep flipping ro/rw
+ * everytime new program is attached or detached.
+ */
+ set_memory_x((long)image, 1);
+ return image;
+}
+
struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
{
struct bpf_trampoline *tr;
@@ -33,7 +50,7 @@ struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
goto out;
/* is_root was checked earlier. No need for bpf_jit_charge_modmem() */
- image = bpf_jit_alloc_exec(PAGE_SIZE);
+ image = bpf_jit_alloc_exec_page();
if (!image) {
kfree(tr);
tr = NULL;
@@ -47,18 +64,66 @@ struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
mutex_init(&tr->mutex);
for (i = 0; i < BPF_TRAMP_MAX; i++)
INIT_HLIST_HEAD(&tr->progs_hlist[i]);
-
- set_vm_flush_reset_perms(image);
- /* Keep image as writeable. The alternative is to keep flipping ro/rw
- * everytime new program is attached or detached.
- */
- set_memory_x((long)image, 1);
tr->image = image;
out:
mutex_unlock(&trampoline_mutex);
return tr;
}
+static int is_ftrace_location(void *ip)
+{
+ long addr;
+
+ addr = ftrace_location((long)ip);
+ if (!addr)
+ return 0;
+ if (WARN_ON_ONCE(addr != (long)ip))
+ return -EFAULT;
+ return 1;
+}
+
+static int unregister_fentry(struct bpf_trampoline *tr, void *old_addr)
+{
+ void *ip = tr->func.addr;
+ int ret;
+
+ if (tr->func.ftrace_managed)
+ ret = unregister_ftrace_direct((long)ip, (long)old_addr);
+ else
+ ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, old_addr, NULL);
+ return ret;
+}
+
+static int modify_fentry(struct bpf_trampoline *tr, void *old_addr, void *new_addr)
+{
+ void *ip = tr->func.addr;
+ int ret;
+
+ if (tr->func.ftrace_managed)
+ ret = modify_ftrace_direct((long)ip, (long)old_addr, (long)new_addr);
+ else
+ ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, old_addr, new_addr);
+ return ret;
+}
+
+/* first time registering */
+static int register_fentry(struct bpf_trampoline *tr, void *new_addr)
+{
+ void *ip = tr->func.addr;
+ int ret;
+
+ ret = is_ftrace_location(ip);
+ if (ret < 0)
+ return ret;
+ tr->func.ftrace_managed = ret;
+
+ if (tr->func.ftrace_managed)
+ ret = register_ftrace_direct((long)ip, (long)new_addr);
+ else
+ ret = bpf_arch_text_poke(ip, BPF_MOD_CALL, NULL, new_addr);
+ return ret;
+}
+
/* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50
* bytes on x86. Pick a number to fit into PAGE_SIZE / 2
*/
@@ -77,8 +142,7 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr)
int err;
if (fentry_cnt + fexit_cnt == 0) {
- err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_CALL,
- old_image, NULL);
+ err = unregister_fentry(tr, old_image);
tr->selector = 0;
goto out;
}
@@ -105,12 +169,10 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr)
if (tr->selector)
/* progs already running at this address */
- err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_CALL,
- old_image, new_image);
+ err = modify_fentry(tr, old_image, new_image);
else
/* first time registering */
- err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_CALL, NULL,
- new_image);
+ err = register_fentry(tr, new_image);
if (err)
goto out;
tr->selector++;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 034ef81f935b..ce85e7041f0c 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -907,7 +907,8 @@ static const int caller_saved[CALLER_SAVED_REGS] = {
BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
};
-static void __mark_reg_not_init(struct bpf_reg_state *reg);
+static void __mark_reg_not_init(const struct bpf_verifier_env *env,
+ struct bpf_reg_state *reg);
/* Mark the unknown part of a register (variable offset or scalar value) as
* known to have the value @imm.
@@ -945,7 +946,7 @@ static void mark_reg_known_zero(struct bpf_verifier_env *env,
verbose(env, "mark_reg_known_zero(regs, %u)\n", regno);
/* Something bad happened, let's kill all regs */
for (regno = 0; regno < MAX_BPF_REG; regno++)
- __mark_reg_not_init(regs + regno);
+ __mark_reg_not_init(env, regs + regno);
return;
}
__mark_reg_known_zero(regs + regno);
@@ -1054,7 +1055,8 @@ static void __mark_reg_unbounded(struct bpf_reg_state *reg)
}
/* Mark a register as having a completely unknown (scalar) value. */
-static void __mark_reg_unknown(struct bpf_reg_state *reg)
+static void __mark_reg_unknown(const struct bpf_verifier_env *env,
+ struct bpf_reg_state *reg)
{
/*
* Clear type, id, off, and union(map_ptr, range) and
@@ -1064,6 +1066,8 @@ static void __mark_reg_unknown(struct bpf_reg_state *reg)
reg->type = SCALAR_VALUE;
reg->var_off = tnum_unknown;
reg->frameno = 0;
+ reg->precise = env->subprog_cnt > 1 || !env->allow_ptr_leaks ?
+ true : false;
__mark_reg_unbounded(reg);
}
@@ -1074,19 +1078,16 @@ static void mark_reg_unknown(struct bpf_verifier_env *env,
verbose(env, "mark_reg_unknown(regs, %u)\n", regno);
/* Something bad happened, let's kill all regs except FP */
for (regno = 0; regno < BPF_REG_FP; regno++)
- __mark_reg_not_init(regs + regno);
+ __mark_reg_not_init(env, regs + regno);
return;
}
- regs += regno;
- __mark_reg_unknown(regs);
- /* constant backtracking is enabled for root without bpf2bpf calls */
- regs->precise = env->subprog_cnt > 1 || !env->allow_ptr_leaks ?
- true : false;
+ __mark_reg_unknown(env, regs + regno);
}
-static void __mark_reg_not_init(struct bpf_reg_state *reg)
+static void __mark_reg_not_init(const struct bpf_verifier_env *env,
+ struct bpf_reg_state *reg)
{
- __mark_reg_unknown(reg);
+ __mark_reg_unknown(env, reg);
reg->type = NOT_INIT;
}
@@ -1097,10 +1098,10 @@ static void mark_reg_not_init(struct bpf_verifier_env *env,
verbose(env, "mark_reg_not_init(regs, %u)\n", regno);
/* Something bad happened, let's kill all regs except FP */
for (regno = 0; regno < BPF_REG_FP; regno++)
- __mark_reg_not_init(regs + regno);
+ __mark_reg_not_init(env, regs + regno);
return;
}
- __mark_reg_not_init(regs + regno);
+ __mark_reg_not_init(env, regs + regno);
}
#define DEF_NOT_SUBREG (0)
@@ -3234,7 +3235,7 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
}
if (state->stack[spi].slot_type[0] == STACK_SPILL &&
state->stack[spi].spilled_ptr.type == SCALAR_VALUE) {
- __mark_reg_unknown(&state->stack[spi].spilled_ptr);
+ __mark_reg_unknown(env, &state->stack[spi].spilled_ptr);
for (j = 0; j < BPF_REG_SIZE; j++)
state->stack[spi].slot_type[j] = STACK_MISC;
goto mark;
@@ -3892,7 +3893,7 @@ static void __clear_all_pkt_pointers(struct bpf_verifier_env *env,
if (!reg)
continue;
if (reg_is_pkt_pointer_any(reg))
- __mark_reg_unknown(reg);
+ __mark_reg_unknown(env, reg);
}
}
@@ -3920,7 +3921,7 @@ static void release_reg_references(struct bpf_verifier_env *env,
if (!reg)
continue;
if (reg->ref_obj_id == ref_obj_id)
- __mark_reg_unknown(reg);
+ __mark_reg_unknown(env, reg);
}
}
@@ -4134,6 +4135,7 @@ record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
struct bpf_map *map = meta->map_ptr;
struct tnum range;
u64 val;
+ int err;
if (func_id != BPF_FUNC_tail_call)
return 0;
@@ -4150,6 +4152,10 @@ record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
return 0;
}
+ err = mark_chain_precision(env, BPF_REG_3);
+ if (err)
+ return err;
+
val = reg->var_off.value;
if (bpf_map_key_unseen(aux))
bpf_map_key_store(aux, val);
@@ -4577,7 +4583,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
/* Taint dst register if offset had invalid bounds derived from
* e.g. dead branches.
*/
- __mark_reg_unknown(dst_reg);
+ __mark_reg_unknown(env, dst_reg);
return 0;
}
@@ -4829,13 +4835,13 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
/* Taint dst register if offset had invalid bounds derived from
* e.g. dead branches.
*/
- __mark_reg_unknown(dst_reg);
+ __mark_reg_unknown(env, dst_reg);
return 0;
}
if (!src_known &&
opcode != BPF_ADD && opcode != BPF_SUB && opcode != BPF_AND) {
- __mark_reg_unknown(dst_reg);
+ __mark_reg_unknown(env, dst_reg);
return 0;
}
@@ -6258,6 +6264,7 @@ static bool may_access_skb(enum bpf_prog_type type)
static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
{
struct bpf_reg_state *regs = cur_regs(env);
+ static const int ctx_reg = BPF_REG_6;
u8 mode = BPF_MODE(insn->code);
int i, err;
@@ -6291,7 +6298,7 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
}
/* check whether implicit source operand (register R6) is readable */
- err = check_reg_arg(env, BPF_REG_6, SRC_OP);
+ err = check_reg_arg(env, ctx_reg, SRC_OP);
if (err)
return err;
@@ -6310,7 +6317,7 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
return -EINVAL;
}
- if (regs[BPF_REG_6].type != PTR_TO_CTX) {
+ if (regs[ctx_reg].type != PTR_TO_CTX) {
verbose(env,
"at the time of BPF_LD_ABS|IND R6 != pointer to skb\n");
return -EINVAL;
@@ -6323,6 +6330,10 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
return err;
}
+ err = check_ctx_reg(env, &regs[ctx_reg], ctx_reg);
+ if (err < 0)
+ return err;
+
/* reset caller saved regs to unreadable */
for (i = 0; i < CALLER_SAVED_REGS; i++) {
mark_reg_not_init(env, regs, caller_saved[i]);
@@ -6977,7 +6988,7 @@ static void clean_func_state(struct bpf_verifier_env *env,
/* since the register is unused, clear its state
* to make further comparison simpler
*/
- __mark_reg_not_init(&st->regs[i]);
+ __mark_reg_not_init(env, &st->regs[i]);
}
for (i = 0; i < st->allocated_stack / BPF_REG_SIZE; i++) {
@@ -6985,7 +6996,7 @@ static void clean_func_state(struct bpf_verifier_env *env,
/* liveness must not touch this stack slot anymore */
st->stack[i].spilled_ptr.live |= REG_LIVE_DONE;
if (!(live & REG_LIVE_READ)) {
- __mark_reg_not_init(&st->stack[i].spilled_ptr);
+ __mark_reg_not_init(env, &st->stack[i].spilled_ptr);
for (j = 0; j < BPF_REG_SIZE; j++)
st->stack[i].slot_type[j] = STACK_INVALID;
}
@@ -8268,7 +8279,7 @@ static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
env->used_maps[env->used_map_cnt++] = map;
if (bpf_map_is_cgroup_storage(map) &&
- bpf_cgroup_storage_assign(env->prog, map)) {
+ bpf_cgroup_storage_assign(env->prog->aux, map)) {
verbose(env, "only one cgroup storage of each type is allowed\n");
fdput(f);
return -EBUSY;
@@ -8298,18 +8309,8 @@ next_insn:
/* drop refcnt of maps used by the rejected program */
static void release_maps(struct bpf_verifier_env *env)
{
- enum bpf_cgroup_storage_type stype;
- int i;
-
- for_each_cgroup_storage_type(stype) {
- if (!env->prog->aux->cgroup_storage[stype])
- continue;
- bpf_cgroup_storage_release(env->prog,
- env->prog->aux->cgroup_storage[stype]);
- }
-
- for (i = 0; i < env->used_map_cnt; i++)
- bpf_map_put(env->used_maps[i]);
+ __bpf_free_used_maps(env->prog->aux, env->used_maps,
+ env->used_map_cnt);
}
/* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */
@@ -9282,7 +9283,8 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
insn->code = BPF_JMP | BPF_TAIL_CALL;
aux = &env->insn_aux_data[i + delta];
- if (prog->jit_requested && !expect_blinding &&
+ if (env->allow_ptr_leaks && !expect_blinding &&
+ prog->jit_requested &&
!bpf_map_key_poisoned(aux) &&
!bpf_map_ptr_poisoned(aux) &&
!bpf_map_ptr_unpriv(aux)) {
diff --git a/kernel/bpf/xskmap.c b/kernel/bpf/xskmap.c
index 90c4fce1c981..2cc5c8f4c800 100644
--- a/kernel/bpf/xskmap.c
+++ b/kernel/bpf/xskmap.c
@@ -72,9 +72,9 @@ static void xsk_map_sock_delete(struct xdp_sock *xs,
static struct bpf_map *xsk_map_alloc(union bpf_attr *attr)
{
struct bpf_map_memory mem;
- int cpu, err, numa_node;
+ int err, numa_node;
struct xsk_map *m;
- u64 cost, size;
+ u64 size;
if (!capable(CAP_NET_ADMIN))
return ERR_PTR(-EPERM);
@@ -86,9 +86,8 @@ static struct bpf_map *xsk_map_alloc(union bpf_attr *attr)
numa_node = bpf_map_attr_numa_node(attr);
size = struct_size(m, xsk_map, attr->max_entries);
- cost = size + array_size(sizeof(*m->flush_list), num_possible_cpus());
- err = bpf_map_charge_init(&mem, cost);
+ err = bpf_map_charge_init(&mem, size);
if (err < 0)
return ERR_PTR(err);
@@ -102,16 +101,6 @@ static struct bpf_map *xsk_map_alloc(union bpf_attr *attr)
bpf_map_charge_move(&m->map.memory, &mem);
spin_lock_init(&m->lock);
- m->flush_list = alloc_percpu(struct list_head);
- if (!m->flush_list) {
- bpf_map_charge_finish(&m->map.memory);
- bpf_map_area_free(m);
- return ERR_PTR(-ENOMEM);
- }
-
- for_each_possible_cpu(cpu)
- INIT_LIST_HEAD(per_cpu_ptr(m->flush_list, cpu));
-
return &m->map;
}
@@ -121,7 +110,6 @@ static void xsk_map_free(struct bpf_map *map)
bpf_clear_redirect_map(map);
synchronize_net();
- free_percpu(m->flush_list);
bpf_map_area_free(m);
}
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 735af8f15f95..725365df066d 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -6288,12 +6288,13 @@ void cgroup_sk_free(struct sock_cgroup_data *skcd)
#ifdef CONFIG_CGROUP_BPF
int cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
- enum bpf_attach_type type, u32 flags)
+ struct bpf_prog *replace_prog, enum bpf_attach_type type,
+ u32 flags)
{
int ret;
mutex_lock(&cgroup_mutex);
- ret = __cgroup_bpf_attach(cgrp, prog, type, flags);
+ ret = __cgroup_bpf_attach(cgrp, prog, replace_prog, type, flags);
mutex_unlock(&cgroup_mutex);
return ret;
}
diff --git a/kernel/cred.c b/kernel/cred.c
index c0a4c12d38b2..9ed51b70ed80 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -223,7 +223,7 @@ struct cred *cred_alloc_blank(void)
new->magic = CRED_MAGIC;
#endif
- if (security_cred_alloc_blank(new, GFP_KERNEL) < 0)
+ if (security_cred_alloc_blank(new, GFP_KERNEL_ACCOUNT) < 0)
goto error;
return new;
@@ -282,7 +282,7 @@ struct cred *prepare_creds(void)
new->security = NULL;
#endif
- if (security_prepare_creds(new, old, GFP_KERNEL) < 0)
+ if (security_prepare_creds(new, old, GFP_KERNEL_ACCOUNT) < 0)
goto error;
validate_creds(new);
return new;
@@ -715,7 +715,7 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon)
#ifdef CONFIG_SECURITY
new->security = NULL;
#endif
- if (security_prepare_creds(new, old, GFP_KERNEL) < 0)
+ if (security_prepare_creds(new, old, GFP_KERNEL_ACCOUNT) < 0)
goto error;
put_cred(old);
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 4ff86d57f9e5..a1f8bde19b56 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -10523,7 +10523,7 @@ again:
goto unlock;
}
- list_for_each_entry_rcu(pmu, &pmus, entry) {
+ list_for_each_entry_rcu(pmu, &pmus, entry, lockdep_is_held(&pmus_srcu)) {
ret = perf_try_init_event(pmu, event);
if (!ret)
goto unlock;
diff --git a/kernel/exit.c b/kernel/exit.c
index bcbd59888e67..2833ffb0c211 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -517,10 +517,6 @@ static struct task_struct *find_child_reaper(struct task_struct *father,
}
write_unlock_irq(&tasklist_lock);
- if (unlikely(pid_ns == &init_pid_ns)) {
- panic("Attempted to kill init! exitcode=0x%08x\n",
- father->signal->group_exit_code ?: father->exit_code);
- }
list_for_each_entry_safe(p, n, dead, ptrace_entry) {
list_del_init(&p->ptrace_entry);
@@ -766,6 +762,14 @@ void __noreturn do_exit(long code)
acct_update_integrals(tsk);
group_dead = atomic_dec_and_test(&tsk->signal->live);
if (group_dead) {
+ /*
+ * If the last thread of global init has exited, panic
+ * immediately to get a useable coredump.
+ */
+ if (unlikely(is_global_init(tsk)))
+ panic("Attempted to kill init! exitcode=0x%08x\n",
+ tsk->signal->group_exit_code ?: (int)code);
+
#ifdef CONFIG_POSIX_TIMERS
hrtimer_cancel(&tsk->signal->real_timer);
exit_itimers(tsk->signal);
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index 54cc5f9286e9..5352ce50a97e 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -733,9 +733,6 @@ static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigne
*/
void __sched mutex_unlock(struct mutex *lock)
{
-#ifdef CONFIG_DEBUG_MUTEXES
- WARN_ON(in_interrupt());
-#endif
#ifndef CONFIG_DEBUG_LOCK_ALLOC
if (__mutex_unlock_fast(lock))
return;
@@ -1416,7 +1413,6 @@ int __sched mutex_trylock(struct mutex *lock)
#ifdef CONFIG_DEBUG_MUTEXES
DEBUG_LOCKS_WARN_ON(lock->magic != lock);
- WARN_ON(in_interrupt());
#endif
locked = __mutex_trylock(lock);
diff --git a/kernel/locking/spinlock_debug.c b/kernel/locking/spinlock_debug.c
index 399669f7eba8..472dd462a40c 100644
--- a/kernel/locking/spinlock_debug.c
+++ b/kernel/locking/spinlock_debug.c
@@ -51,19 +51,19 @@ EXPORT_SYMBOL(__rwlock_init);
static void spin_dump(raw_spinlock_t *lock, const char *msg)
{
- struct task_struct *owner = NULL;
+ struct task_struct *owner = READ_ONCE(lock->owner);
- if (lock->owner && lock->owner != SPINLOCK_OWNER_INIT)
- owner = lock->owner;
+ if (owner == SPINLOCK_OWNER_INIT)
+ owner = NULL;
printk(KERN_EMERG "BUG: spinlock %s on CPU#%d, %s/%d\n",
msg, raw_smp_processor_id(),
current->comm, task_pid_nr(current));
printk(KERN_EMERG " lock: %pS, .magic: %08x, .owner: %s/%d, "
".owner_cpu: %d\n",
- lock, lock->magic,
+ lock, READ_ONCE(lock->magic),
owner ? owner->comm : "<none>",
owner ? task_pid_nr(owner) : -1,
- lock->owner_cpu);
+ READ_ONCE(lock->owner_cpu));
dump_stack();
}
@@ -80,16 +80,16 @@ static void spin_bug(raw_spinlock_t *lock, const char *msg)
static inline void
debug_spin_lock_before(raw_spinlock_t *lock)
{
- SPIN_BUG_ON(lock->magic != SPINLOCK_MAGIC, lock, "bad magic");
- SPIN_BUG_ON(lock->owner == current, lock, "recursion");
- SPIN_BUG_ON(lock->owner_cpu == raw_smp_processor_id(),
+ SPIN_BUG_ON(READ_ONCE(lock->magic) != SPINLOCK_MAGIC, lock, "bad magic");
+ SPIN_BUG_ON(READ_ONCE(lock->owner) == current, lock, "recursion");
+ SPIN_BUG_ON(READ_ONCE(lock->owner_cpu) == raw_smp_processor_id(),
lock, "cpu recursion");
}
static inline void debug_spin_lock_after(raw_spinlock_t *lock)
{
- lock->owner_cpu = raw_smp_processor_id();
- lock->owner = current;
+ WRITE_ONCE(lock->owner_cpu, raw_smp_processor_id());
+ WRITE_ONCE(lock->owner, current);
}
static inline void debug_spin_unlock(raw_spinlock_t *lock)
@@ -99,8 +99,8 @@ static inline void debug_spin_unlock(raw_spinlock_t *lock)
SPIN_BUG_ON(lock->owner != current, lock, "wrong owner");
SPIN_BUG_ON(lock->owner_cpu != raw_smp_processor_id(),
lock, "wrong CPU");
- lock->owner = SPINLOCK_OWNER_INIT;
- lock->owner_cpu = -1;
+ WRITE_ONCE(lock->owner, SPINLOCK_OWNER_INIT);
+ WRITE_ONCE(lock->owner_cpu, -1);
}
/*
@@ -187,8 +187,8 @@ static inline void debug_write_lock_before(rwlock_t *lock)
static inline void debug_write_lock_after(rwlock_t *lock)
{
- lock->owner_cpu = raw_smp_processor_id();
- lock->owner = current;
+ WRITE_ONCE(lock->owner_cpu, raw_smp_processor_id());
+ WRITE_ONCE(lock->owner, current);
}
static inline void debug_write_unlock(rwlock_t *lock)
@@ -197,8 +197,8 @@ static inline void debug_write_unlock(rwlock_t *lock)
RWLOCK_BUG_ON(lock->owner != current, lock, "wrong owner");
RWLOCK_BUG_ON(lock->owner_cpu != raw_smp_processor_id(),
lock, "wrong CPU");
- lock->owner = SPINLOCK_OWNER_INIT;
- lock->owner_cpu = -1;
+ WRITE_ONCE(lock->owner, SPINLOCK_OWNER_INIT);
+ WRITE_ONCE(lock->owner_cpu, -1);
}
void do_raw_write_lock(rwlock_t *lock)
diff --git a/kernel/module.c b/kernel/module.c
index 3a486f826224..b56f3224b161 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -3730,6 +3730,7 @@ static int complete_formation(struct module *mod, struct load_info *info)
module_enable_ro(mod, false);
module_enable_nx(mod);
+ module_enable_x(mod);
/* Mark state as coming so strong_try_module_get() ignores us,
* but kallsyms etc. can see us. */
@@ -3752,11 +3753,6 @@ static int prepare_coming_module(struct module *mod)
if (err)
return err;
- /* Make module executable after ftrace is enabled */
- mutex_lock(&module_mutex);
- module_enable_x(mod);
- mutex_unlock(&module_mutex);
-
blocking_notifier_call_chain(&module_notify_list,
MODULE_STATE_COMING, mod);
return 0;
diff --git a/kernel/sched/cpufreq.c b/kernel/sched/cpufreq.c
index b5dcd1d83c7f..7c2fe50fd76d 100644
--- a/kernel/sched/cpufreq.c
+++ b/kernel/sched/cpufreq.c
@@ -5,6 +5,8 @@
* Copyright (C) 2016, Intel Corporation
* Author: Rafael J. Wysocki <[email protected]>
*/
+#include <linux/cpufreq.h>
+
#include "sched.h"
DEFINE_PER_CPU(struct update_util_data __rcu *, cpufreq_update_util_data);
@@ -57,3 +59,19 @@ void cpufreq_remove_update_util_hook(int cpu)
rcu_assign_pointer(per_cpu(cpufreq_update_util_data, cpu), NULL);
}
EXPORT_SYMBOL_GPL(cpufreq_remove_update_util_hook);
+
+/**
+ * cpufreq_this_cpu_can_update - Check if cpufreq policy can be updated.
+ * @policy: cpufreq policy to check.
+ *
+ * Return 'true' if:
+ * - the local and remote CPUs share @policy,
+ * - dvfs_possible_from_any_cpu is set in @policy and the local CPU is not going
+ * offline (in which case it is not expected to run cpufreq updates any more).
+ */
+bool cpufreq_this_cpu_can_update(struct cpufreq_policy *policy)
+{
+ return cpumask_test_cpu(smp_processor_id(), policy->cpus) ||
+ (policy->dvfs_possible_from_any_cpu &&
+ rcu_dereference_sched(*this_cpu_ptr(&cpufreq_update_util_data)));
+}
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 322ca8860f54..9b8916fd00a2 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -82,12 +82,10 @@ static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time)
* by the hardware, as calculating the frequency is pointless if
* we cannot in fact act on it.
*
- * For the slow switching platforms, the kthread is always scheduled on
- * the right set of CPUs and any CPU can find the next frequency and
- * schedule the kthread.
+ * This is needed on the slow switching platforms too to prevent CPUs
+ * going offline from leaving stale IRQ work items behind.
*/
- if (sg_policy->policy->fast_switch_enabled &&
- !cpufreq_this_cpu_can_update(sg_policy->policy))
+ if (!cpufreq_this_cpu_can_update(sg_policy->policy))
return false;
if (unlikely(sg_policy->limits_changed)) {
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 08a233e97a01..ba749f579714 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -7328,7 +7328,14 @@ static int detach_tasks(struct lb_env *env)
load < 16 && !env->sd->nr_balance_failed)
goto next;
- if (load/2 > env->imbalance)
+ /*
+ * Make sure that we don't migrate too much load.
+ * Nevertheless, let relax the constraint if
+ * scheduler fails to find a good waiting task to
+ * migrate.
+ */
+ if (load/2 > env->imbalance &&
+ env->sd->nr_balance_failed <= env->sd->cache_nice_tries)
goto next;
env->imbalance -= load;
@@ -8417,6 +8424,10 @@ find_idlest_group(struct sched_domain *sd, struct task_struct *p,
if (!idlest)
return NULL;
+ /* The local group has been skipped because of CPU affinity */
+ if (!local)
+ return idlest;
+
/*
* If the local group is idler than the selected idlest group
* don't try and push the task.
diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
index 517e3719027e..ce8f6748678a 100644
--- a/kernel/sched/psi.c
+++ b/kernel/sched/psi.c
@@ -185,7 +185,8 @@ static void group_init(struct psi_group *group)
for_each_possible_cpu(cpu)
seqcount_init(&per_cpu_ptr(group->pcpu, cpu)->seq);
- group->avg_next_update = sched_clock() + psi_period;
+ group->avg_last_update = sched_clock();
+ group->avg_next_update = group->avg_last_update + psi_period;
INIT_DELAYED_WORK(&group->avgs_work, psi_avgs_work);
mutex_init(&group->avgs_lock);
/* Init trigger-related members */
@@ -481,7 +482,7 @@ static u64 window_update(struct psi_window *win, u64 now, u64 value)
u32 remaining;
remaining = win->size - elapsed;
- growth += div_u64(win->prev_growth * remaining, win->size);
+ growth += div64_u64(win->prev_growth * remaining, win->size);
}
return growth;
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 12d2227e5786..b6ea3dcb57bf 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -1026,6 +1026,13 @@ static long seccomp_notify_recv(struct seccomp_filter *filter,
struct seccomp_notif unotif;
ssize_t ret;
+ /* Verify that we're not given garbage to keep struct extensible. */
+ ret = check_zeroed_user(buf, sizeof(unotif));
+ if (ret < 0)
+ return ret;
+ if (!ret)
+ return -EINVAL;
+
memset(&unotif, 0, sizeof(unotif));
ret = down_interruptible(&filter->notif->request);
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index 13a0f2e6ebc2..e2ac0e37c4ae 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -554,25 +554,33 @@ static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info)
static struct taskstats *taskstats_tgid_alloc(struct task_struct *tsk)
{
struct signal_struct *sig = tsk->signal;
- struct taskstats *stats;
+ struct taskstats *stats_new, *stats;
- if (sig->stats || thread_group_empty(tsk))
- goto ret;
+ /* Pairs with smp_store_release() below. */
+ stats = smp_load_acquire(&sig->stats);
+ if (stats || thread_group_empty(tsk))
+ return stats;
/* No problem if kmem_cache_zalloc() fails */
- stats = kmem_cache_zalloc(taskstats_cache, GFP_KERNEL);
+ stats_new = kmem_cache_zalloc(taskstats_cache, GFP_KERNEL);
spin_lock_irq(&tsk->sighand->siglock);
- if (!sig->stats) {
- sig->stats = stats;
- stats = NULL;
+ stats = sig->stats;
+ if (!stats) {
+ /*
+ * Pairs with smp_store_release() above and order the
+ * kmem_cache_zalloc().
+ */
+ smp_store_release(&sig->stats, stats_new);
+ stats = stats_new;
+ stats_new = NULL;
}
spin_unlock_irq(&tsk->sighand->siglock);
- if (stats)
- kmem_cache_free(taskstats_cache, stats);
-ret:
- return sig->stats;
+ if (stats_new)
+ kmem_cache_free(taskstats_cache, stats_new);
+
+ return stats;
}
/* Send pid data out on exit */
diff --git a/kernel/time/posix-clock.c b/kernel/time/posix-clock.c
index ec960bb939fd..200fb2d3be99 100644
--- a/kernel/time/posix-clock.c
+++ b/kernel/time/posix-clock.c
@@ -14,8 +14,6 @@
#include "posix-timers.h"
-static void delete_clock(struct kref *kref);
-
/*
* Returns NULL if the posix_clock instance attached to 'fp' is old and stale.
*/
@@ -125,7 +123,7 @@ static int posix_clock_open(struct inode *inode, struct file *fp)
err = 0;
if (!err) {
- kref_get(&clk->kref);
+ get_device(clk->dev);
fp->private_data = clk;
}
out:
@@ -141,7 +139,7 @@ static int posix_clock_release(struct inode *inode, struct file *fp)
if (clk->ops.release)
err = clk->ops.release(clk);
- kref_put(&clk->kref, delete_clock);
+ put_device(clk->dev);
fp->private_data = NULL;
@@ -161,38 +159,35 @@ static const struct file_operations posix_clock_file_operations = {
#endif
};
-int posix_clock_register(struct posix_clock *clk, dev_t devid)
+int posix_clock_register(struct posix_clock *clk, struct device *dev)
{
int err;
- kref_init(&clk->kref);
init_rwsem(&clk->rwsem);
cdev_init(&clk->cdev, &posix_clock_file_operations);
+ err = cdev_device_add(&clk->cdev, dev);
+ if (err) {
+ pr_err("%s unable to add device %d:%d\n",
+ dev_name(dev), MAJOR(dev->devt), MINOR(dev->devt));
+ return err;
+ }
clk->cdev.owner = clk->ops.owner;
- err = cdev_add(&clk->cdev, devid, 1);
+ clk->dev = dev;
- return err;
+ return 0;
}
EXPORT_SYMBOL_GPL(posix_clock_register);
-static void delete_clock(struct kref *kref)
-{
- struct posix_clock *clk = container_of(kref, struct posix_clock, kref);
-
- if (clk->release)
- clk->release(clk);
-}
-
void posix_clock_unregister(struct posix_clock *clk)
{
- cdev_del(&clk->cdev);
+ cdev_device_del(&clk->cdev, clk->dev);
down_write(&clk->rwsem);
clk->zombie = true;
up_write(&clk->rwsem);
- kref_put(&clk->kref, delete_clock);
+ put_device(clk->dev);
}
EXPORT_SYMBOL_GPL(posix_clock_unregister);
diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c
index 67e0c462b059..1af321dec0f1 100644
--- a/kernel/trace/fgraph.c
+++ b/kernel/trace/fgraph.c
@@ -96,11 +96,34 @@ ftrace_push_return_trace(unsigned long ret, unsigned long func,
return 0;
}
+/*
+ * Not all archs define MCOUNT_INSN_SIZE which is used to look for direct
+ * functions. But those archs currently don't support direct functions
+ * anyway, and ftrace_find_rec_direct() is just a stub for them.
+ * Define MCOUNT_INSN_SIZE to keep those archs compiling.
+ */
+#ifndef MCOUNT_INSN_SIZE
+/* Make sure this only works without direct calls */
+# ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
+# error MCOUNT_INSN_SIZE not defined with direct calls enabled
+# endif
+# define MCOUNT_INSN_SIZE 0
+#endif
+
int function_graph_enter(unsigned long ret, unsigned long func,
unsigned long frame_pointer, unsigned long *retp)
{
struct ftrace_graph_ent trace;
+ /*
+ * Skip graph tracing if the return location is served by direct trampoline,
+ * since call sequence and return addresses is unpredicatable anymore.
+ * Ex: BPF trampoline may call original function and may skip frame
+ * depending on type of BPF programs attached.
+ */
+ if (ftrace_direct_func_count &&
+ ftrace_find_rec_direct(ret - MCOUNT_INSN_SIZE))
+ return -EBUSY;
trace.func = func;
trace.depth = ++current->curr_ret_depth;
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 74439ab5c2b6..9bf1f2cd515e 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -526,8 +526,7 @@ static int function_stat_show(struct seq_file *m, void *v)
}
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- avg = rec->time;
- do_div(avg, rec->counter);
+ avg = div64_ul(rec->time, rec->counter);
if (tracing_thresh && (avg < tracing_thresh))
goto out;
#endif
@@ -553,7 +552,8 @@ static int function_stat_show(struct seq_file *m, void *v)
* Divide only 1000 for ns^2 -> us^2 conversion.
* trace_print_graph_duration will divide 1000 again.
*/
- do_div(stddev, rec->counter * (rec->counter - 1) * 1000);
+ stddev = div64_ul(stddev,
+ rec->counter * (rec->counter - 1) * 1000);
}
trace_seq_init(&s);
@@ -2364,7 +2364,7 @@ int ftrace_direct_func_count;
* Search the direct_functions hash to see if the given instruction pointer
* has a direct caller attached to it.
*/
-static unsigned long find_rec_direct(unsigned long ip)
+unsigned long ftrace_find_rec_direct(unsigned long ip)
{
struct ftrace_func_entry *entry;
@@ -2380,7 +2380,7 @@ static void call_direct_funcs(unsigned long ip, unsigned long pip,
{
unsigned long addr;
- addr = find_rec_direct(ip);
+ addr = ftrace_find_rec_direct(ip);
if (!addr)
return;
@@ -2393,11 +2393,6 @@ struct ftrace_ops direct_ops = {
| FTRACE_OPS_FL_DIRECT | FTRACE_OPS_FL_SAVE_REGS
| FTRACE_OPS_FL_PERMANENT,
};
-#else
-static inline unsigned long find_rec_direct(unsigned long ip)
-{
- return 0;
-}
#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */
/**
@@ -2417,7 +2412,7 @@ unsigned long ftrace_get_addr_new(struct dyn_ftrace *rec)
if ((rec->flags & FTRACE_FL_DIRECT) &&
(ftrace_rec_count(rec) == 1)) {
- addr = find_rec_direct(rec->ip);
+ addr = ftrace_find_rec_direct(rec->ip);
if (addr)
return addr;
WARN_ON_ONCE(1);
@@ -2458,7 +2453,7 @@ unsigned long ftrace_get_addr_curr(struct dyn_ftrace *rec)
/* Direct calls take precedence over trampolines */
if (rec->flags & FTRACE_FL_DIRECT_EN) {
- addr = find_rec_direct(rec->ip);
+ addr = ftrace_find_rec_direct(rec->ip);
if (addr)
return addr;
WARN_ON_ONCE(1);
@@ -3604,7 +3599,7 @@ static int t_show(struct seq_file *m, void *v)
if (rec->flags & FTRACE_FL_DIRECT) {
unsigned long direct;
- direct = find_rec_direct(rec->ip);
+ direct = ftrace_find_rec_direct(rec->ip);
if (direct)
seq_printf(m, "\n\tdirect-->%pS", (void *)direct);
}
@@ -5008,7 +5003,7 @@ int register_ftrace_direct(unsigned long ip, unsigned long addr)
mutex_lock(&direct_mutex);
/* See if there's a direct function at @ip already */
- if (find_rec_direct(ip))
+ if (ftrace_find_rec_direct(ip))
goto out_unlock;
ret = -ENODEV;
@@ -5027,7 +5022,7 @@ int register_ftrace_direct(unsigned long ip, unsigned long addr)
if (ip != rec->ip) {
ip = rec->ip;
/* Need to check this ip for a direct. */
- if (find_rec_direct(ip))
+ if (ftrace_find_rec_direct(ip))
goto out_unlock;
}
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 4bf050fcfe3b..3f655371eaf6 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -5070,7 +5070,7 @@ static __init int test_ringbuffer(void)
int ret = 0;
if (security_locked_down(LOCKDOWN_TRACEFS)) {
- pr_warning("Lockdown is enabled, skipping ring buffer tests\n");
+ pr_warn("Lockdown is enabled, skipping ring buffer tests\n");
return 0;
}
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 23459d53d576..ddb7e7f5fe8d 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1889,7 +1889,7 @@ int __init register_tracer(struct tracer *type)
}
if (security_locked_down(LOCKDOWN_TRACEFS)) {
- pr_warning("Can not register tracer %s due to lockdown\n",
+ pr_warn("Can not register tracer %s due to lockdown\n",
type->name);
return -EPERM;
}
@@ -4685,6 +4685,10 @@ int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
{
+ if ((mask == TRACE_ITER_RECORD_TGID) ||
+ (mask == TRACE_ITER_RECORD_CMD))
+ lockdep_assert_held(&event_mutex);
+
/* do nothing if flag is already set */
if (!!(tr->trace_flags & mask) == !!enabled)
return 0;
@@ -4752,6 +4756,7 @@ static int trace_set_options(struct trace_array *tr, char *option)
cmp += len;
+ mutex_lock(&event_mutex);
mutex_lock(&trace_types_lock);
ret = match_string(trace_options, -1, cmp);
@@ -4762,6 +4767,7 @@ static int trace_set_options(struct trace_array *tr, char *option)
ret = set_tracer_flag(tr, 1 << ret, !neg);
mutex_unlock(&trace_types_lock);
+ mutex_unlock(&event_mutex);
/*
* If the first trailing whitespace is replaced with '\0' by strstrip,
@@ -8076,9 +8082,11 @@ trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
if (val != 0 && val != 1)
return -EINVAL;
+ mutex_lock(&event_mutex);
mutex_lock(&trace_types_lock);
ret = set_tracer_flag(tr, 1 << index, val);
mutex_unlock(&trace_types_lock);
+ mutex_unlock(&event_mutex);
if (ret < 0)
return ret;
@@ -8796,7 +8804,7 @@ struct dentry *tracing_init_dentry(void)
struct trace_array *tr = &global_trace;
if (security_locked_down(LOCKDOWN_TRACEFS)) {
- pr_warning("Tracing disabled due to lockdown\n");
+ pr_warn("Tracing disabled due to lockdown\n");
return ERR_PTR(-EPERM);
}
@@ -9244,7 +9252,7 @@ __init static int tracer_alloc_buffers(void)
if (security_locked_down(LOCKDOWN_TRACEFS)) {
- pr_warning("Tracing disabled due to lockdown\n");
+ pr_warn("Tracing disabled due to lockdown\n");
return -EPERM;
}
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index c6de3cebc127..a5b614cc3887 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -320,7 +320,8 @@ void trace_event_enable_cmd_record(bool enable)
struct trace_event_file *file;
struct trace_array *tr;
- mutex_lock(&event_mutex);
+ lockdep_assert_held(&event_mutex);
+
do_for_each_event_file(tr, file) {
if (!(file->flags & EVENT_FILE_FL_ENABLED))
@@ -334,7 +335,6 @@ void trace_event_enable_cmd_record(bool enable)
clear_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags);
}
} while_for_each_event_file();
- mutex_unlock(&event_mutex);
}
void trace_event_enable_tgid_record(bool enable)
@@ -342,7 +342,8 @@ void trace_event_enable_tgid_record(bool enable)
struct trace_event_file *file;
struct trace_array *tr;
- mutex_lock(&event_mutex);
+ lockdep_assert_held(&event_mutex);
+
do_for_each_event_file(tr, file) {
if (!(file->flags & EVENT_FILE_FL_ENABLED))
continue;
@@ -356,7 +357,6 @@ void trace_event_enable_tgid_record(bool enable)
&file->flags);
}
} while_for_each_event_file();
- mutex_unlock(&event_mutex);
}
static int __ftrace_event_enable_disable(struct trace_event_file *file,
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index c9a74f82b14a..bf44f6bbd0c3 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -1662,7 +1662,7 @@ static int process_system_preds(struct trace_subsystem_dir *dir,
parse_error(pe, FILT_ERR_BAD_SUBSYS_FILTER, 0);
return -EINVAL;
fail_mem:
- kfree(filter);
+ __free_filter(filter);
/* If any call succeeded, we still need to sync */
if (!fail)
tracepoint_synchronize_unregister();
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index f49d1a36d3ae..f62de5f43e79 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -911,7 +911,26 @@ static notrace void trace_event_raw_event_synth(void *__data,
strscpy(str_field, str_val, STR_VAR_LEN_MAX);
n_u64 += STR_VAR_LEN_MAX / sizeof(u64);
} else {
- entry->fields[n_u64] = var_ref_vals[var_ref_idx + i];
+ struct synth_field *field = event->fields[i];
+ u64 val = var_ref_vals[var_ref_idx + i];
+
+ switch (field->size) {
+ case 1:
+ *(u8 *)&entry->fields[n_u64] = (u8)val;
+ break;
+
+ case 2:
+ *(u16 *)&entry->fields[n_u64] = (u16)val;
+ break;
+
+ case 4:
+ *(u32 *)&entry->fields[n_u64] = (u32)val;
+ break;
+
+ default:
+ entry->fields[n_u64] = val;
+ break;
+ }
n_u64++;
}
}
diff --git a/kernel/trace/trace_events_inject.c b/kernel/trace/trace_events_inject.c
index d43710718ee5..22bcf7c51d1e 100644
--- a/kernel/trace/trace_events_inject.c
+++ b/kernel/trace/trace_events_inject.c
@@ -17,12 +17,10 @@ static int
trace_inject_entry(struct trace_event_file *file, void *rec, int len)
{
struct trace_event_buffer fbuffer;
- struct ring_buffer *buffer;
int written = 0;
void *entry;
rcu_read_lock_sched();
- buffer = file->tr->trace_buffer.buffer;
entry = trace_event_buffer_reserve(&fbuffer, file, len);
if (entry) {
memcpy(entry, rec, len);
@@ -197,7 +195,7 @@ static int parse_entry(char *str, struct trace_event_call *call, void **pentry)
unsigned long irq_flags;
void *entry = NULL;
int entry_size;
- u64 val;
+ u64 val = 0;
int len;
entry = trace_alloc_entry(call, &entry_size);
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 5e43b9664eca..617e297f46dc 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -630,7 +630,7 @@ static void start_wakeup_tracer(struct trace_array *tr)
if (ret) {
pr_info("wakeup trace: Couldn't activate tracepoint"
" probe to kernel_sched_migrate_task\n");
- return;
+ goto fail_deprobe_sched_switch;
}
wakeup_reset(tr);
@@ -648,6 +648,8 @@ static void start_wakeup_tracer(struct trace_array *tr)
printk(KERN_ERR "failed to start wakeup tracer\n");
return;
+fail_deprobe_sched_switch:
+ unregister_trace_sched_switch(probe_wakeup_sched_switch, NULL);
fail_deprobe_wake_new:
unregister_trace_sched_wakeup_new(probe_wakeup, NULL);
fail_deprobe:
diff --git a/kernel/trace/trace_seq.c b/kernel/trace/trace_seq.c
index 344e4c1aa09c..87de6edafd14 100644
--- a/kernel/trace/trace_seq.c
+++ b/kernel/trace/trace_seq.c
@@ -381,7 +381,7 @@ int trace_seq_hex_dump(struct trace_seq *s, const char *prefix_str,
int prefix_type, int rowsize, int groupsize,
const void *buf, size_t len, bool ascii)
{
- unsigned int save_len = s->seq.len;
+ unsigned int save_len = s->seq.len;
if (s->full)
return 0;
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index 4df9a209f7ca..c557f42a9397 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -283,6 +283,11 @@ static void check_stack(unsigned long ip, unsigned long *stack)
local_irq_restore(flags);
}
+/* Some archs may not define MCOUNT_INSN_SIZE */
+#ifndef MCOUNT_INSN_SIZE
+# define MCOUNT_INSN_SIZE 0
+#endif
+
static void
stack_trace_call(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *op, struct pt_regs *pt_regs)
diff --git a/kernel/trace/tracing_map.c b/kernel/trace/tracing_map.c
index 9a1c22310323..9e31bfc818ff 100644
--- a/kernel/trace/tracing_map.c
+++ b/kernel/trace/tracing_map.c
@@ -148,8 +148,8 @@ static int tracing_map_cmp_atomic64(void *val_a, void *val_b)
#define DEFINE_TRACING_MAP_CMP_FN(type) \
static int tracing_map_cmp_##type(void *val_a, void *val_b) \
{ \
- type a = *(type *)val_a; \
- type b = *(type *)val_b; \
+ type a = (type)(*(u64 *)val_a); \
+ type b = (type)(*(u64 *)val_b); \
\
return (a > b) ? 1 : ((a < b) ? -1 : 0); \
}
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index bc88fd939f4e..cfc923558e04 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -4374,8 +4374,8 @@ void destroy_workqueue(struct workqueue_struct *wq)
for_each_pwq(pwq, wq) {
spin_lock_irq(&pwq->pool->lock);
if (WARN_ON(pwq_busy(pwq))) {
- pr_warning("%s: %s has the following busy pwq\n",
- __func__, wq->name);
+ pr_warn("%s: %s has the following busy pwq\n",
+ __func__, wq->name);
show_pwq(pwq);
spin_unlock_irq(&pwq->pool->lock);
mutex_unlock(&wq->mutex);
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index d1842fe756d5..5ffe144c9794 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1483,6 +1483,55 @@ config PROVIDE_OHCI1394_DMA_INIT
See Documentation/debugging-via-ohci1394.txt for more information.
+source "samples/Kconfig"
+
+config ARCH_HAS_DEVMEM_IS_ALLOWED
+ bool
+
+config STRICT_DEVMEM
+ bool "Filter access to /dev/mem"
+ depends on MMU && DEVMEM
+ depends on ARCH_HAS_DEVMEM_IS_ALLOWED
+ default y if PPC || X86 || ARM64
+ help
+ If this option is disabled, you allow userspace (root) access to all
+ of memory, including kernel and userspace memory. Accidental
+ access to this is obviously disastrous, but specific access can
+ be used by people debugging the kernel. Note that with PAT support
+ enabled, even in this case there are restrictions on /dev/mem
+ use due to the cache aliasing requirements.
+
+ If this option is switched on, and IO_STRICT_DEVMEM=n, the /dev/mem
+ file only allows userspace access to PCI space and the BIOS code and
+ data regions. This is sufficient for dosemu and X and all common
+ users of /dev/mem.
+
+ If in doubt, say Y.
+
+config IO_STRICT_DEVMEM
+ bool "Filter I/O access to /dev/mem"
+ depends on STRICT_DEVMEM
+ help
+ If this option is disabled, you allow userspace (root) access to all
+ io-memory regardless of whether a driver is actively using that
+ range. Accidental access to this is obviously disastrous, but
+ specific access can be used by people debugging kernel drivers.
+
+ If this option is switched on, the /dev/mem file only allows
+ userspace access to *idle* io-memory ranges (see /proc/iomem) This
+ may break traditional users of /dev/mem (dosemu, legacy X, etc...)
+ if the driver using a given range cannot be disabled.
+
+ If in doubt, say Y.
+
+menu "$(SRCARCH) Debugging"
+
+source "arch/$(SRCARCH)/Kconfig.debug"
+
+endmenu
+
+menu "Kernel Testing and Coverage"
+
source "lib/kunit/Kconfig"
config NOTIFIER_ERROR_INJECTION
@@ -1643,10 +1692,6 @@ config FAULT_INJECTION_STACKTRACE_FILTER
help
Provide stacktrace filter for fault-injection capabilities
-endmenu # "Kernel Testing and Coverage"
-
-menu "Kernel Testing and Coverage"
-
config ARCH_HAS_KCOV
bool
help
@@ -2130,52 +2175,7 @@ config MEMTEST
memtest=17, mean do 17 test patterns.
If you are unsure how to answer this question, answer N.
-source "samples/Kconfig"
-
-config ARCH_HAS_DEVMEM_IS_ALLOWED
- bool
-
-config STRICT_DEVMEM
- bool "Filter access to /dev/mem"
- depends on MMU && DEVMEM
- depends on ARCH_HAS_DEVMEM_IS_ALLOWED
- default y if PPC || X86 || ARM64
- ---help---
- If this option is disabled, you allow userspace (root) access to all
- of memory, including kernel and userspace memory. Accidental
- access to this is obviously disastrous, but specific access can
- be used by people debugging the kernel. Note that with PAT support
- enabled, even in this case there are restrictions on /dev/mem
- use due to the cache aliasing requirements.
-
- If this option is switched on, and IO_STRICT_DEVMEM=n, the /dev/mem
- file only allows userspace access to PCI space and the BIOS code and
- data regions. This is sufficient for dosemu and X and all common
- users of /dev/mem.
-
- If in doubt, say Y.
-config IO_STRICT_DEVMEM
- bool "Filter I/O access to /dev/mem"
- depends on STRICT_DEVMEM
- ---help---
- If this option is disabled, you allow userspace (root) access to all
- io-memory regardless of whether a driver is actively using that
- range. Accidental access to this is obviously disastrous, but
- specific access can be used by people debugging kernel drivers.
-
- If this option is switched on, the /dev/mem file only allows
- userspace access to *idle* io-memory ranges (see /proc/iomem) This
- may break traditional users of /dev/mem (dosemu, legacy X, etc...)
- if the driver using a given range cannot be disabled.
-
- If in doubt, say Y.
-
-menu "$(SRCARCH) Debugging"
-
-source "arch/$(SRCARCH)/Kconfig.debug"
-
-endmenu
config HYPERV_TESTING
bool "Microsoft Hyper-V driver testing"
@@ -2184,4 +2184,6 @@ config HYPERV_TESTING
help
Select this option to enable Hyper-V vmbus testing.
+endmenu # "Kernel Testing and Coverage"
+
endmenu # Kernel hacking
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index fb29c02c6a3c..51595bf3af85 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -1222,11 +1222,12 @@ EXPORT_SYMBOL(iov_iter_discard);
unsigned long iov_iter_alignment(const struct iov_iter *i)
{
- unsigned int p_mask = i->pipe->ring_size - 1;
unsigned long res = 0;
size_t size = i->count;
if (unlikely(iov_iter_is_pipe(i))) {
+ unsigned int p_mask = i->pipe->ring_size - 1;
+
if (size && i->iov_offset && allocated(&i->pipe->bufs[i->head & p_mask]))
return size | i->iov_offset;
return size;
diff --git a/lib/raid6/unroll.awk b/lib/raid6/unroll.awk
index c6aa03631df8..0809805a7e23 100644
--- a/lib/raid6/unroll.awk
+++ b/lib/raid6/unroll.awk
@@ -13,7 +13,7 @@ BEGIN {
for (i = 0; i < rep; ++i) {
tmp = $0
gsub(/\$\$/, i, tmp)
- gsub(/\$\#/, n, tmp)
+ gsub(/\$#/, n, tmp)
gsub(/\$\*/, "$", tmp)
print tmp
}
diff --git a/lib/sbitmap.c b/lib/sbitmap.c
index 33feec8989f1..af88d1346dd7 100644
--- a/lib/sbitmap.c
+++ b/lib/sbitmap.c
@@ -650,8 +650,8 @@ void sbitmap_add_wait_queue(struct sbitmap_queue *sbq,
if (!sbq_wait->sbq) {
sbq_wait->sbq = sbq;
atomic_inc(&sbq->ws_active);
+ add_wait_queue(&ws->wait, &sbq_wait->wait);
}
- add_wait_queue(&ws->wait, &sbq_wait->wait);
}
EXPORT_SYMBOL_GPL(sbitmap_add_wait_queue);
diff --git a/mm/gup_benchmark.c b/mm/gup_benchmark.c
index 7dd602d7f8db..ad9d5b1c4473 100644
--- a/mm/gup_benchmark.c
+++ b/mm/gup_benchmark.c
@@ -26,6 +26,7 @@ static int __gup_benchmark_ioctl(unsigned int cmd,
unsigned long i, nr_pages, addr, next;
int nr;
struct page **pages;
+ int ret = 0;
if (gup->size > ULONG_MAX)
return -EINVAL;
@@ -63,7 +64,9 @@ static int __gup_benchmark_ioctl(unsigned int cmd,
NULL);
break;
default:
- return -1;
+ kvfree(pages);
+ ret = -EINVAL;
+ goto out;
}
if (nr <= 0)
@@ -85,7 +88,8 @@ static int __gup_benchmark_ioctl(unsigned int cmd,
gup->put_delta_usec = ktime_us_delta(end_time, start_time);
kvfree(pages);
- return 0;
+out:
+ return ret;
}
static long gup_benchmark_ioctl(struct file *filep, unsigned int cmd,
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index ac65bb5e38ac..dd8737a94bec 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -27,6 +27,7 @@
#include <linux/swapops.h>
#include <linux/jhash.h>
#include <linux/numa.h>
+#include <linux/llist.h>
#include <asm/page.h>
#include <asm/pgtable.h>
@@ -1136,7 +1137,7 @@ static inline void ClearPageHugeTemporary(struct page *page)
page[2].mapping = NULL;
}
-void free_huge_page(struct page *page)
+static void __free_huge_page(struct page *page)
{
/*
* Can't pass hstate in here because it is called from the
@@ -1199,6 +1200,54 @@ void free_huge_page(struct page *page)
spin_unlock(&hugetlb_lock);
}
+/*
+ * As free_huge_page() can be called from a non-task context, we have
+ * to defer the actual freeing in a workqueue to prevent potential
+ * hugetlb_lock deadlock.
+ *
+ * free_hpage_workfn() locklessly retrieves the linked list of pages to
+ * be freed and frees them one-by-one. As the page->mapping pointer is
+ * going to be cleared in __free_huge_page() anyway, it is reused as the
+ * llist_node structure of a lockless linked list of huge pages to be freed.
+ */
+static LLIST_HEAD(hpage_freelist);
+
+static void free_hpage_workfn(struct work_struct *work)
+{
+ struct llist_node *node;
+ struct page *page;
+
+ node = llist_del_all(&hpage_freelist);
+
+ while (node) {
+ page = container_of((struct address_space **)node,
+ struct page, mapping);
+ node = node->next;
+ __free_huge_page(page);
+ }
+}
+static DECLARE_WORK(free_hpage_work, free_hpage_workfn);
+
+void free_huge_page(struct page *page)
+{
+ /*
+ * Defer freeing if in non-task context to avoid hugetlb_lock deadlock.
+ */
+ if (!in_task()) {
+ /*
+ * Only call schedule_work() if hpage_freelist is previously
+ * empty. Otherwise, schedule_work() had been called but the
+ * workfn hasn't retrieved the list yet.
+ */
+ if (llist_add((struct llist_node *)&page->mapping,
+ &hpage_freelist))
+ schedule_work(&free_hpage_work);
+ return;
+ }
+
+ __free_huge_page(page);
+}
+
static void prep_new_huge_page(struct hstate *h, struct page *page, int nid)
{
INIT_LIST_HEAD(&page->lru);
diff --git a/mm/kasan/common.c b/mm/kasan/common.c
index 2fa710bb6358..c15d8ae68c96 100644
--- a/mm/kasan/common.c
+++ b/mm/kasan/common.c
@@ -778,15 +778,17 @@ static int kasan_populate_vmalloc_pte(pte_t *ptep, unsigned long addr,
return 0;
}
-int kasan_populate_vmalloc(unsigned long requested_size, struct vm_struct *area)
+int kasan_populate_vmalloc(unsigned long addr, unsigned long size)
{
unsigned long shadow_start, shadow_end;
int ret;
- shadow_start = (unsigned long)kasan_mem_to_shadow(area->addr);
+ if (!is_vmalloc_or_module_addr((void *)addr))
+ return 0;
+
+ shadow_start = (unsigned long)kasan_mem_to_shadow((void *)addr);
shadow_start = ALIGN_DOWN(shadow_start, PAGE_SIZE);
- shadow_end = (unsigned long)kasan_mem_to_shadow(area->addr +
- area->size);
+ shadow_end = (unsigned long)kasan_mem_to_shadow((void *)addr + size);
shadow_end = ALIGN(shadow_end, PAGE_SIZE);
ret = apply_to_page_range(&init_mm, shadow_start,
@@ -797,10 +799,6 @@ int kasan_populate_vmalloc(unsigned long requested_size, struct vm_struct *area)
flush_cache_vmap(shadow_start, shadow_end);
- kasan_unpoison_shadow(area->addr, requested_size);
-
- area->flags |= VM_KASAN;
-
/*
* We need to be careful about inter-cpu effects here. Consider:
*
@@ -843,12 +841,23 @@ int kasan_populate_vmalloc(unsigned long requested_size, struct vm_struct *area)
* Poison the shadow for a vmalloc region. Called as part of the
* freeing process at the time the region is freed.
*/
-void kasan_poison_vmalloc(void *start, unsigned long size)
+void kasan_poison_vmalloc(const void *start, unsigned long size)
{
+ if (!is_vmalloc_or_module_addr(start))
+ return;
+
size = round_up(size, KASAN_SHADOW_SCALE_SIZE);
kasan_poison_shadow(start, size, KASAN_VMALLOC_INVALID);
}
+void kasan_unpoison_vmalloc(const void *start, unsigned long size)
+{
+ if (!is_vmalloc_or_module_addr(start))
+ return;
+
+ kasan_unpoison_shadow(start, size);
+}
+
static int kasan_depopulate_vmalloc_pte(pte_t *ptep, unsigned long addr,
void *unused)
{
@@ -948,6 +957,7 @@ void kasan_release_vmalloc(unsigned long start, unsigned long end,
{
void *shadow_start, *shadow_end;
unsigned long region_start, region_end;
+ unsigned long size;
region_start = ALIGN(start, PAGE_SIZE * KASAN_SHADOW_SCALE_SIZE);
region_end = ALIGN_DOWN(end, PAGE_SIZE * KASAN_SHADOW_SCALE_SIZE);
@@ -970,9 +980,11 @@ void kasan_release_vmalloc(unsigned long start, unsigned long end,
shadow_end = kasan_mem_to_shadow((void *)region_end);
if (shadow_end > shadow_start) {
- apply_to_page_range(&init_mm, (unsigned long)shadow_start,
- (unsigned long)(shadow_end - shadow_start),
- kasan_depopulate_vmalloc_pte, NULL);
+ size = shadow_end - shadow_start;
+ apply_to_existing_page_range(&init_mm,
+ (unsigned long)shadow_start,
+ size, kasan_depopulate_vmalloc_pte,
+ NULL);
flush_tlb_kernel_range((unsigned long)shadow_start,
(unsigned long)shadow_end);
}
diff --git a/mm/memory.c b/mm/memory.c
index 606da187d1de..45442d9a4f52 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2021,26 +2021,34 @@ EXPORT_SYMBOL(vm_iomap_memory);
static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,
unsigned long addr, unsigned long end,
- pte_fn_t fn, void *data)
+ pte_fn_t fn, void *data, bool create)
{
pte_t *pte;
- int err;
+ int err = 0;
spinlock_t *uninitialized_var(ptl);
- pte = (mm == &init_mm) ?
- pte_alloc_kernel(pmd, addr) :
- pte_alloc_map_lock(mm, pmd, addr, &ptl);
- if (!pte)
- return -ENOMEM;
+ if (create) {
+ pte = (mm == &init_mm) ?
+ pte_alloc_kernel(pmd, addr) :
+ pte_alloc_map_lock(mm, pmd, addr, &ptl);
+ if (!pte)
+ return -ENOMEM;
+ } else {
+ pte = (mm == &init_mm) ?
+ pte_offset_kernel(pmd, addr) :
+ pte_offset_map_lock(mm, pmd, addr, &ptl);
+ }
BUG_ON(pmd_huge(*pmd));
arch_enter_lazy_mmu_mode();
do {
- err = fn(pte++, addr, data);
- if (err)
- break;
+ if (create || !pte_none(*pte)) {
+ err = fn(pte++, addr, data);
+ if (err)
+ break;
+ }
} while (addr += PAGE_SIZE, addr != end);
arch_leave_lazy_mmu_mode();
@@ -2052,77 +2060,95 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,
static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud,
unsigned long addr, unsigned long end,
- pte_fn_t fn, void *data)
+ pte_fn_t fn, void *data, bool create)
{
pmd_t *pmd;
unsigned long next;
- int err;
+ int err = 0;
BUG_ON(pud_huge(*pud));
- pmd = pmd_alloc(mm, pud, addr);
- if (!pmd)
- return -ENOMEM;
+ if (create) {
+ pmd = pmd_alloc(mm, pud, addr);
+ if (!pmd)
+ return -ENOMEM;
+ } else {
+ pmd = pmd_offset(pud, addr);
+ }
do {
next = pmd_addr_end(addr, end);
- err = apply_to_pte_range(mm, pmd, addr, next, fn, data);
- if (err)
- break;
+ if (create || !pmd_none_or_clear_bad(pmd)) {
+ err = apply_to_pte_range(mm, pmd, addr, next, fn, data,
+ create);
+ if (err)
+ break;
+ }
} while (pmd++, addr = next, addr != end);
return err;
}
static int apply_to_pud_range(struct mm_struct *mm, p4d_t *p4d,
unsigned long addr, unsigned long end,
- pte_fn_t fn, void *data)
+ pte_fn_t fn, void *data, bool create)
{
pud_t *pud;
unsigned long next;
- int err;
+ int err = 0;
- pud = pud_alloc(mm, p4d, addr);
- if (!pud)
- return -ENOMEM;
+ if (create) {
+ pud = pud_alloc(mm, p4d, addr);
+ if (!pud)
+ return -ENOMEM;
+ } else {
+ pud = pud_offset(p4d, addr);
+ }
do {
next = pud_addr_end(addr, end);
- err = apply_to_pmd_range(mm, pud, addr, next, fn, data);
- if (err)
- break;
+ if (create || !pud_none_or_clear_bad(pud)) {
+ err = apply_to_pmd_range(mm, pud, addr, next, fn, data,
+ create);
+ if (err)
+ break;
+ }
} while (pud++, addr = next, addr != end);
return err;
}
static int apply_to_p4d_range(struct mm_struct *mm, pgd_t *pgd,
unsigned long addr, unsigned long end,
- pte_fn_t fn, void *data)
+ pte_fn_t fn, void *data, bool create)
{
p4d_t *p4d;
unsigned long next;
- int err;
+ int err = 0;
- p4d = p4d_alloc(mm, pgd, addr);
- if (!p4d)
- return -ENOMEM;
+ if (create) {
+ p4d = p4d_alloc(mm, pgd, addr);
+ if (!p4d)
+ return -ENOMEM;
+ } else {
+ p4d = p4d_offset(pgd, addr);
+ }
do {
next = p4d_addr_end(addr, end);
- err = apply_to_pud_range(mm, p4d, addr, next, fn, data);
- if (err)
- break;
+ if (create || !p4d_none_or_clear_bad(p4d)) {
+ err = apply_to_pud_range(mm, p4d, addr, next, fn, data,
+ create);
+ if (err)
+ break;
+ }
} while (p4d++, addr = next, addr != end);
return err;
}
-/*
- * Scan a region of virtual memory, filling in page tables as necessary
- * and calling a provided function on each leaf page table.
- */
-int apply_to_page_range(struct mm_struct *mm, unsigned long addr,
- unsigned long size, pte_fn_t fn, void *data)
+static int __apply_to_page_range(struct mm_struct *mm, unsigned long addr,
+ unsigned long size, pte_fn_t fn,
+ void *data, bool create)
{
pgd_t *pgd;
unsigned long next;
unsigned long end = addr + size;
- int err;
+ int err = 0;
if (WARN_ON(addr >= end))
return -EINVAL;
@@ -2130,16 +2156,42 @@ int apply_to_page_range(struct mm_struct *mm, unsigned long addr,
pgd = pgd_offset(mm, addr);
do {
next = pgd_addr_end(addr, end);
- err = apply_to_p4d_range(mm, pgd, addr, next, fn, data);
+ if (!create && pgd_none_or_clear_bad(pgd))
+ continue;
+ err = apply_to_p4d_range(mm, pgd, addr, next, fn, data, create);
if (err)
break;
} while (pgd++, addr = next, addr != end);
return err;
}
+
+/*
+ * Scan a region of virtual memory, filling in page tables as necessary
+ * and calling a provided function on each leaf page table.
+ */
+int apply_to_page_range(struct mm_struct *mm, unsigned long addr,
+ unsigned long size, pte_fn_t fn, void *data)
+{
+ return __apply_to_page_range(mm, addr, size, fn, data, true);
+}
EXPORT_SYMBOL_GPL(apply_to_page_range);
/*
+ * Scan a region of virtual memory, calling a provided function on
+ * each leaf page table where it exists.
+ *
+ * Unlike apply_to_page_range, this does _not_ fill in page tables
+ * where they are absent.
+ */
+int apply_to_existing_page_range(struct mm_struct *mm, unsigned long addr,
+ unsigned long size, pte_fn_t fn, void *data)
+{
+ return __apply_to_page_range(mm, addr, size, fn, data, false);
+}
+EXPORT_SYMBOL_GPL(apply_to_existing_page_range);
+
+/*
* handle_pte_fault chooses page fault handler according to an entry which was
* read non-atomically. Before making any commitment, on those architectures
* or configurations (e.g. i386 with PAE) which might give a mix of unmatched
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 55ac23ef11c1..a91a072f2b2c 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -483,8 +483,9 @@ static void update_pgdat_span(struct pglist_data *pgdat)
pgdat->node_spanned_pages = node_end_pfn - node_start_pfn;
}
-static void __remove_zone(struct zone *zone, unsigned long start_pfn,
- unsigned long nr_pages)
+void __ref remove_pfn_range_from_zone(struct zone *zone,
+ unsigned long start_pfn,
+ unsigned long nr_pages)
{
struct pglist_data *pgdat = zone->zone_pgdat;
unsigned long flags;
@@ -499,28 +500,30 @@ static void __remove_zone(struct zone *zone, unsigned long start_pfn,
return;
#endif
+ clear_zone_contiguous(zone);
+
pgdat_resize_lock(zone->zone_pgdat, &flags);
shrink_zone_span(zone, start_pfn, start_pfn + nr_pages);
update_pgdat_span(pgdat);
pgdat_resize_unlock(zone->zone_pgdat, &flags);
+
+ set_zone_contiguous(zone);
}
-static void __remove_section(struct zone *zone, unsigned long pfn,
- unsigned long nr_pages, unsigned long map_offset,
- struct vmem_altmap *altmap)
+static void __remove_section(unsigned long pfn, unsigned long nr_pages,
+ unsigned long map_offset,
+ struct vmem_altmap *altmap)
{
struct mem_section *ms = __nr_to_section(pfn_to_section_nr(pfn));
if (WARN_ON_ONCE(!valid_section(ms)))
return;
- __remove_zone(zone, pfn, nr_pages);
sparse_remove_section(ms, pfn, nr_pages, map_offset, altmap);
}
/**
- * __remove_pages() - remove sections of pages from a zone
- * @zone: zone from which pages need to be removed
+ * __remove_pages() - remove sections of pages
* @pfn: starting pageframe (must be aligned to start of a section)
* @nr_pages: number of pages to remove (must be multiple of section size)
* @altmap: alternative device page map or %NULL if default memmap is used
@@ -530,16 +533,14 @@ static void __remove_section(struct zone *zone, unsigned long pfn,
* sure that pages are marked reserved and zones are adjust properly by
* calling offline_pages().
*/
-void __remove_pages(struct zone *zone, unsigned long pfn,
- unsigned long nr_pages, struct vmem_altmap *altmap)
+void __remove_pages(unsigned long pfn, unsigned long nr_pages,
+ struct vmem_altmap *altmap)
{
unsigned long map_offset = 0;
unsigned long nr, start_sec, end_sec;
map_offset = vmem_altmap_offset(altmap);
- clear_zone_contiguous(zone);
-
if (check_pfn_span(pfn, nr_pages, "remove"))
return;
@@ -551,13 +552,11 @@ void __remove_pages(struct zone *zone, unsigned long pfn,
cond_resched();
pfns = min(nr_pages, PAGES_PER_SECTION
- (pfn & ~PAGE_SECTION_MASK));
- __remove_section(zone, pfn, pfns, map_offset, altmap);
+ __remove_section(pfn, pfns, map_offset, altmap);
pfn += pfns;
nr_pages -= pfns;
map_offset = 0;
}
-
- set_zone_contiguous(zone);
}
int set_online_page_callback(online_page_callback_t callback)
@@ -869,6 +868,7 @@ failed_addition:
(unsigned long long) pfn << PAGE_SHIFT,
(((unsigned long long) pfn + nr_pages) << PAGE_SHIFT) - 1);
memory_notify(MEM_CANCEL_ONLINE, &arg);
+ remove_pfn_range_from_zone(zone, pfn, nr_pages);
mem_hotplug_done();
return ret;
}
@@ -1628,6 +1628,7 @@ static int __ref __offline_pages(unsigned long start_pfn,
writeback_set_ratelimit();
memory_notify(MEM_OFFLINE, &arg);
+ remove_pfn_range_from_zone(zone, start_pfn, nr_pages);
mem_hotplug_done();
return 0;
diff --git a/mm/memremap.c b/mm/memremap.c
index 03ccbdfeb697..c51c6bd2fe34 100644
--- a/mm/memremap.c
+++ b/mm/memremap.c
@@ -120,7 +120,7 @@ void memunmap_pages(struct dev_pagemap *pgmap)
mem_hotplug_begin();
if (pgmap->type == MEMORY_DEVICE_PRIVATE) {
- __remove_pages(page_zone(first_page), PHYS_PFN(res->start),
+ __remove_pages(PHYS_PFN(res->start),
PHYS_PFN(resource_size(res)), NULL);
} else {
arch_remove_memory(nid, res->start, resource_size(res),
diff --git a/mm/migrate.c b/mm/migrate.c
index eae1565285e3..86873b6f38a7 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1512,9 +1512,11 @@ static int do_move_pages_to_node(struct mm_struct *mm,
/*
* Resolves the given address to a struct page, isolates it from the LRU and
* puts it to the given pagelist.
- * Returns -errno if the page cannot be found/isolated or 0 when it has been
- * queued or the page doesn't need to be migrated because it is already on
- * the target node
+ * Returns:
+ * errno - if the page cannot be found/isolated
+ * 0 - when it doesn't have to be migrated because it is already on the
+ * target node
+ * 1 - when it has been queued
*/
static int add_page_for_migration(struct mm_struct *mm, unsigned long addr,
int node, struct list_head *pagelist, bool migrate_all)
@@ -1553,7 +1555,7 @@ static int add_page_for_migration(struct mm_struct *mm, unsigned long addr,
if (PageHuge(page)) {
if (PageHead(page)) {
isolate_huge_page(page, pagelist);
- err = 0;
+ err = 1;
}
} else {
struct page *head;
@@ -1563,7 +1565,7 @@ static int add_page_for_migration(struct mm_struct *mm, unsigned long addr,
if (err)
goto out_putpage;
- err = 0;
+ err = 1;
list_add_tail(&head->lru, pagelist);
mod_node_page_state(page_pgdat(head),
NR_ISOLATED_ANON + page_is_file_cache(head),
@@ -1640,8 +1642,17 @@ static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes,
*/
err = add_page_for_migration(mm, addr, current_node,
&pagelist, flags & MPOL_MF_MOVE_ALL);
- if (!err)
+
+ if (!err) {
+ /* The page is already on the target node */
+ err = store_status(status, i, current_node, 1);
+ if (err)
+ goto out_flush;
continue;
+ } else if (err > 0) {
+ /* The page is successfully queued for migration */
+ continue;
+ }
err = store_status(status, i, err, 1);
if (err)
diff --git a/mm/mmap.c b/mm/mmap.c
index 9c648524e4dc..71e4ffc83bcd 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -90,12 +90,6 @@ static void unmap_region(struct mm_struct *mm,
* MAP_PRIVATE r: (no) no r: (yes) yes r: (no) yes r: (no) yes
* w: (no) no w: (no) no w: (copy) copy w: (no) no
* x: (no) no x: (no) yes x: (no) yes x: (yes) yes
- *
- * On arm64, PROT_EXEC has the following behaviour for both MAP_SHARED and
- * MAP_PRIVATE:
- * r: (no) no
- * w: (no) no
- * x: (yes) yes
*/
pgprot_t protection_map[16] __ro_after_init = {
__P000, __P001, __P010, __P011, __P100, __P101, __P110, __P111,
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 71e3acea7817..d58c481b3df8 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -890,7 +890,7 @@ static void __oom_kill_process(struct task_struct *victim, const char *message)
K(get_mm_counter(mm, MM_FILEPAGES)),
K(get_mm_counter(mm, MM_SHMEMPAGES)),
from_kuid(&init_user_ns, task_uid(victim)),
- mm_pgtables_bytes(mm), victim->signal->oom_score_adj);
+ mm_pgtables_bytes(mm) >> 10, victim->signal->oom_score_adj);
task_unlock(victim);
/*
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 4d3b3d60d893..e9681dc4aa75 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1062,6 +1062,26 @@ __alloc_vmap_area(unsigned long size, unsigned long align,
}
/*
+ * Free a region of KVA allocated by alloc_vmap_area
+ */
+static void free_vmap_area(struct vmap_area *va)
+{
+ /*
+ * Remove from the busy tree/list.
+ */
+ spin_lock(&vmap_area_lock);
+ unlink_va(va, &vmap_area_root);
+ spin_unlock(&vmap_area_lock);
+
+ /*
+ * Insert/Merge it back to the free tree/list.
+ */
+ spin_lock(&free_vmap_area_lock);
+ merge_or_add_vmap_area(va, &free_vmap_area_root, &free_vmap_area_list);
+ spin_unlock(&free_vmap_area_lock);
+}
+
+/*
* Allocate a region of KVA of the specified size and alignment, within the
* vstart and vend.
*/
@@ -1073,6 +1093,7 @@ static struct vmap_area *alloc_vmap_area(unsigned long size,
struct vmap_area *va, *pva;
unsigned long addr;
int purged = 0;
+ int ret;
BUG_ON(!size);
BUG_ON(offset_in_page(size));
@@ -1139,6 +1160,7 @@ retry:
va->va_end = addr + size;
va->vm = NULL;
+
spin_lock(&vmap_area_lock);
insert_vmap_area(va, &vmap_area_root, &vmap_area_list);
spin_unlock(&vmap_area_lock);
@@ -1147,6 +1169,12 @@ retry:
BUG_ON(va->va_start < vstart);
BUG_ON(va->va_end > vend);
+ ret = kasan_populate_vmalloc(addr, size);
+ if (ret) {
+ free_vmap_area(va);
+ return ERR_PTR(ret);
+ }
+
return va;
overflow:
@@ -1186,26 +1214,6 @@ int unregister_vmap_purge_notifier(struct notifier_block *nb)
EXPORT_SYMBOL_GPL(unregister_vmap_purge_notifier);
/*
- * Free a region of KVA allocated by alloc_vmap_area
- */
-static void free_vmap_area(struct vmap_area *va)
-{
- /*
- * Remove from the busy tree/list.
- */
- spin_lock(&vmap_area_lock);
- unlink_va(va, &vmap_area_root);
- spin_unlock(&vmap_area_lock);
-
- /*
- * Insert/Merge it back to the free tree/list.
- */
- spin_lock(&free_vmap_area_lock);
- merge_or_add_vmap_area(va, &free_vmap_area_root, &free_vmap_area_list);
- spin_unlock(&free_vmap_area_lock);
-}
-
-/*
* Clear the pagetable entries of a given vmap_area
*/
static void unmap_vmap_area(struct vmap_area *va)
@@ -1771,6 +1779,8 @@ void vm_unmap_ram(const void *mem, unsigned int count)
BUG_ON(addr > VMALLOC_END);
BUG_ON(!PAGE_ALIGNED(addr));
+ kasan_poison_vmalloc(mem, size);
+
if (likely(count <= VMAP_MAX_ALLOC)) {
debug_check_no_locks_freed(mem, size);
vb_free(mem, size);
@@ -1821,6 +1831,9 @@ void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t pro
addr = va->va_start;
mem = (void *)addr;
}
+
+ kasan_unpoison_vmalloc(mem, size);
+
if (vmap_page_range(addr, addr + size, prot, pages) < 0) {
vm_unmap_ram(mem, count);
return NULL;
@@ -2075,6 +2088,7 @@ static struct vm_struct *__get_vm_area_node(unsigned long size,
{
struct vmap_area *va;
struct vm_struct *area;
+ unsigned long requested_size = size;
BUG_ON(in_interrupt());
size = PAGE_ALIGN(size);
@@ -2098,23 +2112,9 @@ static struct vm_struct *__get_vm_area_node(unsigned long size,
return NULL;
}
- setup_vmalloc_vm(area, va, flags, caller);
+ kasan_unpoison_vmalloc((void *)va->va_start, requested_size);
- /*
- * For KASAN, if we are in vmalloc space, we need to cover the shadow
- * area with real memory. If we come here through VM_ALLOC, this is
- * done by a higher level function that has access to the true size,
- * which might not be a full page.
- *
- * We assume module space comes via VM_ALLOC path.
- */
- if (is_vmalloc_addr(area->addr) && !(area->flags & VM_ALLOC)) {
- if (kasan_populate_vmalloc(area->size, area)) {
- unmap_vmap_area(va);
- kfree(area);
- return NULL;
- }
- }
+ setup_vmalloc_vm(area, va, flags, caller);
return area;
}
@@ -2293,8 +2293,7 @@ static void __vunmap(const void *addr, int deallocate_pages)
debug_check_no_locks_freed(area->addr, get_vm_area_size(area));
debug_check_no_obj_freed(area->addr, get_vm_area_size(area));
- if (area->flags & VM_KASAN)
- kasan_poison_vmalloc(area->addr, area->size);
+ kasan_poison_vmalloc(area->addr, area->size);
vm_remove_mappings(area, deallocate_pages);
@@ -2539,7 +2538,7 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
if (!size || (size >> PAGE_SHIFT) > totalram_pages())
goto fail;
- area = __get_vm_area_node(size, align, VM_ALLOC | VM_UNINITIALIZED |
+ area = __get_vm_area_node(real_size, align, VM_ALLOC | VM_UNINITIALIZED |
vm_flags, start, end, node, gfp_mask, caller);
if (!area)
goto fail;
@@ -2548,11 +2547,6 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
if (!addr)
return NULL;
- if (is_vmalloc_or_module_addr(area->addr)) {
- if (kasan_populate_vmalloc(real_size, area))
- return NULL;
- }
-
/*
* In this function, newly allocated vm_struct has VM_UNINITIALIZED
* flag. It means that vm_struct is not fully initialized.
@@ -3294,7 +3288,7 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
struct vmap_area **vas, *va;
struct vm_struct **vms;
int area, area2, last_area, term_area;
- unsigned long base, start, size, end, last_end;
+ unsigned long base, start, size, end, last_end, orig_start, orig_end;
bool purged = false;
enum fit_type type;
@@ -3424,6 +3418,15 @@ retry:
spin_unlock(&free_vmap_area_lock);
+ /* populate the kasan shadow space */
+ for (area = 0; area < nr_vms; area++) {
+ if (kasan_populate_vmalloc(vas[area]->va_start, sizes[area]))
+ goto err_free_shadow;
+
+ kasan_unpoison_vmalloc((void *)vas[area]->va_start,
+ sizes[area]);
+ }
+
/* insert all vm's */
spin_lock(&vmap_area_lock);
for (area = 0; area < nr_vms; area++) {
@@ -3434,12 +3437,6 @@ retry:
}
spin_unlock(&vmap_area_lock);
- /* populate the shadow space outside of the lock */
- for (area = 0; area < nr_vms; area++) {
- /* assume success here */
- kasan_populate_vmalloc(sizes[area], vms[area]);
- }
-
kfree(vas);
return vms;
@@ -3451,8 +3448,12 @@ recovery:
* and when pcpu_get_vm_areas() is success.
*/
while (area--) {
- merge_or_add_vmap_area(vas[area], &free_vmap_area_root,
- &free_vmap_area_list);
+ orig_start = vas[area]->va_start;
+ orig_end = vas[area]->va_end;
+ va = merge_or_add_vmap_area(vas[area], &free_vmap_area_root,
+ &free_vmap_area_list);
+ kasan_release_vmalloc(orig_start, orig_end,
+ va->va_start, va->va_end);
vas[area] = NULL;
}
@@ -3487,6 +3488,28 @@ err_free2:
kfree(vas);
kfree(vms);
return NULL;
+
+err_free_shadow:
+ spin_lock(&free_vmap_area_lock);
+ /*
+ * We release all the vmalloc shadows, even the ones for regions that
+ * hadn't been successfully added. This relies on kasan_release_vmalloc
+ * being able to tolerate this case.
+ */
+ for (area = 0; area < nr_vms; area++) {
+ orig_start = vas[area]->va_start;
+ orig_end = vas[area]->va_end;
+ va = merge_or_add_vmap_area(vas[area], &free_vmap_area_root,
+ &free_vmap_area_list);
+ kasan_release_vmalloc(orig_start, orig_end,
+ va->va_start, va->va_end);
+ vas[area] = NULL;
+ kfree(vms[area]);
+ }
+ spin_unlock(&free_vmap_area_lock);
+ kfree(vas);
+ kfree(vms);
+ return NULL;
}
/**
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 74e8edce83ca..572fb17c6273 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -387,7 +387,7 @@ void register_shrinker_prepared(struct shrinker *shrinker)
{
down_write(&shrinker_rwsem);
list_add_tail(&shrinker->list, &shrinker_list);
-#ifdef CONFIG_MEMCG_KMEM
+#ifdef CONFIG_MEMCG
if (shrinker->flags & SHRINKER_MEMCG_AWARE)
idr_replace(&shrinker_idr, shrinker, shrinker->id);
#endif
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 2b2b9aae8a3c..22d17ecfe7df 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -2069,6 +2069,11 @@ static int zs_page_migrate(struct address_space *mapping, struct page *newpage,
zs_pool_dec_isolated(pool);
}
+ if (page_zone(newpage) != page_zone(page)) {
+ dec_zone_page_state(page, NR_ZSPAGES);
+ inc_zone_page_state(newpage, NR_ZSPAGES);
+ }
+
reset_page(page);
put_page(page);
page = newpage;
diff --git a/net/802/mrp.c b/net/802/mrp.c
index 2cfdfbfbb2ed..bea6e43d45a0 100644
--- a/net/802/mrp.c
+++ b/net/802/mrp.c
@@ -523,7 +523,7 @@ int mrp_request_join(const struct net_device *dev,
struct mrp_attr *attr;
if (sizeof(struct mrp_skb_cb) + len >
- FIELD_SIZEOF(struct sk_buff, cb))
+ sizeof_field(struct sk_buff, cb))
return -ENOMEM;
spin_lock_bh(&app->lock);
@@ -548,7 +548,7 @@ void mrp_request_leave(const struct net_device *dev,
struct mrp_attr *attr;
if (sizeof(struct mrp_skb_cb) + len >
- FIELD_SIZEOF(struct sk_buff, cb))
+ sizeof_field(struct sk_buff, cb))
return;
spin_lock_bh(&app->lock);
@@ -692,7 +692,7 @@ static int mrp_pdu_parse_vecattr(struct mrp_applicant *app,
* advance to the next event in its Vector.
*/
if (sizeof(struct mrp_skb_cb) + mrp_cb(skb)->mh->attrlen >
- FIELD_SIZEOF(struct sk_buff, cb))
+ sizeof_field(struct sk_buff, cb))
return -1;
if (skb_copy_bits(skb, *offset, mrp_cb(skb)->attrvalue,
mrp_cb(skb)->mh->attrlen) < 0)
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index c46daf09a501..bb7ec1a3915d 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -126,6 +126,7 @@ int vlan_check_real_dev(struct net_device *real_dev,
void vlan_setup(struct net_device *dev);
int register_vlan_dev(struct net_device *dev, struct netlink_ext_ack *extack);
void unregister_vlan_dev(struct net_device *dev, struct list_head *head);
+void vlan_dev_uninit(struct net_device *dev);
bool vlan_dev_inherit_address(struct net_device *dev,
struct net_device *real_dev);
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index e5bff5cc6f97..990b9fde28c6 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -586,7 +586,8 @@ static int vlan_dev_init(struct net_device *dev)
return 0;
}
-static void vlan_dev_uninit(struct net_device *dev)
+/* Note: this function might be called multiple times for the same device. */
+void vlan_dev_uninit(struct net_device *dev)
{
struct vlan_priority_tci_mapping *pm;
struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
@@ -646,8 +647,8 @@ static int vlan_ethtool_get_ts_info(struct net_device *dev,
const struct ethtool_ops *ops = vlan->real_dev->ethtool_ops;
struct phy_device *phydev = vlan->real_dev->phydev;
- if (phydev && phydev->drv && phydev->drv->ts_info) {
- return phydev->drv->ts_info(phydev, info);
+ if (phy_has_tsinfo(phydev)) {
+ return phy_ts_info(phydev, info);
} else if (ops->get_ts_info) {
return ops->get_ts_info(vlan->real_dev, info);
} else {
diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c
index c482a6fe9393..0db85aeb119b 100644
--- a/net/8021q/vlan_netlink.c
+++ b/net/8021q/vlan_netlink.c
@@ -108,11 +108,13 @@ static int vlan_changelink(struct net_device *dev, struct nlattr *tb[],
struct ifla_vlan_flags *flags;
struct ifla_vlan_qos_mapping *m;
struct nlattr *attr;
- int rem;
+ int rem, err;
if (data[IFLA_VLAN_FLAGS]) {
flags = nla_data(data[IFLA_VLAN_FLAGS]);
- vlan_dev_change_flags(dev, flags->flags, flags->mask);
+ err = vlan_dev_change_flags(dev, flags->flags, flags->mask);
+ if (err)
+ return err;
}
if (data[IFLA_VLAN_INGRESS_QOS]) {
nla_for_each_nested(attr, data[IFLA_VLAN_INGRESS_QOS], rem) {
@@ -123,7 +125,9 @@ static int vlan_changelink(struct net_device *dev, struct nlattr *tb[],
if (data[IFLA_VLAN_EGRESS_QOS]) {
nla_for_each_nested(attr, data[IFLA_VLAN_EGRESS_QOS], rem) {
m = nla_data(attr);
- vlan_dev_set_egress_priority(dev, m->from, m->to);
+ err = vlan_dev_set_egress_priority(dev, m->from, m->to);
+ if (err)
+ return err;
}
}
return 0;
@@ -179,10 +183,11 @@ static int vlan_newlink(struct net *src_net, struct net_device *dev,
return -EINVAL;
err = vlan_changelink(dev, tb, data, extack);
- if (err < 0)
- return err;
-
- return register_vlan_dev(dev, extack);
+ if (!err)
+ err = register_vlan_dev(dev, extack);
+ if (err)
+ vlan_dev_uninit(dev);
+ return err;
}
static inline size_t vlan_qos_map_size(unsigned int n)
diff --git a/net/Kconfig b/net/Kconfig
index bd191f978a23..54916b7adb9b 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -108,9 +108,10 @@ config NETWORK_PHY_TIMESTAMPING
bool "Timestamping in PHY devices"
select NET_PTP_CLASSIFY
help
- This allows timestamping of network packets by PHYs with
- hardware timestamping capabilities. This option adds some
- overhead in the transmit and receive paths.
+ This allows timestamping of network packets by PHYs (or
+ other MII bus snooping devices) with hardware timestamping
+ capabilities. This option adds some overhead in the transmit
+ and receive paths.
If you are unsure how to answer this question, answer N.
@@ -448,6 +449,14 @@ config FAILOVER
migration of VMs with direct attached VFs by failing over to the
paravirtual datapath when the VF is unplugged.
+config ETHTOOL_NETLINK
+ bool "Netlink interface for ethtool"
+ default y
+ help
+ An alternative userspace interface for ethtool based on generic
+ netlink. It provides better extensibility and some new features,
+ e.g. notification messages.
+
endif # if NET
# Used by archs to tell that they support BPF JIT compiler plus which flavour.
diff --git a/net/Makefile b/net/Makefile
index 449fc0b221f8..848303d98d3d 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -13,7 +13,7 @@ obj-$(CONFIG_NET) += $(tmp-y)
# LLC has to be linked before the files in net/802/
obj-$(CONFIG_LLC) += llc/
-obj-$(CONFIG_NET) += ethernet/ 802/ sched/ netlink/ bpf/
+obj-$(CONFIG_NET) += ethernet/ 802/ sched/ netlink/ bpf/ ethtool/
obj-$(CONFIG_NETFILTER) += netfilter/
obj-$(CONFIG_INET) += ipv4/
obj-$(CONFIG_TLS) += tls/
diff --git a/net/atm/lec.c b/net/atm/lec.c
index 5a77c235a212..b57368e70aab 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -194,7 +194,7 @@ lec_send(struct atm_vcc *vcc, struct sk_buff *skb)
dev->stats.tx_bytes += skb->len;
}
-static void lec_tx_timeout(struct net_device *dev)
+static void lec_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
pr_info("%s\n", dev->name);
netif_trans_update(dev);
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 324306d6fde0..ff57ea89c27e 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -808,7 +808,7 @@ static int ax25_create(struct net *net, struct socket *sock, int protocol,
struct sock *sk;
ax25_cb *ax25;
- if (protocol < 0 || protocol > SK_PROTOCOL_MAX)
+ if (protocol < 0 || protocol > U8_MAX)
return -EINVAL;
if (!net_eq(net, &init_net))
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index 4a89177def64..4811ec65bc43 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -548,7 +548,7 @@ static void batadv_recv_handler_init(void)
BUILD_BUG_ON(sizeof(struct batadv_tvlv_tt_change) != 12);
BUILD_BUG_ON(sizeof(struct batadv_tvlv_roam_adv) != 8);
- i = FIELD_SIZEOF(struct sk_buff, cb);
+ i = sizeof_field(struct sk_buff, cb);
BUILD_BUG_ON(sizeof(struct batadv_skb_cb) > i);
/* broadcast packet */
diff --git a/net/bluetooth/bnep/netdev.c b/net/bluetooth/bnep/netdev.c
index 1d4d7d415730..cc1cff63194f 100644
--- a/net/bluetooth/bnep/netdev.c
+++ b/net/bluetooth/bnep/netdev.c
@@ -112,7 +112,7 @@ static int bnep_net_set_mac_addr(struct net_device *dev, void *arg)
return 0;
}
-static void bnep_net_timeout(struct net_device *dev)
+static void bnep_net_timeout(struct net_device *dev, unsigned int txqueue)
{
BT_DBG("net_timeout");
netif_wake_queue(dev);
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 915c2d6f7fb9..d555c0d8657d 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -15,7 +15,7 @@
#include <trace/events/bpf_test_run.h>
static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
- u32 *retval, u32 *time)
+ u32 *retval, u32 *time, bool xdp)
{
struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = { NULL };
enum bpf_cgroup_storage_type stype;
@@ -41,7 +41,11 @@ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
time_start = ktime_get_ns();
for (i = 0; i < repeat; i++) {
bpf_cgroup_storage_set(storage);
- *retval = BPF_PROG_RUN(prog, ctx);
+
+ if (xdp)
+ *retval = bpf_prog_run_xdp(prog, ctx);
+ else
+ *retval = BPF_PROG_RUN(prog, ctx);
if (signal_pending(current)) {
ret = -EINTR;
@@ -247,34 +251,53 @@ static int convert___skb_to_skb(struct sk_buff *skb, struct __sk_buff *__skb)
return 0;
/* make sure the fields we don't use are zeroed */
- if (!range_is_zero(__skb, 0, offsetof(struct __sk_buff, priority)))
+ if (!range_is_zero(__skb, 0, offsetof(struct __sk_buff, mark)))
+ return -EINVAL;
+
+ /* mark is allowed */
+
+ if (!range_is_zero(__skb, offsetofend(struct __sk_buff, mark),
+ offsetof(struct __sk_buff, priority)))
return -EINVAL;
/* priority is allowed */
- if (!range_is_zero(__skb, offsetof(struct __sk_buff, priority) +
- FIELD_SIZEOF(struct __sk_buff, priority),
+ if (!range_is_zero(__skb, offsetofend(struct __sk_buff, priority),
offsetof(struct __sk_buff, cb)))
return -EINVAL;
/* cb is allowed */
- if (!range_is_zero(__skb, offsetof(struct __sk_buff, cb) +
- FIELD_SIZEOF(struct __sk_buff, cb),
+ if (!range_is_zero(__skb, offsetofend(struct __sk_buff, cb),
offsetof(struct __sk_buff, tstamp)))
return -EINVAL;
/* tstamp is allowed */
+ /* wire_len is allowed */
+ /* gso_segs is allowed */
- if (!range_is_zero(__skb, offsetof(struct __sk_buff, tstamp) +
- FIELD_SIZEOF(struct __sk_buff, tstamp),
+ if (!range_is_zero(__skb, offsetofend(struct __sk_buff, gso_segs),
sizeof(struct __sk_buff)))
return -EINVAL;
+ skb->mark = __skb->mark;
skb->priority = __skb->priority;
skb->tstamp = __skb->tstamp;
memcpy(&cb->data, __skb->cb, QDISC_CB_PRIV_LEN);
+ if (__skb->wire_len == 0) {
+ cb->pkt_len = skb->len;
+ } else {
+ if (__skb->wire_len < skb->len ||
+ __skb->wire_len > GSO_MAX_SIZE)
+ return -EINVAL;
+ cb->pkt_len = __skb->wire_len;
+ }
+
+ if (__skb->gso_segs > GSO_MAX_SEGS)
+ return -EINVAL;
+ skb_shinfo(skb)->gso_segs = __skb->gso_segs;
+
return 0;
}
@@ -285,9 +308,12 @@ static void convert_skb_to___skb(struct sk_buff *skb, struct __sk_buff *__skb)
if (!__skb)
return;
+ __skb->mark = skb->mark;
__skb->priority = skb->priority;
__skb->tstamp = skb->tstamp;
memcpy(__skb->cb, &cb->data, QDISC_CB_PRIV_LEN);
+ __skb->wire_len = cb->pkt_len;
+ __skb->gso_segs = skb_shinfo(skb)->gso_segs;
}
int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
@@ -359,7 +385,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
ret = convert___skb_to_skb(skb, ctx);
if (ret)
goto out;
- ret = bpf_test_run(prog, skb, repeat, &retval, &duration);
+ ret = bpf_test_run(prog, skb, repeat, &retval, &duration, false);
if (ret)
goto out;
if (!is_l2) {
@@ -416,8 +442,8 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
rxqueue = __netif_get_rx_queue(current->nsproxy->net_ns->loopback_dev, 0);
xdp.rxq = &rxqueue->xdp_rxq;
-
- ret = bpf_test_run(prog, &xdp, repeat, &retval, &duration);
+ bpf_prog_change_xdp(NULL, prog);
+ ret = bpf_test_run(prog, &xdp, repeat, &retval, &duration, true);
if (ret)
goto out;
if (xdp.data != data + XDP_PACKET_HEADROOM + NET_IP_ALIGN ||
@@ -425,6 +451,7 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
size = xdp.data_end - xdp.data;
ret = bpf_test_finish(kattr, uattr, xdp.data, size, retval, duration);
out:
+ bpf_prog_change_xdp(prog, NULL);
kfree(data);
return ret;
}
@@ -437,8 +464,7 @@ static int verify_user_bpf_flow_keys(struct bpf_flow_keys *ctx)
/* flags is allowed */
- if (!range_is_zero(ctx, offsetof(struct bpf_flow_keys, flags) +
- FIELD_SIZEOF(struct bpf_flow_keys, flags),
+ if (!range_is_zero(ctx, offsetofend(struct bpf_flow_keys, flags),
sizeof(struct bpf_flow_keys)))
return -EINVAL;
diff --git a/net/bridge/br.c b/net/bridge/br.c
index 8a8f9e5f264f..b6fe30e3768f 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -312,7 +312,7 @@ static int __init br_init(void)
{
int err;
- BUILD_BUG_ON(sizeof(struct br_input_skb_cb) > FIELD_SIZEOF(struct sk_buff, cb));
+ BUILD_BUG_ON(sizeof(struct br_input_skb_cb) > sizeof_field(struct sk_buff, cb));
err = stp_proto_register(&br_stp_proto);
if (err < 0) {
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index af7800103e51..59980ecfc962 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -662,6 +662,9 @@ static unsigned int br_nf_forward_arp(void *priv,
nf_bridge_pull_encap_header(skb);
}
+ if (unlikely(!pskb_may_pull(skb, sizeof(struct arphdr))))
+ return NF_DROP;
+
if (arp_hdr(skb)->ar_pln != 4) {
if (is_vlan_arp(skb, state->net))
nf_bridge_push_encap_header(skb);
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index a0a54482aabc..60136575aea4 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -1607,6 +1607,19 @@ static int br_fill_linkxstats(struct sk_buff *skb,
br_multicast_get_stats(br, p, nla_data(nla));
}
#endif
+
+ if (p) {
+ nla = nla_reserve_64bit(skb, BRIDGE_XSTATS_STP,
+ sizeof(p->stp_xstats),
+ BRIDGE_XSTATS_PAD);
+ if (!nla)
+ goto nla_put_failure;
+
+ spin_lock_bh(&br->lock);
+ memcpy(nla_data(nla), &p->stp_xstats, sizeof(p->stp_xstats));
+ spin_unlock_bh(&br->lock);
+ }
+
nla_nest_end(skb, nest);
*prividx = 0;
diff --git a/net/bridge/br_nf_core.c b/net/bridge/br_nf_core.c
index 2cdfc5d6c25d..8c69f0c95a8e 100644
--- a/net/bridge/br_nf_core.c
+++ b/net/bridge/br_nf_core.c
@@ -22,7 +22,8 @@
#endif
static void fake_update_pmtu(struct dst_entry *dst, struct sock *sk,
- struct sk_buff *skb, u32 mtu)
+ struct sk_buff *skb, u32 mtu,
+ bool confirm_neigh)
{
}
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 36b0367ca1e0..f540f3bdf294 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -283,6 +283,8 @@ struct net_bridge_port {
#endif
u16 group_fwd_mask;
u16 backup_redirected_cnt;
+
+ struct bridge_stp_xstats stp_xstats;
};
#define kobj_to_brport(obj) container_of(obj, struct net_bridge_port, kobj)
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index 1f1410f8d312..6856a6d9282b 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -45,6 +45,17 @@ void br_set_state(struct net_bridge_port *p, unsigned int state)
br_info(p->br, "port %u(%s) entered %s state\n",
(unsigned int) p->port_no, p->dev->name,
br_port_state_names[p->state]);
+
+ if (p->br->stp_enabled == BR_KERNEL_STP) {
+ switch (p->state) {
+ case BR_STATE_BLOCKING:
+ p->stp_xstats.transition_blk++;
+ break;
+ case BR_STATE_FORWARDING:
+ p->stp_xstats.transition_fwd++;
+ break;
+ }
+ }
}
/* called under bridge lock */
@@ -484,6 +495,8 @@ void br_received_config_bpdu(struct net_bridge_port *p,
struct net_bridge *br;
int was_root;
+ p->stp_xstats.rx_bpdu++;
+
br = p->br;
was_root = br_is_root_bridge(br);
@@ -517,6 +530,8 @@ void br_received_config_bpdu(struct net_bridge_port *p,
/* called under bridge lock */
void br_received_tcn_bpdu(struct net_bridge_port *p)
{
+ p->stp_xstats.rx_tcn++;
+
if (br_is_designated_port(p)) {
br_info(p->br, "port %u(%s) received tcn bpdu\n",
(unsigned int) p->port_no, p->dev->name);
diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c
index 7796dd9d42d7..0e4572f31330 100644
--- a/net/bridge/br_stp_bpdu.c
+++ b/net/bridge/br_stp_bpdu.c
@@ -118,6 +118,8 @@ void br_send_config_bpdu(struct net_bridge_port *p, struct br_config_bpdu *bpdu)
br_set_ticks(buf+33, bpdu->forward_delay);
br_send_bpdu(p, buf, 35);
+
+ p->stp_xstats.tx_bpdu++;
}
/* called under bridge lock */
@@ -133,6 +135,8 @@ void br_send_tcn_bpdu(struct net_bridge_port *p)
buf[2] = 0;
buf[3] = BPDU_TYPE_TCN;
br_send_bpdu(p, buf, 4);
+
+ p->stp_xstats.tx_tcn++;
}
/*
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 4096d8a74a2b..e1256e03a9a8 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -1867,7 +1867,7 @@ static int ebt_buf_count(struct ebt_entries_buf_state *state, unsigned int sz)
}
static int ebt_buf_add(struct ebt_entries_buf_state *state,
- void *data, unsigned int sz)
+ const void *data, unsigned int sz)
{
if (state->buf_kern_start == NULL)
goto count_only;
@@ -1901,7 +1901,7 @@ enum compat_mwt {
EBT_COMPAT_TARGET,
};
-static int compat_mtw_from_user(struct compat_ebt_entry_mwt *mwt,
+static int compat_mtw_from_user(const struct compat_ebt_entry_mwt *mwt,
enum compat_mwt compat_mwt,
struct ebt_entries_buf_state *state,
const unsigned char *base)
@@ -1979,22 +1979,23 @@ static int compat_mtw_from_user(struct compat_ebt_entry_mwt *mwt,
/* return size of all matches, watchers or target, including necessary
* alignment and padding.
*/
-static int ebt_size_mwt(struct compat_ebt_entry_mwt *match32,
+static int ebt_size_mwt(const struct compat_ebt_entry_mwt *match32,
unsigned int size_left, enum compat_mwt type,
struct ebt_entries_buf_state *state, const void *base)
{
+ const char *buf = (const char *)match32;
int growth = 0;
- char *buf;
if (size_left == 0)
return 0;
- buf = (char *) match32;
-
- while (size_left >= sizeof(*match32)) {
+ do {
struct ebt_entry_match *match_kern;
int ret;
+ if (size_left < sizeof(*match32))
+ return -EINVAL;
+
match_kern = (struct ebt_entry_match *) state->buf_kern_start;
if (match_kern) {
char *tmp;
@@ -2031,22 +2032,18 @@ static int ebt_size_mwt(struct compat_ebt_entry_mwt *match32,
if (match_kern)
match_kern->match_size = ret;
- /* rule should have no remaining data after target */
- if (type == EBT_COMPAT_TARGET && size_left)
- return -EINVAL;
-
match32 = (struct compat_ebt_entry_mwt *) buf;
- }
+ } while (size_left);
return growth;
}
/* called for all ebt_entry structures. */
-static int size_entry_mwt(struct ebt_entry *entry, const unsigned char *base,
+static int size_entry_mwt(const struct ebt_entry *entry, const unsigned char *base,
unsigned int *total,
struct ebt_entries_buf_state *state)
{
- unsigned int i, j, startoff, new_offset = 0;
+ unsigned int i, j, startoff, next_expected_off, new_offset = 0;
/* stores match/watchers/targets & offset of next struct ebt_entry: */
unsigned int offsets[4];
unsigned int *offsets_update = NULL;
@@ -2132,11 +2129,13 @@ static int size_entry_mwt(struct ebt_entry *entry, const unsigned char *base,
return ret;
}
- startoff = state->buf_user_offset - startoff;
+ next_expected_off = state->buf_user_offset - startoff;
+ if (next_expected_off != entry->next_offset)
+ return -EINVAL;
- if (WARN_ON(*total < startoff))
+ if (*total < entry->next_offset)
return -EINVAL;
- *total -= startoff;
+ *total -= entry->next_offset;
return 0;
}
diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c
index de09b0a65791..f7587428febd 100644
--- a/net/can/j1939/socket.c
+++ b/net/can/j1939/socket.c
@@ -423,9 +423,9 @@ static int j1939_sk_bind(struct socket *sock, struct sockaddr *uaddr, int len)
{
struct sockaddr_can *addr = (struct sockaddr_can *)uaddr;
struct j1939_sock *jsk = j1939_sk(sock->sk);
- struct j1939_priv *priv = jsk->priv;
- struct sock *sk = sock->sk;
- struct net *net = sock_net(sk);
+ struct j1939_priv *priv;
+ struct sock *sk;
+ struct net *net;
int ret = 0;
ret = j1939_sk_sanity_check(addr, len);
@@ -434,6 +434,10 @@ static int j1939_sk_bind(struct socket *sock, struct sockaddr *uaddr, int len)
lock_sock(sock->sk);
+ priv = jsk->priv;
+ sk = sock->sk;
+ net = sock_net(sk);
+
/* Already bound to an interface? */
if (jsk->state & J1939_SOCK_BOUND) {
/* A re-bind() to a different interface is not
diff --git a/net/core/Makefile b/net/core/Makefile
index a104dc8faafc..3e2c378e5f31 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -8,7 +8,7 @@ obj-y := sock.o request_sock.o skbuff.o datagram.o stream.o scm.o \
obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
-obj-y += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \
+obj-y += dev.o dev_addr_lists.o dst.o netevent.o \
neighbour.o rtnetlink.o utils.o link_watch.o filter.o \
sock_diag.o dev_ioctl.o tso.o sock_reuseport.o \
fib_notifier.o xdp.o flow_offload.o
diff --git a/net/core/dev.c b/net/core/dev.c
index 2c277b8aba38..d99f88c58636 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4932,7 +4932,6 @@ static bool skb_pfmemalloc_protocol(struct sk_buff *skb)
static inline int nf_ingress(struct sk_buff *skb, struct packet_type **pt_prev,
int *ret, struct net_device *orig_dev)
{
-#ifdef CONFIG_NETFILTER_INGRESS
if (nf_hook_ingress_active(skb)) {
int ingress_retval;
@@ -4946,7 +4945,6 @@ static inline int nf_ingress(struct sk_buff *skb, struct packet_type **pt_prev,
rcu_read_unlock();
return ingress_retval;
}
-#endif /* CONFIG_NETFILTER_INGRESS */
return 0;
}
@@ -8542,7 +8540,17 @@ static int dev_xdp_install(struct net_device *dev, bpf_op_t bpf_op,
struct netlink_ext_ack *extack, u32 flags,
struct bpf_prog *prog)
{
+ bool non_hw = !(flags & XDP_FLAGS_HW_MODE);
+ struct bpf_prog *prev_prog = NULL;
struct netdev_bpf xdp;
+ int err;
+
+ if (non_hw) {
+ prev_prog = bpf_prog_by_id(__dev_xdp_query(dev, bpf_op,
+ XDP_QUERY_PROG));
+ if (IS_ERR(prev_prog))
+ prev_prog = NULL;
+ }
memset(&xdp, 0, sizeof(xdp));
if (flags & XDP_FLAGS_HW_MODE)
@@ -8553,7 +8561,14 @@ static int dev_xdp_install(struct net_device *dev, bpf_op_t bpf_op,
xdp.flags = flags;
xdp.prog = prog;
- return bpf_op(dev, &xdp);
+ err = bpf_op(dev, &xdp);
+ if (!err && non_hw)
+ bpf_prog_change_xdp(prev_prog, prog);
+
+ if (prev_prog)
+ bpf_prog_put(prev_prog);
+
+ return err;
}
static void dev_xdp_uninstall(struct net_device *dev)
@@ -10165,7 +10180,7 @@ static struct hlist_head * __net_init netdev_create_hash(void)
static int __net_init netdev_init(struct net *net)
{
BUILD_BUG_ON(GRO_HASH_BUCKETS >
- 8 * FIELD_SIZEOF(struct napi_struct, gro_bitmask));
+ 8 * sizeof_field(struct napi_struct, gro_bitmask));
if (net != &init_net)
INIT_LIST_HEAD(&net->dev_base_head);
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index 5163d900bb4f..dbaebbe573f0 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -187,6 +187,7 @@ static int net_hwtstamp_validate(struct ifreq *ifr)
case HWTSTAMP_TX_OFF:
case HWTSTAMP_TX_ON:
case HWTSTAMP_TX_ONESTEP_SYNC:
+ case HWTSTAMP_TX_ONESTEP_P2P:
tx_type_valid = 1;
break;
}
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 4c63c9a4c09e..d30aa47052aa 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -4844,21 +4844,12 @@ devlink_health_reporter_destroy(struct devlink_health_reporter *reporter)
EXPORT_SYMBOL_GPL(devlink_health_reporter_destroy);
void
-devlink_health_reporter_state_update(struct devlink_health_reporter *reporter,
- enum devlink_health_reporter_state state)
+devlink_health_reporter_recovery_done(struct devlink_health_reporter *reporter)
{
- if (WARN_ON(state != DEVLINK_HEALTH_REPORTER_STATE_HEALTHY &&
- state != DEVLINK_HEALTH_REPORTER_STATE_ERROR))
- return;
-
- if (reporter->health_state == state)
- return;
-
- reporter->health_state = state;
- trace_devlink_health_reporter_state_update(reporter->devlink,
- reporter->ops->name, state);
+ reporter->recovery_count++;
+ reporter->last_recovery_ts = jiffies;
}
-EXPORT_SYMBOL_GPL(devlink_health_reporter_state_update);
+EXPORT_SYMBOL_GPL(devlink_health_reporter_recovery_done);
static int
devlink_health_reporter_recover(struct devlink_health_reporter *reporter,
@@ -4876,9 +4867,8 @@ devlink_health_reporter_recover(struct devlink_health_reporter *reporter,
if (err)
return err;
- reporter->recovery_count++;
+ devlink_health_reporter_recovery_done(reporter);
reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_HEALTHY;
- reporter->last_recovery_ts = jiffies;
return 0;
}
@@ -5090,6 +5080,48 @@ genlmsg_cancel:
return -EMSGSIZE;
}
+static void devlink_recover_notify(struct devlink_health_reporter *reporter,
+ enum devlink_command cmd)
+{
+ struct sk_buff *msg;
+ int err;
+
+ WARN_ON(cmd != DEVLINK_CMD_HEALTH_REPORTER_RECOVER);
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return;
+
+ err = devlink_nl_health_reporter_fill(msg, reporter->devlink,
+ reporter, cmd, 0, 0, 0);
+ if (err) {
+ nlmsg_free(msg);
+ return;
+ }
+
+ genlmsg_multicast_netns(&devlink_nl_family,
+ devlink_net(reporter->devlink),
+ msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+}
+
+void
+devlink_health_reporter_state_update(struct devlink_health_reporter *reporter,
+ enum devlink_health_reporter_state state)
+{
+ if (WARN_ON(state != DEVLINK_HEALTH_REPORTER_STATE_HEALTHY &&
+ state != DEVLINK_HEALTH_REPORTER_STATE_ERROR))
+ return;
+
+ if (reporter->health_state == state)
+ return;
+
+ reporter->health_state = state;
+ trace_devlink_health_reporter_state_update(reporter->devlink,
+ reporter->ops->name, state);
+ devlink_recover_notify(reporter, DEVLINK_CMD_HEALTH_REPORTER_RECOVER);
+}
+EXPORT_SYMBOL_GPL(devlink_health_reporter_state_update);
+
static int devlink_nl_cmd_health_reporter_get_doit(struct sk_buff *skb,
struct genl_info *info)
{
diff --git a/net/core/filter.c b/net/core/filter.c
index f1e703eed3d2..ef01c5599501 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -274,7 +274,7 @@ static u32 convert_skb_access(int skb_field, int dst_reg, int src_reg,
switch (skb_field) {
case SKF_AD_MARK:
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
+ BUILD_BUG_ON(sizeof_field(struct sk_buff, mark) != 4);
*insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
offsetof(struct sk_buff, mark));
@@ -289,14 +289,14 @@ static u32 convert_skb_access(int skb_field, int dst_reg, int src_reg,
break;
case SKF_AD_QUEUE:
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2);
+ BUILD_BUG_ON(sizeof_field(struct sk_buff, queue_mapping) != 2);
*insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
offsetof(struct sk_buff, queue_mapping));
break;
case SKF_AD_VLAN_TAG:
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2);
+ BUILD_BUG_ON(sizeof_field(struct sk_buff, vlan_tci) != 2);
/* dst_reg = *(u16 *) (src_reg + offsetof(vlan_tci)) */
*insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
@@ -322,7 +322,7 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
switch (fp->k) {
case SKF_AD_OFF + SKF_AD_PROTOCOL:
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2);
+ BUILD_BUG_ON(sizeof_field(struct sk_buff, protocol) != 2);
/* A = *(u16 *) (CTX + offsetof(protocol)) */
*insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
@@ -338,8 +338,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
case SKF_AD_OFF + SKF_AD_IFINDEX:
case SKF_AD_OFF + SKF_AD_HATYPE:
- BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);
- BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, type) != 2);
+ BUILD_BUG_ON(sizeof_field(struct net_device, ifindex) != 4);
+ BUILD_BUG_ON(sizeof_field(struct net_device, type) != 2);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev),
BPF_REG_TMP, BPF_REG_CTX,
@@ -361,7 +361,7 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
break;
case SKF_AD_OFF + SKF_AD_RXHASH:
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);
+ BUILD_BUG_ON(sizeof_field(struct sk_buff, hash) != 4);
*insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX,
offsetof(struct sk_buff, hash));
@@ -385,7 +385,7 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
break;
case SKF_AD_OFF + SKF_AD_VLAN_TPID:
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_proto) != 2);
+ BUILD_BUG_ON(sizeof_field(struct sk_buff, vlan_proto) != 2);
/* A = *(u16 *) (CTX + offsetof(vlan_proto)) */
*insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
@@ -2055,6 +2055,7 @@ static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb)
}
skb->dev = dev;
+ skb->tstamp = 0;
dev_xmit_recursion_inc();
ret = dev_queue_xmit(skb);
@@ -3510,36 +3511,16 @@ err:
}
static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd,
- struct bpf_map *map,
- struct xdp_buff *xdp,
- u32 index)
+ struct bpf_map *map, struct xdp_buff *xdp)
{
- int err;
-
switch (map->map_type) {
case BPF_MAP_TYPE_DEVMAP:
- case BPF_MAP_TYPE_DEVMAP_HASH: {
- struct bpf_dtab_netdev *dst = fwd;
-
- err = dev_map_enqueue(dst, xdp, dev_rx);
- if (unlikely(err))
- return err;
- break;
- }
- case BPF_MAP_TYPE_CPUMAP: {
- struct bpf_cpu_map_entry *rcpu = fwd;
-
- err = cpu_map_enqueue(rcpu, xdp, dev_rx);
- if (unlikely(err))
- return err;
- break;
- }
- case BPF_MAP_TYPE_XSKMAP: {
- struct xdp_sock *xs = fwd;
-
- err = __xsk_map_redirect(map, xdp, xs);
- return err;
- }
+ case BPF_MAP_TYPE_DEVMAP_HASH:
+ return dev_map_enqueue(fwd, xdp, dev_rx);
+ case BPF_MAP_TYPE_CPUMAP:
+ return cpu_map_enqueue(fwd, xdp, dev_rx);
+ case BPF_MAP_TYPE_XSKMAP:
+ return __xsk_map_redirect(fwd, xdp);
default:
break;
}
@@ -3548,26 +3529,9 @@ static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd,
void xdp_do_flush_map(void)
{
- struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
- struct bpf_map *map = ri->map_to_flush;
-
- ri->map_to_flush = NULL;
- if (map) {
- switch (map->map_type) {
- case BPF_MAP_TYPE_DEVMAP:
- case BPF_MAP_TYPE_DEVMAP_HASH:
- __dev_map_flush(map);
- break;
- case BPF_MAP_TYPE_CPUMAP:
- __cpu_map_flush(map);
- break;
- case BPF_MAP_TYPE_XSKMAP:
- __xsk_map_flush(map);
- break;
- default:
- break;
- }
- }
+ __dev_map_flush();
+ __cpu_map_flush();
+ __xsk_map_flush();
}
EXPORT_SYMBOL_GPL(xdp_do_flush_map);
@@ -3616,14 +3580,10 @@ static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
ri->tgt_value = NULL;
WRITE_ONCE(ri->map, NULL);
- if (ri->map_to_flush && unlikely(ri->map_to_flush != map))
- xdp_do_flush_map();
-
- err = __bpf_tx_xdp_map(dev, fwd, map, xdp, index);
+ err = __bpf_tx_xdp_map(dev, fwd, map, xdp);
if (unlikely(err))
goto err;
- ri->map_to_flush = map;
_trace_xdp_redirect_map(dev, xdp_prog, fwd, map, index);
return 0;
err:
@@ -5589,8 +5549,8 @@ u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type,
#define BPF_TCP_SOCK_GET_COMMON(FIELD) \
do { \
- BUILD_BUG_ON(FIELD_SIZEOF(struct tcp_sock, FIELD) > \
- FIELD_SIZEOF(struct bpf_tcp_sock, FIELD)); \
+ BUILD_BUG_ON(sizeof_field(struct tcp_sock, FIELD) > \
+ sizeof_field(struct bpf_tcp_sock, FIELD)); \
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct tcp_sock, FIELD),\
si->dst_reg, si->src_reg, \
offsetof(struct tcp_sock, FIELD)); \
@@ -5598,9 +5558,9 @@ u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type,
#define BPF_INET_SOCK_GET_COMMON(FIELD) \
do { \
- BUILD_BUG_ON(FIELD_SIZEOF(struct inet_connection_sock, \
+ BUILD_BUG_ON(sizeof_field(struct inet_connection_sock, \
FIELD) > \
- FIELD_SIZEOF(struct bpf_tcp_sock, FIELD)); \
+ sizeof_field(struct bpf_tcp_sock, FIELD)); \
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
struct inet_connection_sock, \
FIELD), \
@@ -5615,7 +5575,7 @@ u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type,
switch (si->off) {
case offsetof(struct bpf_tcp_sock, rtt_min):
- BUILD_BUG_ON(FIELD_SIZEOF(struct tcp_sock, rtt_min) !=
+ BUILD_BUG_ON(sizeof_field(struct tcp_sock, rtt_min) !=
sizeof(struct minmax));
BUILD_BUG_ON(sizeof(struct minmax) <
sizeof(struct minmax_sample));
@@ -5780,8 +5740,8 @@ u32 bpf_xdp_sock_convert_ctx_access(enum bpf_access_type type,
#define BPF_XDP_SOCK_GET(FIELD) \
do { \
- BUILD_BUG_ON(FIELD_SIZEOF(struct xdp_sock, FIELD) > \
- FIELD_SIZEOF(struct bpf_xdp_sock, FIELD)); \
+ BUILD_BUG_ON(sizeof_field(struct xdp_sock, FIELD) > \
+ sizeof_field(struct bpf_xdp_sock, FIELD)); \
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_sock, FIELD),\
si->dst_reg, si->src_reg, \
offsetof(struct xdp_sock, FIELD)); \
@@ -7344,7 +7304,7 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
case offsetof(struct __sk_buff, cb[0]) ...
offsetofend(struct __sk_buff, cb[4]) - 1:
- BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, data) < 20);
+ BUILD_BUG_ON(sizeof_field(struct qdisc_skb_cb, data) < 20);
BUILD_BUG_ON((offsetof(struct sk_buff, cb) +
offsetof(struct qdisc_skb_cb, data)) %
sizeof(__u64));
@@ -7363,7 +7323,7 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
break;
case offsetof(struct __sk_buff, tc_classid):
- BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, tc_classid) != 2);
+ BUILD_BUG_ON(sizeof_field(struct qdisc_skb_cb, tc_classid) != 2);
off = si->off;
off -= offsetof(struct __sk_buff, tc_classid);
@@ -7434,7 +7394,7 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
#endif
break;
case offsetof(struct __sk_buff, family):
- BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_family) != 2);
+ BUILD_BUG_ON(sizeof_field(struct sock_common, skc_family) != 2);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
si->dst_reg, si->src_reg,
@@ -7445,7 +7405,7 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
2, target_size));
break;
case offsetof(struct __sk_buff, remote_ip4):
- BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_daddr) != 4);
+ BUILD_BUG_ON(sizeof_field(struct sock_common, skc_daddr) != 4);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
si->dst_reg, si->src_reg,
@@ -7456,7 +7416,7 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
4, target_size));
break;
case offsetof(struct __sk_buff, local_ip4):
- BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
+ BUILD_BUG_ON(sizeof_field(struct sock_common,
skc_rcv_saddr) != 4);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
@@ -7470,7 +7430,7 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
case offsetof(struct __sk_buff, remote_ip6[0]) ...
offsetof(struct __sk_buff, remote_ip6[3]):
#if IS_ENABLED(CONFIG_IPV6)
- BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
+ BUILD_BUG_ON(sizeof_field(struct sock_common,
skc_v6_daddr.s6_addr32[0]) != 4);
off = si->off;
@@ -7490,7 +7450,7 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
case offsetof(struct __sk_buff, local_ip6[0]) ...
offsetof(struct __sk_buff, local_ip6[3]):
#if IS_ENABLED(CONFIG_IPV6)
- BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
+ BUILD_BUG_ON(sizeof_field(struct sock_common,
skc_v6_rcv_saddr.s6_addr32[0]) != 4);
off = si->off;
@@ -7509,7 +7469,7 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
break;
case offsetof(struct __sk_buff, remote_port):
- BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_dport) != 2);
+ BUILD_BUG_ON(sizeof_field(struct sock_common, skc_dport) != 2);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
si->dst_reg, si->src_reg,
@@ -7524,7 +7484,7 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
break;
case offsetof(struct __sk_buff, local_port):
- BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_num) != 2);
+ BUILD_BUG_ON(sizeof_field(struct sock_common, skc_num) != 2);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
si->dst_reg, si->src_reg,
@@ -7535,7 +7495,7 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
break;
case offsetof(struct __sk_buff, tstamp):
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, tstamp) != 8);
+ BUILD_BUG_ON(sizeof_field(struct sk_buff, tstamp) != 8);
if (type == BPF_WRITE)
*insn++ = BPF_STX_MEM(BPF_DW,
@@ -7573,7 +7533,7 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
target_size));
break;
case offsetof(struct __sk_buff, wire_len):
- BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, pkt_len) != 4);
+ BUILD_BUG_ON(sizeof_field(struct qdisc_skb_cb, pkt_len) != 4);
off = si->off;
off -= offsetof(struct __sk_buff, wire_len);
@@ -7603,7 +7563,7 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
switch (si->off) {
case offsetof(struct bpf_sock, bound_dev_if):
- BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_bound_dev_if) != 4);
+ BUILD_BUG_ON(sizeof_field(struct sock, sk_bound_dev_if) != 4);
if (type == BPF_WRITE)
*insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
@@ -7614,7 +7574,7 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
break;
case offsetof(struct bpf_sock, mark):
- BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_mark) != 4);
+ BUILD_BUG_ON(sizeof_field(struct sock, sk_mark) != 4);
if (type == BPF_WRITE)
*insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
@@ -7625,7 +7585,7 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
break;
case offsetof(struct bpf_sock, priority):
- BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_priority) != 4);
+ BUILD_BUG_ON(sizeof_field(struct sock, sk_priority) != 4);
if (type == BPF_WRITE)
*insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
@@ -7641,34 +7601,34 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
si->dst_reg, si->src_reg,
bpf_target_off(struct sock_common,
skc_family,
- FIELD_SIZEOF(struct sock_common,
+ sizeof_field(struct sock_common,
skc_family),
target_size));
break;
case offsetof(struct bpf_sock, type):
- BUILD_BUG_ON(HWEIGHT32(SK_FL_TYPE_MASK) != BITS_PER_BYTE * 2);
- *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
- offsetof(struct sock, __sk_flags_offset));
- *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_TYPE_MASK);
- *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_TYPE_SHIFT);
- *target_size = 2;
+ *insn++ = BPF_LDX_MEM(
+ BPF_FIELD_SIZEOF(struct sock, sk_type),
+ si->dst_reg, si->src_reg,
+ bpf_target_off(struct sock, sk_type,
+ sizeof_field(struct sock, sk_type),
+ target_size));
break;
case offsetof(struct bpf_sock, protocol):
- BUILD_BUG_ON(HWEIGHT32(SK_FL_PROTO_MASK) != BITS_PER_BYTE);
- *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
- offsetof(struct sock, __sk_flags_offset));
- *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK);
- *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_PROTO_SHIFT);
- *target_size = 1;
+ *insn++ = BPF_LDX_MEM(
+ BPF_FIELD_SIZEOF(struct sock, sk_protocol),
+ si->dst_reg, si->src_reg,
+ bpf_target_off(struct sock, sk_protocol,
+ sizeof_field(struct sock, sk_protocol),
+ target_size));
break;
case offsetof(struct bpf_sock, src_ip4):
*insn++ = BPF_LDX_MEM(
BPF_SIZE(si->code), si->dst_reg, si->src_reg,
bpf_target_off(struct sock_common, skc_rcv_saddr,
- FIELD_SIZEOF(struct sock_common,
+ sizeof_field(struct sock_common,
skc_rcv_saddr),
target_size));
break;
@@ -7677,7 +7637,7 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
*insn++ = BPF_LDX_MEM(
BPF_SIZE(si->code), si->dst_reg, si->src_reg,
bpf_target_off(struct sock_common, skc_daddr,
- FIELD_SIZEOF(struct sock_common,
+ sizeof_field(struct sock_common,
skc_daddr),
target_size));
break;
@@ -7691,7 +7651,7 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
bpf_target_off(
struct sock_common,
skc_v6_rcv_saddr.s6_addr32[0],
- FIELD_SIZEOF(struct sock_common,
+ sizeof_field(struct sock_common,
skc_v6_rcv_saddr.s6_addr32[0]),
target_size) + off);
#else
@@ -7708,7 +7668,7 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
BPF_SIZE(si->code), si->dst_reg, si->src_reg,
bpf_target_off(struct sock_common,
skc_v6_daddr.s6_addr32[0],
- FIELD_SIZEOF(struct sock_common,
+ sizeof_field(struct sock_common,
skc_v6_daddr.s6_addr32[0]),
target_size) + off);
#else
@@ -7722,7 +7682,7 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
BPF_FIELD_SIZEOF(struct sock_common, skc_num),
si->dst_reg, si->src_reg,
bpf_target_off(struct sock_common, skc_num,
- FIELD_SIZEOF(struct sock_common,
+ sizeof_field(struct sock_common,
skc_num),
target_size));
break;
@@ -7732,7 +7692,7 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
BPF_FIELD_SIZEOF(struct sock_common, skc_dport),
si->dst_reg, si->src_reg,
bpf_target_off(struct sock_common, skc_dport,
- FIELD_SIZEOF(struct sock_common,
+ sizeof_field(struct sock_common,
skc_dport),
target_size));
break;
@@ -7742,7 +7702,7 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
BPF_FIELD_SIZEOF(struct sock_common, skc_state),
si->dst_reg, si->src_reg,
bpf_target_off(struct sock_common, skc_state,
- FIELD_SIZEOF(struct sock_common,
+ sizeof_field(struct sock_common,
skc_state),
target_size));
break;
@@ -7837,7 +7797,7 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type,
si->src_reg, offsetof(S, F)); \
*insn++ = BPF_LDX_MEM( \
SIZE, si->dst_reg, si->dst_reg, \
- bpf_target_off(NS, NF, FIELD_SIZEOF(NS, NF), \
+ bpf_target_off(NS, NF, sizeof_field(NS, NF), \
target_size) \
+ OFF); \
} while (0)
@@ -7868,7 +7828,7 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type,
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(S, F), tmp_reg, \
si->dst_reg, offsetof(S, F)); \
*insn++ = BPF_STX_MEM(SIZE, tmp_reg, si->src_reg, \
- bpf_target_off(NS, NF, FIELD_SIZEOF(NS, NF), \
+ bpf_target_off(NS, NF, sizeof_field(NS, NF), \
target_size) \
+ OFF); \
*insn++ = BPF_LDX_MEM(BPF_DW, tmp_reg, si->dst_reg, \
@@ -7930,8 +7890,8 @@ static u32 sock_addr_convert_ctx_access(enum bpf_access_type type,
*/
BUILD_BUG_ON(offsetof(struct sockaddr_in, sin_port) !=
offsetof(struct sockaddr_in6, sin6_port));
- BUILD_BUG_ON(FIELD_SIZEOF(struct sockaddr_in, sin_port) !=
- FIELD_SIZEOF(struct sockaddr_in6, sin6_port));
+ BUILD_BUG_ON(sizeof_field(struct sockaddr_in, sin_port) !=
+ sizeof_field(struct sockaddr_in6, sin6_port));
SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD(struct bpf_sock_addr_kern,
struct sockaddr_in6, uaddr,
sin6_port, tmp_reg);
@@ -7943,20 +7903,13 @@ static u32 sock_addr_convert_ctx_access(enum bpf_access_type type,
break;
case offsetof(struct bpf_sock_addr, type):
- SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(
- struct bpf_sock_addr_kern, struct sock, sk,
- __sk_flags_offset, BPF_W, 0);
- *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_TYPE_MASK);
- *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_TYPE_SHIFT);
+ SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern,
+ struct sock, sk, sk_type);
break;
case offsetof(struct bpf_sock_addr, protocol):
- SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(
- struct bpf_sock_addr_kern, struct sock, sk,
- __sk_flags_offset, BPF_W, 0);
- *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK);
- *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg,
- SK_FL_PROTO_SHIFT);
+ SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern,
+ struct sock, sk, sk_protocol);
break;
case offsetof(struct bpf_sock_addr, msg_src_ip4):
@@ -7997,8 +7950,8 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
/* Helper macro for adding read access to tcp_sock or sock fields. */
#define SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ) \
do { \
- BUILD_BUG_ON(FIELD_SIZEOF(OBJ, OBJ_FIELD) > \
- FIELD_SIZEOF(struct bpf_sock_ops, BPF_FIELD)); \
+ BUILD_BUG_ON(sizeof_field(OBJ, OBJ_FIELD) > \
+ sizeof_field(struct bpf_sock_ops, BPF_FIELD)); \
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
struct bpf_sock_ops_kern, \
is_fullsock), \
@@ -8031,8 +7984,8 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
#define SOCK_OPS_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ) \
do { \
int reg = BPF_REG_9; \
- BUILD_BUG_ON(FIELD_SIZEOF(OBJ, OBJ_FIELD) > \
- FIELD_SIZEOF(struct bpf_sock_ops, BPF_FIELD)); \
+ BUILD_BUG_ON(sizeof_field(OBJ, OBJ_FIELD) > \
+ sizeof_field(struct bpf_sock_ops, BPF_FIELD)); \
if (si->dst_reg == reg || si->src_reg == reg) \
reg--; \
if (si->dst_reg == reg || si->src_reg == reg) \
@@ -8073,12 +8026,12 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
switch (si->off) {
case offsetof(struct bpf_sock_ops, op) ...
offsetof(struct bpf_sock_ops, replylong[3]):
- BUILD_BUG_ON(FIELD_SIZEOF(struct bpf_sock_ops, op) !=
- FIELD_SIZEOF(struct bpf_sock_ops_kern, op));
- BUILD_BUG_ON(FIELD_SIZEOF(struct bpf_sock_ops, reply) !=
- FIELD_SIZEOF(struct bpf_sock_ops_kern, reply));
- BUILD_BUG_ON(FIELD_SIZEOF(struct bpf_sock_ops, replylong) !=
- FIELD_SIZEOF(struct bpf_sock_ops_kern, replylong));
+ BUILD_BUG_ON(sizeof_field(struct bpf_sock_ops, op) !=
+ sizeof_field(struct bpf_sock_ops_kern, op));
+ BUILD_BUG_ON(sizeof_field(struct bpf_sock_ops, reply) !=
+ sizeof_field(struct bpf_sock_ops_kern, reply));
+ BUILD_BUG_ON(sizeof_field(struct bpf_sock_ops, replylong) !=
+ sizeof_field(struct bpf_sock_ops_kern, replylong));
off = si->off;
off -= offsetof(struct bpf_sock_ops, op);
off += offsetof(struct bpf_sock_ops_kern, op);
@@ -8091,7 +8044,7 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
break;
case offsetof(struct bpf_sock_ops, family):
- BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_family) != 2);
+ BUILD_BUG_ON(sizeof_field(struct sock_common, skc_family) != 2);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
struct bpf_sock_ops_kern, sk),
@@ -8102,7 +8055,7 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
break;
case offsetof(struct bpf_sock_ops, remote_ip4):
- BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_daddr) != 4);
+ BUILD_BUG_ON(sizeof_field(struct sock_common, skc_daddr) != 4);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
struct bpf_sock_ops_kern, sk),
@@ -8113,7 +8066,7 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
break;
case offsetof(struct bpf_sock_ops, local_ip4):
- BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
+ BUILD_BUG_ON(sizeof_field(struct sock_common,
skc_rcv_saddr) != 4);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
@@ -8128,7 +8081,7 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
case offsetof(struct bpf_sock_ops, remote_ip6[0]) ...
offsetof(struct bpf_sock_ops, remote_ip6[3]):
#if IS_ENABLED(CONFIG_IPV6)
- BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
+ BUILD_BUG_ON(sizeof_field(struct sock_common,
skc_v6_daddr.s6_addr32[0]) != 4);
off = si->off;
@@ -8149,7 +8102,7 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
case offsetof(struct bpf_sock_ops, local_ip6[0]) ...
offsetof(struct bpf_sock_ops, local_ip6[3]):
#if IS_ENABLED(CONFIG_IPV6)
- BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
+ BUILD_BUG_ON(sizeof_field(struct sock_common,
skc_v6_rcv_saddr.s6_addr32[0]) != 4);
off = si->off;
@@ -8168,7 +8121,7 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
break;
case offsetof(struct bpf_sock_ops, remote_port):
- BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_dport) != 2);
+ BUILD_BUG_ON(sizeof_field(struct sock_common, skc_dport) != 2);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
struct bpf_sock_ops_kern, sk),
@@ -8182,7 +8135,7 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
break;
case offsetof(struct bpf_sock_ops, local_port):
- BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_num) != 2);
+ BUILD_BUG_ON(sizeof_field(struct sock_common, skc_num) != 2);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
struct bpf_sock_ops_kern, sk),
@@ -8202,7 +8155,7 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
break;
case offsetof(struct bpf_sock_ops, state):
- BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_state) != 1);
+ BUILD_BUG_ON(sizeof_field(struct sock_common, skc_state) != 1);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
struct bpf_sock_ops_kern, sk),
@@ -8213,7 +8166,7 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
break;
case offsetof(struct bpf_sock_ops, rtt_min):
- BUILD_BUG_ON(FIELD_SIZEOF(struct tcp_sock, rtt_min) !=
+ BUILD_BUG_ON(sizeof_field(struct tcp_sock, rtt_min) !=
sizeof(struct minmax));
BUILD_BUG_ON(sizeof(struct minmax) <
sizeof(struct minmax_sample));
@@ -8224,7 +8177,7 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
offsetof(struct bpf_sock_ops_kern, sk));
*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
offsetof(struct tcp_sock, rtt_min) +
- FIELD_SIZEOF(struct minmax_sample, t));
+ sizeof_field(struct minmax_sample, t));
break;
case offsetof(struct bpf_sock_ops, bpf_sock_ops_cb_flags):
@@ -8366,7 +8319,7 @@ static u32 sk_msg_convert_ctx_access(enum bpf_access_type type,
offsetof(struct sk_msg, data_end));
break;
case offsetof(struct sk_msg_md, family):
- BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_family) != 2);
+ BUILD_BUG_ON(sizeof_field(struct sock_common, skc_family) != 2);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
struct sk_msg, sk),
@@ -8377,7 +8330,7 @@ static u32 sk_msg_convert_ctx_access(enum bpf_access_type type,
break;
case offsetof(struct sk_msg_md, remote_ip4):
- BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_daddr) != 4);
+ BUILD_BUG_ON(sizeof_field(struct sock_common, skc_daddr) != 4);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
struct sk_msg, sk),
@@ -8388,7 +8341,7 @@ static u32 sk_msg_convert_ctx_access(enum bpf_access_type type,
break;
case offsetof(struct sk_msg_md, local_ip4):
- BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
+ BUILD_BUG_ON(sizeof_field(struct sock_common,
skc_rcv_saddr) != 4);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
@@ -8403,7 +8356,7 @@ static u32 sk_msg_convert_ctx_access(enum bpf_access_type type,
case offsetof(struct sk_msg_md, remote_ip6[0]) ...
offsetof(struct sk_msg_md, remote_ip6[3]):
#if IS_ENABLED(CONFIG_IPV6)
- BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
+ BUILD_BUG_ON(sizeof_field(struct sock_common,
skc_v6_daddr.s6_addr32[0]) != 4);
off = si->off;
@@ -8424,7 +8377,7 @@ static u32 sk_msg_convert_ctx_access(enum bpf_access_type type,
case offsetof(struct sk_msg_md, local_ip6[0]) ...
offsetof(struct sk_msg_md, local_ip6[3]):
#if IS_ENABLED(CONFIG_IPV6)
- BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
+ BUILD_BUG_ON(sizeof_field(struct sock_common,
skc_v6_rcv_saddr.s6_addr32[0]) != 4);
off = si->off;
@@ -8443,7 +8396,7 @@ static u32 sk_msg_convert_ctx_access(enum bpf_access_type type,
break;
case offsetof(struct sk_msg_md, remote_port):
- BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_dport) != 2);
+ BUILD_BUG_ON(sizeof_field(struct sock_common, skc_dport) != 2);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
struct sk_msg, sk),
@@ -8457,7 +8410,7 @@ static u32 sk_msg_convert_ctx_access(enum bpf_access_type type,
break;
case offsetof(struct sk_msg_md, local_port):
- BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_num) != 2);
+ BUILD_BUG_ON(sizeof_field(struct sock_common, skc_num) != 2);
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
struct sk_msg, sk),
@@ -8847,7 +8800,7 @@ sk_reuseport_is_valid_access(int off, int size,
/* Fields that allow narrowing */
case bpf_ctx_range(struct sk_reuseport_md, eth_protocol):
- if (size < FIELD_SIZEOF(struct sk_buff, protocol))
+ if (size < sizeof_field(struct sk_buff, protocol))
return false;
/* fall through */
case bpf_ctx_range(struct sk_reuseport_md, ip_protocol):
@@ -8865,7 +8818,7 @@ sk_reuseport_is_valid_access(int off, int size,
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_reuseport_kern, F), \
si->dst_reg, si->src_reg, \
bpf_target_off(struct sk_reuseport_kern, F, \
- FIELD_SIZEOF(struct sk_reuseport_kern, F), \
+ sizeof_field(struct sk_reuseport_kern, F), \
target_size)); \
})
@@ -8875,11 +8828,11 @@ sk_reuseport_is_valid_access(int off, int size,
skb, \
SKB_FIELD)
-#define SK_REUSEPORT_LOAD_SK_FIELD_SIZE_OFF(SK_FIELD, BPF_SIZE, EXTRA_OFF) \
- SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(struct sk_reuseport_kern, \
- struct sock, \
- sk, \
- SK_FIELD, BPF_SIZE, EXTRA_OFF)
+#define SK_REUSEPORT_LOAD_SK_FIELD(SK_FIELD) \
+ SOCK_ADDR_LOAD_NESTED_FIELD(struct sk_reuseport_kern, \
+ struct sock, \
+ sk, \
+ SK_FIELD)
static u32 sk_reuseport_convert_ctx_access(enum bpf_access_type type,
const struct bpf_insn *si,
@@ -8903,16 +8856,7 @@ static u32 sk_reuseport_convert_ctx_access(enum bpf_access_type type,
break;
case offsetof(struct sk_reuseport_md, ip_protocol):
- BUILD_BUG_ON(HWEIGHT32(SK_FL_PROTO_MASK) != BITS_PER_BYTE);
- SK_REUSEPORT_LOAD_SK_FIELD_SIZE_OFF(__sk_flags_offset,
- BPF_W, 0);
- *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK);
- *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg,
- SK_FL_PROTO_SHIFT);
- /* SK_FL_PROTO_MASK and SK_FL_PROTO_SHIFT are endian
- * aware. No further narrowing or masking is needed.
- */
- *target_size = 1;
+ SK_REUSEPORT_LOAD_SK_FIELD(sk_protocol);
break;
case offsetof(struct sk_reuseport_md, data_end):
@@ -8940,3 +8884,11 @@ const struct bpf_verifier_ops sk_reuseport_verifier_ops = {
const struct bpf_prog_ops sk_reuseport_prog_ops = {
};
#endif /* CONFIG_INET */
+
+DEFINE_BPF_DISPATCHER(bpf_dispatcher_xdp)
+
+void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog)
+{
+ bpf_dispatcher_change_prog(BPF_DISPATCHER_PTR(bpf_dispatcher_xdp),
+ prev_prog, prog);
+}
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index d524a693e00f..f560b4902060 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -233,7 +233,7 @@ static bool icmp_has_id(u8 type)
* @skb: sk_buff to extract from
* @key_icmp: struct flow_dissector_key_icmp to fill
* @data: raw buffer pointer to the packet
- * @toff: offset to extract at
+ * @thoff: offset to extract at
* @hlen: packet header length
*/
void skb_flow_get_icmp_tci(const struct sk_buff *skb,
@@ -599,8 +599,8 @@ __skb_flow_dissect_gre(const struct sk_buff *skb,
offset += sizeof(struct gre_base_hdr);
if (hdr->flags & GRE_CSUM)
- offset += FIELD_SIZEOF(struct gre_full_hdr, csum) +
- FIELD_SIZEOF(struct gre_full_hdr, reserved1);
+ offset += sizeof_field(struct gre_full_hdr, csum) +
+ sizeof_field(struct gre_full_hdr, reserved1);
if (hdr->flags & GRE_KEY) {
const __be32 *keyid;
@@ -622,11 +622,11 @@ __skb_flow_dissect_gre(const struct sk_buff *skb,
else
key_keyid->keyid = *keyid & GRE_PPTP_KEY_MASK;
}
- offset += FIELD_SIZEOF(struct gre_full_hdr, key);
+ offset += sizeof_field(struct gre_full_hdr, key);
}
if (hdr->flags & GRE_SEQ)
- offset += FIELD_SIZEOF(struct pptp_gre_header, seq);
+ offset += sizeof_field(struct pptp_gre_header, seq);
if (gre_ver == 0) {
if (*p_proto == htons(ETH_P_TEB)) {
@@ -653,7 +653,7 @@ __skb_flow_dissect_gre(const struct sk_buff *skb,
u8 *ppp_hdr;
if (hdr->flags & GRE_ACK)
- offset += FIELD_SIZEOF(struct pptp_gre_header, ack);
+ offset += sizeof_field(struct pptp_gre_header, ack);
ppp_hdr = __skb_header_pointer(skb, *p_nhoff + offset,
sizeof(_ppp_hdr),
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 652da6369037..920784a9b7ff 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -98,9 +98,6 @@ static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
static void neigh_cleanup_and_release(struct neighbour *neigh)
{
- if (neigh->parms->neigh_cleanup)
- neigh->parms->neigh_cleanup(neigh);
-
trace_neigh_cleanup_and_release(neigh, 0);
__neigh_notify(neigh, RTM_DELNEIGH, 0, 0);
call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 5c4624298996..4c826b8bf9b1 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -919,14 +919,17 @@ static int rx_queue_add_kobject(struct net_device *dev, int index)
struct kobject *kobj = &queue->kobj;
int error = 0;
+ /* Kobject_put later will trigger rx_queue_release call which
+ * decreases dev refcount: Take that reference here
+ */
+ dev_hold(queue->dev);
+
kobj->kset = dev->queues_kset;
error = kobject_init_and_add(kobj, &rx_queue_ktype, NULL,
"rx-%u", index);
if (error)
goto err;
- dev_hold(queue->dev);
-
if (dev->sysfs_rx_queue_group) {
error = sysfs_create_group(kobj, dev->sysfs_rx_queue_group);
if (error)
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index a6aefe989043..9b7cbe35df37 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -96,10 +96,65 @@ struct page_pool *page_pool_create(const struct page_pool_params *params)
}
EXPORT_SYMBOL(page_pool_create);
+static void __page_pool_return_page(struct page_pool *pool, struct page *page);
+
+noinline
+static struct page *page_pool_refill_alloc_cache(struct page_pool *pool,
+ bool refill)
+{
+ struct ptr_ring *r = &pool->ring;
+ struct page *page;
+ int pref_nid; /* preferred NUMA node */
+
+ /* Quicker fallback, avoid locks when ring is empty */
+ if (__ptr_ring_empty(r))
+ return NULL;
+
+ /* Softirq guarantee CPU and thus NUMA node is stable. This,
+ * assumes CPU refilling driver RX-ring will also run RX-NAPI.
+ */
+#ifdef CONFIG_NUMA
+ pref_nid = (pool->p.nid == NUMA_NO_NODE) ? numa_mem_id() : pool->p.nid;
+#else
+ /* Ignore pool->p.nid setting if !CONFIG_NUMA, helps compiler */
+ pref_nid = numa_mem_id(); /* will be zero like page_to_nid() */
+#endif
+
+ /* Slower-path: Get pages from locked ring queue */
+ spin_lock(&r->consumer_lock);
+
+ /* Refill alloc array, but only if NUMA match */
+ do {
+ page = __ptr_ring_consume(r);
+ if (unlikely(!page))
+ break;
+
+ if (likely(page_to_nid(page) == pref_nid)) {
+ pool->alloc.cache[pool->alloc.count++] = page;
+ } else {
+ /* NUMA mismatch;
+ * (1) release 1 page to page-allocator and
+ * (2) break out to fallthrough to alloc_pages_node.
+ * This limit stress on page buddy alloactor.
+ */
+ __page_pool_return_page(pool, page);
+ page = NULL;
+ break;
+ }
+ } while (pool->alloc.count < PP_ALLOC_CACHE_REFILL &&
+ refill);
+
+ /* Return last page */
+ if (likely(pool->alloc.count > 0))
+ page = pool->alloc.cache[--pool->alloc.count];
+
+ spin_unlock(&r->consumer_lock);
+ return page;
+}
+
/* fast path */
static struct page *__page_pool_get_cached(struct page_pool *pool)
{
- struct ptr_ring *r = &pool->ring;
bool refill = false;
struct page *page;
@@ -113,20 +168,7 @@ static struct page *__page_pool_get_cached(struct page_pool *pool)
refill = true;
}
- /* Quicker fallback, avoid locks when ring is empty */
- if (__ptr_ring_empty(r))
- return NULL;
-
- /* Slow-path: Get page from locked ring queue,
- * refill alloc array if requested.
- */
- spin_lock(&r->consumer_lock);
- page = __ptr_ring_consume(r);
- if (refill)
- pool->alloc.count = __ptr_ring_consume_batched(r,
- pool->alloc.cache,
- PP_ALLOC_CACHE_REFILL);
- spin_unlock(&r->consumer_lock);
+ page = page_pool_refill_alloc_cache(pool, refill);
return page;
}
@@ -163,7 +205,11 @@ static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool,
*/
/* Cache was empty, do real allocation */
+#ifdef CONFIG_NUMA
page = alloc_pages_node(pool->p.nid, gfp, pool->p.order);
+#else
+ page = alloc_pages(gfp, pool->p.order);
+#endif
if (!page)
return NULL;
@@ -311,13 +357,10 @@ static bool __page_pool_recycle_direct(struct page *page,
/* page is NOT reusable when:
* 1) allocated when system is under some pressure. (page_is_pfmemalloc)
- * 2) belongs to a different NUMA node than pool->p.nid.
- *
- * To update pool->p.nid users must call page_pool_update_nid.
*/
static bool pool_page_reusable(struct page_pool *pool, struct page *page)
{
- return !page_is_pfmemalloc(page) && page_to_nid(page) == pool->p.nid;
+ return !page_is_pfmemalloc(page);
}
void __page_pool_put_page(struct page_pool *pool, struct page *page,
@@ -484,7 +527,15 @@ EXPORT_SYMBOL(page_pool_destroy);
/* Caller must provide appropriate safe context, e.g. NAPI. */
void page_pool_update_nid(struct page_pool *pool, int new_nid)
{
+ struct page *page;
+
trace_page_pool_update_nid(pool, new_nid);
pool->p.nid = new_nid;
+
+ /* Flush pool alloc cache, as refill will check NUMA node */
+ while (pool->alloc.count) {
+ page = pool->alloc.cache[--pool->alloc.count];
+ __page_pool_return_page(pool, page);
+ }
}
EXPORT_SYMBOL(page_pool_update_nid);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 02916f43bf63..20bc406f3871 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1041,6 +1041,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
+ nla_total_size(4) /* IFLA_MIN_MTU */
+ nla_total_size(4) /* IFLA_MAX_MTU */
+ rtnl_prop_list_size(dev)
+ + nla_total_size(MAX_ADDR_LEN) /* IFLA_PERM_ADDRESS */
+ 0;
}
@@ -1757,6 +1758,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
nla_put_s32(skb, IFLA_NEW_IFINDEX, new_ifindex) < 0)
goto nla_put_failure;
+ if (memchr_inv(dev->perm_addr, '\0', dev->addr_len) &&
+ nla_put(skb, IFLA_PERM_ADDRESS, dev->addr_len, dev->perm_addr))
+ goto nla_put_failure;
rcu_read_lock();
if (rtnl_fill_link_af(skb, dev, ext_filter_mask))
@@ -1822,6 +1826,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
[IFLA_PROP_LIST] = { .type = NLA_NESTED },
[IFLA_ALT_IFNAME] = { .type = NLA_STRING,
.len = ALTIFNAMSIZ - 1 },
+ [IFLA_PERM_ADDRESS] = { .type = NLA_REJECT },
};
static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 973a71f4bc89..48a7029529c9 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -68,6 +68,7 @@
#include <net/ip6_checksum.h>
#include <net/xfrm.h>
#include <net/mpls.h>
+#include <net/mptcp.h>
#include <linux/uaccess.h>
#include <trace/events/skb.h>
@@ -4109,6 +4110,9 @@ static const u8 skb_ext_type_len[] = {
#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
[TC_SKB_EXT] = SKB_EXT_CHUNKSIZEOF(struct tc_skb_ext),
#endif
+#if IS_ENABLED(CONFIG_MPTCP)
+ [SKB_EXT_MPTCP] = SKB_EXT_CHUNKSIZEOF(struct mptcp_ext),
+#endif
};
static __always_inline unsigned int skb_ext_total_length(void)
@@ -4123,6 +4127,9 @@ static __always_inline unsigned int skb_ext_total_length(void)
#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
skb_ext_type_len[TC_SKB_EXT] +
#endif
+#if IS_ENABLED(CONFIG_MPTCP)
+ skb_ext_type_len[SKB_EXT_MPTCP] +
+#endif
0;
}
@@ -5472,12 +5479,15 @@ static void skb_mod_eth_type(struct sk_buff *skb, struct ethhdr *hdr,
}
/**
- * skb_mpls_push() - push a new MPLS header after the mac header
+ * skb_mpls_push() - push a new MPLS header after mac_len bytes from start of
+ * the packet
*
* @skb: buffer
* @mpls_lse: MPLS label stack entry to push
* @mpls_proto: ethertype of the new MPLS header (expects 0x8847 or 0x8848)
* @mac_len: length of the MAC header
+ * @ethernet: flag to indicate if the resulting packet after skb_mpls_push is
+ * ethernet
*
* Expects skb->data at mac header.
*
@@ -5501,7 +5511,7 @@ int skb_mpls_push(struct sk_buff *skb, __be32 mpls_lse, __be16 mpls_proto,
return err;
if (!skb->inner_protocol) {
- skb_set_inner_network_header(skb, mac_len);
+ skb_set_inner_network_header(skb, skb_network_offset(skb));
skb_set_inner_protocol(skb, skb->protocol);
}
@@ -5510,6 +5520,7 @@ int skb_mpls_push(struct sk_buff *skb, __be32 mpls_lse, __be16 mpls_proto,
mac_len);
skb_reset_mac_header(skb);
skb_set_network_header(skb, mac_len);
+ skb_reset_mac_len(skb);
lse = mpls_hdr(skb);
lse->label_stack_entry = mpls_lse;
@@ -5529,7 +5540,7 @@ EXPORT_SYMBOL_GPL(skb_mpls_push);
* @skb: buffer
* @next_proto: ethertype of header after popped MPLS header
* @mac_len: length of the MAC header
- * @ethernet: flag to indicate if ethernet header is present in packet
+ * @ethernet: flag to indicate if the packet is ethernet
*
* Expects skb->data at mac header.
*
@@ -5976,7 +5987,14 @@ static void *skb_ext_get_ptr(struct skb_ext *ext, enum skb_ext_id id)
return (void *)ext + (ext->offset[id] * SKB_EXT_ALIGN_VALUE);
}
-static struct skb_ext *skb_ext_alloc(void)
+/**
+ * __skb_ext_alloc - allocate a new skb extensions storage
+ *
+ * Returns the newly allocated pointer. The pointer can later attached to a
+ * skb via __skb_ext_set().
+ * Note: caller must handle the skb_ext as an opaque data.
+ */
+struct skb_ext *__skb_ext_alloc(void)
{
struct skb_ext *new = kmem_cache_alloc(skbuff_ext_cache, GFP_ATOMIC);
@@ -6017,6 +6035,30 @@ static struct skb_ext *skb_ext_maybe_cow(struct skb_ext *old,
}
/**
+ * __skb_ext_set - attach the specified extension storage to this skb
+ * @skb: buffer
+ * @id: extension id
+ * @ext: extension storage previously allocated via __skb_ext_alloc()
+ *
+ * Existing extensions, if any, are cleared.
+ *
+ * Returns the pointer to the extension.
+ */
+void *__skb_ext_set(struct sk_buff *skb, enum skb_ext_id id,
+ struct skb_ext *ext)
+{
+ unsigned int newlen, newoff = SKB_EXT_CHUNKSIZEOF(*ext);
+
+ skb_ext_put(skb);
+ newlen = newoff + skb_ext_type_len[id];
+ ext->chunks = newlen;
+ ext->offset[id] = newoff;
+ skb->extensions = ext;
+ skb->active_extensions = 1 << id;
+ return skb_ext_get_ptr(ext, id);
+}
+
+/**
* skb_ext_add - allocate space for given extension, COW if needed
* @skb: buffer
* @id: extension to allocate space for
@@ -6049,7 +6091,7 @@ void *skb_ext_add(struct sk_buff *skb, enum skb_ext_id id)
} else {
newoff = SKB_EXT_CHUNKSIZEOF(*new);
- new = skb_ext_alloc();
+ new = __skb_ext_alloc();
if (!new)
return NULL;
}
diff --git a/net/core/sock.c b/net/core/sock.c
index 043db3ce023e..8459ad579f73 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2916,7 +2916,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
sk->sk_max_pacing_rate = ~0UL;
sk->sk_pacing_rate = ~0UL;
- sk->sk_pacing_shift = 10;
+ WRITE_ONCE(sk->sk_pacing_shift, 10);
sk->sk_incoming_cpu = -1;
sk_rx_queue_clear(sk);
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index eb29e5adc84d..9f9e00ba3ad7 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -288,6 +288,7 @@ static int proc_dointvec_minmax_bpf_enable(struct ctl_table *table, int write,
return ret;
}
+# ifdef CONFIG_HAVE_EBPF_JIT
static int
proc_dointvec_minmax_bpf_restricted(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
@@ -298,6 +299,7 @@ proc_dointvec_minmax_bpf_restricted(struct ctl_table *table, int write,
return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
}
+# endif /* CONFIG_HAVE_EBPF_JIT */
static int
proc_dolongvec_minmax_bpf_restricted(struct ctl_table *table, int write,
diff --git a/net/core/timestamping.c b/net/core/timestamping.c
index 7911235706a9..04840697fe79 100644
--- a/net/core/timestamping.c
+++ b/net/core/timestamping.c
@@ -13,7 +13,7 @@
static unsigned int classify(const struct sk_buff *skb)
{
if (likely(skb->dev && skb->dev->phydev &&
- skb->dev->phydev->drv))
+ skb->dev->phydev->mii_ts))
return ptp_classify_raw(skb);
else
return PTP_CLASS_NONE;
@@ -21,7 +21,7 @@ static unsigned int classify(const struct sk_buff *skb)
void skb_clone_tx_timestamp(struct sk_buff *skb)
{
- struct phy_device *phydev;
+ struct mii_timestamper *mii_ts;
struct sk_buff *clone;
unsigned int type;
@@ -32,22 +32,22 @@ void skb_clone_tx_timestamp(struct sk_buff *skb)
if (type == PTP_CLASS_NONE)
return;
- phydev = skb->dev->phydev;
- if (likely(phydev->drv->txtstamp)) {
+ mii_ts = skb->dev->phydev->mii_ts;
+ if (likely(mii_ts->txtstamp)) {
clone = skb_clone_sk(skb);
if (!clone)
return;
- phydev->drv->txtstamp(phydev, clone, type);
+ mii_ts->txtstamp(mii_ts, clone, type);
}
}
EXPORT_SYMBOL_GPL(skb_clone_tx_timestamp);
bool skb_defer_rx_timestamp(struct sk_buff *skb)
{
- struct phy_device *phydev;
+ struct mii_timestamper *mii_ts;
unsigned int type;
- if (!skb->dev || !skb->dev->phydev || !skb->dev->phydev->drv)
+ if (!skb->dev || !skb->dev->phydev || !skb->dev->phydev->mii_ts)
return false;
if (skb_headroom(skb) < ETH_HLEN)
@@ -62,9 +62,9 @@ bool skb_defer_rx_timestamp(struct sk_buff *skb)
if (type == PTP_CLASS_NONE)
return false;
- phydev = skb->dev->phydev;
- if (likely(phydev->drv->rxtstamp))
- return phydev->drv->rxtstamp(phydev, skb, type);
+ mii_ts = skb->dev->phydev->mii_ts;
+ if (likely(mii_ts->rxtstamp))
+ return mii_ts->rxtstamp(mii_ts, skb, type);
return false;
}
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 7c8390ad4dc6..8310714c47fd 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -36,7 +36,7 @@ static u32 xdp_mem_id_hashfn(const void *data, u32 len, u32 seed)
const u32 *k = data;
const u32 key = *k;
- BUILD_BUG_ON(FIELD_SIZEOF(struct xdp_mem_allocator, mem.id)
+ BUILD_BUG_ON(sizeof_field(struct xdp_mem_allocator, mem.id)
!= sizeof(u32));
/* Use cyclic increasing ID as direct hash key */
@@ -56,7 +56,7 @@ static const struct rhashtable_params mem_id_rht_params = {
.nelem_hint = 64,
.head_offset = offsetof(struct xdp_mem_allocator, node),
.key_offset = offsetof(struct xdp_mem_allocator, mem.id),
- .key_len = FIELD_SIZEOF(struct xdp_mem_allocator, mem.id),
+ .key_len = sizeof_field(struct xdp_mem_allocator, mem.id),
.max_size = MEM_ID_MAX,
.min_size = 8,
.automatic_shrinking = true,
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index a52e8ba1ced0..4af8a98fe784 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -1132,7 +1132,7 @@ static int __init dccp_init(void)
int rc;
BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
- FIELD_SIZEOF(struct sk_buff, cb));
+ sizeof_field(struct sk_buff, cb));
rc = percpu_counter_init(&dccp_orphan_count, 0, GFP_KERNEL);
if (rc)
goto out_fail;
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index e19a92a62e14..0a46ea3bddd5 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -670,7 +670,7 @@ static int dn_create(struct net *net, struct socket *sock, int protocol,
{
struct sock *sk;
- if (protocol < 0 || protocol > SK_PROTOCOL_MAX)
+ if (protocol < 0 || protocol > U8_MAX)
return -EINVAL;
if (!net_eq(net, &init_net))
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index aea918135ec3..08c3dc45f1a4 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -110,7 +110,8 @@ static void dn_dst_ifdown(struct dst_entry *, struct net_device *dev, int how);
static struct dst_entry *dn_dst_negative_advice(struct dst_entry *);
static void dn_dst_link_failure(struct sk_buff *);
static void dn_dst_update_pmtu(struct dst_entry *dst, struct sock *sk,
- struct sk_buff *skb , u32 mtu);
+ struct sk_buff *skb , u32 mtu,
+ bool confirm_neigh);
static void dn_dst_redirect(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb);
static struct neighbour *dn_dst_neigh_lookup(const struct dst_entry *dst,
@@ -251,7 +252,8 @@ static int dn_dst_gc(struct dst_ops *ops)
* advertise to the other end).
*/
static void dn_dst_update_pmtu(struct dst_entry *dst, struct sock *sk,
- struct sk_buff *skb, u32 mtu)
+ struct sk_buff *skb, u32 mtu,
+ bool confirm_neigh)
{
struct dn_route *rt = (struct dn_route *) dst;
struct neighbour *n = rt->n;
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index 1e6c3cac11e6..92663dcb3aa2 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -29,6 +29,12 @@ config NET_DSA_TAG_8021Q
Drivers which use these helpers should select this as dependency.
+config NET_DSA_TAG_AR9331
+ tristate "Tag driver for Atheros AR9331 SoC with built-in switch"
+ help
+ Say Y or M if you want to enable support for tagging frames for
+ the Atheros AR9331 SoC with built-in switch.
+
config NET_DSA_TAG_BRCM_COMMON
tristate
default n
diff --git a/net/dsa/Makefile b/net/dsa/Makefile
index 9a482c38bdb1..108486cfdeef 100644
--- a/net/dsa/Makefile
+++ b/net/dsa/Makefile
@@ -5,6 +5,7 @@ dsa_core-y += dsa.o dsa2.o master.o port.o slave.o switch.o
# tagging formats
obj-$(CONFIG_NET_DSA_TAG_8021Q) += tag_8021q.o
+obj-$(CONFIG_NET_DSA_TAG_AR9331) += tag_ar9331.o
obj-$(CONFIG_NET_DSA_TAG_BRCM_COMMON) += tag_brcm.o
obj-$(CONFIG_NET_DSA_TAG_DSA) += tag_dsa.o
obj-$(CONFIG_NET_DSA_TAG_EDSA) += tag_edsa.o
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index 9ef2caa13f27..c6d81f2baf4e 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -124,7 +124,8 @@ static struct dsa_port *dsa_tree_find_port_by_node(struct dsa_switch_tree *dst,
return NULL;
}
-struct dsa_link *dsa_link_touch(struct dsa_port *dp, struct dsa_port *link_dp)
+static struct dsa_link *dsa_link_touch(struct dsa_port *dp,
+ struct dsa_port *link_dp)
{
struct dsa_switch *ds = dp->ds;
struct dsa_switch_tree *dst;
@@ -613,6 +614,32 @@ static int dsa_port_parse_dsa(struct dsa_port *dp)
return 0;
}
+static enum dsa_tag_protocol dsa_get_tag_protocol(struct dsa_port *dp,
+ struct net_device *master)
+{
+ enum dsa_tag_protocol tag_protocol = DSA_TAG_PROTO_NONE;
+ struct dsa_switch *mds, *ds = dp->ds;
+ unsigned int mdp_upstream;
+ struct dsa_port *mdp;
+
+ /* It is possible to stack DSA switches onto one another when that
+ * happens the switch driver may want to know if its tagging protocol
+ * is going to work in such a configuration.
+ */
+ if (dsa_slave_dev_check(master)) {
+ mdp = dsa_slave_to_port(master);
+ mds = mdp->ds;
+ mdp_upstream = dsa_upstream_port(mds, mdp->index);
+ tag_protocol = mds->ops->get_tag_protocol(mds, mdp_upstream,
+ DSA_TAG_PROTO_NONE);
+ }
+
+ /* If the master device is not itself a DSA slave in a disjoint DSA
+ * tree, then return immediately.
+ */
+ return ds->ops->get_tag_protocol(ds, dp->index, tag_protocol);
+}
+
static int dsa_port_parse_cpu(struct dsa_port *dp, struct net_device *master)
{
struct dsa_switch *ds = dp->ds;
@@ -620,20 +647,21 @@ static int dsa_port_parse_cpu(struct dsa_port *dp, struct net_device *master)
const struct dsa_device_ops *tag_ops;
enum dsa_tag_protocol tag_protocol;
- tag_protocol = ds->ops->get_tag_protocol(ds, dp->index);
+ tag_protocol = dsa_get_tag_protocol(dp, master);
tag_ops = dsa_tag_driver_get(tag_protocol);
if (IS_ERR(tag_ops)) {
if (PTR_ERR(tag_ops) == -ENOPROTOOPT)
return -EPROBE_DEFER;
dev_warn(ds->dev, "No tagger for this switch\n");
+ dp->master = NULL;
return PTR_ERR(tag_ops);
}
+ dp->master = master;
dp->type = DSA_PORT_TYPE_CPU;
dp->filter = tag_ops->filter;
dp->rcv = tag_ops->rcv;
dp->tag_ops = tag_ops;
- dp->master = master;
dp->dst = dst;
return 0;
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 2dd86d9bcda9..a7662e7a691d 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -150,22 +150,6 @@ int dsa_port_vid_add(struct dsa_port *dp, u16 vid, u16 flags);
int dsa_port_vid_del(struct dsa_port *dp, u16 vid);
int dsa_port_link_register_of(struct dsa_port *dp);
void dsa_port_link_unregister_of(struct dsa_port *dp);
-void dsa_port_phylink_validate(struct phylink_config *config,
- unsigned long *supported,
- struct phylink_link_state *state);
-void dsa_port_phylink_mac_pcs_get_state(struct phylink_config *config,
- struct phylink_link_state *state);
-void dsa_port_phylink_mac_config(struct phylink_config *config,
- unsigned int mode,
- const struct phylink_link_state *state);
-void dsa_port_phylink_mac_an_restart(struct phylink_config *config);
-void dsa_port_phylink_mac_link_down(struct phylink_config *config,
- unsigned int mode,
- phy_interface_t interface);
-void dsa_port_phylink_mac_link_up(struct phylink_config *config,
- unsigned int mode,
- phy_interface_t interface,
- struct phy_device *phydev);
extern const struct phylink_mac_ops dsa_port_phylink_mac_ops;
/* slave.c */
@@ -173,13 +157,12 @@ extern const struct dsa_device_ops notag_netdev_ops;
void dsa_slave_mii_bus_init(struct dsa_switch *ds);
int dsa_slave_create(struct dsa_port *dp);
void dsa_slave_destroy(struct net_device *slave_dev);
+bool dsa_slave_dev_check(const struct net_device *dev);
int dsa_slave_suspend(struct net_device *slave_dev);
int dsa_slave_resume(struct net_device *slave_dev);
int dsa_slave_register_notifier(void);
void dsa_slave_unregister_notifier(void);
-void *dsa_defer_xmit(struct sk_buff *skb, struct net_device *dev);
-
static inline struct dsa_port *dsa_slave_to_port(const struct net_device *dev)
{
struct dsa_slave_priv *p = netdev_priv(dev);
diff --git a/net/dsa/master.c b/net/dsa/master.c
index 3255dfc97f86..bd44bde272f4 100644
--- a/net/dsa/master.c
+++ b/net/dsa/master.c
@@ -197,6 +197,35 @@ static int dsa_master_get_phys_port_name(struct net_device *dev,
return 0;
}
+static int dsa_master_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+{
+ struct dsa_port *cpu_dp = dev->dsa_ptr;
+ struct dsa_switch *ds = cpu_dp->ds;
+ struct dsa_switch_tree *dst;
+ int err = -EOPNOTSUPP;
+ struct dsa_port *dp;
+
+ dst = ds->dst;
+
+ switch (cmd) {
+ case SIOCGHWTSTAMP:
+ case SIOCSHWTSTAMP:
+ /* Deny PTP operations on master if there is at least one
+ * switch in the tree that is PTP capable.
+ */
+ list_for_each_entry(dp, &dst->ports, list)
+ if (dp->ds->ops->port_hwtstamp_get ||
+ dp->ds->ops->port_hwtstamp_set)
+ return -EBUSY;
+ break;
+ }
+
+ if (cpu_dp->orig_ndo_ops && cpu_dp->orig_ndo_ops->ndo_do_ioctl)
+ err = cpu_dp->orig_ndo_ops->ndo_do_ioctl(dev, ifr, cmd);
+
+ return err;
+}
+
static int dsa_master_ethtool_setup(struct net_device *dev)
{
struct dsa_port *cpu_dp = dev->dsa_ptr;
@@ -249,6 +278,7 @@ static int dsa_master_ndo_setup(struct net_device *dev)
memcpy(ops, cpu_dp->orig_ndo_ops, sizeof(*ops));
ops->ndo_get_phys_port_name = dsa_master_get_phys_port_name;
+ ops->ndo_do_ioctl = dsa_master_ioctl;
dev->netdev_ops = ops;
diff --git a/net/dsa/port.c b/net/dsa/port.c
index 46ac9ba21987..774facb8d547 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -415,9 +415,9 @@ static struct phy_device *dsa_port_get_phy_device(struct dsa_port *dp)
return phydev;
}
-void dsa_port_phylink_validate(struct phylink_config *config,
- unsigned long *supported,
- struct phylink_link_state *state)
+static void dsa_port_phylink_validate(struct phylink_config *config,
+ unsigned long *supported,
+ struct phylink_link_state *state)
{
struct dsa_port *dp = container_of(config, struct dsa_port, pl_config);
struct dsa_switch *ds = dp->ds;
@@ -427,10 +427,9 @@ void dsa_port_phylink_validate(struct phylink_config *config,
ds->ops->phylink_validate(ds, dp->index, supported, state);
}
-EXPORT_SYMBOL_GPL(dsa_port_phylink_validate);
-void dsa_port_phylink_mac_pcs_get_state(struct phylink_config *config,
- struct phylink_link_state *state)
+static void dsa_port_phylink_mac_pcs_get_state(struct phylink_config *config,
+ struct phylink_link_state *state)
{
struct dsa_port *dp = container_of(config, struct dsa_port, pl_config);
struct dsa_switch *ds = dp->ds;
@@ -444,11 +443,10 @@ void dsa_port_phylink_mac_pcs_get_state(struct phylink_config *config,
if (ds->ops->phylink_mac_link_state(ds, dp->index, state) < 0)
state->link = 0;
}
-EXPORT_SYMBOL_GPL(dsa_port_phylink_mac_pcs_get_state);
-void dsa_port_phylink_mac_config(struct phylink_config *config,
- unsigned int mode,
- const struct phylink_link_state *state)
+static void dsa_port_phylink_mac_config(struct phylink_config *config,
+ unsigned int mode,
+ const struct phylink_link_state *state)
{
struct dsa_port *dp = container_of(config, struct dsa_port, pl_config);
struct dsa_switch *ds = dp->ds;
@@ -458,9 +456,8 @@ void dsa_port_phylink_mac_config(struct phylink_config *config,
ds->ops->phylink_mac_config(ds, dp->index, mode, state);
}
-EXPORT_SYMBOL_GPL(dsa_port_phylink_mac_config);
-void dsa_port_phylink_mac_an_restart(struct phylink_config *config)
+static void dsa_port_phylink_mac_an_restart(struct phylink_config *config)
{
struct dsa_port *dp = container_of(config, struct dsa_port, pl_config);
struct dsa_switch *ds = dp->ds;
@@ -470,11 +467,10 @@ void dsa_port_phylink_mac_an_restart(struct phylink_config *config)
ds->ops->phylink_mac_an_restart(ds, dp->index);
}
-EXPORT_SYMBOL_GPL(dsa_port_phylink_mac_an_restart);
-void dsa_port_phylink_mac_link_down(struct phylink_config *config,
- unsigned int mode,
- phy_interface_t interface)
+static void dsa_port_phylink_mac_link_down(struct phylink_config *config,
+ unsigned int mode,
+ phy_interface_t interface)
{
struct dsa_port *dp = container_of(config, struct dsa_port, pl_config);
struct phy_device *phydev = NULL;
@@ -491,12 +487,11 @@ void dsa_port_phylink_mac_link_down(struct phylink_config *config,
ds->ops->phylink_mac_link_down(ds, dp->index, mode, interface);
}
-EXPORT_SYMBOL_GPL(dsa_port_phylink_mac_link_down);
-void dsa_port_phylink_mac_link_up(struct phylink_config *config,
- unsigned int mode,
- phy_interface_t interface,
- struct phy_device *phydev)
+static void dsa_port_phylink_mac_link_up(struct phylink_config *config,
+ unsigned int mode,
+ phy_interface_t interface,
+ struct phy_device *phydev)
{
struct dsa_port *dp = container_of(config, struct dsa_port, pl_config);
struct dsa_switch *ds = dp->ds;
@@ -509,7 +504,6 @@ void dsa_port_phylink_mac_link_up(struct phylink_config *config,
ds->ops->phylink_mac_link_up(ds, dp->index, mode, interface, phydev);
}
-EXPORT_SYMBOL_GPL(dsa_port_phylink_mac_link_up);
const struct phylink_mac_ops dsa_port_phylink_mac_ops = {
.validate = dsa_port_phylink_validate,
@@ -605,6 +599,7 @@ static int dsa_port_phylink_register(struct dsa_port *dp)
dp->pl_config.dev = ds->dev;
dp->pl_config.type = PHYLINK_DEV;
+ dp->pl_config.pcs_poll = ds->pcs_poll;
dp->pl = phylink_create(&dp->pl_config, of_fwnode_handle(port_dn),
mode, &dsa_port_phylink_mac_ops);
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 78ffc87dc25e..088c886e609e 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -22,8 +22,6 @@
#include "dsa_priv.h"
-static bool dsa_slave_dev_check(const struct net_device *dev);
-
/* slave mii_bus handling ***************************************************/
static int dsa_slave_phy_read(struct mii_bus *bus, int addr, int reg)
{
@@ -116,9 +114,6 @@ static int dsa_slave_close(struct net_device *dev)
struct net_device *master = dsa_slave_to_master(dev);
struct dsa_port *dp = dsa_slave_to_port(dev);
- cancel_work_sync(&dp->xmit_work);
- skb_queue_purge(&dp->xmit_queue);
-
phylink_stop(dp->pl);
dsa_port_disable(dp);
@@ -518,7 +513,6 @@ static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev)
s->tx_bytes += skb->len;
u64_stats_update_end(&s->syncp);
- DSA_SKB_CB(skb)->deferred_xmit = false;
DSA_SKB_CB(skb)->clone = NULL;
/* Identify PTP protocol packets, clone them, and pass them to the
@@ -531,39 +525,13 @@ static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev)
*/
nskb = p->xmit(skb, dev);
if (!nskb) {
- if (!DSA_SKB_CB(skb)->deferred_xmit)
- kfree_skb(skb);
+ kfree_skb(skb);
return NETDEV_TX_OK;
}
return dsa_enqueue_skb(nskb, dev);
}
-void *dsa_defer_xmit(struct sk_buff *skb, struct net_device *dev)
-{
- struct dsa_port *dp = dsa_slave_to_port(dev);
-
- DSA_SKB_CB(skb)->deferred_xmit = true;
-
- skb_queue_tail(&dp->xmit_queue, skb);
- schedule_work(&dp->xmit_work);
- return NULL;
-}
-EXPORT_SYMBOL_GPL(dsa_defer_xmit);
-
-static void dsa_port_xmit_work(struct work_struct *work)
-{
- struct dsa_port *dp = container_of(work, struct dsa_port, xmit_work);
- struct dsa_switch *ds = dp->ds;
- struct sk_buff *skb;
-
- if (unlikely(!ds->ops->port_deferred_xmit))
- return;
-
- while ((skb = skb_dequeue(&dp->xmit_queue)) != NULL)
- ds->ops->port_deferred_xmit(ds, dp->index, skb);
-}
-
/* ethtool operations *******************************************************/
static void dsa_slave_get_drvinfo(struct net_device *dev,
@@ -1367,9 +1335,6 @@ int dsa_slave_suspend(struct net_device *slave_dev)
if (!netif_running(slave_dev))
return 0;
- cancel_work_sync(&dp->xmit_work);
- skb_queue_purge(&dp->xmit_queue);
-
netif_device_detach(slave_dev);
rtnl_lock();
@@ -1455,8 +1420,6 @@ int dsa_slave_create(struct dsa_port *port)
}
p->dp = port;
INIT_LIST_HEAD(&p->mall_tc_list);
- INIT_WORK(&port->xmit_work, dsa_port_xmit_work);
- skb_queue_head_init(&port->xmit_queue);
p->xmit = cpu_dp->tag_ops->xmit;
port->slave = slave_dev;
@@ -1508,7 +1471,7 @@ void dsa_slave_destroy(struct net_device *slave_dev)
free_netdev(slave_dev);
}
-static bool dsa_slave_dev_check(const struct net_device *dev)
+bool dsa_slave_dev_check(const struct net_device *dev)
{
return dev->netdev_ops == &dsa_slave_netdev_ops;
}
diff --git a/net/dsa/tag_ar9331.c b/net/dsa/tag_ar9331.c
new file mode 100644
index 000000000000..466ffa92a474
--- /dev/null
+++ b/net/dsa/tag_ar9331.c
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2019 Pengutronix, Oleksij Rempel <[email protected]>
+ */
+
+
+#include <linux/bitfield.h>
+#include <linux/etherdevice.h>
+
+#include "dsa_priv.h"
+
+#define AR9331_HDR_LEN 2
+#define AR9331_HDR_VERSION 1
+
+#define AR9331_HDR_VERSION_MASK GENMASK(15, 14)
+#define AR9331_HDR_PRIORITY_MASK GENMASK(13, 12)
+#define AR9331_HDR_TYPE_MASK GENMASK(10, 8)
+#define AR9331_HDR_BROADCAST BIT(7)
+#define AR9331_HDR_FROM_CPU BIT(6)
+/* AR9331_HDR_RESERVED - not used or may be version field.
+ * According to the AR8216 doc it should 0b10. On AR9331 it is 0b11 on RX path
+ * and should be set to 0b11 to make it work.
+ */
+#define AR9331_HDR_RESERVED_MASK GENMASK(5, 4)
+#define AR9331_HDR_PORT_NUM_MASK GENMASK(3, 0)
+
+static struct sk_buff *ar9331_tag_xmit(struct sk_buff *skb,
+ struct net_device *dev)
+{
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ __le16 *phdr;
+ u16 hdr;
+
+ if (skb_cow_head(skb, 0) < 0)
+ return NULL;
+
+ phdr = skb_push(skb, AR9331_HDR_LEN);
+
+ hdr = FIELD_PREP(AR9331_HDR_VERSION_MASK, AR9331_HDR_VERSION);
+ hdr |= AR9331_HDR_FROM_CPU | dp->index;
+ /* 0b10 for AR8216 and 0b11 for AR9331 */
+ hdr |= AR9331_HDR_RESERVED_MASK;
+
+ phdr[0] = cpu_to_le16(hdr);
+
+ return skb;
+}
+
+static struct sk_buff *ar9331_tag_rcv(struct sk_buff *skb,
+ struct net_device *ndev,
+ struct packet_type *pt)
+{
+ u8 ver, port;
+ u16 hdr;
+
+ if (unlikely(!pskb_may_pull(skb, AR9331_HDR_LEN)))
+ return NULL;
+
+ hdr = le16_to_cpu(*(__le16 *)skb_mac_header(skb));
+
+ ver = FIELD_GET(AR9331_HDR_VERSION_MASK, hdr);
+ if (unlikely(ver != AR9331_HDR_VERSION)) {
+ netdev_warn_once(ndev, "%s:%i wrong header version 0x%2x\n",
+ __func__, __LINE__, hdr);
+ return NULL;
+ }
+
+ if (unlikely(hdr & AR9331_HDR_FROM_CPU)) {
+ netdev_warn_once(ndev, "%s:%i packet should not be from cpu 0x%2x\n",
+ __func__, __LINE__, hdr);
+ return NULL;
+ }
+
+ skb_pull_rcsum(skb, AR9331_HDR_LEN);
+
+ /* Get source port information */
+ port = FIELD_GET(AR9331_HDR_PORT_NUM_MASK, hdr);
+
+ skb->dev = dsa_master_find_slave(ndev, 0, port);
+ if (!skb->dev)
+ return NULL;
+
+ return skb;
+}
+
+static const struct dsa_device_ops ar9331_netdev_ops = {
+ .name = "ar9331",
+ .proto = DSA_TAG_PROTO_AR9331,
+ .xmit = ar9331_tag_xmit,
+ .rcv = ar9331_tag_rcv,
+ .overhead = AR9331_HDR_LEN,
+};
+
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_AR9331);
+module_dsa_tag_driver(ar9331_netdev_ops);
diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c
index 73605bcbb385..90d055c4df9e 100644
--- a/net/dsa/tag_ksz.c
+++ b/net/dsa/tag_ksz.c
@@ -84,8 +84,6 @@ static struct sk_buff *ksz_common_rcv(struct sk_buff *skb,
* (eg, 0x00=port1, 0x02=port3, 0x06=port7)
*/
-#define KSZ8795_INGRESS_TAG_LEN 1
-
#define KSZ8795_TAIL_TAG_OVERRIDE BIT(6)
#define KSZ8795_TAIL_TAG_LOOKUP BIT(7)
@@ -96,12 +94,12 @@ static struct sk_buff *ksz8795_xmit(struct sk_buff *skb, struct net_device *dev)
u8 *tag;
u8 *addr;
- nskb = ksz_common_xmit(skb, dev, KSZ8795_INGRESS_TAG_LEN);
+ nskb = ksz_common_xmit(skb, dev, KSZ_INGRESS_TAG_LEN);
if (!nskb)
return NULL;
/* Tag encoding */
- tag = skb_put(nskb, KSZ8795_INGRESS_TAG_LEN);
+ tag = skb_put(nskb, KSZ_INGRESS_TAG_LEN);
addr = skb_mac_header(nskb);
*tag = 1 << dp->index;
@@ -124,7 +122,7 @@ static const struct dsa_device_ops ksz8795_netdev_ops = {
.proto = DSA_TAG_PROTO_KSZ8795,
.xmit = ksz8795_xmit,
.rcv = ksz8795_rcv,
- .overhead = KSZ8795_INGRESS_TAG_LEN,
+ .overhead = KSZ_INGRESS_TAG_LEN,
};
DSA_TAG_DRIVER(ksz8795_netdev_ops);
diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c
index 63ef2a14c934..5366ea430349 100644
--- a/net/dsa/tag_sja1105.c
+++ b/net/dsa/tag_sja1105.c
@@ -83,12 +83,24 @@ static bool sja1105_filter(const struct sk_buff *skb, struct net_device *dev)
return false;
}
+/* Calls sja1105_port_deferred_xmit in sja1105_main.c */
+static struct sk_buff *sja1105_defer_xmit(struct sja1105_port *sp,
+ struct sk_buff *skb)
+{
+ /* Increase refcount so the kfree_skb in dsa_slave_xmit
+ * won't really free the packet.
+ */
+ skb_queue_tail(&sp->xmit_queue, skb_get(skb));
+ kthread_queue_work(sp->xmit_worker, &sp->xmit_work);
+
+ return NULL;
+}
+
static struct sk_buff *sja1105_xmit(struct sk_buff *skb,
struct net_device *netdev)
{
struct dsa_port *dp = dsa_slave_to_port(netdev);
- struct dsa_switch *ds = dp->ds;
- u16 tx_vid = dsa_8021q_tx_vid(ds, dp->index);
+ u16 tx_vid = dsa_8021q_tx_vid(dp->ds, dp->index);
u16 queue_mapping = skb_get_queue_mapping(skb);
u8 pcp = netdev_txq_to_tc(netdev, queue_mapping);
@@ -97,7 +109,7 @@ static struct sk_buff *sja1105_xmit(struct sk_buff *skb,
* is the .port_deferred_xmit driver callback.
*/
if (unlikely(sja1105_is_link_local(skb)))
- return dsa_defer_xmit(skb, netdev);
+ return sja1105_defer_xmit(dp->priv, skb);
/* If we are under a vlan_filtering bridge, IP termination on
* switch ports based on 802.1Q tags is simply too brittle to
diff --git a/net/ethtool/Makefile b/net/ethtool/Makefile
new file mode 100644
index 000000000000..9a1332fb0cc6
--- /dev/null
+++ b/net/ethtool/Makefile
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+obj-y += ioctl.o common.o
+
+obj-$(CONFIG_ETHTOOL_NETLINK) += ethtool_nl.o
+
+ethtool_nl-y := netlink.o bitset.o strset.o linkinfo.o linkmodes.o \
+ linkstate.o
diff --git a/net/ethtool/bitset.c b/net/ethtool/bitset.c
new file mode 100644
index 000000000000..fce45dac4205
--- /dev/null
+++ b/net/ethtool/bitset.c
@@ -0,0 +1,735 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/ethtool_netlink.h>
+#include <linux/bitmap.h>
+#include "netlink.h"
+#include "bitset.h"
+
+/* Some bitmaps are internally represented as an array of unsigned long, some
+ * as an array of u32 (some even as single u32 for now). To avoid the need of
+ * wrappers on caller side, we provide two set of functions: those with "32"
+ * suffix in their names expect u32 based bitmaps, those without it expect
+ * unsigned long bitmaps.
+ */
+
+static u32 ethnl_lower_bits(unsigned int n)
+{
+ return ~(u32)0 >> (32 - n % 32);
+}
+
+static u32 ethnl_upper_bits(unsigned int n)
+{
+ return ~(u32)0 << (n % 32);
+}
+
+/**
+ * ethnl_bitmap32_clear() - Clear u32 based bitmap
+ * @dst: bitmap to clear
+ * @start: beginning of the interval
+ * @end: end of the interval
+ * @mod: set if bitmap was modified
+ *
+ * Clear @nbits bits of a bitmap with indices @start <= i < @end
+ */
+static void ethnl_bitmap32_clear(u32 *dst, unsigned int start, unsigned int end,
+ bool *mod)
+{
+ unsigned int start_word = start / 32;
+ unsigned int end_word = end / 32;
+ unsigned int i;
+ u32 mask;
+
+ if (end <= start)
+ return;
+
+ if (start % 32) {
+ mask = ethnl_upper_bits(start);
+ if (end_word == start_word) {
+ mask &= ethnl_lower_bits(end);
+ if (dst[start_word] & mask) {
+ dst[start_word] &= ~mask;
+ *mod = true;
+ }
+ return;
+ }
+ if (dst[start_word] & mask) {
+ dst[start_word] &= ~mask;
+ *mod = true;
+ }
+ start_word++;
+ }
+
+ for (i = start_word; i < end_word; i++) {
+ if (dst[i]) {
+ dst[i] = 0;
+ *mod = true;
+ }
+ }
+ if (end % 32) {
+ mask = ethnl_lower_bits(end);
+ if (dst[end_word] & mask) {
+ dst[end_word] &= ~mask;
+ *mod = true;
+ }
+ }
+}
+
+/**
+ * ethnl_bitmap32_not_zero() - Check if any bit is set in an interval
+ * @map: bitmap to test
+ * @start: beginning of the interval
+ * @end: end of the interval
+ *
+ * Return: true if there is non-zero bit with index @start <= i < @end,
+ * false if the whole interval is zero
+ */
+static bool ethnl_bitmap32_not_zero(const u32 *map, unsigned int start,
+ unsigned int end)
+{
+ unsigned int start_word = start / 32;
+ unsigned int end_word = end / 32;
+ u32 mask;
+
+ if (end <= start)
+ return true;
+
+ if (start % 32) {
+ mask = ethnl_upper_bits(start);
+ if (end_word == start_word) {
+ mask &= ethnl_lower_bits(end);
+ return map[start_word] & mask;
+ }
+ if (map[start_word] & mask)
+ return true;
+ start_word++;
+ }
+
+ if (!memchr_inv(map + start_word, '\0',
+ (end_word - start_word) * sizeof(u32)))
+ return true;
+ if (end % 32 == 0)
+ return true;
+ return map[end_word] & ethnl_lower_bits(end);
+}
+
+/**
+ * ethnl_bitmap32_update() - Modify u32 based bitmap according to value/mask
+ * pair
+ * @dst: bitmap to update
+ * @nbits: bit size of the bitmap
+ * @value: values to set
+ * @mask: mask of bits to set
+ * @mod: set to true if bitmap is modified, preserve if not
+ *
+ * Set bits in @dst bitmap which are set in @mask to values from @value, leave
+ * the rest untouched. If destination bitmap was modified, set @mod to true,
+ * leave as it is if not.
+ */
+static void ethnl_bitmap32_update(u32 *dst, unsigned int nbits,
+ const u32 *value, const u32 *mask, bool *mod)
+{
+ while (nbits > 0) {
+ u32 real_mask = mask ? *mask : ~(u32)0;
+ u32 new_value;
+
+ if (nbits < 32)
+ real_mask &= ethnl_lower_bits(nbits);
+ new_value = (*dst & ~real_mask) | (*value & real_mask);
+ if (new_value != *dst) {
+ *dst = new_value;
+ *mod = true;
+ }
+
+ if (nbits <= 32)
+ break;
+ dst++;
+ nbits -= 32;
+ value++;
+ if (mask)
+ mask++;
+ }
+}
+
+static bool ethnl_bitmap32_test_bit(const u32 *map, unsigned int index)
+{
+ return map[index / 32] & (1U << (index % 32));
+}
+
+/**
+ * ethnl_bitset32_size() - Calculate size of bitset nested attribute
+ * @val: value bitmap (u32 based)
+ * @mask: mask bitmap (u32 based, optional)
+ * @nbits: bit length of the bitset
+ * @names: array of bit names (optional)
+ * @compact: assume compact format for output
+ *
+ * Estimate length of netlink attribute composed by a later call to
+ * ethnl_put_bitset32() call with the same arguments.
+ *
+ * Return: negative error code or attribute length estimate
+ */
+int ethnl_bitset32_size(const u32 *val, const u32 *mask, unsigned int nbits,
+ ethnl_string_array_t names, bool compact)
+{
+ unsigned int len = 0;
+
+ /* list flag */
+ if (!mask)
+ len += nla_total_size(sizeof(u32));
+ /* size */
+ len += nla_total_size(sizeof(u32));
+
+ if (compact) {
+ unsigned int nwords = DIV_ROUND_UP(nbits, 32);
+
+ /* value, mask */
+ len += (mask ? 2 : 1) * nla_total_size(nwords * sizeof(u32));
+ } else {
+ unsigned int bits_len = 0;
+ unsigned int bit_len, i;
+
+ for (i = 0; i < nbits; i++) {
+ const char *name = names ? names[i] : NULL;
+
+ if (!ethnl_bitmap32_test_bit(mask ?: val, i))
+ continue;
+ /* index */
+ bit_len = nla_total_size(sizeof(u32));
+ /* name */
+ if (name)
+ bit_len += ethnl_strz_size(name);
+ /* value */
+ if (mask && ethnl_bitmap32_test_bit(val, i))
+ bit_len += nla_total_size(0);
+
+ /* bit nest */
+ bits_len += nla_total_size(bit_len);
+ }
+ /* bits nest */
+ len += nla_total_size(bits_len);
+ }
+
+ /* outermost nest */
+ return nla_total_size(len);
+}
+
+/**
+ * ethnl_put_bitset32() - Put a bitset nest into a message
+ * @skb: skb with the message
+ * @attrtype: attribute type for the bitset nest
+ * @val: value bitmap (u32 based)
+ * @mask: mask bitmap (u32 based, optional)
+ * @nbits: bit length of the bitset
+ * @names: array of bit names (optional)
+ * @compact: use compact format for the output
+ *
+ * Compose a nested attribute representing a bitset. If @mask is null, simple
+ * bitmap (bit list) is created, if @mask is provided, represent a value/mask
+ * pair. Bit names are only used in verbose mode and when provided by calller.
+ *
+ * Return: 0 on success, negative error value on error
+ */
+int ethnl_put_bitset32(struct sk_buff *skb, int attrtype, const u32 *val,
+ const u32 *mask, unsigned int nbits,
+ ethnl_string_array_t names, bool compact)
+{
+ struct nlattr *nest;
+ struct nlattr *attr;
+
+ nest = nla_nest_start(skb, attrtype);
+ if (!nest)
+ return -EMSGSIZE;
+
+ if (!mask && nla_put_flag(skb, ETHTOOL_A_BITSET_NOMASK))
+ goto nla_put_failure;
+ if (nla_put_u32(skb, ETHTOOL_A_BITSET_SIZE, nbits))
+ goto nla_put_failure;
+ if (compact) {
+ unsigned int nwords = DIV_ROUND_UP(nbits, 32);
+ unsigned int nbytes = nwords * sizeof(u32);
+ u32 *dst;
+
+ attr = nla_reserve(skb, ETHTOOL_A_BITSET_VALUE, nbytes);
+ if (!attr)
+ goto nla_put_failure;
+ dst = nla_data(attr);
+ memcpy(dst, val, nbytes);
+ if (nbits % 32)
+ dst[nwords - 1] &= ethnl_lower_bits(nbits);
+
+ if (mask) {
+ attr = nla_reserve(skb, ETHTOOL_A_BITSET_MASK, nbytes);
+ if (!attr)
+ goto nla_put_failure;
+ dst = nla_data(attr);
+ memcpy(dst, mask, nbytes);
+ if (nbits % 32)
+ dst[nwords - 1] &= ethnl_lower_bits(nbits);
+ }
+ } else {
+ struct nlattr *bits;
+ unsigned int i;
+
+ bits = nla_nest_start(skb, ETHTOOL_A_BITSET_BITS);
+ if (!bits)
+ goto nla_put_failure;
+ for (i = 0; i < nbits; i++) {
+ const char *name = names ? names[i] : NULL;
+
+ if (!ethnl_bitmap32_test_bit(mask ?: val, i))
+ continue;
+ attr = nla_nest_start(skb, ETHTOOL_A_BITSET_BITS_BIT);
+ if (!attr)
+ goto nla_put_failure;
+ if (nla_put_u32(skb, ETHTOOL_A_BITSET_BIT_INDEX, i))
+ goto nla_put_failure;
+ if (name &&
+ ethnl_put_strz(skb, ETHTOOL_A_BITSET_BIT_NAME, name))
+ goto nla_put_failure;
+ if (mask && ethnl_bitmap32_test_bit(val, i) &&
+ nla_put_flag(skb, ETHTOOL_A_BITSET_BIT_VALUE))
+ goto nla_put_failure;
+ nla_nest_end(skb, attr);
+ }
+ nla_nest_end(skb, bits);
+ }
+
+ nla_nest_end(skb, nest);
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, nest);
+ return -EMSGSIZE;
+}
+
+static const struct nla_policy bitset_policy[ETHTOOL_A_BITSET_MAX + 1] = {
+ [ETHTOOL_A_BITSET_UNSPEC] = { .type = NLA_REJECT },
+ [ETHTOOL_A_BITSET_NOMASK] = { .type = NLA_FLAG },
+ [ETHTOOL_A_BITSET_SIZE] = { .type = NLA_U32 },
+ [ETHTOOL_A_BITSET_BITS] = { .type = NLA_NESTED },
+ [ETHTOOL_A_BITSET_VALUE] = { .type = NLA_BINARY },
+ [ETHTOOL_A_BITSET_MASK] = { .type = NLA_BINARY },
+};
+
+static const struct nla_policy bit_policy[ETHTOOL_A_BITSET_BIT_MAX + 1] = {
+ [ETHTOOL_A_BITSET_BIT_UNSPEC] = { .type = NLA_REJECT },
+ [ETHTOOL_A_BITSET_BIT_INDEX] = { .type = NLA_U32 },
+ [ETHTOOL_A_BITSET_BIT_NAME] = { .type = NLA_NUL_STRING },
+ [ETHTOOL_A_BITSET_BIT_VALUE] = { .type = NLA_FLAG },
+};
+
+/**
+ * ethnl_bitset_is_compact() - check if bitset attribute represents a compact
+ * bitset
+ * @bitset: nested attribute representing a bitset
+ * @compact: pointer for return value
+ *
+ * Return: 0 on success, negative error code on failure
+ */
+int ethnl_bitset_is_compact(const struct nlattr *bitset, bool *compact)
+{
+ struct nlattr *tb[ETHTOOL_A_BITSET_MAX + 1];
+ int ret;
+
+ ret = nla_parse_nested(tb, ETHTOOL_A_BITSET_MAX, bitset,
+ bitset_policy, NULL);
+ if (ret < 0)
+ return ret;
+
+ if (tb[ETHTOOL_A_BITSET_BITS]) {
+ if (tb[ETHTOOL_A_BITSET_VALUE] || tb[ETHTOOL_A_BITSET_MASK])
+ return -EINVAL;
+ *compact = false;
+ return 0;
+ }
+ if (!tb[ETHTOOL_A_BITSET_SIZE] || !tb[ETHTOOL_A_BITSET_VALUE])
+ return -EINVAL;
+
+ *compact = true;
+ return 0;
+}
+
+/**
+ * ethnl_name_to_idx() - look up string index for a name
+ * @names: array of ETH_GSTRING_LEN sized strings
+ * @n_names: number of strings in the array
+ * @name: name to look up
+ *
+ * Return: index of the string if found, -ENOENT if not found
+ */
+static int ethnl_name_to_idx(ethnl_string_array_t names, unsigned int n_names,
+ const char *name)
+{
+ unsigned int i;
+
+ if (!names)
+ return -ENOENT;
+
+ for (i = 0; i < n_names; i++) {
+ /* names[i] may not be null terminated */
+ if (!strncmp(names[i], name, ETH_GSTRING_LEN) &&
+ strlen(name) <= ETH_GSTRING_LEN)
+ return i;
+ }
+
+ return -ENOENT;
+}
+
+static int ethnl_parse_bit(unsigned int *index, bool *val, unsigned int nbits,
+ const struct nlattr *bit_attr, bool no_mask,
+ ethnl_string_array_t names,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[ETHTOOL_A_BITSET_BIT_MAX + 1];
+ int ret, idx;
+
+ ret = nla_parse_nested(tb, ETHTOOL_A_BITSET_BIT_MAX, bit_attr,
+ bit_policy, extack);
+ if (ret < 0)
+ return ret;
+
+ if (tb[ETHTOOL_A_BITSET_BIT_INDEX]) {
+ const char *name;
+
+ idx = nla_get_u32(tb[ETHTOOL_A_BITSET_BIT_INDEX]);
+ if (idx >= nbits) {
+ NL_SET_ERR_MSG_ATTR(extack,
+ tb[ETHTOOL_A_BITSET_BIT_INDEX],
+ "bit index too high");
+ return -EOPNOTSUPP;
+ }
+ name = names ? names[idx] : NULL;
+ if (tb[ETHTOOL_A_BITSET_BIT_NAME] && name &&
+ strncmp(nla_data(tb[ETHTOOL_A_BITSET_BIT_NAME]), name,
+ nla_len(tb[ETHTOOL_A_BITSET_BIT_NAME]))) {
+ NL_SET_ERR_MSG_ATTR(extack, bit_attr,
+ "bit index and name mismatch");
+ return -EINVAL;
+ }
+ } else if (tb[ETHTOOL_A_BITSET_BIT_NAME]) {
+ idx = ethnl_name_to_idx(names, nbits,
+ nla_data(tb[ETHTOOL_A_BITSET_BIT_NAME]));
+ if (idx < 0) {
+ NL_SET_ERR_MSG_ATTR(extack,
+ tb[ETHTOOL_A_BITSET_BIT_NAME],
+ "bit name not found");
+ return -EOPNOTSUPP;
+ }
+ } else {
+ NL_SET_ERR_MSG_ATTR(extack, bit_attr,
+ "neither bit index nor name specified");
+ return -EINVAL;
+ }
+
+ *index = idx;
+ *val = no_mask || tb[ETHTOOL_A_BITSET_BIT_VALUE];
+ return 0;
+}
+
+static int
+ethnl_update_bitset32_verbose(u32 *bitmap, unsigned int nbits,
+ const struct nlattr *attr, struct nlattr **tb,
+ ethnl_string_array_t names,
+ struct netlink_ext_ack *extack, bool *mod)
+{
+ struct nlattr *bit_attr;
+ bool no_mask;
+ int rem;
+ int ret;
+
+ if (tb[ETHTOOL_A_BITSET_VALUE]) {
+ NL_SET_ERR_MSG_ATTR(extack, tb[ETHTOOL_A_BITSET_VALUE],
+ "value only allowed in compact bitset");
+ return -EINVAL;
+ }
+ if (tb[ETHTOOL_A_BITSET_MASK]) {
+ NL_SET_ERR_MSG_ATTR(extack, tb[ETHTOOL_A_BITSET_MASK],
+ "mask only allowed in compact bitset");
+ return -EINVAL;
+ }
+ no_mask = tb[ETHTOOL_A_BITSET_NOMASK];
+
+ nla_for_each_nested(bit_attr, tb[ETHTOOL_A_BITSET_BITS], rem) {
+ bool old_val, new_val;
+ unsigned int idx;
+
+ if (nla_type(bit_attr) != ETHTOOL_A_BITSET_BITS_BIT) {
+ NL_SET_ERR_MSG_ATTR(extack, bit_attr,
+ "only ETHTOOL_A_BITSET_BITS_BIT allowed in ETHTOOL_A_BITSET_BITS");
+ return -EINVAL;
+ }
+ ret = ethnl_parse_bit(&idx, &new_val, nbits, bit_attr, no_mask,
+ names, extack);
+ if (ret < 0)
+ return ret;
+ old_val = bitmap[idx / 32] & ((u32)1 << (idx % 32));
+ if (new_val != old_val) {
+ if (new_val)
+ bitmap[idx / 32] |= ((u32)1 << (idx % 32));
+ else
+ bitmap[idx / 32] &= ~((u32)1 << (idx % 32));
+ *mod = true;
+ }
+ }
+
+ return 0;
+}
+
+static int ethnl_compact_sanity_checks(unsigned int nbits,
+ const struct nlattr *nest,
+ struct nlattr **tb,
+ struct netlink_ext_ack *extack)
+{
+ bool no_mask = tb[ETHTOOL_A_BITSET_NOMASK];
+ unsigned int attr_nbits, attr_nwords;
+ const struct nlattr *test_attr;
+
+ if (no_mask && tb[ETHTOOL_A_BITSET_MASK]) {
+ NL_SET_ERR_MSG_ATTR(extack, tb[ETHTOOL_A_BITSET_MASK],
+ "mask not allowed in list bitset");
+ return -EINVAL;
+ }
+ if (!tb[ETHTOOL_A_BITSET_SIZE]) {
+ NL_SET_ERR_MSG_ATTR(extack, nest,
+ "missing size in compact bitset");
+ return -EINVAL;
+ }
+ if (!tb[ETHTOOL_A_BITSET_VALUE]) {
+ NL_SET_ERR_MSG_ATTR(extack, nest,
+ "missing value in compact bitset");
+ return -EINVAL;
+ }
+ if (!no_mask && !tb[ETHTOOL_A_BITSET_MASK]) {
+ NL_SET_ERR_MSG_ATTR(extack, nest,
+ "missing mask in compact nonlist bitset");
+ return -EINVAL;
+ }
+
+ attr_nbits = nla_get_u32(tb[ETHTOOL_A_BITSET_SIZE]);
+ attr_nwords = DIV_ROUND_UP(attr_nbits, 32);
+ if (nla_len(tb[ETHTOOL_A_BITSET_VALUE]) != attr_nwords * sizeof(u32)) {
+ NL_SET_ERR_MSG_ATTR(extack, tb[ETHTOOL_A_BITSET_VALUE],
+ "bitset value length does not match size");
+ return -EINVAL;
+ }
+ if (tb[ETHTOOL_A_BITSET_MASK] &&
+ nla_len(tb[ETHTOOL_A_BITSET_MASK]) != attr_nwords * sizeof(u32)) {
+ NL_SET_ERR_MSG_ATTR(extack, tb[ETHTOOL_A_BITSET_MASK],
+ "bitset mask length does not match size");
+ return -EINVAL;
+ }
+ if (attr_nbits <= nbits)
+ return 0;
+
+ test_attr = no_mask ? tb[ETHTOOL_A_BITSET_VALUE] :
+ tb[ETHTOOL_A_BITSET_MASK];
+ if (ethnl_bitmap32_not_zero(nla_data(test_attr), nbits, attr_nbits)) {
+ NL_SET_ERR_MSG_ATTR(extack, test_attr,
+ "cannot modify bits past kernel bitset size");
+ return -EINVAL;
+ }
+ return 0;
+}
+
+/**
+ * ethnl_update_bitset32() - Apply a bitset nest to a u32 based bitmap
+ * @bitmap: bitmap to update
+ * @nbits: size of the updated bitmap in bits
+ * @attr: nest attribute to parse and apply
+ * @names: array of bit names; may be null for compact format
+ * @extack: extack for error reporting
+ * @mod: set this to true if bitmap is modified, leave as it is if not
+ *
+ * Apply bitset netsted attribute to a bitmap. If the attribute represents
+ * a bit list, @bitmap is set to its contents; otherwise, bits in mask are
+ * set to values from value. Bitmaps in the attribute may be longer than
+ * @nbits but the message must not request modifying any bits past @nbits.
+ *
+ * Return: negative error code on failure, 0 on success
+ */
+int ethnl_update_bitset32(u32 *bitmap, unsigned int nbits,
+ const struct nlattr *attr, ethnl_string_array_t names,
+ struct netlink_ext_ack *extack, bool *mod)
+{
+ struct nlattr *tb[ETHTOOL_A_BITSET_MAX + 1];
+ unsigned int change_bits;
+ bool no_mask;
+ int ret;
+
+ if (!attr)
+ return 0;
+ ret = nla_parse_nested(tb, ETHTOOL_A_BITSET_MAX, attr, bitset_policy,
+ extack);
+ if (ret < 0)
+ return ret;
+
+ if (tb[ETHTOOL_A_BITSET_BITS])
+ return ethnl_update_bitset32_verbose(bitmap, nbits, attr, tb,
+ names, extack, mod);
+ ret = ethnl_compact_sanity_checks(nbits, attr, tb, extack);
+ if (ret < 0)
+ return ret;
+
+ no_mask = tb[ETHTOOL_A_BITSET_NOMASK];
+ change_bits = min_t(unsigned int,
+ nla_get_u32(tb[ETHTOOL_A_BITSET_SIZE]), nbits);
+ ethnl_bitmap32_update(bitmap, change_bits,
+ nla_data(tb[ETHTOOL_A_BITSET_VALUE]),
+ no_mask ? NULL :
+ nla_data(tb[ETHTOOL_A_BITSET_MASK]),
+ mod);
+ if (no_mask && change_bits < nbits)
+ ethnl_bitmap32_clear(bitmap, change_bits, nbits, mod);
+
+ return 0;
+}
+
+#if BITS_PER_LONG == 64 && defined(__BIG_ENDIAN)
+
+/* 64-bit big endian architectures are the only case when u32 based bitmaps
+ * and unsigned long based bitmaps have different memory layout so that we
+ * cannot simply cast the latter to the former and need actual wrappers
+ * converting the latter to the former.
+ *
+ * To reduce the number of slab allocations, the wrappers use fixed size local
+ * variables for bitmaps up to ETHNL_SMALL_BITMAP_BITS bits which is the
+ * majority of bitmaps used by ethtool.
+ */
+#define ETHNL_SMALL_BITMAP_BITS 128
+#define ETHNL_SMALL_BITMAP_WORDS DIV_ROUND_UP(ETHNL_SMALL_BITMAP_BITS, 32)
+
+int ethnl_bitset_size(const unsigned long *val, const unsigned long *mask,
+ unsigned int nbits, ethnl_string_array_t names,
+ bool compact)
+{
+ u32 small_mask32[ETHNL_SMALL_BITMAP_WORDS];
+ u32 small_val32[ETHNL_SMALL_BITMAP_WORDS];
+ u32 *mask32;
+ u32 *val32;
+ int ret;
+
+ if (nbits > ETHNL_SMALL_BITMAP_BITS) {
+ unsigned int nwords = DIV_ROUND_UP(nbits, 32);
+
+ val32 = kmalloc_array(2 * nwords, sizeof(u32), GFP_KERNEL);
+ if (!val32)
+ return -ENOMEM;
+ mask32 = val32 + nwords;
+ } else {
+ val32 = small_val32;
+ mask32 = small_mask32;
+ }
+
+ bitmap_to_arr32(val32, val, nbits);
+ if (mask)
+ bitmap_to_arr32(mask32, mask, nbits);
+ else
+ mask32 = NULL;
+ ret = ethnl_bitset32_size(val32, mask32, nbits, names, compact);
+
+ if (nbits > ETHNL_SMALL_BITMAP_BITS)
+ kfree(val32);
+
+ return ret;
+}
+
+int ethnl_put_bitset(struct sk_buff *skb, int attrtype,
+ const unsigned long *val, const unsigned long *mask,
+ unsigned int nbits, ethnl_string_array_t names,
+ bool compact)
+{
+ u32 small_mask32[ETHNL_SMALL_BITMAP_WORDS];
+ u32 small_val32[ETHNL_SMALL_BITMAP_WORDS];
+ u32 *mask32;
+ u32 *val32;
+ int ret;
+
+ if (nbits > ETHNL_SMALL_BITMAP_BITS) {
+ unsigned int nwords = DIV_ROUND_UP(nbits, 32);
+
+ val32 = kmalloc_array(2 * nwords, sizeof(u32), GFP_KERNEL);
+ if (!val32)
+ return -ENOMEM;
+ mask32 = val32 + nwords;
+ } else {
+ val32 = small_val32;
+ mask32 = small_mask32;
+ }
+
+ bitmap_to_arr32(val32, val, nbits);
+ if (mask)
+ bitmap_to_arr32(mask32, mask, nbits);
+ else
+ mask32 = NULL;
+ ret = ethnl_put_bitset32(skb, attrtype, val32, mask32, nbits, names,
+ compact);
+
+ if (nbits > ETHNL_SMALL_BITMAP_BITS)
+ kfree(val32);
+
+ return ret;
+}
+
+int ethnl_update_bitset(unsigned long *bitmap, unsigned int nbits,
+ const struct nlattr *attr, ethnl_string_array_t names,
+ struct netlink_ext_ack *extack, bool *mod)
+{
+ u32 small_bitmap32[ETHNL_SMALL_BITMAP_WORDS];
+ u32 *bitmap32 = small_bitmap32;
+ bool u32_mod = false;
+ int ret;
+
+ if (nbits > ETHNL_SMALL_BITMAP_BITS) {
+ unsigned int dst_words = DIV_ROUND_UP(nbits, 32);
+
+ bitmap32 = kmalloc_array(dst_words, sizeof(u32), GFP_KERNEL);
+ if (!bitmap32)
+ return -ENOMEM;
+ }
+
+ bitmap_to_arr32(bitmap32, bitmap, nbits);
+ ret = ethnl_update_bitset32(bitmap32, nbits, attr, names, extack,
+ &u32_mod);
+ if (u32_mod) {
+ bitmap_from_arr32(bitmap, bitmap32, nbits);
+ *mod = true;
+ }
+
+ if (nbits > ETHNL_SMALL_BITMAP_BITS)
+ kfree(bitmap32);
+
+ return ret;
+}
+
+#else
+
+/* On little endian 64-bit and all 32-bit architectures, an unsigned long
+ * based bitmap can be interpreted as u32 based one using a simple cast.
+ */
+
+int ethnl_bitset_size(const unsigned long *val, const unsigned long *mask,
+ unsigned int nbits, ethnl_string_array_t names,
+ bool compact)
+{
+ return ethnl_bitset32_size((const u32 *)val, (const u32 *)mask, nbits,
+ names, compact);
+}
+
+int ethnl_put_bitset(struct sk_buff *skb, int attrtype,
+ const unsigned long *val, const unsigned long *mask,
+ unsigned int nbits, ethnl_string_array_t names,
+ bool compact)
+{
+ return ethnl_put_bitset32(skb, attrtype, (const u32 *)val,
+ (const u32 *)mask, nbits, names, compact);
+}
+
+int ethnl_update_bitset(unsigned long *bitmap, unsigned int nbits,
+ const struct nlattr *attr, ethnl_string_array_t names,
+ struct netlink_ext_ack *extack, bool *mod)
+{
+ return ethnl_update_bitset32((u32 *)bitmap, nbits, attr, names, extack,
+ mod);
+}
+
+#endif /* BITS_PER_LONG == 64 && defined(__BIG_ENDIAN) */
diff --git a/net/ethtool/bitset.h b/net/ethtool/bitset.h
new file mode 100644
index 000000000000..b8247e34109d
--- /dev/null
+++ b/net/ethtool/bitset.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef _NET_ETHTOOL_BITSET_H
+#define _NET_ETHTOOL_BITSET_H
+
+typedef const char (*const ethnl_string_array_t)[ETH_GSTRING_LEN];
+
+int ethnl_bitset_is_compact(const struct nlattr *bitset, bool *compact);
+int ethnl_bitset_size(const unsigned long *val, const unsigned long *mask,
+ unsigned int nbits, ethnl_string_array_t names,
+ bool compact);
+int ethnl_bitset32_size(const u32 *val, const u32 *mask, unsigned int nbits,
+ ethnl_string_array_t names, bool compact);
+int ethnl_put_bitset(struct sk_buff *skb, int attrtype,
+ const unsigned long *val, const unsigned long *mask,
+ unsigned int nbits, ethnl_string_array_t names,
+ bool compact);
+int ethnl_put_bitset32(struct sk_buff *skb, int attrtype, const u32 *val,
+ const u32 *mask, unsigned int nbits,
+ ethnl_string_array_t names, bool compact);
+int ethnl_update_bitset(unsigned long *bitmap, unsigned int nbits,
+ const struct nlattr *attr, ethnl_string_array_t names,
+ struct netlink_ext_ack *extack, bool *mod);
+int ethnl_update_bitset32(u32 *bitmap, unsigned int nbits,
+ const struct nlattr *attr, ethnl_string_array_t names,
+ struct netlink_ext_ack *extack, bool *mod);
+
+#endif /* _NET_ETHTOOL_BITSET_H */
diff --git a/net/ethtool/common.c b/net/ethtool/common.c
new file mode 100644
index 000000000000..e621b1694d2f
--- /dev/null
+++ b/net/ethtool/common.c
@@ -0,0 +1,227 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include "common.h"
+
+const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] = {
+ [NETIF_F_SG_BIT] = "tx-scatter-gather",
+ [NETIF_F_IP_CSUM_BIT] = "tx-checksum-ipv4",
+ [NETIF_F_HW_CSUM_BIT] = "tx-checksum-ip-generic",
+ [NETIF_F_IPV6_CSUM_BIT] = "tx-checksum-ipv6",
+ [NETIF_F_HIGHDMA_BIT] = "highdma",
+ [NETIF_F_FRAGLIST_BIT] = "tx-scatter-gather-fraglist",
+ [NETIF_F_HW_VLAN_CTAG_TX_BIT] = "tx-vlan-hw-insert",
+
+ [NETIF_F_HW_VLAN_CTAG_RX_BIT] = "rx-vlan-hw-parse",
+ [NETIF_F_HW_VLAN_CTAG_FILTER_BIT] = "rx-vlan-filter",
+ [NETIF_F_HW_VLAN_STAG_TX_BIT] = "tx-vlan-stag-hw-insert",
+ [NETIF_F_HW_VLAN_STAG_RX_BIT] = "rx-vlan-stag-hw-parse",
+ [NETIF_F_HW_VLAN_STAG_FILTER_BIT] = "rx-vlan-stag-filter",
+ [NETIF_F_VLAN_CHALLENGED_BIT] = "vlan-challenged",
+ [NETIF_F_GSO_BIT] = "tx-generic-segmentation",
+ [NETIF_F_LLTX_BIT] = "tx-lockless",
+ [NETIF_F_NETNS_LOCAL_BIT] = "netns-local",
+ [NETIF_F_GRO_BIT] = "rx-gro",
+ [NETIF_F_GRO_HW_BIT] = "rx-gro-hw",
+ [NETIF_F_LRO_BIT] = "rx-lro",
+
+ [NETIF_F_TSO_BIT] = "tx-tcp-segmentation",
+ [NETIF_F_GSO_ROBUST_BIT] = "tx-gso-robust",
+ [NETIF_F_TSO_ECN_BIT] = "tx-tcp-ecn-segmentation",
+ [NETIF_F_TSO_MANGLEID_BIT] = "tx-tcp-mangleid-segmentation",
+ [NETIF_F_TSO6_BIT] = "tx-tcp6-segmentation",
+ [NETIF_F_FSO_BIT] = "tx-fcoe-segmentation",
+ [NETIF_F_GSO_GRE_BIT] = "tx-gre-segmentation",
+ [NETIF_F_GSO_GRE_CSUM_BIT] = "tx-gre-csum-segmentation",
+ [NETIF_F_GSO_IPXIP4_BIT] = "tx-ipxip4-segmentation",
+ [NETIF_F_GSO_IPXIP6_BIT] = "tx-ipxip6-segmentation",
+ [NETIF_F_GSO_UDP_TUNNEL_BIT] = "tx-udp_tnl-segmentation",
+ [NETIF_F_GSO_UDP_TUNNEL_CSUM_BIT] = "tx-udp_tnl-csum-segmentation",
+ [NETIF_F_GSO_PARTIAL_BIT] = "tx-gso-partial",
+ [NETIF_F_GSO_SCTP_BIT] = "tx-sctp-segmentation",
+ [NETIF_F_GSO_ESP_BIT] = "tx-esp-segmentation",
+ [NETIF_F_GSO_UDP_L4_BIT] = "tx-udp-segmentation",
+
+ [NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc",
+ [NETIF_F_SCTP_CRC_BIT] = "tx-checksum-sctp",
+ [NETIF_F_FCOE_MTU_BIT] = "fcoe-mtu",
+ [NETIF_F_NTUPLE_BIT] = "rx-ntuple-filter",
+ [NETIF_F_RXHASH_BIT] = "rx-hashing",
+ [NETIF_F_RXCSUM_BIT] = "rx-checksum",
+ [NETIF_F_NOCACHE_COPY_BIT] = "tx-nocache-copy",
+ [NETIF_F_LOOPBACK_BIT] = "loopback",
+ [NETIF_F_RXFCS_BIT] = "rx-fcs",
+ [NETIF_F_RXALL_BIT] = "rx-all",
+ [NETIF_F_HW_L2FW_DOFFLOAD_BIT] = "l2-fwd-offload",
+ [NETIF_F_HW_TC_BIT] = "hw-tc-offload",
+ [NETIF_F_HW_ESP_BIT] = "esp-hw-offload",
+ [NETIF_F_HW_ESP_TX_CSUM_BIT] = "esp-tx-csum-hw-offload",
+ [NETIF_F_RX_UDP_TUNNEL_PORT_BIT] = "rx-udp_tunnel-port-offload",
+ [NETIF_F_HW_TLS_RECORD_BIT] = "tls-hw-record",
+ [NETIF_F_HW_TLS_TX_BIT] = "tls-hw-tx-offload",
+ [NETIF_F_HW_TLS_RX_BIT] = "tls-hw-rx-offload",
+};
+
+const char
+rss_hash_func_strings[ETH_RSS_HASH_FUNCS_COUNT][ETH_GSTRING_LEN] = {
+ [ETH_RSS_HASH_TOP_BIT] = "toeplitz",
+ [ETH_RSS_HASH_XOR_BIT] = "xor",
+ [ETH_RSS_HASH_CRC32_BIT] = "crc32",
+};
+
+const char
+tunable_strings[__ETHTOOL_TUNABLE_COUNT][ETH_GSTRING_LEN] = {
+ [ETHTOOL_ID_UNSPEC] = "Unspec",
+ [ETHTOOL_RX_COPYBREAK] = "rx-copybreak",
+ [ETHTOOL_TX_COPYBREAK] = "tx-copybreak",
+ [ETHTOOL_PFC_PREVENTION_TOUT] = "pfc-prevention-tout",
+};
+
+const char
+phy_tunable_strings[__ETHTOOL_PHY_TUNABLE_COUNT][ETH_GSTRING_LEN] = {
+ [ETHTOOL_ID_UNSPEC] = "Unspec",
+ [ETHTOOL_PHY_DOWNSHIFT] = "phy-downshift",
+ [ETHTOOL_PHY_FAST_LINK_DOWN] = "phy-fast-link-down",
+ [ETHTOOL_PHY_EDPD] = "phy-energy-detect-power-down",
+};
+
+#define __LINK_MODE_NAME(speed, type, duplex) \
+ #speed "base" #type "/" #duplex
+#define __DEFINE_LINK_MODE_NAME(speed, type, duplex) \
+ [ETHTOOL_LINK_MODE(speed, type, duplex)] = \
+ __LINK_MODE_NAME(speed, type, duplex)
+#define __DEFINE_SPECIAL_MODE_NAME(_mode, _name) \
+ [ETHTOOL_LINK_MODE_ ## _mode ## _BIT] = _name
+
+const char link_mode_names[][ETH_GSTRING_LEN] = {
+ __DEFINE_LINK_MODE_NAME(10, T, Half),
+ __DEFINE_LINK_MODE_NAME(10, T, Full),
+ __DEFINE_LINK_MODE_NAME(100, T, Half),
+ __DEFINE_LINK_MODE_NAME(100, T, Full),
+ __DEFINE_LINK_MODE_NAME(1000, T, Half),
+ __DEFINE_LINK_MODE_NAME(1000, T, Full),
+ __DEFINE_SPECIAL_MODE_NAME(Autoneg, "Autoneg"),
+ __DEFINE_SPECIAL_MODE_NAME(TP, "TP"),
+ __DEFINE_SPECIAL_MODE_NAME(AUI, "AUI"),
+ __DEFINE_SPECIAL_MODE_NAME(MII, "MII"),
+ __DEFINE_SPECIAL_MODE_NAME(FIBRE, "FIBRE"),
+ __DEFINE_SPECIAL_MODE_NAME(BNC, "BNC"),
+ __DEFINE_LINK_MODE_NAME(10000, T, Full),
+ __DEFINE_SPECIAL_MODE_NAME(Pause, "Pause"),
+ __DEFINE_SPECIAL_MODE_NAME(Asym_Pause, "Asym_Pause"),
+ __DEFINE_LINK_MODE_NAME(2500, X, Full),
+ __DEFINE_SPECIAL_MODE_NAME(Backplane, "Backplane"),
+ __DEFINE_LINK_MODE_NAME(1000, KX, Full),
+ __DEFINE_LINK_MODE_NAME(10000, KX4, Full),
+ __DEFINE_LINK_MODE_NAME(10000, KR, Full),
+ __DEFINE_SPECIAL_MODE_NAME(10000baseR_FEC, "10000baseR_FEC"),
+ __DEFINE_LINK_MODE_NAME(20000, MLD2, Full),
+ __DEFINE_LINK_MODE_NAME(20000, KR2, Full),
+ __DEFINE_LINK_MODE_NAME(40000, KR4, Full),
+ __DEFINE_LINK_MODE_NAME(40000, CR4, Full),
+ __DEFINE_LINK_MODE_NAME(40000, SR4, Full),
+ __DEFINE_LINK_MODE_NAME(40000, LR4, Full),
+ __DEFINE_LINK_MODE_NAME(56000, KR4, Full),
+ __DEFINE_LINK_MODE_NAME(56000, CR4, Full),
+ __DEFINE_LINK_MODE_NAME(56000, SR4, Full),
+ __DEFINE_LINK_MODE_NAME(56000, LR4, Full),
+ __DEFINE_LINK_MODE_NAME(25000, CR, Full),
+ __DEFINE_LINK_MODE_NAME(25000, KR, Full),
+ __DEFINE_LINK_MODE_NAME(25000, SR, Full),
+ __DEFINE_LINK_MODE_NAME(50000, CR2, Full),
+ __DEFINE_LINK_MODE_NAME(50000, KR2, Full),
+ __DEFINE_LINK_MODE_NAME(100000, KR4, Full),
+ __DEFINE_LINK_MODE_NAME(100000, SR4, Full),
+ __DEFINE_LINK_MODE_NAME(100000, CR4, Full),
+ __DEFINE_LINK_MODE_NAME(100000, LR4_ER4, Full),
+ __DEFINE_LINK_MODE_NAME(50000, SR2, Full),
+ __DEFINE_LINK_MODE_NAME(1000, X, Full),
+ __DEFINE_LINK_MODE_NAME(10000, CR, Full),
+ __DEFINE_LINK_MODE_NAME(10000, SR, Full),
+ __DEFINE_LINK_MODE_NAME(10000, LR, Full),
+ __DEFINE_LINK_MODE_NAME(10000, LRM, Full),
+ __DEFINE_LINK_MODE_NAME(10000, ER, Full),
+ __DEFINE_LINK_MODE_NAME(2500, T, Full),
+ __DEFINE_LINK_MODE_NAME(5000, T, Full),
+ __DEFINE_SPECIAL_MODE_NAME(FEC_NONE, "None"),
+ __DEFINE_SPECIAL_MODE_NAME(FEC_RS, "RS"),
+ __DEFINE_SPECIAL_MODE_NAME(FEC_BASER, "BASER"),
+ __DEFINE_LINK_MODE_NAME(50000, KR, Full),
+ __DEFINE_LINK_MODE_NAME(50000, SR, Full),
+ __DEFINE_LINK_MODE_NAME(50000, CR, Full),
+ __DEFINE_LINK_MODE_NAME(50000, LR_ER_FR, Full),
+ __DEFINE_LINK_MODE_NAME(50000, DR, Full),
+ __DEFINE_LINK_MODE_NAME(100000, KR2, Full),
+ __DEFINE_LINK_MODE_NAME(100000, SR2, Full),
+ __DEFINE_LINK_MODE_NAME(100000, CR2, Full),
+ __DEFINE_LINK_MODE_NAME(100000, LR2_ER2_FR2, Full),
+ __DEFINE_LINK_MODE_NAME(100000, DR2, Full),
+ __DEFINE_LINK_MODE_NAME(200000, KR4, Full),
+ __DEFINE_LINK_MODE_NAME(200000, SR4, Full),
+ __DEFINE_LINK_MODE_NAME(200000, LR4_ER4_FR4, Full),
+ __DEFINE_LINK_MODE_NAME(200000, DR4, Full),
+ __DEFINE_LINK_MODE_NAME(200000, CR4, Full),
+ __DEFINE_LINK_MODE_NAME(100, T1, Full),
+ __DEFINE_LINK_MODE_NAME(1000, T1, Full),
+ __DEFINE_LINK_MODE_NAME(400000, KR8, Full),
+ __DEFINE_LINK_MODE_NAME(400000, SR8, Full),
+ __DEFINE_LINK_MODE_NAME(400000, LR8_ER8_FR8, Full),
+ __DEFINE_LINK_MODE_NAME(400000, DR8, Full),
+ __DEFINE_LINK_MODE_NAME(400000, CR8, Full),
+};
+static_assert(ARRAY_SIZE(link_mode_names) == __ETHTOOL_LINK_MODE_MASK_NBITS);
+
+/* return false if legacy contained non-0 deprecated fields
+ * maxtxpkt/maxrxpkt. rest of ksettings always updated
+ */
+bool
+convert_legacy_settings_to_link_ksettings(
+ struct ethtool_link_ksettings *link_ksettings,
+ const struct ethtool_cmd *legacy_settings)
+{
+ bool retval = true;
+
+ memset(link_ksettings, 0, sizeof(*link_ksettings));
+
+ /* This is used to tell users that driver is still using these
+ * deprecated legacy fields, and they should not use
+ * %ETHTOOL_GLINKSETTINGS/%ETHTOOL_SLINKSETTINGS
+ */
+ if (legacy_settings->maxtxpkt ||
+ legacy_settings->maxrxpkt)
+ retval = false;
+
+ ethtool_convert_legacy_u32_to_link_mode(
+ link_ksettings->link_modes.supported,
+ legacy_settings->supported);
+ ethtool_convert_legacy_u32_to_link_mode(
+ link_ksettings->link_modes.advertising,
+ legacy_settings->advertising);
+ ethtool_convert_legacy_u32_to_link_mode(
+ link_ksettings->link_modes.lp_advertising,
+ legacy_settings->lp_advertising);
+ link_ksettings->base.speed
+ = ethtool_cmd_speed(legacy_settings);
+ link_ksettings->base.duplex
+ = legacy_settings->duplex;
+ link_ksettings->base.port
+ = legacy_settings->port;
+ link_ksettings->base.phy_address
+ = legacy_settings->phy_address;
+ link_ksettings->base.autoneg
+ = legacy_settings->autoneg;
+ link_ksettings->base.mdio_support
+ = legacy_settings->mdio_support;
+ link_ksettings->base.eth_tp_mdix
+ = legacy_settings->eth_tp_mdix;
+ link_ksettings->base.eth_tp_mdix_ctrl
+ = legacy_settings->eth_tp_mdix_ctrl;
+ return retval;
+}
+
+int __ethtool_get_link(struct net_device *dev)
+{
+ if (!dev->ethtool_ops->get_link)
+ return -EOPNOTSUPP;
+
+ return netif_running(dev) && dev->ethtool_ops->get_link(dev);
+}
diff --git a/net/ethtool/common.h b/net/ethtool/common.h
new file mode 100644
index 000000000000..5c5f7dc90cd4
--- /dev/null
+++ b/net/ethtool/common.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef _ETHTOOL_COMMON_H
+#define _ETHTOOL_COMMON_H
+
+#include <linux/netdevice.h>
+#include <linux/ethtool.h>
+
+/* compose link mode index from speed, type and duplex */
+#define ETHTOOL_LINK_MODE(speed, type, duplex) \
+ ETHTOOL_LINK_MODE_ ## speed ## base ## type ## _ ## duplex ## _BIT
+
+extern const char
+netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN];
+extern const char
+rss_hash_func_strings[ETH_RSS_HASH_FUNCS_COUNT][ETH_GSTRING_LEN];
+extern const char
+tunable_strings[__ETHTOOL_TUNABLE_COUNT][ETH_GSTRING_LEN];
+extern const char
+phy_tunable_strings[__ETHTOOL_PHY_TUNABLE_COUNT][ETH_GSTRING_LEN];
+extern const char link_mode_names[][ETH_GSTRING_LEN];
+
+int __ethtool_get_link(struct net_device *dev);
+
+bool convert_legacy_settings_to_link_ksettings(
+ struct ethtool_link_ksettings *link_ksettings,
+ const struct ethtool_cmd *legacy_settings);
+
+#endif /* _ETHTOOL_COMMON_H */
diff --git a/net/core/ethtool.c b/net/ethtool/ioctl.c
index cd9bc67381b2..182bffbffa78 100644
--- a/net/core/ethtool.c
+++ b/net/ethtool/ioctl.c
@@ -26,6 +26,9 @@
#include <net/devlink.h>
#include <net/xdp_sock.h>
#include <net/flow_offload.h>
+#include <linux/ethtool_netlink.h>
+
+#include "common.h"
/*
* Some useful ethtool_ops methods that're device independent.
@@ -54,88 +57,6 @@ EXPORT_SYMBOL(ethtool_op_get_ts_info);
#define ETHTOOL_DEV_FEATURE_WORDS ((NETDEV_FEATURE_COUNT + 31) / 32)
-static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] = {
- [NETIF_F_SG_BIT] = "tx-scatter-gather",
- [NETIF_F_IP_CSUM_BIT] = "tx-checksum-ipv4",
- [NETIF_F_HW_CSUM_BIT] = "tx-checksum-ip-generic",
- [NETIF_F_IPV6_CSUM_BIT] = "tx-checksum-ipv6",
- [NETIF_F_HIGHDMA_BIT] = "highdma",
- [NETIF_F_FRAGLIST_BIT] = "tx-scatter-gather-fraglist",
- [NETIF_F_HW_VLAN_CTAG_TX_BIT] = "tx-vlan-hw-insert",
-
- [NETIF_F_HW_VLAN_CTAG_RX_BIT] = "rx-vlan-hw-parse",
- [NETIF_F_HW_VLAN_CTAG_FILTER_BIT] = "rx-vlan-filter",
- [NETIF_F_HW_VLAN_STAG_TX_BIT] = "tx-vlan-stag-hw-insert",
- [NETIF_F_HW_VLAN_STAG_RX_BIT] = "rx-vlan-stag-hw-parse",
- [NETIF_F_HW_VLAN_STAG_FILTER_BIT] = "rx-vlan-stag-filter",
- [NETIF_F_VLAN_CHALLENGED_BIT] = "vlan-challenged",
- [NETIF_F_GSO_BIT] = "tx-generic-segmentation",
- [NETIF_F_LLTX_BIT] = "tx-lockless",
- [NETIF_F_NETNS_LOCAL_BIT] = "netns-local",
- [NETIF_F_GRO_BIT] = "rx-gro",
- [NETIF_F_GRO_HW_BIT] = "rx-gro-hw",
- [NETIF_F_LRO_BIT] = "rx-lro",
-
- [NETIF_F_TSO_BIT] = "tx-tcp-segmentation",
- [NETIF_F_GSO_ROBUST_BIT] = "tx-gso-robust",
- [NETIF_F_TSO_ECN_BIT] = "tx-tcp-ecn-segmentation",
- [NETIF_F_TSO_MANGLEID_BIT] = "tx-tcp-mangleid-segmentation",
- [NETIF_F_TSO6_BIT] = "tx-tcp6-segmentation",
- [NETIF_F_FSO_BIT] = "tx-fcoe-segmentation",
- [NETIF_F_GSO_GRE_BIT] = "tx-gre-segmentation",
- [NETIF_F_GSO_GRE_CSUM_BIT] = "tx-gre-csum-segmentation",
- [NETIF_F_GSO_IPXIP4_BIT] = "tx-ipxip4-segmentation",
- [NETIF_F_GSO_IPXIP6_BIT] = "tx-ipxip6-segmentation",
- [NETIF_F_GSO_UDP_TUNNEL_BIT] = "tx-udp_tnl-segmentation",
- [NETIF_F_GSO_UDP_TUNNEL_CSUM_BIT] = "tx-udp_tnl-csum-segmentation",
- [NETIF_F_GSO_PARTIAL_BIT] = "tx-gso-partial",
- [NETIF_F_GSO_SCTP_BIT] = "tx-sctp-segmentation",
- [NETIF_F_GSO_ESP_BIT] = "tx-esp-segmentation",
- [NETIF_F_GSO_UDP_L4_BIT] = "tx-udp-segmentation",
-
- [NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc",
- [NETIF_F_SCTP_CRC_BIT] = "tx-checksum-sctp",
- [NETIF_F_FCOE_MTU_BIT] = "fcoe-mtu",
- [NETIF_F_NTUPLE_BIT] = "rx-ntuple-filter",
- [NETIF_F_RXHASH_BIT] = "rx-hashing",
- [NETIF_F_RXCSUM_BIT] = "rx-checksum",
- [NETIF_F_NOCACHE_COPY_BIT] = "tx-nocache-copy",
- [NETIF_F_LOOPBACK_BIT] = "loopback",
- [NETIF_F_RXFCS_BIT] = "rx-fcs",
- [NETIF_F_RXALL_BIT] = "rx-all",
- [NETIF_F_HW_L2FW_DOFFLOAD_BIT] = "l2-fwd-offload",
- [NETIF_F_HW_TC_BIT] = "hw-tc-offload",
- [NETIF_F_HW_ESP_BIT] = "esp-hw-offload",
- [NETIF_F_HW_ESP_TX_CSUM_BIT] = "esp-tx-csum-hw-offload",
- [NETIF_F_RX_UDP_TUNNEL_PORT_BIT] = "rx-udp_tunnel-port-offload",
- [NETIF_F_HW_TLS_RECORD_BIT] = "tls-hw-record",
- [NETIF_F_HW_TLS_TX_BIT] = "tls-hw-tx-offload",
- [NETIF_F_HW_TLS_RX_BIT] = "tls-hw-rx-offload",
-};
-
-static const char
-rss_hash_func_strings[ETH_RSS_HASH_FUNCS_COUNT][ETH_GSTRING_LEN] = {
- [ETH_RSS_HASH_TOP_BIT] = "toeplitz",
- [ETH_RSS_HASH_XOR_BIT] = "xor",
- [ETH_RSS_HASH_CRC32_BIT] = "crc32",
-};
-
-static const char
-tunable_strings[__ETHTOOL_TUNABLE_COUNT][ETH_GSTRING_LEN] = {
- [ETHTOOL_ID_UNSPEC] = "Unspec",
- [ETHTOOL_RX_COPYBREAK] = "rx-copybreak",
- [ETHTOOL_TX_COPYBREAK] = "tx-copybreak",
- [ETHTOOL_PFC_PREVENTION_TOUT] = "pfc-prevention-tout",
-};
-
-static const char
-phy_tunable_strings[__ETHTOOL_PHY_TUNABLE_COUNT][ETH_GSTRING_LEN] = {
- [ETHTOOL_ID_UNSPEC] = "Unspec",
- [ETHTOOL_PHY_DOWNSHIFT] = "phy-downshift",
- [ETHTOOL_PHY_FAST_LINK_DOWN] = "phy-fast-link-down",
- [ETHTOOL_PHY_EDPD] = "phy-energy-detect-power-down",
-};
-
static int ethtool_get_features(struct net_device *dev, void __user *useraddr)
{
struct ethtool_gfeatures cmd = {
@@ -234,6 +155,9 @@ static int __ethtool_get_sset_count(struct net_device *dev, int sset)
!ops->get_ethtool_phy_stats)
return phy_ethtool_get_sset_count(dev->phydev);
+ if (sset == ETH_SS_LINK_MODES)
+ return __ETHTOOL_LINK_MODE_MASK_NBITS;
+
if (ops->get_sset_count && ops->get_strings)
return ops->get_sset_count(dev, sset);
else
@@ -258,6 +182,9 @@ static void __ethtool_get_strings(struct net_device *dev,
else if (stringset == ETH_SS_PHY_STATS && dev->phydev &&
!ops->get_ethtool_phy_stats)
phy_ethtool_get_strings(dev->phydev, data);
+ else if (stringset == ETH_SS_LINK_MODES)
+ memcpy(data, link_mode_names,
+ __ETHTOOL_LINK_MODE_MASK_NBITS * ETH_GSTRING_LEN);
else
/* ops->get_strings is valid because checked earlier */
ops->get_strings(dev, stringset, data);
@@ -432,54 +359,6 @@ bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32,
}
EXPORT_SYMBOL(ethtool_convert_link_mode_to_legacy_u32);
-/* return false if legacy contained non-0 deprecated fields
- * maxtxpkt/maxrxpkt. rest of ksettings always updated
- */
-static bool
-convert_legacy_settings_to_link_ksettings(
- struct ethtool_link_ksettings *link_ksettings,
- const struct ethtool_cmd *legacy_settings)
-{
- bool retval = true;
-
- memset(link_ksettings, 0, sizeof(*link_ksettings));
-
- /* This is used to tell users that driver is still using these
- * deprecated legacy fields, and they should not use
- * %ETHTOOL_GLINKSETTINGS/%ETHTOOL_SLINKSETTINGS
- */
- if (legacy_settings->maxtxpkt ||
- legacy_settings->maxrxpkt)
- retval = false;
-
- ethtool_convert_legacy_u32_to_link_mode(
- link_ksettings->link_modes.supported,
- legacy_settings->supported);
- ethtool_convert_legacy_u32_to_link_mode(
- link_ksettings->link_modes.advertising,
- legacy_settings->advertising);
- ethtool_convert_legacy_u32_to_link_mode(
- link_ksettings->link_modes.lp_advertising,
- legacy_settings->lp_advertising);
- link_ksettings->base.speed
- = ethtool_cmd_speed(legacy_settings);
- link_ksettings->base.duplex
- = legacy_settings->duplex;
- link_ksettings->base.port
- = legacy_settings->port;
- link_ksettings->base.phy_address
- = legacy_settings->phy_address;
- link_ksettings->base.autoneg
- = legacy_settings->autoneg;
- link_ksettings->base.mdio_support
- = legacy_settings->mdio_support;
- link_ksettings->base.eth_tp_mdix
- = legacy_settings->eth_tp_mdix;
- link_ksettings->base.eth_tp_mdix_ctrl
- = legacy_settings->eth_tp_mdix_ctrl;
- return retval;
-}
-
/* return false if ksettings link modes had higher bits
* set. legacy_settings always updated (best effort)
*/
@@ -693,7 +572,12 @@ static int ethtool_set_link_ksettings(struct net_device *dev,
!= link_ksettings.base.link_mode_masks_nwords)
return -EINVAL;
- return dev->ethtool_ops->set_link_ksettings(dev, &link_ksettings);
+ err = dev->ethtool_ops->set_link_ksettings(dev, &link_ksettings);
+ if (err >= 0) {
+ ethtool_notify(dev, ETHTOOL_MSG_LINKINFO_NTF, NULL);
+ ethtool_notify(dev, ETHTOOL_MSG_LINKMODES_NTF, NULL);
+ }
+ return err;
}
/* Query device for its ethtool_cmd settings.
@@ -742,6 +626,7 @@ static int ethtool_set_settings(struct net_device *dev, void __user *useraddr)
{
struct ethtool_link_ksettings link_ksettings;
struct ethtool_cmd cmd;
+ int ret;
ASSERT_RTNL();
@@ -754,7 +639,12 @@ static int ethtool_set_settings(struct net_device *dev, void __user *useraddr)
return -EINVAL;
link_ksettings.base.link_mode_masks_nwords =
__ETHTOOL_LINK_MODE_MASK_NU32;
- return dev->ethtool_ops->set_link_ksettings(dev, &link_ksettings);
+ ret = dev->ethtool_ops->set_link_ksettings(dev, &link_ksettings);
+ if (ret >= 0) {
+ ethtool_notify(dev, ETHTOOL_MSG_LINKINFO_NTF, NULL);
+ ethtool_notify(dev, ETHTOOL_MSG_LINKMODES_NTF, NULL);
+ }
+ return ret;
}
static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev,
@@ -1475,12 +1365,12 @@ static int ethtool_nway_reset(struct net_device *dev)
static int ethtool_get_link(struct net_device *dev, char __user *useraddr)
{
struct ethtool_value edata = { .cmd = ETHTOOL_GLINK };
+ int link = __ethtool_get_link(dev);
- if (!dev->ethtool_ops->get_link)
- return -EOPNOTSUPP;
-
- edata.data = netif_running(dev) && dev->ethtool_ops->get_link(dev);
+ if (link < 0)
+ return link;
+ edata.data = link;
if (copy_to_user(useraddr, &edata, sizeof(edata)))
return -EFAULT;
return 0;
@@ -2170,8 +2060,8 @@ static int ethtool_get_ts_info(struct net_device *dev, void __user *useraddr)
memset(&info, 0, sizeof(info));
info.cmd = ETHTOOL_GET_TS_INFO;
- if (phydev && phydev->drv && phydev->drv->ts_info) {
- err = phydev->drv->ts_info(phydev, &info);
+ if (phy_has_tsinfo(phydev)) {
+ err = phy_ts_info(phydev, &info);
} else if (ops->get_ts_info) {
err = ops->get_ts_info(dev, &info);
} else {
diff --git a/net/ethtool/linkinfo.c b/net/ethtool/linkinfo.c
new file mode 100644
index 000000000000..5d16cb4e8693
--- /dev/null
+++ b/net/ethtool/linkinfo.c
@@ -0,0 +1,167 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include "netlink.h"
+#include "common.h"
+
+struct linkinfo_req_info {
+ struct ethnl_req_info base;
+};
+
+struct linkinfo_reply_data {
+ struct ethnl_reply_data base;
+ struct ethtool_link_ksettings ksettings;
+ struct ethtool_link_settings *lsettings;
+};
+
+#define LINKINFO_REPDATA(__reply_base) \
+ container_of(__reply_base, struct linkinfo_reply_data, base)
+
+static const struct nla_policy
+linkinfo_get_policy[ETHTOOL_A_LINKINFO_MAX + 1] = {
+ [ETHTOOL_A_LINKINFO_UNSPEC] = { .type = NLA_REJECT },
+ [ETHTOOL_A_LINKINFO_HEADER] = { .type = NLA_NESTED },
+ [ETHTOOL_A_LINKINFO_PORT] = { .type = NLA_REJECT },
+ [ETHTOOL_A_LINKINFO_PHYADDR] = { .type = NLA_REJECT },
+ [ETHTOOL_A_LINKINFO_TP_MDIX] = { .type = NLA_REJECT },
+ [ETHTOOL_A_LINKINFO_TP_MDIX_CTRL] = { .type = NLA_REJECT },
+ [ETHTOOL_A_LINKINFO_TRANSCEIVER] = { .type = NLA_REJECT },
+};
+
+static int linkinfo_prepare_data(const struct ethnl_req_info *req_base,
+ struct ethnl_reply_data *reply_base,
+ struct genl_info *info)
+{
+ struct linkinfo_reply_data *data = LINKINFO_REPDATA(reply_base);
+ struct net_device *dev = reply_base->dev;
+ int ret;
+
+ data->lsettings = &data->ksettings.base;
+
+ ret = ethnl_ops_begin(dev);
+ if (ret < 0)
+ return ret;
+ ret = __ethtool_get_link_ksettings(dev, &data->ksettings);
+ if (ret < 0 && info)
+ GENL_SET_ERR_MSG(info, "failed to retrieve link settings");
+ ethnl_ops_complete(dev);
+
+ return ret;
+}
+
+static int linkinfo_reply_size(const struct ethnl_req_info *req_base,
+ const struct ethnl_reply_data *reply_base)
+{
+ return nla_total_size(sizeof(u8)) /* LINKINFO_PORT */
+ + nla_total_size(sizeof(u8)) /* LINKINFO_PHYADDR */
+ + nla_total_size(sizeof(u8)) /* LINKINFO_TP_MDIX */
+ + nla_total_size(sizeof(u8)) /* LINKINFO_TP_MDIX_CTRL */
+ + nla_total_size(sizeof(u8)) /* LINKINFO_TRANSCEIVER */
+ + 0;
+}
+
+static int linkinfo_fill_reply(struct sk_buff *skb,
+ const struct ethnl_req_info *req_base,
+ const struct ethnl_reply_data *reply_base)
+{
+ const struct linkinfo_reply_data *data = LINKINFO_REPDATA(reply_base);
+
+ if (nla_put_u8(skb, ETHTOOL_A_LINKINFO_PORT, data->lsettings->port) ||
+ nla_put_u8(skb, ETHTOOL_A_LINKINFO_PHYADDR,
+ data->lsettings->phy_address) ||
+ nla_put_u8(skb, ETHTOOL_A_LINKINFO_TP_MDIX,
+ data->lsettings->eth_tp_mdix) ||
+ nla_put_u8(skb, ETHTOOL_A_LINKINFO_TP_MDIX_CTRL,
+ data->lsettings->eth_tp_mdix_ctrl) ||
+ nla_put_u8(skb, ETHTOOL_A_LINKINFO_TRANSCEIVER,
+ data->lsettings->transceiver))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+const struct ethnl_request_ops ethnl_linkinfo_request_ops = {
+ .request_cmd = ETHTOOL_MSG_LINKINFO_GET,
+ .reply_cmd = ETHTOOL_MSG_LINKINFO_GET_REPLY,
+ .hdr_attr = ETHTOOL_A_LINKINFO_HEADER,
+ .max_attr = ETHTOOL_A_LINKINFO_MAX,
+ .req_info_size = sizeof(struct linkinfo_req_info),
+ .reply_data_size = sizeof(struct linkinfo_reply_data),
+ .request_policy = linkinfo_get_policy,
+
+ .prepare_data = linkinfo_prepare_data,
+ .reply_size = linkinfo_reply_size,
+ .fill_reply = linkinfo_fill_reply,
+};
+
+/* LINKINFO_SET */
+
+static const struct nla_policy
+linkinfo_set_policy[ETHTOOL_A_LINKINFO_MAX + 1] = {
+ [ETHTOOL_A_LINKINFO_UNSPEC] = { .type = NLA_REJECT },
+ [ETHTOOL_A_LINKINFO_HEADER] = { .type = NLA_NESTED },
+ [ETHTOOL_A_LINKINFO_PORT] = { .type = NLA_U8 },
+ [ETHTOOL_A_LINKINFO_PHYADDR] = { .type = NLA_U8 },
+ [ETHTOOL_A_LINKINFO_TP_MDIX] = { .type = NLA_REJECT },
+ [ETHTOOL_A_LINKINFO_TP_MDIX_CTRL] = { .type = NLA_U8 },
+ [ETHTOOL_A_LINKINFO_TRANSCEIVER] = { .type = NLA_REJECT },
+};
+
+int ethnl_set_linkinfo(struct sk_buff *skb, struct genl_info *info)
+{
+ struct nlattr *tb[ETHTOOL_A_LINKINFO_MAX + 1];
+ struct ethtool_link_ksettings ksettings = {};
+ struct ethtool_link_settings *lsettings;
+ struct ethnl_req_info req_info = {};
+ struct net_device *dev;
+ bool mod = false;
+ int ret;
+
+ ret = nlmsg_parse(info->nlhdr, GENL_HDRLEN, tb,
+ ETHTOOL_A_LINKINFO_MAX, linkinfo_set_policy,
+ info->extack);
+ if (ret < 0)
+ return ret;
+ ret = ethnl_parse_header(&req_info, tb[ETHTOOL_A_LINKINFO_HEADER],
+ genl_info_net(info), info->extack, true);
+ if (ret < 0)
+ return ret;
+ dev = req_info.dev;
+ if (!dev->ethtool_ops->get_link_ksettings ||
+ !dev->ethtool_ops->set_link_ksettings)
+ return -EOPNOTSUPP;
+
+ rtnl_lock();
+ ret = ethnl_ops_begin(dev);
+ if (ret < 0)
+ goto out_rtnl;
+
+ ret = __ethtool_get_link_ksettings(dev, &ksettings);
+ if (ret < 0) {
+ if (info)
+ GENL_SET_ERR_MSG(info, "failed to retrieve link settings");
+ goto out_ops;
+ }
+ lsettings = &ksettings.base;
+
+ ethnl_update_u8(&lsettings->port, tb[ETHTOOL_A_LINKINFO_PORT], &mod);
+ ethnl_update_u8(&lsettings->phy_address, tb[ETHTOOL_A_LINKINFO_PHYADDR],
+ &mod);
+ ethnl_update_u8(&lsettings->eth_tp_mdix_ctrl,
+ tb[ETHTOOL_A_LINKINFO_TP_MDIX_CTRL], &mod);
+ ret = 0;
+ if (!mod)
+ goto out_ops;
+
+ ret = dev->ethtool_ops->set_link_ksettings(dev, &ksettings);
+ if (ret < 0)
+ GENL_SET_ERR_MSG(info, "link settings update failed");
+ else
+ ethtool_notify(dev, ETHTOOL_MSG_LINKINFO_NTF, NULL);
+
+out_ops:
+ ethnl_ops_complete(dev);
+out_rtnl:
+ rtnl_unlock();
+ dev_put(dev);
+ return ret;
+}
diff --git a/net/ethtool/linkmodes.c b/net/ethtool/linkmodes.c
new file mode 100644
index 000000000000..96f20be64553
--- /dev/null
+++ b/net/ethtool/linkmodes.c
@@ -0,0 +1,375 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include "netlink.h"
+#include "common.h"
+#include "bitset.h"
+
+struct linkmodes_req_info {
+ struct ethnl_req_info base;
+};
+
+struct linkmodes_reply_data {
+ struct ethnl_reply_data base;
+ struct ethtool_link_ksettings ksettings;
+ struct ethtool_link_settings *lsettings;
+ bool peer_empty;
+};
+
+#define LINKMODES_REPDATA(__reply_base) \
+ container_of(__reply_base, struct linkmodes_reply_data, base)
+
+static const struct nla_policy
+linkmodes_get_policy[ETHTOOL_A_LINKMODES_MAX + 1] = {
+ [ETHTOOL_A_LINKMODES_UNSPEC] = { .type = NLA_REJECT },
+ [ETHTOOL_A_LINKMODES_HEADER] = { .type = NLA_NESTED },
+ [ETHTOOL_A_LINKMODES_AUTONEG] = { .type = NLA_REJECT },
+ [ETHTOOL_A_LINKMODES_OURS] = { .type = NLA_REJECT },
+ [ETHTOOL_A_LINKMODES_PEER] = { .type = NLA_REJECT },
+ [ETHTOOL_A_LINKMODES_SPEED] = { .type = NLA_REJECT },
+ [ETHTOOL_A_LINKMODES_DUPLEX] = { .type = NLA_REJECT },
+};
+
+static int linkmodes_prepare_data(const struct ethnl_req_info *req_base,
+ struct ethnl_reply_data *reply_base,
+ struct genl_info *info)
+{
+ struct linkmodes_reply_data *data = LINKMODES_REPDATA(reply_base);
+ struct net_device *dev = reply_base->dev;
+ int ret;
+
+ data->lsettings = &data->ksettings.base;
+
+ ret = ethnl_ops_begin(dev);
+ if (ret < 0)
+ return ret;
+
+ ret = __ethtool_get_link_ksettings(dev, &data->ksettings);
+ if (ret < 0 && info) {
+ GENL_SET_ERR_MSG(info, "failed to retrieve link settings");
+ goto out;
+ }
+
+ data->peer_empty =
+ bitmap_empty(data->ksettings.link_modes.lp_advertising,
+ __ETHTOOL_LINK_MODE_MASK_NBITS);
+
+out:
+ ethnl_ops_complete(dev);
+ return ret;
+}
+
+static int linkmodes_reply_size(const struct ethnl_req_info *req_base,
+ const struct ethnl_reply_data *reply_base)
+{
+ const struct linkmodes_reply_data *data = LINKMODES_REPDATA(reply_base);
+ const struct ethtool_link_ksettings *ksettings = &data->ksettings;
+ bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS;
+ int len, ret;
+
+ len = nla_total_size(sizeof(u8)) /* LINKMODES_AUTONEG */
+ + nla_total_size(sizeof(u32)) /* LINKMODES_SPEED */
+ + nla_total_size(sizeof(u8)) /* LINKMODES_DUPLEX */
+ + 0;
+ ret = ethnl_bitset_size(ksettings->link_modes.advertising,
+ ksettings->link_modes.supported,
+ __ETHTOOL_LINK_MODE_MASK_NBITS,
+ link_mode_names, compact);
+ if (ret < 0)
+ return ret;
+ len += ret;
+ if (!data->peer_empty) {
+ ret = ethnl_bitset_size(ksettings->link_modes.lp_advertising,
+ NULL, __ETHTOOL_LINK_MODE_MASK_NBITS,
+ link_mode_names, compact);
+ if (ret < 0)
+ return ret;
+ len += ret;
+ }
+
+ return len;
+}
+
+static int linkmodes_fill_reply(struct sk_buff *skb,
+ const struct ethnl_req_info *req_base,
+ const struct ethnl_reply_data *reply_base)
+{
+ const struct linkmodes_reply_data *data = LINKMODES_REPDATA(reply_base);
+ const struct ethtool_link_ksettings *ksettings = &data->ksettings;
+ const struct ethtool_link_settings *lsettings = &ksettings->base;
+ bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS;
+ int ret;
+
+ if (nla_put_u8(skb, ETHTOOL_A_LINKMODES_AUTONEG, lsettings->autoneg))
+ return -EMSGSIZE;
+
+ ret = ethnl_put_bitset(skb, ETHTOOL_A_LINKMODES_OURS,
+ ksettings->link_modes.advertising,
+ ksettings->link_modes.supported,
+ __ETHTOOL_LINK_MODE_MASK_NBITS, link_mode_names,
+ compact);
+ if (ret < 0)
+ return -EMSGSIZE;
+ if (!data->peer_empty) {
+ ret = ethnl_put_bitset(skb, ETHTOOL_A_LINKMODES_PEER,
+ ksettings->link_modes.lp_advertising,
+ NULL, __ETHTOOL_LINK_MODE_MASK_NBITS,
+ link_mode_names, compact);
+ if (ret < 0)
+ return -EMSGSIZE;
+ }
+
+ if (nla_put_u32(skb, ETHTOOL_A_LINKMODES_SPEED, lsettings->speed) ||
+ nla_put_u8(skb, ETHTOOL_A_LINKMODES_DUPLEX, lsettings->duplex))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+const struct ethnl_request_ops ethnl_linkmodes_request_ops = {
+ .request_cmd = ETHTOOL_MSG_LINKMODES_GET,
+ .reply_cmd = ETHTOOL_MSG_LINKMODES_GET_REPLY,
+ .hdr_attr = ETHTOOL_A_LINKMODES_HEADER,
+ .max_attr = ETHTOOL_A_LINKMODES_MAX,
+ .req_info_size = sizeof(struct linkmodes_req_info),
+ .reply_data_size = sizeof(struct linkmodes_reply_data),
+ .request_policy = linkmodes_get_policy,
+
+ .prepare_data = linkmodes_prepare_data,
+ .reply_size = linkmodes_reply_size,
+ .fill_reply = linkmodes_fill_reply,
+};
+
+/* LINKMODES_SET */
+
+struct link_mode_info {
+ int speed;
+ u8 duplex;
+};
+
+#define __DEFINE_LINK_MODE_PARAMS(_speed, _type, _duplex) \
+ [ETHTOOL_LINK_MODE(_speed, _type, _duplex)] = { \
+ .speed = SPEED_ ## _speed, \
+ .duplex = __DUPLEX_ ## _duplex \
+ }
+#define __DUPLEX_Half DUPLEX_HALF
+#define __DUPLEX_Full DUPLEX_FULL
+#define __DEFINE_SPECIAL_MODE_PARAMS(_mode) \
+ [ETHTOOL_LINK_MODE_ ## _mode ## _BIT] = { \
+ .speed = SPEED_UNKNOWN, \
+ .duplex = DUPLEX_UNKNOWN, \
+ }
+
+static const struct link_mode_info link_mode_params[] = {
+ __DEFINE_LINK_MODE_PARAMS(10, T, Half),
+ __DEFINE_LINK_MODE_PARAMS(10, T, Full),
+ __DEFINE_LINK_MODE_PARAMS(100, T, Half),
+ __DEFINE_LINK_MODE_PARAMS(100, T, Full),
+ __DEFINE_LINK_MODE_PARAMS(1000, T, Half),
+ __DEFINE_LINK_MODE_PARAMS(1000, T, Full),
+ __DEFINE_SPECIAL_MODE_PARAMS(Autoneg),
+ __DEFINE_SPECIAL_MODE_PARAMS(TP),
+ __DEFINE_SPECIAL_MODE_PARAMS(AUI),
+ __DEFINE_SPECIAL_MODE_PARAMS(MII),
+ __DEFINE_SPECIAL_MODE_PARAMS(FIBRE),
+ __DEFINE_SPECIAL_MODE_PARAMS(BNC),
+ __DEFINE_LINK_MODE_PARAMS(10000, T, Full),
+ __DEFINE_SPECIAL_MODE_PARAMS(Pause),
+ __DEFINE_SPECIAL_MODE_PARAMS(Asym_Pause),
+ __DEFINE_LINK_MODE_PARAMS(2500, X, Full),
+ __DEFINE_SPECIAL_MODE_PARAMS(Backplane),
+ __DEFINE_LINK_MODE_PARAMS(1000, KX, Full),
+ __DEFINE_LINK_MODE_PARAMS(10000, KX4, Full),
+ __DEFINE_LINK_MODE_PARAMS(10000, KR, Full),
+ [ETHTOOL_LINK_MODE_10000baseR_FEC_BIT] = {
+ .speed = SPEED_10000,
+ .duplex = DUPLEX_FULL,
+ },
+ __DEFINE_LINK_MODE_PARAMS(20000, MLD2, Full),
+ __DEFINE_LINK_MODE_PARAMS(20000, KR2, Full),
+ __DEFINE_LINK_MODE_PARAMS(40000, KR4, Full),
+ __DEFINE_LINK_MODE_PARAMS(40000, CR4, Full),
+ __DEFINE_LINK_MODE_PARAMS(40000, SR4, Full),
+ __DEFINE_LINK_MODE_PARAMS(40000, LR4, Full),
+ __DEFINE_LINK_MODE_PARAMS(56000, KR4, Full),
+ __DEFINE_LINK_MODE_PARAMS(56000, CR4, Full),
+ __DEFINE_LINK_MODE_PARAMS(56000, SR4, Full),
+ __DEFINE_LINK_MODE_PARAMS(56000, LR4, Full),
+ __DEFINE_LINK_MODE_PARAMS(25000, CR, Full),
+ __DEFINE_LINK_MODE_PARAMS(25000, KR, Full),
+ __DEFINE_LINK_MODE_PARAMS(25000, SR, Full),
+ __DEFINE_LINK_MODE_PARAMS(50000, CR2, Full),
+ __DEFINE_LINK_MODE_PARAMS(50000, KR2, Full),
+ __DEFINE_LINK_MODE_PARAMS(100000, KR4, Full),
+ __DEFINE_LINK_MODE_PARAMS(100000, SR4, Full),
+ __DEFINE_LINK_MODE_PARAMS(100000, CR4, Full),
+ __DEFINE_LINK_MODE_PARAMS(100000, LR4_ER4, Full),
+ __DEFINE_LINK_MODE_PARAMS(50000, SR2, Full),
+ __DEFINE_LINK_MODE_PARAMS(1000, X, Full),
+ __DEFINE_LINK_MODE_PARAMS(10000, CR, Full),
+ __DEFINE_LINK_MODE_PARAMS(10000, SR, Full),
+ __DEFINE_LINK_MODE_PARAMS(10000, LR, Full),
+ __DEFINE_LINK_MODE_PARAMS(10000, LRM, Full),
+ __DEFINE_LINK_MODE_PARAMS(10000, ER, Full),
+ __DEFINE_LINK_MODE_PARAMS(2500, T, Full),
+ __DEFINE_LINK_MODE_PARAMS(5000, T, Full),
+ __DEFINE_SPECIAL_MODE_PARAMS(FEC_NONE),
+ __DEFINE_SPECIAL_MODE_PARAMS(FEC_RS),
+ __DEFINE_SPECIAL_MODE_PARAMS(FEC_BASER),
+ __DEFINE_LINK_MODE_PARAMS(50000, KR, Full),
+ __DEFINE_LINK_MODE_PARAMS(50000, SR, Full),
+ __DEFINE_LINK_MODE_PARAMS(50000, CR, Full),
+ __DEFINE_LINK_MODE_PARAMS(50000, LR_ER_FR, Full),
+ __DEFINE_LINK_MODE_PARAMS(50000, DR, Full),
+ __DEFINE_LINK_MODE_PARAMS(100000, KR2, Full),
+ __DEFINE_LINK_MODE_PARAMS(100000, SR2, Full),
+ __DEFINE_LINK_MODE_PARAMS(100000, CR2, Full),
+ __DEFINE_LINK_MODE_PARAMS(100000, LR2_ER2_FR2, Full),
+ __DEFINE_LINK_MODE_PARAMS(100000, DR2, Full),
+ __DEFINE_LINK_MODE_PARAMS(200000, KR4, Full),
+ __DEFINE_LINK_MODE_PARAMS(200000, SR4, Full),
+ __DEFINE_LINK_MODE_PARAMS(200000, LR4_ER4_FR4, Full),
+ __DEFINE_LINK_MODE_PARAMS(200000, DR4, Full),
+ __DEFINE_LINK_MODE_PARAMS(200000, CR4, Full),
+ __DEFINE_LINK_MODE_PARAMS(100, T1, Full),
+ __DEFINE_LINK_MODE_PARAMS(1000, T1, Full),
+ __DEFINE_LINK_MODE_PARAMS(400000, KR8, Full),
+ __DEFINE_LINK_MODE_PARAMS(400000, SR8, Full),
+ __DEFINE_LINK_MODE_PARAMS(400000, LR8_ER8_FR8, Full),
+ __DEFINE_LINK_MODE_PARAMS(400000, DR8, Full),
+ __DEFINE_LINK_MODE_PARAMS(400000, CR8, Full),
+};
+
+static const struct nla_policy
+linkmodes_set_policy[ETHTOOL_A_LINKMODES_MAX + 1] = {
+ [ETHTOOL_A_LINKMODES_UNSPEC] = { .type = NLA_REJECT },
+ [ETHTOOL_A_LINKMODES_HEADER] = { .type = NLA_NESTED },
+ [ETHTOOL_A_LINKMODES_AUTONEG] = { .type = NLA_U8 },
+ [ETHTOOL_A_LINKMODES_OURS] = { .type = NLA_NESTED },
+ [ETHTOOL_A_LINKMODES_PEER] = { .type = NLA_REJECT },
+ [ETHTOOL_A_LINKMODES_SPEED] = { .type = NLA_U32 },
+ [ETHTOOL_A_LINKMODES_DUPLEX] = { .type = NLA_U8 },
+};
+
+/* Set advertised link modes to all supported modes matching requested speed
+ * and duplex values. Called when autonegotiation is on, speed or duplex is
+ * requested but no link mode change. This is done in userspace with ioctl()
+ * interface, move it into kernel for netlink.
+ * Returns true if advertised modes bitmap was modified.
+ */
+static bool ethnl_auto_linkmodes(struct ethtool_link_ksettings *ksettings,
+ bool req_speed, bool req_duplex)
+{
+ unsigned long *advertising = ksettings->link_modes.advertising;
+ unsigned long *supported = ksettings->link_modes.supported;
+ DECLARE_BITMAP(old_adv, __ETHTOOL_LINK_MODE_MASK_NBITS);
+ unsigned int i;
+
+ BUILD_BUG_ON(ARRAY_SIZE(link_mode_params) !=
+ __ETHTOOL_LINK_MODE_MASK_NBITS);
+
+ bitmap_copy(old_adv, advertising, __ETHTOOL_LINK_MODE_MASK_NBITS);
+
+ for (i = 0; i < __ETHTOOL_LINK_MODE_MASK_NBITS; i++) {
+ const struct link_mode_info *info = &link_mode_params[i];
+
+ if (info->speed == SPEED_UNKNOWN)
+ continue;
+ if (test_bit(i, supported) &&
+ (!req_speed || info->speed == ksettings->base.speed) &&
+ (!req_duplex || info->duplex == ksettings->base.duplex))
+ set_bit(i, advertising);
+ else
+ clear_bit(i, advertising);
+ }
+
+ return !bitmap_equal(old_adv, advertising,
+ __ETHTOOL_LINK_MODE_MASK_NBITS);
+}
+
+static int ethnl_update_linkmodes(struct genl_info *info, struct nlattr **tb,
+ struct ethtool_link_ksettings *ksettings,
+ bool *mod)
+{
+ struct ethtool_link_settings *lsettings = &ksettings->base;
+ bool req_speed, req_duplex;
+ int ret;
+
+ *mod = false;
+ req_speed = tb[ETHTOOL_A_LINKMODES_SPEED];
+ req_duplex = tb[ETHTOOL_A_LINKMODES_DUPLEX];
+
+ ethnl_update_u8(&lsettings->autoneg, tb[ETHTOOL_A_LINKMODES_AUTONEG],
+ mod);
+ ret = ethnl_update_bitset(ksettings->link_modes.advertising,
+ __ETHTOOL_LINK_MODE_MASK_NBITS,
+ tb[ETHTOOL_A_LINKMODES_OURS], link_mode_names,
+ info->extack, mod);
+ if (ret < 0)
+ return ret;
+ ethnl_update_u32(&lsettings->speed, tb[ETHTOOL_A_LINKMODES_SPEED],
+ mod);
+ ethnl_update_u8(&lsettings->duplex, tb[ETHTOOL_A_LINKMODES_DUPLEX],
+ mod);
+
+ if (!tb[ETHTOOL_A_LINKMODES_OURS] && lsettings->autoneg &&
+ (req_speed || req_duplex) &&
+ ethnl_auto_linkmodes(ksettings, req_speed, req_duplex))
+ *mod = true;
+
+ return 0;
+}
+
+int ethnl_set_linkmodes(struct sk_buff *skb, struct genl_info *info)
+{
+ struct nlattr *tb[ETHTOOL_A_LINKMODES_MAX + 1];
+ struct ethtool_link_ksettings ksettings = {};
+ struct ethnl_req_info req_info = {};
+ struct net_device *dev;
+ bool mod = false;
+ int ret;
+
+ ret = nlmsg_parse(info->nlhdr, GENL_HDRLEN, tb,
+ ETHTOOL_A_LINKMODES_MAX, linkmodes_set_policy,
+ info->extack);
+ if (ret < 0)
+ return ret;
+ ret = ethnl_parse_header(&req_info, tb[ETHTOOL_A_LINKMODES_HEADER],
+ genl_info_net(info), info->extack, true);
+ if (ret < 0)
+ return ret;
+ dev = req_info.dev;
+ if (!dev->ethtool_ops->get_link_ksettings ||
+ !dev->ethtool_ops->set_link_ksettings)
+ return -EOPNOTSUPP;
+
+ rtnl_lock();
+ ret = ethnl_ops_begin(dev);
+ if (ret < 0)
+ goto out_rtnl;
+
+ ret = __ethtool_get_link_ksettings(dev, &ksettings);
+ if (ret < 0) {
+ if (info)
+ GENL_SET_ERR_MSG(info, "failed to retrieve link settings");
+ goto out_ops;
+ }
+
+ ret = ethnl_update_linkmodes(info, tb, &ksettings, &mod);
+ if (ret < 0)
+ goto out_ops;
+
+ if (mod) {
+ ret = dev->ethtool_ops->set_link_ksettings(dev, &ksettings);
+ if (ret < 0)
+ GENL_SET_ERR_MSG(info, "link settings update failed");
+ else
+ ethtool_notify(dev, ETHTOOL_MSG_LINKMODES_NTF, NULL);
+ }
+
+out_ops:
+ ethnl_ops_complete(dev);
+out_rtnl:
+ rtnl_unlock();
+ dev_put(dev);
+ return ret;
+}
diff --git a/net/ethtool/linkstate.c b/net/ethtool/linkstate.c
new file mode 100644
index 000000000000..2740cde0a182
--- /dev/null
+++ b/net/ethtool/linkstate.c
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include "netlink.h"
+#include "common.h"
+
+struct linkstate_req_info {
+ struct ethnl_req_info base;
+};
+
+struct linkstate_reply_data {
+ struct ethnl_reply_data base;
+ int link;
+};
+
+#define LINKSTATE_REPDATA(__reply_base) \
+ container_of(__reply_base, struct linkstate_reply_data, base)
+
+static const struct nla_policy
+linkstate_get_policy[ETHTOOL_A_LINKSTATE_MAX + 1] = {
+ [ETHTOOL_A_LINKSTATE_UNSPEC] = { .type = NLA_REJECT },
+ [ETHTOOL_A_LINKSTATE_HEADER] = { .type = NLA_NESTED },
+ [ETHTOOL_A_LINKSTATE_LINK] = { .type = NLA_REJECT },
+};
+
+static int linkstate_prepare_data(const struct ethnl_req_info *req_base,
+ struct ethnl_reply_data *reply_base,
+ struct genl_info *info)
+{
+ struct linkstate_reply_data *data = LINKSTATE_REPDATA(reply_base);
+ struct net_device *dev = reply_base->dev;
+ int ret;
+
+ ret = ethnl_ops_begin(dev);
+ if (ret < 0)
+ return ret;
+ data->link = __ethtool_get_link(dev);
+ ethnl_ops_complete(dev);
+
+ return 0;
+}
+
+static int linkstate_reply_size(const struct ethnl_req_info *req_base,
+ const struct ethnl_reply_data *reply_base)
+{
+ return nla_total_size(sizeof(u8)) /* LINKSTATE_LINK */
+ + 0;
+}
+
+static int linkstate_fill_reply(struct sk_buff *skb,
+ const struct ethnl_req_info *req_base,
+ const struct ethnl_reply_data *reply_base)
+{
+ struct linkstate_reply_data *data = LINKSTATE_REPDATA(reply_base);
+
+ if (data->link >= 0 &&
+ nla_put_u8(skb, ETHTOOL_A_LINKSTATE_LINK, !!data->link))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+const struct ethnl_request_ops ethnl_linkstate_request_ops = {
+ .request_cmd = ETHTOOL_MSG_LINKSTATE_GET,
+ .reply_cmd = ETHTOOL_MSG_LINKSTATE_GET_REPLY,
+ .hdr_attr = ETHTOOL_A_LINKSTATE_HEADER,
+ .max_attr = ETHTOOL_A_LINKSTATE_MAX,
+ .req_info_size = sizeof(struct linkstate_req_info),
+ .reply_data_size = sizeof(struct linkstate_reply_data),
+ .request_policy = linkstate_get_policy,
+
+ .prepare_data = linkstate_prepare_data,
+ .reply_size = linkstate_reply_size,
+ .fill_reply = linkstate_fill_reply,
+};
diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c
new file mode 100644
index 000000000000..86b79f9bc08d
--- /dev/null
+++ b/net/ethtool/netlink.c
@@ -0,0 +1,696 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <net/sock.h>
+#include <linux/ethtool_netlink.h>
+#include "netlink.h"
+
+static struct genl_family ethtool_genl_family;
+
+static bool ethnl_ok __read_mostly;
+static u32 ethnl_bcast_seq;
+
+static const struct nla_policy ethnl_header_policy[ETHTOOL_A_HEADER_MAX + 1] = {
+ [ETHTOOL_A_HEADER_UNSPEC] = { .type = NLA_REJECT },
+ [ETHTOOL_A_HEADER_DEV_INDEX] = { .type = NLA_U32 },
+ [ETHTOOL_A_HEADER_DEV_NAME] = { .type = NLA_NUL_STRING,
+ .len = ALTIFNAMSIZ - 1 },
+ [ETHTOOL_A_HEADER_FLAGS] = { .type = NLA_U32 },
+};
+
+/**
+ * ethnl_parse_header() - parse request header
+ * @req_info: structure to put results into
+ * @header: nest attribute with request header
+ * @net: request netns
+ * @extack: netlink extack for error reporting
+ * @require_dev: fail if no device identified in header
+ *
+ * Parse request header in nested attribute @nest and puts results into
+ * the structure pointed to by @req_info. Extack from @info is used for error
+ * reporting. If req_info->dev is not null on return, reference to it has
+ * been taken. If error is returned, *req_info is null initialized and no
+ * reference is held.
+ *
+ * Return: 0 on success or negative error code
+ */
+int ethnl_parse_header(struct ethnl_req_info *req_info,
+ const struct nlattr *header, struct net *net,
+ struct netlink_ext_ack *extack, bool require_dev)
+{
+ struct nlattr *tb[ETHTOOL_A_HEADER_MAX + 1];
+ const struct nlattr *devname_attr;
+ struct net_device *dev = NULL;
+ int ret;
+
+ if (!header) {
+ NL_SET_ERR_MSG(extack, "request header missing");
+ return -EINVAL;
+ }
+ ret = nla_parse_nested(tb, ETHTOOL_A_HEADER_MAX, header,
+ ethnl_header_policy, extack);
+ if (ret < 0)
+ return ret;
+ devname_attr = tb[ETHTOOL_A_HEADER_DEV_NAME];
+
+ if (tb[ETHTOOL_A_HEADER_DEV_INDEX]) {
+ u32 ifindex = nla_get_u32(tb[ETHTOOL_A_HEADER_DEV_INDEX]);
+
+ dev = dev_get_by_index(net, ifindex);
+ if (!dev) {
+ NL_SET_ERR_MSG_ATTR(extack,
+ tb[ETHTOOL_A_HEADER_DEV_INDEX],
+ "no device matches ifindex");
+ return -ENODEV;
+ }
+ /* if both ifindex and ifname are passed, they must match */
+ if (devname_attr &&
+ strncmp(dev->name, nla_data(devname_attr), IFNAMSIZ)) {
+ dev_put(dev);
+ NL_SET_ERR_MSG_ATTR(extack, header,
+ "ifindex and name do not match");
+ return -ENODEV;
+ }
+ } else if (devname_attr) {
+ dev = dev_get_by_name(net, nla_data(devname_attr));
+ if (!dev) {
+ NL_SET_ERR_MSG_ATTR(extack, devname_attr,
+ "no device matches name");
+ return -ENODEV;
+ }
+ } else if (require_dev) {
+ NL_SET_ERR_MSG_ATTR(extack, header,
+ "neither ifindex nor name specified");
+ return -EINVAL;
+ }
+
+ if (dev && !netif_device_present(dev)) {
+ dev_put(dev);
+ NL_SET_ERR_MSG(extack, "device not present");
+ return -ENODEV;
+ }
+
+ req_info->dev = dev;
+ if (tb[ETHTOOL_A_HEADER_FLAGS])
+ req_info->flags = nla_get_u32(tb[ETHTOOL_A_HEADER_FLAGS]);
+
+ return 0;
+}
+
+/**
+ * ethnl_fill_reply_header() - Put common header into a reply message
+ * @skb: skb with the message
+ * @dev: network device to describe in header
+ * @attrtype: attribute type to use for the nest
+ *
+ * Create a nested attribute with attributes describing given network device.
+ *
+ * Return: 0 on success, error value (-EMSGSIZE only) on error
+ */
+int ethnl_fill_reply_header(struct sk_buff *skb, struct net_device *dev,
+ u16 attrtype)
+{
+ struct nlattr *nest;
+
+ if (!dev)
+ return 0;
+ nest = nla_nest_start(skb, attrtype);
+ if (!nest)
+ return -EMSGSIZE;
+
+ if (nla_put_u32(skb, ETHTOOL_A_HEADER_DEV_INDEX, (u32)dev->ifindex) ||
+ nla_put_string(skb, ETHTOOL_A_HEADER_DEV_NAME, dev->name))
+ goto nla_put_failure;
+ /* If more attributes are put into reply header, ethnl_header_size()
+ * must be updated to account for them.
+ */
+
+ nla_nest_end(skb, nest);
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, nest);
+ return -EMSGSIZE;
+}
+
+/**
+ * ethnl_reply_init() - Create skb for a reply and fill device identification
+ * @payload: payload length (without netlink and genetlink header)
+ * @dev: device the reply is about (may be null)
+ * @cmd: ETHTOOL_MSG_* message type for reply
+ * @info: genetlink info of the received packet we respond to
+ * @ehdrp: place to store payload pointer returned by genlmsg_new()
+ *
+ * Return: pointer to allocated skb on success, NULL on error
+ */
+struct sk_buff *ethnl_reply_init(size_t payload, struct net_device *dev, u8 cmd,
+ u16 hdr_attrtype, struct genl_info *info,
+ void **ehdrp)
+{
+ struct sk_buff *skb;
+
+ skb = genlmsg_new(payload, GFP_KERNEL);
+ if (!skb)
+ goto err;
+ *ehdrp = genlmsg_put_reply(skb, info, &ethtool_genl_family, 0, cmd);
+ if (!*ehdrp)
+ goto err_free;
+
+ if (dev) {
+ int ret;
+
+ ret = ethnl_fill_reply_header(skb, dev, hdr_attrtype);
+ if (ret < 0)
+ goto err_free;
+ }
+ return skb;
+
+err_free:
+ nlmsg_free(skb);
+err:
+ if (info)
+ GENL_SET_ERR_MSG(info, "failed to setup reply message");
+ return NULL;
+}
+
+static void *ethnl_bcastmsg_put(struct sk_buff *skb, u8 cmd)
+{
+ return genlmsg_put(skb, 0, ++ethnl_bcast_seq, &ethtool_genl_family, 0,
+ cmd);
+}
+
+static int ethnl_multicast(struct sk_buff *skb, struct net_device *dev)
+{
+ return genlmsg_multicast_netns(&ethtool_genl_family, dev_net(dev), skb,
+ 0, ETHNL_MCGRP_MONITOR, GFP_KERNEL);
+}
+
+/* GET request helpers */
+
+/**
+ * struct ethnl_dump_ctx - context structure for generic dumpit() callback
+ * @ops: request ops of currently processed message type
+ * @req_info: parsed request header of processed request
+ * @pos_hash: saved iteration position - hashbucket
+ * @pos_idx: saved iteration position - index
+ *
+ * These parameters are kept in struct netlink_callback as context preserved
+ * between iterations. They are initialized by ethnl_default_start() and used
+ * in ethnl_default_dumpit() and ethnl_default_done().
+ */
+struct ethnl_dump_ctx {
+ const struct ethnl_request_ops *ops;
+ struct ethnl_req_info *req_info;
+ struct ethnl_reply_data *reply_data;
+ int pos_hash;
+ int pos_idx;
+};
+
+static const struct ethnl_request_ops *
+ethnl_default_requests[__ETHTOOL_MSG_USER_CNT] = {
+ [ETHTOOL_MSG_STRSET_GET] = &ethnl_strset_request_ops,
+ [ETHTOOL_MSG_LINKINFO_GET] = &ethnl_linkinfo_request_ops,
+ [ETHTOOL_MSG_LINKMODES_GET] = &ethnl_linkmodes_request_ops,
+ [ETHTOOL_MSG_LINKSTATE_GET] = &ethnl_linkstate_request_ops,
+};
+
+static struct ethnl_dump_ctx *ethnl_dump_context(struct netlink_callback *cb)
+{
+ return (struct ethnl_dump_ctx *)cb->ctx;
+}
+
+/**
+ * ethnl_default_parse() - Parse request message
+ * @req_info: pointer to structure to put data into
+ * @nlhdr: pointer to request message header
+ * @net: request netns
+ * @request_ops: struct request_ops for request type
+ * @extack: netlink extack for error reporting
+ * @require_dev: fail if no device identified in header
+ *
+ * Parse universal request header and call request specific ->parse_request()
+ * callback (if defined) to parse the rest of the message.
+ *
+ * Return: 0 on success or negative error code
+ */
+static int ethnl_default_parse(struct ethnl_req_info *req_info,
+ const struct nlmsghdr *nlhdr, struct net *net,
+ const struct ethnl_request_ops *request_ops,
+ struct netlink_ext_ack *extack, bool require_dev)
+{
+ struct nlattr **tb;
+ int ret;
+
+ tb = kmalloc_array(request_ops->max_attr + 1, sizeof(tb[0]),
+ GFP_KERNEL);
+ if (!tb)
+ return -ENOMEM;
+
+ ret = nlmsg_parse(nlhdr, GENL_HDRLEN, tb, request_ops->max_attr,
+ request_ops->request_policy, extack);
+ if (ret < 0)
+ goto out;
+ ret = ethnl_parse_header(req_info, tb[request_ops->hdr_attr], net,
+ extack, require_dev);
+ if (ret < 0)
+ goto out;
+
+ if (request_ops->parse_request) {
+ ret = request_ops->parse_request(req_info, tb, extack);
+ if (ret < 0)
+ goto out;
+ }
+
+ ret = 0;
+out:
+ kfree(tb);
+ return ret;
+}
+
+/**
+ * ethnl_init_reply_data() - Initialize reply data for GET request
+ * @req_info: pointer to embedded struct ethnl_req_info
+ * @ops: instance of struct ethnl_request_ops describing the layout
+ * @dev: network device to initialize the reply for
+ *
+ * Fills the reply data part with zeros and sets the dev member. Must be called
+ * before calling the ->fill_reply() callback (for each iteration when handling
+ * dump requests).
+ */
+static void ethnl_init_reply_data(struct ethnl_reply_data *reply_data,
+ const struct ethnl_request_ops *ops,
+ struct net_device *dev)
+{
+ memset(reply_data, 0, ops->reply_data_size);
+ reply_data->dev = dev;
+}
+
+/* default ->doit() handler for GET type requests */
+static int ethnl_default_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct ethnl_reply_data *reply_data = NULL;
+ struct ethnl_req_info *req_info = NULL;
+ const u8 cmd = info->genlhdr->cmd;
+ const struct ethnl_request_ops *ops;
+ struct sk_buff *rskb;
+ void *reply_payload;
+ int reply_len;
+ int ret;
+
+ ops = ethnl_default_requests[cmd];
+ if (WARN_ONCE(!ops, "cmd %u has no ethnl_request_ops\n", cmd))
+ return -EOPNOTSUPP;
+ req_info = kzalloc(ops->req_info_size, GFP_KERNEL);
+ if (!req_info)
+ return -ENOMEM;
+ reply_data = kmalloc(ops->reply_data_size, GFP_KERNEL);
+ if (!reply_data) {
+ kfree(req_info);
+ return -ENOMEM;
+ }
+
+ ret = ethnl_default_parse(req_info, info->nlhdr, genl_info_net(info), ops,
+ info->extack, !ops->allow_nodev_do);
+ if (ret < 0)
+ goto err_dev;
+ ethnl_init_reply_data(reply_data, ops, req_info->dev);
+
+ rtnl_lock();
+ ret = ops->prepare_data(req_info, reply_data, info);
+ rtnl_unlock();
+ if (ret < 0)
+ goto err_cleanup;
+ ret = ops->reply_size(req_info, reply_data);
+ if (ret < 0)
+ goto err_cleanup;
+ reply_len = ret;
+ ret = -ENOMEM;
+ rskb = ethnl_reply_init(reply_len, req_info->dev, ops->reply_cmd,
+ ops->hdr_attr, info, &reply_payload);
+ if (!rskb)
+ goto err_cleanup;
+ ret = ops->fill_reply(rskb, req_info, reply_data);
+ if (ret < 0)
+ goto err_msg;
+ if (ops->cleanup_data)
+ ops->cleanup_data(reply_data);
+
+ genlmsg_end(rskb, reply_payload);
+ if (req_info->dev)
+ dev_put(req_info->dev);
+ kfree(reply_data);
+ kfree(req_info);
+ return genlmsg_reply(rskb, info);
+
+err_msg:
+ WARN_ONCE(ret == -EMSGSIZE, "calculated message payload length (%d) not sufficient\n", reply_len);
+ nlmsg_free(rskb);
+err_cleanup:
+ if (ops->cleanup_data)
+ ops->cleanup_data(reply_data);
+err_dev:
+ if (req_info->dev)
+ dev_put(req_info->dev);
+ kfree(reply_data);
+ kfree(req_info);
+ return ret;
+}
+
+static int ethnl_default_dump_one(struct sk_buff *skb, struct net_device *dev,
+ const struct ethnl_dump_ctx *ctx)
+{
+ int ret;
+
+ ethnl_init_reply_data(ctx->reply_data, ctx->ops, dev);
+ rtnl_lock();
+ ret = ctx->ops->prepare_data(ctx->req_info, ctx->reply_data, NULL);
+ rtnl_unlock();
+ if (ret < 0)
+ goto out;
+ ret = ethnl_fill_reply_header(skb, dev, ctx->ops->hdr_attr);
+ if (ret < 0)
+ goto out;
+ ret = ctx->ops->fill_reply(skb, ctx->req_info, ctx->reply_data);
+
+out:
+ if (ctx->ops->cleanup_data)
+ ctx->ops->cleanup_data(ctx->reply_data);
+ ctx->reply_data->dev = NULL;
+ return ret;
+}
+
+/* Default ->dumpit() handler for GET requests. Device iteration copied from
+ * rtnl_dump_ifinfo(); we have to be more careful about device hashtable
+ * persistence as we cannot guarantee to hold RTNL lock through the whole
+ * function as rtnetnlink does.
+ */
+static int ethnl_default_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ struct ethnl_dump_ctx *ctx = ethnl_dump_context(cb);
+ struct net *net = sock_net(skb->sk);
+ int s_idx = ctx->pos_idx;
+ int h, idx = 0;
+ int ret = 0;
+ void *ehdr;
+
+ rtnl_lock();
+ for (h = ctx->pos_hash; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
+ struct hlist_head *head;
+ struct net_device *dev;
+ unsigned int seq;
+
+ head = &net->dev_index_head[h];
+
+restart_chain:
+ seq = net->dev_base_seq;
+ cb->seq = seq;
+ idx = 0;
+ hlist_for_each_entry(dev, head, index_hlist) {
+ if (idx < s_idx)
+ goto cont;
+ dev_hold(dev);
+ rtnl_unlock();
+
+ ehdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ &ethtool_genl_family, 0,
+ ctx->ops->reply_cmd);
+ if (!ehdr) {
+ dev_put(dev);
+ ret = -EMSGSIZE;
+ goto out;
+ }
+ ret = ethnl_default_dump_one(skb, dev, ctx);
+ dev_put(dev);
+ if (ret < 0) {
+ genlmsg_cancel(skb, ehdr);
+ if (ret == -EOPNOTSUPP)
+ goto lock_and_cont;
+ if (likely(skb->len))
+ ret = skb->len;
+ goto out;
+ }
+ genlmsg_end(skb, ehdr);
+lock_and_cont:
+ rtnl_lock();
+ if (net->dev_base_seq != seq) {
+ s_idx = idx + 1;
+ goto restart_chain;
+ }
+cont:
+ idx++;
+ }
+
+ }
+ rtnl_unlock();
+
+out:
+ ctx->pos_hash = h;
+ ctx->pos_idx = idx;
+ nl_dump_check_consistent(cb, nlmsg_hdr(skb));
+
+ return ret;
+}
+
+/* generic ->start() handler for GET requests */
+static int ethnl_default_start(struct netlink_callback *cb)
+{
+ struct ethnl_dump_ctx *ctx = ethnl_dump_context(cb);
+ struct ethnl_reply_data *reply_data;
+ const struct ethnl_request_ops *ops;
+ struct ethnl_req_info *req_info;
+ struct genlmsghdr *ghdr;
+ int ret;
+
+ BUILD_BUG_ON(sizeof(*ctx) > sizeof(cb->ctx));
+
+ ghdr = nlmsg_data(cb->nlh);
+ ops = ethnl_default_requests[ghdr->cmd];
+ if (WARN_ONCE(!ops, "cmd %u has no ethnl_request_ops\n", ghdr->cmd))
+ return -EOPNOTSUPP;
+ req_info = kzalloc(ops->req_info_size, GFP_KERNEL);
+ if (!req_info)
+ return -ENOMEM;
+ reply_data = kmalloc(ops->reply_data_size, GFP_KERNEL);
+ if (!reply_data) {
+ ret = -ENOMEM;
+ goto free_req_info;
+ }
+
+ ret = ethnl_default_parse(req_info, cb->nlh, sock_net(cb->skb->sk), ops,
+ cb->extack, false);
+ if (req_info->dev) {
+ /* We ignore device specification in dump requests but as the
+ * same parser as for non-dump (doit) requests is used, it
+ * would take reference to the device if it finds one
+ */
+ dev_put(req_info->dev);
+ req_info->dev = NULL;
+ }
+ if (ret < 0)
+ goto free_reply_data;
+
+ ctx->ops = ops;
+ ctx->req_info = req_info;
+ ctx->reply_data = reply_data;
+ ctx->pos_hash = 0;
+ ctx->pos_idx = 0;
+
+ return 0;
+
+free_reply_data:
+ kfree(reply_data);
+free_req_info:
+ kfree(req_info);
+
+ return ret;
+}
+
+/* default ->done() handler for GET requests */
+static int ethnl_default_done(struct netlink_callback *cb)
+{
+ struct ethnl_dump_ctx *ctx = ethnl_dump_context(cb);
+
+ kfree(ctx->reply_data);
+ kfree(ctx->req_info);
+
+ return 0;
+}
+
+static const struct ethnl_request_ops *
+ethnl_default_notify_ops[ETHTOOL_MSG_KERNEL_MAX + 1] = {
+ [ETHTOOL_MSG_LINKINFO_NTF] = &ethnl_linkinfo_request_ops,
+ [ETHTOOL_MSG_LINKMODES_NTF] = &ethnl_linkmodes_request_ops,
+};
+
+/* default notification handler */
+static void ethnl_default_notify(struct net_device *dev, unsigned int cmd,
+ const void *data)
+{
+ struct ethnl_reply_data *reply_data;
+ const struct ethnl_request_ops *ops;
+ struct ethnl_req_info *req_info;
+ struct sk_buff *skb;
+ void *reply_payload;
+ int reply_len;
+ int ret;
+
+ if (WARN_ONCE(cmd > ETHTOOL_MSG_KERNEL_MAX ||
+ !ethnl_default_notify_ops[cmd],
+ "unexpected notification type %u\n", cmd))
+ return;
+ ops = ethnl_default_notify_ops[cmd];
+ req_info = kzalloc(ops->req_info_size, GFP_KERNEL);
+ if (!req_info)
+ return;
+ reply_data = kmalloc(ops->reply_data_size, GFP_KERNEL);
+ if (!reply_data) {
+ kfree(req_info);
+ return;
+ }
+
+ req_info->dev = dev;
+ req_info->flags |= ETHTOOL_FLAG_COMPACT_BITSETS;
+
+ ethnl_init_reply_data(reply_data, ops, dev);
+ ret = ops->prepare_data(req_info, reply_data, NULL);
+ if (ret < 0)
+ goto err_cleanup;
+ ret = ops->reply_size(req_info, reply_data);
+ if (ret < 0)
+ goto err_cleanup;
+ reply_len = ret;
+ ret = -ENOMEM;
+ skb = genlmsg_new(reply_len, GFP_KERNEL);
+ if (!skb)
+ goto err_cleanup;
+ reply_payload = ethnl_bcastmsg_put(skb, cmd);
+ if (!reply_payload)
+ goto err_skb;
+ ret = ethnl_fill_reply_header(skb, dev, ops->hdr_attr);
+ if (ret < 0)
+ goto err_msg;
+ ret = ops->fill_reply(skb, req_info, reply_data);
+ if (ret < 0)
+ goto err_msg;
+ if (ops->cleanup_data)
+ ops->cleanup_data(reply_data);
+
+ genlmsg_end(skb, reply_payload);
+ kfree(reply_data);
+ kfree(req_info);
+ ethnl_multicast(skb, dev);
+ return;
+
+err_msg:
+ WARN_ONCE(ret == -EMSGSIZE,
+ "calculated message payload length (%d) not sufficient\n",
+ reply_len);
+err_skb:
+ nlmsg_free(skb);
+err_cleanup:
+ if (ops->cleanup_data)
+ ops->cleanup_data(reply_data);
+ kfree(reply_data);
+ kfree(req_info);
+ return;
+}
+
+/* notifications */
+
+typedef void (*ethnl_notify_handler_t)(struct net_device *dev, unsigned int cmd,
+ const void *data);
+
+static const ethnl_notify_handler_t ethnl_notify_handlers[] = {
+ [ETHTOOL_MSG_LINKINFO_NTF] = ethnl_default_notify,
+ [ETHTOOL_MSG_LINKMODES_NTF] = ethnl_default_notify,
+};
+
+void ethtool_notify(struct net_device *dev, unsigned int cmd, const void *data)
+{
+ if (unlikely(!ethnl_ok))
+ return;
+ ASSERT_RTNL();
+
+ if (likely(cmd < ARRAY_SIZE(ethnl_notify_handlers) &&
+ ethnl_notify_handlers[cmd]))
+ ethnl_notify_handlers[cmd](dev, cmd, data);
+ else
+ WARN_ONCE(1, "notification %u not implemented (dev=%s)\n",
+ cmd, netdev_name(dev));
+}
+EXPORT_SYMBOL(ethtool_notify);
+
+/* genetlink setup */
+
+static const struct genl_ops ethtool_genl_ops[] = {
+ {
+ .cmd = ETHTOOL_MSG_STRSET_GET,
+ .doit = ethnl_default_doit,
+ .start = ethnl_default_start,
+ .dumpit = ethnl_default_dumpit,
+ .done = ethnl_default_done,
+ },
+ {
+ .cmd = ETHTOOL_MSG_LINKINFO_GET,
+ .doit = ethnl_default_doit,
+ .start = ethnl_default_start,
+ .dumpit = ethnl_default_dumpit,
+ .done = ethnl_default_done,
+ },
+ {
+ .cmd = ETHTOOL_MSG_LINKINFO_SET,
+ .flags = GENL_UNS_ADMIN_PERM,
+ .doit = ethnl_set_linkinfo,
+ },
+ {
+ .cmd = ETHTOOL_MSG_LINKMODES_GET,
+ .doit = ethnl_default_doit,
+ .start = ethnl_default_start,
+ .dumpit = ethnl_default_dumpit,
+ .done = ethnl_default_done,
+ },
+ {
+ .cmd = ETHTOOL_MSG_LINKMODES_SET,
+ .flags = GENL_UNS_ADMIN_PERM,
+ .doit = ethnl_set_linkmodes,
+ },
+ {
+ .cmd = ETHTOOL_MSG_LINKSTATE_GET,
+ .doit = ethnl_default_doit,
+ .start = ethnl_default_start,
+ .dumpit = ethnl_default_dumpit,
+ .done = ethnl_default_done,
+ },
+};
+
+static const struct genl_multicast_group ethtool_nl_mcgrps[] = {
+ [ETHNL_MCGRP_MONITOR] = { .name = ETHTOOL_MCGRP_MONITOR_NAME },
+};
+
+static struct genl_family ethtool_genl_family = {
+ .name = ETHTOOL_GENL_NAME,
+ .version = ETHTOOL_GENL_VERSION,
+ .netnsok = true,
+ .parallel_ops = true,
+ .ops = ethtool_genl_ops,
+ .n_ops = ARRAY_SIZE(ethtool_genl_ops),
+ .mcgrps = ethtool_nl_mcgrps,
+ .n_mcgrps = ARRAY_SIZE(ethtool_nl_mcgrps),
+};
+
+/* module setup */
+
+static int __init ethnl_init(void)
+{
+ int ret;
+
+ ret = genl_register_family(&ethtool_genl_family);
+ if (WARN(ret < 0, "ethtool: genetlink family registration failed"))
+ return ret;
+ ethnl_ok = true;
+
+ return 0;
+}
+
+subsys_initcall(ethnl_init);
diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h
new file mode 100644
index 000000000000..da9d6521a4eb
--- /dev/null
+++ b/net/ethtool/netlink.h
@@ -0,0 +1,341 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef _NET_ETHTOOL_NETLINK_H
+#define _NET_ETHTOOL_NETLINK_H
+
+#include <linux/ethtool_netlink.h>
+#include <linux/netdevice.h>
+#include <net/genetlink.h>
+#include <net/sock.h>
+
+struct ethnl_req_info;
+
+int ethnl_parse_header(struct ethnl_req_info *req_info,
+ const struct nlattr *nest, struct net *net,
+ struct netlink_ext_ack *extack, bool require_dev);
+int ethnl_fill_reply_header(struct sk_buff *skb, struct net_device *dev,
+ u16 attrtype);
+struct sk_buff *ethnl_reply_init(size_t payload, struct net_device *dev, u8 cmd,
+ u16 hdr_attrtype, struct genl_info *info,
+ void **ehdrp);
+
+/**
+ * ethnl_strz_size() - calculate attribute length for fixed size string
+ * @s: ETH_GSTRING_LEN sized string (may not be null terminated)
+ *
+ * Return: total length of an attribute with null terminated string from @s
+ */
+static inline int ethnl_strz_size(const char *s)
+{
+ return nla_total_size(strnlen(s, ETH_GSTRING_LEN) + 1);
+}
+
+/**
+ * ethnl_put_strz() - put string attribute with fixed size string
+ * @skb: skb with the message
+ * @attrype: attribute type
+ * @s: ETH_GSTRING_LEN sized string (may not be null terminated)
+ *
+ * Puts an attribute with null terminated string from @s into the message.
+ *
+ * Return: 0 on success, negative error code on failure
+ */
+static inline int ethnl_put_strz(struct sk_buff *skb, u16 attrtype,
+ const char *s)
+{
+ unsigned int len = strnlen(s, ETH_GSTRING_LEN);
+ struct nlattr *attr;
+
+ attr = nla_reserve(skb, attrtype, len + 1);
+ if (!attr)
+ return -EMSGSIZE;
+
+ memcpy(nla_data(attr), s, len);
+ ((char *)nla_data(attr))[len] = '\0';
+ return 0;
+}
+
+/**
+ * ethnl_update_u32() - update u32 value from NLA_U32 attribute
+ * @dst: value to update
+ * @attr: netlink attribute with new value or null
+ * @mod: pointer to bool for modification tracking
+ *
+ * Copy the u32 value from NLA_U32 netlink attribute @attr into variable
+ * pointed to by @dst; do nothing if @attr is null. Bool pointed to by @mod
+ * is set to true if this function changed the value of *dst, otherwise it
+ * is left as is.
+ */
+static inline void ethnl_update_u32(u32 *dst, const struct nlattr *attr,
+ bool *mod)
+{
+ u32 val;
+
+ if (!attr)
+ return;
+ val = nla_get_u32(attr);
+ if (*dst == val)
+ return;
+
+ *dst = val;
+ *mod = true;
+}
+
+/**
+ * ethnl_update_u8() - update u8 value from NLA_U8 attribute
+ * @dst: value to update
+ * @attr: netlink attribute with new value or null
+ * @mod: pointer to bool for modification tracking
+ *
+ * Copy the u8 value from NLA_U8 netlink attribute @attr into variable
+ * pointed to by @dst; do nothing if @attr is null. Bool pointed to by @mod
+ * is set to true if this function changed the value of *dst, otherwise it
+ * is left as is.
+ */
+static inline void ethnl_update_u8(u8 *dst, const struct nlattr *attr,
+ bool *mod)
+{
+ u8 val;
+
+ if (!attr)
+ return;
+ val = nla_get_u8(attr);
+ if (*dst == val)
+ return;
+
+ *dst = val;
+ *mod = true;
+}
+
+/**
+ * ethnl_update_bool32() - update u32 used as bool from NLA_U8 attribute
+ * @dst: value to update
+ * @attr: netlink attribute with new value or null
+ * @mod: pointer to bool for modification tracking
+ *
+ * Use the u8 value from NLA_U8 netlink attribute @attr to set u32 variable
+ * pointed to by @dst to 0 (if zero) or 1 (if not); do nothing if @attr is
+ * null. Bool pointed to by @mod is set to true if this function changed the
+ * logical value of *dst, otherwise it is left as is.
+ */
+static inline void ethnl_update_bool32(u32 *dst, const struct nlattr *attr,
+ bool *mod)
+{
+ u8 val;
+
+ if (!attr)
+ return;
+ val = !!nla_get_u8(attr);
+ if (!!*dst == val)
+ return;
+
+ *dst = val;
+ *mod = true;
+}
+
+/**
+ * ethnl_update_binary() - update binary data from NLA_BINARY atribute
+ * @dst: value to update
+ * @len: destination buffer length
+ * @attr: netlink attribute with new value or null
+ * @mod: pointer to bool for modification tracking
+ *
+ * Use the u8 value from NLA_U8 netlink attribute @attr to rewrite data block
+ * of length @len at @dst by attribute payload; do nothing if @attr is null.
+ * Bool pointed to by @mod is set to true if this function changed the logical
+ * value of *dst, otherwise it is left as is.
+ */
+static inline void ethnl_update_binary(void *dst, unsigned int len,
+ const struct nlattr *attr, bool *mod)
+{
+ if (!attr)
+ return;
+ if (nla_len(attr) < len)
+ len = nla_len(attr);
+ if (!memcmp(dst, nla_data(attr), len))
+ return;
+
+ memcpy(dst, nla_data(attr), len);
+ *mod = true;
+}
+
+/**
+ * ethnl_update_bitfield32() - update u32 value from NLA_BITFIELD32 attribute
+ * @dst: value to update
+ * @attr: netlink attribute with new value or null
+ * @mod: pointer to bool for modification tracking
+ *
+ * Update bits in u32 value which are set in attribute's mask to values from
+ * attribute's value. Do nothing if @attr is null or the value wouldn't change;
+ * otherwise, set bool pointed to by @mod to true.
+ */
+static inline void ethnl_update_bitfield32(u32 *dst, const struct nlattr *attr,
+ bool *mod)
+{
+ struct nla_bitfield32 change;
+ u32 newval;
+
+ if (!attr)
+ return;
+ change = nla_get_bitfield32(attr);
+ newval = (*dst & ~change.selector) | (change.value & change.selector);
+ if (*dst == newval)
+ return;
+
+ *dst = newval;
+ *mod = true;
+}
+
+/**
+ * ethnl_reply_header_size() - total size of reply header
+ *
+ * This is an upper estimate so that we do not need to hold RTNL lock longer
+ * than necessary (to prevent rename between size estimate and composing the
+ * message). Accounts only for device ifindex and name as those are the only
+ * attributes ethnl_fill_reply_header() puts into the reply header.
+ */
+static inline unsigned int ethnl_reply_header_size(void)
+{
+ return nla_total_size(nla_total_size(sizeof(u32)) +
+ nla_total_size(IFNAMSIZ));
+}
+
+/* GET request handling */
+
+/* Unified processing of GET requests uses two data structures: request info
+ * and reply data. Request info holds information parsed from client request
+ * and its stays constant through all request processing. Reply data holds data
+ * retrieved from ethtool_ops callbacks or other internal sources which is used
+ * to compose the reply. When processing a dump request, request info is filled
+ * only once (when the request message is parsed) but reply data is filled for
+ * each reply message.
+ *
+ * Both structures consist of part common for all request types (struct
+ * ethnl_req_info and struct ethnl_reply_data defined below) and optional
+ * parts specific for each request type. Common part always starts at offset 0.
+ */
+
+/**
+ * struct ethnl_req_info - base type of request information for GET requests
+ * @dev: network device the request is for (may be null)
+ * @flags: request flags common for all request types
+ *
+ * This is a common base for request specific structures holding data from
+ * parsed userspace request. These always embed struct ethnl_req_info at
+ * zero offset.
+ */
+struct ethnl_req_info {
+ struct net_device *dev;
+ u32 flags;
+};
+
+/**
+ * struct ethnl_reply_data - base type of reply data for GET requests
+ * @dev: device for current reply message; in single shot requests it is
+ * equal to &ethnl_req_info.dev; in dumps it's different for each
+ * reply message
+ *
+ * This is a common base for request specific structures holding data for
+ * kernel reply message. These always embed struct ethnl_reply_data at zero
+ * offset.
+ */
+struct ethnl_reply_data {
+ struct net_device *dev;
+};
+
+static inline int ethnl_ops_begin(struct net_device *dev)
+{
+ if (dev && dev->ethtool_ops->begin)
+ return dev->ethtool_ops->begin(dev);
+ else
+ return 0;
+}
+
+static inline void ethnl_ops_complete(struct net_device *dev)
+{
+ if (dev && dev->ethtool_ops->complete)
+ dev->ethtool_ops->complete(dev);
+}
+
+/**
+ * struct ethnl_request_ops - unified handling of GET requests
+ * @request_cmd: command id for request (GET)
+ * @reply_cmd: command id for reply (GET_REPLY)
+ * @hdr_attr: attribute type for request header
+ * @max_attr: maximum (top level) attribute type
+ * @req_info_size: size of request info
+ * @reply_data_size: size of reply data
+ * @request_policy: netlink policy for message contents
+ * @allow_nodev_do: allow non-dump request with no device identification
+ * @parse_request:
+ * Parse request except common header (struct ethnl_req_info). Common
+ * header is already filled on entry, the rest up to @repdata_offset
+ * is zero initialized. This callback should only modify type specific
+ * request info by parsed attributes from request message.
+ * @prepare_data:
+ * Retrieve and prepare data needed to compose a reply message. Calls to
+ * ethtool_ops handlers are limited to this callback. Common reply data
+ * (struct ethnl_reply_data) is filled on entry, type specific part after
+ * it is zero initialized. This callback should only modify the type
+ * specific part of reply data. Device identification from struct
+ * ethnl_reply_data is to be used as for dump requests, it iterates
+ * through network devices while dev member of struct ethnl_req_info
+ * points to the device from client request.
+ * @reply_size:
+ * Estimate reply message size. Returned value must be sufficient for
+ * message payload without common reply header. The callback may returned
+ * estimate higher than actual message size if exact calculation would
+ * not be worth the saved memory space.
+ * @fill_reply:
+ * Fill reply message payload (except for common header) from reply data.
+ * The callback must not generate more payload than previously called
+ * ->reply_size() estimated.
+ * @cleanup_data:
+ * Optional cleanup called when reply data is no longer needed. Can be
+ * used e.g. to free any additional data structures outside the main
+ * structure which were allocated by ->prepare_data(). When processing
+ * dump requests, ->cleanup() is called for each message.
+ *
+ * Description of variable parts of GET request handling when using the
+ * unified infrastructure. When used, a pointer to an instance of this
+ * structure is to be added to &ethnl_default_requests array and generic
+ * handlers ethnl_default_doit(), ethnl_default_dumpit(),
+ * ethnl_default_start() and ethnl_default_done() used in @ethtool_genl_ops;
+ * ethnl_default_notify() can be used in @ethnl_notify_handlers to send
+ * notifications of the corresponding type.
+ */
+struct ethnl_request_ops {
+ u8 request_cmd;
+ u8 reply_cmd;
+ u16 hdr_attr;
+ unsigned int max_attr;
+ unsigned int req_info_size;
+ unsigned int reply_data_size;
+ const struct nla_policy *request_policy;
+ bool allow_nodev_do;
+
+ int (*parse_request)(struct ethnl_req_info *req_info,
+ struct nlattr **tb,
+ struct netlink_ext_ack *extack);
+ int (*prepare_data)(const struct ethnl_req_info *req_info,
+ struct ethnl_reply_data *reply_data,
+ struct genl_info *info);
+ int (*reply_size)(const struct ethnl_req_info *req_info,
+ const struct ethnl_reply_data *reply_data);
+ int (*fill_reply)(struct sk_buff *skb,
+ const struct ethnl_req_info *req_info,
+ const struct ethnl_reply_data *reply_data);
+ void (*cleanup_data)(struct ethnl_reply_data *reply_data);
+};
+
+/* request handlers */
+
+extern const struct ethnl_request_ops ethnl_strset_request_ops;
+extern const struct ethnl_request_ops ethnl_linkinfo_request_ops;
+extern const struct ethnl_request_ops ethnl_linkmodes_request_ops;
+extern const struct ethnl_request_ops ethnl_linkstate_request_ops;
+
+int ethnl_set_linkinfo(struct sk_buff *skb, struct genl_info *info);
+int ethnl_set_linkmodes(struct sk_buff *skb, struct genl_info *info);
+
+#endif /* _NET_ETHTOOL_NETLINK_H */
diff --git a/net/ethtool/strset.c b/net/ethtool/strset.c
new file mode 100644
index 000000000000..82a059c13c1c
--- /dev/null
+++ b/net/ethtool/strset.c
@@ -0,0 +1,426 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/ethtool.h>
+#include <linux/phy.h>
+#include "netlink.h"
+#include "common.h"
+
+struct strset_info {
+ bool per_dev;
+ bool free_strings;
+ unsigned int count;
+ const char (*strings)[ETH_GSTRING_LEN];
+};
+
+static const struct strset_info info_template[] = {
+ [ETH_SS_TEST] = {
+ .per_dev = true,
+ },
+ [ETH_SS_STATS] = {
+ .per_dev = true,
+ },
+ [ETH_SS_PRIV_FLAGS] = {
+ .per_dev = true,
+ },
+ [ETH_SS_FEATURES] = {
+ .per_dev = false,
+ .count = ARRAY_SIZE(netdev_features_strings),
+ .strings = netdev_features_strings,
+ },
+ [ETH_SS_RSS_HASH_FUNCS] = {
+ .per_dev = false,
+ .count = ARRAY_SIZE(rss_hash_func_strings),
+ .strings = rss_hash_func_strings,
+ },
+ [ETH_SS_TUNABLES] = {
+ .per_dev = false,
+ .count = ARRAY_SIZE(tunable_strings),
+ .strings = tunable_strings,
+ },
+ [ETH_SS_PHY_STATS] = {
+ .per_dev = true,
+ },
+ [ETH_SS_PHY_TUNABLES] = {
+ .per_dev = false,
+ .count = ARRAY_SIZE(phy_tunable_strings),
+ .strings = phy_tunable_strings,
+ },
+ [ETH_SS_LINK_MODES] = {
+ .per_dev = false,
+ .count = __ETHTOOL_LINK_MODE_MASK_NBITS,
+ .strings = link_mode_names,
+ },
+};
+
+struct strset_req_info {
+ struct ethnl_req_info base;
+ u32 req_ids;
+ bool counts_only;
+};
+
+#define STRSET_REQINFO(__req_base) \
+ container_of(__req_base, struct strset_req_info, base)
+
+struct strset_reply_data {
+ struct ethnl_reply_data base;
+ struct strset_info sets[ETH_SS_COUNT];
+};
+
+#define STRSET_REPDATA(__reply_base) \
+ container_of(__reply_base, struct strset_reply_data, base)
+
+static const struct nla_policy strset_get_policy[ETHTOOL_A_STRSET_MAX + 1] = {
+ [ETHTOOL_A_STRSET_UNSPEC] = { .type = NLA_REJECT },
+ [ETHTOOL_A_STRSET_HEADER] = { .type = NLA_NESTED },
+ [ETHTOOL_A_STRSET_STRINGSETS] = { .type = NLA_NESTED },
+};
+
+static const struct nla_policy
+get_stringset_policy[ETHTOOL_A_STRINGSET_MAX + 1] = {
+ [ETHTOOL_A_STRINGSET_UNSPEC] = { .type = NLA_REJECT },
+ [ETHTOOL_A_STRINGSET_ID] = { .type = NLA_U32 },
+ [ETHTOOL_A_STRINGSET_COUNT] = { .type = NLA_REJECT },
+ [ETHTOOL_A_STRINGSET_STRINGS] = { .type = NLA_REJECT },
+};
+
+/**
+ * strset_include() - test if a string set should be included in reply
+ * @data: pointer to request data structure
+ * @id: id of string set to check (ETH_SS_* constants)
+ */
+static bool strset_include(const struct strset_req_info *info,
+ const struct strset_reply_data *data, u32 id)
+{
+ bool per_dev;
+
+ BUILD_BUG_ON(ETH_SS_COUNT >= BITS_PER_BYTE * sizeof(info->req_ids));
+
+ if (info->req_ids)
+ return info->req_ids & (1U << id);
+ per_dev = data->sets[id].per_dev;
+ if (!per_dev && !data->sets[id].strings)
+ return false;
+
+ return data->base.dev ? per_dev : !per_dev;
+}
+
+static int strset_get_id(const struct nlattr *nest, u32 *val,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[ETHTOOL_A_STRINGSET_MAX + 1];
+ int ret;
+
+ ret = nla_parse_nested(tb, ETHTOOL_A_STRINGSET_MAX, nest,
+ get_stringset_policy, extack);
+ if (ret < 0)
+ return ret;
+ if (!tb[ETHTOOL_A_STRINGSET_ID])
+ return -EINVAL;
+
+ *val = nla_get_u32(tb[ETHTOOL_A_STRINGSET_ID]);
+ return 0;
+}
+
+static const struct nla_policy
+strset_stringsets_policy[ETHTOOL_A_STRINGSETS_MAX + 1] = {
+ [ETHTOOL_A_STRINGSETS_UNSPEC] = { .type = NLA_REJECT },
+ [ETHTOOL_A_STRINGSETS_STRINGSET] = { .type = NLA_NESTED },
+};
+
+static int strset_parse_request(struct ethnl_req_info *req_base,
+ struct nlattr **tb,
+ struct netlink_ext_ack *extack)
+{
+ struct strset_req_info *req_info = STRSET_REQINFO(req_base);
+ struct nlattr *nest = tb[ETHTOOL_A_STRSET_STRINGSETS];
+ struct nlattr *attr;
+ int rem, ret;
+
+ if (!nest)
+ return 0;
+ ret = nla_validate_nested(nest, ETHTOOL_A_STRINGSETS_MAX,
+ strset_stringsets_policy, extack);
+ if (ret < 0)
+ return ret;
+
+ req_info->counts_only = tb[ETHTOOL_A_STRSET_COUNTS_ONLY];
+ nla_for_each_nested(attr, nest, rem) {
+ u32 id;
+
+ if (WARN_ONCE(nla_type(attr) != ETHTOOL_A_STRINGSETS_STRINGSET,
+ "unexpected attrtype %u in ETHTOOL_A_STRSET_STRINGSETS\n",
+ nla_type(attr)))
+ return -EINVAL;
+
+ ret = strset_get_id(attr, &id, extack);
+ if (ret < 0)
+ return ret;
+ if (ret >= ETH_SS_COUNT) {
+ NL_SET_ERR_MSG_ATTR(extack, attr,
+ "unknown string set id");
+ return -EOPNOTSUPP;
+ }
+
+ req_info->req_ids |= (1U << id);
+ }
+
+ return 0;
+}
+
+static void strset_cleanup_data(struct ethnl_reply_data *reply_base)
+{
+ struct strset_reply_data *data = STRSET_REPDATA(reply_base);
+ unsigned int i;
+
+ for (i = 0; i < ETH_SS_COUNT; i++)
+ if (data->sets[i].free_strings) {
+ kfree(data->sets[i].strings);
+ data->sets[i].strings = NULL;
+ data->sets[i].free_strings = false;
+ }
+}
+
+static int strset_prepare_set(struct strset_info *info, struct net_device *dev,
+ unsigned int id, bool counts_only)
+{
+ const struct ethtool_ops *ops = dev->ethtool_ops;
+ void *strings;
+ int count, ret;
+
+ if (id == ETH_SS_PHY_STATS && dev->phydev &&
+ !ops->get_ethtool_phy_stats)
+ ret = phy_ethtool_get_sset_count(dev->phydev);
+ else if (ops->get_sset_count && ops->get_strings)
+ ret = ops->get_sset_count(dev, id);
+ else
+ ret = -EOPNOTSUPP;
+ if (ret <= 0) {
+ info->count = 0;
+ return 0;
+ }
+
+ count = ret;
+ if (!counts_only) {
+ strings = kcalloc(count, ETH_GSTRING_LEN, GFP_KERNEL);
+ if (!strings)
+ return -ENOMEM;
+ if (id == ETH_SS_PHY_STATS && dev->phydev &&
+ !ops->get_ethtool_phy_stats)
+ phy_ethtool_get_strings(dev->phydev, strings);
+ else
+ ops->get_strings(dev, id, strings);
+ info->strings = strings;
+ info->free_strings = true;
+ }
+ info->count = count;
+
+ return 0;
+}
+
+static int strset_prepare_data(const struct ethnl_req_info *req_base,
+ struct ethnl_reply_data *reply_base,
+ struct genl_info *info)
+{
+ const struct strset_req_info *req_info = STRSET_REQINFO(req_base);
+ struct strset_reply_data *data = STRSET_REPDATA(reply_base);
+ struct net_device *dev = reply_base->dev;
+ unsigned int i;
+ int ret;
+
+ BUILD_BUG_ON(ARRAY_SIZE(info_template) != ETH_SS_COUNT);
+ memcpy(&data->sets, &info_template, sizeof(data->sets));
+
+ if (!dev) {
+ for (i = 0; i < ETH_SS_COUNT; i++) {
+ if ((req_info->req_ids & (1U << i)) &&
+ data->sets[i].per_dev) {
+ if (info)
+ GENL_SET_ERR_MSG(info, "requested per device strings without dev");
+ return -EINVAL;
+ }
+ }
+ return 0;
+ }
+
+ ret = ethnl_ops_begin(dev);
+ if (ret < 0)
+ goto err_strset;
+ for (i = 0; i < ETH_SS_COUNT; i++) {
+ if (!strset_include(req_info, data, i) ||
+ !data->sets[i].per_dev)
+ continue;
+
+ ret = strset_prepare_set(&data->sets[i], dev, i,
+ req_info->counts_only);
+ if (ret < 0)
+ goto err_ops;
+ }
+ ethnl_ops_complete(dev);
+
+ return 0;
+err_ops:
+ ethnl_ops_complete(dev);
+err_strset:
+ strset_cleanup_data(reply_base);
+ return ret;
+}
+
+/* calculate size of ETHTOOL_A_STRSET_STRINGSET nest for one string set */
+static int strset_set_size(const struct strset_info *info, bool counts_only)
+{
+ unsigned int len = 0;
+ unsigned int i;
+
+ if (info->count == 0)
+ return 0;
+ if (counts_only)
+ return nla_total_size(2 * nla_total_size(sizeof(u32)));
+
+ for (i = 0; i < info->count; i++) {
+ const char *str = info->strings[i];
+
+ /* ETHTOOL_A_STRING_INDEX, ETHTOOL_A_STRING_VALUE, nest */
+ len += nla_total_size(nla_total_size(sizeof(u32)) +
+ ethnl_strz_size(str));
+ }
+ /* ETHTOOL_A_STRINGSET_ID, ETHTOOL_A_STRINGSET_COUNT */
+ len = 2 * nla_total_size(sizeof(u32)) + nla_total_size(len);
+
+ return nla_total_size(len);
+}
+
+static int strset_reply_size(const struct ethnl_req_info *req_base,
+ const struct ethnl_reply_data *reply_base)
+{
+ const struct strset_req_info *req_info = STRSET_REQINFO(req_base);
+ const struct strset_reply_data *data = STRSET_REPDATA(reply_base);
+ unsigned int i;
+ int len = 0;
+ int ret;
+
+ len += ethnl_reply_header_size();
+ for (i = 0; i < ETH_SS_COUNT; i++) {
+ const struct strset_info *set_info = &data->sets[i];
+
+ if (!strset_include(req_info, data, i))
+ continue;
+
+ ret = strset_set_size(set_info, req_info->counts_only);
+ if (ret < 0)
+ return ret;
+ len += ret;
+ }
+
+ return len;
+}
+
+/* fill one string into reply */
+static int strset_fill_string(struct sk_buff *skb,
+ const struct strset_info *set_info, u32 idx)
+{
+ struct nlattr *string_attr;
+ const char *value;
+
+ value = set_info->strings[idx];
+
+ string_attr = nla_nest_start(skb, ETHTOOL_A_STRINGS_STRING);
+ if (!string_attr)
+ return -EMSGSIZE;
+ if (nla_put_u32(skb, ETHTOOL_A_STRING_INDEX, idx) ||
+ ethnl_put_strz(skb, ETHTOOL_A_STRING_VALUE, value))
+ goto nla_put_failure;
+ nla_nest_end(skb, string_attr);
+
+ return 0;
+nla_put_failure:
+ nla_nest_cancel(skb, string_attr);
+ return -EMSGSIZE;
+}
+
+/* fill one string set into reply */
+static int strset_fill_set(struct sk_buff *skb,
+ const struct strset_info *set_info, u32 id,
+ bool counts_only)
+{
+ struct nlattr *stringset_attr;
+ struct nlattr *strings_attr;
+ unsigned int i;
+
+ if (!set_info->per_dev && !set_info->strings)
+ return -EOPNOTSUPP;
+ if (set_info->count == 0)
+ return 0;
+ stringset_attr = nla_nest_start(skb, ETHTOOL_A_STRINGSETS_STRINGSET);
+ if (!stringset_attr)
+ return -EMSGSIZE;
+
+ if (nla_put_u32(skb, ETHTOOL_A_STRINGSET_ID, id) ||
+ nla_put_u32(skb, ETHTOOL_A_STRINGSET_COUNT, set_info->count))
+ goto nla_put_failure;
+
+ if (!counts_only) {
+ strings_attr = nla_nest_start(skb, ETHTOOL_A_STRINGSET_STRINGS);
+ if (!strings_attr)
+ goto nla_put_failure;
+ for (i = 0; i < set_info->count; i++) {
+ if (strset_fill_string(skb, set_info, i) < 0)
+ goto nla_put_failure;
+ }
+ nla_nest_end(skb, strings_attr);
+ }
+
+ nla_nest_end(skb, stringset_attr);
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, stringset_attr);
+ return -EMSGSIZE;
+}
+
+static int strset_fill_reply(struct sk_buff *skb,
+ const struct ethnl_req_info *req_base,
+ const struct ethnl_reply_data *reply_base)
+{
+ const struct strset_req_info *req_info = STRSET_REQINFO(req_base);
+ const struct strset_reply_data *data = STRSET_REPDATA(reply_base);
+ struct nlattr *nest;
+ unsigned int i;
+ int ret;
+
+ nest = nla_nest_start(skb, ETHTOOL_A_STRSET_STRINGSETS);
+ if (!nest)
+ return -EMSGSIZE;
+
+ for (i = 0; i < ETH_SS_COUNT; i++) {
+ if (strset_include(req_info, data, i)) {
+ ret = strset_fill_set(skb, &data->sets[i], i,
+ req_info->counts_only);
+ if (ret < 0)
+ goto nla_put_failure;
+ }
+ }
+
+ nla_nest_end(skb, nest);
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, nest);
+ return ret;
+}
+
+const struct ethnl_request_ops ethnl_strset_request_ops = {
+ .request_cmd = ETHTOOL_MSG_STRSET_GET,
+ .reply_cmd = ETHTOOL_MSG_STRSET_GET_REPLY,
+ .hdr_attr = ETHTOOL_A_STRSET_HEADER,
+ .max_attr = ETHTOOL_A_STRSET_MAX,
+ .req_info_size = sizeof(struct strset_req_info),
+ .reply_data_size = sizeof(struct strset_reply_data),
+ .request_policy = strset_get_policy,
+ .allow_nodev_do = true,
+
+ .parse_request = strset_parse_request,
+ .prepare_data = strset_prepare_data,
+ .reply_size = strset_reply_size,
+ .fill_reply = strset_fill_reply,
+ .cleanup_data = strset_cleanup_data,
+};
diff --git a/net/hsr/hsr_debugfs.c b/net/hsr/hsr_debugfs.c
index 94447974a3c0..d5f709b940ff 100644
--- a/net/hsr/hsr_debugfs.c
+++ b/net/hsr/hsr_debugfs.c
@@ -20,6 +20,8 @@
#include "hsr_main.h"
#include "hsr_framereg.h"
+static struct dentry *hsr_debugfs_root_dir;
+
static void print_mac_address(struct seq_file *sfp, unsigned char *mac)
{
seq_printf(sfp, "%02x:%02x:%02x:%02x:%02x:%02x:",
@@ -63,8 +65,20 @@ hsr_node_table_open(struct inode *inode, struct file *filp)
return single_open(filp, hsr_node_table_show, inode->i_private);
}
+void hsr_debugfs_rename(struct net_device *dev)
+{
+ struct hsr_priv *priv = netdev_priv(dev);
+ struct dentry *d;
+
+ d = debugfs_rename(hsr_debugfs_root_dir, priv->node_tbl_root,
+ hsr_debugfs_root_dir, dev->name);
+ if (IS_ERR(d))
+ netdev_warn(dev, "failed to rename\n");
+ else
+ priv->node_tbl_root = d;
+}
+
static const struct file_operations hsr_fops = {
- .owner = THIS_MODULE,
.open = hsr_node_table_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -78,15 +92,14 @@ static const struct file_operations hsr_fops = {
* When debugfs is configured this routine sets up the node_table file per
* hsr device for dumping the node_table entries
*/
-int hsr_debugfs_init(struct hsr_priv *priv, struct net_device *hsr_dev)
+void hsr_debugfs_init(struct hsr_priv *priv, struct net_device *hsr_dev)
{
- int rc = -1;
struct dentry *de = NULL;
- de = debugfs_create_dir(hsr_dev->name, NULL);
- if (!de) {
- pr_err("Cannot create hsr debugfs root\n");
- return rc;
+ de = debugfs_create_dir(hsr_dev->name, hsr_debugfs_root_dir);
+ if (IS_ERR(de)) {
+ pr_err("Cannot create hsr debugfs directory\n");
+ return;
}
priv->node_tbl_root = de;
@@ -94,13 +107,13 @@ int hsr_debugfs_init(struct hsr_priv *priv, struct net_device *hsr_dev)
de = debugfs_create_file("node_table", S_IFREG | 0444,
priv->node_tbl_root, priv,
&hsr_fops);
- if (!de) {
- pr_err("Cannot create hsr node_table directory\n");
- return rc;
+ if (IS_ERR(de)) {
+ pr_err("Cannot create hsr node_table file\n");
+ debugfs_remove(priv->node_tbl_root);
+ priv->node_tbl_root = NULL;
+ return;
}
priv->node_tbl_file = de;
-
- return 0;
}
/* hsr_debugfs_term - Tear down debugfs intrastructure
@@ -117,3 +130,18 @@ hsr_debugfs_term(struct hsr_priv *priv)
debugfs_remove(priv->node_tbl_root);
priv->node_tbl_root = NULL;
}
+
+void hsr_debugfs_create_root(void)
+{
+ hsr_debugfs_root_dir = debugfs_create_dir("hsr", NULL);
+ if (IS_ERR(hsr_debugfs_root_dir)) {
+ pr_err("Cannot create hsr debugfs root directory\n");
+ hsr_debugfs_root_dir = NULL;
+ }
+}
+
+void hsr_debugfs_remove_root(void)
+{
+ /* debugfs_remove() internally checks NULL and ERROR */
+ debugfs_remove(hsr_debugfs_root_dir);
+}
diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
index b01e1bae4ddc..c7bd6c49fadf 100644
--- a/net/hsr/hsr_device.c
+++ b/net/hsr/hsr_device.c
@@ -272,6 +272,8 @@ static void send_hsr_supervision_frame(struct hsr_port *master,
skb->dev->dev_addr, skb->len) <= 0)
goto out;
skb_reset_mac_header(skb);
+ skb_reset_network_header(skb);
+ skb_reset_transport_header(skb);
if (hsr_ver > 0) {
hsr_tag = skb_put(skb, sizeof(struct hsr_tag));
@@ -368,7 +370,7 @@ static void hsr_dev_destroy(struct net_device *hsr_dev)
del_timer_sync(&hsr->prune_timer);
del_timer_sync(&hsr->announce_timer);
- hsr_del_self_node(&hsr->self_node_db);
+ hsr_del_self_node(hsr);
hsr_del_nodes(&hsr->node_db);
}
@@ -440,11 +442,12 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2],
INIT_LIST_HEAD(&hsr->ports);
INIT_LIST_HEAD(&hsr->node_db);
INIT_LIST_HEAD(&hsr->self_node_db);
+ spin_lock_init(&hsr->list_lock);
ether_addr_copy(hsr_dev->dev_addr, slave[0]->dev_addr);
/* Make sure we recognize frames from ourselves in hsr_rcv() */
- res = hsr_create_self_node(&hsr->self_node_db, hsr_dev->dev_addr,
+ res = hsr_create_self_node(hsr, hsr_dev->dev_addr,
slave[1]->dev_addr);
if (res < 0)
return res;
@@ -477,31 +480,32 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2],
res = hsr_add_port(hsr, hsr_dev, HSR_PT_MASTER);
if (res)
- goto err_add_port;
+ goto err_add_master;
res = register_netdevice(hsr_dev);
if (res)
- goto fail;
+ goto err_unregister;
res = hsr_add_port(hsr, slave[0], HSR_PT_SLAVE_A);
if (res)
- goto fail;
+ goto err_add_slaves;
+
res = hsr_add_port(hsr, slave[1], HSR_PT_SLAVE_B);
if (res)
- goto fail;
+ goto err_add_slaves;
+ hsr_debugfs_init(hsr, hsr_dev);
mod_timer(&hsr->prune_timer, jiffies + msecs_to_jiffies(PRUNE_PERIOD));
- res = hsr_debugfs_init(hsr, hsr_dev);
- if (res)
- goto fail;
return 0;
-fail:
+err_add_slaves:
+ unregister_netdevice(hsr_dev);
+err_unregister:
list_for_each_entry_safe(port, tmp, &hsr->ports, port_list)
hsr_del_port(port);
-err_add_port:
- hsr_del_self_node(&hsr->self_node_db);
+err_add_master:
+ hsr_del_self_node(hsr);
return res;
}
diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c
index 292be446007b..27dc65d7de67 100644
--- a/net/hsr/hsr_framereg.c
+++ b/net/hsr/hsr_framereg.c
@@ -75,10 +75,11 @@ static struct hsr_node *find_node_by_addr_A(struct list_head *node_db,
/* Helper for device init; the self_node_db is used in hsr_rcv() to recognize
* frames from self that's been looped over the HSR ring.
*/
-int hsr_create_self_node(struct list_head *self_node_db,
+int hsr_create_self_node(struct hsr_priv *hsr,
unsigned char addr_a[ETH_ALEN],
unsigned char addr_b[ETH_ALEN])
{
+ struct list_head *self_node_db = &hsr->self_node_db;
struct hsr_node *node, *oldnode;
node = kmalloc(sizeof(*node), GFP_KERNEL);
@@ -88,33 +89,33 @@ int hsr_create_self_node(struct list_head *self_node_db,
ether_addr_copy(node->macaddress_A, addr_a);
ether_addr_copy(node->macaddress_B, addr_b);
- rcu_read_lock();
+ spin_lock_bh(&hsr->list_lock);
oldnode = list_first_or_null_rcu(self_node_db,
struct hsr_node, mac_list);
if (oldnode) {
list_replace_rcu(&oldnode->mac_list, &node->mac_list);
- rcu_read_unlock();
- synchronize_rcu();
- kfree(oldnode);
+ spin_unlock_bh(&hsr->list_lock);
+ kfree_rcu(oldnode, rcu_head);
} else {
- rcu_read_unlock();
list_add_tail_rcu(&node->mac_list, self_node_db);
+ spin_unlock_bh(&hsr->list_lock);
}
return 0;
}
-void hsr_del_self_node(struct list_head *self_node_db)
+void hsr_del_self_node(struct hsr_priv *hsr)
{
+ struct list_head *self_node_db = &hsr->self_node_db;
struct hsr_node *node;
- rcu_read_lock();
+ spin_lock_bh(&hsr->list_lock);
node = list_first_or_null_rcu(self_node_db, struct hsr_node, mac_list);
- rcu_read_unlock();
if (node) {
list_del_rcu(&node->mac_list);
- kfree(node);
+ kfree_rcu(node, rcu_head);
}
+ spin_unlock_bh(&hsr->list_lock);
}
void hsr_del_nodes(struct list_head *node_db)
@@ -130,30 +131,43 @@ void hsr_del_nodes(struct list_head *node_db)
* seq_out is used to initialize filtering of outgoing duplicate frames
* originating from the newly added node.
*/
-struct hsr_node *hsr_add_node(struct list_head *node_db, unsigned char addr[],
- u16 seq_out)
+static struct hsr_node *hsr_add_node(struct hsr_priv *hsr,
+ struct list_head *node_db,
+ unsigned char addr[],
+ u16 seq_out)
{
- struct hsr_node *node;
+ struct hsr_node *new_node, *node;
unsigned long now;
int i;
- node = kzalloc(sizeof(*node), GFP_ATOMIC);
- if (!node)
+ new_node = kzalloc(sizeof(*new_node), GFP_ATOMIC);
+ if (!new_node)
return NULL;
- ether_addr_copy(node->macaddress_A, addr);
+ ether_addr_copy(new_node->macaddress_A, addr);
/* We are only interested in time diffs here, so use current jiffies
* as initialization. (0 could trigger an spurious ring error warning).
*/
now = jiffies;
for (i = 0; i < HSR_PT_PORTS; i++)
- node->time_in[i] = now;
+ new_node->time_in[i] = now;
for (i = 0; i < HSR_PT_PORTS; i++)
- node->seq_out[i] = seq_out;
-
- list_add_tail_rcu(&node->mac_list, node_db);
+ new_node->seq_out[i] = seq_out;
+ spin_lock_bh(&hsr->list_lock);
+ list_for_each_entry_rcu(node, node_db, mac_list) {
+ if (ether_addr_equal(node->macaddress_A, addr))
+ goto out;
+ if (ether_addr_equal(node->macaddress_B, addr))
+ goto out;
+ }
+ list_add_tail_rcu(&new_node->mac_list, node_db);
+ spin_unlock_bh(&hsr->list_lock);
+ return new_node;
+out:
+ spin_unlock_bh(&hsr->list_lock);
+ kfree(new_node);
return node;
}
@@ -163,6 +177,7 @@ struct hsr_node *hsr_get_node(struct hsr_port *port, struct sk_buff *skb,
bool is_sup)
{
struct list_head *node_db = &port->hsr->node_db;
+ struct hsr_priv *hsr = port->hsr;
struct hsr_node *node;
struct ethhdr *ethhdr;
u16 seq_out;
@@ -196,7 +211,7 @@ struct hsr_node *hsr_get_node(struct hsr_port *port, struct sk_buff *skb,
seq_out = HSR_SEQNR_START;
}
- return hsr_add_node(node_db, ethhdr->h_source, seq_out);
+ return hsr_add_node(hsr, node_db, ethhdr->h_source, seq_out);
}
/* Use the Supervision frame's info about an eventual macaddress_B for merging
@@ -206,10 +221,11 @@ struct hsr_node *hsr_get_node(struct hsr_port *port, struct sk_buff *skb,
void hsr_handle_sup_frame(struct sk_buff *skb, struct hsr_node *node_curr,
struct hsr_port *port_rcv)
{
- struct ethhdr *ethhdr;
- struct hsr_node *node_real;
+ struct hsr_priv *hsr = port_rcv->hsr;
struct hsr_sup_payload *hsr_sp;
+ struct hsr_node *node_real;
struct list_head *node_db;
+ struct ethhdr *ethhdr;
int i;
ethhdr = (struct ethhdr *)skb_mac_header(skb);
@@ -231,7 +247,7 @@ void hsr_handle_sup_frame(struct sk_buff *skb, struct hsr_node *node_curr,
node_real = find_node_by_addr_A(node_db, hsr_sp->macaddress_A);
if (!node_real)
/* No frame received from AddrA of this node yet */
- node_real = hsr_add_node(node_db, hsr_sp->macaddress_A,
+ node_real = hsr_add_node(hsr, node_db, hsr_sp->macaddress_A,
HSR_SEQNR_START - 1);
if (!node_real)
goto done; /* No mem */
@@ -252,7 +268,9 @@ void hsr_handle_sup_frame(struct sk_buff *skb, struct hsr_node *node_curr,
}
node_real->addr_B_port = port_rcv->type;
+ spin_lock_bh(&hsr->list_lock);
list_del_rcu(&node_curr->mac_list);
+ spin_unlock_bh(&hsr->list_lock);
kfree_rcu(node_curr, rcu_head);
done:
@@ -368,12 +386,13 @@ void hsr_prune_nodes(struct timer_list *t)
{
struct hsr_priv *hsr = from_timer(hsr, t, prune_timer);
struct hsr_node *node;
+ struct hsr_node *tmp;
struct hsr_port *port;
unsigned long timestamp;
unsigned long time_a, time_b;
- rcu_read_lock();
- list_for_each_entry_rcu(node, &hsr->node_db, mac_list) {
+ spin_lock_bh(&hsr->list_lock);
+ list_for_each_entry_safe(node, tmp, &hsr->node_db, mac_list) {
/* Don't prune own node. Neither time_in[HSR_PT_SLAVE_A]
* nor time_in[HSR_PT_SLAVE_B], will ever be updated for
* the master port. Thus the master node will be repeatedly
@@ -421,7 +440,7 @@ void hsr_prune_nodes(struct timer_list *t)
kfree_rcu(node, rcu_head);
}
}
- rcu_read_unlock();
+ spin_unlock_bh(&hsr->list_lock);
/* Restart timer */
mod_timer(&hsr->prune_timer,
diff --git a/net/hsr/hsr_framereg.h b/net/hsr/hsr_framereg.h
index 89a3ce38151d..0f0fa12b4329 100644
--- a/net/hsr/hsr_framereg.h
+++ b/net/hsr/hsr_framereg.h
@@ -12,10 +12,8 @@
struct hsr_node;
-void hsr_del_self_node(struct list_head *self_node_db);
+void hsr_del_self_node(struct hsr_priv *hsr);
void hsr_del_nodes(struct list_head *node_db);
-struct hsr_node *hsr_add_node(struct list_head *node_db, unsigned char addr[],
- u16 seq_out);
struct hsr_node *hsr_get_node(struct hsr_port *port, struct sk_buff *skb,
bool is_sup);
void hsr_handle_sup_frame(struct sk_buff *skb, struct hsr_node *node_curr,
@@ -33,7 +31,7 @@ int hsr_register_frame_out(struct hsr_port *port, struct hsr_node *node,
void hsr_prune_nodes(struct timer_list *t);
-int hsr_create_self_node(struct list_head *self_node_db,
+int hsr_create_self_node(struct hsr_priv *hsr,
unsigned char addr_a[ETH_ALEN],
unsigned char addr_b[ETH_ALEN]);
diff --git a/net/hsr/hsr_main.c b/net/hsr/hsr_main.c
index b9988a662ee1..9e389accbfc7 100644
--- a/net/hsr/hsr_main.c
+++ b/net/hsr/hsr_main.c
@@ -45,6 +45,10 @@ static int hsr_netdev_notify(struct notifier_block *nb, unsigned long event,
case NETDEV_CHANGE: /* Link (carrier) state changes */
hsr_check_carrier_and_operstate(hsr);
break;
+ case NETDEV_CHANGENAME:
+ if (is_hsr_master(dev))
+ hsr_debugfs_rename(dev);
+ break;
case NETDEV_CHANGEADDR:
if (port->type == HSR_PT_MASTER) {
/* This should not happen since there's no
@@ -64,7 +68,7 @@ static int hsr_netdev_notify(struct notifier_block *nb, unsigned long event,
/* Make sure we recognize frames from ourselves in hsr_rcv() */
port = hsr_port_get_hsr(hsr, HSR_PT_SLAVE_B);
- res = hsr_create_self_node(&hsr->self_node_db,
+ res = hsr_create_self_node(hsr,
master->dev->dev_addr,
port ?
port->dev->dev_addr :
@@ -123,6 +127,7 @@ static void __exit hsr_exit(void)
{
unregister_netdevice_notifier(&hsr_nb);
hsr_netlink_exit();
+ hsr_debugfs_remove_root();
}
module_init(hsr_init);
diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h
index 96fac696a1e1..754d84b217f0 100644
--- a/net/hsr/hsr_main.h
+++ b/net/hsr/hsr_main.h
@@ -160,8 +160,9 @@ struct hsr_priv {
int announce_count;
u16 sequence_nr;
u16 sup_sequence_nr; /* For HSRv1 separate seq_nr for supervision */
- u8 prot_version; /* Indicate if HSRv0 or HSRv1. */
- spinlock_t seqnr_lock; /* locking for sequence_nr */
+ u8 prot_version; /* Indicate if HSRv0 or HSRv1. */
+ spinlock_t seqnr_lock; /* locking for sequence_nr */
+ spinlock_t list_lock; /* locking for node list */
unsigned char sup_multicast_addr[ETH_ALEN];
#ifdef CONFIG_DEBUG_FS
struct dentry *node_tbl_root;
@@ -184,17 +185,24 @@ static inline u16 hsr_get_skb_sequence_nr(struct sk_buff *skb)
}
#if IS_ENABLED(CONFIG_DEBUG_FS)
-int hsr_debugfs_init(struct hsr_priv *priv, struct net_device *hsr_dev);
+void hsr_debugfs_rename(struct net_device *dev);
+void hsr_debugfs_init(struct hsr_priv *priv, struct net_device *hsr_dev);
void hsr_debugfs_term(struct hsr_priv *priv);
+void hsr_debugfs_create_root(void);
+void hsr_debugfs_remove_root(void);
#else
-static inline int hsr_debugfs_init(struct hsr_priv *priv,
- struct net_device *hsr_dev)
+static inline void hsr_debugfs_rename(struct net_device *dev)
{
- return 0;
}
-
+static inline void hsr_debugfs_init(struct hsr_priv *priv,
+ struct net_device *hsr_dev)
+{}
static inline void hsr_debugfs_term(struct hsr_priv *priv)
{}
+static inline void hsr_debugfs_create_root(void)
+{}
+static inline void hsr_debugfs_remove_root(void)
+{}
#endif
#endif /* __HSR_PRIVATE_H */
diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c
index 8f8337f893ba..8dc0547f01d0 100644
--- a/net/hsr/hsr_netlink.c
+++ b/net/hsr/hsr_netlink.c
@@ -476,6 +476,7 @@ int __init hsr_netlink_init(void)
if (rc)
goto fail_genl_register_family;
+ hsr_debugfs_create_root();
return 0;
fail_genl_register_family:
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index b9df9c09b84e..b92a42433a7d 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -980,9 +980,12 @@ static struct key_vector *fib_find_node(struct trie *t,
/* Return the first fib alias matching TOS with
* priority less than or equal to PRIO.
+ * If 'find_first' is set, return the first matching
+ * fib alias, regardless of TOS and priority.
*/
static struct fib_alias *fib_find_alias(struct hlist_head *fah, u8 slen,
- u8 tos, u32 prio, u32 tb_id)
+ u8 tos, u32 prio, u32 tb_id,
+ bool find_first)
{
struct fib_alias *fa;
@@ -998,6 +1001,8 @@ static struct fib_alias *fib_find_alias(struct hlist_head *fah, u8 slen,
continue;
if (fa->tb_id != tb_id)
break;
+ if (find_first)
+ return fa;
if (fa->fa_tos > tos)
continue;
if (fa->fa_info->fib_priority >= prio || fa->fa_tos < tos)
@@ -1063,9 +1068,6 @@ noleaf:
return -ENOMEM;
}
-/* fib notifier for ADD is sent before calling fib_insert_alias with
- * the expectation that the only possible failure ENOMEM
- */
static int fib_insert_alias(struct trie *t, struct key_vector *tp,
struct key_vector *l, struct fib_alias *new,
struct fib_alias *fa, t_key key)
@@ -1118,11 +1120,13 @@ static bool fib_valid_key_len(u32 key, u8 plen, struct netlink_ext_ack *extack)
return true;
}
+static void fib_remove_alias(struct trie *t, struct key_vector *tp,
+ struct key_vector *l, struct fib_alias *old);
+
/* Caller must hold RTNL. */
int fib_table_insert(struct net *net, struct fib_table *tb,
struct fib_config *cfg, struct netlink_ext_ack *extack)
{
- enum fib_event_type event = FIB_EVENT_ENTRY_ADD;
struct trie *t = (struct trie *)tb->tb_data;
struct fib_alias *fa, *new_fa;
struct key_vector *l, *tp;
@@ -1149,7 +1153,7 @@ int fib_table_insert(struct net *net, struct fib_table *tb,
l = fib_find_node(t, &tp, key);
fa = l ? fib_find_alias(&l->leaf, slen, tos, fi->fib_priority,
- tb->tb_id) : NULL;
+ tb->tb_id, false) : NULL;
/* Now fa, if non-NULL, points to the first fib alias
* with the same keys [prefix,tos,priority], if such key already
@@ -1217,12 +1221,17 @@ int fib_table_insert(struct net *net, struct fib_table *tb,
new_fa->tb_id = tb->tb_id;
new_fa->fa_default = -1;
- err = call_fib_entry_notifiers(net,
- FIB_EVENT_ENTRY_REPLACE,
- key, plen, new_fa,
- extack);
- if (err)
- goto out_free_new_fa;
+ if (fib_find_alias(&l->leaf, fa->fa_slen, 0, 0,
+ tb->tb_id, true) == fa) {
+ enum fib_event_type fib_event;
+
+ fib_event = FIB_EVENT_ENTRY_REPLACE;
+ err = call_fib_entry_notifiers(net, fib_event,
+ key, plen,
+ new_fa, extack);
+ if (err)
+ goto out_free_new_fa;
+ }
rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen,
tb->tb_id, &cfg->fc_nlinfo, nlflags);
@@ -1244,12 +1253,10 @@ int fib_table_insert(struct net *net, struct fib_table *tb,
if (fa_match)
goto out;
- if (cfg->fc_nlflags & NLM_F_APPEND) {
- event = FIB_EVENT_ENTRY_APPEND;
+ if (cfg->fc_nlflags & NLM_F_APPEND)
nlflags |= NLM_F_APPEND;
- } else {
+ else
fa = fa_first;
- }
}
err = -ENOENT;
if (!(cfg->fc_nlflags & NLM_F_CREATE))
@@ -1269,14 +1276,26 @@ int fib_table_insert(struct net *net, struct fib_table *tb,
new_fa->tb_id = tb->tb_id;
new_fa->fa_default = -1;
- err = call_fib_entry_notifiers(net, event, key, plen, new_fa, extack);
- if (err)
- goto out_free_new_fa;
-
/* Insert new entry to the list. */
err = fib_insert_alias(t, tp, l, new_fa, fa, key);
if (err)
- goto out_fib_notif;
+ goto out_free_new_fa;
+
+ /* The alias was already inserted, so the node must exist. */
+ l = l ? l : fib_find_node(t, &tp, key);
+ if (WARN_ON_ONCE(!l))
+ goto out_free_new_fa;
+
+ if (fib_find_alias(&l->leaf, new_fa->fa_slen, 0, 0, tb->tb_id, true) ==
+ new_fa) {
+ enum fib_event_type fib_event;
+
+ fib_event = FIB_EVENT_ENTRY_REPLACE;
+ err = call_fib_entry_notifiers(net, fib_event, key, plen,
+ new_fa, extack);
+ if (err)
+ goto out_remove_new_fa;
+ }
if (!plen)
tb->tb_num_default++;
@@ -1287,14 +1306,8 @@ int fib_table_insert(struct net *net, struct fib_table *tb,
succeeded:
return 0;
-out_fib_notif:
- /* notifier was sent that entry would be added to trie, but
- * the add failed and need to recover. Only failure for
- * fib_insert_alias is ENOMEM.
- */
- NL_SET_ERR_MSG(extack, "Failed to insert route into trie");
- call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, key,
- plen, new_fa, NULL);
+out_remove_new_fa:
+ fib_remove_alias(t, tp, l, new_fa);
out_free_new_fa:
kmem_cache_free(fn_alias_kmem, new_fa);
out:
@@ -1545,6 +1558,36 @@ static void fib_remove_alias(struct trie *t, struct key_vector *tp,
node_pull_suffix(tp, fa->fa_slen);
}
+static void fib_notify_alias_delete(struct net *net, u32 key,
+ struct hlist_head *fah,
+ struct fib_alias *fa_to_delete,
+ struct netlink_ext_ack *extack)
+{
+ struct fib_alias *fa_next, *fa_to_notify;
+ u32 tb_id = fa_to_delete->tb_id;
+ u8 slen = fa_to_delete->fa_slen;
+ enum fib_event_type fib_event;
+
+ /* Do not notify if we do not care about the route. */
+ if (fib_find_alias(fah, slen, 0, 0, tb_id, true) != fa_to_delete)
+ return;
+
+ /* Determine if the route should be replaced by the next route in the
+ * list.
+ */
+ fa_next = hlist_entry_safe(fa_to_delete->fa_list.next,
+ struct fib_alias, fa_list);
+ if (fa_next && fa_next->fa_slen == slen && fa_next->tb_id == tb_id) {
+ fib_event = FIB_EVENT_ENTRY_REPLACE;
+ fa_to_notify = fa_next;
+ } else {
+ fib_event = FIB_EVENT_ENTRY_DEL;
+ fa_to_notify = fa_to_delete;
+ }
+ call_fib_entry_notifiers(net, fib_event, key, KEYLENGTH - slen,
+ fa_to_notify, extack);
+}
+
/* Caller must hold RTNL. */
int fib_table_delete(struct net *net, struct fib_table *tb,
struct fib_config *cfg, struct netlink_ext_ack *extack)
@@ -1566,7 +1609,7 @@ int fib_table_delete(struct net *net, struct fib_table *tb,
if (!l)
return -ESRCH;
- fa = fib_find_alias(&l->leaf, slen, tos, 0, tb->tb_id);
+ fa = fib_find_alias(&l->leaf, slen, tos, 0, tb->tb_id, false);
if (!fa)
return -ESRCH;
@@ -1598,8 +1641,7 @@ int fib_table_delete(struct net *net, struct fib_table *tb,
if (!fa_to_delete)
return -ESRCH;
- call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, key, plen,
- fa_to_delete, extack);
+ fib_notify_alias_delete(net, key, &l->leaf, fa_to_delete, extack);
rtmsg_fib(RTM_DELROUTE, htonl(key), fa_to_delete, plen, tb->tb_id,
&cfg->fc_nlinfo, 0);
@@ -1923,10 +1965,8 @@ int fib_table_flush(struct net *net, struct fib_table *tb, bool flush_all)
continue;
}
- call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_DEL,
- n->key,
- KEYLENGTH - fa->fa_slen, fa,
- NULL);
+ fib_notify_alias_delete(net, n->key, &n->leaf, fa,
+ NULL);
hlist_del_rcu(&fa->fa_list);
fib_release_info(fa->fa_info);
alias_free_mem_rcu(fa);
@@ -2022,6 +2062,7 @@ static int fib_leaf_notify(struct key_vector *l, struct fib_table *tb,
struct netlink_ext_ack *extack)
{
struct fib_alias *fa;
+ int last_slen = -1;
int err;
hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
@@ -2036,8 +2077,12 @@ static int fib_leaf_notify(struct key_vector *l, struct fib_table *tb,
if (tb->tb_id != fa->tb_id)
continue;
- err = call_fib_entry_notifier(nb, FIB_EVENT_ENTRY_ADD, l->key,
- KEYLENGTH - fa->fa_slen,
+ if (fa->fa_slen == last_slen)
+ continue;
+
+ last_slen = fa->fa_slen;
+ err = call_fib_entry_notifier(nb, FIB_EVENT_ENTRY_REPLACE,
+ l->key, KEYLENGTH - fa->fa_slen,
fa, extack);
if (err)
return err;
diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c
index 4de7e962d3da..2e6d1b7a7bc9 100644
--- a/net/ipv4/gre_offload.c
+++ b/net/ipv4/gre_offload.c
@@ -174,7 +174,7 @@ static struct sk_buff *gre_gro_receive(struct list_head *head,
if (skb_gro_checksum_simple_validate(skb))
goto out_unlock;
- skb_gro_checksum_try_convert(skb, IPPROTO_GRE, 0,
+ skb_gro_checksum_try_convert(skb, IPPROTO_GRE,
null_compute_pseudo);
}
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index e4c6e8b40490..6a691fd04398 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -770,6 +770,18 @@ void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
}
EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add);
+static void inet_clone_ulp(const struct request_sock *req, struct sock *newsk,
+ const gfp_t priority)
+{
+ struct inet_connection_sock *icsk = inet_csk(newsk);
+
+ if (!icsk->icsk_ulp_ops)
+ return;
+
+ if (icsk->icsk_ulp_ops->clone)
+ icsk->icsk_ulp_ops->clone(req, newsk, priority);
+}
+
/**
* inet_csk_clone_lock - clone an inet socket, and lock its clone
* @sk: the socket to clone
@@ -810,6 +822,8 @@ struct sock *inet_csk_clone_lock(const struct sock *sk,
/* Deinitialize accept_queue to trap illegal accesses. */
memset(&newicsk->icsk_accept_queue, 0, sizeof(newicsk->icsk_accept_queue));
+ inet_clone_ulp(req, newsk, priority);
+
security_inet_csk_clone(newsk, req);
}
return newsk;
@@ -1086,7 +1100,7 @@ struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu)
if (!dst)
goto out;
}
- dst->ops->update_pmtu(dst, sk, NULL, mtu);
+ dst->ops->update_pmtu(dst, sk, NULL, mtu, true);
dst = __sk_dst_check(sk, 0);
if (!dst)
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index af154977904c..f11e997e517b 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -911,11 +911,12 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb,
for (i = s_i; i < INET_LHTABLE_SIZE; i++) {
struct inet_listen_hashbucket *ilb;
+ struct hlist_nulls_node *node;
num = 0;
ilb = &hashinfo->listening_hash[i];
spin_lock(&ilb->lock);
- sk_for_each(sk, &ilb->head) {
+ sk_nulls_for_each(sk, node, &ilb->nulls_head) {
struct inet_sock *inet = inet_sk(sk);
if (!net_eq(sock_net(sk), net))
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 83fb00153018..2bbaaf0c7176 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -516,10 +516,11 @@ static int inet_reuseport_add_sock(struct sock *sk,
struct inet_listen_hashbucket *ilb)
{
struct inet_bind_bucket *tb = inet_csk(sk)->icsk_bind_hash;
+ const struct hlist_nulls_node *node;
struct sock *sk2;
kuid_t uid = sock_i_uid(sk);
- sk_for_each_rcu(sk2, &ilb->head) {
+ sk_nulls_for_each_rcu(sk2, node, &ilb->nulls_head) {
if (sk2 != sk &&
sk2->sk_family == sk->sk_family &&
ipv6_only_sock(sk2) == ipv6_only_sock(sk) &&
@@ -555,9 +556,9 @@ int __inet_hash(struct sock *sk, struct sock *osk)
}
if (IS_ENABLED(CONFIG_IPV6) && sk->sk_reuseport &&
sk->sk_family == AF_INET6)
- hlist_add_tail_rcu(&sk->sk_node, &ilb->head);
+ __sk_nulls_add_node_tail_rcu(sk, &ilb->nulls_head);
else
- hlist_add_head_rcu(&sk->sk_node, &ilb->head);
+ __sk_nulls_add_node_rcu(sk, &ilb->nulls_head);
inet_hash2(hashinfo, sk);
ilb->count++;
sock_set_flag(sk, SOCK_RCU_FREE);
@@ -606,11 +607,9 @@ void inet_unhash(struct sock *sk)
reuseport_detach_sock(sk);
if (ilb) {
inet_unhash2(hashinfo, sk);
- __sk_del_node_init(sk);
- ilb->count--;
- } else {
- __sk_nulls_del_node_init_rcu(sk);
+ ilb->count--;
}
+ __sk_nulls_del_node_init_rcu(sk);
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
unlock:
spin_unlock_bh(lock);
@@ -750,7 +749,8 @@ void inet_hashinfo_init(struct inet_hashinfo *h)
for (i = 0; i < INET_LHTABLE_SIZE; i++) {
spin_lock_init(&h->listening_hash[i].lock);
- INIT_HLIST_HEAD(&h->listening_hash[i].head);
+ INIT_HLIST_NULLS_HEAD(&h->listening_hash[i].nulls_head,
+ i + LISTENING_NULLS_BASE);
h->listening_hash[i].count = 0;
}
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 572b6307a2df..8274f98c511c 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -1464,8 +1464,8 @@ static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
[IFLA_GRE_OFLAGS] = { .type = NLA_U16 },
[IFLA_GRE_IKEY] = { .type = NLA_U32 },
[IFLA_GRE_OKEY] = { .type = NLA_U32 },
- [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
- [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
+ [IFLA_GRE_LOCAL] = { .len = sizeof_field(struct iphdr, saddr) },
+ [IFLA_GRE_REMOTE] = { .len = sizeof_field(struct iphdr, daddr) },
[IFLA_GRE_TTL] = { .type = NLA_U8 },
[IFLA_GRE_TOS] = { .type = NLA_U8 },
[IFLA_GRE_PMTUDISC] = { .type = NLA_U8 },
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 38c02bb62e2c..0fe2a5d3e258 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -505,7 +505,7 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
mtu = skb_valid_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
if (skb_valid_dst(skb))
- skb_dst_update_pmtu(skb, mtu);
+ skb_dst_update_pmtu_no_confirm(skb, mtu);
if (skb->protocol == htons(ETH_P_IP)) {
if (!skb_is_gso(skb) &&
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index cfb025606793..e90b600c7a25 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -214,7 +214,7 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev,
mtu = dst_mtu(dst);
if (skb->len > mtu) {
- skb_dst_update_pmtu(skb, mtu);
+ skb_dst_update_pmtu_no_confirm(skb, mtu);
if (skb->protocol == htons(ETH_P_IP)) {
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
htonl(mtu));
@@ -580,8 +580,8 @@ static const struct nla_policy vti_policy[IFLA_VTI_MAX + 1] = {
[IFLA_VTI_LINK] = { .type = NLA_U32 },
[IFLA_VTI_IKEY] = { .type = NLA_U32 },
[IFLA_VTI_OKEY] = { .type = NLA_U32 },
- [IFLA_VTI_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
- [IFLA_VTI_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
+ [IFLA_VTI_LOCAL] = { .len = sizeof_field(struct iphdr, saddr) },
+ [IFLA_VTI_REMOTE] = { .len = sizeof_field(struct iphdr, daddr) },
[IFLA_VTI_FWMARK] = { .type = NLA_U32 },
};
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 214154b47d56..069f72edb264 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -384,10 +384,11 @@ next: ;
return 1;
}
-static inline int check_target(struct arpt_entry *e, const char *name)
+static int check_target(struct arpt_entry *e, struct net *net, const char *name)
{
struct xt_entry_target *t = arpt_get_target(e);
struct xt_tgchk_param par = {
+ .net = net,
.table = name,
.entryinfo = e,
.target = t->u.kernel.target,
@@ -399,8 +400,9 @@ static inline int check_target(struct arpt_entry *e, const char *name)
return xt_check_target(&par, t->u.target_size - sizeof(*t), 0, false);
}
-static inline int
-find_check_entry(struct arpt_entry *e, const char *name, unsigned int size,
+static int
+find_check_entry(struct arpt_entry *e, struct net *net, const char *name,
+ unsigned int size,
struct xt_percpu_counter_alloc_state *alloc_state)
{
struct xt_entry_target *t;
@@ -419,7 +421,7 @@ find_check_entry(struct arpt_entry *e, const char *name, unsigned int size,
}
t->u.kernel.target = target;
- ret = check_target(e, name);
+ ret = check_target(e, net, name);
if (ret)
goto err;
return 0;
@@ -512,7 +514,9 @@ static inline void cleanup_entry(struct arpt_entry *e)
/* Checks and translates the user-supplied table segment (held in
* newinfo).
*/
-static int translate_table(struct xt_table_info *newinfo, void *entry0,
+static int translate_table(struct net *net,
+ struct xt_table_info *newinfo,
+ void *entry0,
const struct arpt_replace *repl)
{
struct xt_percpu_counter_alloc_state alloc_state = { 0 };
@@ -569,7 +573,7 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0,
/* Finally, each sanity check must pass */
i = 0;
xt_entry_foreach(iter, entry0, newinfo->size) {
- ret = find_check_entry(iter, repl->name, repl->size,
+ ret = find_check_entry(iter, net, repl->name, repl->size,
&alloc_state);
if (ret != 0)
break;
@@ -974,7 +978,7 @@ static int do_replace(struct net *net, const void __user *user,
goto free_newinfo;
}
- ret = translate_table(newinfo, loc_cpu_entry, &tmp);
+ ret = translate_table(net, newinfo, loc_cpu_entry, &tmp);
if (ret != 0)
goto free_newinfo;
@@ -1149,7 +1153,8 @@ compat_copy_entry_from_user(struct compat_arpt_entry *e, void **dstptr,
}
}
-static int translate_compat_table(struct xt_table_info **pinfo,
+static int translate_compat_table(struct net *net,
+ struct xt_table_info **pinfo,
void **pentry0,
const struct compat_arpt_replace *compatr)
{
@@ -1217,7 +1222,7 @@ static int translate_compat_table(struct xt_table_info **pinfo,
repl.num_counters = 0;
repl.counters = NULL;
repl.size = newinfo->size;
- ret = translate_table(newinfo, entry1, &repl);
+ ret = translate_table(net, newinfo, entry1, &repl);
if (ret)
goto free_newinfo;
@@ -1270,7 +1275,7 @@ static int compat_do_replace(struct net *net, void __user *user,
goto free_newinfo;
}
- ret = translate_compat_table(&newinfo, &loc_cpu_entry, &tmp);
+ ret = translate_compat_table(net, &newinfo, &loc_cpu_entry, &tmp);
if (ret != 0)
goto free_newinfo;
@@ -1546,7 +1551,7 @@ int arpt_register_table(struct net *net,
loc_cpu_entry = newinfo->entries;
memcpy(loc_cpu_entry, repl->entries, repl->size);
- ret = translate_table(newinfo, loc_cpu_entry, repl);
+ ret = translate_table(net, newinfo, loc_cpu_entry, repl);
if (ret != 0)
goto out_free;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index f88c93c38f11..87e979f2b74a 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -139,7 +139,8 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst);
static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
static void ipv4_link_failure(struct sk_buff *skb);
static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
- struct sk_buff *skb, u32 mtu);
+ struct sk_buff *skb, u32 mtu,
+ bool confirm_neigh);
static void ip_do_redirect(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb);
static void ipv4_dst_destroy(struct dst_entry *dst);
@@ -1043,7 +1044,8 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
}
static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
- struct sk_buff *skb, u32 mtu)
+ struct sk_buff *skb, u32 mtu,
+ bool confirm_neigh)
{
struct rtable *rt = (struct rtable *) dst;
struct flowi4 fl4;
@@ -2687,7 +2689,8 @@ static unsigned int ipv4_blackhole_mtu(const struct dst_entry *dst)
}
static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
- struct sk_buff *skb, u32 mtu)
+ struct sk_buff *skb, u32 mtu,
+ bool confirm_neigh)
{
}
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index fcb2cd167f64..9684af02e0a5 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -1193,6 +1193,15 @@ static struct ctl_table ipv4_net_table[] = {
.proc_handler = proc_dointvec,
},
{
+ .procname = "tcp_no_ssthresh_metrics_save",
+ .data = &init_net.ipv4.sysctl_tcp_no_ssthresh_metrics_save,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+ {
.procname = "tcp_moderate_rcvbuf",
.data = &init_net.ipv4.sysctl_tcp_moderate_rcvbuf,
.maxlen = sizeof(int),
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 8a39ee794891..6711a97de3ce 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -443,8 +443,6 @@ void tcp_init_sock(struct sock *sk)
tp->tsoffset = 0;
tp->rack.reo_wnd_steps = 1;
- sk->sk_state = TCP_CLOSE;
-
sk->sk_write_space = sk_stream_write_space;
sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
@@ -692,8 +690,8 @@ static bool tcp_should_autocork(struct sock *sk, struct sk_buff *skb,
refcount_read(&sk->sk_wmem_alloc) > skb->truesize;
}
-static void tcp_push(struct sock *sk, int flags, int mss_now,
- int nonagle, int size_goal)
+void tcp_push(struct sock *sk, int flags, int mss_now,
+ int nonagle, int size_goal)
{
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb;
@@ -927,7 +925,7 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
return max(size_goal, mss_now);
}
-static int tcp_send_mss(struct sock *sk, int *size_goal, int flags)
+int tcp_send_mss(struct sock *sk, int *size_goal, int flags)
{
int mss_now;
@@ -1087,8 +1085,7 @@ do_error:
goto out;
out_err:
/* make sure we wake any epoll edge trigger waiter */
- if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 &&
- err == -EAGAIN)) {
+ if (unlikely(tcp_rtx_and_write_queues_empty(sk) && err == -EAGAIN)) {
sk->sk_write_space(sk);
tcp_chrono_stop(sk, TCP_CHRONO_SNDBUF_LIMITED);
}
@@ -1419,8 +1416,7 @@ out_err:
sock_zerocopy_put_abort(uarg, true);
err = sk_stream_error(sk, flags, err);
/* make sure we wake any epoll edge trigger waiter */
- if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 &&
- err == -EAGAIN)) {
+ if (unlikely(tcp_rtx_and_write_queues_empty(sk) && err == -EAGAIN)) {
sk->sk_write_space(sk);
tcp_chrono_stop(sk, TCP_CHRONO_SNDBUF_LIMITED);
}
@@ -1780,6 +1776,8 @@ static int tcp_zerocopy_receive(struct sock *sk,
while (length + PAGE_SIZE <= zc->length) {
if (zc->recv_skip_hint < PAGE_SIZE) {
if (skb) {
+ if (zc->recv_skip_hint > 0)
+ break;
skb = skb->next;
offset = seq - TCP_SKB_CB(skb)->seq;
} else {
@@ -3949,7 +3947,7 @@ void __init tcp_init(void)
BUILD_BUG_ON(TCP_MIN_SND_MSS <= MAX_TCP_OPTION_SPACE);
BUILD_BUG_ON(sizeof(struct tcp_skb_cb) >
- FIELD_SIZEOF(struct sk_buff, cb));
+ sizeof_field(struct sk_buff, cb));
percpu_counter_init(&tcp_sockets_allocated, 0, GFP_KERNEL);
percpu_counter_init(&tcp_orphan_count, 0, GFP_KERNEL);
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index 32772d6ded4e..a6545ef0d27b 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -306,7 +306,8 @@ static u32 bbr_tso_segs_goal(struct sock *sk)
/* Sort of tcp_tso_autosize() but ignoring
* driver provided sk_gso_max_size.
*/
- bytes = min_t(unsigned long, sk->sk_pacing_rate >> sk->sk_pacing_shift,
+ bytes = min_t(unsigned long,
+ sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift),
GSO_MAX_SIZE - 1 - MAX_TCP_HEADER);
segs = max_t(u32, bytes / tp->mss_cache, bbr_min_tso_segs(sk));
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 1b3d032a4df2..8f8eefd3a3ce 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -40,8 +40,8 @@
/* Number of delay samples for detecting the increase of delay */
#define HYSTART_MIN_SAMPLES 8
-#define HYSTART_DELAY_MIN (4U<<3)
-#define HYSTART_DELAY_MAX (16U<<3)
+#define HYSTART_DELAY_MIN (4000U) /* 4 ms */
+#define HYSTART_DELAY_MAX (16000U) /* 16 ms */
#define HYSTART_DELAY_THRESH(x) clamp(x, HYSTART_DELAY_MIN, HYSTART_DELAY_MAX)
static int fast_convergence __read_mostly = 1;
@@ -53,7 +53,7 @@ static int tcp_friendliness __read_mostly = 1;
static int hystart __read_mostly = 1;
static int hystart_detect __read_mostly = HYSTART_ACK_TRAIN | HYSTART_DELAY;
static int hystart_low_window __read_mostly = 16;
-static int hystart_ack_delta __read_mostly = 2;
+static int hystart_ack_delta_us __read_mostly = 2000;
static u32 cube_rtt_scale __read_mostly;
static u32 beta_scale __read_mostly;
@@ -77,8 +77,8 @@ MODULE_PARM_DESC(hystart_detect, "hybrid slow start detection mechanisms"
" 1: packet-train 2: delay 3: both packet-train and delay");
module_param(hystart_low_window, int, 0644);
MODULE_PARM_DESC(hystart_low_window, "lower bound cwnd for hybrid slow start");
-module_param(hystart_ack_delta, int, 0644);
-MODULE_PARM_DESC(hystart_ack_delta, "spacing between ack's indicating train (msecs)");
+module_param(hystart_ack_delta_us, int, 0644);
+MODULE_PARM_DESC(hystart_ack_delta_us, "spacing between ack's indicating train (usecs)");
/* BIC TCP Parameters */
struct bictcp {
@@ -89,7 +89,7 @@ struct bictcp {
u32 bic_origin_point;/* origin point of bic function */
u32 bic_K; /* time to origin point
from the beginning of the current epoch */
- u32 delay_min; /* min delay (msec << 3) */
+ u32 delay_min; /* min delay (usec) */
u32 epoch_start; /* beginning of an epoch */
u32 ack_cnt; /* number of acks */
u32 tcp_cwnd; /* estimated tcp cwnd */
@@ -117,13 +117,9 @@ static inline void bictcp_reset(struct bictcp *ca)
ca->found = 0;
}
-static inline u32 bictcp_clock(void)
+static inline u32 bictcp_clock_us(const struct sock *sk)
{
-#if HZ < 1000
- return ktime_to_ms(ktime_get_real());
-#else
- return jiffies_to_msecs(jiffies);
-#endif
+ return tcp_sk(sk)->tcp_mstamp;
}
static inline void bictcp_hystart_reset(struct sock *sk)
@@ -131,9 +127,9 @@ static inline void bictcp_hystart_reset(struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk);
struct bictcp *ca = inet_csk_ca(sk);
- ca->round_start = ca->last_ack = bictcp_clock();
+ ca->round_start = ca->last_ack = bictcp_clock_us(sk);
ca->end_seq = tp->snd_nxt;
- ca->curr_rtt = 0;
+ ca->curr_rtt = ~0U;
ca->sample_cnt = 0;
}
@@ -276,7 +272,7 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd, u32 acked)
*/
t = (s32)(tcp_jiffies32 - ca->epoch_start);
- t += msecs_to_jiffies(ca->delay_min >> 3);
+ t += usecs_to_jiffies(ca->delay_min);
/* change the unit from HZ to bictcp_HZ */
t <<= BICTCP_HZ;
do_div(t, HZ);
@@ -376,22 +372,54 @@ static void bictcp_state(struct sock *sk, u8 new_state)
}
}
+/* Account for TSO/GRO delays.
+ * Otherwise short RTT flows could get too small ssthresh, since during
+ * slow start we begin with small TSO packets and ca->delay_min would
+ * not account for long aggregation delay when TSO packets get bigger.
+ * Ideally even with a very small RTT we would like to have at least one
+ * TSO packet being sent and received by GRO, and another one in qdisc layer.
+ * We apply another 100% factor because @rate is doubled at this point.
+ * We cap the cushion to 1ms.
+ */
+static u32 hystart_ack_delay(struct sock *sk)
+{
+ unsigned long rate;
+
+ rate = READ_ONCE(sk->sk_pacing_rate);
+ if (!rate)
+ return 0;
+ return min_t(u64, USEC_PER_MSEC,
+ div64_ul((u64)GSO_MAX_SIZE * 4 * USEC_PER_SEC, rate));
+}
+
static void hystart_update(struct sock *sk, u32 delay)
{
struct tcp_sock *tp = tcp_sk(sk);
struct bictcp *ca = inet_csk_ca(sk);
-
- if (ca->found & hystart_detect)
- return;
+ u32 threshold;
if (hystart_detect & HYSTART_ACK_TRAIN) {
- u32 now = bictcp_clock();
+ u32 now = bictcp_clock_us(sk);
/* first detection parameter - ack-train detection */
- if ((s32)(now - ca->last_ack) <= hystart_ack_delta) {
+ if ((s32)(now - ca->last_ack) <= hystart_ack_delta_us) {
ca->last_ack = now;
- if ((s32)(now - ca->round_start) > ca->delay_min >> 4) {
- ca->found |= HYSTART_ACK_TRAIN;
+
+ threshold = ca->delay_min + hystart_ack_delay(sk);
+
+ /* Hystart ack train triggers if we get ack past
+ * ca->delay_min/2.
+ * Pacing might have delayed packets up to RTT/2
+ * during slow start.
+ */
+ if (sk->sk_pacing_status == SK_PACING_NONE)
+ threshold >>= 1;
+
+ if ((s32)(now - ca->round_start) > threshold) {
+ ca->found = 1;
+ pr_debug("hystart_ack_train (%u > %u) delay_min %u (+ ack_delay %u) cwnd %u\n",
+ now - ca->round_start, threshold,
+ ca->delay_min, hystart_ack_delay(sk), tp->snd_cwnd);
NET_INC_STATS(sock_net(sk),
LINUX_MIB_TCPHYSTARTTRAINDETECT);
NET_ADD_STATS(sock_net(sk),
@@ -405,14 +433,14 @@ static void hystart_update(struct sock *sk, u32 delay)
if (hystart_detect & HYSTART_DELAY) {
/* obtain the minimum delay of more than sampling packets */
if (ca->sample_cnt < HYSTART_MIN_SAMPLES) {
- if (ca->curr_rtt == 0 || ca->curr_rtt > delay)
+ if (ca->curr_rtt > delay)
ca->curr_rtt = delay;
ca->sample_cnt++;
} else {
if (ca->curr_rtt > ca->delay_min +
HYSTART_DELAY_THRESH(ca->delay_min >> 3)) {
- ca->found |= HYSTART_DELAY;
+ ca->found = 1;
NET_INC_STATS(sock_net(sk),
LINUX_MIB_TCPHYSTARTDELAYDETECT);
NET_ADD_STATS(sock_net(sk),
@@ -424,9 +452,6 @@ static void hystart_update(struct sock *sk, u32 delay)
}
}
-/* Track delayed acknowledgment ratio using sliding window
- * ratio = (15*ratio + sample) / 16
- */
static void bictcp_acked(struct sock *sk, const struct ack_sample *sample)
{
const struct tcp_sock *tp = tcp_sk(sk);
@@ -441,7 +466,7 @@ static void bictcp_acked(struct sock *sk, const struct ack_sample *sample)
if (ca->epoch_start && (s32)(tcp_jiffies32 - ca->epoch_start) < HZ)
return;
- delay = (sample->rtt_us << 3) / USEC_PER_MSEC;
+ delay = sample->rtt_us;
if (delay == 0)
delay = 1;
@@ -450,7 +475,7 @@ static void bictcp_acked(struct sock *sk, const struct ack_sample *sample)
ca->delay_min = delay;
/* hystart triggers when cwnd is larger than some threshold */
- if (hystart && tcp_in_slow_start(tp) &&
+ if (!ca->found && tcp_in_slow_start(tp) && hystart &&
tp->snd_cwnd >= hystart_low_window)
hystart_update(sk, delay);
}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 88b987ca9ebb..2914fdf1d543 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1422,7 +1422,7 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
if ((TCP_SKB_CB(prev)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED)
goto fallback;
- if (!tcp_skb_can_collapse_to(prev))
+ if (!tcp_skb_can_collapse(prev, skb))
goto fallback;
in_sack = !after(start_seq, TCP_SKB_CB(skb)->seq) &&
@@ -1727,8 +1727,11 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb,
}
/* Ignore very old stuff early */
- if (!after(sp[used_sacks].end_seq, prior_snd_una))
+ if (!after(sp[used_sacks].end_seq, prior_snd_una)) {
+ if (i == 0)
+ first_sack_index = -1;
continue;
+ }
used_sacks++;
}
@@ -3550,7 +3553,7 @@ static void tcp_xmit_recovery(struct sock *sk, int rexmit)
if (rexmit == REXMIT_NONE || sk->sk_state == TCP_SYN_SENT)
return;
- if (unlikely(rexmit == 2)) {
+ if (unlikely(rexmit == REXMIT_NEW)) {
__tcp_push_pending_frames(sk, tcp_current_mss(sk),
TCP_NAGLE_OFF);
if (after(tp->snd_nxt, tp->high_seq))
@@ -4420,6 +4423,9 @@ static bool tcp_try_coalesce(struct sock *sk,
if (TCP_SKB_CB(from)->seq != TCP_SKB_CB(to)->end_seq)
return false;
+ if (!mptcp_skb_can_collapse(to, from))
+ return false;
+
#ifdef CONFIG_TLS_DEVICE
if (from->decrypted != to->decrypted)
return false;
@@ -4929,7 +4935,7 @@ restart:
/* The first skb to collapse is:
* - not SYN/FIN and
* - bloated or contains data before "start" or
- * overlaps to the next one.
+ * overlaps to the next one and mptcp allow collapsing.
*/
if (!(TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)) &&
(tcp_win_from_space(sk, skb->truesize) > skb->len ||
@@ -4938,7 +4944,7 @@ restart:
break;
}
- if (n && n != tail &&
+ if (n && n != tail && mptcp_skb_can_collapse(skb, n) &&
TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(n)->seq) {
end_of_skbs = false;
break;
@@ -4971,6 +4977,7 @@ restart:
else
__skb_queue_tail(&tmp, nskb); /* defer rbtree insertion */
skb_set_owner_r(nskb, sk);
+ mptcp_skb_ext_move(nskb, skb);
/* Copy data, releasing collapsed skbs. */
while (copy > 0) {
@@ -4990,6 +4997,7 @@ restart:
skb = tcp_collapse_one(sk, skb, list, root);
if (!skb ||
skb == tail ||
+ !mptcp_skb_can_collapse(nskb, skb) ||
(TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)))
goto end;
#ifdef CONFIG_TLS_DEVICE
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 92282f98dc82..fedb537839ec 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -701,9 +701,21 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
rcu_read_lock();
hash_location = tcp_parse_md5sig_option(th);
if (sk && sk_fullsock(sk)) {
- key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)
- &ip_hdr(skb)->saddr, AF_INET);
+ const union tcp_md5_addr *addr;
+ int l3index;
+
+ /* sdif set, means packet ingressed via a device
+ * in an L3 domain and inet_iif is set to it.
+ */
+ l3index = tcp_v4_sdif(skb) ? inet_iif(skb) : 0;
+ addr = (union tcp_md5_addr *)&ip_hdr(skb)->saddr;
+ key = tcp_md5_do_lookup(sk, l3index, addr, AF_INET);
} else if (hash_location) {
+ const union tcp_md5_addr *addr;
+ int sdif = tcp_v4_sdif(skb);
+ int dif = inet_iif(skb);
+ int l3index;
+
/*
* active side is lost. Try to find listening socket through
* source port, and then find md5 key through listening socket.
@@ -714,14 +726,17 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
sk1 = __inet_lookup_listener(net, &tcp_hashinfo, NULL, 0,
ip_hdr(skb)->saddr,
th->source, ip_hdr(skb)->daddr,
- ntohs(th->source), inet_iif(skb),
- tcp_v4_sdif(skb));
+ ntohs(th->source), dif, sdif);
/* don't send rst if it can't find key */
if (!sk1)
goto out;
- key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *)
- &ip_hdr(skb)->saddr, AF_INET);
+ /* sdif set, means packet ingressed via a device
+ * in an L3 domain and dif is set to it.
+ */
+ l3index = sdif ? dif : 0;
+ addr = (union tcp_md5_addr *)&ip_hdr(skb)->saddr;
+ key = tcp_md5_do_lookup(sk1, l3index, addr, AF_INET);
if (!key)
goto out;
@@ -905,6 +920,9 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
struct request_sock *req)
{
+ const union tcp_md5_addr *addr;
+ int l3index;
+
/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
* sk->sk_state == TCP_SYN_RECV -> for Fast Open.
*/
@@ -916,14 +934,15 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
* exception of <SYN> segments, MUST be right-shifted by
* Rcv.Wind.Shift bits:
*/
+ addr = (union tcp_md5_addr *)&ip_hdr(skb)->saddr;
+ l3index = tcp_v4_sdif(skb) ? inet_iif(skb) : 0;
tcp_v4_send_ack(sk, skb, seq,
tcp_rsk(req)->rcv_nxt,
req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
req->ts_recent,
0,
- tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->saddr,
- AF_INET),
+ tcp_md5_do_lookup(sk, l3index, addr, AF_INET),
inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
ip_hdr(skb)->tos);
}
@@ -983,7 +1002,7 @@ DEFINE_STATIC_KEY_FALSE(tcp_md5_needed);
EXPORT_SYMBOL(tcp_md5_needed);
/* Find the Key structure for an address. */
-struct tcp_md5sig_key *__tcp_md5_do_lookup(const struct sock *sk,
+struct tcp_md5sig_key *__tcp_md5_do_lookup(const struct sock *sk, int l3index,
const union tcp_md5_addr *addr,
int family)
{
@@ -1003,7 +1022,8 @@ struct tcp_md5sig_key *__tcp_md5_do_lookup(const struct sock *sk,
hlist_for_each_entry_rcu(key, &md5sig->head, node) {
if (key->family != family)
continue;
-
+ if (key->l3index && key->l3index != l3index)
+ continue;
if (family == AF_INET) {
mask = inet_make_mask(key->prefixlen);
match = (key->addr.a4.s_addr & mask) ==
@@ -1027,7 +1047,8 @@ EXPORT_SYMBOL(__tcp_md5_do_lookup);
static struct tcp_md5sig_key *tcp_md5_do_lookup_exact(const struct sock *sk,
const union tcp_md5_addr *addr,
- int family, u8 prefixlen)
+ int family, u8 prefixlen,
+ int l3index)
{
const struct tcp_sock *tp = tcp_sk(sk);
struct tcp_md5sig_key *key;
@@ -1046,6 +1067,8 @@ static struct tcp_md5sig_key *tcp_md5_do_lookup_exact(const struct sock *sk,
hlist_for_each_entry_rcu(key, &md5sig->head, node) {
if (key->family != family)
continue;
+ if (key->l3index && key->l3index != l3index)
+ continue;
if (!memcmp(&key->addr, addr, size) &&
key->prefixlen == prefixlen)
return key;
@@ -1057,23 +1080,26 @@ struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk,
const struct sock *addr_sk)
{
const union tcp_md5_addr *addr;
+ int l3index;
+ l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
+ addr_sk->sk_bound_dev_if);
addr = (const union tcp_md5_addr *)&addr_sk->sk_daddr;
- return tcp_md5_do_lookup(sk, addr, AF_INET);
+ return tcp_md5_do_lookup(sk, l3index, addr, AF_INET);
}
EXPORT_SYMBOL(tcp_v4_md5_lookup);
/* This can be called on a newly created socket, from other files */
int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
- int family, u8 prefixlen, const u8 *newkey, u8 newkeylen,
- gfp_t gfp)
+ int family, u8 prefixlen, int l3index,
+ const u8 *newkey, u8 newkeylen, gfp_t gfp)
{
/* Add Key to the list */
struct tcp_md5sig_key *key;
struct tcp_sock *tp = tcp_sk(sk);
struct tcp_md5sig_info *md5sig;
- key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen);
+ key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen, l3index);
if (key) {
/* Pre-existing entry - just update that one. */
memcpy(key->key, newkey, newkeylen);
@@ -1105,6 +1131,7 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
key->keylen = newkeylen;
key->family = family;
key->prefixlen = prefixlen;
+ key->l3index = l3index;
memcpy(&key->addr, addr,
(family == AF_INET6) ? sizeof(struct in6_addr) :
sizeof(struct in_addr));
@@ -1114,11 +1141,11 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
EXPORT_SYMBOL(tcp_md5_do_add);
int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family,
- u8 prefixlen)
+ u8 prefixlen, int l3index)
{
struct tcp_md5sig_key *key;
- key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen);
+ key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen, l3index);
if (!key)
return -ENOENT;
hlist_del_rcu(&key->node);
@@ -1149,7 +1176,9 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, int optname,
{
struct tcp_md5sig cmd;
struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
+ const union tcp_md5_addr *addr;
u8 prefixlen = 32;
+ int l3index = 0;
if (optlen < sizeof(cmd))
return -EINVAL;
@@ -1167,16 +1196,34 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, int optname,
return -EINVAL;
}
+ if (optname == TCP_MD5SIG_EXT &&
+ cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
+ struct net_device *dev;
+
+ rcu_read_lock();
+ dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
+ if (dev && netif_is_l3_master(dev))
+ l3index = dev->ifindex;
+
+ rcu_read_unlock();
+
+ /* ok to reference set/not set outside of rcu;
+ * right now device MUST be an L3 master
+ */
+ if (!dev || !l3index)
+ return -EINVAL;
+ }
+
+ addr = (union tcp_md5_addr *)&sin->sin_addr.s_addr;
+
if (!cmd.tcpm_keylen)
- return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
- AF_INET, prefixlen);
+ return tcp_md5_do_del(sk, addr, AF_INET, prefixlen, l3index);
if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
return -EINVAL;
- return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
- AF_INET, prefixlen, cmd.tcpm_key, cmd.tcpm_keylen,
- GFP_KERNEL);
+ return tcp_md5_do_add(sk, addr, AF_INET, prefixlen, l3index,
+ cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
}
static int tcp_v4_md5_hash_headers(struct tcp_md5sig_pool *hp,
@@ -1286,7 +1333,8 @@ EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
/* Called with rcu_read_lock() */
static bool tcp_v4_inbound_md5_hash(const struct sock *sk,
- const struct sk_buff *skb)
+ const struct sk_buff *skb,
+ int dif, int sdif)
{
#ifdef CONFIG_TCP_MD5SIG
/*
@@ -1301,11 +1349,17 @@ static bool tcp_v4_inbound_md5_hash(const struct sock *sk,
struct tcp_md5sig_key *hash_expected;
const struct iphdr *iph = ip_hdr(skb);
const struct tcphdr *th = tcp_hdr(skb);
- int genhash;
+ const union tcp_md5_addr *addr;
unsigned char newhash[16];
+ int genhash, l3index;
+
+ /* sdif set, means packet ingressed via a device
+ * in an L3 domain and dif is set to the l3mdev
+ */
+ l3index = sdif ? dif : 0;
- hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr,
- AF_INET);
+ addr = (union tcp_md5_addr *)&iph->saddr;
+ hash_expected = tcp_md5_do_lookup(sk, l3index, addr, AF_INET);
hash_location = tcp_parse_md5sig_option(th);
/* We've parsed the options - do we have a hash? */
@@ -1331,11 +1385,11 @@ static bool tcp_v4_inbound_md5_hash(const struct sock *sk,
if (genhash || memcmp(hash_location, newhash, 16) != 0) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
- net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
+ net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s L3 index %d\n",
&iph->saddr, ntohs(th->source),
&iph->daddr, ntohs(th->dest),
genhash ? " tcp_v4_calc_md5_hash failed"
- : "");
+ : "", l3index);
return true;
}
return false;
@@ -1372,7 +1426,7 @@ struct request_sock_ops tcp_request_sock_ops __read_mostly = {
.syn_ack_timeout = tcp_syn_ack_timeout,
};
-static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
+const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
.mss_clamp = TCP_MSS_DEFAULT,
#ifdef CONFIG_TCP_MD5SIG
.req_md5_lookup = tcp_v4_md5_lookup,
@@ -1419,7 +1473,9 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
struct tcp_sock *newtp;
struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
+ const union tcp_md5_addr *addr;
struct tcp_md5sig_key *key;
+ int l3index;
#endif
struct ip_options_rcu *inet_opt;
@@ -1467,9 +1523,10 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
tcp_initialize_rcv_mss(newsk);
#ifdef CONFIG_TCP_MD5SIG
+ l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);
/* Copy over the MD5 key from the original socket */
- key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr,
- AF_INET);
+ addr = (union tcp_md5_addr *)&newinet->inet_daddr;
+ key = tcp_md5_do_lookup(sk, l3index, addr, AF_INET);
if (key) {
/*
* We're using one, so create a matching key
@@ -1477,8 +1534,8 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
* memory, then we end up not copying the key
* across. Shucks.
*/
- tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newinet->inet_daddr,
- AF_INET, 32, key->key, key->keylen, GFP_ATOMIC);
+ tcp_md5_do_add(newsk, addr, AF_INET, 32, l3index,
+ key->key, key->keylen, GFP_ATOMIC);
sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
}
#endif
@@ -1808,6 +1865,7 @@ int tcp_v4_rcv(struct sk_buff *skb)
struct net *net = dev_net(skb->dev);
struct sk_buff *skb_to_free;
int sdif = inet_sdif(skb);
+ int dif = inet_iif(skb);
const struct iphdr *iph;
const struct tcphdr *th;
bool refcounted;
@@ -1856,7 +1914,7 @@ process:
struct sock *nsk;
sk = req->rsk_listener;
- if (unlikely(tcp_v4_inbound_md5_hash(sk, skb))) {
+ if (unlikely(tcp_v4_inbound_md5_hash(sk, skb, dif, sdif))) {
sk_drops_add(sk, skb);
reqsk_put(req);
goto discard_it;
@@ -1914,7 +1972,7 @@ process:
if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
goto discard_and_relse;
- if (tcp_v4_inbound_md5_hash(sk, skb))
+ if (tcp_v4_inbound_md5_hash(sk, skb, dif, sdif))
goto discard_and_relse;
nf_reset_ct(skb);
@@ -2147,13 +2205,14 @@ static void *listening_get_next(struct seq_file *seq, void *cur)
struct tcp_iter_state *st = seq->private;
struct net *net = seq_file_net(seq);
struct inet_listen_hashbucket *ilb;
+ struct hlist_nulls_node *node;
struct sock *sk = cur;
if (!sk) {
get_head:
ilb = &tcp_hashinfo.listening_hash[st->bucket];
spin_lock(&ilb->lock);
- sk = sk_head(&ilb->head);
+ sk = sk_nulls_head(&ilb->nulls_head);
st->offset = 0;
goto get_sk;
}
@@ -2161,9 +2220,9 @@ get_head:
++st->num;
++st->offset;
- sk = sk_next(sk);
+ sk = sk_nulls_next(sk);
get_sk:
- sk_for_each_from(sk) {
+ sk_nulls_for_each_from(sk, node) {
if (!net_eq(sock_net(sk), net))
continue;
if (sk->sk_family == afinfo->family)
@@ -2674,6 +2733,7 @@ static int __net_init tcp_sk_init(struct net *net)
net->ipv4.sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT;
net->ipv4.sysctl_tcp_notsent_lowat = UINT_MAX;
net->ipv4.sysctl_tcp_tw_reuse = 2;
+ net->ipv4.sysctl_tcp_no_ssthresh_metrics_save = 1;
cnt = tcp_hashinfo.ehash_mask + 1;
net->ipv4.tcp_death_row.sysctl_max_tw_buckets = cnt / 2;
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index c4848e7a0aad..279db8822439 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -385,7 +385,8 @@ void tcp_update_metrics(struct sock *sk)
if (tcp_in_initial_slowstart(tp)) {
/* Slow start still did not finish. */
- if (!tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) {
+ if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save &&
+ !tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) {
val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH);
if (val && (tp->snd_cwnd >> 1) > val)
tcp_metric_set(tm, TCP_METRIC_SSTHRESH,
@@ -400,7 +401,8 @@ void tcp_update_metrics(struct sock *sk)
} else if (!tcp_in_slow_start(tp) &&
icsk->icsk_ca_state == TCP_CA_Open) {
/* Cong. avoidance phase, cwnd is reliable. */
- if (!tcp_metric_locked(tm, TCP_METRIC_SSTHRESH))
+ if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save &&
+ !tcp_metric_locked(tm, TCP_METRIC_SSTHRESH))
tcp_metric_set(tm, TCP_METRIC_SSTHRESH,
max(tp->snd_cwnd >> 1, tp->snd_ssthresh));
if (!tcp_metric_locked(tm, TCP_METRIC_CWND)) {
@@ -416,7 +418,8 @@ void tcp_update_metrics(struct sock *sk)
tcp_metric_set(tm, TCP_METRIC_CWND,
(val + tp->snd_ssthresh) >> 1);
}
- if (!tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) {
+ if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save &&
+ !tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) {
val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH);
if (val && tp->snd_ssthresh > val)
tcp_metric_set(tm, TCP_METRIC_SSTHRESH,
@@ -441,6 +444,7 @@ void tcp_init_metrics(struct sock *sk)
{
struct dst_entry *dst = __sk_dst_get(sk);
struct tcp_sock *tp = tcp_sk(sk);
+ struct net *net = sock_net(sk);
struct tcp_metrics_block *tm;
u32 val, crtt = 0; /* cached RTT scaled by 8 */
@@ -458,7 +462,8 @@ void tcp_init_metrics(struct sock *sk)
if (tcp_metric_locked(tm, TCP_METRIC_CWND))
tp->snd_cwnd_clamp = tcp_metric_get(tm, TCP_METRIC_CWND);
- val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH);
+ val = net->ipv4.sysctl_tcp_no_ssthresh_metrics_save ?
+ 0 : tcp_metric_get(tm, TCP_METRIC_SSTHRESH);
if (val) {
tp->snd_ssthresh = val;
if (tp->snd_ssthresh > tp->snd_cwnd_clamp)
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index b184f03d7437..05109d0c675b 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -72,6 +72,9 @@ static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb)
__skb_unlink(skb, &sk->sk_write_queue);
tcp_rbtree_insert(&sk->tcp_rtx_queue, skb);
+ if (tp->highest_sack == NULL)
+ tp->highest_sack = skb;
+
tp->packets_out += tcp_skb_pcount(skb);
if (!prior_packets || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
tcp_rearm_rto(sk);
@@ -751,13 +754,17 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb
eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack;
if (unlikely(eff_sacks)) {
const unsigned int remaining = MAX_TCP_OPTION_SPACE - size;
+ if (unlikely(remaining < TCPOLEN_SACK_BASE_ALIGNED +
+ TCPOLEN_SACK_PERBLOCK))
+ return size;
+
opts->num_sack_blocks =
min_t(unsigned int, eff_sacks,
(remaining - TCPOLEN_SACK_BASE_ALIGNED) /
TCPOLEN_SACK_PERBLOCK);
- if (likely(opts->num_sack_blocks))
- size += TCPOLEN_SACK_BASE_ALIGNED +
- opts->num_sack_blocks * TCPOLEN_SACK_PERBLOCK;
+
+ size += TCPOLEN_SACK_BASE_ALIGNED +
+ opts->num_sack_blocks * TCPOLEN_SACK_PERBLOCK;
}
return size;
@@ -1725,7 +1732,7 @@ static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
u32 bytes, segs;
bytes = min_t(unsigned long,
- sk->sk_pacing_rate >> sk->sk_pacing_shift,
+ sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift),
sk->sk_gso_max_size - 1 - MAX_TCP_HEADER);
/* Goal is to send at least one packet per ms,
@@ -2260,7 +2267,7 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
limit = max_t(unsigned long,
2 * skb->truesize,
- sk->sk_pacing_rate >> sk->sk_pacing_shift);
+ sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift));
if (sk->sk_pacing_status == SK_PACING_NONE)
limit = min_t(unsigned long, limit,
sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes);
@@ -2438,6 +2445,14 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
if (tcp_small_queue_check(sk, skb, 0))
break;
+ /* Argh, we hit an empty skb(), presumably a thread
+ * is sleeping in sendmsg()/sk_stream_wait_memory().
+ * We do not want to send a pure-ack packet and have
+ * a strange looking rtx queue with empty packet(s).
+ */
+ if (TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq)
+ break;
+
if (unlikely(tcp_transmit_skb(sk, skb, 1, gfp)))
break;
@@ -2854,7 +2869,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to,
if (!tcp_can_collapse(sk, skb))
break;
- if (!tcp_skb_can_collapse_to(to))
+ if (!tcp_skb_can_collapse(to, skb))
break;
space -= skb->len;
@@ -3121,7 +3136,7 @@ void sk_forced_mem_schedule(struct sock *sk, int size)
*/
void tcp_send_fin(struct sock *sk)
{
- struct sk_buff *skb, *tskb = tcp_write_queue_tail(sk);
+ struct sk_buff *skb, *tskb, *tail = tcp_write_queue_tail(sk);
struct tcp_sock *tp = tcp_sk(sk);
/* Optimization, tack on the FIN if we have one skb in write queue and
@@ -3129,6 +3144,7 @@ void tcp_send_fin(struct sock *sk)
* Note: in the latter case, FIN packet will be sent after a timeout,
* as TCP stack thinks it has already been transmitted.
*/
+ tskb = tail;
if (!tskb && tcp_under_memory_pressure(sk))
tskb = skb_rb_last(&sk->tcp_rtx_queue);
@@ -3136,7 +3152,7 @@ void tcp_send_fin(struct sock *sk)
TCP_SKB_CB(tskb)->tcp_flags |= TCPHDR_FIN;
TCP_SKB_CB(tskb)->end_seq++;
tp->write_seq++;
- if (tcp_write_queue_empty(sk)) {
+ if (!tail) {
/* This means tskb was already sent.
* Pretend we included the FIN on previous transmit.
* We need to set tp->snd_nxt to the value it would have
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 4da5758cc718..93a355b6b092 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1475,7 +1475,7 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb)
* queue contains some other skb
*/
rmem = atomic_add_return(size, &sk->sk_rmem_alloc);
- if (rmem > (size + sk->sk_rcvbuf))
+ if (rmem > (size + (unsigned int)sk->sk_rcvbuf))
goto uncharge_drop;
spin_lock(&list->lock);
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index a3908e55ed89..b25e42100ceb 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -480,7 +480,7 @@ struct sk_buff *udp4_gro_receive(struct list_head *head, struct sk_buff *skb)
inet_gro_compute_pseudo))
goto flush;
else if (uh->check)
- skb_gro_checksum_try_convert(skb, IPPROTO_UDP, uh->check,
+ skb_gro_checksum_try_convert(skb, IPPROTO_UDP,
inet_gro_compute_pseudo);
skip:
NAPI_GRO_CB(skb)->is_ipv6 = 0;
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 35b84b52b702..9ebd54752e03 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -100,12 +100,13 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
}
static void xfrm4_update_pmtu(struct dst_entry *dst, struct sock *sk,
- struct sk_buff *skb, u32 mtu)
+ struct sk_buff *skb, u32 mtu,
+ bool confirm_neigh)
{
struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
struct dst_entry *path = xdst->route;
- path->ops->update_pmtu(path, sk, skb, mtu);
+ path->ops->update_pmtu(path, sk, skb, mtu, confirm_neigh);
}
static void xfrm4_redirect(struct dst_entry *dst, struct sock *sk,
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 98d82305d6de..39d861d00377 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -5231,16 +5231,16 @@ static int inet6_rtm_valid_getaddr_req(struct sk_buff *skb,
return -EINVAL;
}
+ if (!netlink_strict_get_check(skb))
+ return nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
+ ifa_ipv6_policy, extack);
+
ifm = nlmsg_data(nlh);
if (ifm->ifa_prefixlen || ifm->ifa_flags || ifm->ifa_scope) {
NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for get address request");
return -EINVAL;
}
- if (!netlink_strict_get_check(skb))
- return nlmsg_parse_deprecated(nlh, sizeof(*ifm), tb, IFA_MAX,
- ifa_ipv6_policy, extack);
-
err = nlmsg_parse_deprecated_strict(nlh, sizeof(*ifm), tb, IFA_MAX,
ifa_ipv6_policy, extack);
if (err)
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index fe9cb8d1adca..e315526fa244 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -146,7 +146,7 @@ struct dst_entry *inet6_csk_update_pmtu(struct sock *sk, u32 mtu)
if (IS_ERR(dst))
return NULL;
- dst->ops->update_pmtu(dst, sk, NULL, mtu);
+ dst->ops->update_pmtu(dst, sk, NULL, mtu, true);
dst = inet6_csk_route_socket(sk, &fl6);
return IS_ERR(dst) ? NULL : dst;
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 7bae6a91b487..b1e9a10e1133 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -370,6 +370,21 @@ static int call_fib6_entry_notifier(struct notifier_block *nb,
return call_fib6_notifier(nb, event_type, &info.info);
}
+static int call_fib6_multipath_entry_notifier(struct notifier_block *nb,
+ enum fib_event_type event_type,
+ struct fib6_info *rt,
+ unsigned int nsiblings,
+ struct netlink_ext_ack *extack)
+{
+ struct fib6_entry_notifier_info info = {
+ .info.extack = extack,
+ .rt = rt,
+ .nsiblings = nsiblings,
+ };
+
+ return call_fib6_notifier(nb, event_type, &info.info);
+}
+
int call_fib6_entry_notifiers(struct net *net,
enum fib_event_type event_type,
struct fib6_info *rt,
@@ -400,6 +415,17 @@ int call_fib6_multipath_entry_notifiers(struct net *net,
return call_fib6_notifiers(net, event_type, &info.info);
}
+int call_fib6_entry_notifiers_replace(struct net *net, struct fib6_info *rt)
+{
+ struct fib6_entry_notifier_info info = {
+ .rt = rt,
+ .nsiblings = rt->fib6_nsiblings,
+ };
+
+ rt->fib6_table->fib_seq++;
+ return call_fib6_notifiers(net, FIB_EVENT_ENTRY_REPLACE, &info.info);
+}
+
struct fib6_dump_arg {
struct net *net;
struct notifier_block *nb;
@@ -408,22 +434,29 @@ struct fib6_dump_arg {
static int fib6_rt_dump(struct fib6_info *rt, struct fib6_dump_arg *arg)
{
- if (rt == arg->net->ipv6.fib6_null_entry)
+ enum fib_event_type fib_event = FIB_EVENT_ENTRY_REPLACE;
+ int err;
+
+ if (!rt || rt == arg->net->ipv6.fib6_null_entry)
return 0;
- return call_fib6_entry_notifier(arg->nb, FIB_EVENT_ENTRY_ADD,
- rt, arg->extack);
+
+ if (rt->fib6_nsiblings)
+ err = call_fib6_multipath_entry_notifier(arg->nb, fib_event,
+ rt,
+ rt->fib6_nsiblings,
+ arg->extack);
+ else
+ err = call_fib6_entry_notifier(arg->nb, fib_event, rt,
+ arg->extack);
+
+ return err;
}
static int fib6_node_dump(struct fib6_walker *w)
{
- struct fib6_info *rt;
- int err = 0;
+ int err;
- for_each_fib6_walker_rt(w) {
- err = fib6_rt_dump(rt, w->args);
- if (err)
- break;
- }
+ err = fib6_rt_dump(w->leaf, w->args);
w->leaf = NULL;
return err;
}
@@ -1039,6 +1072,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
(info->nlh->nlmsg_flags & NLM_F_CREATE));
int found = 0;
bool rt_can_ecmp = rt6_qualify_for_ecmp(rt);
+ bool notify_sibling_rt = false;
u16 nlflags = NLM_F_EXCL;
int err;
@@ -1130,6 +1164,7 @@ next_iter:
/* Find the first route that have the same metric */
sibling = leaf;
+ notify_sibling_rt = true;
while (sibling) {
if (sibling->fib6_metric == rt->fib6_metric &&
rt6_qualify_for_ecmp(sibling)) {
@@ -1139,6 +1174,7 @@ next_iter:
}
sibling = rcu_dereference_protected(sibling->fib6_next,
lockdep_is_held(&rt->fib6_table->tb6_lock));
+ notify_sibling_rt = false;
}
/* For each sibling in the list, increment the counter of
* siblings. BUG() if counters does not match, list of siblings
@@ -1165,10 +1201,21 @@ next_iter:
add:
nlflags |= NLM_F_CREATE;
- if (!info->skip_notify_kernel) {
+ /* The route should only be notified if it is the first
+ * route in the node or if it is added as a sibling
+ * route to the first route in the node.
+ */
+ if (!info->skip_notify_kernel &&
+ (notify_sibling_rt || ins == &fn->leaf)) {
+ enum fib_event_type fib_event;
+
+ if (notify_sibling_rt)
+ fib_event = FIB_EVENT_ENTRY_APPEND;
+ else
+ fib_event = FIB_EVENT_ENTRY_REPLACE;
err = call_fib6_entry_notifiers(info->nl_net,
- FIB_EVENT_ENTRY_ADD,
- rt, extack);
+ fib_event, rt,
+ extack);
if (err) {
struct fib6_info *sibling, *next_sibling;
@@ -1212,7 +1259,7 @@ add:
return -ENOENT;
}
- if (!info->skip_notify_kernel) {
+ if (!info->skip_notify_kernel && ins == &fn->leaf) {
err = call_fib6_entry_notifiers(info->nl_net,
FIB_EVENT_ENTRY_REPLACE,
rt, extack);
@@ -1845,13 +1892,29 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
struct fib6_info __rcu **rtp, struct nl_info *info)
{
+ struct fib6_info *leaf, *replace_rt = NULL;
struct fib6_walker *w;
struct fib6_info *rt = rcu_dereference_protected(*rtp,
lockdep_is_held(&table->tb6_lock));
struct net *net = info->nl_net;
+ bool notify_del = false;
RT6_TRACE("fib6_del_route\n");
+ /* If the deleted route is the first in the node and it is not part of
+ * a multipath route, then we need to replace it with the next route
+ * in the node, if exists.
+ */
+ leaf = rcu_dereference_protected(fn->leaf,
+ lockdep_is_held(&table->tb6_lock));
+ if (leaf == rt && !rt->fib6_nsiblings) {
+ if (rcu_access_pointer(rt->fib6_next))
+ replace_rt = rcu_dereference_protected(rt->fib6_next,
+ lockdep_is_held(&table->tb6_lock));
+ else
+ notify_del = true;
+ }
+
/* Unlink it */
*rtp = rt->fib6_next;
rt->fib6_node = NULL;
@@ -1869,6 +1932,14 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
if (rt->fib6_nsiblings) {
struct fib6_info *sibling, *next_sibling;
+ /* The route is deleted from a multipath route. If this
+ * multipath route is the first route in the node, then we need
+ * to emit a delete notification. Otherwise, we need to skip
+ * the notification.
+ */
+ if (rt->fib6_metric == leaf->fib6_metric &&
+ rt6_qualify_for_ecmp(leaf))
+ notify_del = true;
list_for_each_entry_safe(sibling, next_sibling,
&rt->fib6_siblings, fib6_siblings)
sibling->fib6_nsiblings--;
@@ -1904,8 +1975,13 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
fib6_purge_rt(rt, fn, net);
- if (!info->skip_notify_kernel)
- call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, rt, NULL);
+ if (!info->skip_notify_kernel) {
+ if (notify_del)
+ call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_DEL,
+ rt, NULL);
+ else if (replace_rt)
+ call_fib6_entry_notifiers_replace(net, replace_rt);
+ }
if (!info->skip_notify)
inet6_rt_notify(RTM_DELROUTE, rt, info, 0);
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 923034c52ce4..ee968d980746 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -1040,7 +1040,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
/* TooBig packet may have updated dst->dev's mtu */
if (!t->parms.collect_md && dst && dst_mtu(dst) > dst->dev->mtu)
- dst->ops->update_pmtu(dst, NULL, skb, dst->dev->mtu);
+ dst->ops->update_pmtu(dst, NULL, skb, dst->dev->mtu, false);
err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu,
NEXTHDR_GRE);
@@ -2170,8 +2170,8 @@ static const struct nla_policy ip6gre_policy[IFLA_GRE_MAX + 1] = {
[IFLA_GRE_OFLAGS] = { .type = NLA_U16 },
[IFLA_GRE_IKEY] = { .type = NLA_U32 },
[IFLA_GRE_OKEY] = { .type = NLA_U32 },
- [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct ipv6hdr, saddr) },
- [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct ipv6hdr, daddr) },
+ [IFLA_GRE_LOCAL] = { .len = sizeof_field(struct ipv6hdr, saddr) },
+ [IFLA_GRE_REMOTE] = { .len = sizeof_field(struct ipv6hdr, daddr) },
[IFLA_GRE_TTL] = { .type = NLA_U8 },
[IFLA_GRE_ENCAP_LIMIT] = { .type = NLA_U8 },
[IFLA_GRE_FLOWINFO] = { .type = NLA_U32 },
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 754a484d35df..2f376dbc37d5 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -640,7 +640,7 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (rel_info > dst_mtu(skb_dst(skb2)))
goto out;
- skb_dst_update_pmtu(skb2, rel_info);
+ skb_dst_update_pmtu_no_confirm(skb2, rel_info);
}
icmp_send(skb2, rel_type, rel_code, htonl(rel_info));
@@ -1132,7 +1132,7 @@ route_lookup:
mtu = max(mtu, skb->protocol == htons(ETH_P_IPV6) ?
IPV6_MIN_MTU : IPV4_MIN_MTU);
- skb_dst_update_pmtu(skb, mtu);
+ skb_dst_update_pmtu_no_confirm(skb, mtu);
if (skb->len - t->tun_hlen - eth_hlen > mtu && !skb_is_gso(skb)) {
*pmtu = mtu;
err = -EMSGSIZE;
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 024db17386d2..6f08b760c2a7 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -479,7 +479,7 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
mtu = dst_mtu(dst);
if (skb->len > mtu) {
- skb_dst_update_pmtu(skb, mtu);
+ skb_dst_update_pmtu_no_confirm(skb, mtu);
if (skb->protocol == htons(ETH_P_IPV6)) {
if (mtu < IPV6_MIN_MTU)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index b59940416cb5..0253b702afb7 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -95,7 +95,8 @@ static int ip6_pkt_prohibit(struct sk_buff *skb);
static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb);
static void ip6_link_failure(struct sk_buff *skb);
static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
- struct sk_buff *skb, u32 mtu);
+ struct sk_buff *skb, u32 mtu,
+ bool confirm_neigh);
static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb);
static int rt6_score_route(const struct fib6_nh *nh, u32 fib6_flags, int oif,
@@ -264,7 +265,8 @@ static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
}
static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
- struct sk_buff *skb, u32 mtu)
+ struct sk_buff *skb, u32 mtu,
+ bool confirm_neigh)
{
}
@@ -2692,7 +2694,8 @@ static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
}
static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
- const struct ipv6hdr *iph, u32 mtu)
+ const struct ipv6hdr *iph, u32 mtu,
+ bool confirm_neigh)
{
const struct in6_addr *daddr, *saddr;
struct rt6_info *rt6 = (struct rt6_info *)dst;
@@ -2710,7 +2713,10 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
daddr = NULL;
saddr = NULL;
}
- dst_confirm_neigh(dst, daddr);
+
+ if (confirm_neigh)
+ dst_confirm_neigh(dst, daddr);
+
mtu = max_t(u32, mtu, IPV6_MIN_MTU);
if (mtu >= dst_mtu(dst))
return;
@@ -2764,9 +2770,11 @@ out_unlock:
}
static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
- struct sk_buff *skb, u32 mtu)
+ struct sk_buff *skb, u32 mtu,
+ bool confirm_neigh)
{
- __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu);
+ __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu,
+ confirm_neigh);
}
void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
@@ -2785,7 +2793,7 @@ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,
dst = ip6_route_output(net, NULL, &fl6);
if (!dst->error)
- __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu));
+ __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu), true);
dst_release(dst);
}
EXPORT_SYMBOL_GPL(ip6_update_pmtu);
@@ -3749,6 +3757,7 @@ static int __ip6_del_rt_siblings(struct fib6_info *rt, struct fib6_config *cfg)
if (rt->fib6_nsiblings && cfg->fc_delete_all_nh) {
struct fib6_info *sibling, *next_sibling;
+ struct fib6_node *fn;
/* prefer to send a single notification with all hops */
skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
@@ -3764,12 +3773,32 @@ static int __ip6_del_rt_siblings(struct fib6_info *rt, struct fib6_config *cfg)
info->skip_notify = 1;
}
+ /* 'rt' points to the first sibling route. If it is not the
+ * leaf, then we do not need to send a notification. Otherwise,
+ * we need to check if the last sibling has a next route or not
+ * and emit a replace or delete notification, respectively.
+ */
info->skip_notify_kernel = 1;
- call_fib6_multipath_entry_notifiers(net,
- FIB_EVENT_ENTRY_DEL,
- rt,
- rt->fib6_nsiblings,
- NULL);
+ fn = rcu_dereference_protected(rt->fib6_node,
+ lockdep_is_held(&table->tb6_lock));
+ if (rcu_access_pointer(fn->leaf) == rt) {
+ struct fib6_info *last_sibling, *replace_rt;
+
+ last_sibling = list_last_entry(&rt->fib6_siblings,
+ struct fib6_info,
+ fib6_siblings);
+ replace_rt = rcu_dereference_protected(
+ last_sibling->fib6_next,
+ lockdep_is_held(&table->tb6_lock));
+ if (replace_rt)
+ call_fib6_entry_notifiers_replace(net,
+ replace_rt);
+ else
+ call_fib6_multipath_entry_notifiers(net,
+ FIB_EVENT_ENTRY_DEL,
+ rt, rt->fib6_nsiblings,
+ NULL);
+ }
list_for_each_entry_safe(sibling, next_sibling,
&rt->fib6_siblings,
fib6_siblings) {
@@ -5017,12 +5046,37 @@ static void ip6_route_mpath_notify(struct fib6_info *rt,
inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
}
+static bool ip6_route_mpath_should_notify(const struct fib6_info *rt)
+{
+ bool rt_can_ecmp = rt6_qualify_for_ecmp(rt);
+ bool should_notify = false;
+ struct fib6_info *leaf;
+ struct fib6_node *fn;
+
+ rcu_read_lock();
+ fn = rcu_dereference(rt->fib6_node);
+ if (!fn)
+ goto out;
+
+ leaf = rcu_dereference(fn->leaf);
+ if (!leaf)
+ goto out;
+
+ if (rt == leaf ||
+ (rt_can_ecmp && rt->fib6_metric == leaf->fib6_metric &&
+ rt6_qualify_for_ecmp(leaf)))
+ should_notify = true;
+out:
+ rcu_read_unlock();
+
+ return should_notify;
+}
+
static int ip6_route_multipath_add(struct fib6_config *cfg,
struct netlink_ext_ack *extack)
{
struct fib6_info *rt_notif = NULL, *rt_last = NULL;
struct nl_info *info = &cfg->fc_nlinfo;
- enum fib_event_type event_type;
struct fib6_config r_cfg;
struct rtnexthop *rtnh;
struct fib6_info *rt;
@@ -5147,13 +5201,27 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
nhn++;
}
- event_type = replace ? FIB_EVENT_ENTRY_REPLACE : FIB_EVENT_ENTRY_ADD;
- err = call_fib6_multipath_entry_notifiers(info->nl_net, event_type,
- rt_notif, nhn - 1, extack);
- if (err) {
- /* Delete all the siblings that were just added */
- err_nh = NULL;
- goto add_errout;
+ /* An in-kernel notification should only be sent in case the new
+ * multipath route is added as the first route in the node, or if
+ * it was appended to it. We pass 'rt_notif' since it is the first
+ * sibling and might allow us to skip some checks in the replace case.
+ */
+ if (ip6_route_mpath_should_notify(rt_notif)) {
+ enum fib_event_type fib_event;
+
+ if (rt_notif->fib6_nsiblings != nhn - 1)
+ fib_event = FIB_EVENT_ENTRY_APPEND;
+ else
+ fib_event = FIB_EVENT_ENTRY_REPLACE;
+
+ err = call_fib6_multipath_entry_notifiers(info->nl_net,
+ fib_event, rt_notif,
+ nhn - 1, extack);
+ if (err) {
+ /* Delete all the siblings that were just added */
+ err_nh = NULL;
+ goto add_errout;
+ }
}
/* success ... tell user about new route */
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index b2ccbc473127..98954830c40b 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -944,7 +944,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
}
if (tunnel->parms.iph.daddr)
- skb_dst_update_pmtu(skb, mtu);
+ skb_dst_update_pmtu_no_confirm(skb, mtu);
if (skb->len > mtu && !skb_is_gso(skb)) {
icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index df5fd9109696..5b5260103b65 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -75,13 +75,14 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
static const struct inet_connection_sock_af_ops ipv6_mapped;
-static const struct inet_connection_sock_af_ops ipv6_specific;
+const struct inet_connection_sock_af_ops ipv6_specific;
#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
#else
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
- const struct in6_addr *addr)
+ const struct in6_addr *addr,
+ int l3index)
{
return NULL;
}
@@ -532,15 +533,22 @@ static void tcp_v6_reqsk_destructor(struct request_sock *req)
#ifdef CONFIG_TCP_MD5SIG
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
- const struct in6_addr *addr)
+ const struct in6_addr *addr,
+ int l3index)
{
- return tcp_md5_do_lookup(sk, (union tcp_md5_addr *)addr, AF_INET6);
+ return tcp_md5_do_lookup(sk, l3index,
+ (union tcp_md5_addr *)addr, AF_INET6);
}
static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
const struct sock *addr_sk)
{
- return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr);
+ int l3index;
+
+ l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
+ addr_sk->sk_bound_dev_if);
+ return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
+ l3index);
}
static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
@@ -548,6 +556,7 @@ static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
{
struct tcp_md5sig cmd;
struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
+ int l3index = 0;
u8 prefixlen;
if (optlen < sizeof(cmd))
@@ -569,12 +578,30 @@ static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
}
+ if (optname == TCP_MD5SIG_EXT &&
+ cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
+ struct net_device *dev;
+
+ rcu_read_lock();
+ dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
+ if (dev && netif_is_l3_master(dev))
+ l3index = dev->ifindex;
+ rcu_read_unlock();
+
+ /* ok to reference set/not set outside of rcu;
+ * right now device MUST be an L3 master
+ */
+ if (!dev || !l3index)
+ return -EINVAL;
+ }
+
if (!cmd.tcpm_keylen) {
if (ipv6_addr_v4mapped(&sin6->sin6_addr))
return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
- AF_INET, prefixlen);
+ AF_INET, prefixlen,
+ l3index);
return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
- AF_INET6, prefixlen);
+ AF_INET6, prefixlen, l3index);
}
if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
@@ -582,12 +609,13 @@ static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
if (ipv6_addr_v4mapped(&sin6->sin6_addr))
return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
- AF_INET, prefixlen, cmd.tcpm_key,
- cmd.tcpm_keylen, GFP_KERNEL);
+ AF_INET, prefixlen, l3index,
+ cmd.tcpm_key, cmd.tcpm_keylen,
+ GFP_KERNEL);
return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
- AF_INET6, prefixlen, cmd.tcpm_key,
- cmd.tcpm_keylen, GFP_KERNEL);
+ AF_INET6, prefixlen, l3index,
+ cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
}
static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
@@ -698,17 +726,23 @@ clear_hash_noput:
#endif
static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
- const struct sk_buff *skb)
+ const struct sk_buff *skb,
+ int dif, int sdif)
{
#ifdef CONFIG_TCP_MD5SIG
const __u8 *hash_location = NULL;
struct tcp_md5sig_key *hash_expected;
const struct ipv6hdr *ip6h = ipv6_hdr(skb);
const struct tcphdr *th = tcp_hdr(skb);
- int genhash;
+ int genhash, l3index;
u8 newhash[16];
- hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr);
+ /* sdif set, means packet ingressed via a device
+ * in an L3 domain and dif is set to the l3mdev
+ */
+ l3index = sdif ? dif : 0;
+
+ hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr, l3index);
hash_location = tcp_parse_md5sig_option(th);
/* We've parsed the options - do we have a hash? */
@@ -732,10 +766,10 @@ static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
if (genhash || memcmp(hash_location, newhash, 16) != 0) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
- net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u\n",
+ net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u L3 index %d\n",
genhash ? "failed" : "mismatch",
&ip6h->saddr, ntohs(th->source),
- &ip6h->daddr, ntohs(th->dest));
+ &ip6h->daddr, ntohs(th->dest), l3index);
return true;
}
#endif
@@ -785,7 +819,7 @@ struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
.syn_ack_timeout = tcp_syn_ack_timeout,
};
-static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
+const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) -
sizeof(struct ipv6hdr),
#ifdef CONFIG_TCP_MD5SIG
@@ -951,8 +985,18 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
rcu_read_lock();
hash_location = tcp_parse_md5sig_option(th);
if (sk && sk_fullsock(sk)) {
- key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr);
+ int l3index;
+
+ /* sdif set, means packet ingressed via a device
+ * in an L3 domain and inet_iif is set to it.
+ */
+ l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
+ key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
} else if (hash_location) {
+ int dif = tcp_v6_iif_l3_slave(skb);
+ int sdif = tcp_v6_sdif(skb);
+ int l3index;
+
/*
* active side is lost. Try to find listening socket through
* source port, and then find md5 key through listening socket.
@@ -964,13 +1008,16 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
&tcp_hashinfo, NULL, 0,
&ipv6h->saddr,
th->source, &ipv6h->daddr,
- ntohs(th->source),
- tcp_v6_iif_l3_slave(skb),
- tcp_v6_sdif(skb));
+ ntohs(th->source), dif, sdif);
if (!sk1)
goto out;
- key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr);
+ /* sdif set, means packet ingressed via a device
+ * in an L3 domain and dif is set to it.
+ */
+ l3index = tcp_v6_sdif(skb) ? dif : 0;
+
+ key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
if (!key)
goto out;
@@ -1040,6 +1087,10 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
struct request_sock *req)
{
+ int l3index;
+
+ l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
+
/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
* sk->sk_state == TCP_SYN_RECV -> for Fast Open.
*/
@@ -1054,7 +1105,7 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
req->ts_recent, sk->sk_bound_dev_if,
- tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr),
+ tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
0, 0, sk->sk_priority);
}
@@ -1126,6 +1177,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
struct tcp_md5sig_key *key;
+ int l3index;
#endif
struct flowi6 fl6;
@@ -1269,8 +1321,10 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
#ifdef CONFIG_TCP_MD5SIG
+ l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);
+
/* Copy over the MD5 key from the original socket */
- key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr);
+ key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
if (key) {
/* We're using one, so create a matching key
* on the newsk structure. If we fail to get
@@ -1278,7 +1332,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
* across. Shucks.
*/
tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
- AF_INET6, 128, key->key, key->keylen,
+ AF_INET6, 128, l3index, key->key, key->keylen,
sk_gfp_mask(sk, GFP_ATOMIC));
}
#endif
@@ -1480,6 +1534,7 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
{
struct sk_buff *skb_to_free;
int sdif = inet6_sdif(skb);
+ int dif = inet6_iif(skb);
const struct tcphdr *th;
const struct ipv6hdr *hdr;
bool refcounted;
@@ -1528,7 +1583,7 @@ process:
struct sock *nsk;
sk = req->rsk_listener;
- if (tcp_v6_inbound_md5_hash(sk, skb)) {
+ if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif)) {
sk_drops_add(sk, skb);
reqsk_put(req);
goto discard_it;
@@ -1583,7 +1638,7 @@ process:
if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
goto discard_and_relse;
- if (tcp_v6_inbound_md5_hash(sk, skb))
+ if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif))
goto discard_and_relse;
if (tcp_filter(sk, skb))
@@ -1739,7 +1794,7 @@ static struct timewait_sock_ops tcp6_timewait_sock_ops = {
.twsk_destructor = tcp_twsk_destructor,
};
-static const struct inet_connection_sock_af_ops ipv6_specific = {
+const struct inet_connection_sock_af_ops ipv6_specific = {
.queue_xmit = inet6_csk_xmit,
.send_check = tcp_v6_send_check,
.rebuild_header = inet6_sk_rebuild_header,
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index 64b8f05d6735..f0d5fc27d0b5 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -127,7 +127,7 @@ struct sk_buff *udp6_gro_receive(struct list_head *head, struct sk_buff *skb)
ip6_gro_compute_pseudo))
goto flush;
else if (uh->check)
- skb_gro_checksum_try_convert(skb, IPPROTO_UDP, uh->check,
+ skb_gro_checksum_try_convert(skb, IPPROTO_UDP,
ip6_gro_compute_pseudo);
skip:
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 699e0730ce8e..af7a4b8b1e9c 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -98,12 +98,13 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
}
static void xfrm6_update_pmtu(struct dst_entry *dst, struct sock *sk,
- struct sk_buff *skb, u32 mtu)
+ struct sk_buff *skb, u32 mtu,
+ bool confirm_neigh)
{
struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
struct dst_entry *path = xdst->route;
- path->ops->update_pmtu(path, sk, skb, mtu);
+ path->ops->update_pmtu(path, sk, skb, mtu, confirm_neigh);
}
static void xfrm6_redirect(struct dst_entry *dst, struct sock *sk,
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index ebb62a4ebe30..c4bdcbc84b07 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -50,7 +50,7 @@ static struct iucv_interface *pr_iucv;
static const u8 iprm_shutdown[8] =
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01};
-#define TRGCLS_SIZE FIELD_SIZEOF(struct iucv_message, class)
+#define TRGCLS_SIZE sizeof_field(struct iucv_message, class)
#define __iucv_sock_wait(sk, condition, timeo, ret) \
do { \
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index f82ea12bac37..c99223cb3338 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -122,8 +122,6 @@ static inline struct l2tp_tunnel *l2tp_tunnel(struct sock *sk)
static inline struct l2tp_net *l2tp_pernet(const struct net *net)
{
- BUG_ON(!net);
-
return net_generic(net, l2tp_net_id);
}
diff --git a/net/llc/llc_station.c b/net/llc/llc_station.c
index 204a8351efff..c29170e767a8 100644
--- a/net/llc/llc_station.c
+++ b/net/llc/llc_station.c
@@ -32,7 +32,7 @@ static int llc_stat_ev_rx_null_dsap_xid_c(struct sk_buff *skb)
return LLC_PDU_IS_CMD(pdu) && /* command PDU */
LLC_PDU_TYPE_IS_U(pdu) && /* U type PDU */
LLC_U_PDU_CMD(pdu) == LLC_1_PDU_CMD_XID &&
- !pdu->dsap ? 0 : 1; /* NULL DSAP value */
+ !pdu->dsap; /* NULL DSAP value */
}
static int llc_stat_ev_rx_null_dsap_test_c(struct sk_buff *skb)
@@ -42,7 +42,7 @@ static int llc_stat_ev_rx_null_dsap_test_c(struct sk_buff *skb)
return LLC_PDU_IS_CMD(pdu) && /* command PDU */
LLC_PDU_TYPE_IS_U(pdu) && /* U type PDU */
LLC_U_PDU_CMD(pdu) == LLC_1_PDU_CMD_TEST &&
- !pdu->dsap ? 0 : 1; /* NULL DSAP */
+ !pdu->dsap; /* NULL DSAP */
}
static int llc_station_ac_send_xid_r(struct sk_buff *skb)
diff --git a/net/mac80211/airtime.c b/net/mac80211/airtime.c
index 63cb0028b02d..9fc2968856c0 100644
--- a/net/mac80211/airtime.c
+++ b/net/mac80211/airtime.c
@@ -442,7 +442,7 @@ u32 ieee80211_calc_rx_airtime(struct ieee80211_hw *hw,
return 0;
sband = hw->wiphy->bands[status->band];
- if (!sband || status->rate_idx > sband->n_bitrates)
+ if (!sband || status->rate_idx >= sband->n_bitrates)
return 0;
rate = &sband->bitrates[status->rate_idx];
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index b3c9001d1f43..c80b1e163ea4 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -201,8 +201,6 @@ static ssize_t sta_airtime_read(struct file *file, char __user *userbuf,
char *buf = kzalloc(bufsz, GFP_KERNEL), *p = buf;
u64 rx_airtime = 0, tx_airtime = 0;
s64 deficit[IEEE80211_NUM_ACS];
- u32 q_depth[IEEE80211_NUM_ACS];
- u32 q_limit_l[IEEE80211_NUM_ACS], q_limit_h[IEEE80211_NUM_ACS];
ssize_t rv;
int ac;
@@ -214,6 +212,56 @@ static ssize_t sta_airtime_read(struct file *file, char __user *userbuf,
rx_airtime += sta->airtime[ac].rx_airtime;
tx_airtime += sta->airtime[ac].tx_airtime;
deficit[ac] = sta->airtime[ac].deficit;
+ spin_unlock_bh(&local->active_txq_lock[ac]);
+ }
+
+ p += scnprintf(p, bufsz + buf - p,
+ "RX: %llu us\nTX: %llu us\nWeight: %u\n"
+ "Deficit: VO: %lld us VI: %lld us BE: %lld us BK: %lld us\n",
+ rx_airtime, tx_airtime, sta->airtime_weight,
+ deficit[0], deficit[1], deficit[2], deficit[3]);
+
+ rv = simple_read_from_buffer(userbuf, count, ppos, buf, p - buf);
+ kfree(buf);
+ return rv;
+}
+
+static ssize_t sta_airtime_write(struct file *file, const char __user *userbuf,
+ size_t count, loff_t *ppos)
+{
+ struct sta_info *sta = file->private_data;
+ struct ieee80211_local *local = sta->sdata->local;
+ int ac;
+
+ for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
+ spin_lock_bh(&local->active_txq_lock[ac]);
+ sta->airtime[ac].rx_airtime = 0;
+ sta->airtime[ac].tx_airtime = 0;
+ sta->airtime[ac].deficit = sta->airtime_weight;
+ spin_unlock_bh(&local->active_txq_lock[ac]);
+ }
+
+ return count;
+}
+STA_OPS_RW(airtime);
+
+static ssize_t sta_aql_read(struct file *file, char __user *userbuf,
+ size_t count, loff_t *ppos)
+{
+ struct sta_info *sta = file->private_data;
+ struct ieee80211_local *local = sta->sdata->local;
+ size_t bufsz = 400;
+ char *buf = kzalloc(bufsz, GFP_KERNEL), *p = buf;
+ u32 q_depth[IEEE80211_NUM_ACS];
+ u32 q_limit_l[IEEE80211_NUM_ACS], q_limit_h[IEEE80211_NUM_ACS];
+ ssize_t rv;
+ int ac;
+
+ if (!buf)
+ return -ENOMEM;
+
+ for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
+ spin_lock_bh(&local->active_txq_lock[ac]);
q_limit_l[ac] = sta->airtime[ac].aql_limit_low;
q_limit_h[ac] = sta->airtime[ac].aql_limit_high;
spin_unlock_bh(&local->active_txq_lock[ac]);
@@ -221,12 +269,8 @@ static ssize_t sta_airtime_read(struct file *file, char __user *userbuf,
}
p += scnprintf(p, bufsz + buf - p,
- "RX: %llu us\nTX: %llu us\nWeight: %u\n"
- "Deficit: VO: %lld us VI: %lld us BE: %lld us BK: %lld us\n"
"Q depth: VO: %u us VI: %u us BE: %u us BK: %u us\n"
"Q limit[low/high]: VO: %u/%u VI: %u/%u BE: %u/%u BK: %u/%u\n",
- rx_airtime, tx_airtime, sta->airtime_weight,
- deficit[0], deficit[1], deficit[2], deficit[3],
q_depth[0], q_depth[1], q_depth[2], q_depth[3],
q_limit_l[0], q_limit_h[0], q_limit_l[1], q_limit_h[1],
q_limit_l[2], q_limit_h[2], q_limit_l[3], q_limit_h[3]),
@@ -236,11 +280,10 @@ static ssize_t sta_airtime_read(struct file *file, char __user *userbuf,
return rv;
}
-static ssize_t sta_airtime_write(struct file *file, const char __user *userbuf,
+static ssize_t sta_aql_write(struct file *file, const char __user *userbuf,
size_t count, loff_t *ppos)
{
struct sta_info *sta = file->private_data;
- struct ieee80211_local *local = sta->sdata->local;
u32 ac, q_limit_l, q_limit_h;
char _buf[100] = {}, *buf = _buf;
@@ -251,7 +294,7 @@ static ssize_t sta_airtime_write(struct file *file, const char __user *userbuf,
return -EFAULT;
buf[sizeof(_buf) - 1] = '\0';
- if (sscanf(buf, "queue limit %u %u %u", &ac, &q_limit_l, &q_limit_h)
+ if (sscanf(buf, "limit %u %u %u", &ac, &q_limit_l, &q_limit_h)
!= 3)
return -EINVAL;
@@ -261,17 +304,10 @@ static ssize_t sta_airtime_write(struct file *file, const char __user *userbuf,
sta->airtime[ac].aql_limit_low = q_limit_l;
sta->airtime[ac].aql_limit_high = q_limit_h;
- for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
- spin_lock_bh(&local->active_txq_lock[ac]);
- sta->airtime[ac].rx_airtime = 0;
- sta->airtime[ac].tx_airtime = 0;
- sta->airtime[ac].deficit = sta->airtime_weight;
- spin_unlock_bh(&local->active_txq_lock[ac]);
- }
-
return count;
}
-STA_OPS_RW(airtime);
+STA_OPS_RW(aql);
+
static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf,
size_t count, loff_t *ppos)
@@ -996,6 +1032,10 @@ void ieee80211_sta_debugfs_add(struct sta_info *sta)
NL80211_EXT_FEATURE_AIRTIME_FAIRNESS))
DEBUGFS_ADD(airtime);
+ if (wiphy_ext_feature_isset(local->hw.wiphy,
+ NL80211_EXT_FEATURE_AQL))
+ DEBUGFS_ADD(aql);
+
debugfs_create_xul("driver_buffered_tids", 0400, sta->debugfs_dir,
&sta->driver_buffered_tids);
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 6cca0853f183..4c2b5ba3ac09 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -672,9 +672,7 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len,
IEEE80211_DEFAULT_AQL_TXQ_LIMIT_H;
}
- local->airtime_flags = AIRTIME_USE_TX |
- AIRTIME_USE_RX |
- AIRTIME_USE_AQL;
+ local->airtime_flags = AIRTIME_USE_TX | AIRTIME_USE_RX;
local->aql_threshold = IEEE80211_AQL_THRESHOLD;
atomic_set(&local->aql_total_pending_airtime, 0);
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 8eafd81e97b4..0f5f40678885 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -1916,6 +1916,9 @@ void ieee80211_sta_update_pending_airtime(struct ieee80211_local *local,
{
int tx_pending;
+ if (!wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_AQL))
+ return;
+
if (!tx_completed) {
if (sta)
atomic_add(tx_airtime,
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index ad5d8a4ae56d..c00e28585f9d 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -127,7 +127,6 @@ enum ieee80211_agg_stop_reason {
/* Debugfs flags to enable/disable use of RX/TX airtime in scheduler */
#define AIRTIME_USE_TX BIT(0)
#define AIRTIME_USE_RX BIT(1)
-#define AIRTIME_USE_AQL BIT(2)
struct airtime_info {
u64 rx_airtime;
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index b696b9136f4c..a8a7306a1f56 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -2256,6 +2256,15 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb,
payload[7]);
}
+ /*
+ * Initialize skb->priority for QoS frames. This is put in the TID field
+ * of the frame before passing it to the driver.
+ */
+ if (ieee80211_is_data_qos(hdr->frame_control)) {
+ u8 *p = ieee80211_get_qos_ctl(hdr);
+ skb->priority = *p & IEEE80211_QOS_CTL_TAG1D_MASK;
+ }
+
memset(info, 0, sizeof(*info));
info->flags = IEEE80211_TX_CTL_REQ_TX_STATUS |
@@ -3668,7 +3677,7 @@ begin:
IEEE80211_SKB_CB(skb)->control.vif = vif;
- if (local->airtime_flags & AIRTIME_USE_AQL) {
+ if (wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_AQL)) {
u32 airtime;
airtime = ieee80211_calc_expected_tx_airtime(hw, vif, txq->sta,
@@ -3790,7 +3799,7 @@ bool ieee80211_txq_airtime_check(struct ieee80211_hw *hw,
struct sta_info *sta;
struct ieee80211_local *local = hw_to_local(hw);
- if (!(local->airtime_flags & AIRTIME_USE_AQL))
+ if (!wiphy_ext_feature_isset(local->hw.wiphy, NL80211_EXT_FEATURE_AQL))
return true;
if (!txq->sta)
diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h
index ad3fd7f1da75..e37102546be6 100644
--- a/net/ncsi/internal.h
+++ b/net/ncsi/internal.h
@@ -64,6 +64,17 @@ enum {
NCSI_MODE_MAX
};
+/* Supported media status bits for Mellanox Mac affinity command.
+ * Bit (0-2) for different protocol support; Bit 1 for RBT support,
+ * bit 1 for SMBUS support and bit 2 for PCIE support. Bit (3-5)
+ * for different protocol availability. Bit 4 for RBT, bit 4 for
+ * SMBUS and bit 5 for PCIE.
+ */
+enum {
+ MLX_MC_RBT_SUPPORT = 0x01, /* MC supports RBT */
+ MLX_MC_RBT_AVL = 0x08, /* RBT medium is available */
+};
+
/* OEM Vendor Manufacture ID */
#define NCSI_OEM_MFR_MLX_ID 0x8119
#define NCSI_OEM_MFR_BCM_ID 0x113d
@@ -72,9 +83,15 @@ enum {
/* Mellanox specific OEM Command */
#define NCSI_OEM_MLX_CMD_GMA 0x00 /* CMD ID for Get MAC */
#define NCSI_OEM_MLX_CMD_GMA_PARAM 0x1b /* Parameter for GMA */
+#define NCSI_OEM_MLX_CMD_SMAF 0x01 /* CMD ID for Set MC Affinity */
+#define NCSI_OEM_MLX_CMD_SMAF_PARAM 0x07 /* Parameter for SMAF */
/* OEM Command payload lengths*/
#define NCSI_OEM_BCM_CMD_GMA_LEN 12
#define NCSI_OEM_MLX_CMD_GMA_LEN 8
+#define NCSI_OEM_MLX_CMD_SMAF_LEN 60
+/* Offset in OEM request */
+#define MLX_SMAF_MAC_ADDR_OFFSET 8 /* Offset for MAC in SMAF */
+#define MLX_SMAF_MED_SUPPORT_OFFSET 14 /* Offset for medium in SMAF */
/* Mac address offset in OEM response */
#define BCM_MAC_ADDR_OFFSET 28
#define MLX_MAC_ADDR_OFFSET 8
@@ -251,6 +268,8 @@ enum {
ncsi_dev_state_probe_deselect = 0x0201,
ncsi_dev_state_probe_package,
ncsi_dev_state_probe_channel,
+ ncsi_dev_state_probe_mlx_gma,
+ ncsi_dev_state_probe_mlx_smaf,
ncsi_dev_state_probe_cis,
ncsi_dev_state_probe_gvi,
ncsi_dev_state_probe_gc,
@@ -311,6 +330,7 @@ struct ncsi_dev_priv {
struct list_head vlan_vids; /* List of active VLAN IDs */
bool multi_package; /* Enable multiple packages */
+ bool mlx_multi_host; /* Enable multi host Mellanox */
u32 package_whitelist; /* Packages to configure */
};
diff --git a/net/ncsi/ncsi-cmd.c b/net/ncsi/ncsi-cmd.c
index 0187e65176c0..ba9ae482141b 100644
--- a/net/ncsi/ncsi-cmd.c
+++ b/net/ncsi/ncsi-cmd.c
@@ -369,7 +369,15 @@ int ncsi_xmit_cmd(struct ncsi_cmd_arg *nca)
eh = skb_push(nr->cmd, sizeof(*eh));
eh->h_proto = htons(ETH_P_NCSI);
eth_broadcast_addr(eh->h_dest);
- eth_broadcast_addr(eh->h_source);
+
+ /* If mac address received from device then use it for
+ * source address as unicast address else use broadcast
+ * address as source address
+ */
+ if (nca->ndp->gma_flag == 1)
+ memcpy(eh->h_source, nca->ndp->ndev.dev->dev_addr, ETH_ALEN);
+ else
+ eth_broadcast_addr(eh->h_source);
/* Start the timer for the request that might not have
* corresponding response. Given NCSI is an internal
diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c
index 70fe02697544..1f387be7827b 100644
--- a/net/ncsi/ncsi-manage.c
+++ b/net/ncsi/ncsi-manage.c
@@ -8,6 +8,8 @@
#include <linux/init.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
#include <net/ncsi.h>
#include <net/net_namespace.h>
@@ -730,6 +732,34 @@ static int ncsi_oem_gma_handler_mlx(struct ncsi_cmd_arg *nca)
return ret;
}
+static int ncsi_oem_smaf_mlx(struct ncsi_cmd_arg *nca)
+{
+ union {
+ u8 data_u8[NCSI_OEM_MLX_CMD_SMAF_LEN];
+ u32 data_u32[NCSI_OEM_MLX_CMD_SMAF_LEN / sizeof(u32)];
+ } u;
+ int ret = 0;
+
+ memset(&u, 0, sizeof(u));
+ u.data_u32[0] = ntohl(NCSI_OEM_MFR_MLX_ID);
+ u.data_u8[5] = NCSI_OEM_MLX_CMD_SMAF;
+ u.data_u8[6] = NCSI_OEM_MLX_CMD_SMAF_PARAM;
+ memcpy(&u.data_u8[MLX_SMAF_MAC_ADDR_OFFSET],
+ nca->ndp->ndev.dev->dev_addr, ETH_ALEN);
+ u.data_u8[MLX_SMAF_MED_SUPPORT_OFFSET] =
+ (MLX_MC_RBT_AVL | MLX_MC_RBT_SUPPORT);
+
+ nca->payload = NCSI_OEM_MLX_CMD_SMAF_LEN;
+ nca->data = u.data_u8;
+
+ ret = ncsi_xmit_cmd(nca);
+ if (ret)
+ netdev_err(nca->ndp->ndev.dev,
+ "NCSI: Failed to transmit cmd 0x%x during probe\n",
+ nca->type);
+ return ret;
+}
+
/* OEM Command handlers initialization */
static struct ncsi_oem_gma_handler {
unsigned int mfr_id;
@@ -764,9 +794,6 @@ static int ncsi_gma_handler(struct ncsi_cmd_arg *nca, unsigned int mf_id)
return -1;
}
- /* Set the flag for GMA command which should only be called once */
- nca->ndp->gma_flag = 1;
-
/* Get Mac address from NCSI device */
return nch->handler(nca);
}
@@ -1313,8 +1340,38 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp)
break;
}
nd->state = ncsi_dev_state_probe_cis;
+ if (IS_ENABLED(CONFIG_NCSI_OEM_CMD_GET_MAC) &&
+ ndp->mlx_multi_host)
+ nd->state = ncsi_dev_state_probe_mlx_gma;
+
schedule_work(&ndp->work);
break;
+#if IS_ENABLED(CONFIG_NCSI_OEM_CMD_GET_MAC)
+ case ncsi_dev_state_probe_mlx_gma:
+ ndp->pending_req_num = 1;
+
+ nca.type = NCSI_PKT_CMD_OEM;
+ nca.package = ndp->active_package->id;
+ nca.channel = 0;
+ ret = ncsi_oem_gma_handler_mlx(&nca);
+ if (ret)
+ goto error;
+
+ nd->state = ncsi_dev_state_probe_mlx_smaf;
+ break;
+ case ncsi_dev_state_probe_mlx_smaf:
+ ndp->pending_req_num = 1;
+
+ nca.type = NCSI_PKT_CMD_OEM;
+ nca.package = ndp->active_package->id;
+ nca.channel = 0;
+ ret = ncsi_oem_smaf_mlx(&nca);
+ if (ret)
+ goto error;
+
+ nd->state = ncsi_dev_state_probe_cis;
+ break;
+#endif /* CONFIG_NCSI_OEM_CMD_GET_MAC */
case ncsi_dev_state_probe_cis:
ndp->pending_req_num = NCSI_RESERVED_CHANNEL;
@@ -1624,6 +1681,8 @@ struct ncsi_dev *ncsi_register_dev(struct net_device *dev,
{
struct ncsi_dev_priv *ndp;
struct ncsi_dev *nd;
+ struct platform_device *pdev;
+ struct device_node *np;
unsigned long flags;
int i;
@@ -1670,6 +1729,13 @@ struct ncsi_dev *ncsi_register_dev(struct net_device *dev,
/* Set up generic netlink interface */
ncsi_init_netlink(dev);
+ pdev = to_platform_device(dev->dev.parent);
+ if (pdev) {
+ np = pdev->dev.of_node;
+ if (np && of_get_property(np, "mlx,multi-host", NULL))
+ ndp->mlx_multi_host = true;
+ }
+
return nd;
}
EXPORT_SYMBOL_GPL(ncsi_register_dev);
diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c
index d5611f04926d..a94bb59793f0 100644
--- a/net/ncsi/ncsi-rsp.c
+++ b/net/ncsi/ncsi-rsp.c
@@ -627,6 +627,9 @@ static int ncsi_rsp_handler_oem_mlx_gma(struct ncsi_request *nr)
saddr.sa_family = ndev->type;
ndev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
memcpy(saddr.sa_data, &rsp->data[MLX_MAC_ADDR_OFFSET], ETH_ALEN);
+ /* Set the flag for GMA command which should only be called once */
+ ndp->gma_flag = 1;
+
ret = ops->ndo_set_mac_address(ndev, &saddr);
if (ret < 0)
netdev_warn(ndev, "NCSI: 'Writing mac address to device failed\n");
@@ -671,6 +674,9 @@ static int ncsi_rsp_handler_oem_bcm_gma(struct ncsi_request *nr)
if (!is_valid_ether_addr((const u8 *)saddr.sa_data))
return -ENXIO;
+ /* Set the flag for GMA command which should only be called once */
+ ndp->gma_flag = 1;
+
ret = ops->ndo_set_mac_address(ndev, &saddr);
if (ret < 0)
netdev_warn(ndev, "NCSI: 'Writing mac address to device failed\n");
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 169e0a04f814..cf895bc80871 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -1848,6 +1848,7 @@ static int ip_set_utest(struct net *net, struct sock *ctnl, struct sk_buff *skb,
struct ip_set *set;
struct nlattr *tb[IPSET_ATTR_ADT_MAX + 1] = {};
int ret = 0;
+ u32 lineno;
if (unlikely(protocol_min_failed(attr) ||
!attr[IPSET_ATTR_SETNAME] ||
@@ -1864,7 +1865,7 @@ static int ip_set_utest(struct net *net, struct sock *ctnl, struct sk_buff *skb,
return -IPSET_ERR_PROTOCOL;
rcu_read_lock_bh();
- ret = set->variant->uadt(set, tb, IPSET_TEST, NULL, 0, 0);
+ ret = set->variant->uadt(set, tb, IPSET_TEST, &lineno, 0, 0);
rcu_read_unlock_bh();
/* Userspace can't trigger element to be re-added */
if (ret == -EAGAIN)
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index b1e300f8881b..b00866d777fe 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -208,7 +208,7 @@ static inline void maybe_update_pmtu(int skb_af, struct sk_buff *skb, int mtu)
struct rtable *ort = skb_rtable(skb);
if (!skb->dev && sk && sk_fullsock(sk))
- ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu);
+ ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu, true);
}
static inline bool ensure_mtu_is_adequate(struct netns_ipvs *ipvs, int skb_af,
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 0af1898af2b8..f4c4b467c87e 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -895,9 +895,10 @@ static void nf_ct_acct_merge(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
}
/* Resolve race on insertion if this protocol allows this. */
-static int nf_ct_resolve_clash(struct net *net, struct sk_buff *skb,
- enum ip_conntrack_info ctinfo,
- struct nf_conntrack_tuple_hash *h)
+static __cold noinline int
+nf_ct_resolve_clash(struct net *net, struct sk_buff *skb,
+ enum ip_conntrack_info ctinfo,
+ struct nf_conntrack_tuple_hash *h)
{
/* This is the conntrack entry already in hashes that won race. */
struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
@@ -2333,7 +2334,6 @@ int nf_conntrack_set_hashsize(const char *val, const struct kernel_param *kp)
return nf_conntrack_hash_resize(hashsize);
}
-EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize);
static __always_inline unsigned int total_extension_size(void)
{
diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c
index c24e5b64b00c..3dbe2329c3f1 100644
--- a/net/netfilter/nf_conntrack_extend.c
+++ b/net/netfilter/nf_conntrack_extend.c
@@ -37,7 +37,6 @@ void nf_ct_ext_destroy(struct nf_conn *ct)
kfree(ct->ext);
}
-EXPORT_SYMBOL(nf_ct_ext_destroy);
void *nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp)
{
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index d8d33ef52ce0..6a1c8f1f6171 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -3626,6 +3626,9 @@ static void __net_exit ctnetlink_net_exit_batch(struct list_head *net_exit_list)
list_for_each_entry(net, net_exit_list, exit_list)
ctnetlink_net_exit(net);
+
+ /* wait for other cpus until they are done with ctnl_notifiers */
+ synchronize_rcu();
}
static struct pernet_operations ctnetlink_net_ops = {
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index b6b14db3955b..b3f4a334f9d7 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -677,6 +677,9 @@ static int dccp_timeout_nlattr_to_obj(struct nlattr *tb[],
unsigned int *timeouts = data;
int i;
+ if (!timeouts)
+ timeouts = dn->dccp_timeout;
+
/* set default DCCP timeouts. */
for (i=0; i<CT_DCCP_MAX; i++)
timeouts[i] = dn->dccp_timeout[i];
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index fce3d93f1541..0399ae8f1188 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -594,6 +594,9 @@ static int sctp_timeout_nlattr_to_obj(struct nlattr *tb[],
struct nf_sctp_net *sn = nf_sctp_pernet(net);
int i;
+ if (!timeouts)
+ timeouts = sn->timeouts;
+
/* set default SCTP timeouts. */
for (i=0; i<SCTP_CONNTRACK_MAX; i++)
timeouts[i] = sn->timeouts[i];
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 9889d52eda82..e33a73cb1f42 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -134,11 +134,6 @@ static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp)
#define NF_FLOWTABLE_TCP_PICKUP_TIMEOUT (120 * HZ)
#define NF_FLOWTABLE_UDP_PICKUP_TIMEOUT (30 * HZ)
-static inline __s32 nf_flow_timeout_delta(unsigned int timeout)
-{
- return (__s32)(timeout - (u32)jiffies);
-}
-
static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
{
const struct nf_conntrack_l4proto *l4proto;
@@ -232,7 +227,7 @@ int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
{
int err;
- flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
+ flow->timeout = nf_flowtable_time_stamp + NF_FLOW_TIMEOUT;
err = rhashtable_insert_fast(&flow_table->rhashtable,
&flow->tuplehash[0].node,
diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
index b9e7dd6e60ce..7ea2ddc2aa93 100644
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -280,7 +280,7 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
if (nf_flow_nat_ip(flow, skb, thoff, dir) < 0)
return NF_DROP;
- flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
+ flow->timeout = nf_flowtable_time_stamp + NF_FLOW_TIMEOUT;
iph = ip_hdr(skb);
ip_decrease_ttl(iph);
skb->tstamp = 0;
@@ -509,7 +509,7 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
if (nf_flow_nat_ipv6(flow, skb, dir) < 0)
return NF_DROP;
- flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
+ flow->timeout = nf_flowtable_time_stamp + NF_FLOW_TIMEOUT;
ip6h = ipv6_hdr(skb);
ip6h->hop_limit--;
skb->tstamp = 0;
diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
index c54c9a6cc981..d06969af1085 100644
--- a/net/netfilter/nf_flow_table_offload.c
+++ b/net/netfilter/nf_flow_table_offload.c
@@ -28,6 +28,7 @@ struct nf_flow_key {
struct flow_dissector_key_basic basic;
union {
struct flow_dissector_key_ipv4_addrs ipv4;
+ struct flow_dissector_key_ipv6_addrs ipv6;
};
struct flow_dissector_key_tcp tcp;
struct flow_dissector_key_ports tp;
@@ -57,6 +58,7 @@ static int nf_flow_rule_match(struct nf_flow_match *match,
NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_CONTROL, control);
NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_BASIC, basic);
NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4);
+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6);
NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_TCP, tcp);
NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_PORTS, tp);
@@ -69,15 +71,24 @@ static int nf_flow_rule_match(struct nf_flow_match *match,
key->ipv4.dst = tuple->dst_v4.s_addr;
mask->ipv4.dst = 0xffffffff;
break;
+ case AF_INET6:
+ key->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+ key->basic.n_proto = htons(ETH_P_IPV6);
+ key->ipv6.src = tuple->src_v6;
+ memset(&mask->ipv6.src, 0xff, sizeof(mask->ipv6.src));
+ key->ipv6.dst = tuple->dst_v6;
+ memset(&mask->ipv6.dst, 0xff, sizeof(mask->ipv6.dst));
+ break;
default:
return -EOPNOTSUPP;
}
+ match->dissector.used_keys |= BIT(key->control.addr_type);
mask->basic.n_proto = 0xffff;
switch (tuple->l4proto) {
case IPPROTO_TCP:
key->tcp.flags = 0;
- mask->tcp.flags = TCP_FLAG_RST | TCP_FLAG_FIN;
+ mask->tcp.flags = cpu_to_be16(be32_to_cpu(TCP_FLAG_RST | TCP_FLAG_FIN) >> 16);
match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_TCP);
break;
case IPPROTO_UDP:
@@ -96,14 +107,13 @@ static int nf_flow_rule_match(struct nf_flow_match *match,
match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_CONTROL) |
BIT(FLOW_DISSECTOR_KEY_BASIC) |
- BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
BIT(FLOW_DISSECTOR_KEY_PORTS);
return 0;
}
static void flow_offload_mangle(struct flow_action_entry *entry,
- enum flow_action_mangle_base htype,
- u32 offset, u8 *value, u8 *mask)
+ enum flow_action_mangle_base htype, u32 offset,
+ const __be32 *value, const __be32 *mask)
{
entry->id = FLOW_ACTION_MANGLE;
entry->mangle.htype = htype;
@@ -140,12 +150,12 @@ static int flow_offload_eth_src(struct net *net,
memcpy(&val16, dev->dev_addr, 2);
val = val16 << 16;
flow_offload_mangle(entry0, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4,
- (u8 *)&val, (u8 *)&mask);
+ &val, &mask);
mask = ~0xffffffff;
memcpy(&val, dev->dev_addr + 2, 4);
flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 8,
- (u8 *)&val, (u8 *)&mask);
+ &val, &mask);
dev_put(dev);
return 0;
@@ -156,27 +166,41 @@ static int flow_offload_eth_dst(struct net *net,
enum flow_offload_tuple_dir dir,
struct nf_flow_rule *flow_rule)
{
- const struct flow_offload_tuple *tuple = &flow->tuplehash[dir].tuple;
struct flow_action_entry *entry0 = flow_action_entry_next(flow_rule);
struct flow_action_entry *entry1 = flow_action_entry_next(flow_rule);
+ const void *daddr = &flow->tuplehash[!dir].tuple.src_v4;
+ const struct dst_entry *dst_cache;
+ unsigned char ha[ETH_ALEN];
struct neighbour *n;
u32 mask, val;
+ u8 nud_state;
u16 val16;
- n = dst_neigh_lookup(tuple->dst_cache, &tuple->dst_v4);
+ dst_cache = flow->tuplehash[dir].tuple.dst_cache;
+ n = dst_neigh_lookup(dst_cache, daddr);
if (!n)
return -ENOENT;
+ read_lock_bh(&n->lock);
+ nud_state = n->nud_state;
+ ether_addr_copy(ha, n->ha);
+ read_unlock_bh(&n->lock);
+
+ if (!(nud_state & NUD_VALID)) {
+ neigh_release(n);
+ return -ENOENT;
+ }
+
mask = ~0xffffffff;
- memcpy(&val, n->ha, 4);
+ memcpy(&val, ha, 4);
flow_offload_mangle(entry0, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 0,
- (u8 *)&val, (u8 *)&mask);
+ &val, &mask);
mask = ~0x0000ffff;
- memcpy(&val16, n->ha + 4, 2);
+ memcpy(&val16, ha + 4, 2);
val = val16;
flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4,
- (u8 *)&val, (u8 *)&mask);
+ &val, &mask);
neigh_release(n);
return 0;
@@ -206,7 +230,7 @@ static void flow_offload_ipv4_snat(struct net *net,
}
flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset,
- (u8 *)&addr, (u8 *)&mask);
+ &addr, &mask);
}
static void flow_offload_ipv4_dnat(struct net *net,
@@ -233,12 +257,12 @@ static void flow_offload_ipv4_dnat(struct net *net,
}
flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset,
- (u8 *)&addr, (u8 *)&mask);
+ &addr, &mask);
}
static void flow_offload_ipv6_mangle(struct nf_flow_rule *flow_rule,
unsigned int offset,
- u8 *addr, u8 *mask)
+ const __be32 *addr, const __be32 *mask)
{
struct flow_action_entry *entry;
int i;
@@ -246,8 +270,7 @@ static void flow_offload_ipv6_mangle(struct nf_flow_rule *flow_rule,
for (i = 0; i < sizeof(struct in6_addr) / sizeof(u32); i += sizeof(u32)) {
entry = flow_action_entry_next(flow_rule);
flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP6,
- offset + i,
- &addr[i], mask);
+ offset + i, &addr[i], mask);
}
}
@@ -257,23 +280,23 @@ static void flow_offload_ipv6_snat(struct net *net,
struct nf_flow_rule *flow_rule)
{
u32 mask = ~htonl(0xffffffff);
- const u8 *addr;
+ const __be32 *addr;
u32 offset;
switch (dir) {
case FLOW_OFFLOAD_DIR_ORIGINAL:
- addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6.s6_addr;
+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6.s6_addr32;
offset = offsetof(struct ipv6hdr, saddr);
break;
case FLOW_OFFLOAD_DIR_REPLY:
- addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6.s6_addr;
+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6.s6_addr32;
offset = offsetof(struct ipv6hdr, daddr);
break;
default:
return;
}
- flow_offload_ipv6_mangle(flow_rule, offset, (u8 *)addr, (u8 *)&mask);
+ flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask);
}
static void flow_offload_ipv6_dnat(struct net *net,
@@ -282,23 +305,23 @@ static void flow_offload_ipv6_dnat(struct net *net,
struct nf_flow_rule *flow_rule)
{
u32 mask = ~htonl(0xffffffff);
- const u8 *addr;
+ const __be32 *addr;
u32 offset;
switch (dir) {
case FLOW_OFFLOAD_DIR_ORIGINAL:
- addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6.s6_addr;
+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6.s6_addr32;
offset = offsetof(struct ipv6hdr, daddr);
break;
case FLOW_OFFLOAD_DIR_REPLY:
- addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6.s6_addr;
+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6.s6_addr32;
offset = offsetof(struct ipv6hdr, saddr);
break;
default:
return;
}
- flow_offload_ipv6_mangle(flow_rule, offset, (u8 *)addr, (u8 *)&mask);
+ flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask);
}
static int flow_offload_l4proto(const struct flow_offload *flow)
@@ -326,25 +349,28 @@ static void flow_offload_port_snat(struct net *net,
struct nf_flow_rule *flow_rule)
{
struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
- u32 mask = ~htonl(0xffff0000);
- __be16 port;
+ u32 mask, port;
u32 offset;
switch (dir) {
case FLOW_OFFLOAD_DIR_ORIGINAL:
- port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port;
+ port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port);
offset = 0; /* offsetof(struct tcphdr, source); */
+ port = htonl(port << 16);
+ mask = ~htonl(0xffff0000);
break;
case FLOW_OFFLOAD_DIR_REPLY:
- port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port;
+ port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port);
offset = 0; /* offsetof(struct tcphdr, dest); */
+ port = htonl(port);
+ mask = ~htonl(0xffff);
break;
default:
- break;
+ return;
}
flow_offload_mangle(entry, flow_offload_l4proto(flow), offset,
- (u8 *)&port, (u8 *)&mask);
+ &port, &mask);
}
static void flow_offload_port_dnat(struct net *net,
@@ -353,25 +379,28 @@ static void flow_offload_port_dnat(struct net *net,
struct nf_flow_rule *flow_rule)
{
struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
- u32 mask = ~htonl(0xffff);
- __be16 port;
+ u32 mask, port;
u32 offset;
switch (dir) {
case FLOW_OFFLOAD_DIR_ORIGINAL:
- port = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port;
- offset = 0; /* offsetof(struct tcphdr, source); */
+ port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port);
+ offset = 0; /* offsetof(struct tcphdr, dest); */
+ port = htonl(port);
+ mask = ~htonl(0xffff);
break;
case FLOW_OFFLOAD_DIR_REPLY:
- port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port;
- offset = 0; /* offsetof(struct tcphdr, dest); */
+ port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port);
+ offset = 0; /* offsetof(struct tcphdr, source); */
+ port = htonl(port << 16);
+ mask = ~htonl(0xffff0000);
break;
default:
- break;
+ return;
}
flow_offload_mangle(entry, flow_offload_l4proto(flow), offset,
- (u8 *)&port, (u8 *)&mask);
+ &port, &mask);
}
static void flow_offload_ipv4_checksum(struct net *net,
@@ -574,7 +603,7 @@ static int flow_offload_tuple_add(struct flow_offload_work *offload,
cls_flow.rule = flow_rule->rule;
list_for_each_entry(block_cb, &flowtable->flow_block.cb_list, list) {
- err = block_cb->cb(TC_SETUP_FT, &cls_flow,
+ err = block_cb->cb(TC_SETUP_CLSFLOWER, &cls_flow,
block_cb->cb_priv);
if (err < 0)
continue;
@@ -599,7 +628,7 @@ static void flow_offload_tuple_del(struct flow_offload_work *offload,
&offload->flow->tuplehash[dir].tuple, &extack);
list_for_each_entry(block_cb, &flowtable->flow_block.cb_list, list)
- block_cb->cb(TC_SETUP_FT, &cls_flow, block_cb->cb_priv);
+ block_cb->cb(TC_SETUP_CLSFLOWER, &cls_flow, block_cb->cb_priv);
offload->flow->flags |= FLOW_OFFLOAD_HW_DEAD;
}
@@ -656,7 +685,7 @@ static void flow_offload_tuple_stats(struct flow_offload_work *offload,
&offload->flow->tuplehash[dir].tuple, &extack);
list_for_each_entry(block_cb, &flowtable->flow_block.cb_list, list)
- block_cb->cb(TC_SETUP_FT, &cls_flow, block_cb->cb_priv);
+ block_cb->cb(TC_SETUP_CLSFLOWER, &cls_flow, block_cb->cb_priv);
memcpy(stats, &cls_flow.stats, sizeof(*stats));
}
@@ -752,9 +781,9 @@ void nf_flow_offload_stats(struct nf_flowtable *flowtable,
struct flow_offload *flow)
{
struct flow_offload_work *offload;
- s64 delta;
+ __s32 delta;
- delta = flow->timeout - jiffies;
+ delta = nf_flow_timeout_delta(flow->timeout);
if ((delta >= (9 * NF_FLOW_TIMEOUT) / 10) ||
flow->flags & FLOW_OFFLOAD_HW_DYING)
return;
@@ -822,7 +851,7 @@ int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
bo.extack = &extack;
INIT_LIST_HEAD(&bo.cb_list);
- err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
+ err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_FT, &bo);
if (err < 0)
return err;
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index a2b58de82600..f8f52ff99cfb 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -189,7 +189,7 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state,
goto err;
}
- if (!skb_dst_force(skb) && state->hook != NF_INET_PRE_ROUTING) {
+ if (skb_dst(skb) && !skb_dst_force(skb)) {
status = -ENETDOWN;
goto err;
}
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 062b73a83af0..43f05b3acd60 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -4519,8 +4519,10 @@ static int nft_get_set_elem(struct nft_ctx *ctx, struct nft_set *set,
return err;
err = -EINVAL;
- if (desc.type != NFT_DATA_VALUE || desc.len != set->klen)
+ if (desc.type != NFT_DATA_VALUE || desc.len != set->klen) {
+ nft_data_release(&elem.key.val, desc.type);
return err;
+ }
priv = set->ops->get(ctx->net, set, &elem, flags);
if (IS_ERR(priv))
@@ -4756,14 +4758,20 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
if (nla[NFTA_SET_ELEM_DATA] == NULL &&
!(flags & NFT_SET_ELEM_INTERVAL_END))
return -EINVAL;
- if (nla[NFTA_SET_ELEM_DATA] != NULL &&
- flags & NFT_SET_ELEM_INTERVAL_END)
- return -EINVAL;
} else {
if (nla[NFTA_SET_ELEM_DATA] != NULL)
return -EINVAL;
}
+ if ((flags & NFT_SET_ELEM_INTERVAL_END) &&
+ (nla[NFTA_SET_ELEM_DATA] ||
+ nla[NFTA_SET_ELEM_OBJREF] ||
+ nla[NFTA_SET_ELEM_TIMEOUT] ||
+ nla[NFTA_SET_ELEM_EXPIRATION] ||
+ nla[NFTA_SET_ELEM_USERDATA] ||
+ nla[NFTA_SET_ELEM_EXPR]))
+ return -EINVAL;
+
timeout = 0;
if (nla[NFTA_SET_ELEM_TIMEOUT] != NULL) {
if (!(set->flags & NFT_SET_TIMEOUT))
@@ -5476,7 +5484,7 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk,
if (nlh->nlmsg_flags & NLM_F_REPLACE)
return -EOPNOTSUPP;
- type = nft_obj_type_get(net, objtype);
+ type = __nft_obj_type_get(objtype);
nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
return nf_tables_updobj(&ctx, type, nla[NFTA_OBJ_DATA], obj);
@@ -5976,6 +5984,7 @@ nft_flowtable_type_get(struct net *net, u8 family)
return ERR_PTR(-ENOENT);
}
+/* Only called from error and netdev event paths. */
static void nft_unregister_flowtable_hook(struct net *net,
struct nft_flowtable *flowtable,
struct nft_hook *hook)
@@ -5991,7 +6000,7 @@ static void nft_unregister_flowtable_net_hooks(struct net *net,
struct nft_hook *hook;
list_for_each_entry(hook, &flowtable->hook_list, list)
- nft_unregister_flowtable_hook(net, flowtable, hook);
+ nf_unregister_net_hook(net, &hook->ops);
}
static int nft_register_flowtable_net_hooks(struct net *net,
@@ -6440,12 +6449,14 @@ static void nf_tables_flowtable_destroy(struct nft_flowtable *flowtable)
{
struct nft_hook *hook, *next;
+ flowtable->data.type->free(&flowtable->data);
list_for_each_entry_safe(hook, next, &flowtable->hook_list, list) {
+ flowtable->data.type->setup(&flowtable->data, hook->ops.dev,
+ FLOW_BLOCK_UNBIND);
list_del_rcu(&hook->list);
kfree(hook);
}
kfree(flowtable->name);
- flowtable->data.type->free(&flowtable->data);
module_put(flowtable->data.type->owner);
kfree(flowtable);
}
@@ -6489,6 +6500,7 @@ static void nft_flowtable_event(unsigned long event, struct net_device *dev,
if (hook->ops.dev != dev)
continue;
+ /* flow_offload_netdev_event() cleans up entries for us. */
nft_unregister_flowtable_hook(dev_net(dev), flowtable, hook);
list_del_rcu(&hook->list);
kfree_rcu(hook, rcu);
@@ -7595,7 +7607,7 @@ int nft_validate_register_load(enum nft_registers reg, unsigned int len)
return -EINVAL;
if (len == 0)
return -EINVAL;
- if (reg * NFT_REG32_SIZE + len > FIELD_SIZEOF(struct nft_regs, data))
+ if (reg * NFT_REG32_SIZE + len > sizeof_field(struct nft_regs, data))
return -ERANGE;
return 0;
@@ -7643,7 +7655,7 @@ int nft_validate_register_store(const struct nft_ctx *ctx,
if (len == 0)
return -EINVAL;
if (reg * NFT_REG32_SIZE + len >
- FIELD_SIZEOF(struct nft_regs, data))
+ sizeof_field(struct nft_regs, data))
return -ERANGE;
if (data != NULL && type != NFT_DATA_VALUE)
diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c
index 431f3b803bfb..a9ea29afb09f 100644
--- a/net/netfilter/nf_tables_offload.c
+++ b/net/netfilter/nf_tables_offload.c
@@ -44,6 +44,9 @@ struct nft_flow_rule *nft_flow_rule_create(struct net *net,
expr = nft_expr_next(expr);
}
+ if (num_actions == 0)
+ return ERR_PTR(-EOPNOTSUPP);
+
flow = nft_flow_rule_alloc(num_actions);
if (!flow)
return ERR_PTR(-ENOMEM);
@@ -577,6 +580,9 @@ static int nft_offload_netdev_event(struct notifier_block *this,
struct net *net = dev_net(dev);
struct nft_chain *chain;
+ if (event != NETDEV_UNREGISTER)
+ return NOTIFY_DONE;
+
mutex_lock(&net->nft.commit_mutex);
chain = __nft_offload_get_chain(dev);
if (chain)
diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c
index 7525063c25f5..de3a9596b7f1 100644
--- a/net/netfilter/nfnetlink_cthelper.c
+++ b/net/netfilter/nfnetlink_cthelper.c
@@ -236,7 +236,7 @@ nfnl_cthelper_create(const struct nlattr * const tb[],
nla_strlcpy(helper->name,
tb[NFCTH_NAME], NF_CT_HELPER_NAME_LEN);
size = ntohl(nla_get_be32(tb[NFCTH_PRIV_DATA_LEN]));
- if (size > FIELD_SIZEOF(struct nf_conn_help, data)) {
+ if (size > sizeof_field(struct nf_conn_help, data)) {
ret = -ENOMEM;
goto err2;
}
diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c
index 02afa752dd2e..10e9d50e4e19 100644
--- a/net/netfilter/nft_bitwise.c
+++ b/net/netfilter/nft_bitwise.c
@@ -80,7 +80,7 @@ static int nft_bitwise_init(const struct nft_ctx *ctx,
tb[NFTA_BITWISE_MASK]);
if (err < 0)
return err;
- if (d1.len != priv->len) {
+ if (d1.type != NFT_DATA_VALUE || d1.len != priv->len) {
err = -EINVAL;
goto err1;
}
@@ -89,7 +89,7 @@ static int nft_bitwise_init(const struct nft_ctx *ctx,
tb[NFTA_BITWISE_XOR]);
if (err < 0)
goto err1;
- if (d2.len != priv->len) {
+ if (d2.type != NFT_DATA_VALUE || d2.len != priv->len) {
err = -EINVAL;
goto err2;
}
diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c
index b8092069f868..8a28c127effc 100644
--- a/net/netfilter/nft_cmp.c
+++ b/net/netfilter/nft_cmp.c
@@ -81,6 +81,12 @@ static int nft_cmp_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
if (err < 0)
return err;
+ if (desc.type != NFT_DATA_VALUE) {
+ err = -EINVAL;
+ nft_data_release(&priv->data, desc.type);
+ return err;
+ }
+
priv->sreg = nft_parse_register(tb[NFTA_CMP_SREG]);
err = nft_validate_register_load(priv->sreg, desc.len);
if (err < 0)
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 46ca8bcca1bd..faea72c2df32 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -440,12 +440,12 @@ static int nft_ct_get_init(const struct nft_ctx *ctx,
switch (ctx->family) {
case NFPROTO_IPV4:
- len = FIELD_SIZEOF(struct nf_conntrack_tuple,
+ len = sizeof_field(struct nf_conntrack_tuple,
src.u3.ip);
break;
case NFPROTO_IPV6:
case NFPROTO_INET:
- len = FIELD_SIZEOF(struct nf_conntrack_tuple,
+ len = sizeof_field(struct nf_conntrack_tuple,
src.u3.ip6);
break;
default:
@@ -457,20 +457,20 @@ static int nft_ct_get_init(const struct nft_ctx *ctx,
if (tb[NFTA_CT_DIRECTION] == NULL)
return -EINVAL;
- len = FIELD_SIZEOF(struct nf_conntrack_tuple, src.u3.ip);
+ len = sizeof_field(struct nf_conntrack_tuple, src.u3.ip);
break;
case NFT_CT_SRC_IP6:
case NFT_CT_DST_IP6:
if (tb[NFTA_CT_DIRECTION] == NULL)
return -EINVAL;
- len = FIELD_SIZEOF(struct nf_conntrack_tuple, src.u3.ip6);
+ len = sizeof_field(struct nf_conntrack_tuple, src.u3.ip6);
break;
case NFT_CT_PROTO_SRC:
case NFT_CT_PROTO_DST:
if (tb[NFTA_CT_DIRECTION] == NULL)
return -EINVAL;
- len = FIELD_SIZEOF(struct nf_conntrack_tuple, src.u.all);
+ len = sizeof_field(struct nf_conntrack_tuple, src.u.all);
break;
case NFT_CT_BYTES:
case NFT_CT_PKTS:
@@ -551,7 +551,7 @@ static int nft_ct_set_init(const struct nft_ctx *ctx,
case NFT_CT_MARK:
if (tb[NFTA_CT_DIRECTION])
return -EINVAL;
- len = FIELD_SIZEOF(struct nf_conn, mark);
+ len = sizeof_field(struct nf_conn, mark);
break;
#endif
#ifdef CONFIG_NF_CONNTRACK_LABELS
diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
index dd82ff2ee19f..b70b48996801 100644
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -200,9 +200,6 @@ static void nft_flow_offload_activate(const struct nft_ctx *ctx,
static void nft_flow_offload_destroy(const struct nft_ctx *ctx,
const struct nft_expr *expr)
{
- struct nft_flow_offload *priv = nft_expr_priv(expr);
-
- priv->flowtable->use--;
nf_ct_netns_put(ctx->net, ctx->family);
}
diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c
index 39dc94f2491e..bc9fd98c5d6d 100644
--- a/net/netfilter/nft_masq.c
+++ b/net/netfilter/nft_masq.c
@@ -43,7 +43,7 @@ static int nft_masq_init(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nlattr * const tb[])
{
- u32 plen = FIELD_SIZEOF(struct nf_nat_range, min_addr.all);
+ u32 plen = sizeof_field(struct nf_nat_range, min_addr.all);
struct nft_masq *priv = nft_expr_priv(expr);
int err;
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index 9740b554fdb3..951b6e87ed5d 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -17,6 +17,7 @@
#include <linux/smp.h>
#include <linux/static_key.h>
#include <net/dst.h>
+#include <net/ip.h>
#include <net/sock.h>
#include <net/tcp_states.h> /* for TCP_TIME_WAIT */
#include <net/netfilter/nf_tables.h>
@@ -33,8 +34,9 @@
static DEFINE_PER_CPU(struct rnd_state, nft_prandom_state);
-static u8 nft_meta_weekday(time64_t secs)
+static u8 nft_meta_weekday(void)
{
+ time64_t secs = ktime_get_real_seconds();
unsigned int dse;
u8 wday;
@@ -56,14 +58,264 @@ static u32 nft_meta_hour(time64_t secs)
+ tm.tm_sec;
}
+static noinline_for_stack void
+nft_meta_get_eval_time(enum nft_meta_keys key,
+ u32 *dest)
+{
+ switch (key) {
+ case NFT_META_TIME_NS:
+ nft_reg_store64(dest, ktime_get_real_ns());
+ break;
+ case NFT_META_TIME_DAY:
+ nft_reg_store8(dest, nft_meta_weekday());
+ break;
+ case NFT_META_TIME_HOUR:
+ *dest = nft_meta_hour(ktime_get_real_seconds());
+ break;
+ default:
+ break;
+ }
+}
+
+static noinline bool
+nft_meta_get_eval_pkttype_lo(const struct nft_pktinfo *pkt,
+ u32 *dest)
+{
+ const struct sk_buff *skb = pkt->skb;
+
+ switch (nft_pf(pkt)) {
+ case NFPROTO_IPV4:
+ if (ipv4_is_multicast(ip_hdr(skb)->daddr))
+ nft_reg_store8(dest, PACKET_MULTICAST);
+ else
+ nft_reg_store8(dest, PACKET_BROADCAST);
+ break;
+ case NFPROTO_IPV6:
+ nft_reg_store8(dest, PACKET_MULTICAST);
+ break;
+ case NFPROTO_NETDEV:
+ switch (skb->protocol) {
+ case htons(ETH_P_IP): {
+ int noff = skb_network_offset(skb);
+ struct iphdr *iph, _iph;
+
+ iph = skb_header_pointer(skb, noff,
+ sizeof(_iph), &_iph);
+ if (!iph)
+ return false;
+
+ if (ipv4_is_multicast(iph->daddr))
+ nft_reg_store8(dest, PACKET_MULTICAST);
+ else
+ nft_reg_store8(dest, PACKET_BROADCAST);
+
+ break;
+ }
+ case htons(ETH_P_IPV6):
+ nft_reg_store8(dest, PACKET_MULTICAST);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ return false;
+ }
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ return false;
+ }
+
+ return true;
+}
+
+static noinline bool
+nft_meta_get_eval_skugid(enum nft_meta_keys key,
+ u32 *dest,
+ const struct nft_pktinfo *pkt)
+{
+ struct sock *sk = skb_to_full_sk(pkt->skb);
+ struct socket *sock;
+
+ if (!sk || !sk_fullsock(sk) || !net_eq(nft_net(pkt), sock_net(sk)))
+ return false;
+
+ read_lock_bh(&sk->sk_callback_lock);
+ sock = sk->sk_socket;
+ if (!sock || !sock->file) {
+ read_unlock_bh(&sk->sk_callback_lock);
+ return false;
+ }
+
+ switch (key) {
+ case NFT_META_SKUID:
+ *dest = from_kuid_munged(&init_user_ns,
+ sock->file->f_cred->fsuid);
+ break;
+ case NFT_META_SKGID:
+ *dest = from_kgid_munged(&init_user_ns,
+ sock->file->f_cred->fsgid);
+ break;
+ default:
+ break;
+ }
+
+ read_unlock_bh(&sk->sk_callback_lock);
+ return true;
+}
+
+#ifdef CONFIG_CGROUP_NET_CLASSID
+static noinline bool
+nft_meta_get_eval_cgroup(u32 *dest, const struct nft_pktinfo *pkt)
+{
+ struct sock *sk = skb_to_full_sk(pkt->skb);
+
+ if (!sk || !sk_fullsock(sk) || !net_eq(nft_net(pkt), sock_net(sk)))
+ return false;
+
+ *dest = sock_cgroup_classid(&sk->sk_cgrp_data);
+ return true;
+}
+#endif
+
+static noinline bool nft_meta_get_eval_kind(enum nft_meta_keys key,
+ u32 *dest,
+ const struct nft_pktinfo *pkt)
+{
+ const struct net_device *in = nft_in(pkt), *out = nft_out(pkt);
+
+ switch (key) {
+ case NFT_META_IIFKIND:
+ if (!in || !in->rtnl_link_ops)
+ return false;
+ strncpy((char *)dest, in->rtnl_link_ops->kind, IFNAMSIZ);
+ break;
+ case NFT_META_OIFKIND:
+ if (!out || !out->rtnl_link_ops)
+ return false;
+ strncpy((char *)dest, out->rtnl_link_ops->kind, IFNAMSIZ);
+ break;
+ default:
+ return false;
+ }
+
+ return true;
+}
+
+static void nft_meta_store_ifindex(u32 *dest, const struct net_device *dev)
+{
+ *dest = dev ? dev->ifindex : 0;
+}
+
+static void nft_meta_store_ifname(u32 *dest, const struct net_device *dev)
+{
+ strncpy((char *)dest, dev ? dev->name : "", IFNAMSIZ);
+}
+
+static bool nft_meta_store_iftype(u32 *dest, const struct net_device *dev)
+{
+ if (!dev)
+ return false;
+
+ nft_reg_store16(dest, dev->type);
+ return true;
+}
+
+static bool nft_meta_store_ifgroup(u32 *dest, const struct net_device *dev)
+{
+ if (!dev)
+ return false;
+
+ *dest = dev->group;
+ return true;
+}
+
+static bool nft_meta_get_eval_ifname(enum nft_meta_keys key, u32 *dest,
+ const struct nft_pktinfo *pkt)
+{
+ switch (key) {
+ case NFT_META_IIFNAME:
+ nft_meta_store_ifname(dest, nft_in(pkt));
+ break;
+ case NFT_META_OIFNAME:
+ nft_meta_store_ifname(dest, nft_out(pkt));
+ break;
+ case NFT_META_IIF:
+ nft_meta_store_ifindex(dest, nft_in(pkt));
+ break;
+ case NFT_META_OIF:
+ nft_meta_store_ifindex(dest, nft_out(pkt));
+ break;
+ case NFT_META_IIFTYPE:
+ if (!nft_meta_store_iftype(dest, nft_in(pkt)))
+ return false;
+ break;
+ case NFT_META_OIFTYPE:
+ if (!nft_meta_store_iftype(dest, nft_out(pkt)))
+ return false;
+ break;
+ case NFT_META_IIFGROUP:
+ if (!nft_meta_store_ifgroup(dest, nft_out(pkt)))
+ return false;
+ break;
+ case NFT_META_OIFGROUP:
+ if (!nft_meta_store_ifgroup(dest, nft_out(pkt)))
+ return false;
+ break;
+ default:
+ return false;
+ }
+
+ return true;
+}
+
+static noinline u32 nft_prandom_u32(void)
+{
+ struct rnd_state *state = this_cpu_ptr(&nft_prandom_state);
+
+ return prandom_u32_state(state);
+}
+
+#ifdef CONFIG_IP_ROUTE_CLASSID
+static noinline bool
+nft_meta_get_eval_rtclassid(const struct sk_buff *skb, u32 *dest)
+{
+ const struct dst_entry *dst = skb_dst(skb);
+
+ if (!dst)
+ return false;
+
+ *dest = dst->tclassid;
+ return true;
+}
+#endif
+
+static noinline u32 nft_meta_get_eval_sdif(const struct nft_pktinfo *pkt)
+{
+ switch (nft_pf(pkt)) {
+ case NFPROTO_IPV4:
+ return inet_sdif(pkt->skb);
+ case NFPROTO_IPV6:
+ return inet6_sdif(pkt->skb);
+ }
+
+ return 0;
+}
+
+static noinline void
+nft_meta_get_eval_sdifname(u32 *dest, const struct nft_pktinfo *pkt)
+{
+ u32 sdif = nft_meta_get_eval_sdif(pkt);
+ const struct net_device *dev;
+
+ dev = sdif ? dev_get_by_index_rcu(nft_net(pkt), sdif) : NULL;
+ nft_meta_store_ifname(dest, dev);
+}
+
void nft_meta_get_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
const struct nft_meta *priv = nft_expr_priv(expr);
const struct sk_buff *skb = pkt->skb;
- const struct net_device *in = nft_in(pkt), *out = nft_out(pkt);
- struct sock *sk;
u32 *dest = &regs->data[priv->dreg];
switch (priv->key) {
@@ -88,69 +340,26 @@ void nft_meta_get_eval(const struct nft_expr *expr,
*dest = skb->mark;
break;
case NFT_META_IIF:
- *dest = in ? in->ifindex : 0;
- break;
case NFT_META_OIF:
- *dest = out ? out->ifindex : 0;
- break;
case NFT_META_IIFNAME:
- strncpy((char *)dest, in ? in->name : "", IFNAMSIZ);
- break;
case NFT_META_OIFNAME:
- strncpy((char *)dest, out ? out->name : "", IFNAMSIZ);
- break;
case NFT_META_IIFTYPE:
- if (in == NULL)
- goto err;
- nft_reg_store16(dest, in->type);
- break;
case NFT_META_OIFTYPE:
- if (out == NULL)
+ case NFT_META_IIFGROUP:
+ case NFT_META_OIFGROUP:
+ if (!nft_meta_get_eval_ifname(priv->key, dest, pkt))
goto err;
- nft_reg_store16(dest, out->type);
break;
case NFT_META_SKUID:
- sk = skb_to_full_sk(skb);
- if (!sk || !sk_fullsock(sk) ||
- !net_eq(nft_net(pkt), sock_net(sk)))
- goto err;
-
- read_lock_bh(&sk->sk_callback_lock);
- if (sk->sk_socket == NULL ||
- sk->sk_socket->file == NULL) {
- read_unlock_bh(&sk->sk_callback_lock);
- goto err;
- }
-
- *dest = from_kuid_munged(&init_user_ns,
- sk->sk_socket->file->f_cred->fsuid);
- read_unlock_bh(&sk->sk_callback_lock);
- break;
case NFT_META_SKGID:
- sk = skb_to_full_sk(skb);
- if (!sk || !sk_fullsock(sk) ||
- !net_eq(nft_net(pkt), sock_net(sk)))
+ if (!nft_meta_get_eval_skugid(priv->key, dest, pkt))
goto err;
-
- read_lock_bh(&sk->sk_callback_lock);
- if (sk->sk_socket == NULL ||
- sk->sk_socket->file == NULL) {
- read_unlock_bh(&sk->sk_callback_lock);
- goto err;
- }
- *dest = from_kgid_munged(&init_user_ns,
- sk->sk_socket->file->f_cred->fsgid);
- read_unlock_bh(&sk->sk_callback_lock);
break;
#ifdef CONFIG_IP_ROUTE_CLASSID
- case NFT_META_RTCLASSID: {
- const struct dst_entry *dst = skb_dst(skb);
-
- if (dst == NULL)
+ case NFT_META_RTCLASSID:
+ if (!nft_meta_get_eval_rtclassid(skb, dest))
goto err;
- *dest = dst->tclassid;
break;
- }
#endif
#ifdef CONFIG_NETWORK_SECMARK
case NFT_META_SECMARK:
@@ -163,97 +372,41 @@ void nft_meta_get_eval(const struct nft_expr *expr,
break;
}
- switch (nft_pf(pkt)) {
- case NFPROTO_IPV4:
- if (ipv4_is_multicast(ip_hdr(skb)->daddr))
- nft_reg_store8(dest, PACKET_MULTICAST);
- else
- nft_reg_store8(dest, PACKET_BROADCAST);
- break;
- case NFPROTO_IPV6:
- nft_reg_store8(dest, PACKET_MULTICAST);
- break;
- case NFPROTO_NETDEV:
- switch (skb->protocol) {
- case htons(ETH_P_IP): {
- int noff = skb_network_offset(skb);
- struct iphdr *iph, _iph;
-
- iph = skb_header_pointer(skb, noff,
- sizeof(_iph), &_iph);
- if (!iph)
- goto err;
-
- if (ipv4_is_multicast(iph->daddr))
- nft_reg_store8(dest, PACKET_MULTICAST);
- else
- nft_reg_store8(dest, PACKET_BROADCAST);
-
- break;
- }
- case htons(ETH_P_IPV6):
- nft_reg_store8(dest, PACKET_MULTICAST);
- break;
- default:
- WARN_ON_ONCE(1);
- goto err;
- }
- break;
- default:
- WARN_ON_ONCE(1);
+ if (!nft_meta_get_eval_pkttype_lo(pkt, dest))
goto err;
- }
break;
case NFT_META_CPU:
*dest = raw_smp_processor_id();
break;
- case NFT_META_IIFGROUP:
- if (in == NULL)
- goto err;
- *dest = in->group;
- break;
- case NFT_META_OIFGROUP:
- if (out == NULL)
- goto err;
- *dest = out->group;
- break;
#ifdef CONFIG_CGROUP_NET_CLASSID
case NFT_META_CGROUP:
- sk = skb_to_full_sk(skb);
- if (!sk || !sk_fullsock(sk) ||
- !net_eq(nft_net(pkt), sock_net(sk)))
+ if (!nft_meta_get_eval_cgroup(dest, pkt))
goto err;
- *dest = sock_cgroup_classid(&sk->sk_cgrp_data);
break;
#endif
- case NFT_META_PRANDOM: {
- struct rnd_state *state = this_cpu_ptr(&nft_prandom_state);
- *dest = prandom_u32_state(state);
+ case NFT_META_PRANDOM:
+ *dest = nft_prandom_u32();
break;
- }
#ifdef CONFIG_XFRM
case NFT_META_SECPATH:
nft_reg_store8(dest, secpath_exists(skb));
break;
#endif
case NFT_META_IIFKIND:
- if (in == NULL || in->rtnl_link_ops == NULL)
- goto err;
- strncpy((char *)dest, in->rtnl_link_ops->kind, IFNAMSIZ);
- break;
case NFT_META_OIFKIND:
- if (out == NULL || out->rtnl_link_ops == NULL)
+ if (!nft_meta_get_eval_kind(priv->key, dest, pkt))
goto err;
- strncpy((char *)dest, out->rtnl_link_ops->kind, IFNAMSIZ);
break;
case NFT_META_TIME_NS:
- nft_reg_store64(dest, ktime_get_real_ns());
- break;
case NFT_META_TIME_DAY:
- nft_reg_store8(dest, nft_meta_weekday(ktime_get_real_seconds()));
- break;
case NFT_META_TIME_HOUR:
- *dest = nft_meta_hour(ktime_get_real_seconds());
+ nft_meta_get_eval_time(priv->key, dest);
+ break;
+ case NFT_META_SDIF:
+ *dest = nft_meta_get_eval_sdif(pkt);
+ break;
+ case NFT_META_SDIFNAME:
+ nft_meta_get_eval_sdifname(dest, pkt);
break;
default:
WARN_ON(1);
@@ -335,6 +488,7 @@ int nft_meta_get_init(const struct nft_ctx *ctx,
case NFT_META_MARK:
case NFT_META_IIF:
case NFT_META_OIF:
+ case NFT_META_SDIF:
case NFT_META_SKUID:
case NFT_META_SKGID:
#ifdef CONFIG_IP_ROUTE_CLASSID
@@ -356,6 +510,7 @@ int nft_meta_get_init(const struct nft_ctx *ctx,
case NFT_META_OIFNAME:
case NFT_META_IIFKIND:
case NFT_META_OIFKIND:
+ case NFT_META_SDIFNAME:
len = IFNAMSIZ;
break;
case NFT_META_PRANDOM:
@@ -386,16 +541,28 @@ int nft_meta_get_init(const struct nft_ctx *ctx,
}
EXPORT_SYMBOL_GPL(nft_meta_get_init);
-static int nft_meta_get_validate(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nft_data **data)
+static int nft_meta_get_validate_sdif(const struct nft_ctx *ctx)
{
-#ifdef CONFIG_XFRM
- const struct nft_meta *priv = nft_expr_priv(expr);
unsigned int hooks;
- if (priv->key != NFT_META_SECPATH)
- return 0;
+ switch (ctx->family) {
+ case NFPROTO_IPV4:
+ case NFPROTO_IPV6:
+ case NFPROTO_INET:
+ hooks = (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_FORWARD);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return nft_chain_validate_hooks(ctx->chain, hooks);
+}
+
+static int nft_meta_get_validate_xfrm(const struct nft_ctx *ctx)
+{
+#ifdef CONFIG_XFRM
+ unsigned int hooks;
switch (ctx->family) {
case NFPROTO_NETDEV:
@@ -418,6 +585,25 @@ static int nft_meta_get_validate(const struct nft_ctx *ctx,
#endif
}
+static int nft_meta_get_validate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nft_data **data)
+{
+ const struct nft_meta *priv = nft_expr_priv(expr);
+
+ switch (priv->key) {
+ case NFT_META_SECPATH:
+ return nft_meta_get_validate_xfrm(ctx);
+ case NFT_META_SDIF:
+ case NFT_META_SDIFNAME:
+ return nft_meta_get_validate_sdif(ctx);
+ default:
+ break;
+ }
+
+ return 0;
+}
+
int nft_meta_set_validate(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nft_data **data)
diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c
index c3c93e95b46e..8b44a4de5329 100644
--- a/net/netfilter/nft_nat.c
+++ b/net/netfilter/nft_nat.c
@@ -141,10 +141,10 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
switch (family) {
case NFPROTO_IPV4:
- alen = FIELD_SIZEOF(struct nf_nat_range, min_addr.ip);
+ alen = sizeof_field(struct nf_nat_range, min_addr.ip);
break;
case NFPROTO_IPV6:
- alen = FIELD_SIZEOF(struct nf_nat_range, min_addr.ip6);
+ alen = sizeof_field(struct nf_nat_range, min_addr.ip6);
break;
default:
return -EAFNOSUPPORT;
@@ -171,7 +171,7 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
}
}
- plen = FIELD_SIZEOF(struct nf_nat_range, min_addr.all);
+ plen = sizeof_field(struct nf_nat_range, min_addr.all);
if (tb[NFTA_NAT_REG_PROTO_MIN]) {
priv->sreg_proto_min =
nft_parse_register(tb[NFTA_NAT_REG_PROTO_MIN]);
diff --git a/net/netfilter/nft_range.c b/net/netfilter/nft_range.c
index 4701fa8a45e7..89efcc5a533d 100644
--- a/net/netfilter/nft_range.c
+++ b/net/netfilter/nft_range.c
@@ -66,11 +66,21 @@ static int nft_range_init(const struct nft_ctx *ctx, const struct nft_expr *expr
if (err < 0)
return err;
+ if (desc_from.type != NFT_DATA_VALUE) {
+ err = -EINVAL;
+ goto err1;
+ }
+
err = nft_data_init(NULL, &priv->data_to, sizeof(priv->data_to),
&desc_to, tb[NFTA_RANGE_TO_DATA]);
if (err < 0)
goto err1;
+ if (desc_to.type != NFT_DATA_VALUE) {
+ err = -EINVAL;
+ goto err2;
+ }
+
if (desc_from.len != desc_to.len) {
err = -EINVAL;
goto err2;
diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c
index 43eeb1f609f1..5b779171565c 100644
--- a/net/netfilter/nft_redir.c
+++ b/net/netfilter/nft_redir.c
@@ -48,7 +48,7 @@ static int nft_redir_init(const struct nft_ctx *ctx,
unsigned int plen;
int err;
- plen = FIELD_SIZEOF(struct nf_nat_range, min_addr.all);
+ plen = sizeof_field(struct nf_nat_range, min_addr.all);
if (tb[NFTA_REDIR_REG_PROTO_MIN]) {
priv->sreg_proto_min =
nft_parse_register(tb[NFTA_REDIR_REG_PROTO_MIN]);
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index 57123259452f..a9f804f7a04a 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -74,8 +74,13 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set
parent = rcu_dereference_raw(parent->rb_left);
continue;
}
- if (nft_rbtree_interval_end(rbe))
- goto out;
+ if (nft_rbtree_interval_end(rbe)) {
+ if (nft_set_is_anonymous(set))
+ return false;
+ parent = rcu_dereference_raw(parent->rb_left);
+ interval = NULL;
+ continue;
+ }
*ext = &rbe->ext;
return true;
@@ -88,7 +93,7 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set
*ext = &interval->ext;
return true;
}
-out:
+
return false;
}
@@ -139,8 +144,10 @@ static bool __nft_rbtree_get(const struct net *net, const struct nft_set *set,
if (flags & NFT_SET_ELEM_INTERVAL_END)
interval = rbe;
} else {
- if (!nft_set_elem_active(&rbe->ext, genmask))
+ if (!nft_set_elem_active(&rbe->ext, genmask)) {
parent = rcu_dereference_raw(parent->rb_left);
+ continue;
+ }
if (!nft_set_ext_exists(&rbe->ext, NFT_SET_EXT_FLAGS) ||
(*nft_set_ext_flags(&rbe->ext) & NFT_SET_ELEM_INTERVAL_END) ==
@@ -148,7 +155,11 @@ static bool __nft_rbtree_get(const struct net *net, const struct nft_set *set,
*elem = rbe;
return true;
}
- return false;
+
+ if (nft_rbtree_interval_end(rbe))
+ interval = NULL;
+
+ parent = rcu_dereference_raw(parent->rb_left);
}
}
diff --git a/net/netfilter/nft_tproxy.c b/net/netfilter/nft_tproxy.c
index f92a82c73880..d67f83a0958d 100644
--- a/net/netfilter/nft_tproxy.c
+++ b/net/netfilter/nft_tproxy.c
@@ -50,7 +50,7 @@ static void nft_tproxy_eval_v4(const struct nft_expr *expr,
taddr = nf_tproxy_laddr4(skb, taddr, iph->daddr);
if (priv->sreg_port)
- tport = regs->data[priv->sreg_port];
+ tport = nft_reg_load16(&regs->data[priv->sreg_port]);
if (!tport)
tport = hp->dest;
@@ -117,7 +117,7 @@ static void nft_tproxy_eval_v6(const struct nft_expr *expr,
taddr = *nf_tproxy_laddr6(skb, &taddr, &iph->daddr);
if (priv->sreg_port)
- tport = regs->data[priv->sreg_port];
+ tport = nft_reg_load16(&regs->data[priv->sreg_port]);
if (!tport)
tport = hp->dest;
@@ -218,14 +218,14 @@ static int nft_tproxy_init(const struct nft_ctx *ctx,
switch (priv->family) {
case NFPROTO_IPV4:
- alen = FIELD_SIZEOF(union nf_inet_addr, in);
+ alen = sizeof_field(union nf_inet_addr, in);
err = nf_defrag_ipv4_enable(ctx->net);
if (err)
return err;
break;
#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
case NFPROTO_IPV6:
- alen = FIELD_SIZEOF(union nf_inet_addr, in6);
+ alen = sizeof_field(union nf_inet_addr, in6);
err = nf_defrag_ipv6_enable(ctx->net);
if (err)
return err;
diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c
index 3d4c2ae605a8..23cd163689d5 100644
--- a/net/netfilter/nft_tunnel.c
+++ b/net/netfilter/nft_tunnel.c
@@ -248,8 +248,9 @@ static int nft_tunnel_obj_vxlan_init(const struct nlattr *attr,
}
static const struct nla_policy nft_tunnel_opts_erspan_policy[NFTA_TUNNEL_KEY_ERSPAN_MAX + 1] = {
+ [NFTA_TUNNEL_KEY_ERSPAN_VERSION] = { .type = NLA_U32 },
[NFTA_TUNNEL_KEY_ERSPAN_V1_INDEX] = { .type = NLA_U32 },
- [NFTA_TUNNEL_KEY_ERSPAN_V2_DIR] = { .type = NLA_U8 },
+ [NFTA_TUNNEL_KEY_ERSPAN_V2_DIR] = { .type = NLA_U8 },
[NFTA_TUNNEL_KEY_ERSPAN_V2_HWID] = { .type = NLA_U8 },
};
@@ -442,10 +443,15 @@ static int nft_tunnel_ip_dump(struct sk_buff *skb, struct ip_tunnel_info *info)
if (!nest)
return -1;
- if (nla_put_in6_addr(skb, NFTA_TUNNEL_KEY_IP6_SRC, &info->key.u.ipv6.src) < 0 ||
- nla_put_in6_addr(skb, NFTA_TUNNEL_KEY_IP6_DST, &info->key.u.ipv6.dst) < 0 ||
- nla_put_be32(skb, NFTA_TUNNEL_KEY_IP6_FLOWLABEL, info->key.label))
+ if (nla_put_in6_addr(skb, NFTA_TUNNEL_KEY_IP6_SRC,
+ &info->key.u.ipv6.src) < 0 ||
+ nla_put_in6_addr(skb, NFTA_TUNNEL_KEY_IP6_DST,
+ &info->key.u.ipv6.dst) < 0 ||
+ nla_put_be32(skb, NFTA_TUNNEL_KEY_IP6_FLOWLABEL,
+ info->key.label)) {
+ nla_nest_cancel(skb, nest);
return -1;
+ }
nla_nest_end(skb, nest);
} else {
@@ -453,9 +459,13 @@ static int nft_tunnel_ip_dump(struct sk_buff *skb, struct ip_tunnel_info *info)
if (!nest)
return -1;
- if (nla_put_in_addr(skb, NFTA_TUNNEL_KEY_IP_SRC, info->key.u.ipv4.src) < 0 ||
- nla_put_in_addr(skb, NFTA_TUNNEL_KEY_IP_DST, info->key.u.ipv4.dst) < 0)
+ if (nla_put_in_addr(skb, NFTA_TUNNEL_KEY_IP_SRC,
+ info->key.u.ipv4.src) < 0 ||
+ nla_put_in_addr(skb, NFTA_TUNNEL_KEY_IP_DST,
+ info->key.u.ipv4.dst) < 0) {
+ nla_nest_cancel(skb, nest);
return -1;
+ }
nla_nest_end(skb, nest);
}
@@ -467,42 +477,58 @@ static int nft_tunnel_opts_dump(struct sk_buff *skb,
struct nft_tunnel_obj *priv)
{
struct nft_tunnel_opts *opts = &priv->opts;
- struct nlattr *nest;
+ struct nlattr *nest, *inner;
nest = nla_nest_start_noflag(skb, NFTA_TUNNEL_KEY_OPTS);
if (!nest)
return -1;
if (opts->flags & TUNNEL_VXLAN_OPT) {
+ inner = nla_nest_start_noflag(skb, NFTA_TUNNEL_KEY_OPTS_VXLAN);
+ if (!inner)
+ goto failure;
if (nla_put_be32(skb, NFTA_TUNNEL_KEY_VXLAN_GBP,
htonl(opts->u.vxlan.gbp)))
- return -1;
+ goto inner_failure;
+ nla_nest_end(skb, inner);
} else if (opts->flags & TUNNEL_ERSPAN_OPT) {
+ inner = nla_nest_start_noflag(skb, NFTA_TUNNEL_KEY_OPTS_ERSPAN);
+ if (!inner)
+ goto failure;
+ if (nla_put_be32(skb, NFTA_TUNNEL_KEY_ERSPAN_VERSION,
+ htonl(opts->u.erspan.version)))
+ goto inner_failure;
switch (opts->u.erspan.version) {
case ERSPAN_VERSION:
if (nla_put_be32(skb, NFTA_TUNNEL_KEY_ERSPAN_V1_INDEX,
opts->u.erspan.u.index))
- return -1;
+ goto inner_failure;
break;
case ERSPAN_VERSION2:
if (nla_put_u8(skb, NFTA_TUNNEL_KEY_ERSPAN_V2_HWID,
get_hwid(&opts->u.erspan.u.md2)) ||
nla_put_u8(skb, NFTA_TUNNEL_KEY_ERSPAN_V2_DIR,
opts->u.erspan.u.md2.dir))
- return -1;
+ goto inner_failure;
break;
}
+ nla_nest_end(skb, inner);
}
nla_nest_end(skb, nest);
-
return 0;
+
+inner_failure:
+ nla_nest_cancel(skb, inner);
+failure:
+ nla_nest_cancel(skb, nest);
+ return -1;
}
static int nft_tunnel_ports_dump(struct sk_buff *skb,
struct ip_tunnel_info *info)
{
- if (nla_put_be16(skb, NFTA_TUNNEL_KEY_SPORT, htons(info->key.tp_src)) < 0 ||
- nla_put_be16(skb, NFTA_TUNNEL_KEY_DPORT, htons(info->key.tp_dst)) < 0)
+ if (nla_put_be16(skb, NFTA_TUNNEL_KEY_SPORT, info->key.tp_src) < 0 ||
+ nla_put_be16(skb, NFTA_TUNNEL_KEY_DPORT, info->key.tp_dst) < 0)
return -1;
return 0;
diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c
index 2236455b10a3..37253d399c6b 100644
--- a/net/netfilter/xt_RATEEST.c
+++ b/net/netfilter/xt_RATEEST.c
@@ -30,7 +30,7 @@ static unsigned int jhash_rnd __read_mostly;
static unsigned int xt_rateest_hash(const char *name)
{
- return jhash(name, FIELD_SIZEOF(struct xt_rateest, name), jhash_rnd) &
+ return jhash(name, sizeof_field(struct xt_rateest, name), jhash_rnd) &
(RATEEST_HSIZE - 1);
}
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 90b2ab9dd449..4e31721e7293 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -2755,7 +2755,7 @@ static int __init netlink_proto_init(void)
if (err != 0)
goto out;
- BUILD_BUG_ON(sizeof(struct netlink_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb));
+ BUILD_BUG_ON(sizeof(struct netlink_skb_parms) > sizeof_field(struct sk_buff, cb));
nl_table = kcalloc(MAX_LINKS, sizeof(*nl_table), GFP_KERNEL);
if (!nl_table)
diff --git a/net/nfc/nci/uart.c b/net/nfc/nci/uart.c
index 78fe622eba65..11b554ce07ff 100644
--- a/net/nfc/nci/uart.c
+++ b/net/nfc/nci/uart.c
@@ -346,7 +346,7 @@ static int nci_uart_default_recv_buf(struct nci_uart *nu, const u8 *data,
nu->rx_packet_len = -1;
nu->rx_skb = nci_skb_alloc(nu->ndev,
NCI_MAX_PACKET_SIZE,
- GFP_KERNEL);
+ GFP_ATOMIC);
if (!nu->rx_skb)
return -ENOMEM;
}
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 4c8395462303..7fbfe2adfffa 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -161,16 +161,17 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
const struct nlattr *attr, int len);
static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key,
- const struct ovs_action_push_mpls *mpls)
+ __be32 mpls_lse, __be16 mpls_ethertype, __u16 mac_len)
{
int err;
- err = skb_mpls_push(skb, mpls->mpls_lse, mpls->mpls_ethertype,
- skb->mac_len,
- ovs_key_mac_proto(key) == MAC_PROTO_ETHERNET);
+ err = skb_mpls_push(skb, mpls_lse, mpls_ethertype, mac_len, !!mac_len);
if (err)
return err;
+ if (!mac_len)
+ key->mac_proto = MAC_PROTO_NONE;
+
invalidate_flow_key(key);
return 0;
}
@@ -185,6 +186,9 @@ static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key,
if (err)
return err;
+ if (ethertype == htons(ETH_P_TEB))
+ key->mac_proto = MAC_PROTO_ETHERNET;
+
invalidate_flow_key(key);
return 0;
}
@@ -1229,10 +1233,24 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
execute_hash(skb, key, a);
break;
- case OVS_ACTION_ATTR_PUSH_MPLS:
- err = push_mpls(skb, key, nla_data(a));
+ case OVS_ACTION_ATTR_PUSH_MPLS: {
+ struct ovs_action_push_mpls *mpls = nla_data(a);
+
+ err = push_mpls(skb, key, mpls->mpls_lse,
+ mpls->mpls_ethertype, skb->mac_len);
break;
+ }
+ case OVS_ACTION_ATTR_ADD_MPLS: {
+ struct ovs_action_add_mpls *mpls = nla_data(a);
+ __u16 mac_len = 0;
+
+ if (mpls->tun_flags & OVS_MPLS_L3_TUNNEL_FLAG_MASK)
+ mac_len = skb->mac_len;
+ err = push_mpls(skb, key, mpls->mpls_lse,
+ mpls->mpls_ethertype, mac_len);
+ break;
+ }
case OVS_ACTION_ATTR_POP_MPLS:
err = pop_mpls(skb, key, nla_get_be16(a));
break;
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 1047e8043084..e3a37d22539c 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -2497,7 +2497,7 @@ static int __init dp_init(void)
{
int err;
- BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > FIELD_SIZEOF(struct sk_buff, cb));
+ BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > sizeof_field(struct sk_buff, cb));
pr_info("Open vSwitch switching datapath\n");
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index fd8ed766bdd1..758a8c77f736 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -37,7 +37,7 @@ enum sw_flow_mac_proto {
* matching for small options.
*/
#define TUN_METADATA_OFFSET(opt_len) \
- (FIELD_SIZEOF(struct sw_flow_key, tun_opts) - opt_len)
+ (sizeof_field(struct sw_flow_key, tun_opts) - opt_len)
#define TUN_METADATA_OPTS(flow_key, opt_len) \
((void *)((flow_key)->tun_opts + TUN_METADATA_OFFSET(opt_len)))
@@ -52,7 +52,7 @@ struct vlan_head {
#define OVS_SW_FLOW_KEY_METADATA_SIZE \
(offsetof(struct sw_flow_key, recirc_id) + \
- FIELD_SIZEOF(struct sw_flow_key, recirc_id))
+ sizeof_field(struct sw_flow_key, recirc_id))
struct ovs_key_nsh {
struct ovs_nsh_key_base base;
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 65c2e3458ff5..7da4230627f5 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -79,6 +79,7 @@ static bool actions_may_change_flow(const struct nlattr *actions)
case OVS_ACTION_ATTR_SET_MASKED:
case OVS_ACTION_ATTR_METER:
case OVS_ACTION_ATTR_CHECK_PKT_LEN:
+ case OVS_ACTION_ATTR_ADD_MPLS:
default:
return true;
}
@@ -3005,6 +3006,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
[OVS_ACTION_ATTR_METER] = sizeof(u32),
[OVS_ACTION_ATTR_CLONE] = (u32)-1,
[OVS_ACTION_ATTR_CHECK_PKT_LEN] = (u32)-1,
+ [OVS_ACTION_ATTR_ADD_MPLS] = sizeof(struct ovs_action_add_mpls),
};
const struct ovs_action_push_vlan *vlan;
int type = nla_type(a);
@@ -3072,6 +3074,33 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
case OVS_ACTION_ATTR_RECIRC:
break;
+ case OVS_ACTION_ATTR_ADD_MPLS: {
+ const struct ovs_action_add_mpls *mpls = nla_data(a);
+
+ if (!eth_p_mpls(mpls->mpls_ethertype))
+ return -EINVAL;
+
+ if (mpls->tun_flags & OVS_MPLS_L3_TUNNEL_FLAG_MASK) {
+ if (vlan_tci & htons(VLAN_CFI_MASK) ||
+ (eth_type != htons(ETH_P_IP) &&
+ eth_type != htons(ETH_P_IPV6) &&
+ eth_type != htons(ETH_P_ARP) &&
+ eth_type != htons(ETH_P_RARP) &&
+ !eth_p_mpls(eth_type)))
+ return -EINVAL;
+ mpls_label_count++;
+ } else {
+ if (mac_proto == MAC_PROTO_ETHERNET) {
+ mpls_label_count = 1;
+ mac_proto = MAC_PROTO_NONE;
+ } else {
+ mpls_label_count++;
+ }
+ }
+ eth_type = mpls->mpls_ethertype;
+ break;
+ }
+
case OVS_ACTION_ATTR_PUSH_MPLS: {
const struct ovs_action_push_mpls *mpls = nla_data(a);
@@ -3109,6 +3138,11 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
* recirculation.
*/
proto = nla_get_be16(a);
+
+ if (proto == htons(ETH_P_TEB) &&
+ mac_proto != MAC_PROTO_NONE)
+ return -EINVAL;
+
mpls_label_count--;
if (!eth_p_mpls(proto) || !mpls_label_count)
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 53c1d41fb1c9..3bec515ccde3 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -520,7 +520,7 @@ static int prb_calc_retire_blk_tmo(struct packet_sock *po,
int blk_size_in_bytes)
{
struct net_device *dev;
- unsigned int mbits = 0, msec = 0, div = 0, tmo = 0;
+ unsigned int mbits, div;
struct ethtool_link_ksettings ecmd;
int err;
@@ -532,30 +532,25 @@ static int prb_calc_retire_blk_tmo(struct packet_sock *po,
}
err = __ethtool_get_link_ksettings(dev, &ecmd);
rtnl_unlock();
- if (!err) {
- /*
- * If the link speed is so slow you don't really
- * need to worry about perf anyways
- */
- if (ecmd.base.speed < SPEED_1000 ||
- ecmd.base.speed == SPEED_UNKNOWN) {
- return DEFAULT_PRB_RETIRE_TOV;
- } else {
- msec = 1;
- div = ecmd.base.speed / 1000;
- }
- }
+ if (err)
+ return DEFAULT_PRB_RETIRE_TOV;
+ /* If the link speed is so slow you don't really
+ * need to worry about perf anyways
+ */
+ if (ecmd.base.speed < SPEED_1000 ||
+ ecmd.base.speed == SPEED_UNKNOWN)
+ return DEFAULT_PRB_RETIRE_TOV;
+
+ div = ecmd.base.speed / 1000;
mbits = (blk_size_in_bytes * 8) / (1024 * 1024);
if (div)
mbits /= div;
- tmo = mbits * msec;
-
if (div)
- return tmo+1;
- return tmo;
+ return mbits + 1;
+ return mbits;
}
static void prb_init_ft_ops(struct tpacket_kbdq_core *p1,
diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c
index 49bd76a87461..ac0fae06cc15 100644
--- a/net/phonet/pn_dev.c
+++ b/net/phonet/pn_dev.c
@@ -35,8 +35,6 @@ static unsigned int phonet_net_id __read_mostly;
static struct phonet_net *phonet_pernet(struct net *net)
{
- BUG_ON(!net);
-
return net_generic(net, phonet_net_id);
}
diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c
index 88f98f27ad88..3d24d45be5f4 100644
--- a/net/qrtr/qrtr.c
+++ b/net/qrtr/qrtr.c
@@ -196,7 +196,7 @@ static int qrtr_node_enqueue(struct qrtr_node *node, struct sk_buff *skb,
hdr->size = cpu_to_le32(len);
hdr->confirm_rx = 0;
- skb_put_padto(skb, ALIGN(len, 4));
+ skb_put_padto(skb, ALIGN(len, 4) + sizeof(*hdr));
mutex_lock(&node->ep_lock);
if (node->ep)
diff --git a/net/rfkill/core.c b/net/rfkill/core.c
index 461d75274fb3..971c73c7d34c 100644
--- a/net/rfkill/core.c
+++ b/net/rfkill/core.c
@@ -1002,10 +1002,13 @@ static void rfkill_sync_work(struct work_struct *work)
int __must_check rfkill_register(struct rfkill *rfkill)
{
static unsigned long rfkill_no;
- struct device *dev = &rfkill->dev;
+ struct device *dev;
int error;
- BUG_ON(!rfkill);
+ if (!rfkill)
+ return -EINVAL;
+
+ dev = &rfkill->dev;
mutex_lock(&rfkill_global_mutex);
diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c
index c53307623236..5277631fa14c 100644
--- a/net/rose/rose_route.c
+++ b/net/rose/rose_route.c
@@ -696,7 +696,6 @@ struct rose_neigh *rose_get_neigh(rose_address *addr, unsigned char *cause,
for (i = 0; i < node->count; i++) {
if (!rose_ftimer_running(node->neighbour[i])) {
res = node->neighbour[i];
- failed = 0;
goto out;
}
failed = 1;
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index d72ddb67bb74..9d3c4d2d893a 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -972,7 +972,7 @@ static int __init af_rxrpc_init(void)
int ret = -1;
unsigned int tmp;
- BUILD_BUG_ON(sizeof(struct rxrpc_skb_priv) > FIELD_SIZEOF(struct sk_buff, cb));
+ BUILD_BUG_ON(sizeof(struct rxrpc_skb_priv) > sizeof_field(struct sk_buff, cb));
get_random_bytes(&tmp, sizeof(tmp));
tmp &= 0x3fffffff;
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 7c7d10f2e0c1..5e99df80e80a 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -209,6 +209,7 @@ struct rxrpc_skb_priv {
struct rxrpc_security {
const char *name; /* name of this service */
u8 security_index; /* security type provided */
+ u32 no_key_abort; /* Abort code indicating no key */
/* Initialise a security service */
int (*init)(void);
@@ -977,8 +978,9 @@ static inline void rxrpc_reduce_conn_timer(struct rxrpc_connection *conn,
struct rxrpc_connection *rxrpc_find_service_conn_rcu(struct rxrpc_peer *,
struct sk_buff *);
struct rxrpc_connection *rxrpc_prealloc_service_connection(struct rxrpc_net *, gfp_t);
-void rxrpc_new_incoming_connection(struct rxrpc_sock *,
- struct rxrpc_connection *, struct sk_buff *);
+void rxrpc_new_incoming_connection(struct rxrpc_sock *, struct rxrpc_connection *,
+ const struct rxrpc_security *, struct key *,
+ struct sk_buff *);
void rxrpc_unpublish_service_conn(struct rxrpc_connection *);
/*
@@ -1103,7 +1105,9 @@ extern const struct rxrpc_security rxkad;
int __init rxrpc_init_security(void);
void rxrpc_exit_security(void);
int rxrpc_init_client_conn_security(struct rxrpc_connection *);
-int rxrpc_init_server_conn_security(struct rxrpc_connection *);
+bool rxrpc_look_up_server_security(struct rxrpc_local *, struct rxrpc_sock *,
+ const struct rxrpc_security **, struct key **,
+ struct sk_buff *);
/*
* sendmsg.c
diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c
index 135bf5cd8dd5..70e44abf106c 100644
--- a/net/rxrpc/call_accept.c
+++ b/net/rxrpc/call_accept.c
@@ -240,6 +240,22 @@ void rxrpc_discard_prealloc(struct rxrpc_sock *rx)
}
/*
+ * Ping the other end to fill our RTT cache and to retrieve the rwind
+ * and MTU parameters.
+ */
+static void rxrpc_send_ping(struct rxrpc_call *call, struct sk_buff *skb)
+{
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ ktime_t now = skb->tstamp;
+
+ if (call->peer->rtt_usage < 3 ||
+ ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), now))
+ rxrpc_propose_ACK(call, RXRPC_ACK_PING, sp->hdr.serial,
+ true, true,
+ rxrpc_propose_ack_ping_for_params);
+}
+
+/*
* Allocate a new incoming call from the prealloc pool, along with a connection
* and a peer as necessary.
*/
@@ -247,6 +263,8 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx,
struct rxrpc_local *local,
struct rxrpc_peer *peer,
struct rxrpc_connection *conn,
+ const struct rxrpc_security *sec,
+ struct key *key,
struct sk_buff *skb)
{
struct rxrpc_backlog *b = rx->backlog;
@@ -294,7 +312,7 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx,
conn->params.local = rxrpc_get_local(local);
conn->params.peer = peer;
rxrpc_see_connection(conn);
- rxrpc_new_incoming_connection(rx, conn, skb);
+ rxrpc_new_incoming_connection(rx, conn, sec, key, skb);
} else {
rxrpc_get_connection(conn);
}
@@ -333,9 +351,11 @@ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local,
struct sk_buff *skb)
{
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ const struct rxrpc_security *sec = NULL;
struct rxrpc_connection *conn;
struct rxrpc_peer *peer = NULL;
- struct rxrpc_call *call;
+ struct rxrpc_call *call = NULL;
+ struct key *key = NULL;
_enter("");
@@ -346,9 +366,7 @@ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local,
sp->hdr.seq, RX_INVALID_OPERATION, ESHUTDOWN);
skb->mark = RXRPC_SKB_MARK_REJECT_ABORT;
skb->priority = RX_INVALID_OPERATION;
- _leave(" = NULL [close]");
- call = NULL;
- goto out;
+ goto no_call;
}
/* The peer, connection and call may all have sprung into existence due
@@ -358,29 +376,19 @@ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local,
*/
conn = rxrpc_find_connection_rcu(local, skb, &peer);
- call = rxrpc_alloc_incoming_call(rx, local, peer, conn, skb);
+ if (!conn && !rxrpc_look_up_server_security(local, rx, &sec, &key, skb))
+ goto no_call;
+
+ call = rxrpc_alloc_incoming_call(rx, local, peer, conn, sec, key, skb);
+ key_put(key);
if (!call) {
skb->mark = RXRPC_SKB_MARK_REJECT_BUSY;
- _leave(" = NULL [busy]");
- call = NULL;
- goto out;
+ goto no_call;
}
trace_rxrpc_receive(call, rxrpc_receive_incoming,
sp->hdr.serial, sp->hdr.seq);
- /* Lock the call to prevent rxrpc_kernel_send/recv_data() and
- * sendmsg()/recvmsg() inconveniently stealing the mutex once the
- * notification is generated.
- *
- * The BUG should never happen because the kernel should be well
- * behaved enough not to access the call before the first notification
- * event and userspace is prevented from doing so until the state is
- * appropriate.
- */
- if (!mutex_trylock(&call->user_mutex))
- BUG();
-
/* Make the call live. */
rxrpc_incoming_call(rx, call, skb);
conn = call->conn;
@@ -421,6 +429,9 @@ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local,
BUG();
}
spin_unlock(&conn->state_lock);
+ spin_unlock(&rx->incoming_lock);
+
+ rxrpc_send_ping(call, skb);
if (call->state == RXRPC_CALL_SERVER_ACCEPTING)
rxrpc_notify_socket(call);
@@ -433,9 +444,12 @@ struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local,
rxrpc_put_call(call, rxrpc_call_put);
_leave(" = %p{%d}", call, call->debug_id);
-out:
- spin_unlock(&rx->incoming_lock);
return call;
+
+no_call:
+ spin_unlock(&rx->incoming_lock);
+ _leave(" = NULL [%u]", skb->mark);
+ return NULL;
}
/*
diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c
index a1ceef4f5cd0..808a4723f868 100644
--- a/net/rxrpc/conn_event.c
+++ b/net/rxrpc/conn_event.c
@@ -376,21 +376,7 @@ static void rxrpc_secure_connection(struct rxrpc_connection *conn)
_enter("{%d}", conn->debug_id);
ASSERT(conn->security_ix != 0);
-
- if (!conn->params.key) {
- _debug("set up security");
- ret = rxrpc_init_server_conn_security(conn);
- switch (ret) {
- case 0:
- break;
- case -ENOENT:
- abort_code = RX_CALL_DEAD;
- goto abort;
- default:
- abort_code = RXKADNOAUTH;
- goto abort;
- }
- }
+ ASSERT(conn->server_key);
if (conn->security->issue_challenge(conn) < 0) {
abort_code = RX_CALL_DEAD;
diff --git a/net/rxrpc/conn_service.c b/net/rxrpc/conn_service.c
index 123d6ceab15c..21da48e3d2e5 100644
--- a/net/rxrpc/conn_service.c
+++ b/net/rxrpc/conn_service.c
@@ -148,6 +148,8 @@ struct rxrpc_connection *rxrpc_prealloc_service_connection(struct rxrpc_net *rxn
*/
void rxrpc_new_incoming_connection(struct rxrpc_sock *rx,
struct rxrpc_connection *conn,
+ const struct rxrpc_security *sec,
+ struct key *key,
struct sk_buff *skb)
{
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
@@ -160,6 +162,8 @@ void rxrpc_new_incoming_connection(struct rxrpc_sock *rx,
conn->service_id = sp->hdr.serviceId;
conn->security_ix = sp->hdr.securityIndex;
conn->out_clientflag = 0;
+ conn->security = sec;
+ conn->server_key = key_get(key);
if (conn->security_ix)
conn->state = RXRPC_CONN_SERVICE_UNSECURED;
else
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 157be1ff8697..86bd133b4fa0 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -193,22 +193,6 @@ send_extra_data:
}
/*
- * Ping the other end to fill our RTT cache and to retrieve the rwind
- * and MTU parameters.
- */
-static void rxrpc_send_ping(struct rxrpc_call *call, struct sk_buff *skb)
-{
- struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
- ktime_t now = skb->tstamp;
-
- if (call->peer->rtt_usage < 3 ||
- ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), now))
- rxrpc_propose_ACK(call, RXRPC_ACK_PING, sp->hdr.serial,
- true, true,
- rxrpc_propose_ack_ping_for_params);
-}
-
-/*
* Apply a hard ACK by advancing the Tx window.
*/
static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to,
@@ -1396,8 +1380,6 @@ int rxrpc_input_packet(struct sock *udp_sk, struct sk_buff *skb)
call = rxrpc_new_incoming_call(local, rx, skb);
if (!call)
goto reject_packet;
- rxrpc_send_ping(call, skb);
- mutex_unlock(&call->user_mutex);
}
/* Process a call packet; this either discards or passes on the ref
diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c
index 8d8aa3c230b5..098f1f9ec53b 100644
--- a/net/rxrpc/rxkad.c
+++ b/net/rxrpc/rxkad.c
@@ -648,9 +648,9 @@ static int rxkad_issue_challenge(struct rxrpc_connection *conn)
u32 serial;
int ret;
- _enter("{%d,%x}", conn->debug_id, key_serial(conn->params.key));
+ _enter("{%d,%x}", conn->debug_id, key_serial(conn->server_key));
- ret = key_validate(conn->params.key);
+ ret = key_validate(conn->server_key);
if (ret < 0)
return ret;
@@ -1293,6 +1293,7 @@ static void rxkad_exit(void)
const struct rxrpc_security rxkad = {
.name = "rxkad",
.security_index = RXRPC_SECURITY_RXKAD,
+ .no_key_abort = RXKADUNKNOWNKEY,
.init = rxkad_init,
.exit = rxkad_exit,
.init_connection_security = rxkad_init_connection_security,
diff --git a/net/rxrpc/security.c b/net/rxrpc/security.c
index a4c47d2b7054..9b1fb9ed0717 100644
--- a/net/rxrpc/security.c
+++ b/net/rxrpc/security.c
@@ -101,62 +101,58 @@ int rxrpc_init_client_conn_security(struct rxrpc_connection *conn)
}
/*
- * initialise the security on a server connection
+ * Find the security key for a server connection.
*/
-int rxrpc_init_server_conn_security(struct rxrpc_connection *conn)
+bool rxrpc_look_up_server_security(struct rxrpc_local *local, struct rxrpc_sock *rx,
+ const struct rxrpc_security **_sec,
+ struct key **_key,
+ struct sk_buff *skb)
{
const struct rxrpc_security *sec;
- struct rxrpc_local *local = conn->params.local;
- struct rxrpc_sock *rx;
- struct key *key;
- key_ref_t kref;
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ key_ref_t kref = NULL;
char kdesc[5 + 1 + 3 + 1];
_enter("");
- sprintf(kdesc, "%u:%u", conn->service_id, conn->security_ix);
+ sprintf(kdesc, "%u:%u", sp->hdr.serviceId, sp->hdr.securityIndex);
- sec = rxrpc_security_lookup(conn->security_ix);
+ sec = rxrpc_security_lookup(sp->hdr.securityIndex);
if (!sec) {
- _leave(" = -ENOKEY [lookup]");
- return -ENOKEY;
+ trace_rxrpc_abort(0, "SVS",
+ sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
+ RX_INVALID_OPERATION, EKEYREJECTED);
+ skb->mark = RXRPC_SKB_MARK_REJECT_ABORT;
+ skb->priority = RX_INVALID_OPERATION;
+ return false;
}
- /* find the service */
- read_lock(&local->services_lock);
- rx = rcu_dereference_protected(local->service,
- lockdep_is_held(&local->services_lock));
- if (rx && (rx->srx.srx_service == conn->service_id ||
- rx->second_service == conn->service_id))
- goto found_service;
+ if (sp->hdr.securityIndex == RXRPC_SECURITY_NONE)
+ goto out;
- /* the service appears to have died */
- read_unlock(&local->services_lock);
- _leave(" = -ENOENT");
- return -ENOENT;
-
-found_service:
if (!rx->securities) {
- read_unlock(&local->services_lock);
- _leave(" = -ENOKEY");
- return -ENOKEY;
+ trace_rxrpc_abort(0, "SVR",
+ sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
+ RX_INVALID_OPERATION, EKEYREJECTED);
+ skb->mark = RXRPC_SKB_MARK_REJECT_ABORT;
+ skb->priority = RX_INVALID_OPERATION;
+ return false;
}
/* look through the service's keyring */
kref = keyring_search(make_key_ref(rx->securities, 1UL),
&key_type_rxrpc_s, kdesc, true);
if (IS_ERR(kref)) {
- read_unlock(&local->services_lock);
- _leave(" = %ld [search]", PTR_ERR(kref));
- return PTR_ERR(kref);
+ trace_rxrpc_abort(0, "SVK",
+ sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
+ sec->no_key_abort, EKEYREJECTED);
+ skb->mark = RXRPC_SKB_MARK_REJECT_ABORT;
+ skb->priority = sec->no_key_abort;
+ return false;
}
- key = key_ref_to_ptr(kref);
- read_unlock(&local->services_lock);
-
- conn->server_key = key;
- conn->security = sec;
-
- _leave(" = 0");
- return 0;
+out:
+ *_sec = sec;
+ *_key = key_ref_to_ptr(kref);
+ return true;
}
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 2985509147a2..b1e7ec726958 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -409,6 +409,23 @@ config NET_SCH_PLUG
To compile this code as a module, choose M here: the
module will be called sch_plug.
+config NET_SCH_ETS
+ tristate "Enhanced transmission selection scheduler (ETS)"
+ help
+ The Enhanced Transmission Selection scheduler is a classful
+ queuing discipline that merges functionality of PRIO and DRR
+ qdiscs in one scheduler. ETS makes it easy to configure a set of
+ strict and bandwidth-sharing bands to implement the transmission
+ selection described in 802.1Qaz.
+
+ Say Y here if you want to use the ETS packet scheduling
+ algorithm.
+
+ To compile this driver as a module, choose M here: the module
+ will be called sch_ets.
+
+ If unsure, say N.
+
menuconfig NET_SCH_DEFAULT
bool "Allow override default queue discipline"
---help---
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 415d1e1f237e..bc8856b865ff 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -48,6 +48,7 @@ obj-$(CONFIG_NET_SCH_ATM) += sch_atm.o
obj-$(CONFIG_NET_SCH_NETEM) += sch_netem.o
obj-$(CONFIG_NET_SCH_DRR) += sch_drr.o
obj-$(CONFIG_NET_SCH_PLUG) += sch_plug.o
+obj-$(CONFIG_NET_SCH_ETS) += sch_ets.o
obj-$(CONFIG_NET_SCH_MQPRIO) += sch_mqprio.o
obj-$(CONFIG_NET_SCH_SKBPRIO) += sch_skbprio.o
obj-$(CONFIG_NET_SCH_CHOKE) += sch_choke.o
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index bf2d69335d4b..f685c0d73708 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -312,7 +312,7 @@ static void tcf_ct_act_set_labels(struct nf_conn *ct,
u32 *labels_m)
{
#if IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS)
- size_t labels_sz = FIELD_SIZEOF(struct tcf_ct_params, labels);
+ size_t labels_sz = sizeof_field(struct tcf_ct_params, labels);
if (!memchr_inv(labels_m, 0, labels_sz))
return;
@@ -936,7 +936,7 @@ static struct tc_action_ops act_ct_ops = {
static __net_init int ct_init_net(struct net *net)
{
- unsigned int n_bits = FIELD_SIZEOF(struct tcf_ct_params, labels) * 8;
+ unsigned int n_bits = sizeof_field(struct tcf_ct_params, labels) * 8;
struct tc_ct_action_net *tn = net_generic(net, ct_net_id);
if (nf_connlabels_get(net, n_bits - 1)) {
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 1e3eb3a97532..1ad300e6dbc0 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -219,8 +219,10 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a,
bool use_reinsert;
bool want_ingress;
bool is_redirect;
+ bool expects_nh;
int m_eaction;
int mac_len;
+ bool at_nh;
rec_level = __this_cpu_inc_return(mirred_rec_level);
if (unlikely(rec_level > MIRRED_RECURSION_LIMIT)) {
@@ -261,19 +263,19 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a,
goto out;
}
- /* If action's target direction differs than filter's direction,
- * and devices expect a mac header on xmit, then mac push/pull is
- * needed.
- */
want_ingress = tcf_mirred_act_wants_ingress(m_eaction);
- if (skb_at_tc_ingress(skb) != want_ingress && m_mac_header_xmit) {
- if (!skb_at_tc_ingress(skb)) {
- /* caught at egress, act ingress: pull mac */
- mac_len = skb_network_header(skb) - skb_mac_header(skb);
+
+ expects_nh = want_ingress || !m_mac_header_xmit;
+ at_nh = skb->data == skb_network_header(skb);
+ if (at_nh != expects_nh) {
+ mac_len = skb_at_tc_ingress(skb) ? skb->mac_len :
+ skb_network_header(skb) - skb_mac_header(skb);
+ if (expects_nh) {
+ /* target device/action expect data at nh */
skb_pull_rcsum(skb2, mac_len);
} else {
- /* caught at ingress, act egress: push mac */
- skb_push_rcsum(skb2, skb->mac_len);
+ /* target device/action expect data at mac */
+ skb_push_rcsum(skb2, mac_len);
}
}
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 6a0eacafdb19..76e0d122616a 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -308,33 +308,12 @@ static void tcf_proto_put(struct tcf_proto *tp, bool rtnl_held,
tcf_proto_destroy(tp, rtnl_held, true, extack);
}
-static int walker_check_empty(struct tcf_proto *tp, void *fh,
- struct tcf_walker *arg)
+static bool tcf_proto_check_delete(struct tcf_proto *tp)
{
- if (fh) {
- arg->nonempty = true;
- return -1;
- }
- return 0;
-}
-
-static bool tcf_proto_is_empty(struct tcf_proto *tp, bool rtnl_held)
-{
- struct tcf_walker walker = { .fn = walker_check_empty, };
-
- if (tp->ops->walk) {
- tp->ops->walk(tp, &walker, rtnl_held);
- return !walker.nonempty;
- }
- return true;
-}
+ if (tp->ops->delete_empty)
+ return tp->ops->delete_empty(tp);
-static bool tcf_proto_check_delete(struct tcf_proto *tp, bool rtnl_held)
-{
- spin_lock(&tp->lock);
- if (tcf_proto_is_empty(tp, rtnl_held))
- tp->deleting = true;
- spin_unlock(&tp->lock);
+ tp->deleting = true;
return tp->deleting;
}
@@ -1751,7 +1730,7 @@ static void tcf_chain_tp_delete_empty(struct tcf_chain *chain,
* concurrently.
* Mark tp for deletion if it is empty.
*/
- if (!tp_iter || !tcf_proto_check_delete(tp, rtnl_held)) {
+ if (!tp_iter || !tcf_proto_check_delete(tp)) {
mutex_unlock(&chain->filter_chain_lock);
return;
}
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 6c68971d99df..b0f42e62dd76 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -1481,7 +1481,7 @@ static int fl_init_mask_hashtable(struct fl_flow_mask *mask)
}
#define FL_KEY_MEMBER_OFFSET(member) offsetof(struct fl_flow_key, member)
-#define FL_KEY_MEMBER_SIZE(member) FIELD_SIZEOF(struct fl_flow_key, member)
+#define FL_KEY_MEMBER_SIZE(member) sizeof_field(struct fl_flow_key, member)
#define FL_KEY_IS_MASKED(mask, member) \
memchr_inv(((char *)mask) + FL_KEY_MEMBER_OFFSET(member), \
@@ -2773,6 +2773,17 @@ static void fl_bind_class(void *fh, u32 classid, unsigned long cl)
f->res.class = cl;
}
+static bool fl_delete_empty(struct tcf_proto *tp)
+{
+ struct cls_fl_head *head = fl_head_dereference(tp);
+
+ spin_lock(&tp->lock);
+ tp->deleting = idr_is_empty(&head->handle_idr);
+ spin_unlock(&tp->lock);
+
+ return tp->deleting;
+}
+
static struct tcf_proto_ops cls_fl_ops __read_mostly = {
.kind = "flower",
.classify = fl_classify,
@@ -2782,6 +2793,7 @@ static struct tcf_proto_ops cls_fl_ops __read_mostly = {
.put = fl_put,
.change = fl_change,
.delete = fl_delete,
+ .delete_empty = fl_delete_empty,
.walk = fl_walk,
.reoffload = fl_reoffload,
.hw_add = fl_hw_add,
diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
index e0f40400f679..90ef7cc79b69 100644
--- a/net/sched/sch_cake.c
+++ b/net/sched/sch_cake.c
@@ -173,8 +173,7 @@ struct cake_tin_data {
u64 tin_rate_bps;
u16 tin_rate_shft;
- u16 tin_quantum_prio;
- u16 tin_quantum_band;
+ u16 tin_quantum;
s32 tin_deficit;
u32 tin_backlog;
u32 tin_dropped;
@@ -1769,7 +1768,7 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
q->avg_window_begin));
u64 b = q->avg_window_bytes * (u64)NSEC_PER_SEC;
- do_div(b, window_interval);
+ b = div64_u64(b, window_interval);
q->avg_peak_bandwidth =
cake_ewma(q->avg_peak_bandwidth, b,
b > q->avg_peak_bandwidth ? 2 : 8);
@@ -1919,7 +1918,7 @@ begin:
while (b->tin_deficit < 0 ||
!(b->sparse_flow_count + b->bulk_flow_count)) {
if (b->tin_deficit <= 0)
- b->tin_deficit += b->tin_quantum_band;
+ b->tin_deficit += b->tin_quantum;
if (b->sparse_flow_count + b->bulk_flow_count)
empty = false;
@@ -2241,8 +2240,7 @@ static int cake_config_besteffort(struct Qdisc *sch)
cake_set_rate(b, rate, mtu,
us_to_ns(q->target), us_to_ns(q->interval));
- b->tin_quantum_band = 65535;
- b->tin_quantum_prio = 65535;
+ b->tin_quantum = 65535;
return 0;
}
@@ -2253,8 +2251,7 @@ static int cake_config_precedence(struct Qdisc *sch)
struct cake_sched_data *q = qdisc_priv(sch);
u32 mtu = psched_mtu(qdisc_dev(sch));
u64 rate = q->rate_bps;
- u32 quantum1 = 256;
- u32 quantum2 = 256;
+ u32 quantum = 256;
u32 i;
q->tin_cnt = 8;
@@ -2267,18 +2264,14 @@ static int cake_config_precedence(struct Qdisc *sch)
cake_set_rate(b, rate, mtu, us_to_ns(q->target),
us_to_ns(q->interval));
- b->tin_quantum_prio = max_t(u16, 1U, quantum1);
- b->tin_quantum_band = max_t(u16, 1U, quantum2);
+ b->tin_quantum = max_t(u16, 1U, quantum);
/* calculate next class's parameters */
rate *= 7;
rate >>= 3;
- quantum1 *= 3;
- quantum1 >>= 1;
-
- quantum2 *= 7;
- quantum2 >>= 3;
+ quantum *= 7;
+ quantum >>= 3;
}
return 0;
@@ -2347,8 +2340,7 @@ static int cake_config_diffserv8(struct Qdisc *sch)
struct cake_sched_data *q = qdisc_priv(sch);
u32 mtu = psched_mtu(qdisc_dev(sch));
u64 rate = q->rate_bps;
- u32 quantum1 = 256;
- u32 quantum2 = 256;
+ u32 quantum = 256;
u32 i;
q->tin_cnt = 8;
@@ -2364,18 +2356,14 @@ static int cake_config_diffserv8(struct Qdisc *sch)
cake_set_rate(b, rate, mtu, us_to_ns(q->target),
us_to_ns(q->interval));
- b->tin_quantum_prio = max_t(u16, 1U, quantum1);
- b->tin_quantum_band = max_t(u16, 1U, quantum2);
+ b->tin_quantum = max_t(u16, 1U, quantum);
/* calculate next class's parameters */
rate *= 7;
rate >>= 3;
- quantum1 *= 3;
- quantum1 >>= 1;
-
- quantum2 *= 7;
- quantum2 >>= 3;
+ quantum *= 7;
+ quantum >>= 3;
}
return 0;
@@ -2414,17 +2402,11 @@ static int cake_config_diffserv4(struct Qdisc *sch)
cake_set_rate(&q->tins[3], rate >> 2, mtu,
us_to_ns(q->target), us_to_ns(q->interval));
- /* priority weights */
- q->tins[0].tin_quantum_prio = quantum;
- q->tins[1].tin_quantum_prio = quantum >> 4;
- q->tins[2].tin_quantum_prio = quantum << 2;
- q->tins[3].tin_quantum_prio = quantum << 4;
-
/* bandwidth-sharing weights */
- q->tins[0].tin_quantum_band = quantum;
- q->tins[1].tin_quantum_band = quantum >> 4;
- q->tins[2].tin_quantum_band = quantum >> 1;
- q->tins[3].tin_quantum_band = quantum >> 2;
+ q->tins[0].tin_quantum = quantum;
+ q->tins[1].tin_quantum = quantum >> 4;
+ q->tins[2].tin_quantum = quantum >> 1;
+ q->tins[3].tin_quantum = quantum >> 2;
return 0;
}
@@ -2455,15 +2437,10 @@ static int cake_config_diffserv3(struct Qdisc *sch)
cake_set_rate(&q->tins[2], rate >> 2, mtu,
us_to_ns(q->target), us_to_ns(q->interval));
- /* priority weights */
- q->tins[0].tin_quantum_prio = quantum;
- q->tins[1].tin_quantum_prio = quantum >> 4;
- q->tins[2].tin_quantum_prio = quantum << 4;
-
/* bandwidth-sharing weights */
- q->tins[0].tin_quantum_band = quantum;
- q->tins[1].tin_quantum_band = quantum >> 4;
- q->tins[2].tin_quantum_band = quantum >> 2;
+ q->tins[0].tin_quantum = quantum;
+ q->tins[1].tin_quantum = quantum >> 4;
+ q->tins[2].tin_quantum = quantum >> 2;
return 0;
}
diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c
new file mode 100644
index 000000000000..a87e9159338c
--- /dev/null
+++ b/net/sched/sch_ets.c
@@ -0,0 +1,828 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * net/sched/sch_ets.c Enhanced Transmission Selection scheduler
+ *
+ * Description
+ * -----------
+ *
+ * The Enhanced Transmission Selection scheduler is a classful queuing
+ * discipline that merges functionality of PRIO and DRR qdiscs in one scheduler.
+ * ETS makes it easy to configure a set of strict and bandwidth-sharing bands to
+ * implement the transmission selection described in 802.1Qaz.
+ *
+ * Although ETS is technically classful, it's not possible to add and remove
+ * classes at will. Instead one specifies number of classes, how many are
+ * PRIO-like and how many DRR-like, and quanta for the latter.
+ *
+ * Algorithm
+ * ---------
+ *
+ * The strict classes, if any, are tried for traffic first: first band 0, if it
+ * has no traffic then band 1, etc.
+ *
+ * When there is no traffic in any of the strict queues, the bandwidth-sharing
+ * ones are tried next. Each band is assigned a deficit counter, initialized to
+ * "quantum" of that band. ETS maintains a list of active bandwidth-sharing
+ * bands whose qdiscs are non-empty. A packet is dequeued from the band at the
+ * head of the list if the packet size is smaller or equal to the deficit
+ * counter. If the counter is too small, it is increased by "quantum" and the
+ * scheduler moves on to the next band in the active list.
+ */
+
+#include <linux/module.h>
+#include <net/gen_stats.h>
+#include <net/netlink.h>
+#include <net/pkt_cls.h>
+#include <net/pkt_sched.h>
+#include <net/sch_generic.h>
+
+struct ets_class {
+ struct list_head alist; /* In struct ets_sched.active. */
+ struct Qdisc *qdisc;
+ u32 quantum;
+ u32 deficit;
+ struct gnet_stats_basic_packed bstats;
+ struct gnet_stats_queue qstats;
+};
+
+struct ets_sched {
+ struct list_head active;
+ struct tcf_proto __rcu *filter_list;
+ struct tcf_block *block;
+ unsigned int nbands;
+ unsigned int nstrict;
+ u8 prio2band[TC_PRIO_MAX + 1];
+ struct ets_class classes[TCQ_ETS_MAX_BANDS];
+};
+
+static const struct nla_policy ets_policy[TCA_ETS_MAX + 1] = {
+ [TCA_ETS_NBANDS] = { .type = NLA_U8 },
+ [TCA_ETS_NSTRICT] = { .type = NLA_U8 },
+ [TCA_ETS_QUANTA] = { .type = NLA_NESTED },
+ [TCA_ETS_PRIOMAP] = { .type = NLA_NESTED },
+};
+
+static const struct nla_policy ets_priomap_policy[TCA_ETS_MAX + 1] = {
+ [TCA_ETS_PRIOMAP_BAND] = { .type = NLA_U8 },
+};
+
+static const struct nla_policy ets_quanta_policy[TCA_ETS_MAX + 1] = {
+ [TCA_ETS_QUANTA_BAND] = { .type = NLA_U32 },
+};
+
+static const struct nla_policy ets_class_policy[TCA_ETS_MAX + 1] = {
+ [TCA_ETS_QUANTA_BAND] = { .type = NLA_U32 },
+};
+
+static int ets_quantum_parse(struct Qdisc *sch, const struct nlattr *attr,
+ unsigned int *quantum,
+ struct netlink_ext_ack *extack)
+{
+ *quantum = nla_get_u32(attr);
+ if (!*quantum) {
+ NL_SET_ERR_MSG(extack, "ETS quantum cannot be zero");
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static struct ets_class *
+ets_class_from_arg(struct Qdisc *sch, unsigned long arg)
+{
+ struct ets_sched *q = qdisc_priv(sch);
+
+ return &q->classes[arg - 1];
+}
+
+static u32 ets_class_id(struct Qdisc *sch, const struct ets_class *cl)
+{
+ struct ets_sched *q = qdisc_priv(sch);
+ int band = cl - q->classes;
+
+ return TC_H_MAKE(sch->handle, band + 1);
+}
+
+static void ets_offload_change(struct Qdisc *sch)
+{
+ struct net_device *dev = qdisc_dev(sch);
+ struct ets_sched *q = qdisc_priv(sch);
+ struct tc_ets_qopt_offload qopt;
+ unsigned int w_psum_prev = 0;
+ unsigned int q_psum = 0;
+ unsigned int q_sum = 0;
+ unsigned int quantum;
+ unsigned int w_psum;
+ unsigned int weight;
+ unsigned int i;
+
+ if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
+ return;
+
+ qopt.command = TC_ETS_REPLACE;
+ qopt.handle = sch->handle;
+ qopt.parent = sch->parent;
+ qopt.replace_params.bands = q->nbands;
+ qopt.replace_params.qstats = &sch->qstats;
+ memcpy(&qopt.replace_params.priomap,
+ q->prio2band, sizeof(q->prio2band));
+
+ for (i = 0; i < q->nbands; i++)
+ q_sum += q->classes[i].quantum;
+
+ for (i = 0; i < q->nbands; i++) {
+ quantum = q->classes[i].quantum;
+ q_psum += quantum;
+ w_psum = quantum ? q_psum * 100 / q_sum : 0;
+ weight = w_psum - w_psum_prev;
+ w_psum_prev = w_psum;
+
+ qopt.replace_params.quanta[i] = quantum;
+ qopt.replace_params.weights[i] = weight;
+ }
+
+ dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_ETS, &qopt);
+}
+
+static void ets_offload_destroy(struct Qdisc *sch)
+{
+ struct net_device *dev = qdisc_dev(sch);
+ struct tc_ets_qopt_offload qopt;
+
+ if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
+ return;
+
+ qopt.command = TC_ETS_DESTROY;
+ qopt.handle = sch->handle;
+ qopt.parent = sch->parent;
+ dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_ETS, &qopt);
+}
+
+static void ets_offload_graft(struct Qdisc *sch, struct Qdisc *new,
+ struct Qdisc *old, unsigned long arg,
+ struct netlink_ext_ack *extack)
+{
+ struct net_device *dev = qdisc_dev(sch);
+ struct tc_ets_qopt_offload qopt;
+
+ qopt.command = TC_ETS_GRAFT;
+ qopt.handle = sch->handle;
+ qopt.parent = sch->parent;
+ qopt.graft_params.band = arg - 1;
+ qopt.graft_params.child_handle = new->handle;
+
+ qdisc_offload_graft_helper(dev, sch, new, old, TC_SETUP_QDISC_ETS,
+ &qopt, extack);
+}
+
+static int ets_offload_dump(struct Qdisc *sch)
+{
+ struct tc_ets_qopt_offload qopt;
+
+ qopt.command = TC_ETS_STATS;
+ qopt.handle = sch->handle;
+ qopt.parent = sch->parent;
+ qopt.stats.bstats = &sch->bstats;
+ qopt.stats.qstats = &sch->qstats;
+
+ return qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_ETS, &qopt);
+}
+
+static bool ets_class_is_strict(struct ets_sched *q, const struct ets_class *cl)
+{
+ unsigned int band = cl - q->classes;
+
+ return band < q->nstrict;
+}
+
+static int ets_class_change(struct Qdisc *sch, u32 classid, u32 parentid,
+ struct nlattr **tca, unsigned long *arg,
+ struct netlink_ext_ack *extack)
+{
+ struct ets_class *cl = ets_class_from_arg(sch, *arg);
+ struct ets_sched *q = qdisc_priv(sch);
+ struct nlattr *opt = tca[TCA_OPTIONS];
+ struct nlattr *tb[TCA_ETS_MAX + 1];
+ unsigned int quantum;
+ int err;
+
+ /* Classes can be added and removed only through Qdisc_ops.change
+ * interface.
+ */
+ if (!cl) {
+ NL_SET_ERR_MSG(extack, "Fine-grained class addition and removal is not supported");
+ return -EOPNOTSUPP;
+ }
+
+ if (!opt) {
+ NL_SET_ERR_MSG(extack, "ETS options are required for this operation");
+ return -EINVAL;
+ }
+
+ err = nla_parse_nested(tb, TCA_ETS_MAX, opt, ets_class_policy, extack);
+ if (err < 0)
+ return err;
+
+ if (!tb[TCA_ETS_QUANTA_BAND])
+ /* Nothing to configure. */
+ return 0;
+
+ if (ets_class_is_strict(q, cl)) {
+ NL_SET_ERR_MSG(extack, "Strict bands do not have a configurable quantum");
+ return -EINVAL;
+ }
+
+ err = ets_quantum_parse(sch, tb[TCA_ETS_QUANTA_BAND], &quantum,
+ extack);
+ if (err)
+ return err;
+
+ sch_tree_lock(sch);
+ cl->quantum = quantum;
+ sch_tree_unlock(sch);
+
+ ets_offload_change(sch);
+ return 0;
+}
+
+static int ets_class_graft(struct Qdisc *sch, unsigned long arg,
+ struct Qdisc *new, struct Qdisc **old,
+ struct netlink_ext_ack *extack)
+{
+ struct ets_class *cl = ets_class_from_arg(sch, arg);
+
+ if (!new) {
+ new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
+ ets_class_id(sch, cl), NULL);
+ if (!new)
+ new = &noop_qdisc;
+ else
+ qdisc_hash_add(new, true);
+ }
+
+ *old = qdisc_replace(sch, new, &cl->qdisc);
+ ets_offload_graft(sch, new, *old, arg, extack);
+ return 0;
+}
+
+static struct Qdisc *ets_class_leaf(struct Qdisc *sch, unsigned long arg)
+{
+ struct ets_class *cl = ets_class_from_arg(sch, arg);
+
+ return cl->qdisc;
+}
+
+static unsigned long ets_class_find(struct Qdisc *sch, u32 classid)
+{
+ unsigned long band = TC_H_MIN(classid);
+ struct ets_sched *q = qdisc_priv(sch);
+
+ if (band - 1 >= q->nbands)
+ return 0;
+ return band;
+}
+
+static void ets_class_qlen_notify(struct Qdisc *sch, unsigned long arg)
+{
+ struct ets_class *cl = ets_class_from_arg(sch, arg);
+ struct ets_sched *q = qdisc_priv(sch);
+
+ /* We get notified about zero-length child Qdiscs as well if they are
+ * offloaded. Those aren't on the active list though, so don't attempt
+ * to remove them.
+ */
+ if (!ets_class_is_strict(q, cl) && sch->q.qlen)
+ list_del(&cl->alist);
+}
+
+static int ets_class_dump(struct Qdisc *sch, unsigned long arg,
+ struct sk_buff *skb, struct tcmsg *tcm)
+{
+ struct ets_class *cl = ets_class_from_arg(sch, arg);
+ struct ets_sched *q = qdisc_priv(sch);
+ struct nlattr *nest;
+
+ tcm->tcm_parent = TC_H_ROOT;
+ tcm->tcm_handle = ets_class_id(sch, cl);
+ tcm->tcm_info = cl->qdisc->handle;
+
+ nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
+ if (!nest)
+ goto nla_put_failure;
+ if (!ets_class_is_strict(q, cl)) {
+ if (nla_put_u32(skb, TCA_ETS_QUANTA_BAND, cl->quantum))
+ goto nla_put_failure;
+ }
+ return nla_nest_end(skb, nest);
+
+nla_put_failure:
+ nla_nest_cancel(skb, nest);
+ return -EMSGSIZE;
+}
+
+static int ets_class_dump_stats(struct Qdisc *sch, unsigned long arg,
+ struct gnet_dump *d)
+{
+ struct ets_class *cl = ets_class_from_arg(sch, arg);
+ struct Qdisc *cl_q = cl->qdisc;
+
+ if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
+ d, NULL, &cl_q->bstats) < 0 ||
+ qdisc_qstats_copy(d, cl_q) < 0)
+ return -1;
+
+ return 0;
+}
+
+static void ets_qdisc_walk(struct Qdisc *sch, struct qdisc_walker *arg)
+{
+ struct ets_sched *q = qdisc_priv(sch);
+ int i;
+
+ if (arg->stop)
+ return;
+
+ for (i = 0; i < q->nbands; i++) {
+ if (arg->count < arg->skip) {
+ arg->count++;
+ continue;
+ }
+ if (arg->fn(sch, i + 1, arg) < 0) {
+ arg->stop = 1;
+ break;
+ }
+ arg->count++;
+ }
+}
+
+static struct tcf_block *
+ets_qdisc_tcf_block(struct Qdisc *sch, unsigned long cl,
+ struct netlink_ext_ack *extack)
+{
+ struct ets_sched *q = qdisc_priv(sch);
+
+ if (cl) {
+ NL_SET_ERR_MSG(extack, "ETS classid must be zero");
+ return NULL;
+ }
+
+ return q->block;
+}
+
+static unsigned long ets_qdisc_bind_tcf(struct Qdisc *sch, unsigned long parent,
+ u32 classid)
+{
+ return ets_class_find(sch, classid);
+}
+
+static void ets_qdisc_unbind_tcf(struct Qdisc *sch, unsigned long arg)
+{
+}
+
+static struct ets_class *ets_classify(struct sk_buff *skb, struct Qdisc *sch,
+ int *qerr)
+{
+ struct ets_sched *q = qdisc_priv(sch);
+ u32 band = skb->priority;
+ struct tcf_result res;
+ struct tcf_proto *fl;
+ int err;
+
+ *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
+ if (TC_H_MAJ(skb->priority) != sch->handle) {
+ fl = rcu_dereference_bh(q->filter_list);
+ err = tcf_classify(skb, fl, &res, false);
+#ifdef CONFIG_NET_CLS_ACT
+ switch (err) {
+ case TC_ACT_STOLEN:
+ case TC_ACT_QUEUED:
+ case TC_ACT_TRAP:
+ *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
+ /* fall through */
+ case TC_ACT_SHOT:
+ return NULL;
+ }
+#endif
+ if (!fl || err < 0) {
+ if (TC_H_MAJ(band))
+ band = 0;
+ return &q->classes[q->prio2band[band & TC_PRIO_MAX]];
+ }
+ band = res.classid;
+ }
+ band = TC_H_MIN(band) - 1;
+ if (band >= q->nbands)
+ return &q->classes[q->prio2band[0]];
+ return &q->classes[band];
+}
+
+static int ets_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+ struct sk_buff **to_free)
+{
+ unsigned int len = qdisc_pkt_len(skb);
+ struct ets_sched *q = qdisc_priv(sch);
+ struct ets_class *cl;
+ int err = 0;
+ bool first;
+
+ cl = ets_classify(skb, sch, &err);
+ if (!cl) {
+ if (err & __NET_XMIT_BYPASS)
+ qdisc_qstats_drop(sch);
+ __qdisc_drop(skb, to_free);
+ return err;
+ }
+
+ first = !cl->qdisc->q.qlen;
+ err = qdisc_enqueue(skb, cl->qdisc, to_free);
+ if (unlikely(err != NET_XMIT_SUCCESS)) {
+ if (net_xmit_drop_count(err)) {
+ cl->qstats.drops++;
+ qdisc_qstats_drop(sch);
+ }
+ return err;
+ }
+
+ if (first && !ets_class_is_strict(q, cl)) {
+ list_add_tail(&cl->alist, &q->active);
+ cl->deficit = cl->quantum;
+ }
+
+ sch->qstats.backlog += len;
+ sch->q.qlen++;
+ return err;
+}
+
+static struct sk_buff *
+ets_qdisc_dequeue_skb(struct Qdisc *sch, struct sk_buff *skb)
+{
+ qdisc_bstats_update(sch, skb);
+ qdisc_qstats_backlog_dec(sch, skb);
+ sch->q.qlen--;
+ return skb;
+}
+
+static struct sk_buff *ets_qdisc_dequeue(struct Qdisc *sch)
+{
+ struct ets_sched *q = qdisc_priv(sch);
+ struct ets_class *cl;
+ struct sk_buff *skb;
+ unsigned int band;
+ unsigned int len;
+
+ while (1) {
+ for (band = 0; band < q->nstrict; band++) {
+ cl = &q->classes[band];
+ skb = qdisc_dequeue_peeked(cl->qdisc);
+ if (skb)
+ return ets_qdisc_dequeue_skb(sch, skb);
+ }
+
+ if (list_empty(&q->active))
+ goto out;
+
+ cl = list_first_entry(&q->active, struct ets_class, alist);
+ skb = cl->qdisc->ops->peek(cl->qdisc);
+ if (!skb) {
+ qdisc_warn_nonwc(__func__, cl->qdisc);
+ goto out;
+ }
+
+ len = qdisc_pkt_len(skb);
+ if (len <= cl->deficit) {
+ cl->deficit -= len;
+ skb = qdisc_dequeue_peeked(cl->qdisc);
+ if (unlikely(!skb))
+ goto out;
+ if (cl->qdisc->q.qlen == 0)
+ list_del(&cl->alist);
+ return ets_qdisc_dequeue_skb(sch, skb);
+ }
+
+ cl->deficit += cl->quantum;
+ list_move_tail(&cl->alist, &q->active);
+ }
+out:
+ return NULL;
+}
+
+static int ets_qdisc_priomap_parse(struct nlattr *priomap_attr,
+ unsigned int nbands, u8 *priomap,
+ struct netlink_ext_ack *extack)
+{
+ const struct nlattr *attr;
+ int prio = 0;
+ u8 band;
+ int rem;
+ int err;
+
+ err = __nla_validate_nested(priomap_attr, TCA_ETS_MAX,
+ ets_priomap_policy, NL_VALIDATE_STRICT,
+ extack);
+ if (err)
+ return err;
+
+ nla_for_each_nested(attr, priomap_attr, rem) {
+ switch (nla_type(attr)) {
+ case TCA_ETS_PRIOMAP_BAND:
+ if (prio > TC_PRIO_MAX) {
+ NL_SET_ERR_MSG_MOD(extack, "Too many priorities in ETS priomap");
+ return -EINVAL;
+ }
+ band = nla_get_u8(attr);
+ if (band >= nbands) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid band number in ETS priomap");
+ return -EINVAL;
+ }
+ priomap[prio++] = band;
+ break;
+ default:
+ WARN_ON_ONCE(1); /* Validate should have caught this. */
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+static int ets_qdisc_quanta_parse(struct Qdisc *sch, struct nlattr *quanta_attr,
+ unsigned int nbands, unsigned int nstrict,
+ unsigned int *quanta,
+ struct netlink_ext_ack *extack)
+{
+ const struct nlattr *attr;
+ int band = nstrict;
+ int rem;
+ int err;
+
+ err = __nla_validate_nested(quanta_attr, TCA_ETS_MAX,
+ ets_quanta_policy, NL_VALIDATE_STRICT,
+ extack);
+ if (err < 0)
+ return err;
+
+ nla_for_each_nested(attr, quanta_attr, rem) {
+ switch (nla_type(attr)) {
+ case TCA_ETS_QUANTA_BAND:
+ if (band >= nbands) {
+ NL_SET_ERR_MSG_MOD(extack, "ETS quanta has more values than bands");
+ return -EINVAL;
+ }
+ err = ets_quantum_parse(sch, attr, &quanta[band++],
+ extack);
+ if (err)
+ return err;
+ break;
+ default:
+ WARN_ON_ONCE(1); /* Validate should have caught this. */
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+static int ets_qdisc_change(struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
+{
+ unsigned int quanta[TCQ_ETS_MAX_BANDS] = {0};
+ struct Qdisc *queues[TCQ_ETS_MAX_BANDS];
+ struct ets_sched *q = qdisc_priv(sch);
+ struct nlattr *tb[TCA_ETS_MAX + 1];
+ unsigned int oldbands = q->nbands;
+ u8 priomap[TC_PRIO_MAX + 1];
+ unsigned int nstrict = 0;
+ unsigned int nbands;
+ unsigned int i;
+ int err;
+
+ if (!opt) {
+ NL_SET_ERR_MSG(extack, "ETS options are required for this operation");
+ return -EINVAL;
+ }
+
+ err = nla_parse_nested(tb, TCA_ETS_MAX, opt, ets_policy, extack);
+ if (err < 0)
+ return err;
+
+ if (!tb[TCA_ETS_NBANDS]) {
+ NL_SET_ERR_MSG_MOD(extack, "Number of bands is a required argument");
+ return -EINVAL;
+ }
+ nbands = nla_get_u8(tb[TCA_ETS_NBANDS]);
+ if (nbands < 1 || nbands > TCQ_ETS_MAX_BANDS) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid number of bands");
+ return -EINVAL;
+ }
+ /* Unless overridden, traffic goes to the last band. */
+ memset(priomap, nbands - 1, sizeof(priomap));
+
+ if (tb[TCA_ETS_NSTRICT]) {
+ nstrict = nla_get_u8(tb[TCA_ETS_NSTRICT]);
+ if (nstrict > nbands) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid number of strict bands");
+ return -EINVAL;
+ }
+ }
+
+ if (tb[TCA_ETS_PRIOMAP]) {
+ err = ets_qdisc_priomap_parse(tb[TCA_ETS_PRIOMAP],
+ nbands, priomap, extack);
+ if (err)
+ return err;
+ }
+
+ if (tb[TCA_ETS_QUANTA]) {
+ err = ets_qdisc_quanta_parse(sch, tb[TCA_ETS_QUANTA],
+ nbands, nstrict, quanta, extack);
+ if (err)
+ return err;
+ }
+ /* If there are more bands than strict + quanta provided, the remaining
+ * ones are ETS with quantum of MTU. Initialize the missing values here.
+ */
+ for (i = nstrict; i < nbands; i++) {
+ if (!quanta[i])
+ quanta[i] = psched_mtu(qdisc_dev(sch));
+ }
+
+ /* Before commit, make sure we can allocate all new qdiscs */
+ for (i = oldbands; i < nbands; i++) {
+ queues[i] = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
+ ets_class_id(sch, &q->classes[i]),
+ extack);
+ if (!queues[i]) {
+ while (i > oldbands)
+ qdisc_put(queues[--i]);
+ return -ENOMEM;
+ }
+ }
+
+ sch_tree_lock(sch);
+
+ q->nbands = nbands;
+ q->nstrict = nstrict;
+ memcpy(q->prio2band, priomap, sizeof(priomap));
+
+ for (i = q->nbands; i < oldbands; i++)
+ qdisc_tree_flush_backlog(q->classes[i].qdisc);
+
+ for (i = 0; i < q->nbands; i++)
+ q->classes[i].quantum = quanta[i];
+
+ for (i = oldbands; i < q->nbands; i++) {
+ q->classes[i].qdisc = queues[i];
+ if (q->classes[i].qdisc != &noop_qdisc)
+ qdisc_hash_add(q->classes[i].qdisc, true);
+ }
+
+ sch_tree_unlock(sch);
+
+ ets_offload_change(sch);
+ for (i = q->nbands; i < oldbands; i++) {
+ qdisc_put(q->classes[i].qdisc);
+ memset(&q->classes[i], 0, sizeof(q->classes[i]));
+ }
+ return 0;
+}
+
+static int ets_qdisc_init(struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
+{
+ struct ets_sched *q = qdisc_priv(sch);
+ int err;
+
+ if (!opt)
+ return -EINVAL;
+
+ err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
+ if (err)
+ return err;
+
+ INIT_LIST_HEAD(&q->active);
+ return ets_qdisc_change(sch, opt, extack);
+}
+
+static void ets_qdisc_reset(struct Qdisc *sch)
+{
+ struct ets_sched *q = qdisc_priv(sch);
+ int band;
+
+ for (band = q->nstrict; band < q->nbands; band++) {
+ if (q->classes[band].qdisc->q.qlen)
+ list_del(&q->classes[band].alist);
+ }
+ for (band = 0; band < q->nbands; band++)
+ qdisc_reset(q->classes[band].qdisc);
+ sch->qstats.backlog = 0;
+ sch->q.qlen = 0;
+}
+
+static void ets_qdisc_destroy(struct Qdisc *sch)
+{
+ struct ets_sched *q = qdisc_priv(sch);
+ int band;
+
+ ets_offload_destroy(sch);
+ tcf_block_put(q->block);
+ for (band = 0; band < q->nbands; band++)
+ qdisc_put(q->classes[band].qdisc);
+}
+
+static int ets_qdisc_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+ struct ets_sched *q = qdisc_priv(sch);
+ struct nlattr *opts;
+ struct nlattr *nest;
+ int band;
+ int prio;
+ int err;
+
+ err = ets_offload_dump(sch);
+ if (err)
+ return err;
+
+ opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
+ if (!opts)
+ goto nla_err;
+
+ if (nla_put_u8(skb, TCA_ETS_NBANDS, q->nbands))
+ goto nla_err;
+
+ if (q->nstrict &&
+ nla_put_u8(skb, TCA_ETS_NSTRICT, q->nstrict))
+ goto nla_err;
+
+ if (q->nbands > q->nstrict) {
+ nest = nla_nest_start(skb, TCA_ETS_QUANTA);
+ if (!nest)
+ goto nla_err;
+
+ for (band = q->nstrict; band < q->nbands; band++) {
+ if (nla_put_u32(skb, TCA_ETS_QUANTA_BAND,
+ q->classes[band].quantum))
+ goto nla_err;
+ }
+
+ nla_nest_end(skb, nest);
+ }
+
+ nest = nla_nest_start(skb, TCA_ETS_PRIOMAP);
+ if (!nest)
+ goto nla_err;
+
+ for (prio = 0; prio <= TC_PRIO_MAX; prio++) {
+ if (nla_put_u8(skb, TCA_ETS_PRIOMAP_BAND, q->prio2band[prio]))
+ goto nla_err;
+ }
+
+ nla_nest_end(skb, nest);
+
+ return nla_nest_end(skb, opts);
+
+nla_err:
+ nla_nest_cancel(skb, opts);
+ return -EMSGSIZE;
+}
+
+static const struct Qdisc_class_ops ets_class_ops = {
+ .change = ets_class_change,
+ .graft = ets_class_graft,
+ .leaf = ets_class_leaf,
+ .find = ets_class_find,
+ .qlen_notify = ets_class_qlen_notify,
+ .dump = ets_class_dump,
+ .dump_stats = ets_class_dump_stats,
+ .walk = ets_qdisc_walk,
+ .tcf_block = ets_qdisc_tcf_block,
+ .bind_tcf = ets_qdisc_bind_tcf,
+ .unbind_tcf = ets_qdisc_unbind_tcf,
+};
+
+static struct Qdisc_ops ets_qdisc_ops __read_mostly = {
+ .cl_ops = &ets_class_ops,
+ .id = "ets",
+ .priv_size = sizeof(struct ets_sched),
+ .enqueue = ets_qdisc_enqueue,
+ .dequeue = ets_qdisc_dequeue,
+ .peek = qdisc_peek_dequeued,
+ .change = ets_qdisc_change,
+ .init = ets_qdisc_init,
+ .reset = ets_qdisc_reset,
+ .destroy = ets_qdisc_destroy,
+ .dump = ets_qdisc_dump,
+ .owner = THIS_MODULE,
+};
+
+static int __init ets_init(void)
+{
+ return register_qdisc(&ets_qdisc_ops);
+}
+
+static void __exit ets_exit(void)
+{
+ unregister_qdisc(&ets_qdisc_ops);
+}
+
+module_init(ets_init);
+module_exit(ets_exit);
+MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index b1c7e726ce5d..a5a295477ecc 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -301,6 +301,9 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q)
f->socket_hash != sk->sk_hash)) {
f->credit = q->initial_quantum;
f->socket_hash = sk->sk_hash;
+ if (q->rate_enable)
+ smp_store_release(&sk->sk_pacing_status,
+ SK_PACING_FQ);
if (fq_flow_is_throttled(f))
fq_flow_unset_throttled(q, f);
f->time_next_packet = 0ULL;
@@ -322,8 +325,12 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q)
fq_flow_set_detached(f);
f->sk = sk;
- if (skb->sk == sk)
+ if (skb->sk == sk) {
f->socket_hash = sk->sk_hash;
+ if (q->rate_enable)
+ smp_store_release(&sk->sk_pacing_status,
+ SK_PACING_FQ);
+ }
f->credit = q->initial_quantum;
rb_link_node(&f->fq_node, parent, p);
@@ -428,17 +435,9 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
f->qlen++;
qdisc_qstats_backlog_inc(sch, skb);
if (fq_flow_is_detached(f)) {
- struct sock *sk = skb->sk;
-
fq_flow_add_tail(&q->new_flows, f);
if (time_after(jiffies, f->age + q->flow_refill_delay))
f->credit = max_t(u32, f->credit, q->quantum);
- if (sk && q->rate_enable) {
- if (unlikely(smp_load_acquire(&sk->sk_pacing_status) !=
- SK_PACING_FQ))
- smp_store_release(&sk->sk_pacing_status,
- SK_PACING_FQ);
- }
q->inactive_flows--;
}
@@ -787,10 +786,12 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt,
if (tb[TCA_FQ_QUANTUM]) {
u32 quantum = nla_get_u32(tb[TCA_FQ_QUANTUM]);
- if (quantum > 0)
+ if (quantum > 0 && quantum <= (1 << 20)) {
q->quantum = quantum;
- else
+ } else {
+ NL_SET_ERR_MSG_MOD(extack, "invalid quantum");
err = -EINVAL;
+ }
}
if (tb[TCA_FQ_INITIAL_QUANTUM])
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 5ab696efca95..6c9595f1048a 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -441,7 +441,7 @@ static void dev_watchdog(struct timer_list *t)
trace_net_dev_xmit_timeout(dev, i);
WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit queue %u timed out\n",
dev->name, netdev_drivername(dev), i);
- dev->netdev_ops->ndo_tx_timeout(dev);
+ dev->netdev_ops->ndo_tx_timeout(dev, i);
}
if (!mod_timer(&dev->watchdog_timer,
round_jiffies(jiffies +
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 18b884cfdfe8..647941702f9f 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -292,8 +292,14 @@ static int prio_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
struct tc_prio_qopt_offload graft_offload;
unsigned long band = arg - 1;
- if (new == NULL)
- new = &noop_qdisc;
+ if (!new) {
+ new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
+ TC_H_MAKE(sch->handle, arg), extack);
+ if (!new)
+ new = &noop_qdisc;
+ else
+ qdisc_hash_add(new, true);
+ }
*old = qdisc_replace(sch, new, &q->queues[band]);
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index bbd5004a5d09..437079a4883d 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -584,7 +584,6 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc,
const gfp_t gfp,
const int peer_state)
{
- struct net *net = sock_net(asoc->base.sk);
struct sctp_transport *peer;
struct sctp_sock *sp;
unsigned short port;
@@ -614,7 +613,7 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc,
return peer;
}
- peer = sctp_transport_new(net, addr, gfp);
+ peer = sctp_transport_new(asoc->base.net, addr, gfp);
if (!peer)
return NULL;
@@ -974,7 +973,7 @@ static void sctp_assoc_bh_rcv(struct work_struct *work)
struct sctp_association *asoc =
container_of(work, struct sctp_association,
base.inqueue.immediate);
- struct net *net = sock_net(asoc->base.sk);
+ struct net *net = asoc->base.net;
union sctp_subtype subtype;
struct sctp_endpoint *ep;
struct sctp_chunk *chunk;
@@ -1442,7 +1441,8 @@ void sctp_assoc_sync_pmtu(struct sctp_association *asoc)
/* Should we send a SACK to update our peer? */
static inline bool sctp_peer_needs_update(struct sctp_association *asoc)
{
- struct net *net = sock_net(asoc->base.sk);
+ struct net *net = asoc->base.net;
+
switch (asoc->state) {
case SCTP_STATE_ESTABLISHED:
case SCTP_STATE_SHUTDOWN_PENDING:
@@ -1576,7 +1576,7 @@ int sctp_assoc_set_bind_addr_from_ep(struct sctp_association *asoc,
if (asoc->peer.ipv6_address)
flags |= SCTP_ADDR6_PEERSUPP;
- return sctp_bind_addr_copy(sock_net(asoc->base.sk),
+ return sctp_bind_addr_copy(asoc->base.net,
&asoc->base.bind_addr,
&asoc->ep->base.bind_addr,
scope, gfp, flags);
diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index cc3ce5d80b08..ab6a997e222f 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -225,7 +225,7 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
if (msg_len >= first_len) {
msg->can_delay = 0;
if (msg_len > first_len)
- SCTP_INC_STATS(sock_net(asoc->base.sk),
+ SCTP_INC_STATS(asoc->base.net,
SCTP_MIB_FRAGUSRMSGS);
} else {
/* Which may be the only one... */
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 3ccab7440c9e..48c9c2c7602f 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -244,7 +244,7 @@ struct sctp_endpoint *sctp_endpoint_is_match(struct sctp_endpoint *ep,
struct sctp_endpoint *retval = NULL;
if ((htons(ep->base.bind_addr.port) == laddr->v4.sin_port) &&
- net_eq(sock_net(ep->base.sk), net)) {
+ net_eq(ep->base.net, net)) {
if (sctp_bind_addr_match(&ep->base.bind_addr, laddr,
sctp_sk(ep->base.sk)))
retval = ep;
@@ -292,8 +292,8 @@ bool sctp_endpoint_is_peeled_off(struct sctp_endpoint *ep,
const union sctp_addr *paddr)
{
struct sctp_sockaddr_entry *addr;
+ struct net *net = ep->base.net;
struct sctp_bind_addr *bp;
- struct net *net = sock_net(ep->base.sk);
bp = &ep->base.bind_addr;
/* This function is called with the socket lock held,
@@ -384,7 +384,7 @@ normal:
if (asoc && sctp_chunk_is_data(chunk))
asoc->peer.last_data_from = chunk->transport;
else {
- SCTP_INC_STATS(sock_net(ep->base.sk), SCTP_MIB_INCTRLCHUNKS);
+ SCTP_INC_STATS(ep->base.net, SCTP_MIB_INCTRLCHUNKS);
if (asoc)
asoc->stats.ictrlchunks++;
}
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 4d2bcfc9d7f8..efaaefc3bb1c 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -937,7 +937,7 @@ int sctp_hash_transport(struct sctp_transport *t)
if (t->asoc->temp)
return 0;
- arg.net = sock_net(t->asoc->base.sk);
+ arg.net = t->asoc->base.net;
arg.paddr = &t->ipaddr;
arg.lport = htons(t->asoc->base.bind_addr.port);
@@ -1004,12 +1004,11 @@ struct sctp_transport *sctp_epaddr_lookup_transport(
const struct sctp_endpoint *ep,
const union sctp_addr *paddr)
{
- struct net *net = sock_net(ep->base.sk);
struct rhlist_head *tmp, *list;
struct sctp_transport *t;
struct sctp_hash_cmp_arg arg = {
.paddr = paddr,
- .net = net,
+ .net = ep->base.net,
.lport = htons(ep->base.bind_addr.port),
};
diff --git a/net/sctp/output.c b/net/sctp/output.c
index dbda7e7927fd..1441eaf460bb 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -282,7 +282,7 @@ static enum sctp_xmit sctp_packet_bundle_sack(struct sctp_packet *pkt,
sctp_chunk_free(sack);
goto out;
}
- SCTP_INC_STATS(sock_net(asoc->base.sk),
+ SCTP_INC_STATS(asoc->base.net,
SCTP_MIB_OUTCTRLCHUNKS);
asoc->stats.octrlchunks++;
asoc->peer.sack_needed = 0;
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 0dab62b67b9a..577e3bc4ee6f 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -36,6 +36,7 @@
#include <net/sctp/sctp.h>
#include <net/sctp/sm.h>
#include <net/sctp/stream_sched.h>
+#include <trace/events/sctp.h>
/* Declare internal functions here. */
static int sctp_acked(struct sctp_sackhdr *sack, __u32 tsn);
@@ -279,7 +280,7 @@ void sctp_outq_free(struct sctp_outq *q)
/* Put a new chunk in an sctp_outq. */
void sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk, gfp_t gfp)
{
- struct net *net = sock_net(q->asoc->base.sk);
+ struct net *net = q->asoc->base.net;
pr_debug("%s: outq:%p, chunk:%p[%s]\n", __func__, q, chunk,
chunk && chunk->chunk_hdr ?
@@ -533,7 +534,7 @@ void sctp_retransmit_mark(struct sctp_outq *q,
void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport,
enum sctp_retransmit_reason reason)
{
- struct net *net = sock_net(q->asoc->base.sk);
+ struct net *net = q->asoc->base.net;
switch (reason) {
case SCTP_RTXR_T3_RTX:
@@ -1238,6 +1239,12 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_chunk *chunk)
/* Grab the association's destination address list. */
transport_list = &asoc->peer.transport_addr_list;
+ /* SCTP path tracepoint for congestion control debugging. */
+ if (trace_sctp_probe_path_enabled()) {
+ list_for_each_entry(transport, transport_list, transports)
+ trace_sctp_probe_path(transport, asoc);
+ }
+
sack_ctsn = ntohl(sack->cum_tsn_ack);
gap_ack_blocks = ntohs(sack->num_gap_ack_blocks);
asoc->stats.gapcnt += gap_ack_blocks;
@@ -1884,6 +1891,6 @@ void sctp_generate_fwdtsn(struct sctp_outq *q, __u32 ctsn)
if (ftsn_chunk) {
list_add_tail(&ftsn_chunk->list, &q->control_chunk_list);
- SCTP_INC_STATS(sock_net(asoc->base.sk), SCTP_MIB_OUTCTRLCHUNKS);
+ SCTP_INC_STATS(asoc->base.net, SCTP_MIB_OUTCTRLCHUNKS);
}
}
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index fbbf19128c2d..78af2fcf90cc 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -227,6 +227,7 @@ static void sctp_v4_from_skb(union sctp_addr *addr, struct sk_buff *skb,
sa->sin_port = sh->dest;
sa->sin_addr.s_addr = ip_hdr(skb)->daddr;
}
+ memset(sa->sin_zero, 0, sizeof(sa->sin_zero));
}
/* Initialize an sctp_addr from a socket. */
@@ -235,6 +236,7 @@ static void sctp_v4_from_sk(union sctp_addr *addr, struct sock *sk)
addr->v4.sin_family = AF_INET;
addr->v4.sin_port = 0;
addr->v4.sin_addr.s_addr = inet_sk(sk)->inet_rcv_saddr;
+ memset(addr->v4.sin_zero, 0, sizeof(addr->v4.sin_zero));
}
/* Initialize sk->sk_rcv_saddr from sctp_addr. */
@@ -257,6 +259,7 @@ static void sctp_v4_from_addr_param(union sctp_addr *addr,
addr->v4.sin_family = AF_INET;
addr->v4.sin_port = port;
addr->v4.sin_addr.s_addr = param->v4.addr.s_addr;
+ memset(addr->v4.sin_zero, 0, sizeof(addr->v4.sin_zero));
}
/* Initialize an address parameter from a sctp_addr and return the length
@@ -281,6 +284,7 @@ static void sctp_v4_dst_saddr(union sctp_addr *saddr, struct flowi4 *fl4,
saddr->v4.sin_family = AF_INET;
saddr->v4.sin_port = port;
saddr->v4.sin_addr.s_addr = fl4->saddr;
+ memset(saddr->v4.sin_zero, 0, sizeof(saddr->v4.sin_zero));
}
/* Compare two addresses exactly. */
@@ -303,6 +307,7 @@ static void sctp_v4_inaddr_any(union sctp_addr *addr, __be16 port)
addr->v4.sin_family = AF_INET;
addr->v4.sin_addr.s_addr = htonl(INADDR_ANY);
addr->v4.sin_port = port;
+ memset(addr->v4.sin_zero, 0, sizeof(addr->v4.sin_zero));
}
/* Is this a wildcard address? */
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 48d63956a68c..09050c1d5517 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -2307,7 +2307,6 @@ int sctp_process_init(struct sctp_association *asoc, struct sctp_chunk *chunk,
const union sctp_addr *peer_addr,
struct sctp_init_chunk *peer_init, gfp_t gfp)
{
- struct net *net = sock_net(asoc->base.sk);
struct sctp_transport *transport;
struct list_head *pos, *temp;
union sctp_params param;
@@ -2363,8 +2362,8 @@ int sctp_process_init(struct sctp_association *asoc, struct sctp_chunk *chunk,
* also give us an option to silently ignore the packet, which
* is what we'll do here.
*/
- if (!net->sctp.addip_noauth &&
- (asoc->peer.asconf_capable && !asoc->peer.auth_capable)) {
+ if (!asoc->base.net->sctp.addip_noauth &&
+ (asoc->peer.asconf_capable && !asoc->peer.auth_capable)) {
asoc->peer.addip_disabled_mask |= (SCTP_PARAM_ADD_IP |
SCTP_PARAM_DEL_IP |
SCTP_PARAM_SET_PRIMARY);
@@ -2491,9 +2490,9 @@ static int sctp_process_param(struct sctp_association *asoc,
const union sctp_addr *peer_addr,
gfp_t gfp)
{
- struct net *net = sock_net(asoc->base.sk);
struct sctp_endpoint *ep = asoc->ep;
union sctp_addr_param *addr_param;
+ struct net *net = asoc->base.net;
struct sctp_transport *t;
enum sctp_scope scope;
union sctp_addr addr;
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index acd737d4c0e0..2bc29463e1dc 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -516,8 +516,6 @@ static void sctp_do_8_2_transport_strike(struct sctp_cmd_seq *commands,
struct sctp_transport *transport,
int is_hb)
{
- struct net *net = sock_net(asoc->base.sk);
-
/* The check for association's overall error counter exceeding the
* threshold is done in the state function.
*/
@@ -544,10 +542,10 @@ static void sctp_do_8_2_transport_strike(struct sctp_cmd_seq *commands,
* is SCTP_ACTIVE, then mark this transport as Partially Failed,
* see SCTP Quick Failover Draft, section 5.1
*/
- if (net->sctp.pf_enable &&
- (transport->state == SCTP_ACTIVE) &&
- (transport->error_count < transport->pathmaxrxt) &&
- (transport->error_count > transport->pf_retrans)) {
+ if (asoc->base.net->sctp.pf_enable &&
+ transport->state == SCTP_ACTIVE &&
+ transport->error_count < transport->pathmaxrxt &&
+ transport->error_count > transport->pf_retrans) {
sctp_assoc_control_transport(asoc, transport,
SCTP_TRANSPORT_PF,
@@ -798,10 +796,8 @@ static int sctp_cmd_process_sack(struct sctp_cmd_seq *cmds,
int err = 0;
if (sctp_outq_sack(&asoc->outqueue, chunk)) {
- struct net *net = sock_net(asoc->base.sk);
-
/* There are no more TSNs awaiting SACK. */
- err = sctp_do_sm(net, SCTP_EVENT_T_OTHER,
+ err = sctp_do_sm(asoc->base.net, SCTP_EVENT_T_OTHER,
SCTP_ST_OTHER(SCTP_EVENT_NO_PENDING_TSN),
asoc->state, asoc->ep, asoc, NULL,
GFP_ATOMIC);
@@ -834,7 +830,7 @@ static void sctp_cmd_assoc_update(struct sctp_cmd_seq *cmds,
struct sctp_association *asoc,
struct sctp_association *new)
{
- struct net *net = sock_net(asoc->base.sk);
+ struct net *net = asoc->base.net;
struct sctp_chunk *abort;
if (!sctp_assoc_update(asoc, new))
@@ -1363,8 +1359,10 @@ static int sctp_cmd_interpreter(enum sctp_event_type event_type,
/* Generate an INIT ACK chunk. */
new_obj = sctp_make_init_ack(asoc, chunk, GFP_ATOMIC,
0);
- if (!new_obj)
- goto nomem;
+ if (!new_obj) {
+ error = -ENOMEM;
+ break;
+ }
sctp_add_cmd_sf(commands, SCTP_CMD_REPLY,
SCTP_CHUNK(new_obj));
@@ -1386,7 +1384,8 @@ static int sctp_cmd_interpreter(enum sctp_event_type event_type,
if (!new_obj) {
if (cmd->obj.chunk)
sctp_chunk_free(cmd->obj.chunk);
- goto nomem;
+ error = -ENOMEM;
+ break;
}
sctp_add_cmd_sf(commands, SCTP_CMD_REPLY,
SCTP_CHUNK(new_obj));
@@ -1433,8 +1432,10 @@ static int sctp_cmd_interpreter(enum sctp_event_type event_type,
/* Generate a SHUTDOWN chunk. */
new_obj = sctp_make_shutdown(asoc, chunk);
- if (!new_obj)
- goto nomem;
+ if (!new_obj) {
+ error = -ENOMEM;
+ break;
+ }
sctp_add_cmd_sf(commands, SCTP_CMD_REPLY,
SCTP_CHUNK(new_obj));
break;
@@ -1770,11 +1771,17 @@ static int sctp_cmd_interpreter(enum sctp_event_type event_type,
break;
}
- if (error)
+ if (error) {
+ cmd = sctp_next_cmd(commands);
+ while (cmd) {
+ if (cmd->verb == SCTP_CMD_REPLY)
+ sctp_chunk_free(cmd->obj.chunk);
+ cmd = sctp_next_cmd(commands);
+ }
break;
+ }
}
-out:
/* If this is in response to a received chunk, wait until
* we are done with the packet to open the queue so that we don't
* send multiple packets in response to a single request.
@@ -1789,7 +1796,4 @@ out:
sp->data_ready_signalled = 0;
return error;
-nomem:
- error = -ENOMEM;
- goto out;
}
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 4ab8208a2dd4..748e3b19ec1d 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -1320,7 +1320,7 @@ static int sctp_sf_check_restart_addrs(const struct sctp_association *new_asoc,
struct sctp_chunk *init,
struct sctp_cmd_seq *commands)
{
- struct net *net = sock_net(new_asoc->base.sk);
+ struct net *net = new_asoc->base.net;
struct sctp_transport *new_addr;
int ret = 1;
@@ -3281,8 +3281,6 @@ enum sctp_disposition sctp_sf_eat_sack_6_2(struct net *net,
struct sctp_sackhdr *sackh;
__u32 ctsn;
- trace_sctp_probe(ep, asoc, chunk);
-
if (!sctp_vtag_verify(chunk, asoc))
return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
@@ -3299,6 +3297,15 @@ enum sctp_disposition sctp_sf_eat_sack_6_2(struct net *net,
chunk->subh.sack_hdr = sackh;
ctsn = ntohl(sackh->cum_tsn_ack);
+ /* If Cumulative TSN Ack beyond the max tsn currently
+ * send, terminating the association and respond to the
+ * sender with an ABORT.
+ */
+ if (TSN_lte(asoc->next_tsn, ctsn))
+ return sctp_sf_violation_ctsn(net, ep, asoc, type, arg, commands);
+
+ trace_sctp_probe(ep, asoc, chunk);
+
/* i) If Cumulative TSN Ack is less than the Cumulative TSN
* Ack Point, then drop the SACK. Since Cumulative TSN
* Ack is monotonically increasing, a SACK whose
@@ -3312,13 +3319,6 @@ enum sctp_disposition sctp_sf_eat_sack_6_2(struct net *net,
return SCTP_DISPOSITION_DISCARD;
}
- /* If Cumulative TSN Ack beyond the max tsn currently
- * send, terminating the association and respond to the
- * sender with an ABORT.
- */
- if (!TSN_lt(ctsn, asoc->next_tsn))
- return sctp_sf_violation_ctsn(net, ep, asoc, type, arg, commands);
-
/* Return this SACK for further processing. */
sctp_add_cmd_sf(commands, SCTP_CMD_PROCESS_SACK, SCTP_CHUNK(chunk));
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 0b485952a71c..1b56fc440606 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -436,8 +436,7 @@ static int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len)
static int sctp_send_asconf(struct sctp_association *asoc,
struct sctp_chunk *chunk)
{
- struct net *net = sock_net(asoc->base.sk);
- int retval = 0;
+ int retval = 0;
/* If there is an outstanding ASCONF chunk, queue it for later
* transmission.
@@ -449,7 +448,7 @@ static int sctp_send_asconf(struct sctp_association *asoc,
/* Hold the chunk until an ASCONF_ACK is received. */
sctp_chunk_hold(chunk);
- retval = sctp_primitive_ASCONF(net, asoc, chunk);
+ retval = sctp_primitive_ASCONF(asoc->base.net, asoc, chunk);
if (retval)
sctp_chunk_free(chunk);
else
@@ -2428,9 +2427,8 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params,
int error;
if (params->spp_flags & SPP_HB_DEMAND && trans) {
- struct net *net = sock_net(trans->asoc->base.sk);
-
- error = sctp_primitive_REQUESTHEARTBEAT(net, trans->asoc, trans);
+ error = sctp_primitive_REQUESTHEARTBEAT(trans->asoc->base.net,
+ trans->asoc, trans);
if (error)
return error;
}
@@ -5364,7 +5362,7 @@ struct sctp_transport *sctp_transport_get_next(struct net *net,
if (!sctp_transport_hold(t))
continue;
- if (net_eq(sock_net(t->asoc->base.sk), net) &&
+ if (net_eq(t->asoc->base.net, net) &&
t->asoc->peer.primary_path == t)
break;
diff --git a/net/sctp/stream.c b/net/sctp/stream.c
index e83cdaa2ab76..67f7e71f9129 100644
--- a/net/sctp/stream.c
+++ b/net/sctp/stream.c
@@ -119,7 +119,7 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt,
* a new one with new outcnt to save memory if needed.
*/
if (outcnt == stream->outcnt)
- goto in;
+ goto handle_in;
/* Filter out chunks queued on streams that won't exist anymore */
sched->unsched_all(stream);
@@ -128,24 +128,28 @@ int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt,
ret = sctp_stream_alloc_out(stream, outcnt, gfp);
if (ret)
- goto out;
+ goto out_err;
for (i = 0; i < stream->outcnt; i++)
SCTP_SO(stream, i)->state = SCTP_STREAM_OPEN;
-in:
+handle_in:
sctp_stream_interleave_init(stream);
if (!incnt)
goto out;
ret = sctp_stream_alloc_in(stream, incnt, gfp);
- if (ret) {
- sched->free(stream);
- genradix_free(&stream->out);
- stream->outcnt = 0;
- goto out;
- }
+ if (ret)
+ goto in_err;
+ goto out;
+
+in_err:
+ sched->free(stream);
+ genradix_free(&stream->in);
+out_err:
+ genradix_free(&stream->out);
+ stream->outcnt = 0;
out:
return ret;
}
@@ -218,10 +222,9 @@ void sctp_stream_update(struct sctp_stream *stream, struct sctp_stream *new)
static int sctp_send_reconf(struct sctp_association *asoc,
struct sctp_chunk *chunk)
{
- struct net *net = sock_net(asoc->base.sk);
int retval = 0;
- retval = sctp_primitive_RECONF(net, asoc, chunk);
+ retval = sctp_primitive_RECONF(asoc->base.net, asoc, chunk);
if (retval)
sctp_chunk_free(chunk);
diff --git a/net/sctp/stream_interleave.c b/net/sctp/stream_interleave.c
index 40c40be23fcb..6b13f737ebf2 100644
--- a/net/sctp/stream_interleave.c
+++ b/net/sctp/stream_interleave.c
@@ -241,9 +241,8 @@ out:
if (!first_frag)
return NULL;
- retval = sctp_make_reassembled_event(sock_net(ulpq->asoc->base.sk),
- &ulpq->reasm, first_frag,
- last_frag);
+ retval = sctp_make_reassembled_event(ulpq->asoc->base.net, &ulpq->reasm,
+ first_frag, last_frag);
if (retval) {
sin->fsn = next_fsn;
if (is_last) {
@@ -326,7 +325,7 @@ static struct sctp_ulpevent *sctp_intl_retrieve_reassembled(
pd_point = sctp_sk(asoc->base.sk)->pd_point;
if (pd_point && pd_point <= pd_len) {
- retval = sctp_make_reassembled_event(sock_net(asoc->base.sk),
+ retval = sctp_make_reassembled_event(asoc->base.net,
&ulpq->reasm,
pd_first, pd_last);
if (retval) {
@@ -337,8 +336,7 @@ static struct sctp_ulpevent *sctp_intl_retrieve_reassembled(
goto out;
found:
- retval = sctp_make_reassembled_event(sock_net(asoc->base.sk),
- &ulpq->reasm,
+ retval = sctp_make_reassembled_event(asoc->base.net, &ulpq->reasm,
first_frag, pos);
if (retval)
retval->msg_flags |= MSG_EOR;
@@ -630,7 +628,7 @@ out:
if (!first_frag)
return NULL;
- retval = sctp_make_reassembled_event(sock_net(ulpq->asoc->base.sk),
+ retval = sctp_make_reassembled_event(ulpq->asoc->base.net,
&ulpq->reasm_uo, first_frag,
last_frag);
if (retval) {
@@ -716,7 +714,7 @@ static struct sctp_ulpevent *sctp_intl_retrieve_reassembled_uo(
pd_point = sctp_sk(asoc->base.sk)->pd_point;
if (pd_point && pd_point <= pd_len) {
- retval = sctp_make_reassembled_event(sock_net(asoc->base.sk),
+ retval = sctp_make_reassembled_event(asoc->base.net,
&ulpq->reasm_uo,
pd_first, pd_last);
if (retval) {
@@ -727,8 +725,7 @@ static struct sctp_ulpevent *sctp_intl_retrieve_reassembled_uo(
goto out;
found:
- retval = sctp_make_reassembled_event(sock_net(asoc->base.sk),
- &ulpq->reasm_uo,
+ retval = sctp_make_reassembled_event(asoc->base.net, &ulpq->reasm_uo,
first_frag, pos);
if (retval)
retval->msg_flags |= MSG_EOR;
@@ -814,7 +811,7 @@ static struct sctp_ulpevent *sctp_intl_retrieve_first_uo(struct sctp_ulpq *ulpq)
return NULL;
out:
- retval = sctp_make_reassembled_event(sock_net(ulpq->asoc->base.sk),
+ retval = sctp_make_reassembled_event(ulpq->asoc->base.net,
&ulpq->reasm_uo, first_frag,
last_frag);
if (retval) {
@@ -921,7 +918,7 @@ static struct sctp_ulpevent *sctp_intl_retrieve_first(struct sctp_ulpq *ulpq)
return NULL;
out:
- retval = sctp_make_reassembled_event(sock_net(ulpq->asoc->base.sk),
+ retval = sctp_make_reassembled_event(ulpq->asoc->base.net,
&ulpq->reasm, first_frag,
last_frag);
if (retval) {
@@ -1159,7 +1156,7 @@ static void sctp_generate_iftsn(struct sctp_outq *q, __u32 ctsn)
if (ftsn_chunk) {
list_add_tail(&ftsn_chunk->list, &q->control_chunk_list);
- SCTP_INC_STATS(sock_net(asoc->base.sk), SCTP_MIB_OUTCTRLCHUNKS);
+ SCTP_INC_STATS(asoc->base.net, SCTP_MIB_OUTCTRLCHUNKS);
}
}
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 7235a6032671..806af58f4375 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -263,7 +263,7 @@ bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu)
pf->af->from_sk(&addr, sk);
pf->to_sk_daddr(&t->ipaddr, sk);
- dst->ops->update_pmtu(dst, sk, NULL, pmtu);
+ dst->ops->update_pmtu(dst, sk, NULL, pmtu, true);
pf->to_sk_daddr(&addr, sk);
dst = sctp_transport_dst_check(t);
@@ -334,7 +334,7 @@ void sctp_transport_update_rto(struct sctp_transport *tp, __u32 rtt)
pr_debug("%s: rto_pending not set on transport %p!\n", __func__, tp);
if (tp->rttvar || tp->srtt) {
- struct net *net = sock_net(tp->asoc->base.sk);
+ struct net *net = tp->asoc->base.net;
/* 6.3.1 C3) When a new RTT measurement R' is made, set
* RTTVAR <- (1 - RTO.Beta) * RTTVAR + RTO.Beta * |SRTT - R'|
* SRTT <- (1 - RTO.Alpha) * SRTT + RTO.Alpha * R'
diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
index b6536b7f14c0..1c6c640607c5 100644
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@ -486,10 +486,9 @@ static struct sctp_ulpevent *sctp_ulpq_retrieve_reassembled(struct sctp_ulpq *ul
cevent = sctp_skb2event(pd_first);
pd_point = sctp_sk(asoc->base.sk)->pd_point;
if (pd_point && pd_point <= pd_len) {
- retval = sctp_make_reassembled_event(sock_net(asoc->base.sk),
+ retval = sctp_make_reassembled_event(asoc->base.net,
&ulpq->reasm,
- pd_first,
- pd_last);
+ pd_first, pd_last);
if (retval)
sctp_ulpq_set_pd(ulpq);
}
@@ -497,7 +496,7 @@ static struct sctp_ulpevent *sctp_ulpq_retrieve_reassembled(struct sctp_ulpq *ul
done:
return retval;
found:
- retval = sctp_make_reassembled_event(sock_net(ulpq->asoc->base.sk),
+ retval = sctp_make_reassembled_event(ulpq->asoc->base.net,
&ulpq->reasm, first_frag, pos);
if (retval)
retval->msg_flags |= MSG_EOR;
@@ -563,8 +562,8 @@ static struct sctp_ulpevent *sctp_ulpq_retrieve_partial(struct sctp_ulpq *ulpq)
* further.
*/
done:
- retval = sctp_make_reassembled_event(sock_net(ulpq->asoc->base.sk),
- &ulpq->reasm, first_frag, last_frag);
+ retval = sctp_make_reassembled_event(ulpq->asoc->base.net, &ulpq->reasm,
+ first_frag, last_frag);
if (retval && is_last)
retval->msg_flags |= MSG_EOR;
@@ -664,8 +663,8 @@ static struct sctp_ulpevent *sctp_ulpq_retrieve_first(struct sctp_ulpq *ulpq)
* further.
*/
done:
- retval = sctp_make_reassembled_event(sock_net(ulpq->asoc->base.sk),
- &ulpq->reasm, first_frag, last_frag);
+ retval = sctp_make_reassembled_event(ulpq->asoc->base.net, &ulpq->reasm,
+ first_frag, last_frag);
return retval;
}
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index b997072c72e5..cee5bf4a9bb9 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -857,6 +857,8 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr,
goto out;
sock_hold(&smc->sk); /* sock put in passive closing */
+ if (smc->use_fallback)
+ goto out;
if (flags & O_NONBLOCK) {
if (schedule_work(&smc->connect_work))
smc->connect_nonblock = 1;
@@ -1721,8 +1723,6 @@ static int smc_setsockopt(struct socket *sock, int level, int optname,
sk->sk_err = smc->clcsock->sk->sk_err;
sk->sk_error_report(sk);
}
- if (rc)
- return rc;
if (optlen < sizeof(int))
return -EINVAL;
@@ -1730,6 +1730,8 @@ static int smc_setsockopt(struct socket *sock, int level, int optname,
return -EFAULT;
lock_sock(sk);
+ if (rc || smc->use_fallback)
+ goto out;
switch (optname) {
case TCP_ULP:
case TCP_FASTOPEN:
@@ -1741,15 +1743,14 @@ static int smc_setsockopt(struct socket *sock, int level, int optname,
smc_switch_to_fallback(smc);
smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP;
} else {
- if (!smc->use_fallback)
- rc = -EINVAL;
+ rc = -EINVAL;
}
break;
case TCP_NODELAY:
if (sk->sk_state != SMC_INIT &&
sk->sk_state != SMC_LISTEN &&
sk->sk_state != SMC_CLOSED) {
- if (val && !smc->use_fallback)
+ if (val)
mod_delayed_work(system_wq, &smc->conn.tx_work,
0);
}
@@ -1758,7 +1759,7 @@ static int smc_setsockopt(struct socket *sock, int level, int optname,
if (sk->sk_state != SMC_INIT &&
sk->sk_state != SMC_LISTEN &&
sk->sk_state != SMC_CLOSED) {
- if (!val && !smc->use_fallback)
+ if (!val)
mod_delayed_work(system_wq, &smc->conn.tx_work,
0);
}
@@ -1769,6 +1770,7 @@ static int smc_setsockopt(struct socket *sock, int level, int optname,
default:
break;
}
+out:
release_sock(sk);
return rc;
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index bb92c7c6214c..2249de5379ee 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -41,7 +41,7 @@ static struct smc_lgr_list smc_lgr_list = { /* established link groups */
.num = 0,
};
-static atomic_t lgr_cnt; /* number of existing link groups */
+static atomic_t lgr_cnt = ATOMIC_INIT(0); /* number of existing link groups */
static DECLARE_WAIT_QUEUE_HEAD(lgrs_deleted);
static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
@@ -1287,7 +1287,7 @@ static int smc_core_reboot_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
smc_lgrs_shutdown();
-
+ smc_ib_unregister_client();
return 0;
}
@@ -1297,7 +1297,6 @@ static struct notifier_block smc_reboot_notifier = {
int __init smc_core_init(void)
{
- atomic_set(&lgr_cnt, 0);
return register_reboot_notifier(&smc_reboot_notifier);
}
diff --git a/net/socket.c b/net/socket.c
index 4d38d49d6ad9..b79a05de7c6e 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -129,6 +129,18 @@ static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
struct pipe_inode_info *pipe, size_t len,
unsigned int flags);
+#ifdef CONFIG_PROC_FS
+static void sock_show_fdinfo(struct seq_file *m, struct file *f)
+{
+ struct socket *sock = f->private_data;
+
+ if (sock->ops->show_fdinfo)
+ sock->ops->show_fdinfo(m, sock);
+}
+#else
+#define sock_show_fdinfo NULL
+#endif
+
/*
* Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
* in the operation structures but are done directly via the socketcall() multiplexor.
@@ -150,6 +162,7 @@ static const struct file_operations socket_file_ops = {
.sendpage = sock_sendpage,
.splice_write = generic_splice_sendpage,
.splice_read = sock_splice_read,
+ .show_fdinfo = sock_show_fdinfo,
};
/*
@@ -957,7 +970,7 @@ static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
.msg_iocb = iocb};
ssize_t res;
- if (file->f_flags & O_NONBLOCK)
+ if (file->f_flags & O_NONBLOCK || (iocb->ki_flags & IOCB_NOWAIT))
msg.msg_flags = MSG_DONTWAIT;
if (iocb->ki_pos != 0)
@@ -982,7 +995,7 @@ static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
if (iocb->ki_pos != 0)
return -ESPIPE;
- if (file->f_flags & O_NONBLOCK)
+ if (file->f_flags & O_NONBLOCK || (iocb->ki_flags & IOCB_NOWAIT))
msg.msg_flags = MSG_DONTWAIT;
if (sock->type == SOCK_SEQPACKET)
diff --git a/net/tipc/Makefile b/net/tipc/Makefile
index 11255e970dd4..ee49a9f1dd4f 100644
--- a/net/tipc/Makefile
+++ b/net/tipc/Makefile
@@ -9,7 +9,7 @@ tipc-y += addr.o bcast.o bearer.o \
core.o link.o discover.o msg.o \
name_distr.o subscr.o monitor.o name_table.o net.o \
netlink.o netlink_compat.o node.o socket.o eth_media.o \
- topsrv.o socket.o group.o trace.o
+ topsrv.o group.o trace.o
CFLAGS_trace.o += -I$(src)
@@ -20,5 +20,3 @@ tipc-$(CONFIG_TIPC_CRYPTO) += crypto.o
obj-$(CONFIG_TIPC_DIAG) += diag.o
-
-tipc_diag-y := diag.o
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 55aeba681cf4..4c20be08b9c4 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -305,17 +305,17 @@ static int tipc_rcast_xmit(struct net *net, struct sk_buff_head *pkts,
* @skb: socket buffer to copy
* @method: send method to be used
* @dests: destination nodes for message.
- * @cong_link_cnt: returns number of encountered congested destination links
* Returns 0 if success, otherwise errno
*/
static int tipc_mcast_send_sync(struct net *net, struct sk_buff *skb,
struct tipc_mc_method *method,
- struct tipc_nlist *dests,
- u16 *cong_link_cnt)
+ struct tipc_nlist *dests)
{
struct tipc_msg *hdr, *_hdr;
struct sk_buff_head tmpq;
struct sk_buff *_skb;
+ u16 cong_link_cnt;
+ int rc = 0;
/* Is a cluster supporting with new capabilities ? */
if (!(tipc_net(net)->capabilities & TIPC_MCAST_RBCTL))
@@ -343,18 +343,19 @@ static int tipc_mcast_send_sync(struct net *net, struct sk_buff *skb,
_hdr = buf_msg(_skb);
msg_set_size(_hdr, MCAST_H_SIZE);
msg_set_is_rcast(_hdr, !msg_is_rcast(hdr));
+ msg_set_errcode(_hdr, TIPC_ERR_NO_PORT);
__skb_queue_head_init(&tmpq);
__skb_queue_tail(&tmpq, _skb);
if (method->rcast)
- tipc_bcast_xmit(net, &tmpq, cong_link_cnt);
+ rc = tipc_bcast_xmit(net, &tmpq, &cong_link_cnt);
else
- tipc_rcast_xmit(net, &tmpq, dests, cong_link_cnt);
+ rc = tipc_rcast_xmit(net, &tmpq, dests, &cong_link_cnt);
/* This queue should normally be empty by now */
__skb_queue_purge(&tmpq);
- return 0;
+ return rc;
}
/* tipc_mcast_xmit - deliver message to indicated destination nodes
@@ -396,9 +397,14 @@ int tipc_mcast_xmit(struct net *net, struct sk_buff_head *pkts,
msg_set_is_rcast(hdr, method->rcast);
/* Switch method ? */
- if (rcast != method->rcast)
- tipc_mcast_send_sync(net, skb, method,
- dests, cong_link_cnt);
+ if (rcast != method->rcast) {
+ rc = tipc_mcast_send_sync(net, skb, method, dests);
+ if (unlikely(rc)) {
+ pr_err("Unable to send SYN: method %d, rc %d\n",
+ rcast, rc);
+ goto exit;
+ }
+ }
if (method->rcast)
rc = tipc_rcast_xmit(net, pkts, dests, cong_link_cnt);
@@ -562,18 +568,18 @@ int tipc_bclink_reset_stats(struct net *net)
return 0;
}
-static int tipc_bc_link_set_queue_limits(struct net *net, u32 limit)
+static int tipc_bc_link_set_queue_limits(struct net *net, u32 max_win)
{
struct tipc_link *l = tipc_bc_sndlink(net);
if (!l)
return -ENOPROTOOPT;
- if (limit < BCLINK_WIN_MIN)
- limit = BCLINK_WIN_MIN;
- if (limit > TIPC_MAX_LINK_WIN)
+ if (max_win < BCLINK_WIN_MIN)
+ max_win = BCLINK_WIN_MIN;
+ if (max_win > TIPC_MAX_LINK_WIN)
return -EINVAL;
tipc_bcast_lock(net);
- tipc_link_set_queue_limits(l, limit);
+ tipc_link_set_queue_limits(l, BCLINK_WIN_MIN, max_win);
tipc_bcast_unlock(net);
return 0;
}
@@ -683,6 +689,7 @@ int tipc_bcast_init(struct net *net)
if (!tipc_link_bc_create(net, 0, 0,
FB_MTU,
BCLINK_WIN_DEFAULT,
+ BCLINK_WIN_DEFAULT,
0,
&bb->inputq,
NULL,
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index d7ec26bd739d..34ca7b789eba 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -311,7 +311,8 @@ static int tipc_enable_bearer(struct net *net, const char *name,
b->identity = bearer_id;
b->tolerance = m->tolerance;
- b->window = m->window;
+ b->min_win = m->min_win;
+ b->max_win = m->max_win;
b->domain = disc_domain;
b->net_plane = bearer_id + 'A';
b->priority = prio;
@@ -796,7 +797,7 @@ static int __tipc_nl_add_bearer(struct tipc_nl_msg *msg,
goto prop_msg_full;
if (nla_put_u32(msg->skb, TIPC_NLA_PROP_TOL, bearer->tolerance))
goto prop_msg_full;
- if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, bearer->window))
+ if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, bearer->max_win))
goto prop_msg_full;
if (bearer->media->type_id == TIPC_MEDIA_TYPE_UDP)
if (nla_put_u32(msg->skb, TIPC_NLA_PROP_MTU, bearer->mtu))
@@ -1088,7 +1089,7 @@ int __tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info)
if (props[TIPC_NLA_PROP_PRIO])
b->priority = nla_get_u32(props[TIPC_NLA_PROP_PRIO]);
if (props[TIPC_NLA_PROP_WIN])
- b->window = nla_get_u32(props[TIPC_NLA_PROP_WIN]);
+ b->max_win = nla_get_u32(props[TIPC_NLA_PROP_WIN]);
if (props[TIPC_NLA_PROP_MTU]) {
if (b->media->type_id != TIPC_MEDIA_TYPE_UDP)
return -EINVAL;
@@ -1142,7 +1143,7 @@ static int __tipc_nl_add_media(struct tipc_nl_msg *msg,
goto prop_msg_full;
if (nla_put_u32(msg->skb, TIPC_NLA_PROP_TOL, media->tolerance))
goto prop_msg_full;
- if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, media->window))
+ if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, media->max_win))
goto prop_msg_full;
if (media->type_id == TIPC_MEDIA_TYPE_UDP)
if (nla_put_u32(msg->skb, TIPC_NLA_PROP_MTU, media->mtu))
@@ -1275,7 +1276,7 @@ int __tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info)
if (props[TIPC_NLA_PROP_PRIO])
m->priority = nla_get_u32(props[TIPC_NLA_PROP_PRIO]);
if (props[TIPC_NLA_PROP_WIN])
- m->window = nla_get_u32(props[TIPC_NLA_PROP_WIN]);
+ m->max_win = nla_get_u32(props[TIPC_NLA_PROP_WIN]);
if (props[TIPC_NLA_PROP_MTU]) {
if (m->type_id != TIPC_MEDIA_TYPE_UDP)
return -EINVAL;
diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h
index d0c79cc6c0c2..bc0023119da2 100644
--- a/net/tipc/bearer.h
+++ b/net/tipc/bearer.h
@@ -119,7 +119,8 @@ struct tipc_media {
char *raw);
u32 priority;
u32 tolerance;
- u32 window;
+ u32 min_win;
+ u32 max_win;
u32 mtu;
u32 type_id;
u32 hwaddr_len;
@@ -158,7 +159,8 @@ struct tipc_bearer {
struct packet_type pt;
struct rcu_head rcu;
u32 priority;
- u32 window;
+ u32 min_win;
+ u32 max_win;
u32 tolerance;
u32 domain;
u32 identity;
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index b043e8c6397a..bfe43da127c0 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -194,6 +194,7 @@ void tipc_disc_rcv(struct net *net, struct sk_buff *skb,
{
struct tipc_net *tn = tipc_net(net);
struct tipc_msg *hdr = buf_msg(skb);
+ u32 pnet_hash = msg_peer_net_hash(hdr);
u16 caps = msg_node_capabilities(hdr);
bool legacy = tn->legacy_addr_format;
u32 sugg = msg_sugg_node_addr(hdr);
@@ -242,9 +243,8 @@ void tipc_disc_rcv(struct net *net, struct sk_buff *skb,
return;
if (!tipc_in_scope(legacy, b->domain, src))
return;
- tipc_node_check_dest(net, src, peer_id, b, caps, signature,
- msg_peer_net_hash(hdr), &maddr, &respond,
- &dupl_addr);
+ tipc_node_check_dest(net, src, peer_id, b, caps, signature, pnet_hash,
+ &maddr, &respond, &dupl_addr);
if (dupl_addr)
disc_dupl_alert(b, src, &maddr);
if (!respond)
diff --git a/net/tipc/eth_media.c b/net/tipc/eth_media.c
index f69a2fde9f4a..8b0bb600602d 100644
--- a/net/tipc/eth_media.c
+++ b/net/tipc/eth_media.c
@@ -92,7 +92,8 @@ struct tipc_media eth_media_info = {
.raw2addr = tipc_eth_raw2addr,
.priority = TIPC_DEF_LINK_PRI,
.tolerance = TIPC_DEF_LINK_TOL,
- .window = TIPC_DEF_LINK_WIN,
+ .min_win = TIPC_DEF_LINK_WIN,
+ .max_win = TIPC_MAX_LINK_WIN,
.type_id = TIPC_MEDIA_TYPE_ETH,
.hwaddr_len = ETH_ALEN,
.name = "eth"
diff --git a/net/tipc/ib_media.c b/net/tipc/ib_media.c
index e8c16718e3fa..7aa9ff88458d 100644
--- a/net/tipc/ib_media.c
+++ b/net/tipc/ib_media.c
@@ -42,6 +42,8 @@
#include "core.h"
#include "bearer.h"
+#define TIPC_MAX_IB_LINK_WIN 500
+
/* convert InfiniBand address (media address format) media address to string */
static int tipc_ib_addr2str(struct tipc_media_addr *a, char *str_buf,
int str_size)
@@ -94,7 +96,8 @@ struct tipc_media ib_media_info = {
.raw2addr = tipc_ib_raw2addr,
.priority = TIPC_DEF_LINK_PRI,
.tolerance = TIPC_DEF_LINK_TOL,
- .window = TIPC_DEF_LINK_WIN,
+ .min_win = TIPC_DEF_LINK_WIN,
+ .max_win = TIPC_MAX_IB_LINK_WIN,
.type_id = TIPC_MEDIA_TYPE_IB,
.hwaddr_len = INFINIBAND_ALEN,
.name = "ib"
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 24d4d10756d3..467c53a1fb5c 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -164,7 +164,6 @@ struct tipc_link {
struct sk_buff *target_bskb;
} backlog[5];
u16 snd_nxt;
- u16 window;
/* Reception */
u16 rcv_nxt;
@@ -175,6 +174,12 @@ struct tipc_link {
/* Congestion handling */
struct sk_buff_head wakeupq;
+ u16 window;
+ u16 min_win;
+ u16 ssthresh;
+ u16 max_win;
+ u16 cong_acks;
+ u16 checkpoint;
/* Fragmentation/reassembly */
struct sk_buff *reasm_buf;
@@ -244,12 +249,13 @@ static int tipc_link_build_nack_msg(struct tipc_link *l,
struct sk_buff_head *xmitq);
static void tipc_link_build_bc_init_msg(struct tipc_link *l,
struct sk_buff_head *xmitq);
-static bool tipc_link_release_pkts(struct tipc_link *l, u16 to);
-static u16 tipc_build_gap_ack_blks(struct tipc_link *l, void *data);
+static int tipc_link_release_pkts(struct tipc_link *l, u16 to);
+static u16 tipc_build_gap_ack_blks(struct tipc_link *l, void *data, u16 gap);
static int tipc_link_advance_transmq(struct tipc_link *l, u16 acked, u16 gap,
struct tipc_gap_ack_blks *ga,
struct sk_buff_head *xmitq);
-
+static void tipc_link_update_cwin(struct tipc_link *l, int released,
+ bool retransmitted);
/*
* Simple non-static link routines (i.e. referenced outside this file)
*/
@@ -308,9 +314,14 @@ u32 tipc_link_id(struct tipc_link *l)
return l->peer_bearer_id << 16 | l->bearer_id;
}
-int tipc_link_window(struct tipc_link *l)
+int tipc_link_min_win(struct tipc_link *l)
{
- return l->window;
+ return l->min_win;
+}
+
+int tipc_link_max_win(struct tipc_link *l)
+{
+ return l->max_win;
}
int tipc_link_prio(struct tipc_link *l)
@@ -436,7 +447,8 @@ u32 tipc_link_state(struct tipc_link *l)
* @net_plane: network plane (A,B,c..) this link belongs to
* @mtu: mtu to be advertised by link
* @priority: priority to be used by link
- * @window: send window to be used by link
+ * @min_win: minimal send window to be used by link
+ * @max_win: maximal send window to be used by link
* @session: session to be used by link
* @ownnode: identity of own node
* @peer: node id of peer node
@@ -451,7 +463,7 @@ u32 tipc_link_state(struct tipc_link *l)
*/
bool tipc_link_create(struct net *net, char *if_name, int bearer_id,
int tolerance, char net_plane, u32 mtu, int priority,
- int window, u32 session, u32 self,
+ u32 min_win, u32 max_win, u32 session, u32 self,
u32 peer, u8 *peer_id, u16 peer_caps,
struct tipc_link *bc_sndlink,
struct tipc_link *bc_rcvlink,
@@ -495,7 +507,7 @@ bool tipc_link_create(struct net *net, char *if_name, int bearer_id,
l->advertised_mtu = mtu;
l->mtu = mtu;
l->priority = priority;
- tipc_link_set_queue_limits(l, window);
+ tipc_link_set_queue_limits(l, min_win, max_win);
l->ackers = 1;
l->bc_sndlink = bc_sndlink;
l->bc_rcvlink = bc_rcvlink;
@@ -523,7 +535,7 @@ bool tipc_link_create(struct net *net, char *if_name, int bearer_id,
* Returns true if link was created, otherwise false
*/
bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer,
- int mtu, int window, u16 peer_caps,
+ int mtu, u32 min_win, u32 max_win, u16 peer_caps,
struct sk_buff_head *inputq,
struct sk_buff_head *namedq,
struct tipc_link *bc_sndlink,
@@ -531,9 +543,9 @@ bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer,
{
struct tipc_link *l;
- if (!tipc_link_create(net, "", MAX_BEARERS, 0, 'Z', mtu, 0, window,
- 0, ownnode, peer, NULL, peer_caps, bc_sndlink,
- NULL, inputq, namedq, link))
+ if (!tipc_link_create(net, "", MAX_BEARERS, 0, 'Z', mtu, 0, min_win,
+ max_win, 0, ownnode, peer, NULL, peer_caps,
+ bc_sndlink, NULL, inputq, namedq, link))
return false;
l = *link;
@@ -772,6 +784,8 @@ bool tipc_link_too_silent(struct tipc_link *l)
return (l->silent_intv_cnt + 2 > l->abort_limit);
}
+static int tipc_link_bc_retrans(struct tipc_link *l, struct tipc_link *r,
+ u16 from, u16 to, struct sk_buff_head *xmitq);
/* tipc_link_timeout - perform periodic task as instructed from node timeout
*/
int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq)
@@ -804,6 +818,11 @@ int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq)
probe |= l->silent_intv_cnt;
if (probe || mstate->monitoring)
l->silent_intv_cnt++;
+ if (l->snd_nxt == l->checkpoint) {
+ tipc_link_update_cwin(l, 0, 0);
+ probe = true;
+ }
+ l->checkpoint = l->snd_nxt;
break;
case LINK_RESET:
setup = l->rst_cnt++ <= 4;
@@ -959,7 +978,7 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list,
int pkt_cnt = skb_queue_len(list);
int imp = msg_importance(hdr);
unsigned int mss = tipc_link_mss(l);
- unsigned int maxwin = l->window;
+ unsigned int cwin = l->window;
unsigned int mtu = l->mtu;
bool new_bundle;
int rc = 0;
@@ -988,7 +1007,7 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list,
/* Prepare each packet for sending, and add to relevant queue: */
while ((skb = __skb_dequeue(list))) {
- if (likely(skb_queue_len(transmq) < maxwin)) {
+ if (likely(skb_queue_len(transmq) < cwin)) {
hdr = buf_msg(skb);
msg_set_seqno(hdr, seqno);
msg_set_ack(hdr, ack);
@@ -1035,17 +1054,61 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list,
return rc;
}
+static void tipc_link_update_cwin(struct tipc_link *l, int released,
+ bool retransmitted)
+{
+ int bklog_len = skb_queue_len(&l->backlogq);
+ struct sk_buff_head *txq = &l->transmq;
+ int txq_len = skb_queue_len(txq);
+ u16 cwin = l->window;
+
+ /* Enter fast recovery */
+ if (unlikely(retransmitted)) {
+ l->ssthresh = max_t(u16, l->window / 2, 300);
+ l->window = l->ssthresh;
+ return;
+ }
+ /* Enter slow start */
+ if (unlikely(!released)) {
+ l->ssthresh = max_t(u16, l->window / 2, 300);
+ l->window = l->min_win;
+ return;
+ }
+ /* Don't increase window if no pressure on the transmit queue */
+ if (txq_len + bklog_len < cwin)
+ return;
+
+ /* Don't increase window if there are holes the transmit queue */
+ if (txq_len && l->snd_nxt - buf_seqno(skb_peek(txq)) != txq_len)
+ return;
+
+ l->cong_acks += released;
+
+ /* Slow start */
+ if (cwin <= l->ssthresh) {
+ l->window = min_t(u16, cwin + released, l->max_win);
+ return;
+ }
+ /* Congestion avoidance */
+ if (l->cong_acks < cwin)
+ return;
+ l->window = min_t(u16, ++cwin, l->max_win);
+ l->cong_acks = 0;
+}
+
static void tipc_link_advance_backlog(struct tipc_link *l,
struct sk_buff_head *xmitq)
{
+ u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1;
+ struct sk_buff_head *txq = &l->transmq;
struct sk_buff *skb, *_skb;
- struct tipc_msg *hdr;
- u16 seqno = l->snd_nxt;
u16 ack = l->rcv_nxt - 1;
- u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1;
+ u16 seqno = l->snd_nxt;
+ struct tipc_msg *hdr;
+ u16 cwin = l->window;
u32 imp;
- while (skb_queue_len(&l->transmq) < l->window) {
+ while (skb_queue_len(txq) < cwin) {
skb = skb_peek(&l->backlogq);
if (!skb)
break;
@@ -1141,6 +1204,7 @@ static int tipc_link_bc_retrans(struct tipc_link *l, struct tipc_link *r,
struct sk_buff *_skb, *skb = skb_peek(&l->transmq);
u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1;
u16 ack = l->rcv_nxt - 1;
+ int retransmitted = 0;
struct tipc_msg *hdr;
int rc = 0;
@@ -1160,7 +1224,6 @@ static int tipc_link_bc_retrans(struct tipc_link *l, struct tipc_link *r,
continue;
if (more(msg_seqno(hdr), to))
break;
-
if (time_before(jiffies, TIPC_SKB_CB(skb)->nxt_retr))
continue;
TIPC_SKB_CB(skb)->nxt_retr = TIPC_BC_RETR_LIM;
@@ -1173,11 +1236,12 @@ static int tipc_link_bc_retrans(struct tipc_link *l, struct tipc_link *r,
_skb->priority = TC_PRIO_CONTROL;
__skb_queue_tail(xmitq, _skb);
l->stats.retransmitted++;
-
+ retransmitted++;
/* Increase actual retrans counter & mark first time */
if (!TIPC_SKB_CB(skb)->retr_cnt++)
TIPC_SKB_CB(skb)->retr_stamp = jiffies;
}
+ tipc_link_update_cwin(l, 0, retransmitted);
return 0;
}
@@ -1338,9 +1402,9 @@ static int tipc_link_tnl_rcv(struct tipc_link *l, struct sk_buff *skb,
return rc;
}
-static bool tipc_link_release_pkts(struct tipc_link *l, u16 acked)
+static int tipc_link_release_pkts(struct tipc_link *l, u16 acked)
{
- bool released = false;
+ int released = 0;
struct sk_buff *skb, *tmp;
skb_queue_walk_safe(&l->transmq, skb, tmp) {
@@ -1348,7 +1412,7 @@ static bool tipc_link_release_pkts(struct tipc_link *l, u16 acked)
break;
__skb_unlink(skb, &l->transmq);
kfree_skb(skb);
- released = true;
+ released++;
}
return released;
}
@@ -1359,14 +1423,14 @@ static bool tipc_link_release_pkts(struct tipc_link *l, u16 acked)
*
* returns the actual allocated memory size
*/
-static u16 tipc_build_gap_ack_blks(struct tipc_link *l, void *data)
+static u16 tipc_build_gap_ack_blks(struct tipc_link *l, void *data, u16 gap)
{
struct sk_buff *skb = skb_peek(&l->deferdq);
struct tipc_gap_ack_blks *ga = data;
u16 len, expect, seqno = 0;
u8 n = 0;
- if (!skb)
+ if (!skb || !gap)
goto exit;
expect = buf_seqno(skb);
@@ -1417,8 +1481,10 @@ static int tipc_link_advance_transmq(struct tipc_link *l, u16 acked, u16 gap,
struct sk_buff *skb, *_skb, *tmp;
struct tipc_msg *hdr;
u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1;
+ bool retransmitted = false;
u16 ack = l->rcv_nxt - 1;
bool passed = false;
+ u16 released = 0;
u16 seqno, n = 0;
int rc = 0;
@@ -1430,6 +1496,7 @@ next_gap_ack:
/* release skb */
__skb_unlink(skb, &l->transmq);
kfree_skb(skb);
+ released++;
} else if (less_eq(seqno, acked + gap)) {
/* First, check if repeated retrans failures occurs? */
if (!passed && link_retransmit_failure(l, l, &rc))
@@ -1449,7 +1516,7 @@ next_gap_ack:
_skb->priority = TC_PRIO_CONTROL;
__skb_queue_tail(xmitq, _skb);
l->stats.retransmitted++;
-
+ retransmitted = true;
/* Increase actual retrans counter & mark first time */
if (!TIPC_SKB_CB(skb)->retr_cnt++)
TIPC_SKB_CB(skb)->retr_stamp = jiffies;
@@ -1463,7 +1530,10 @@ next_gap_ack:
goto next_gap_ack;
}
}
-
+ if (released || retransmitted)
+ tipc_link_update_cwin(l, released, retransmitted);
+ if (released)
+ tipc_link_advance_backlog(l, xmitq);
return 0;
}
@@ -1487,7 +1557,6 @@ int tipc_link_build_state_msg(struct tipc_link *l, struct sk_buff_head *xmitq)
l->snd_nxt = l->rcv_nxt;
return TIPC_LINK_SND_STATE;
}
-
/* Unicast ACK */
l->rcv_unacked = 0;
l->stats.sent_acks++;
@@ -1521,7 +1590,8 @@ static int tipc_link_build_nack_msg(struct tipc_link *l,
struct sk_buff_head *xmitq)
{
u32 def_cnt = ++l->stats.deferred_recv;
- u32 defq_len = skb_queue_len(&l->deferdq);
+ struct sk_buff_head *dfq = &l->deferdq;
+ u32 defq_len = skb_queue_len(dfq);
int match1, match2;
if (link_is_bc_rcvlink(l)) {
@@ -1532,8 +1602,12 @@ static int tipc_link_build_nack_msg(struct tipc_link *l,
return 0;
}
- if (defq_len >= 3 && !((defq_len - 3) % 16))
- tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, 0, xmitq);
+ if (defq_len >= 3 && !((defq_len - 3) % 16)) {
+ u16 rcvgap = buf_seqno(skb_peek(dfq)) - l->rcv_nxt;
+
+ tipc_link_build_proto_msg(l, STATE_MSG, 0, 0,
+ rcvgap, 0, 0, xmitq);
+ }
return 0;
}
@@ -1548,6 +1622,7 @@ int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb,
struct sk_buff_head *defq = &l->deferdq;
struct tipc_msg *hdr = buf_msg(skb);
u16 seqno, rcv_nxt, win_lim;
+ int released = 0;
int rc = 0;
/* Verify and update link state */
@@ -1566,21 +1641,17 @@ int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb,
if (unlikely(!link_is_up(l))) {
if (l->state == LINK_ESTABLISHING)
rc = TIPC_LINK_UP_EVT;
- goto drop;
+ kfree_skb(skb);
+ break;
}
/* Drop if outside receive window */
if (unlikely(less(seqno, rcv_nxt) || more(seqno, win_lim))) {
l->stats.duplicates++;
- goto drop;
- }
-
- /* Forward queues and wake up waiting users */
- if (likely(tipc_link_release_pkts(l, msg_ack(hdr)))) {
- tipc_link_advance_backlog(l, xmitq);
- if (unlikely(!skb_queue_empty(&l->wakeupq)))
- link_prepare_wakeup(l);
+ kfree_skb(skb);
+ break;
}
+ released += tipc_link_release_pkts(l, msg_ack(hdr));
/* Defer delivery if sequence gap */
if (unlikely(seqno != rcv_nxt)) {
@@ -1603,9 +1674,13 @@ int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb,
break;
} while ((skb = __tipc_skb_dequeue(defq, l->rcv_nxt)));
- return rc;
-drop:
- kfree_skb(skb);
+ /* Forward queues and wake up waiting users */
+ if (released) {
+ tipc_link_update_cwin(l, released, 0);
+ tipc_link_advance_backlog(l, xmitq);
+ if (unlikely(!skb_queue_empty(&l->wakeupq)))
+ link_prepare_wakeup(l);
+ }
return rc;
}
@@ -1631,7 +1706,7 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
if (!tipc_link_is_up(l) && (mtyp == STATE_MSG))
return;
- if (!skb_queue_empty(dfq))
+ if ((probe || probe_reply) && !skb_queue_empty(dfq))
rcvgap = buf_seqno(skb_peek(dfq)) - l->rcv_nxt;
skb = tipc_msg_create(LINK_PROTOCOL, mtyp, INT_H_SIZE,
@@ -1664,7 +1739,7 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
msg_set_probe(hdr, probe);
msg_set_is_keepalive(hdr, probe || probe_reply);
if (l->peer_caps & TIPC_GAP_ACK_BLOCK)
- glen = tipc_build_gap_ack_blks(l, data);
+ glen = tipc_build_gap_ack_blks(l, data, rcvgap);
tipc_mon_prep(l->net, data + glen, &dlen, mstate, l->bearer_id);
msg_set_size(hdr, INT_H_SIZE + glen + dlen);
skb_trim(skb, INT_H_SIZE + glen + dlen);
@@ -2074,19 +2149,18 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
&l->mon_state, l->bearer_id);
/* Send NACK if peer has sent pkts we haven't received yet */
- if (more(peers_snd_nxt, rcv_nxt) && !tipc_link_is_synching(l))
+ if ((reply || msg_is_keepalive(hdr)) &&
+ more(peers_snd_nxt, rcv_nxt) &&
+ !tipc_link_is_synching(l) &&
+ skb_queue_empty(&l->deferdq))
rcvgap = peers_snd_nxt - l->rcv_nxt;
if (rcvgap || reply)
tipc_link_build_proto_msg(l, STATE_MSG, 0, reply,
rcvgap, 0, 0, xmitq);
rc |= tipc_link_advance_transmq(l, ack, gap, ga, xmitq);
-
- /* If NACK, retransmit will now start at right position */
if (gap)
l->stats.recv_nacks++;
-
- tipc_link_advance_backlog(l, xmitq);
if (unlikely(!skb_queue_empty(&l->wakeupq)))
link_prepare_wakeup(l);
}
@@ -2305,15 +2379,18 @@ int tipc_link_bc_nack_rcv(struct tipc_link *l, struct sk_buff *skb,
return 0;
}
-void tipc_link_set_queue_limits(struct tipc_link *l, u32 win)
+void tipc_link_set_queue_limits(struct tipc_link *l, u32 min_win, u32 max_win)
{
int max_bulk = TIPC_MAX_PUBL / (l->mtu / ITEM_SIZE);
- l->window = win;
- l->backlog[TIPC_LOW_IMPORTANCE].limit = max_t(u16, 50, win);
- l->backlog[TIPC_MEDIUM_IMPORTANCE].limit = max_t(u16, 100, win * 2);
- l->backlog[TIPC_HIGH_IMPORTANCE].limit = max_t(u16, 150, win * 3);
- l->backlog[TIPC_CRITICAL_IMPORTANCE].limit = max_t(u16, 200, win * 4);
+ l->min_win = min_win;
+ l->ssthresh = max_win;
+ l->max_win = max_win;
+ l->window = min_win;
+ l->backlog[TIPC_LOW_IMPORTANCE].limit = min_win * 2;
+ l->backlog[TIPC_MEDIUM_IMPORTANCE].limit = min_win * 4;
+ l->backlog[TIPC_HIGH_IMPORTANCE].limit = min_win * 6;
+ l->backlog[TIPC_CRITICAL_IMPORTANCE].limit = min_win * 8;
l->backlog[TIPC_SYSTEM_IMPORTANCE].limit = max_bulk;
}
@@ -2366,10 +2443,10 @@ int tipc_nl_parse_link_prop(struct nlattr *prop, struct nlattr *props[])
}
if (props[TIPC_NLA_PROP_WIN]) {
- u32 win;
+ u32 max_win;
- win = nla_get_u32(props[TIPC_NLA_PROP_WIN]);
- if ((win < TIPC_MIN_LINK_WIN) || (win > TIPC_MAX_LINK_WIN))
+ max_win = nla_get_u32(props[TIPC_NLA_PROP_WIN]);
+ if (max_win < TIPC_DEF_LINK_WIN || max_win > TIPC_MAX_LINK_WIN)
return -EINVAL;
}
@@ -2605,7 +2682,7 @@ int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg)
prop = nla_nest_start_noflag(msg->skb, TIPC_NLA_LINK_PROP);
if (!prop)
goto attr_msg_full;
- if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, bcl->window))
+ if (nla_put_u32(msg->skb, TIPC_NLA_PROP_WIN, bcl->max_win))
goto prop_msg_full;
if (nla_put_u32(msg->skb, TIPC_NLA_PROP_BROADCAST, bc_mode))
goto prop_msg_full;
diff --git a/net/tipc/link.h b/net/tipc/link.h
index c09e9d49d0a3..d3c1c3fc1659 100644
--- a/net/tipc/link.h
+++ b/net/tipc/link.h
@@ -73,7 +73,7 @@ enum {
bool tipc_link_create(struct net *net, char *if_name, int bearer_id,
int tolerance, char net_plane, u32 mtu, int priority,
- int window, u32 session, u32 ownnode,
+ u32 min_win, u32 max_win, u32 session, u32 ownnode,
u32 peer, u8 *peer_id, u16 peer_caps,
struct tipc_link *bc_sndlink,
struct tipc_link *bc_rcvlink,
@@ -81,7 +81,7 @@ bool tipc_link_create(struct net *net, char *if_name, int bearer_id,
struct sk_buff_head *namedq,
struct tipc_link **link);
bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer,
- int mtu, int window, u16 peer_caps,
+ int mtu, u32 min_win, u32 max_win, u16 peer_caps,
struct sk_buff_head *inputq,
struct sk_buff_head *namedq,
struct tipc_link *bc_sndlink,
@@ -115,7 +115,8 @@ char *tipc_link_name_ext(struct tipc_link *l, char *buf);
u32 tipc_link_state(struct tipc_link *l);
char tipc_link_plane(struct tipc_link *l);
int tipc_link_prio(struct tipc_link *l);
-int tipc_link_window(struct tipc_link *l);
+int tipc_link_min_win(struct tipc_link *l);
+int tipc_link_max_win(struct tipc_link *l);
void tipc_link_update_caps(struct tipc_link *l, u16 capabilities);
bool tipc_link_validate_msg(struct tipc_link *l, struct tipc_msg *hdr);
unsigned long tipc_link_tolerance(struct tipc_link *l);
@@ -124,7 +125,7 @@ void tipc_link_set_tolerance(struct tipc_link *l, u32 tol,
void tipc_link_set_prio(struct tipc_link *l, u32 prio,
struct sk_buff_head *xmitq);
void tipc_link_set_abort_limit(struct tipc_link *l, u32 limit);
-void tipc_link_set_queue_limits(struct tipc_link *l, u32 window);
+void tipc_link_set_queue_limits(struct tipc_link *l, u32 min_win, u32 max_win);
int __tipc_nl_add_link(struct net *net, struct tipc_nl_msg *msg,
struct tipc_link *link, int nlflags);
int tipc_nl_parse_link_prop(struct nlattr *prop, struct nlattr *props[]);
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index 92d04dc2a44b..359b2bc888cf 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -36,6 +36,7 @@
#include <net/sock.h>
#include <linux/list_sort.h>
+#include <linux/rbtree_augmented.h>
#include "core.h"
#include "netlink.h"
#include "name_table.h"
@@ -51,6 +52,7 @@
* @lower: service range lower bound
* @upper: service range upper bound
* @tree_node: member of service range RB tree
+ * @max: largest 'upper' in this node subtree
* @local_publ: list of identical publications made from this node
* Used by closest_first lookup and multicast lookup algorithm
* @all_publ: all publications identical to this one, whatever node and scope
@@ -60,6 +62,7 @@ struct service_range {
u32 lower;
u32 upper;
struct rb_node tree_node;
+ u32 max;
struct list_head local_publ;
struct list_head all_publ;
};
@@ -84,6 +87,130 @@ struct tipc_service {
struct rcu_head rcu;
};
+#define service_range_upper(sr) ((sr)->upper)
+RB_DECLARE_CALLBACKS_MAX(static, sr_callbacks,
+ struct service_range, tree_node, u32, max,
+ service_range_upper)
+
+#define service_range_entry(rbtree_node) \
+ (container_of(rbtree_node, struct service_range, tree_node))
+
+#define service_range_overlap(sr, start, end) \
+ ((sr)->lower <= (end) && (sr)->upper >= (start))
+
+/**
+ * service_range_foreach_match - iterate over tipc service rbtree for each
+ * range match
+ * @sr: the service range pointer as a loop cursor
+ * @sc: the pointer to tipc service which holds the service range rbtree
+ * @start, end: the range (end >= start) for matching
+ */
+#define service_range_foreach_match(sr, sc, start, end) \
+ for (sr = service_range_match_first((sc)->ranges.rb_node, \
+ start, \
+ end); \
+ sr; \
+ sr = service_range_match_next(&(sr)->tree_node, \
+ start, \
+ end))
+
+/**
+ * service_range_match_first - find first service range matching a range
+ * @n: the root node of service range rbtree for searching
+ * @start, end: the range (end >= start) for matching
+ *
+ * Return: the leftmost service range node in the rbtree that overlaps the
+ * specific range if any. Otherwise, returns NULL.
+ */
+static struct service_range *service_range_match_first(struct rb_node *n,
+ u32 start, u32 end)
+{
+ struct service_range *sr;
+ struct rb_node *l, *r;
+
+ /* Non overlaps in tree at all? */
+ if (!n || service_range_entry(n)->max < start)
+ return NULL;
+
+ while (n) {
+ l = n->rb_left;
+ if (l && service_range_entry(l)->max >= start) {
+ /* A leftmost overlap range node must be one in the left
+ * subtree. If not, it has lower > end, then nodes on
+ * the right side cannot satisfy the condition either.
+ */
+ n = l;
+ continue;
+ }
+
+ /* No one in the left subtree can match, return if this node is
+ * an overlap i.e. leftmost.
+ */
+ sr = service_range_entry(n);
+ if (service_range_overlap(sr, start, end))
+ return sr;
+
+ /* Ok, try to lookup on the right side */
+ r = n->rb_right;
+ if (sr->lower <= end &&
+ r && service_range_entry(r)->max >= start) {
+ n = r;
+ continue;
+ }
+ break;
+ }
+
+ return NULL;
+}
+
+/**
+ * service_range_match_next - find next service range matching a range
+ * @n: a node in service range rbtree from which the searching starts
+ * @start, end: the range (end >= start) for matching
+ *
+ * Return: the next service range node to the given node in the rbtree that
+ * overlaps the specific range if any. Otherwise, returns NULL.
+ */
+static struct service_range *service_range_match_next(struct rb_node *n,
+ u32 start, u32 end)
+{
+ struct service_range *sr;
+ struct rb_node *p, *r;
+
+ while (n) {
+ r = n->rb_right;
+ if (r && service_range_entry(r)->max >= start)
+ /* A next overlap range node must be one in the right
+ * subtree. If not, it has lower > end, then any next
+ * successor (- an ancestor) of this node cannot
+ * satisfy the condition either.
+ */
+ return service_range_match_first(r, start, end);
+
+ /* No one in the right subtree can match, go up to find an
+ * ancestor of this node which is parent of a left-hand child.
+ */
+ while ((p = rb_parent(n)) && n == p->rb_right)
+ n = p;
+ if (!p)
+ break;
+
+ /* Return if this ancestor is an overlap */
+ sr = service_range_entry(p);
+ if (service_range_overlap(sr, start, end))
+ return sr;
+
+ /* Ok, try to lookup more from this ancestor */
+ if (sr->lower <= end) {
+ n = p;
+ continue;
+ }
+ break;
+ }
+
+ return NULL;
+}
+
static int hash(int x)
{
return x & (TIPC_NAMETBL_SIZE - 1);
@@ -139,84 +266,51 @@ static struct tipc_service *tipc_service_create(u32 type, struct hlist_head *hd)
return service;
}
-/**
- * tipc_service_first_range - find first service range in tree matching instance
- *
- * Very time-critical, so binary search through range rb tree
- */
-static struct service_range *tipc_service_first_range(struct tipc_service *sc,
- u32 instance)
-{
- struct rb_node *n = sc->ranges.rb_node;
- struct service_range *sr;
-
- while (n) {
- sr = container_of(n, struct service_range, tree_node);
- if (sr->lower > instance)
- n = n->rb_left;
- else if (sr->upper < instance)
- n = n->rb_right;
- else
- return sr;
- }
- return NULL;
-}
-
/* tipc_service_find_range - find service range matching publication parameters
*/
static struct service_range *tipc_service_find_range(struct tipc_service *sc,
u32 lower, u32 upper)
{
- struct rb_node *n = sc->ranges.rb_node;
struct service_range *sr;
- sr = tipc_service_first_range(sc, lower);
- if (!sr)
- return NULL;
-
- /* Look for exact match */
- for (n = &sr->tree_node; n; n = rb_next(n)) {
- sr = container_of(n, struct service_range, tree_node);
- if (sr->upper == upper)
- break;
+ service_range_foreach_match(sr, sc, lower, upper) {
+ /* Look for exact match */
+ if (sr->lower == lower && sr->upper == upper)
+ return sr;
}
- if (!n || sr->lower != lower || sr->upper != upper)
- return NULL;
- return sr;
+ return NULL;
}
static struct service_range *tipc_service_create_range(struct tipc_service *sc,
u32 lower, u32 upper)
{
struct rb_node **n, *parent = NULL;
- struct service_range *sr, *tmp;
+ struct service_range *sr;
n = &sc->ranges.rb_node;
while (*n) {
- tmp = container_of(*n, struct service_range, tree_node);
parent = *n;
- tmp = container_of(parent, struct service_range, tree_node);
- if (lower < tmp->lower)
- n = &(*n)->rb_left;
- else if (lower > tmp->lower)
- n = &(*n)->rb_right;
- else if (upper < tmp->upper)
- n = &(*n)->rb_left;
- else if (upper > tmp->upper)
- n = &(*n)->rb_right;
+ sr = service_range_entry(parent);
+ if (lower == sr->lower && upper == sr->upper)
+ return sr;
+ if (sr->max < upper)
+ sr->max = upper;
+ if (lower <= sr->lower)
+ n = &parent->rb_left;
else
- return tmp;
+ n = &parent->rb_right;
}
sr = kzalloc(sizeof(*sr), GFP_ATOMIC);
if (!sr)
return NULL;
sr->lower = lower;
sr->upper = upper;
+ sr->max = upper;
INIT_LIST_HEAD(&sr->local_publ);
INIT_LIST_HEAD(&sr->all_publ);
rb_link_node(&sr->tree_node, parent, n);
- rb_insert_color(&sr->tree_node, &sc->ranges);
+ rb_insert_augmented(&sr->tree_node, &sc->ranges, &sr_callbacks);
return sr;
}
@@ -310,7 +404,6 @@ static void tipc_service_subscribe(struct tipc_service *service,
struct list_head publ_list;
struct service_range *sr;
struct tipc_name_seq ns;
- struct rb_node *n;
u32 filter;
ns.type = tipc_sub_read(sb, seq.type);
@@ -325,13 +418,7 @@ static void tipc_service_subscribe(struct tipc_service *service,
return;
INIT_LIST_HEAD(&publ_list);
- for (n = rb_first(&service->ranges); n; n = rb_next(n)) {
- sr = container_of(n, struct service_range, tree_node);
- if (sr->lower > ns.upper)
- break;
- if (!tipc_sub_check_overlap(&ns, sr->lower, sr->upper))
- continue;
-
+ service_range_foreach_match(sr, service, ns.lower, ns.upper) {
first = NULL;
list_for_each_entry(p, &sr->all_publ, all_publ) {
if (filter & TIPC_SUB_PORTS)
@@ -425,7 +512,7 @@ struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type,
/* Remove service range item if this was its last publication */
if (list_empty(&sr->all_publ)) {
- rb_erase(&sr->tree_node, &sc->ranges);
+ rb_erase_augmented(&sr->tree_node, &sc->ranges, &sr_callbacks);
kfree(sr);
}
@@ -473,34 +560,39 @@ u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance, u32 *dnode)
rcu_read_lock();
sc = tipc_service_find(net, type);
if (unlikely(!sc))
- goto not_found;
+ goto exit;
spin_lock_bh(&sc->lock);
- sr = tipc_service_first_range(sc, instance);
- if (unlikely(!sr))
- goto no_match;
-
- /* Select lookup algorithm: local, closest-first or round-robin */
- if (*dnode == self) {
- list = &sr->local_publ;
- if (list_empty(list))
- goto no_match;
- p = list_first_entry(list, struct publication, local_publ);
- list_move_tail(&p->local_publ, &sr->local_publ);
- } else if (legacy && !*dnode && !list_empty(&sr->local_publ)) {
- list = &sr->local_publ;
- p = list_first_entry(list, struct publication, local_publ);
- list_move_tail(&p->local_publ, &sr->local_publ);
- } else {
- list = &sr->all_publ;
- p = list_first_entry(list, struct publication, all_publ);
- list_move_tail(&p->all_publ, &sr->all_publ);
+ service_range_foreach_match(sr, sc, instance, instance) {
+ /* Select lookup algo: local, closest-first or round-robin */
+ if (*dnode == self) {
+ list = &sr->local_publ;
+ if (list_empty(list))
+ continue;
+ p = list_first_entry(list, struct publication,
+ local_publ);
+ list_move_tail(&p->local_publ, &sr->local_publ);
+ } else if (legacy && !*dnode && !list_empty(&sr->local_publ)) {
+ list = &sr->local_publ;
+ p = list_first_entry(list, struct publication,
+ local_publ);
+ list_move_tail(&p->local_publ, &sr->local_publ);
+ } else {
+ list = &sr->all_publ;
+ p = list_first_entry(list, struct publication,
+ all_publ);
+ list_move_tail(&p->all_publ, &sr->all_publ);
+ }
+ port = p->port;
+ node = p->node;
+ /* Todo: as for legacy, pick the first matching range only, a
+ * "true" round-robin will be performed as needed.
+ */
+ break;
}
- port = p->port;
- node = p->node;
-no_match:
spin_unlock_bh(&sc->lock);
-not_found:
+
+exit:
rcu_read_unlock();
*dnode = node;
return port;
@@ -523,7 +615,8 @@ bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 scope,
spin_lock_bh(&sc->lock);
- sr = tipc_service_first_range(sc, instance);
+ /* Todo: a full search i.e. service_range_foreach_match() instead? */
+ sr = service_range_match_first(sc->ranges.rb_node, instance, instance);
if (!sr)
goto no_match;
@@ -552,7 +645,6 @@ void tipc_nametbl_mc_lookup(struct net *net, u32 type, u32 lower, u32 upper,
struct service_range *sr;
struct tipc_service *sc;
struct publication *p;
- struct rb_node *n;
rcu_read_lock();
sc = tipc_service_find(net, type);
@@ -560,13 +652,7 @@ void tipc_nametbl_mc_lookup(struct net *net, u32 type, u32 lower, u32 upper,
goto exit;
spin_lock_bh(&sc->lock);
-
- for (n = rb_first(&sc->ranges); n; n = rb_next(n)) {
- sr = container_of(n, struct service_range, tree_node);
- if (sr->upper < lower)
- continue;
- if (sr->lower > upper)
- break;
+ service_range_foreach_match(sr, sc, lower, upper) {
list_for_each_entry(p, &sr->local_publ, local_publ) {
if (p->scope == scope || (!exact && p->scope < scope))
tipc_dest_push(dports, 0, p->port);
@@ -587,7 +673,6 @@ void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower,
struct service_range *sr;
struct tipc_service *sc;
struct publication *p;
- struct rb_node *n;
rcu_read_lock();
sc = tipc_service_find(net, type);
@@ -595,13 +680,7 @@ void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower,
goto exit;
spin_lock_bh(&sc->lock);
-
- for (n = rb_first(&sc->ranges); n; n = rb_next(n)) {
- sr = container_of(n, struct service_range, tree_node);
- if (sr->upper < lower)
- continue;
- if (sr->lower > upper)
- break;
+ service_range_foreach_match(sr, sc, lower, upper) {
list_for_each_entry(p, &sr->all_publ, all_publ) {
tipc_nlist_add(nodes, p->node);
}
@@ -799,7 +878,7 @@ static void tipc_service_delete(struct net *net, struct tipc_service *sc)
tipc_service_remove_publ(sr, p->node, p->key);
kfree_rcu(p, rcu);
}
- rb_erase(&sr->tree_node, &sc->ranges);
+ rb_erase_augmented(&sr->tree_node, &sc->ranges, &sr_callbacks);
kfree(sr);
}
hlist_del_init_rcu(&sc->service_list);
diff --git a/net/tipc/net.c b/net/tipc/net.c
index 2de3cec9929d..85400e4242de 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -302,3 +302,59 @@ int tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info)
return err;
}
+
+static int __tipc_nl_addr_legacy_get(struct net *net, struct tipc_nl_msg *msg)
+{
+ struct tipc_net *tn = tipc_net(net);
+ struct nlattr *attrs;
+ void *hdr;
+
+ hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family,
+ 0, TIPC_NL_ADDR_LEGACY_GET);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ attrs = nla_nest_start(msg->skb, TIPC_NLA_NET);
+ if (!attrs)
+ goto msg_full;
+
+ if (tn->legacy_addr_format)
+ if (nla_put_flag(msg->skb, TIPC_NLA_NET_ADDR_LEGACY))
+ goto attr_msg_full;
+
+ nla_nest_end(msg->skb, attrs);
+ genlmsg_end(msg->skb, hdr);
+
+ return 0;
+
+attr_msg_full:
+ nla_nest_cancel(msg->skb, attrs);
+msg_full:
+ genlmsg_cancel(msg->skb, hdr);
+
+ return -EMSGSIZE;
+}
+
+int tipc_nl_net_addr_legacy_get(struct sk_buff *skb, struct genl_info *info)
+{
+ struct net *net = sock_net(skb->sk);
+ struct tipc_nl_msg msg;
+ struct sk_buff *rep;
+ int err;
+
+ rep = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (!rep)
+ return -ENOMEM;
+
+ msg.skb = rep;
+ msg.portid = info->snd_portid;
+ msg.seq = info->snd_seq;
+
+ err = __tipc_nl_addr_legacy_get(net, &msg);
+ if (err) {
+ nlmsg_free(msg.skb);
+ return err;
+ }
+
+ return genlmsg_reply(msg.skb, info);
+}
diff --git a/net/tipc/net.h b/net/tipc/net.h
index b7f2e364eb99..6740d97c706e 100644
--- a/net/tipc/net.h
+++ b/net/tipc/net.h
@@ -47,5 +47,6 @@ void tipc_net_stop(struct net *net);
int tipc_nl_net_dump(struct sk_buff *skb, struct netlink_callback *cb);
int tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info);
int __tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info);
+int tipc_nl_net_addr_legacy_get(struct sk_buff *skb, struct genl_info *info);
#endif
diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c
index e53231bd23b4..7c35094c20b8 100644
--- a/net/tipc/netlink.c
+++ b/net/tipc/netlink.c
@@ -83,6 +83,7 @@ const struct nla_policy tipc_nl_net_policy[TIPC_NLA_NET_MAX + 1] = {
[TIPC_NLA_NET_ADDR] = { .type = NLA_U32 },
[TIPC_NLA_NET_NODEID] = { .type = NLA_U64 },
[TIPC_NLA_NET_NODEID_W1] = { .type = NLA_U64 },
+ [TIPC_NLA_NET_ADDR_LEGACY] = { .type = NLA_FLAG }
};
const struct nla_policy tipc_nl_link_policy[TIPC_NLA_LINK_MAX + 1] = {
@@ -273,6 +274,11 @@ static const struct genl_ops tipc_genl_v2_ops[] = {
.doit = tipc_nl_node_flush_key,
},
#endif
+ {
+ .cmd = TIPC_NL_ADDR_LEGACY_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = tipc_nl_net_addr_legacy_get,
+ },
};
struct genl_family tipc_genl_family __ro_after_init = {
diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c
index 0254bb7e418b..217516357ef2 100644
--- a/net/tipc/netlink_compat.c
+++ b/net/tipc/netlink_compat.c
@@ -204,8 +204,8 @@ static int __tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd,
return -ENOMEM;
}
- attrbuf = kmalloc_array(tipc_genl_family.maxattr + 1,
- sizeof(struct nlattr *), GFP_KERNEL);
+ attrbuf = kcalloc(tipc_genl_family.maxattr + 1,
+ sizeof(struct nlattr *), GFP_KERNEL);
if (!attrbuf) {
err = -ENOMEM;
goto err_out;
diff --git a/net/tipc/node.c b/net/tipc/node.c
index ab04e00cb95b..99b28b69fc17 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -1139,7 +1139,8 @@ void tipc_node_check_dest(struct net *net, u32 addr,
snd_l = tipc_bc_sndlink(net);
if (!tipc_link_bc_create(net, tipc_own_addr(net),
addr, U16_MAX,
- tipc_link_window(snd_l),
+ tipc_link_min_win(snd_l),
+ tipc_link_max_win(snd_l),
n->capabilities,
&n->bc_entry.inputq1,
&n->bc_entry.namedq, snd_l,
@@ -1233,7 +1234,7 @@ void tipc_node_check_dest(struct net *net, u32 addr,
get_random_bytes(&session, sizeof(u16));
if (!tipc_link_create(net, if_name, b->identity, b->tolerance,
b->net_plane, b->mtu, b->priority,
- b->window, session,
+ b->min_win, b->max_win, session,
tipc_own_addr(net), addr, peer_id,
n->capabilities,
tipc_bc_sndlink(n->net), n->bc_entry.link,
@@ -2360,8 +2361,7 @@ int tipc_nl_node_set_link(struct sk_buff *skb, struct genl_info *info)
if (attrs[TIPC_NLA_LINK_PROP]) {
struct nlattr *props[TIPC_NLA_PROP_MAX + 1];
- err = tipc_nl_parse_link_prop(attrs[TIPC_NLA_LINK_PROP],
- props);
+ err = tipc_nl_parse_link_prop(attrs[TIPC_NLA_LINK_PROP], props);
if (err) {
res = err;
goto out;
@@ -2380,10 +2380,12 @@ int tipc_nl_node_set_link(struct sk_buff *skb, struct genl_info *info)
tipc_link_set_prio(link, prio, &xmitq);
}
if (props[TIPC_NLA_PROP_WIN]) {
- u32 win;
+ u32 max_win;
- win = nla_get_u32(props[TIPC_NLA_PROP_WIN]);
- tipc_link_set_queue_limits(link, win);
+ max_win = nla_get_u32(props[TIPC_NLA_PROP_WIN]);
+ tipc_link_set_queue_limits(link,
+ tipc_link_min_win(link),
+ max_win);
}
}
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 41688da233ab..f9b4fb92c0b1 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -287,12 +287,12 @@ static void tipc_sk_respond(struct sock *sk, struct sk_buff *skb, int err)
*
* Caller must hold socket lock
*/
-static void tsk_rej_rx_queue(struct sock *sk)
+static void tsk_rej_rx_queue(struct sock *sk, int error)
{
struct sk_buff *skb;
while ((skb = __skb_dequeue(&sk->sk_receive_queue)))
- tipc_sk_respond(sk, skb, TIPC_ERR_NO_PORT);
+ tipc_sk_respond(sk, skb, error);
}
static bool tipc_sk_connected(struct sock *sk)
@@ -545,34 +545,45 @@ static void __tipc_shutdown(struct socket *sock, int error)
/* Remove pending SYN */
__skb_queue_purge(&sk->sk_write_queue);
- /* Reject all unreceived messages, except on an active connection
- * (which disconnects locally & sends a 'FIN+' to peer).
- */
- while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
- if (TIPC_SKB_CB(skb)->bytes_read) {
- kfree_skb(skb);
- continue;
- }
- if (!tipc_sk_type_connectionless(sk) &&
- sk->sk_state != TIPC_DISCONNECTING) {
- tipc_set_sk_state(sk, TIPC_DISCONNECTING);
- tipc_node_remove_conn(net, dnode, tsk->portid);
- }
- tipc_sk_respond(sk, skb, error);
+ /* Remove partially received buffer if any */
+ skb = skb_peek(&sk->sk_receive_queue);
+ if (skb && TIPC_SKB_CB(skb)->bytes_read) {
+ __skb_unlink(skb, &sk->sk_receive_queue);
+ kfree_skb(skb);
}
- if (tipc_sk_type_connectionless(sk))
+ /* Reject all unreceived messages if connectionless */
+ if (tipc_sk_type_connectionless(sk)) {
+ tsk_rej_rx_queue(sk, error);
return;
+ }
- if (sk->sk_state != TIPC_DISCONNECTING) {
+ switch (sk->sk_state) {
+ case TIPC_CONNECTING:
+ case TIPC_ESTABLISHED:
+ tipc_set_sk_state(sk, TIPC_DISCONNECTING);
+ tipc_node_remove_conn(net, dnode, tsk->portid);
+ /* Send a FIN+/- to its peer */
+ skb = __skb_dequeue(&sk->sk_receive_queue);
+ if (skb) {
+ __skb_queue_purge(&sk->sk_receive_queue);
+ tipc_sk_respond(sk, skb, error);
+ break;
+ }
skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE,
TIPC_CONN_MSG, SHORT_H_SIZE, 0, dnode,
tsk_own_node(tsk), tsk_peer_port(tsk),
tsk->portid, error);
if (skb)
tipc_node_xmit_skb(net, skb, dnode, tsk->portid);
- tipc_node_remove_conn(net, dnode, tsk->portid);
- tipc_set_sk_state(sk, TIPC_DISCONNECTING);
+ break;
+ case TIPC_LISTEN:
+ /* Reject all SYN messages */
+ tsk_rej_rx_queue(sk, error);
+ break;
+ default:
+ __skb_queue_purge(&sk->sk_receive_queue);
+ break;
}
}
@@ -1364,8 +1375,8 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
struct tipc_msg *hdr = &tsk->phdr;
struct tipc_name_seq *seq;
struct sk_buff_head pkts;
- u32 dport, dnode = 0;
- u32 type, inst;
+ u32 dport = 0, dnode = 0;
+ u32 type = 0, inst = 0;
int mtu, rc;
if (unlikely(dlen > TIPC_MAX_USER_MSG_SIZE))
@@ -1418,23 +1429,11 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
type = dest->addr.name.name.type;
inst = dest->addr.name.name.instance;
dnode = dest->addr.name.domain;
- msg_set_type(hdr, TIPC_NAMED_MSG);
- msg_set_hdr_sz(hdr, NAMED_H_SIZE);
- msg_set_nametype(hdr, type);
- msg_set_nameinst(hdr, inst);
- msg_set_lookup_scope(hdr, tipc_node2scope(dnode));
dport = tipc_nametbl_translate(net, type, inst, &dnode);
- msg_set_destnode(hdr, dnode);
- msg_set_destport(hdr, dport);
if (unlikely(!dport && !dnode))
return -EHOSTUNREACH;
} else if (dest->addrtype == TIPC_ADDR_ID) {
dnode = dest->addr.id.node;
- msg_set_type(hdr, TIPC_DIRECT_MSG);
- msg_set_lookup_scope(hdr, 0);
- msg_set_destnode(hdr, dnode);
- msg_set_destport(hdr, dest->addr.id.ref);
- msg_set_hdr_sz(hdr, BASIC_H_SIZE);
} else {
return -EINVAL;
}
@@ -1445,6 +1444,22 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
if (unlikely(rc))
return rc;
+ if (dest->addrtype == TIPC_ADDR_NAME) {
+ msg_set_type(hdr, TIPC_NAMED_MSG);
+ msg_set_hdr_sz(hdr, NAMED_H_SIZE);
+ msg_set_nametype(hdr, type);
+ msg_set_nameinst(hdr, inst);
+ msg_set_lookup_scope(hdr, tipc_node2scope(dnode));
+ msg_set_destnode(hdr, dnode);
+ msg_set_destport(hdr, dport);
+ } else { /* TIPC_ADDR_ID */
+ msg_set_type(hdr, TIPC_DIRECT_MSG);
+ msg_set_lookup_scope(hdr, 0);
+ msg_set_destnode(hdr, dnode);
+ msg_set_destport(hdr, dest->addr.id.ref);
+ msg_set_hdr_sz(hdr, BASIC_H_SIZE);
+ }
+
__skb_queue_head_init(&pkts);
mtu = tipc_node_get_mtu(net, dnode, tsk->portid, false);
rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts);
@@ -2428,8 +2443,8 @@ static int tipc_wait_for_connect(struct socket *sock, long *timeo_p)
return sock_intr_errno(*timeo_p);
add_wait_queue(sk_sleep(sk), &wait);
- done = sk_wait_event(sk, timeo_p,
- sk->sk_state != TIPC_CONNECTING, &wait);
+ done = sk_wait_event(sk, timeo_p, tipc_sk_connected(sk),
+ &wait);
remove_wait_queue(sk_sleep(sk), &wait);
} while (!done);
return 0;
@@ -2639,7 +2654,7 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags,
* Reject any stray messages received by new socket
* before the socket lock was taken (very, very unlikely)
*/
- tsk_rej_rx_queue(new_sk);
+ tsk_rej_rx_queue(new_sk, TIPC_ERR_NO_PORT);
/* Connect new socket to it's peer */
tipc_sk_finish_conn(new_tsock, msg_origport(msg), msg_orignode(msg));
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index ed113735c019..d6620ad53546 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -828,7 +828,8 @@ struct tipc_media udp_media_info = {
.msg2addr = tipc_udp_msg2addr,
.priority = TIPC_DEF_LINK_PRI,
.tolerance = TIPC_DEF_LINK_TOL,
- .window = TIPC_DEF_LINK_WIN,
+ .min_win = TIPC_DEF_LINK_WIN,
+ .max_win = TIPC_DEF_LINK_WIN,
.mtu = TIPC_DEF_LINK_UDP_MTU,
.type_id = TIPC_MEDIA_TYPE_UDP,
.hwaddr_len = 0,
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index cd91ad812291..1ba5a92832bb 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -178,7 +178,7 @@ static void tls_icsk_clean_acked(struct sock *sk, u32 acked_seq)
* socket and no in-flight SKBs associated with this
* socket, so it is safe to free all the resources.
*/
-static void tls_device_sk_destruct(struct sock *sk)
+void tls_device_sk_destruct(struct sock *sk)
{
struct tls_context *tls_ctx = tls_get_ctx(sk);
struct tls_offload_context_tx *ctx = tls_offload_ctx_tx(tls_ctx);
@@ -196,6 +196,7 @@ static void tls_device_sk_destruct(struct sock *sk)
if (refcount_dec_and_test(&tls_ctx->refcount))
tls_device_queue_ctx_destruction(tls_ctx);
}
+EXPORT_SYMBOL_GPL(tls_device_sk_destruct);
void tls_device_free_resources_tx(struct sock *sk)
{
@@ -903,7 +904,7 @@ static void tls_device_attach(struct tls_context *ctx, struct sock *sk,
spin_unlock_irq(&tls_device_lock);
ctx->sk_destruct = sk->sk_destruct;
- sk->sk_destruct = tls_device_sk_destruct;
+ smp_store_release(&sk->sk_destruct, tls_device_sk_destruct);
}
}
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 7cfdce10de36..6756a3ccc392 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -676,6 +676,16 @@ static int unix_set_peek_off(struct sock *sk, int val)
return 0;
}
+static void unix_show_fdinfo(struct seq_file *m, struct socket *sock)
+{
+ struct sock *sk = sock->sk;
+ struct unix_sock *u;
+
+ if (sk) {
+ u = unix_sk(sock->sk);
+ seq_printf(m, "scm_fds: %u\n", READ_ONCE(u->scm_stat.nr_fds));
+ }
+}
static const struct proto_ops unix_stream_ops = {
.family = PF_UNIX,
@@ -701,6 +711,7 @@ static const struct proto_ops unix_stream_ops = {
.sendpage = unix_stream_sendpage,
.splice_read = unix_stream_splice_read,
.set_peek_off = unix_set_peek_off,
+ .show_fdinfo = unix_show_fdinfo,
};
static const struct proto_ops unix_dgram_ops = {
@@ -726,6 +737,7 @@ static const struct proto_ops unix_dgram_ops = {
.mmap = sock_no_mmap,
.sendpage = sock_no_sendpage,
.set_peek_off = unix_set_peek_off,
+ .show_fdinfo = unix_show_fdinfo,
};
static const struct proto_ops unix_seqpacket_ops = {
@@ -751,6 +763,7 @@ static const struct proto_ops unix_seqpacket_ops = {
.mmap = sock_no_mmap,
.sendpage = sock_no_sendpage,
.set_peek_off = unix_set_peek_off,
+ .show_fdinfo = unix_show_fdinfo,
};
static struct proto unix_proto = {
@@ -788,6 +801,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
mutex_init(&u->bindlock); /* single task binding lock */
init_waitqueue_head(&u->peer_wait);
init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
+ memset(&u->scm_stat, 0, sizeof(struct scm_stat));
unix_insert_socket(unix_sockets_unbound(sk), sk);
out:
if (sk == NULL)
@@ -1572,6 +1586,28 @@ static bool unix_skb_scm_eq(struct sk_buff *skb,
unix_secdata_eq(scm, skb);
}
+static void scm_stat_add(struct sock *sk, struct sk_buff *skb)
+{
+ struct scm_fp_list *fp = UNIXCB(skb).fp;
+ struct unix_sock *u = unix_sk(sk);
+
+ lockdep_assert_held(&sk->sk_receive_queue.lock);
+
+ if (unlikely(fp && fp->count))
+ u->scm_stat.nr_fds += fp->count;
+}
+
+static void scm_stat_del(struct sock *sk, struct sk_buff *skb)
+{
+ struct scm_fp_list *fp = UNIXCB(skb).fp;
+ struct unix_sock *u = unix_sk(sk);
+
+ lockdep_assert_held(&sk->sk_receive_queue.lock);
+
+ if (unlikely(fp && fp->count))
+ u->scm_stat.nr_fds -= fp->count;
+}
+
/*
* Send AF_UNIX data.
*/
@@ -1757,7 +1793,10 @@ restart_locked:
if (sock_flag(other, SOCK_RCVTSTAMP))
__net_timestamp(skb);
maybe_add_creds(skb, sock, other);
- skb_queue_tail(&other->sk_receive_queue, skb);
+ spin_lock(&other->sk_receive_queue.lock);
+ scm_stat_add(other, skb);
+ __skb_queue_tail(&other->sk_receive_queue, skb);
+ spin_unlock(&other->sk_receive_queue.lock);
unix_state_unlock(other);
other->sk_data_ready(other);
sock_put(other);
@@ -1859,7 +1898,10 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
goto pipe_err_free;
maybe_add_creds(skb, sock, other);
- skb_queue_tail(&other->sk_receive_queue, skb);
+ spin_lock(&other->sk_receive_queue.lock);
+ scm_stat_add(other, skb);
+ __skb_queue_tail(&other->sk_receive_queue, skb);
+ spin_unlock(&other->sk_receive_queue.lock);
unix_state_unlock(other);
other->sk_data_ready(other);
sent += size;
@@ -2058,8 +2100,8 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
mutex_lock(&u->iolock);
skip = sk_peek_offset(sk, flags);
- skb = __skb_try_recv_datagram(sk, flags, NULL, &skip, &err,
- &last);
+ skb = __skb_try_recv_datagram(sk, flags, scm_stat_del,
+ &skip, &err, &last);
if (skb)
break;
@@ -2353,8 +2395,12 @@ unlock:
sk_peek_offset_bwd(sk, chunk);
- if (UNIXCB(skb).fp)
+ if (UNIXCB(skb).fp) {
+ spin_lock(&sk->sk_receive_queue.lock);
+ scm_stat_del(sk, skb);
+ spin_unlock(&sk->sk_receive_queue.lock);
unix_detach_fds(&scm, skb);
+ }
if (unix_skb_len(skb))
break;
@@ -2865,7 +2911,7 @@ static int __init af_unix_init(void)
{
int rc = -1;
- BUILD_BUG_ON(sizeof(struct unix_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb));
+ BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb));
rc = proto_register(&unix_proto, 1);
if (rc != 0) {
diff --git a/net/vmw_vsock/Kconfig b/net/vmw_vsock/Kconfig
index 8abcb815af2d..56356d2980c8 100644
--- a/net/vmw_vsock/Kconfig
+++ b/net/vmw_vsock/Kconfig
@@ -26,6 +26,18 @@ config VSOCKETS_DIAG
Enable this module so userspace applications can query open sockets.
+config VSOCKETS_LOOPBACK
+ tristate "Virtual Sockets loopback transport"
+ depends on VSOCKETS
+ default y
+ select VIRTIO_VSOCKETS_COMMON
+ help
+ This module implements a loopback transport for Virtual Sockets,
+ using vmw_vsock_virtio_transport_common.
+
+ To compile this driver as a module, choose M here: the module
+ will be called vsock_loopback. If unsure, say N.
+
config VMWARE_VMCI_VSOCKETS
tristate "VMware VMCI transport for Virtual Sockets"
depends on VSOCKETS && VMWARE_VMCI
diff --git a/net/vmw_vsock/Makefile b/net/vmw_vsock/Makefile
index 7c6f9a0b67b0..6a943ec95c4a 100644
--- a/net/vmw_vsock/Makefile
+++ b/net/vmw_vsock/Makefile
@@ -5,6 +5,7 @@ obj-$(CONFIG_VMWARE_VMCI_VSOCKETS) += vmw_vsock_vmci_transport.o
obj-$(CONFIG_VIRTIO_VSOCKETS) += vmw_vsock_virtio_transport.o
obj-$(CONFIG_VIRTIO_VSOCKETS_COMMON) += vmw_vsock_virtio_transport_common.o
obj-$(CONFIG_HYPERV_VSOCKETS) += hv_sock.o
+obj-$(CONFIG_VSOCKETS_LOOPBACK) += vsock_loopback.o
vsock-y += af_vsock.o af_vsock_tap.o vsock_addr.o
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 74db4cd637a7..9c5b2a91baad 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -136,6 +136,8 @@ static const struct vsock_transport *transport_h2g;
static const struct vsock_transport *transport_g2h;
/* Transport used for DGRAM communication */
static const struct vsock_transport *transport_dgram;
+/* Transport used for local communication */
+static const struct vsock_transport *transport_local;
static DEFINE_MUTEX(vsock_register_mutex);
/**** UTILS ****/
@@ -386,6 +388,21 @@ void vsock_enqueue_accept(struct sock *listener, struct sock *connected)
}
EXPORT_SYMBOL_GPL(vsock_enqueue_accept);
+static bool vsock_use_local_transport(unsigned int remote_cid)
+{
+ if (!transport_local)
+ return false;
+
+ if (remote_cid == VMADDR_CID_LOCAL)
+ return true;
+
+ if (transport_g2h) {
+ return remote_cid == transport_g2h->get_local_cid();
+ } else {
+ return remote_cid == VMADDR_CID_HOST;
+ }
+}
+
static void vsock_deassign_transport(struct vsock_sock *vsk)
{
if (!vsk->transport)
@@ -402,9 +419,9 @@ static void vsock_deassign_transport(struct vsock_sock *vsk)
* (e.g. during the connect() or when a connection request on a listener
* socket is received).
* The vsk->remote_addr is used to decide which transport to use:
+ * - remote CID == VMADDR_CID_LOCAL or g2h->local_cid or VMADDR_CID_HOST if
+ * g2h is not loaded, will use local transport;
* - remote CID <= VMADDR_CID_HOST will use guest->host transport;
- * - remote CID == local_cid (guest->host transport) will use guest->host
- * transport for loopback (host->guest transports don't support loopback);
* - remote CID > VMADDR_CID_HOST will use host->guest transport;
*/
int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk)
@@ -419,9 +436,9 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk)
new_transport = transport_dgram;
break;
case SOCK_STREAM:
- if (remote_cid <= VMADDR_CID_HOST ||
- (transport_g2h &&
- remote_cid == transport_g2h->get_local_cid()))
+ if (vsock_use_local_transport(remote_cid))
+ new_transport = transport_local;
+ else if (remote_cid <= VMADDR_CID_HOST)
new_transport = transport_g2h;
else
new_transport = transport_h2g;
@@ -464,6 +481,9 @@ bool vsock_find_cid(unsigned int cid)
if (transport_h2g && cid == VMADDR_CID_HOST)
return true;
+ if (transport_local && cid == VMADDR_CID_LOCAL)
+ return true;
+
return false;
}
EXPORT_SYMBOL_GPL(vsock_find_cid);
@@ -2137,7 +2157,7 @@ EXPORT_SYMBOL_GPL(vsock_core_get_transport);
int vsock_core_register(const struct vsock_transport *t, int features)
{
- const struct vsock_transport *t_h2g, *t_g2h, *t_dgram;
+ const struct vsock_transport *t_h2g, *t_g2h, *t_dgram, *t_local;
int err = mutex_lock_interruptible(&vsock_register_mutex);
if (err)
@@ -2146,6 +2166,7 @@ int vsock_core_register(const struct vsock_transport *t, int features)
t_h2g = transport_h2g;
t_g2h = transport_g2h;
t_dgram = transport_dgram;
+ t_local = transport_local;
if (features & VSOCK_TRANSPORT_F_H2G) {
if (t_h2g) {
@@ -2171,9 +2192,18 @@ int vsock_core_register(const struct vsock_transport *t, int features)
t_dgram = t;
}
+ if (features & VSOCK_TRANSPORT_F_LOCAL) {
+ if (t_local) {
+ err = -EBUSY;
+ goto err_busy;
+ }
+ t_local = t;
+ }
+
transport_h2g = t_h2g;
transport_g2h = t_g2h;
transport_dgram = t_dgram;
+ transport_local = t_local;
err_busy:
mutex_unlock(&vsock_register_mutex);
@@ -2194,6 +2224,9 @@ void vsock_core_unregister(const struct vsock_transport *t)
if (transport_dgram == t)
transport_dgram = NULL;
+ if (transport_local == t)
+ transport_local = NULL;
+
mutex_unlock(&vsock_register_mutex);
}
EXPORT_SYMBOL_GPL(vsock_core_unregister);
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index 1458c5c8b64d..dfbaf6bd8b1c 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -44,10 +44,6 @@ struct virtio_vsock {
spinlock_t send_pkt_list_lock;
struct list_head send_pkt_list;
- struct work_struct loopback_work;
- spinlock_t loopback_list_lock; /* protects loopback_list */
- struct list_head loopback_list;
-
atomic_t queued_replies;
/* The following fields are protected by rx_lock. vqs[VSOCK_VQ_RX]
@@ -86,20 +82,6 @@ out_rcu:
return ret;
}
-static int virtio_transport_send_pkt_loopback(struct virtio_vsock *vsock,
- struct virtio_vsock_pkt *pkt)
-{
- int len = pkt->len;
-
- spin_lock_bh(&vsock->loopback_list_lock);
- list_add_tail(&pkt->list, &vsock->loopback_list);
- spin_unlock_bh(&vsock->loopback_list_lock);
-
- queue_work(virtio_vsock_workqueue, &vsock->loopback_work);
-
- return len;
-}
-
static void
virtio_transport_send_pkt_work(struct work_struct *work)
{
@@ -194,7 +176,8 @@ virtio_transport_send_pkt(struct virtio_vsock_pkt *pkt)
}
if (le64_to_cpu(pkt->hdr.dst_cid) == vsock->guest_cid) {
- len = virtio_transport_send_pkt_loopback(vsock, pkt);
+ virtio_transport_free_pkt(pkt);
+ len = -ENODEV;
goto out_rcu;
}
@@ -502,33 +485,6 @@ static struct virtio_transport virtio_transport = {
.send_pkt = virtio_transport_send_pkt,
};
-static void virtio_transport_loopback_work(struct work_struct *work)
-{
- struct virtio_vsock *vsock =
- container_of(work, struct virtio_vsock, loopback_work);
- LIST_HEAD(pkts);
-
- spin_lock_bh(&vsock->loopback_list_lock);
- list_splice_init(&vsock->loopback_list, &pkts);
- spin_unlock_bh(&vsock->loopback_list_lock);
-
- mutex_lock(&vsock->rx_lock);
-
- if (!vsock->rx_run)
- goto out;
-
- while (!list_empty(&pkts)) {
- struct virtio_vsock_pkt *pkt;
-
- pkt = list_first_entry(&pkts, struct virtio_vsock_pkt, list);
- list_del_init(&pkt->list);
-
- virtio_transport_recv_pkt(&virtio_transport, pkt);
- }
-out:
- mutex_unlock(&vsock->rx_lock);
-}
-
static void virtio_transport_rx_work(struct work_struct *work)
{
struct virtio_vsock *vsock =
@@ -633,13 +589,10 @@ static int virtio_vsock_probe(struct virtio_device *vdev)
mutex_init(&vsock->event_lock);
spin_lock_init(&vsock->send_pkt_list_lock);
INIT_LIST_HEAD(&vsock->send_pkt_list);
- spin_lock_init(&vsock->loopback_list_lock);
- INIT_LIST_HEAD(&vsock->loopback_list);
INIT_WORK(&vsock->rx_work, virtio_transport_rx_work);
INIT_WORK(&vsock->tx_work, virtio_transport_tx_work);
INIT_WORK(&vsock->event_work, virtio_transport_event_work);
INIT_WORK(&vsock->send_pkt_work, virtio_transport_send_pkt_work);
- INIT_WORK(&vsock->loopback_work, virtio_transport_loopback_work);
mutex_lock(&vsock->tx_lock);
vsock->tx_run = true;
@@ -720,22 +673,12 @@ static void virtio_vsock_remove(struct virtio_device *vdev)
}
spin_unlock_bh(&vsock->send_pkt_list_lock);
- spin_lock_bh(&vsock->loopback_list_lock);
- while (!list_empty(&vsock->loopback_list)) {
- pkt = list_first_entry(&vsock->loopback_list,
- struct virtio_vsock_pkt, list);
- list_del(&pkt->list);
- virtio_transport_free_pkt(pkt);
- }
- spin_unlock_bh(&vsock->loopback_list_lock);
-
/* Delete virtqueues and flush outstanding callbacks if any */
vdev->config->del_vqs(vdev);
/* Other works can be queued before 'config->del_vqs()', so we flush
* all works before to free the vsock object to avoid use after free.
*/
- flush_work(&vsock->loopback_work);
flush_work(&vsock->rx_work);
flush_work(&vsock->tx_work);
flush_work(&vsock->event_work);
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index e5ea29c6bca7..d9f0c9c5425a 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -11,9 +11,6 @@
#include <linux/sched/signal.h>
#include <linux/ctype.h>
#include <linux/list.h>
-#include <linux/virtio.h>
-#include <linux/virtio_ids.h>
-#include <linux/virtio_config.h>
#include <linux/virtio_vsock.h>
#include <uapi/linux/vsockmon.h>
@@ -34,6 +31,9 @@ virtio_transport_get_ops(struct vsock_sock *vsk)
{
const struct vsock_transport *t = vsock_core_get_transport(vsk);
+ if (WARN_ON(!t))
+ return NULL;
+
return container_of(t, struct virtio_transport, transport);
}
@@ -161,15 +161,25 @@ void virtio_transport_deliver_tap_pkt(struct virtio_vsock_pkt *pkt)
}
EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt);
+/* This function can only be used on connecting/connected sockets,
+ * since a socket assigned to a transport is required.
+ *
+ * Do not use on listener sockets!
+ */
static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
struct virtio_vsock_pkt_info *info)
{
u32 src_cid, src_port, dst_cid, dst_port;
+ const struct virtio_transport *t_ops;
struct virtio_vsock_sock *vvs;
struct virtio_vsock_pkt *pkt;
u32 pkt_len = info->pkt_len;
- src_cid = virtio_transport_get_ops(vsk)->transport.get_local_cid();
+ t_ops = virtio_transport_get_ops(vsk);
+ if (unlikely(!t_ops))
+ return -EFAULT;
+
+ src_cid = t_ops->transport.get_local_cid();
src_port = vsk->local_addr.svm_port;
if (!info->remote_cid) {
dst_cid = vsk->remote_addr.svm_cid;
@@ -202,7 +212,7 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
virtio_transport_inc_tx_pkt(vvs, pkt);
- return virtio_transport_get_ops(vsk)->send_pkt(pkt);
+ return t_ops->send_pkt(pkt);
}
static bool virtio_transport_inc_rx_pkt(struct virtio_vsock_sock *vvs,
@@ -1021,18 +1031,18 @@ virtio_transport_recv_listen(struct sock *sk, struct virtio_vsock_pkt *pkt,
int ret;
if (le16_to_cpu(pkt->hdr.op) != VIRTIO_VSOCK_OP_REQUEST) {
- virtio_transport_reset(vsk, pkt);
+ virtio_transport_reset_no_sock(t, pkt);
return -EINVAL;
}
if (sk_acceptq_is_full(sk)) {
- virtio_transport_reset(vsk, pkt);
+ virtio_transport_reset_no_sock(t, pkt);
return -ENOMEM;
}
child = vsock_create_connected(sk);
if (!child) {
- virtio_transport_reset(vsk, pkt);
+ virtio_transport_reset_no_sock(t, pkt);
return -ENOMEM;
}
@@ -1054,7 +1064,7 @@ virtio_transport_recv_listen(struct sock *sk, struct virtio_vsock_pkt *pkt,
*/
if (ret || vchild->transport != &t->transport) {
release_sock(child);
- virtio_transport_reset(vsk, pkt);
+ virtio_transport_reset_no_sock(t, pkt);
sock_put(child);
return ret;
}
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index 644d32e43d23..4b8b1150a738 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -648,7 +648,7 @@ static int vmci_transport_recv_dgram_cb(void *data, struct vmci_datagram *dg)
static bool vmci_transport_stream_allow(u32 cid, u32 port)
{
static const u32 non_socket_contexts[] = {
- VMADDR_CID_RESERVED,
+ VMADDR_CID_LOCAL,
};
int i;
diff --git a/net/vmw_vsock/vsock_loopback.c b/net/vmw_vsock/vsock_loopback.c
new file mode 100644
index 000000000000..a45f7ffca8c5
--- /dev/null
+++ b/net/vmw_vsock/vsock_loopback.c
@@ -0,0 +1,180 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* loopback transport for vsock using virtio_transport_common APIs
+ *
+ * Copyright (C) 2013-2019 Red Hat, Inc.
+ * Authors: Asias He <[email protected]>
+ * Stefan Hajnoczi <[email protected]>
+ * Stefano Garzarella <[email protected]>
+ *
+ */
+#include <linux/spinlock.h>
+#include <linux/module.h>
+#include <linux/list.h>
+#include <linux/virtio_vsock.h>
+
+struct vsock_loopback {
+ struct workqueue_struct *workqueue;
+
+ spinlock_t pkt_list_lock; /* protects pkt_list */
+ struct list_head pkt_list;
+ struct work_struct pkt_work;
+};
+
+static struct vsock_loopback the_vsock_loopback;
+
+static u32 vsock_loopback_get_local_cid(void)
+{
+ return VMADDR_CID_LOCAL;
+}
+
+static int vsock_loopback_send_pkt(struct virtio_vsock_pkt *pkt)
+{
+ struct vsock_loopback *vsock = &the_vsock_loopback;
+ int len = pkt->len;
+
+ spin_lock_bh(&vsock->pkt_list_lock);
+ list_add_tail(&pkt->list, &vsock->pkt_list);
+ spin_unlock_bh(&vsock->pkt_list_lock);
+
+ queue_work(vsock->workqueue, &vsock->pkt_work);
+
+ return len;
+}
+
+static int vsock_loopback_cancel_pkt(struct vsock_sock *vsk)
+{
+ struct vsock_loopback *vsock = &the_vsock_loopback;
+ struct virtio_vsock_pkt *pkt, *n;
+ LIST_HEAD(freeme);
+
+ spin_lock_bh(&vsock->pkt_list_lock);
+ list_for_each_entry_safe(pkt, n, &vsock->pkt_list, list) {
+ if (pkt->vsk != vsk)
+ continue;
+ list_move(&pkt->list, &freeme);
+ }
+ spin_unlock_bh(&vsock->pkt_list_lock);
+
+ list_for_each_entry_safe(pkt, n, &freeme, list) {
+ list_del(&pkt->list);
+ virtio_transport_free_pkt(pkt);
+ }
+
+ return 0;
+}
+
+static struct virtio_transport loopback_transport = {
+ .transport = {
+ .module = THIS_MODULE,
+
+ .get_local_cid = vsock_loopback_get_local_cid,
+
+ .init = virtio_transport_do_socket_init,
+ .destruct = virtio_transport_destruct,
+ .release = virtio_transport_release,
+ .connect = virtio_transport_connect,
+ .shutdown = virtio_transport_shutdown,
+ .cancel_pkt = vsock_loopback_cancel_pkt,
+
+ .dgram_bind = virtio_transport_dgram_bind,
+ .dgram_dequeue = virtio_transport_dgram_dequeue,
+ .dgram_enqueue = virtio_transport_dgram_enqueue,
+ .dgram_allow = virtio_transport_dgram_allow,
+
+ .stream_dequeue = virtio_transport_stream_dequeue,
+ .stream_enqueue = virtio_transport_stream_enqueue,
+ .stream_has_data = virtio_transport_stream_has_data,
+ .stream_has_space = virtio_transport_stream_has_space,
+ .stream_rcvhiwat = virtio_transport_stream_rcvhiwat,
+ .stream_is_active = virtio_transport_stream_is_active,
+ .stream_allow = virtio_transport_stream_allow,
+
+ .notify_poll_in = virtio_transport_notify_poll_in,
+ .notify_poll_out = virtio_transport_notify_poll_out,
+ .notify_recv_init = virtio_transport_notify_recv_init,
+ .notify_recv_pre_block = virtio_transport_notify_recv_pre_block,
+ .notify_recv_pre_dequeue = virtio_transport_notify_recv_pre_dequeue,
+ .notify_recv_post_dequeue = virtio_transport_notify_recv_post_dequeue,
+ .notify_send_init = virtio_transport_notify_send_init,
+ .notify_send_pre_block = virtio_transport_notify_send_pre_block,
+ .notify_send_pre_enqueue = virtio_transport_notify_send_pre_enqueue,
+ .notify_send_post_enqueue = virtio_transport_notify_send_post_enqueue,
+ .notify_buffer_size = virtio_transport_notify_buffer_size,
+ },
+
+ .send_pkt = vsock_loopback_send_pkt,
+};
+
+static void vsock_loopback_work(struct work_struct *work)
+{
+ struct vsock_loopback *vsock =
+ container_of(work, struct vsock_loopback, pkt_work);
+ LIST_HEAD(pkts);
+
+ spin_lock_bh(&vsock->pkt_list_lock);
+ list_splice_init(&vsock->pkt_list, &pkts);
+ spin_unlock_bh(&vsock->pkt_list_lock);
+
+ while (!list_empty(&pkts)) {
+ struct virtio_vsock_pkt *pkt;
+
+ pkt = list_first_entry(&pkts, struct virtio_vsock_pkt, list);
+ list_del_init(&pkt->list);
+
+ virtio_transport_deliver_tap_pkt(pkt);
+ virtio_transport_recv_pkt(&loopback_transport, pkt);
+ }
+}
+
+static int __init vsock_loopback_init(void)
+{
+ struct vsock_loopback *vsock = &the_vsock_loopback;
+ int ret;
+
+ vsock->workqueue = alloc_workqueue("vsock-loopback", 0, 0);
+ if (!vsock->workqueue)
+ return -ENOMEM;
+
+ spin_lock_init(&vsock->pkt_list_lock);
+ INIT_LIST_HEAD(&vsock->pkt_list);
+ INIT_WORK(&vsock->pkt_work, vsock_loopback_work);
+
+ ret = vsock_core_register(&loopback_transport.transport,
+ VSOCK_TRANSPORT_F_LOCAL);
+ if (ret)
+ goto out_wq;
+
+ return 0;
+
+out_wq:
+ destroy_workqueue(vsock->workqueue);
+ return ret;
+}
+
+static void __exit vsock_loopback_exit(void)
+{
+ struct vsock_loopback *vsock = &the_vsock_loopback;
+ struct virtio_vsock_pkt *pkt;
+
+ vsock_core_unregister(&loopback_transport.transport);
+
+ flush_work(&vsock->pkt_work);
+
+ spin_lock_bh(&vsock->pkt_list_lock);
+ while (!list_empty(&vsock->pkt_list)) {
+ pkt = list_first_entry(&vsock->pkt_list,
+ struct virtio_vsock_pkt, list);
+ list_del(&pkt->list);
+ virtio_transport_free_pkt(pkt);
+ }
+ spin_unlock_bh(&vsock->pkt_list_lock);
+
+ destroy_workqueue(vsock->workqueue);
+}
+
+module_init(vsock_loopback_init);
+module_exit(vsock_loopback_exit);
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Stefano Garzarella <[email protected]>");
+MODULE_DESCRIPTION("loopback transport for vsock");
+MODULE_ALIAS_NETPROTO(PF_VSOCK);
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 350513744575..3e25229a059d 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -1102,6 +1102,7 @@ static void __cfg80211_unregister_wdev(struct wireless_dev *wdev, bool sync)
#ifdef CONFIG_CFG80211_WEXT
kzfree(wdev->wext.keys);
+ wdev->wext.keys = NULL;
#endif
/* only initialized if we have a netdev */
if (wdev->netdev)
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index da5262b2298b..fa3526592c51 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -12900,8 +12900,7 @@ static int nl80211_vendor_check_policy(const struct wiphy_vendor_command *vcmd,
return -EINVAL;
}
- return nl80211_validate_nested(attr, vcmd->maxattr, vcmd->policy,
- extack);
+ return nla_validate_nested(attr, vcmd->maxattr, vcmd->policy, extack);
}
static int nl80211_vendor_cmd(struct sk_buff *skb, struct genl_info *info)
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index c34f7d077604..2efe44a34644 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -659,6 +659,12 @@ static int x25_release(struct socket *sock)
sock_set_flag(sk, SOCK_DEAD);
sock_set_flag(sk, SOCK_DESTROY);
break;
+
+ case X25_STATE_5:
+ x25_write_internal(sk, X25_CLEAR_REQUEST);
+ x25_disconnect(sk, 0, 0, 0);
+ __x25_destroy_socket(sk);
+ goto out;
}
sock_orphan(sk);
@@ -1054,6 +1060,8 @@ int x25_rx_call_request(struct sk_buff *skb, struct x25_neigh *nb,
if (test_bit(X25_ACCPT_APPRV_FLAG, &makex25->flags)) {
x25_write_internal(make, X25_CALL_ACCEPTED);
makex25->state = X25_STATE_3;
+ } else {
+ makex25->state = X25_STATE_5;
}
/*
diff --git a/net/x25/x25_in.c b/net/x25/x25_in.c
index f97c43344e95..4d3bb46aaae0 100644
--- a/net/x25/x25_in.c
+++ b/net/x25/x25_in.c
@@ -382,6 +382,35 @@ out_clear:
return 0;
}
+/*
+ * State machine for state 5, Call Accepted / Call Connected pending (X25_ACCPT_APPRV_FLAG).
+ * The handling of the timer(s) is in file x25_timer.c
+ * Handling of state 0 and connection release is in af_x25.c.
+ */
+static int x25_state5_machine(struct sock *sk, struct sk_buff *skb, int frametype)
+{
+ struct x25_sock *x25 = x25_sk(sk);
+
+ switch (frametype) {
+ case X25_CLEAR_REQUEST:
+ if (!pskb_may_pull(skb, X25_STD_MIN_LEN + 2)) {
+ x25_write_internal(sk, X25_CLEAR_REQUEST);
+ x25->state = X25_STATE_2;
+ x25_start_t23timer(sk);
+ return 0;
+ }
+
+ x25_write_internal(sk, X25_CLEAR_CONFIRMATION);
+ x25_disconnect(sk, 0, skb->data[3], skb->data[4]);
+ break;
+
+ default:
+ break;
+ }
+
+ return 0;
+}
+
/* Higher level upcall for a LAPB frame */
int x25_process_rx_frame(struct sock *sk, struct sk_buff *skb)
{
@@ -406,6 +435,9 @@ int x25_process_rx_frame(struct sock *sk, struct sk_buff *skb)
case X25_STATE_4:
queued = x25_state4_machine(sk, skb, frametype);
break;
+ case X25_STATE_5:
+ queued = x25_state5_machine(sk, skb, frametype);
+ break;
}
x25_kick(sk);
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 956793893c9d..02ada7ab8c6e 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -31,6 +31,8 @@
#define TX_BATCH_SIZE 16
+static DEFINE_PER_CPU(struct list_head, xskmap_flush_list);
+
bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs)
{
return READ_ONCE(xs->rx) && READ_ONCE(xs->umem) &&
@@ -39,21 +41,21 @@ bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs)
bool xsk_umem_has_addrs(struct xdp_umem *umem, u32 cnt)
{
- return xskq_has_addrs(umem->fq, cnt);
+ return xskq_cons_has_entries(umem->fq, cnt);
}
EXPORT_SYMBOL(xsk_umem_has_addrs);
-u64 *xsk_umem_peek_addr(struct xdp_umem *umem, u64 *addr)
+bool xsk_umem_peek_addr(struct xdp_umem *umem, u64 *addr)
{
- return xskq_peek_addr(umem->fq, addr, umem);
+ return xskq_cons_peek_addr(umem->fq, addr, umem);
}
EXPORT_SYMBOL(xsk_umem_peek_addr);
-void xsk_umem_discard_addr(struct xdp_umem *umem)
+void xsk_umem_release_addr(struct xdp_umem *umem)
{
- xskq_discard_addr(umem->fq);
+ xskq_cons_release(umem->fq);
}
-EXPORT_SYMBOL(xsk_umem_discard_addr);
+EXPORT_SYMBOL(xsk_umem_release_addr);
void xsk_set_rx_need_wakeup(struct xdp_umem *umem)
{
@@ -124,7 +126,7 @@ static void __xsk_rcv_memcpy(struct xdp_umem *umem, u64 addr, void *from_buf,
void *to_buf = xdp_umem_get_data(umem, addr);
addr = xsk_umem_add_offset_to_addr(addr);
- if (xskq_crosses_non_contig_pg(umem, addr, len + metalen)) {
+ if (xskq_cons_crosses_non_contig_pg(umem, addr, len + metalen)) {
void *next_pg_addr = umem->pages[(addr >> PAGE_SHIFT) + 1].addr;
u64 page_start = addr & ~(PAGE_SIZE - 1);
u64 first_len = PAGE_SIZE - (addr - page_start);
@@ -146,7 +148,7 @@ static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
u32 metalen;
int err;
- if (!xskq_peek_addr(xs->umem->fq, &addr, xs->umem) ||
+ if (!xskq_cons_peek_addr(xs->umem->fq, &addr, xs->umem) ||
len > xs->umem->chunk_size_nohr - XDP_PACKET_HEADROOM) {
xs->rx_dropped++;
return -ENOSPC;
@@ -165,9 +167,9 @@ static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
offset += metalen;
addr = xsk_umem_adjust_offset(xs->umem, addr, offset);
- err = xskq_produce_batch_desc(xs->rx, addr, len);
+ err = xskq_prod_reserve_desc(xs->rx, addr, len);
if (!err) {
- xskq_discard_addr(xs->umem->fq);
+ xskq_cons_release(xs->umem->fq);
xdp_return_buff(xdp);
return 0;
}
@@ -178,7 +180,7 @@ static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
{
- int err = xskq_produce_batch_desc(xs->rx, (u64)xdp->handle, len);
+ int err = xskq_prod_reserve_desc(xs->rx, xdp->handle, len);
if (err)
xs->rx_dropped++;
@@ -214,7 +216,7 @@ static int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
static void xsk_flush(struct xdp_sock *xs)
{
- xskq_produce_flush_desc(xs->rx);
+ xskq_prod_submit(xs->rx);
xs->sk.sk_data_ready(&xs->sk);
}
@@ -234,7 +236,7 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
goto out_unlock;
}
- if (!xskq_peek_addr(xs->umem->fq, &addr, xs->umem) ||
+ if (!xskq_cons_peek_addr(xs->umem->fq, &addr, xs->umem) ||
len > xs->umem->chunk_size_nohr - XDP_PACKET_HEADROOM) {
err = -ENOSPC;
goto out_drop;
@@ -245,12 +247,12 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
memcpy(buffer, xdp->data_meta, len + metalen);
addr = xsk_umem_adjust_offset(xs->umem, addr, metalen);
- err = xskq_produce_batch_desc(xs->rx, addr, len);
+ err = xskq_prod_reserve_desc(xs->rx, addr, len);
if (err)
goto out_drop;
- xskq_discard_addr(xs->umem->fq);
- xskq_produce_flush_desc(xs->rx);
+ xskq_cons_release(xs->umem->fq);
+ xskq_prod_submit(xs->rx);
spin_unlock_bh(&xs->rx_lock);
@@ -264,11 +266,9 @@ out_unlock:
return err;
}
-int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp,
- struct xdp_sock *xs)
+int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp)
{
- struct xsk_map *m = container_of(map, struct xsk_map, map);
- struct list_head *flush_list = this_cpu_ptr(m->flush_list);
+ struct list_head *flush_list = this_cpu_ptr(&xskmap_flush_list);
int err;
err = xsk_rcv(xs, xdp);
@@ -281,10 +281,9 @@ int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp,
return 0;
}
-void __xsk_map_flush(struct bpf_map *map)
+void __xsk_map_flush(void)
{
- struct xsk_map *m = container_of(map, struct xsk_map, map);
- struct list_head *flush_list = this_cpu_ptr(m->flush_list);
+ struct list_head *flush_list = this_cpu_ptr(&xskmap_flush_list);
struct xdp_sock *xs, *tmp;
list_for_each_entry_safe(xs, tmp, flush_list, flush_node) {
@@ -295,7 +294,7 @@ void __xsk_map_flush(struct bpf_map *map)
void xsk_umem_complete_tx(struct xdp_umem *umem, u32 nb_entries)
{
- xskq_produce_flush_addr_n(umem->cq, nb_entries);
+ xskq_prod_submit_n(umem->cq, nb_entries);
}
EXPORT_SYMBOL(xsk_umem_complete_tx);
@@ -317,13 +316,18 @@ bool xsk_umem_consume_tx(struct xdp_umem *umem, struct xdp_desc *desc)
rcu_read_lock();
list_for_each_entry_rcu(xs, &umem->xsk_list, list) {
- if (!xskq_peek_desc(xs->tx, desc, umem))
+ if (!xskq_cons_peek_desc(xs->tx, desc, umem))
continue;
- if (xskq_produce_addr_lazy(umem->cq, desc->addr))
+ /* This is the backpreassure mechanism for the Tx path.
+ * Reserve space in the completion queue and only proceed
+ * if there is space in it. This avoids having to implement
+ * any buffering in the Tx path.
+ */
+ if (xskq_prod_reserve_addr(umem->cq, desc->addr))
goto out;
- xskq_discard_desc(xs->tx);
+ xskq_cons_release(xs->tx);
rcu_read_unlock();
return true;
}
@@ -334,12 +338,21 @@ out:
}
EXPORT_SYMBOL(xsk_umem_consume_tx);
-static int xsk_zc_xmit(struct xdp_sock *xs)
+static int xsk_wakeup(struct xdp_sock *xs, u8 flags)
{
struct net_device *dev = xs->dev;
+ int err;
+
+ rcu_read_lock();
+ err = dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id, flags);
+ rcu_read_unlock();
+
+ return err;
+}
- return dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id,
- XDP_WAKEUP_TX);
+static int xsk_zc_xmit(struct xdp_sock *xs)
+{
+ return xsk_wakeup(xs, XDP_WAKEUP_TX);
}
static void xsk_destruct_skb(struct sk_buff *skb)
@@ -349,7 +362,7 @@ static void xsk_destruct_skb(struct sk_buff *skb)
unsigned long flags;
spin_lock_irqsave(&xs->tx_completion_lock, flags);
- WARN_ON_ONCE(xskq_produce_addr(xs->umem->cq, addr));
+ xskq_prod_submit_addr(xs->umem->cq, addr);
spin_unlock_irqrestore(&xs->tx_completion_lock, flags);
sock_wfree(skb);
@@ -369,7 +382,7 @@ static int xsk_generic_xmit(struct sock *sk)
if (xs->queue_id >= xs->dev->real_num_tx_queues)
goto out;
- while (xskq_peek_desc(xs->tx, &desc, xs->umem)) {
+ while (xskq_cons_peek_desc(xs->tx, &desc, xs->umem)) {
char *buffer;
u64 addr;
u32 len;
@@ -390,7 +403,12 @@ static int xsk_generic_xmit(struct sock *sk)
addr = desc.addr;
buffer = xdp_umem_get_data(xs->umem, addr);
err = skb_store_bits(skb, 0, buffer, len);
- if (unlikely(err) || xskq_reserve_addr(xs->umem->cq)) {
+ /* This is the backpreassure mechanism for the Tx path.
+ * Reserve space in the completion queue and only proceed
+ * if there is space in it. This avoids having to implement
+ * any buffering in the Tx path.
+ */
+ if (unlikely(err) || xskq_prod_reserve(xs->umem->cq)) {
kfree_skb(skb);
goto out;
}
@@ -402,7 +420,7 @@ static int xsk_generic_xmit(struct sock *sk)
skb->destructor = xsk_destruct_skb;
err = dev_direct_xmit(skb, xs->queue_id);
- xskq_discard_desc(xs->tx);
+ xskq_cons_release(xs->tx);
/* Ignore NET_XMIT_CN as packet might have been sent */
if (err == NET_XMIT_DROP || err == NETDEV_TX_BUSY) {
/* SKB completed but not sent */
@@ -453,27 +471,24 @@ static __poll_t xsk_poll(struct file *file, struct socket *sock,
__poll_t mask = datagram_poll(file, sock, wait);
struct sock *sk = sock->sk;
struct xdp_sock *xs = xdp_sk(sk);
- struct net_device *dev;
struct xdp_umem *umem;
if (unlikely(!xsk_is_bound(xs)))
return mask;
- dev = xs->dev;
umem = xs->umem;
if (umem->need_wakeup) {
- if (dev->netdev_ops->ndo_xsk_wakeup)
- dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id,
- umem->need_wakeup);
+ if (xs->zc)
+ xsk_wakeup(xs, umem->need_wakeup);
else
/* Poll needs to drive Tx also in copy mode */
__xsk_sendmsg(sk);
}
- if (xs->rx && !xskq_empty_desc(xs->rx))
+ if (xs->rx && !xskq_prod_is_empty(xs->rx))
mask |= EPOLLIN | EPOLLRDNORM;
- if (xs->tx && !xskq_full_desc(xs->tx))
+ if (xs->tx && !xskq_cons_is_full(xs->tx))
mask |= EPOLLOUT | EPOLLWRNORM;
return mask;
@@ -1177,7 +1192,7 @@ static struct pernet_operations xsk_net_ops = {
static int __init xsk_init(void)
{
- int err;
+ int err, cpu;
err = proto_register(&xsk_proto, 0 /* no slab */);
if (err)
@@ -1195,6 +1210,8 @@ static int __init xsk_init(void)
if (err)
goto out_pernet;
+ for_each_possible_cpu(cpu)
+ INIT_LIST_HEAD(&per_cpu(xskmap_flush_list, cpu));
return 0;
out_pernet:
diff --git a/net/xdp/xsk_queue.c b/net/xdp/xsk_queue.c
index b66504592d9b..c90e9c1e3c63 100644
--- a/net/xdp/xsk_queue.c
+++ b/net/xdp/xsk_queue.c
@@ -18,14 +18,14 @@ void xskq_set_umem(struct xsk_queue *q, u64 size, u64 chunk_mask)
q->chunk_mask = chunk_mask;
}
-static u32 xskq_umem_get_ring_size(struct xsk_queue *q)
+static size_t xskq_get_ring_size(struct xsk_queue *q, bool umem_queue)
{
- return sizeof(struct xdp_umem_ring) + q->nentries * sizeof(u64);
-}
+ struct xdp_umem_ring *umem_ring;
+ struct xdp_rxtx_ring *rxtx_ring;
-static u32 xskq_rxtx_get_ring_size(struct xsk_queue *q)
-{
- return sizeof(struct xdp_ring) + q->nentries * sizeof(struct xdp_desc);
+ if (umem_queue)
+ return struct_size(umem_ring, desc, q->nentries);
+ return struct_size(rxtx_ring, desc, q->nentries);
}
struct xsk_queue *xskq_create(u32 nentries, bool umem_queue)
@@ -43,8 +43,7 @@ struct xsk_queue *xskq_create(u32 nentries, bool umem_queue)
gfp_flags = GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN |
__GFP_COMP | __GFP_NORETRY;
- size = umem_queue ? xskq_umem_get_ring_size(q) :
- xskq_rxtx_get_ring_size(q);
+ size = xskq_get_ring_size(q, umem_queue);
q->ring = (struct xdp_ring *)__get_free_pages(gfp_flags,
get_order(size));
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index eddae4688862..bec2af11853a 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -10,9 +10,6 @@
#include <linux/if_xdp.h>
#include <net/xdp_sock.h>
-#define RX_BATCH_SIZE 16
-#define LAZY_UPDATE_THRESHOLD 128
-
struct xdp_ring {
u32 producer ____cacheline_aligned_in_smp;
u32 consumer ____cacheline_aligned_in_smp;
@@ -36,10 +33,8 @@ struct xsk_queue {
u64 size;
u32 ring_mask;
u32 nentries;
- u32 prod_head;
- u32 prod_tail;
- u32 cons_head;
- u32 cons_tail;
+ u32 cached_prod;
+ u32 cached_cons;
struct xdp_ring *ring;
u64 invalid_descs;
};
@@ -86,56 +81,31 @@ struct xsk_queue {
* now and again after circling through the ring.
*/
-/* Common functions operating for both RXTX and umem queues */
-
-static inline u64 xskq_nb_invalid_descs(struct xsk_queue *q)
-{
- return q ? q->invalid_descs : 0;
-}
-
-static inline u32 xskq_nb_avail(struct xsk_queue *q, u32 dcnt)
-{
- u32 entries = q->prod_tail - q->cons_tail;
-
- if (entries == 0) {
- /* Refresh the local pointer */
- q->prod_tail = READ_ONCE(q->ring->producer);
- entries = q->prod_tail - q->cons_tail;
- }
-
- return (entries > dcnt) ? dcnt : entries;
-}
-
-static inline u32 xskq_nb_free(struct xsk_queue *q, u32 producer, u32 dcnt)
-{
- u32 free_entries = q->nentries - (producer - q->cons_tail);
-
- if (free_entries >= dcnt)
- return free_entries;
-
- /* Refresh the local tail pointer */
- q->cons_tail = READ_ONCE(q->ring->consumer);
- return q->nentries - (producer - q->cons_tail);
-}
-
-static inline bool xskq_has_addrs(struct xsk_queue *q, u32 cnt)
-{
- u32 entries = q->prod_tail - q->cons_tail;
-
- if (entries >= cnt)
- return true;
-
- /* Refresh the local pointer. */
- q->prod_tail = READ_ONCE(q->ring->producer);
- entries = q->prod_tail - q->cons_tail;
-
- return entries >= cnt;
-}
+/* The operations on the rings are the following:
+ *
+ * producer consumer
+ *
+ * RESERVE entries PEEK in the ring for entries
+ * WRITE data into the ring READ data from the ring
+ * SUBMIT entries RELEASE entries
+ *
+ * The producer reserves one or more entries in the ring. It can then
+ * fill in these entries and finally submit them so that they can be
+ * seen and read by the consumer.
+ *
+ * The consumer peeks into the ring to see if the producer has written
+ * any new entries. If so, the producer can then read these entries
+ * and when it is done reading them release them back to the producer
+ * so that the producer can use these slots to fill in new entries.
+ *
+ * The function names below reflect these operations.
+ */
-/* UMEM queue */
+/* Functions that read and validate content from consumer rings. */
-static inline bool xskq_crosses_non_contig_pg(struct xdp_umem *umem, u64 addr,
- u64 length)
+static inline bool xskq_cons_crosses_non_contig_pg(struct xdp_umem *umem,
+ u64 addr,
+ u64 length)
{
bool cross_pg = (addr & (PAGE_SIZE - 1)) + length > PAGE_SIZE;
bool next_pg_contig =
@@ -145,9 +115,16 @@ static inline bool xskq_crosses_non_contig_pg(struct xdp_umem *umem, u64 addr,
return cross_pg && !next_pg_contig;
}
-static inline bool xskq_is_valid_addr(struct xsk_queue *q, u64 addr)
+static inline bool xskq_cons_is_valid_unaligned(struct xsk_queue *q,
+ u64 addr,
+ u64 length,
+ struct xdp_umem *umem)
{
- if (addr >= q->size) {
+ u64 base_addr = xsk_umem_extract_addr(addr);
+
+ addr = xsk_umem_add_offset_to_addr(addr);
+ if (base_addr >= q->size || addr >= q->size ||
+ xskq_cons_crosses_non_contig_pg(umem, addr, length)) {
q->invalid_descs++;
return false;
}
@@ -155,15 +132,9 @@ static inline bool xskq_is_valid_addr(struct xsk_queue *q, u64 addr)
return true;
}
-static inline bool xskq_is_valid_addr_unaligned(struct xsk_queue *q, u64 addr,
- u64 length,
- struct xdp_umem *umem)
+static inline bool xskq_cons_is_valid_addr(struct xsk_queue *q, u64 addr)
{
- u64 base_addr = xsk_umem_extract_addr(addr);
-
- addr = xsk_umem_add_offset_to_addr(addr);
- if (base_addr >= q->size || addr >= q->size ||
- xskq_crosses_non_contig_pg(umem, addr, length)) {
+ if (addr >= q->size) {
q->invalid_descs++;
return false;
}
@@ -171,204 +142,240 @@ static inline bool xskq_is_valid_addr_unaligned(struct xsk_queue *q, u64 addr,
return true;
}
-static inline u64 *xskq_validate_addr(struct xsk_queue *q, u64 *addr,
- struct xdp_umem *umem)
+static inline bool xskq_cons_read_addr(struct xsk_queue *q, u64 *addr,
+ struct xdp_umem *umem)
{
- while (q->cons_tail != q->cons_head) {
- struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
- unsigned int idx = q->cons_tail & q->ring_mask;
+ struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
- *addr = READ_ONCE(ring->desc[idx]) & q->chunk_mask;
+ while (q->cached_cons != q->cached_prod) {
+ u32 idx = q->cached_cons & q->ring_mask;
+
+ *addr = ring->desc[idx] & q->chunk_mask;
if (umem->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG) {
- if (xskq_is_valid_addr_unaligned(q, *addr,
+ if (xskq_cons_is_valid_unaligned(q, *addr,
umem->chunk_size_nohr,
umem))
- return addr;
+ return true;
goto out;
}
- if (xskq_is_valid_addr(q, *addr))
- return addr;
+ if (xskq_cons_is_valid_addr(q, *addr))
+ return true;
out:
- q->cons_tail++;
+ q->cached_cons++;
}
- return NULL;
+ return false;
}
-static inline u64 *xskq_peek_addr(struct xsk_queue *q, u64 *addr,
- struct xdp_umem *umem)
+static inline bool xskq_cons_is_valid_desc(struct xsk_queue *q,
+ struct xdp_desc *d,
+ struct xdp_umem *umem)
{
- if (q->cons_tail == q->cons_head) {
- smp_mb(); /* D, matches A */
- WRITE_ONCE(q->ring->consumer, q->cons_tail);
- q->cons_head = q->cons_tail + xskq_nb_avail(q, RX_BATCH_SIZE);
+ if (umem->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG) {
+ if (!xskq_cons_is_valid_unaligned(q, d->addr, d->len, umem))
+ return false;
+
+ if (d->len > umem->chunk_size_nohr || d->options) {
+ q->invalid_descs++;
+ return false;
+ }
- /* Order consumer and data */
- smp_rmb();
+ return true;
}
- return xskq_validate_addr(q, addr, umem);
-}
+ if (!xskq_cons_is_valid_addr(q, d->addr))
+ return false;
-static inline void xskq_discard_addr(struct xsk_queue *q)
-{
- q->cons_tail++;
+ if (((d->addr + d->len) & q->chunk_mask) != (d->addr & q->chunk_mask) ||
+ d->options) {
+ q->invalid_descs++;
+ return false;
+ }
+
+ return true;
}
-static inline int xskq_produce_addr(struct xsk_queue *q, u64 addr)
+static inline bool xskq_cons_read_desc(struct xsk_queue *q,
+ struct xdp_desc *desc,
+ struct xdp_umem *umem)
{
- struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
-
- if (xskq_nb_free(q, q->prod_tail, 1) == 0)
- return -ENOSPC;
+ while (q->cached_cons != q->cached_prod) {
+ struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring;
+ u32 idx = q->cached_cons & q->ring_mask;
- /* A, matches D */
- ring->desc[q->prod_tail++ & q->ring_mask] = addr;
+ *desc = ring->desc[idx];
+ if (xskq_cons_is_valid_desc(q, desc, umem))
+ return true;
- /* Order producer and data */
- smp_wmb(); /* B, matches C */
+ q->cached_cons++;
+ }
- WRITE_ONCE(q->ring->producer, q->prod_tail);
- return 0;
+ return false;
}
-static inline int xskq_produce_addr_lazy(struct xsk_queue *q, u64 addr)
+/* Functions for consumers */
+
+static inline void __xskq_cons_release(struct xsk_queue *q)
{
- struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
+ smp_mb(); /* D, matches A */
+ WRITE_ONCE(q->ring->consumer, q->cached_cons);
+}
- if (xskq_nb_free(q, q->prod_head, LAZY_UPDATE_THRESHOLD) == 0)
- return -ENOSPC;
+static inline void __xskq_cons_peek(struct xsk_queue *q)
+{
+ /* Refresh the local pointer */
+ q->cached_prod = READ_ONCE(q->ring->producer);
+ smp_rmb(); /* C, matches B */
+}
- /* A, matches D */
- ring->desc[q->prod_head++ & q->ring_mask] = addr;
- return 0;
+static inline void xskq_cons_get_entries(struct xsk_queue *q)
+{
+ __xskq_cons_release(q);
+ __xskq_cons_peek(q);
}
-static inline void xskq_produce_flush_addr_n(struct xsk_queue *q,
- u32 nb_entries)
+static inline bool xskq_cons_has_entries(struct xsk_queue *q, u32 cnt)
{
- /* Order producer and data */
- smp_wmb(); /* B, matches C */
+ u32 entries = q->cached_prod - q->cached_cons;
- q->prod_tail += nb_entries;
- WRITE_ONCE(q->ring->producer, q->prod_tail);
+ if (entries >= cnt)
+ return true;
+
+ __xskq_cons_peek(q);
+ entries = q->cached_prod - q->cached_cons;
+
+ return entries >= cnt;
}
-static inline int xskq_reserve_addr(struct xsk_queue *q)
+static inline bool xskq_cons_peek_addr(struct xsk_queue *q, u64 *addr,
+ struct xdp_umem *umem)
{
- if (xskq_nb_free(q, q->prod_head, 1) == 0)
- return -ENOSPC;
+ if (q->cached_prod == q->cached_cons)
+ xskq_cons_get_entries(q);
+ return xskq_cons_read_addr(q, addr, umem);
+}
- /* A, matches D */
- q->prod_head++;
- return 0;
+static inline bool xskq_cons_peek_desc(struct xsk_queue *q,
+ struct xdp_desc *desc,
+ struct xdp_umem *umem)
+{
+ if (q->cached_prod == q->cached_cons)
+ xskq_cons_get_entries(q);
+ return xskq_cons_read_desc(q, desc, umem);
}
-/* Rx/Tx queue */
+static inline void xskq_cons_release(struct xsk_queue *q)
+{
+ /* To improve performance, only update local state here.
+ * Reflect this to global state when we get new entries
+ * from the ring in xskq_cons_get_entries().
+ */
+ q->cached_cons++;
+}
-static inline bool xskq_is_valid_desc(struct xsk_queue *q, struct xdp_desc *d,
- struct xdp_umem *umem)
+static inline bool xskq_cons_is_full(struct xsk_queue *q)
{
- if (umem->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG) {
- if (!xskq_is_valid_addr_unaligned(q, d->addr, d->len, umem))
- return false;
+ /* No barriers needed since data is not accessed */
+ return READ_ONCE(q->ring->producer) - READ_ONCE(q->ring->consumer) ==
+ q->nentries;
+}
- if (d->len > umem->chunk_size_nohr || d->options) {
- q->invalid_descs++;
- return false;
- }
+/* Functions for producers */
- return true;
- }
+static inline bool xskq_prod_is_full(struct xsk_queue *q)
+{
+ u32 free_entries = q->nentries - (q->cached_prod - q->cached_cons);
- if (!xskq_is_valid_addr(q, d->addr))
+ if (free_entries)
return false;
- if (((d->addr + d->len) & q->chunk_mask) != (d->addr & q->chunk_mask) ||
- d->options) {
- q->invalid_descs++;
- return false;
- }
+ /* Refresh the local tail pointer */
+ q->cached_cons = READ_ONCE(q->ring->consumer);
+ free_entries = q->nentries - (q->cached_prod - q->cached_cons);
- return true;
+ return !free_entries;
}
-static inline struct xdp_desc *xskq_validate_desc(struct xsk_queue *q,
- struct xdp_desc *desc,
- struct xdp_umem *umem)
+static inline int xskq_prod_reserve(struct xsk_queue *q)
{
- while (q->cons_tail != q->cons_head) {
- struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring;
- unsigned int idx = q->cons_tail & q->ring_mask;
-
- *desc = READ_ONCE(ring->desc[idx]);
- if (xskq_is_valid_desc(q, desc, umem))
- return desc;
-
- q->cons_tail++;
- }
+ if (xskq_prod_is_full(q))
+ return -ENOSPC;
- return NULL;
+ /* A, matches D */
+ q->cached_prod++;
+ return 0;
}
-static inline struct xdp_desc *xskq_peek_desc(struct xsk_queue *q,
- struct xdp_desc *desc,
- struct xdp_umem *umem)
+static inline int xskq_prod_reserve_addr(struct xsk_queue *q, u64 addr)
{
- if (q->cons_tail == q->cons_head) {
- smp_mb(); /* D, matches A */
- WRITE_ONCE(q->ring->consumer, q->cons_tail);
- q->cons_head = q->cons_tail + xskq_nb_avail(q, RX_BATCH_SIZE);
-
- /* Order consumer and data */
- smp_rmb(); /* C, matches B */
- }
+ struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
- return xskq_validate_desc(q, desc, umem);
-}
+ if (xskq_prod_is_full(q))
+ return -ENOSPC;
-static inline void xskq_discard_desc(struct xsk_queue *q)
-{
- q->cons_tail++;
+ /* A, matches D */
+ ring->desc[q->cached_prod++ & q->ring_mask] = addr;
+ return 0;
}
-static inline int xskq_produce_batch_desc(struct xsk_queue *q,
- u64 addr, u32 len)
+static inline int xskq_prod_reserve_desc(struct xsk_queue *q,
+ u64 addr, u32 len)
{
struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring;
- unsigned int idx;
+ u32 idx;
- if (xskq_nb_free(q, q->prod_head, 1) == 0)
+ if (xskq_prod_is_full(q))
return -ENOSPC;
/* A, matches D */
- idx = (q->prod_head++) & q->ring_mask;
+ idx = q->cached_prod++ & q->ring_mask;
ring->desc[idx].addr = addr;
ring->desc[idx].len = len;
return 0;
}
-static inline void xskq_produce_flush_desc(struct xsk_queue *q)
+static inline void __xskq_prod_submit(struct xsk_queue *q, u32 idx)
{
- /* Order producer and data */
smp_wmb(); /* B, matches C */
- q->prod_tail = q->prod_head;
- WRITE_ONCE(q->ring->producer, q->prod_tail);
+ WRITE_ONCE(q->ring->producer, idx);
+}
+
+static inline void xskq_prod_submit(struct xsk_queue *q)
+{
+ __xskq_prod_submit(q, q->cached_prod);
+}
+
+static inline void xskq_prod_submit_addr(struct xsk_queue *q, u64 addr)
+{
+ struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
+ u32 idx = q->ring->producer;
+
+ ring->desc[idx++ & q->ring_mask] = addr;
+
+ __xskq_prod_submit(q, idx);
}
-static inline bool xskq_full_desc(struct xsk_queue *q)
+static inline void xskq_prod_submit_n(struct xsk_queue *q, u32 nb_entries)
{
- return xskq_nb_avail(q, q->nentries) == q->nentries;
+ __xskq_prod_submit(q, q->ring->producer + nb_entries);
}
-static inline bool xskq_empty_desc(struct xsk_queue *q)
+static inline bool xskq_prod_is_empty(struct xsk_queue *q)
{
- return xskq_nb_free(q, q->prod_tail, q->nentries) == q->nentries;
+ /* No barriers needed since data is not accessed */
+ return READ_ONCE(q->ring->consumer) == READ_ONCE(q->ring->producer);
+}
+
+/* For both producers and consumers */
+
+static inline u64 xskq_nb_invalid_descs(struct xsk_queue *q)
+{
+ return q ? q->invalid_descs : 0;
}
void xskq_set_umem(struct xsk_queue *q, u64 size, u64 chunk_mask);
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 1fc42ad8ff49..5b89c0370f33 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -38,6 +38,8 @@ tprogs-y += tc_l2_redirect
tprogs-y += lwt_len_hist
tprogs-y += xdp_tx_iptunnel
tprogs-y += test_map_in_map
+tprogs-y += per_socket_stats_example
+tprogs-y += xdp_redirect
tprogs-y += xdp_redirect_map
tprogs-y += xdp_redirect_cpu
tprogs-y += xdp_monitor
@@ -196,7 +198,7 @@ endif
TPROGCFLAGS_bpf_load.o += -Wno-unused-variable
-TPROGS_LDLIBS += $(LIBBPF) -lelf
+TPROGS_LDLIBS += $(LIBBPF) -lelf -lz
TPROGLDLIBS_tracex4 += -lrt
TPROGLDLIBS_trace_output += -lrt
TPROGLDLIBS_map_perf_test += -lrt
@@ -234,6 +236,7 @@ BTF_LLVM_PROBE := $(shell echo "int main() { return 0; }" | \
readelf -S ./llvm_btf_verify.o | grep BTF; \
/bin/rm -f ./llvm_btf_verify.o)
+BPF_EXTRA_CFLAGS += -fno-stack-protector
ifneq ($(BTF_LLVM_PROBE),)
BPF_EXTRA_CFLAGS += -g
else
diff --git a/samples/bpf/syscall_tp_kern.c b/samples/bpf/syscall_tp_kern.c
index 1d78819ffef1..630ce8c4d5a2 100644
--- a/samples/bpf/syscall_tp_kern.c
+++ b/samples/bpf/syscall_tp_kern.c
@@ -47,13 +47,27 @@ static __always_inline void count(void *map)
SEC("tracepoint/syscalls/sys_enter_open")
int trace_enter_open(struct syscalls_enter_open_args *ctx)
{
- count((void *)&enter_open_map);
+ count(&enter_open_map);
+ return 0;
+}
+
+SEC("tracepoint/syscalls/sys_enter_openat")
+int trace_enter_open_at(struct syscalls_enter_open_args *ctx)
+{
+ count(&enter_open_map);
return 0;
}
SEC("tracepoint/syscalls/sys_exit_open")
int trace_enter_exit(struct syscalls_exit_open_args *ctx)
{
- count((void *)&exit_open_map);
+ count(&exit_open_map);
+ return 0;
+}
+
+SEC("tracepoint/syscalls/sys_exit_openat")
+int trace_enter_exit_at(struct syscalls_exit_open_args *ctx)
+{
+ count(&exit_open_map);
return 0;
}
diff --git a/samples/bpf/trace_event_user.c b/samples/bpf/trace_event_user.c
index 16a16eadd509..749a50f2f9f3 100644
--- a/samples/bpf/trace_event_user.c
+++ b/samples/bpf/trace_event_user.c
@@ -37,9 +37,9 @@ static void print_ksym(__u64 addr)
}
printf("%s;", sym->name);
- if (!strcmp(sym->name, "sys_read"))
+ if (!strstr(sym->name, "sys_read"))
sys_read_seen = true;
- else if (!strcmp(sym->name, "sys_write"))
+ else if (!strstr(sym->name, "sys_write"))
sys_write_seen = true;
}
diff --git a/samples/bpf/xdp1_user.c b/samples/bpf/xdp1_user.c
index 3e553eed95a7..38a8852cb57f 100644
--- a/samples/bpf/xdp1_user.c
+++ b/samples/bpf/xdp1_user.c
@@ -98,7 +98,7 @@ int main(int argc, char **argv)
xdp_flags |= XDP_FLAGS_SKB_MODE;
break;
case 'N':
- xdp_flags |= XDP_FLAGS_DRV_MODE;
+ /* default, set below */
break;
case 'F':
xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
@@ -109,6 +109,9 @@ int main(int argc, char **argv)
}
}
+ if (!(xdp_flags & XDP_FLAGS_SKB_MODE))
+ xdp_flags |= XDP_FLAGS_DRV_MODE;
+
if (optind == argc) {
usage(basename(argv[0]));
return 1;
diff --git a/samples/bpf/xdp_adjust_tail_user.c b/samples/bpf/xdp_adjust_tail_user.c
index d86e9ad0356b..008789eb6ada 100644
--- a/samples/bpf/xdp_adjust_tail_user.c
+++ b/samples/bpf/xdp_adjust_tail_user.c
@@ -120,7 +120,7 @@ int main(int argc, char **argv)
xdp_flags |= XDP_FLAGS_SKB_MODE;
break;
case 'N':
- xdp_flags |= XDP_FLAGS_DRV_MODE;
+ /* default, set below */
break;
case 'F':
xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
@@ -132,6 +132,9 @@ int main(int argc, char **argv)
opt_flags[opt] = 0;
}
+ if (!(xdp_flags & XDP_FLAGS_SKB_MODE))
+ xdp_flags |= XDP_FLAGS_DRV_MODE;
+
for (i = 0; i < strlen(optstr); i++) {
if (opt_flags[(unsigned int)optstr[i]]) {
fprintf(stderr, "Missing argument -%c\n", optstr[i]);
diff --git a/samples/bpf/xdp_fwd_user.c b/samples/bpf/xdp_fwd_user.c
index 97ff1dad7669..c30f9acfdb84 100644
--- a/samples/bpf/xdp_fwd_user.c
+++ b/samples/bpf/xdp_fwd_user.c
@@ -27,11 +27,13 @@
#include "libbpf.h"
#include <bpf/bpf.h>
+static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
+
static int do_attach(int idx, int prog_fd, int map_fd, const char *name)
{
int err;
- err = bpf_set_link_xdp_fd(idx, prog_fd, 0);
+ err = bpf_set_link_xdp_fd(idx, prog_fd, xdp_flags);
if (err < 0) {
printf("ERROR: failed to attach program to %s\n", name);
return err;
@@ -49,7 +51,7 @@ static int do_detach(int idx, const char *name)
{
int err;
- err = bpf_set_link_xdp_fd(idx, -1, 0);
+ err = bpf_set_link_xdp_fd(idx, -1, xdp_flags);
if (err < 0)
printf("ERROR: failed to detach program from %s\n", name);
@@ -83,11 +85,17 @@ int main(int argc, char **argv)
int attach = 1;
int ret = 0;
- while ((opt = getopt(argc, argv, ":dD")) != -1) {
+ while ((opt = getopt(argc, argv, ":dDSF")) != -1) {
switch (opt) {
case 'd':
attach = 0;
break;
+ case 'S':
+ xdp_flags |= XDP_FLAGS_SKB_MODE;
+ break;
+ case 'F':
+ xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
+ break;
case 'D':
prog_name = "xdp_fwd_direct";
break;
@@ -97,6 +105,9 @@ int main(int argc, char **argv)
}
}
+ if (!(xdp_flags & XDP_FLAGS_SKB_MODE))
+ xdp_flags |= XDP_FLAGS_DRV_MODE;
+
if (optind == argc) {
usage(basename(argv[0]));
return 1;
diff --git a/samples/bpf/xdp_redirect_cpu_user.c b/samples/bpf/xdp_redirect_cpu_user.c
index 0da6e9e7132e..79a2fb7d16cb 100644
--- a/samples/bpf/xdp_redirect_cpu_user.c
+++ b/samples/bpf/xdp_redirect_cpu_user.c
@@ -16,6 +16,10 @@ static const char *__doc__ =
#include <getopt.h>
#include <net/if.h>
#include <time.h>
+#include <linux/limits.h>
+
+#define __must_check
+#include <linux/err.h>
#include <arpa/inet.h>
#include <linux/if_link.h>
@@ -46,6 +50,10 @@ static int cpus_count_map_fd;
static int cpus_iterator_map_fd;
static int exception_cnt_map_fd;
+#define NUM_TP 5
+struct bpf_link *tp_links[NUM_TP] = { 0 };
+static int tp_cnt = 0;
+
/* Exit return codes */
#define EXIT_OK 0
#define EXIT_FAIL 1
@@ -88,6 +96,10 @@ static void int_exit(int sig)
printf("program on interface changed, not removing\n");
}
}
+ /* Detach tracepoints */
+ while (tp_cnt)
+ bpf_link__destroy(tp_links[--tp_cnt]);
+
exit(EXIT_OK);
}
@@ -588,23 +600,61 @@ static void stats_poll(int interval, bool use_separators, char *prog_name,
free_stats_record(prev);
}
+static struct bpf_link * attach_tp(struct bpf_object *obj,
+ const char *tp_category,
+ const char* tp_name)
+{
+ struct bpf_program *prog;
+ struct bpf_link *link;
+ char sec_name[PATH_MAX];
+ int len;
+
+ len = snprintf(sec_name, PATH_MAX, "tracepoint/%s/%s",
+ tp_category, tp_name);
+ if (len < 0)
+ exit(EXIT_FAIL);
+
+ prog = bpf_object__find_program_by_title(obj, sec_name);
+ if (!prog) {
+ fprintf(stderr, "ERR: finding progsec: %s\n", sec_name);
+ exit(EXIT_FAIL_BPF);
+ }
+
+ link = bpf_program__attach_tracepoint(prog, tp_category, tp_name);
+ if (IS_ERR(link))
+ exit(EXIT_FAIL_BPF);
+
+ return link;
+}
+
+static void init_tracepoints(struct bpf_object *obj) {
+ tp_links[tp_cnt++] = attach_tp(obj, "xdp", "xdp_redirect_err");
+ tp_links[tp_cnt++] = attach_tp(obj, "xdp", "xdp_redirect_map_err");
+ tp_links[tp_cnt++] = attach_tp(obj, "xdp", "xdp_exception");
+ tp_links[tp_cnt++] = attach_tp(obj, "xdp", "xdp_cpumap_enqueue");
+ tp_links[tp_cnt++] = attach_tp(obj, "xdp", "xdp_cpumap_kthread");
+}
+
static int init_map_fds(struct bpf_object *obj)
{
- cpu_map_fd = bpf_object__find_map_fd_by_name(obj, "cpu_map");
- rx_cnt_map_fd = bpf_object__find_map_fd_by_name(obj, "rx_cnt");
+ /* Maps updated by tracepoints */
redirect_err_cnt_map_fd =
bpf_object__find_map_fd_by_name(obj, "redirect_err_cnt");
+ exception_cnt_map_fd =
+ bpf_object__find_map_fd_by_name(obj, "exception_cnt");
cpumap_enqueue_cnt_map_fd =
bpf_object__find_map_fd_by_name(obj, "cpumap_enqueue_cnt");
cpumap_kthread_cnt_map_fd =
bpf_object__find_map_fd_by_name(obj, "cpumap_kthread_cnt");
+
+ /* Maps used by XDP */
+ rx_cnt_map_fd = bpf_object__find_map_fd_by_name(obj, "rx_cnt");
+ cpu_map_fd = bpf_object__find_map_fd_by_name(obj, "cpu_map");
cpus_available_map_fd =
bpf_object__find_map_fd_by_name(obj, "cpus_available");
cpus_count_map_fd = bpf_object__find_map_fd_by_name(obj, "cpus_count");
cpus_iterator_map_fd =
bpf_object__find_map_fd_by_name(obj, "cpus_iterator");
- exception_cnt_map_fd =
- bpf_object__find_map_fd_by_name(obj, "exception_cnt");
if (cpu_map_fd < 0 || rx_cnt_map_fd < 0 ||
redirect_err_cnt_map_fd < 0 || cpumap_enqueue_cnt_map_fd < 0 ||
@@ -662,6 +712,7 @@ int main(int argc, char **argv)
strerror(errno));
return EXIT_FAIL;
}
+ init_tracepoints(obj);
if (init_map_fds(obj) < 0) {
fprintf(stderr, "bpf_object__find_map_fd_by_name failed\n");
return EXIT_FAIL;
@@ -728,6 +779,10 @@ int main(int argc, char **argv)
return EXIT_FAIL_OPTION;
}
}
+
+ if (!(xdp_flags & XDP_FLAGS_SKB_MODE))
+ xdp_flags |= XDP_FLAGS_DRV_MODE;
+
/* Required option */
if (ifindex == -1) {
fprintf(stderr, "ERR: required option --dev missing\n");
diff --git a/samples/bpf/xdp_redirect_map_user.c b/samples/bpf/xdp_redirect_map_user.c
index f70ee33907fd..cc840661faab 100644
--- a/samples/bpf/xdp_redirect_map_user.c
+++ b/samples/bpf/xdp_redirect_map_user.c
@@ -116,7 +116,7 @@ int main(int argc, char **argv)
xdp_flags |= XDP_FLAGS_SKB_MODE;
break;
case 'N':
- xdp_flags |= XDP_FLAGS_DRV_MODE;
+ /* default, set below */
break;
case 'F':
xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
@@ -127,6 +127,9 @@ int main(int argc, char **argv)
}
}
+ if (!(xdp_flags & XDP_FLAGS_SKB_MODE))
+ xdp_flags |= XDP_FLAGS_DRV_MODE;
+
if (optind == argc) {
printf("usage: %s <IFNAME|IFINDEX>_IN <IFNAME|IFINDEX>_OUT\n", argv[0]);
return 1;
diff --git a/samples/bpf/xdp_redirect_user.c b/samples/bpf/xdp_redirect_user.c
index 5440cd620607..71dff8e3382a 100644
--- a/samples/bpf/xdp_redirect_user.c
+++ b/samples/bpf/xdp_redirect_user.c
@@ -117,7 +117,7 @@ int main(int argc, char **argv)
xdp_flags |= XDP_FLAGS_SKB_MODE;
break;
case 'N':
- xdp_flags |= XDP_FLAGS_DRV_MODE;
+ /* default, set below */
break;
case 'F':
xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
@@ -128,6 +128,9 @@ int main(int argc, char **argv)
}
}
+ if (!(xdp_flags & XDP_FLAGS_SKB_MODE))
+ xdp_flags |= XDP_FLAGS_DRV_MODE;
+
if (optind == argc) {
printf("usage: %s <IFNAME|IFINDEX>_IN <IFNAME|IFINDEX>_OUT\n", argv[0]);
return 1;
diff --git a/samples/bpf/xdp_router_ipv4_user.c b/samples/bpf/xdp_router_ipv4_user.c
index 1469b66ebad1..fef286c5add2 100644
--- a/samples/bpf/xdp_router_ipv4_user.c
+++ b/samples/bpf/xdp_router_ipv4_user.c
@@ -662,6 +662,9 @@ int main(int ac, char **argv)
}
}
+ if (!(flags & XDP_FLAGS_SKB_MODE))
+ flags |= XDP_FLAGS_DRV_MODE;
+
if (optind == ac) {
usage(basename(argv[0]));
return 1;
diff --git a/samples/bpf/xdp_rxq_info_user.c b/samples/bpf/xdp_rxq_info_user.c
index 8fc3ad01de72..fc4983fd6959 100644
--- a/samples/bpf/xdp_rxq_info_user.c
+++ b/samples/bpf/xdp_rxq_info_user.c
@@ -551,6 +551,10 @@ int main(int argc, char **argv)
return EXIT_FAIL_OPTION;
}
}
+
+ if (!(xdp_flags & XDP_FLAGS_SKB_MODE))
+ xdp_flags |= XDP_FLAGS_DRV_MODE;
+
/* Required option */
if (ifindex == -1) {
fprintf(stderr, "ERR: required option --dev missing\n");
diff --git a/samples/bpf/xdp_sample_pkts_user.c b/samples/bpf/xdp_sample_pkts_user.c
index a5760e8bf2c4..8c1af1b7372d 100644
--- a/samples/bpf/xdp_sample_pkts_user.c
+++ b/samples/bpf/xdp_sample_pkts_user.c
@@ -52,13 +52,13 @@ static int do_detach(int idx, const char *name)
__u32 curr_prog_id = 0;
int err = 0;
- err = bpf_get_link_xdp_id(idx, &curr_prog_id, 0);
+ err = bpf_get_link_xdp_id(idx, &curr_prog_id, xdp_flags);
if (err) {
printf("bpf_get_link_xdp_id failed\n");
return err;
}
if (prog_id == curr_prog_id) {
- err = bpf_set_link_xdp_fd(idx, -1, 0);
+ err = bpf_set_link_xdp_fd(idx, -1, xdp_flags);
if (err < 0)
printf("ERROR: failed to detach prog from %s\n", name);
} else if (!curr_prog_id) {
@@ -115,7 +115,7 @@ int main(int argc, char **argv)
.prog_type = BPF_PROG_TYPE_XDP,
};
struct perf_buffer_opts pb_opts = {};
- const char *optstr = "F";
+ const char *optstr = "FS";
int prog_fd, map_fd, opt;
struct bpf_object *obj;
struct bpf_map *map;
@@ -127,12 +127,18 @@ int main(int argc, char **argv)
case 'F':
xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
break;
+ case 'S':
+ xdp_flags |= XDP_FLAGS_SKB_MODE;
+ break;
default:
usage(basename(argv[0]));
return 1;
}
}
+ if (!(xdp_flags & XDP_FLAGS_SKB_MODE))
+ xdp_flags |= XDP_FLAGS_DRV_MODE;
+
if (optind == argc) {
usage(basename(argv[0]));
return 1;
diff --git a/samples/bpf/xdp_tx_iptunnel_user.c b/samples/bpf/xdp_tx_iptunnel_user.c
index 2fe4c7f5ffe5..5f33b5530032 100644
--- a/samples/bpf/xdp_tx_iptunnel_user.c
+++ b/samples/bpf/xdp_tx_iptunnel_user.c
@@ -231,7 +231,7 @@ int main(int argc, char **argv)
xdp_flags |= XDP_FLAGS_SKB_MODE;
break;
case 'N':
- xdp_flags |= XDP_FLAGS_DRV_MODE;
+ /* default, set below */
break;
case 'F':
xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
@@ -243,6 +243,9 @@ int main(int argc, char **argv)
opt_flags[opt] = 0;
}
+ if (!(xdp_flags & XDP_FLAGS_SKB_MODE))
+ xdp_flags |= XDP_FLAGS_DRV_MODE;
+
for (i = 0; i < strlen(optstr); i++) {
if (opt_flags[(unsigned int)optstr[i]]) {
fprintf(stderr, "Missing argument -%c\n", optstr[i]);
diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c
index a15480010828..d74c4c83fc93 100644
--- a/samples/bpf/xdpsock_user.c
+++ b/samples/bpf/xdpsock_user.c
@@ -10,6 +10,9 @@
#include <linux/if_link.h>
#include <linux/if_xdp.h>
#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <arpa/inet.h>
#include <locale.h>
#include <net/ethernet.h>
#include <net/if.h>
@@ -45,12 +48,14 @@
#endif
#define NUM_FRAMES (4 * 1024)
-#define BATCH_SIZE 64
+#define MIN_PKT_SIZE 64
#define DEBUG_HEXDUMP 0
typedef __u64 u64;
typedef __u32 u32;
+typedef __u16 u16;
+typedef __u8 u8;
static unsigned long prev_time;
@@ -65,6 +70,13 @@ static u32 opt_xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
static const char *opt_if = "";
static int opt_ifindex;
static int opt_queue;
+static unsigned long opt_duration;
+static unsigned long start_time;
+static bool benchmark_done;
+static u32 opt_batch_size = 64;
+static int opt_pkt_count;
+static u16 opt_pkt_size = MIN_PKT_SIZE;
+static u32 opt_pkt_fill_pattern = 0x12345678;
static int opt_poll;
static int opt_interval = 1;
static u32 opt_xdp_bind_flags = XDP_USE_NEED_WAKEUP;
@@ -167,10 +179,21 @@ static void dump_stats(void)
}
}
+static bool is_benchmark_done(void)
+{
+ if (opt_duration > 0) {
+ unsigned long dt = (get_nsecs() - start_time);
+
+ if (dt >= opt_duration)
+ benchmark_done = true;
+ }
+ return benchmark_done;
+}
+
static void *poller(void *arg)
{
(void)arg;
- for (;;) {
+ while (!is_benchmark_done()) {
sleep(opt_interval);
dump_stats();
}
@@ -196,6 +219,11 @@ static void remove_xdp_program(void)
static void int_exit(int sig)
{
+ benchmark_done = true;
+}
+
+static void xdpsock_cleanup(void)
+{
struct xsk_umem *umem = xsks[0]->umem->umem;
int i;
@@ -204,8 +232,6 @@ static void int_exit(int sig)
xsk_socket__delete(xsks[i]->xsk);
(void)xsk_umem__delete(umem);
remove_xdp_program();
-
- exit(EXIT_SUCCESS);
}
static void __exit_with_error(int error, const char *file, const char *func,
@@ -220,13 +246,6 @@ static void __exit_with_error(int error, const char *file, const char *func,
#define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, \
__LINE__)
-
-static const char pkt_data[] =
- "\x3c\xfd\xfe\x9e\x7f\x71\xec\xb1\xd7\x98\x3a\xc0\x08\x00\x45\x00"
- "\x00\x2e\x00\x00\x00\x00\x40\x11\x88\x97\x05\x08\x07\x08\xc8\x14"
- "\x1e\x04\x10\x92\x10\x92\x00\x1a\x6d\xa3\x34\x33\x1f\x69\x40\x6b"
- "\x54\x59\xb6\x14\x2d\x11\x44\xbf\xaf\xd9\xbe\xaa";
-
static void swap_mac_addresses(void *data)
{
struct ether_header *eth = (struct ether_header *)data;
@@ -274,11 +293,243 @@ static void hex_dump(void *pkt, size_t length, u64 addr)
printf("\n");
}
-static size_t gen_eth_frame(struct xsk_umem_info *umem, u64 addr)
+static void *memset32_htonl(void *dest, u32 val, u32 size)
+{
+ u32 *ptr = (u32 *)dest;
+ int i;
+
+ val = htonl(val);
+
+ for (i = 0; i < (size & (~0x3)); i += 4)
+ ptr[i >> 2] = val;
+
+ for (; i < size; i++)
+ ((char *)dest)[i] = ((char *)&val)[i & 3];
+
+ return dest;
+}
+
+/*
+ * This function code has been taken from
+ * Linux kernel lib/checksum.c
+ */
+static inline unsigned short from32to16(unsigned int x)
+{
+ /* add up 16-bit and 16-bit for 16+c bit */
+ x = (x & 0xffff) + (x >> 16);
+ /* add up carry.. */
+ x = (x & 0xffff) + (x >> 16);
+ return x;
+}
+
+/*
+ * This function code has been taken from
+ * Linux kernel lib/checksum.c
+ */
+static unsigned int do_csum(const unsigned char *buff, int len)
+{
+ unsigned int result = 0;
+ int odd;
+
+ if (len <= 0)
+ goto out;
+ odd = 1 & (unsigned long)buff;
+ if (odd) {
+#ifdef __LITTLE_ENDIAN
+ result += (*buff << 8);
+#else
+ result = *buff;
+#endif
+ len--;
+ buff++;
+ }
+ if (len >= 2) {
+ if (2 & (unsigned long)buff) {
+ result += *(unsigned short *)buff;
+ len -= 2;
+ buff += 2;
+ }
+ if (len >= 4) {
+ const unsigned char *end = buff +
+ ((unsigned int)len & ~3);
+ unsigned int carry = 0;
+
+ do {
+ unsigned int w = *(unsigned int *)buff;
+
+ buff += 4;
+ result += carry;
+ result += w;
+ carry = (w > result);
+ } while (buff < end);
+ result += carry;
+ result = (result & 0xffff) + (result >> 16);
+ }
+ if (len & 2) {
+ result += *(unsigned short *)buff;
+ buff += 2;
+ }
+ }
+ if (len & 1)
+#ifdef __LITTLE_ENDIAN
+ result += *buff;
+#else
+ result += (*buff << 8);
+#endif
+ result = from32to16(result);
+ if (odd)
+ result = ((result >> 8) & 0xff) | ((result & 0xff) << 8);
+out:
+ return result;
+}
+
+__sum16 ip_fast_csum(const void *iph, unsigned int ihl);
+
+/*
+ * This is a version of ip_compute_csum() optimized for IP headers,
+ * which always checksum on 4 octet boundaries.
+ * This function code has been taken from
+ * Linux kernel lib/checksum.c
+ */
+__sum16 ip_fast_csum(const void *iph, unsigned int ihl)
+{
+ return (__force __sum16)~do_csum(iph, ihl * 4);
+}
+
+/*
+ * Fold a partial checksum
+ * This function code has been taken from
+ * Linux kernel include/asm-generic/checksum.h
+ */
+static inline __sum16 csum_fold(__wsum csum)
+{
+ u32 sum = (__force u32)csum;
+
+ sum = (sum & 0xffff) + (sum >> 16);
+ sum = (sum & 0xffff) + (sum >> 16);
+ return (__force __sum16)~sum;
+}
+
+/*
+ * This function code has been taken from
+ * Linux kernel lib/checksum.c
+ */
+static inline u32 from64to32(u64 x)
+{
+ /* add up 32-bit and 32-bit for 32+c bit */
+ x = (x & 0xffffffff) + (x >> 32);
+ /* add up carry.. */
+ x = (x & 0xffffffff) + (x >> 32);
+ return (u32)x;
+}
+
+__wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
+ __u32 len, __u8 proto, __wsum sum);
+
+/*
+ * This function code has been taken from
+ * Linux kernel lib/checksum.c
+ */
+__wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
+ __u32 len, __u8 proto, __wsum sum)
+{
+ unsigned long long s = (__force u32)sum;
+
+ s += (__force u32)saddr;
+ s += (__force u32)daddr;
+#ifdef __BIG_ENDIAN__
+ s += proto + len;
+#else
+ s += (proto + len) << 8;
+#endif
+ return (__force __wsum)from64to32(s);
+}
+
+/*
+ * This function has been taken from
+ * Linux kernel include/asm-generic/checksum.h
+ */
+static inline __sum16
+csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len,
+ __u8 proto, __wsum sum)
+{
+ return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum));
+}
+
+static inline u16 udp_csum(u32 saddr, u32 daddr, u32 len,
+ u8 proto, u16 *udp_pkt)
+{
+ u32 csum = 0;
+ u32 cnt = 0;
+
+ /* udp hdr and data */
+ for (; cnt < len; cnt += 2)
+ csum += udp_pkt[cnt >> 1];
+
+ return csum_tcpudp_magic(saddr, daddr, len, proto, csum);
+}
+
+#define ETH_FCS_SIZE 4
+
+#define PKT_HDR_SIZE (sizeof(struct ethhdr) + sizeof(struct iphdr) + \
+ sizeof(struct udphdr))
+
+#define PKT_SIZE (opt_pkt_size - ETH_FCS_SIZE)
+#define IP_PKT_SIZE (PKT_SIZE - sizeof(struct ethhdr))
+#define UDP_PKT_SIZE (IP_PKT_SIZE - sizeof(struct iphdr))
+#define UDP_PKT_DATA_SIZE (UDP_PKT_SIZE - sizeof(struct udphdr))
+
+static u8 pkt_data[XSK_UMEM__DEFAULT_FRAME_SIZE];
+
+static void gen_eth_hdr_data(void)
+{
+ struct udphdr *udp_hdr = (struct udphdr *)(pkt_data +
+ sizeof(struct ethhdr) +
+ sizeof(struct iphdr));
+ struct iphdr *ip_hdr = (struct iphdr *)(pkt_data +
+ sizeof(struct ethhdr));
+ struct ethhdr *eth_hdr = (struct ethhdr *)pkt_data;
+
+ /* ethernet header */
+ memcpy(eth_hdr->h_dest, "\x3c\xfd\xfe\x9e\x7f\x71", ETH_ALEN);
+ memcpy(eth_hdr->h_source, "\xec\xb1\xd7\x98\x3a\xc0", ETH_ALEN);
+ eth_hdr->h_proto = htons(ETH_P_IP);
+
+ /* IP header */
+ ip_hdr->version = IPVERSION;
+ ip_hdr->ihl = 0x5; /* 20 byte header */
+ ip_hdr->tos = 0x0;
+ ip_hdr->tot_len = htons(IP_PKT_SIZE);
+ ip_hdr->id = 0;
+ ip_hdr->frag_off = 0;
+ ip_hdr->ttl = IPDEFTTL;
+ ip_hdr->protocol = IPPROTO_UDP;
+ ip_hdr->saddr = htonl(0x0a0a0a10);
+ ip_hdr->daddr = htonl(0x0a0a0a20);
+
+ /* IP header checksum */
+ ip_hdr->check = 0;
+ ip_hdr->check = ip_fast_csum((const void *)ip_hdr, ip_hdr->ihl);
+
+ /* UDP header */
+ udp_hdr->source = htons(0x1000);
+ udp_hdr->dest = htons(0x1000);
+ udp_hdr->len = htons(UDP_PKT_SIZE);
+
+ /* UDP data */
+ memset32_htonl(pkt_data + PKT_HDR_SIZE, opt_pkt_fill_pattern,
+ UDP_PKT_DATA_SIZE);
+
+ /* UDP header checksum */
+ udp_hdr->check = 0;
+ udp_hdr->check = udp_csum(ip_hdr->saddr, ip_hdr->daddr, UDP_PKT_SIZE,
+ IPPROTO_UDP, (u16 *)udp_hdr);
+}
+
+static void gen_eth_frame(struct xsk_umem_info *umem, u64 addr)
{
memcpy(xsk_umem__get_data(umem->buffer, addr), pkt_data,
- sizeof(pkt_data) - 1);
- return sizeof(pkt_data) - 1;
+ PKT_SIZE);
}
static struct xsk_umem_info *xsk_configure_umem(void *buffer, u64 size)
@@ -375,6 +626,11 @@ static struct option long_options[] = {
{"unaligned", no_argument, 0, 'u'},
{"shared-umem", no_argument, 0, 'M'},
{"force", no_argument, 0, 'F'},
+ {"duration", required_argument, 0, 'd'},
+ {"batch-size", required_argument, 0, 'b'},
+ {"tx-pkt-count", required_argument, 0, 'C'},
+ {"tx-pkt-size", required_argument, 0, 's'},
+ {"tx-pkt-pattern", required_argument, 0, 'P'},
{0, 0, 0, 0}
};
@@ -399,8 +655,21 @@ static void usage(const char *prog)
" -u, --unaligned Enable unaligned chunk placement\n"
" -M, --shared-umem Enable XDP_SHARED_UMEM\n"
" -F, --force Force loading the XDP prog\n"
+ " -d, --duration=n Duration in secs to run command.\n"
+ " Default: forever.\n"
+ " -b, --batch-size=n Batch size for sending or receiving\n"
+ " packets. Default: %d\n"
+ " -C, --tx-pkt-count=n Number of packets to send.\n"
+ " Default: Continuous packets.\n"
+ " -s, --tx-pkt-size=n Transmit packet size.\n"
+ " (Default: %d bytes)\n"
+ " Min size: %d, Max size %d.\n"
+ " -P, --tx-pkt-pattern=nPacket fill pattern. Default: 0x%x\n"
"\n";
- fprintf(stderr, str, prog, XSK_UMEM__DEFAULT_FRAME_SIZE);
+ fprintf(stderr, str, prog, XSK_UMEM__DEFAULT_FRAME_SIZE,
+ opt_batch_size, MIN_PKT_SIZE, MIN_PKT_SIZE,
+ XSK_UMEM__DEFAULT_FRAME_SIZE, opt_pkt_fill_pattern);
+
exit(EXIT_FAILURE);
}
@@ -411,7 +680,7 @@ static void parse_command_line(int argc, char **argv)
opterr = 0;
for (;;) {
- c = getopt_long(argc, argv, "Frtli:q:psSNn:czf:muM",
+ c = getopt_long(argc, argv, "Frtli:q:pSNn:czf:muMd:b:C:s:P:",
long_options, &option_index);
if (c == -1)
break;
@@ -440,7 +709,7 @@ static void parse_command_line(int argc, char **argv)
opt_xdp_bind_flags |= XDP_COPY;
break;
case 'N':
- opt_xdp_flags |= XDP_FLAGS_DRV_MODE;
+ /* default, set below */
break;
case 'n':
opt_interval = atoi(optarg);
@@ -469,11 +738,37 @@ static void parse_command_line(int argc, char **argv)
case 'M':
opt_num_xsks = MAX_SOCKS;
break;
+ case 'd':
+ opt_duration = atoi(optarg);
+ opt_duration *= 1000000000;
+ break;
+ case 'b':
+ opt_batch_size = atoi(optarg);
+ break;
+ case 'C':
+ opt_pkt_count = atoi(optarg);
+ break;
+ case 's':
+ opt_pkt_size = atoi(optarg);
+ if (opt_pkt_size > (XSK_UMEM__DEFAULT_FRAME_SIZE) ||
+ opt_pkt_size < MIN_PKT_SIZE) {
+ fprintf(stderr,
+ "ERROR: Invalid frame size %d\n",
+ opt_pkt_size);
+ usage(basename(argv[0]));
+ }
+ break;
+ case 'P':
+ opt_pkt_fill_pattern = strtol(optarg, NULL, 16);
+ break;
default:
usage(basename(argv[0]));
}
}
+ if (!(opt_xdp_flags & XDP_FLAGS_SKB_MODE))
+ opt_xdp_flags |= XDP_FLAGS_DRV_MODE;
+
opt_ifindex = if_nametoindex(opt_if);
if (!opt_ifindex) {
fprintf(stderr, "ERROR: interface \"%s\" does not exist\n",
@@ -513,7 +808,7 @@ static inline void complete_tx_l2fwd(struct xsk_socket_info *xsk,
if (!opt_need_wakeup || xsk_ring_prod__needs_wakeup(&xsk->tx))
kick_tx(xsk);
- ndescs = (xsk->outstanding_tx > BATCH_SIZE) ? BATCH_SIZE :
+ ndescs = (xsk->outstanding_tx > opt_batch_size) ? opt_batch_size :
xsk->outstanding_tx;
/* re-add completed Tx buffers */
@@ -542,7 +837,8 @@ static inline void complete_tx_l2fwd(struct xsk_socket_info *xsk,
}
}
-static inline void complete_tx_only(struct xsk_socket_info *xsk)
+static inline void complete_tx_only(struct xsk_socket_info *xsk,
+ int batch_size)
{
unsigned int rcvd;
u32 idx;
@@ -553,7 +849,7 @@ static inline void complete_tx_only(struct xsk_socket_info *xsk)
if (!opt_need_wakeup || xsk_ring_prod__needs_wakeup(&xsk->tx))
kick_tx(xsk);
- rcvd = xsk_ring_cons__peek(&xsk->umem->cq, BATCH_SIZE, &idx);
+ rcvd = xsk_ring_cons__peek(&xsk->umem->cq, batch_size, &idx);
if (rcvd > 0) {
xsk_ring_cons__release(&xsk->umem->cq, rcvd);
xsk->outstanding_tx -= rcvd;
@@ -567,7 +863,7 @@ static void rx_drop(struct xsk_socket_info *xsk, struct pollfd *fds)
u32 idx_rx = 0, idx_fq = 0;
int ret;
- rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx);
+ rcvd = xsk_ring_cons__peek(&xsk->rx, opt_batch_size, &idx_rx);
if (!rcvd) {
if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq))
ret = poll(fds, num_socks, opt_timeout);
@@ -619,36 +915,68 @@ static void rx_drop_all(void)
for (i = 0; i < num_socks; i++)
rx_drop(xsks[i], fds);
+
+ if (benchmark_done)
+ break;
}
}
-static void tx_only(struct xsk_socket_info *xsk, u32 frame_nb)
+static void tx_only(struct xsk_socket_info *xsk, u32 frame_nb, int batch_size)
{
u32 idx;
+ unsigned int i;
- if (xsk_ring_prod__reserve(&xsk->tx, BATCH_SIZE, &idx) == BATCH_SIZE) {
- unsigned int i;
-
- for (i = 0; i < BATCH_SIZE; i++) {
- xsk_ring_prod__tx_desc(&xsk->tx, idx + i)->addr =
- (frame_nb + i) << XSK_UMEM__DEFAULT_FRAME_SHIFT;
- xsk_ring_prod__tx_desc(&xsk->tx, idx + i)->len =
- sizeof(pkt_data) - 1;
- }
+ while (xsk_ring_prod__reserve(&xsk->tx, batch_size, &idx) <
+ batch_size) {
+ complete_tx_only(xsk, batch_size);
+ }
- xsk_ring_prod__submit(&xsk->tx, BATCH_SIZE);
- xsk->outstanding_tx += BATCH_SIZE;
- frame_nb += BATCH_SIZE;
- frame_nb %= NUM_FRAMES;
+ for (i = 0; i < batch_size; i++) {
+ struct xdp_desc *tx_desc = xsk_ring_prod__tx_desc(&xsk->tx,
+ idx + i);
+ tx_desc->addr = (frame_nb + i) << XSK_UMEM__DEFAULT_FRAME_SHIFT;
+ tx_desc->len = PKT_SIZE;
}
- complete_tx_only(xsk);
+ xsk_ring_prod__submit(&xsk->tx, batch_size);
+ xsk->outstanding_tx += batch_size;
+ frame_nb += batch_size;
+ frame_nb %= NUM_FRAMES;
+ complete_tx_only(xsk, batch_size);
+}
+
+static inline int get_batch_size(int pkt_cnt)
+{
+ if (!opt_pkt_count)
+ return opt_batch_size;
+
+ if (pkt_cnt + opt_batch_size <= opt_pkt_count)
+ return opt_batch_size;
+
+ return opt_pkt_count - pkt_cnt;
+}
+
+static void complete_tx_only_all(void)
+{
+ bool pending;
+ int i;
+
+ do {
+ pending = false;
+ for (i = 0; i < num_socks; i++) {
+ if (xsks[i]->outstanding_tx) {
+ complete_tx_only(xsks[i], opt_batch_size);
+ pending = !!xsks[i]->outstanding_tx;
+ }
+ }
+ } while (pending);
}
static void tx_only_all(void)
{
struct pollfd fds[MAX_SOCKS] = {};
u32 frame_nb[MAX_SOCKS] = {};
+ int pkt_cnt = 0;
int i, ret;
for (i = 0; i < num_socks; i++) {
@@ -656,7 +984,9 @@ static void tx_only_all(void)
fds[0].events = POLLOUT;
}
- for (;;) {
+ while ((opt_pkt_count && pkt_cnt < opt_pkt_count) || !opt_pkt_count) {
+ int batch_size = get_batch_size(pkt_cnt);
+
if (opt_poll) {
ret = poll(fds, num_socks, opt_timeout);
if (ret <= 0)
@@ -667,8 +997,16 @@ static void tx_only_all(void)
}
for (i = 0; i < num_socks; i++)
- tx_only(xsks[i], frame_nb[i]);
+ tx_only(xsks[i], frame_nb[i], batch_size);
+
+ pkt_cnt += batch_size;
+
+ if (benchmark_done)
+ break;
}
+
+ if (opt_pkt_count)
+ complete_tx_only_all();
}
static void l2fwd(struct xsk_socket_info *xsk, struct pollfd *fds)
@@ -679,7 +1017,7 @@ static void l2fwd(struct xsk_socket_info *xsk, struct pollfd *fds)
complete_tx_l2fwd(xsk, fds);
- rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx);
+ rcvd = xsk_ring_cons__peek(&xsk->rx, opt_batch_size, &idx_rx);
if (!rcvd) {
if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq))
ret = poll(fds, num_socks, opt_timeout);
@@ -736,6 +1074,9 @@ static void l2fwd_all(void)
for (i = 0; i < num_socks; i++)
l2fwd(xsks[i], fds);
+
+ if (benchmark_done)
+ break;
}
}
@@ -831,9 +1172,12 @@ int main(int argc, char **argv)
for (i = 0; i < opt_num_xsks; i++)
xsks[num_socks++] = xsk_configure_socket(umem, rx, tx);
- if (opt_bench == BENCH_TXONLY)
+ if (opt_bench == BENCH_TXONLY) {
+ gen_eth_hdr_data();
+
for (i = 0; i < NUM_FRAMES; i++)
gen_eth_frame(umem, i * opt_xsk_frame_size);
+ }
if (opt_num_xsks > 1 && opt_bench != BENCH_TXONLY)
enter_xsks_into_map(obj);
@@ -849,6 +1193,7 @@ int main(int argc, char **argv)
exit_with_error(ret);
prev_time = get_nsecs();
+ start_time = prev_time;
if (opt_bench == BENCH_RXDROP)
rx_drop_all();
@@ -857,5 +1202,11 @@ int main(int argc, char **argv)
else
l2fwd_all();
+ benchmark_done = true;
+
+ pthread_join(pt, NULL);
+
+ xdpsock_cleanup();
+
return 0;
}
diff --git a/samples/seccomp/user-trap.c b/samples/seccomp/user-trap.c
index 6d0125ca8af7..20291ec6489f 100644
--- a/samples/seccomp/user-trap.c
+++ b/samples/seccomp/user-trap.c
@@ -298,14 +298,14 @@ int main(void)
req = malloc(sizes.seccomp_notif);
if (!req)
goto out_close;
- memset(req, 0, sizeof(*req));
resp = malloc(sizes.seccomp_notif_resp);
if (!resp)
goto out_req;
- memset(resp, 0, sizeof(*resp));
+ memset(resp, 0, sizes.seccomp_notif_resp);
while (1) {
+ memset(req, 0, sizes.seccomp_notif);
if (ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, req)) {
perror("ioctl recv");
goto out_resp;
diff --git a/samples/trace_printk/trace-printk.c b/samples/trace_printk/trace-printk.c
index 7affc3b50b61..cfc159580263 100644
--- a/samples/trace_printk/trace-printk.c
+++ b/samples/trace_printk/trace-printk.c
@@ -36,6 +36,7 @@ static int __init trace_printk_init(void)
/* Kick off printing in irq context */
irq_work_queue(&irqwork);
+ irq_work_sync(&irqwork);
trace_printk("This is a %s that will use trace_bprintk()\n",
"static string");
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 7cbe6e72e363..a63380c6b0d2 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -4125,15 +4125,6 @@ sub process {
"Prefer [subsystem eg: netdev]_$level2([subsystem]dev, ... then dev_$level2(dev, ... then pr_$level(... to printk(KERN_$orig ...\n" . $herecurr);
}
- if ($line =~ /\bpr_warning\s*\(/) {
- if (WARN("PREFER_PR_LEVEL",
- "Prefer pr_warn(... to pr_warning(...\n" . $herecurr) &&
- $fix) {
- $fixed[$fixlinenr] =~
- s/\bpr_warning\b/pr_warn/;
- }
- }
-
if ($line =~ /\bdev_printk\s*\(\s*KERN_([A-Z]+)/) {
my $orig = $1;
my $level = lc($orig);
diff --git a/scripts/gcc-plugins/Kconfig b/scripts/gcc-plugins/Kconfig
index d33de0b9f4f5..e3569543bdac 100644
--- a/scripts/gcc-plugins/Kconfig
+++ b/scripts/gcc-plugins/Kconfig
@@ -14,8 +14,8 @@ config HAVE_GCC_PLUGINS
An arch should select this symbol if it supports building with
GCC plugins.
-config GCC_PLUGINS
- bool
+menuconfig GCC_PLUGINS
+ bool "GCC plugins"
depends on HAVE_GCC_PLUGINS
depends on PLUGIN_HOSTCC != ""
default y
@@ -25,8 +25,7 @@ config GCC_PLUGINS
See Documentation/core-api/gcc-plugins.rst for details.
-menu "GCC plugins"
- depends on GCC_PLUGINS
+if GCC_PLUGINS
config GCC_PLUGIN_CYC_COMPLEXITY
bool "Compute the cyclomatic complexity of a function" if EXPERT
@@ -113,4 +112,4 @@ config GCC_PLUGIN_ARM_SSP_PER_TASK
bool
depends on GCC_PLUGINS && ARM
-endmenu
+endif
diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c
index fb55f262f42d..94153732ec00 100644
--- a/scripts/kallsyms.c
+++ b/scripts/kallsyms.c
@@ -310,6 +310,15 @@ static void output_label(const char *label)
printf("%s:\n", label);
}
+/* Provide proper symbols relocatability by their '_text' relativeness. */
+static void output_address(unsigned long long addr)
+{
+ if (_text <= addr)
+ printf("\tPTR\t_text + %#llx\n", addr - _text);
+ else
+ printf("\tPTR\t_text - %#llx\n", _text - addr);
+}
+
/* uncompress a compressed symbol. When this function is called, the best table
* might still be compressed itself, so the function needs to be recursive */
static int expand_symbol(const unsigned char *data, int len, char *result)
@@ -360,19 +369,6 @@ static void write_src(void)
printf("\t.section .rodata, \"a\"\n");
- /* Provide proper symbols relocatability by their relativeness
- * to a fixed anchor point in the runtime image, either '_text'
- * for absolute address tables, in which case the linker will
- * emit the final addresses at build time. Otherwise, use the
- * offset relative to the lowest value encountered of all relative
- * symbols, and emit non-relocatable fixed offsets that will be fixed
- * up at runtime.
- *
- * The symbol names cannot be used to construct normal symbol
- * references as the list of symbols contains symbols that are
- * declared static and are private to their .o files. This prevents
- * .tmp_kallsyms.o or any other object from referencing them.
- */
if (!base_relative)
output_label("kallsyms_addresses");
else
@@ -380,6 +376,13 @@ static void write_src(void)
for (i = 0; i < table_cnt; i++) {
if (base_relative) {
+ /*
+ * Use the offset relative to the lowest value
+ * encountered of all relative symbols, and emit
+ * non-relocatable fixed offsets that will be fixed
+ * up at runtime.
+ */
+
long long offset;
int overflow;
@@ -402,12 +405,7 @@ static void write_src(void)
}
printf("\t.long\t%#x\n", (int)offset);
} else if (!symbol_absolute(&table[i])) {
- if (_text <= table[i].addr)
- printf("\tPTR\t_text + %#llx\n",
- table[i].addr - _text);
- else
- printf("\tPTR\t_text - %#llx\n",
- _text - table[i].addr);
+ output_address(table[i].addr);
} else {
printf("\tPTR\t%#llx\n", table[i].addr);
}
@@ -416,7 +414,7 @@ static void write_src(void)
if (base_relative) {
output_label("kallsyms_relative_base");
- printf("\tPTR\t_text - %#llx\n", _text - relative_base);
+ output_address(relative_base);
printf("\n");
}
diff --git a/scripts/kconfig/expr.c b/scripts/kconfig/expr.c
index 77ffff3a053c..9f1de58e9f0c 100644
--- a/scripts/kconfig/expr.c
+++ b/scripts/kconfig/expr.c
@@ -254,6 +254,13 @@ static int expr_eq(struct expr *e1, struct expr *e2)
{
int res, old_count;
+ /*
+ * A NULL expr is taken to be yes, but there's also a different way to
+ * represent yes. expr_is_yes() checks for either representation.
+ */
+ if (!e1 || !e2)
+ return expr_is_yes(e1) && expr_is_yes(e2);
+
if (e1->type != e2->type)
return 0;
switch (e1->type) {
diff --git a/scripts/mkcompile_h b/scripts/mkcompile_h
index d1d757c6edf4..3a5a4b210c86 100755
--- a/scripts/mkcompile_h
+++ b/scripts/mkcompile_h
@@ -55,12 +55,10 @@ CONFIG_FLAGS=""
if [ -n "$SMP" ] ; then CONFIG_FLAGS="SMP"; fi
if [ -n "$PREEMPT" ] ; then CONFIG_FLAGS="$CONFIG_FLAGS PREEMPT"; fi
if [ -n "$PREEMPT_RT" ] ; then CONFIG_FLAGS="$CONFIG_FLAGS PREEMPT_RT"; fi
-UTS_VERSION="$UTS_VERSION $CONFIG_FLAGS $TIMESTAMP"
# Truncate to maximum length
-
UTS_LEN=64
-UTS_TRUNCATE="cut -b -$UTS_LEN"
+UTS_VERSION="$(echo $UTS_VERSION $CONFIG_FLAGS $TIMESTAMP | cut -b -$UTS_LEN)"
# Generate a temporary compile.h
@@ -69,10 +67,10 @@ UTS_TRUNCATE="cut -b -$UTS_LEN"
echo \#define UTS_MACHINE \"$ARCH\"
- echo \#define UTS_VERSION \"`echo $UTS_VERSION | $UTS_TRUNCATE`\"
+ echo \#define UTS_VERSION \"$UTS_VERSION\"
- echo \#define LINUX_COMPILE_BY \"`echo $LINUX_COMPILE_BY | $UTS_TRUNCATE`\"
- echo \#define LINUX_COMPILE_HOST \"`echo $LINUX_COMPILE_HOST | $UTS_TRUNCATE`\"
+ printf '#define LINUX_COMPILE_BY "%s"\n' "$LINUX_COMPILE_BY"
+ echo \#define LINUX_COMPILE_HOST \"$LINUX_COMPILE_HOST\"
echo \#define LINUX_COMPILER \"`$CC -v 2>&1 | grep ' version ' | sed 's/[[:space:]]*$//'`\"
} > .tmpcompile
diff --git a/scripts/package/mkdebian b/scripts/package/mkdebian
index e0750b70453f..357dc56bcf30 100755
--- a/scripts/package/mkdebian
+++ b/scripts/package/mkdebian
@@ -136,7 +136,7 @@ mkdir -p debian/source/
echo "1.0" > debian/source/format
echo $debarch > debian/arch
-extra_build_depends=", $(if_enabled_echo CONFIG_UNWINDER_ORC libelf-dev)"
+extra_build_depends=", $(if_enabled_echo CONFIG_UNWINDER_ORC libelf-dev:native)"
extra_build_depends="$extra_build_depends, $(if_enabled_echo CONFIG_SYSTEM_TRUSTED_KEYRING libssl-dev:native)"
# Generate a simple changelog template
@@ -174,7 +174,7 @@ Source: $sourcename
Section: kernel
Priority: optional
Maintainer: $maintainer
-Build-Depends: bc, kmod, cpio, bison, flex | flex:native $extra_build_depends
+Build-Depends: bc, rsync, kmod, cpio, bison, flex | flex:native $extra_build_depends
Homepage: http://www.kernel.org/
Package: $packagename
diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c
index 09996f2552ee..47aff8700547 100644
--- a/security/apparmor/apparmorfs.c
+++ b/security/apparmor/apparmorfs.c
@@ -623,7 +623,7 @@ static __poll_t ns_revision_poll(struct file *file, poll_table *pt)
void __aa_bump_ns_revision(struct aa_ns *ns)
{
- ns->revision++;
+ WRITE_ONCE(ns->revision, ns->revision + 1);
wake_up_interruptible(&ns->wait);
}
diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c
index 9be7ccb8379e..6ceb74e0f789 100644
--- a/security/apparmor/domain.c
+++ b/security/apparmor/domain.c
@@ -317,6 +317,7 @@ static int aa_xattrs_match(const struct linux_binprm *bprm,
if (!bprm || !profile->xattr_count)
return 0;
+ might_sleep();
/* transition from exec match to xattr set */
state = aa_dfa_null_transition(profile->xmatch, state);
@@ -361,10 +362,11 @@ out:
}
/**
- * __attach_match_ - find an attachment match
+ * find_attach - do attachment search for unconfined processes
* @bprm - binprm structure of transitioning task
- * @name - to match against (NOT NULL)
+ * @ns: the current namespace (NOT NULL)
* @head - profile list to walk (NOT NULL)
+ * @name - to match against (NOT NULL)
* @info - info message if there was an error (NOT NULL)
*
* Do a linear search on the profiles in the list. There is a matching
@@ -374,12 +376,11 @@ out:
*
* Requires: @head not be shared or have appropriate locks held
*
- * Returns: profile or NULL if no match found
+ * Returns: label or NULL if no match found
*/
-static struct aa_profile *__attach_match(const struct linux_binprm *bprm,
- const char *name,
- struct list_head *head,
- const char **info)
+static struct aa_label *find_attach(const struct linux_binprm *bprm,
+ struct aa_ns *ns, struct list_head *head,
+ const char *name, const char **info)
{
int candidate_len = 0, candidate_xattrs = 0;
bool conflict = false;
@@ -388,6 +389,8 @@ static struct aa_profile *__attach_match(const struct linux_binprm *bprm,
AA_BUG(!name);
AA_BUG(!head);
+ rcu_read_lock();
+restart:
list_for_each_entry_rcu(profile, head, base.list) {
if (profile->label.flags & FLAG_NULL &&
&profile->label == ns_unconfined(profile->ns))
@@ -413,16 +416,32 @@ static struct aa_profile *__attach_match(const struct linux_binprm *bprm,
perm = dfa_user_allow(profile->xmatch, state);
/* any accepting state means a valid match. */
if (perm & MAY_EXEC) {
- int ret;
+ int ret = 0;
if (count < candidate_len)
continue;
- ret = aa_xattrs_match(bprm, profile, state);
- /* Fail matching if the xattrs don't match */
- if (ret < 0)
- continue;
-
+ if (bprm && profile->xattr_count) {
+ long rev = READ_ONCE(ns->revision);
+
+ if (!aa_get_profile_not0(profile))
+ goto restart;
+ rcu_read_unlock();
+ ret = aa_xattrs_match(bprm, profile,
+ state);
+ rcu_read_lock();
+ aa_put_profile(profile);
+ if (rev !=
+ READ_ONCE(ns->revision))
+ /* policy changed */
+ goto restart;
+ /*
+ * Fail matching if the xattrs don't
+ * match
+ */
+ if (ret < 0)
+ continue;
+ }
/*
* TODO: allow for more flexible best match
*
@@ -445,43 +464,28 @@ static struct aa_profile *__attach_match(const struct linux_binprm *bprm,
candidate_xattrs = ret;
conflict = false;
}
- } else if (!strcmp(profile->base.name, name))
+ } else if (!strcmp(profile->base.name, name)) {
/*
* old exact non-re match, without conditionals such
* as xattrs. no more searching required
*/
- return profile;
+ candidate = profile;
+ goto out;
+ }
}
- if (conflict) {
- *info = "conflicting profile attachments";
+ if (!candidate || conflict) {
+ if (conflict)
+ *info = "conflicting profile attachments";
+ rcu_read_unlock();
return NULL;
}
- return candidate;
-}
-
-/**
- * find_attach - do attachment search for unconfined processes
- * @bprm - binprm structure of transitioning task
- * @ns: the current namespace (NOT NULL)
- * @list: list to search (NOT NULL)
- * @name: the executable name to match against (NOT NULL)
- * @info: info message if there was an error
- *
- * Returns: label or NULL if no match found
- */
-static struct aa_label *find_attach(const struct linux_binprm *bprm,
- struct aa_ns *ns, struct list_head *list,
- const char *name, const char **info)
-{
- struct aa_profile *profile;
-
- rcu_read_lock();
- profile = aa_get_profile(__attach_match(bprm, name, list, info));
+out:
+ candidate = aa_get_newest_profile(candidate);
rcu_read_unlock();
- return profile ? &profile->label : NULL;
+ return &candidate->label;
}
static const char *next_name(int xtype, const char *name)
diff --git a/security/apparmor/file.c b/security/apparmor/file.c
index fe2ebe5e865e..f1caf3674e1c 100644
--- a/security/apparmor/file.c
+++ b/security/apparmor/file.c
@@ -618,8 +618,7 @@ int aa_file_perm(const char *op, struct aa_label *label, struct file *file,
fctx = file_ctx(file);
rcu_read_lock();
- flabel = aa_get_newest_label(rcu_dereference(fctx->label));
- rcu_read_unlock();
+ flabel = rcu_dereference(fctx->label);
AA_BUG(!flabel);
/* revalidate access, if task is unconfined, or the cached cred
@@ -631,9 +630,13 @@ int aa_file_perm(const char *op, struct aa_label *label, struct file *file,
*/
denied = request & ~fctx->allow;
if (unconfined(label) || unconfined(flabel) ||
- (!denied && aa_label_is_subset(flabel, label)))
+ (!denied && aa_label_is_subset(flabel, label))) {
+ rcu_read_unlock();
goto done;
+ }
+ flabel = aa_get_newest_label(flabel);
+ rcu_read_unlock();
/* TODO: label cross check */
if (file->f_path.mnt && path_mediated_fs(file->f_path.dentry))
@@ -643,8 +646,9 @@ int aa_file_perm(const char *op, struct aa_label *label, struct file *file,
else if (S_ISSOCK(file_inode(file)->i_mode))
error = __file_sock_perm(op, label, flabel, file, request,
denied);
-done:
aa_put_label(flabel);
+
+done:
return error;
}
diff --git a/security/apparmor/mount.c b/security/apparmor/mount.c
index 4ed6688f9d40..e0828ee7a345 100644
--- a/security/apparmor/mount.c
+++ b/security/apparmor/mount.c
@@ -442,7 +442,7 @@ int aa_bind_mount(struct aa_label *label, const struct path *path,
buffer = aa_get_buffer(false);
old_buffer = aa_get_buffer(false);
error = -ENOMEM;
- if (!buffer || old_buffer)
+ if (!buffer || !old_buffer)
goto out;
error = fn_for_each_confined(label, profile,
diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c
index 03104830c913..269f2f53c0b1 100644
--- a/security/apparmor/policy.c
+++ b/security/apparmor/policy.c
@@ -1125,8 +1125,8 @@ ssize_t aa_remove_profiles(struct aa_ns *policy_ns, struct aa_label *subj,
if (!name) {
/* remove namespace - can only happen if fqname[0] == ':' */
mutex_lock_nested(&ns->parent->lock, ns->level);
- __aa_remove_ns(ns);
__aa_bump_ns_revision(ns);
+ __aa_remove_ns(ns);
mutex_unlock(&ns->parent->lock);
} else {
/* remove profile */
@@ -1138,9 +1138,9 @@ ssize_t aa_remove_profiles(struct aa_ns *policy_ns, struct aa_label *subj,
goto fail_ns_lock;
}
name = profile->base.hname;
+ __aa_bump_ns_revision(ns);
__remove_profile(profile);
__aa_labelset_update_subtree(ns);
- __aa_bump_ns_revision(ns);
mutex_unlock(&ns->lock);
}
diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c
index f19a895ad7cd..ef8dfd47c7e3 100644
--- a/security/integrity/ima/ima_policy.c
+++ b/security/integrity/ima/ima_policy.c
@@ -45,7 +45,7 @@
#define DONT_HASH 0x0200
#define INVALID_PCR(a) (((a) < 0) || \
- (a) >= (FIELD_SIZEOF(struct integrity_iint_cache, measured_pcrs) * 8))
+ (a) >= (sizeof_field(struct integrity_iint_cache, measured_pcrs) * 8))
int ima_policy_flag;
static int temp_ima_appraise;
@@ -274,7 +274,7 @@ static struct ima_rule_entry *ima_lsm_copy_rule(struct ima_rule_entry *entry)
* lsm rules can change
*/
memcpy(nentry, entry, sizeof(*nentry));
- memset(nentry->lsm, 0, FIELD_SIZEOF(struct ima_rule_entry, lsm));
+ memset(nentry->lsm, 0, sizeof_field(struct ima_rule_entry, lsm));
for (i = 0; i < MAX_LSM_RULES; i++) {
if (!entry->lsm[i].rule)
diff --git a/security/keys/Kconfig b/security/keys/Kconfig
index dd313438fecf..47c041563d41 100644
--- a/security/keys/Kconfig
+++ b/security/keys/Kconfig
@@ -21,10 +21,6 @@ config KEYS
If you are unsure as to whether this is required, answer N.
-config KEYS_COMPAT
- def_bool y
- depends on COMPAT && KEYS
-
config KEYS_REQUEST_CACHE
bool "Enable temporary caching of the last request_key() result"
depends on KEYS
diff --git a/security/keys/Makefile b/security/keys/Makefile
index 074f27538f55..5f40807f05b3 100644
--- a/security/keys/Makefile
+++ b/security/keys/Makefile
@@ -17,7 +17,7 @@ obj-y := \
request_key_auth.o \
user_defined.o
compat-obj-$(CONFIG_KEY_DH_OPERATIONS) += compat_dh.o
-obj-$(CONFIG_KEYS_COMPAT) += compat.o $(compat-obj-y)
+obj-$(CONFIG_COMPAT) += compat.o $(compat-obj-y)
obj-$(CONFIG_PROC_FS) += proc.o
obj-$(CONFIG_SYSCTL) += sysctl.o
obj-$(CONFIG_PERSISTENT_KEYRINGS) += persistent.o
diff --git a/security/keys/compat.c b/security/keys/compat.c
index 9bcc404131aa..b975f8f11124 100644
--- a/security/keys/compat.c
+++ b/security/keys/compat.c
@@ -46,11 +46,6 @@ static long compat_keyctl_instantiate_key_iov(
/*
* The key control system call, 32-bit compatibility version for 64-bit archs
- *
- * This should only be called if the 64-bit arch uses weird pointers in 32-bit
- * mode or doesn't guarantee that the top 32-bits of the argument registers on
- * taking a 32-bit syscall are zero. If you can, you should call sys_keyctl()
- * directly.
*/
COMPAT_SYSCALL_DEFINE5(keyctl, u32, option,
u32, arg2, u32, arg3, u32, arg4, u32, arg5)
diff --git a/security/keys/internal.h b/security/keys/internal.h
index c039373488bd..ba3e2da14cef 100644
--- a/security/keys/internal.h
+++ b/security/keys/internal.h
@@ -264,7 +264,7 @@ extern long keyctl_dh_compute(struct keyctl_dh_params __user *, char __user *,
size_t, struct keyctl_kdf_params __user *);
extern long __keyctl_dh_compute(struct keyctl_dh_params __user *, char __user *,
size_t, struct keyctl_kdf_params *);
-#ifdef CONFIG_KEYS_COMPAT
+#ifdef CONFIG_COMPAT
extern long compat_keyctl_dh_compute(struct keyctl_dh_params __user *params,
char __user *buffer, size_t buflen,
struct compat_keyctl_kdf_params __user *kdf);
@@ -279,7 +279,7 @@ static inline long keyctl_dh_compute(struct keyctl_dh_params __user *params,
return -EOPNOTSUPP;
}
-#ifdef CONFIG_KEYS_COMPAT
+#ifdef CONFIG_COMPAT
static inline long compat_keyctl_dh_compute(
struct keyctl_dh_params __user *params,
char __user *buffer, size_t buflen,
diff --git a/security/keys/trusted-keys/trusted_tpm2.c b/security/keys/trusted-keys/trusted_tpm2.c
index a9810ac2776f..08ec7f48f01d 100644
--- a/security/keys/trusted-keys/trusted_tpm2.c
+++ b/security/keys/trusted-keys/trusted_tpm2.c
@@ -309,6 +309,7 @@ int tpm2_unseal_trusted(struct tpm_chip *chip,
return rc;
rc = tpm2_unseal_cmd(chip, payload, options, blob_handle);
+ tpm2_flush_context(chip, blob_handle);
return rc;
}
diff --git a/security/tomoyo/common.c b/security/tomoyo/common.c
index dd3d5942e669..c36bafbcd77e 100644
--- a/security/tomoyo/common.c
+++ b/security/tomoyo/common.c
@@ -951,7 +951,8 @@ static bool tomoyo_manager(void)
exe = tomoyo_get_exe();
if (!exe)
return false;
- list_for_each_entry_rcu(ptr, &tomoyo_kernel_namespace.policy_list[TOMOYO_ID_MANAGER], head.list) {
+ list_for_each_entry_rcu(ptr, &tomoyo_kernel_namespace.policy_list[TOMOYO_ID_MANAGER], head.list,
+ srcu_read_lock_held(&tomoyo_ss)) {
if (!ptr->head.is_deleted &&
(!tomoyo_pathcmp(domainname, ptr->manager) ||
!strcmp(exe, ptr->manager->name))) {
@@ -1095,7 +1096,8 @@ static int tomoyo_delete_domain(char *domainname)
if (mutex_lock_interruptible(&tomoyo_policy_lock))
return -EINTR;
/* Is there an active domain? */
- list_for_each_entry_rcu(domain, &tomoyo_domain_list, list) {
+ list_for_each_entry_rcu(domain, &tomoyo_domain_list, list,
+ srcu_read_lock_held(&tomoyo_ss)) {
/* Never delete tomoyo_kernel_domain */
if (domain == &tomoyo_kernel_domain)
continue;
@@ -2778,7 +2780,8 @@ void tomoyo_check_profile(void)
tomoyo_policy_loaded = true;
pr_info("TOMOYO: 2.6.0\n");
- list_for_each_entry_rcu(domain, &tomoyo_domain_list, list) {
+ list_for_each_entry_rcu(domain, &tomoyo_domain_list, list,
+ srcu_read_lock_held(&tomoyo_ss)) {
const u8 profile = domain->profile;
struct tomoyo_policy_namespace *ns = domain->ns;
diff --git a/security/tomoyo/domain.c b/security/tomoyo/domain.c
index 8526a0a74023..7869d6a9980b 100644
--- a/security/tomoyo/domain.c
+++ b/security/tomoyo/domain.c
@@ -41,7 +41,8 @@ int tomoyo_update_policy(struct tomoyo_acl_head *new_entry, const int size,
if (mutex_lock_interruptible(&tomoyo_policy_lock))
return -ENOMEM;
- list_for_each_entry_rcu(entry, list, list) {
+ list_for_each_entry_rcu(entry, list, list,
+ srcu_read_lock_held(&tomoyo_ss)) {
if (entry->is_deleted == TOMOYO_GC_IN_PROGRESS)
continue;
if (!check_duplicate(entry, new_entry))
@@ -119,7 +120,8 @@ int tomoyo_update_domain(struct tomoyo_acl_info *new_entry, const int size,
}
if (mutex_lock_interruptible(&tomoyo_policy_lock))
goto out;
- list_for_each_entry_rcu(entry, list, list) {
+ list_for_each_entry_rcu(entry, list, list,
+ srcu_read_lock_held(&tomoyo_ss)) {
if (entry->is_deleted == TOMOYO_GC_IN_PROGRESS)
continue;
if (!tomoyo_same_acl_head(entry, new_entry) ||
@@ -166,7 +168,8 @@ void tomoyo_check_acl(struct tomoyo_request_info *r,
u16 i = 0;
retry:
- list_for_each_entry_rcu(ptr, list, list) {
+ list_for_each_entry_rcu(ptr, list, list,
+ srcu_read_lock_held(&tomoyo_ss)) {
if (ptr->is_deleted || ptr->type != r->param_type)
continue;
if (!check_entry(r, ptr))
@@ -298,7 +301,8 @@ static inline bool tomoyo_scan_transition
{
const struct tomoyo_transition_control *ptr;
- list_for_each_entry_rcu(ptr, list, head.list) {
+ list_for_each_entry_rcu(ptr, list, head.list,
+ srcu_read_lock_held(&tomoyo_ss)) {
if (ptr->head.is_deleted || ptr->type != type)
continue;
if (ptr->domainname) {
@@ -735,7 +739,8 @@ retry:
/* Check 'aggregator' directive. */
candidate = &exename;
- list_for_each_entry_rcu(ptr, list, head.list) {
+ list_for_each_entry_rcu(ptr, list, head.list,
+ srcu_read_lock_held(&tomoyo_ss)) {
if (ptr->head.is_deleted ||
!tomoyo_path_matches_pattern(&exename,
ptr->original_name))
diff --git a/security/tomoyo/group.c b/security/tomoyo/group.c
index a37c7dc66e44..1cecdd797597 100644
--- a/security/tomoyo/group.c
+++ b/security/tomoyo/group.c
@@ -133,7 +133,8 @@ tomoyo_path_matches_group(const struct tomoyo_path_info *pathname,
{
struct tomoyo_path_group *member;
- list_for_each_entry_rcu(member, &group->member_list, head.list) {
+ list_for_each_entry_rcu(member, &group->member_list, head.list,
+ srcu_read_lock_held(&tomoyo_ss)) {
if (member->head.is_deleted)
continue;
if (!tomoyo_path_matches_pattern(pathname, member->member_name))
@@ -161,7 +162,8 @@ bool tomoyo_number_matches_group(const unsigned long min,
struct tomoyo_number_group *member;
bool matched = false;
- list_for_each_entry_rcu(member, &group->member_list, head.list) {
+ list_for_each_entry_rcu(member, &group->member_list, head.list,
+ srcu_read_lock_held(&tomoyo_ss)) {
if (member->head.is_deleted)
continue;
if (min > member->number.values[1] ||
@@ -191,7 +193,8 @@ bool tomoyo_address_matches_group(const bool is_ipv6, const __be32 *address,
bool matched = false;
const u8 size = is_ipv6 ? 16 : 4;
- list_for_each_entry_rcu(member, &group->member_list, head.list) {
+ list_for_each_entry_rcu(member, &group->member_list, head.list,
+ srcu_read_lock_held(&tomoyo_ss)) {
if (member->head.is_deleted)
continue;
if (member->address.is_ipv6 != is_ipv6)
diff --git a/security/tomoyo/realpath.c b/security/tomoyo/realpath.c
index e7832448d721..bf38fc1b59b2 100644
--- a/security/tomoyo/realpath.c
+++ b/security/tomoyo/realpath.c
@@ -218,31 +218,6 @@ out:
}
/**
- * tomoyo_get_socket_name - Get the name of a socket.
- *
- * @path: Pointer to "struct path".
- * @buffer: Pointer to buffer to return value in.
- * @buflen: Sizeof @buffer.
- *
- * Returns the buffer.
- */
-static char *tomoyo_get_socket_name(const struct path *path, char * const buffer,
- const int buflen)
-{
- struct inode *inode = d_backing_inode(path->dentry);
- struct socket *sock = inode ? SOCKET_I(inode) : NULL;
- struct sock *sk = sock ? sock->sk : NULL;
-
- if (sk) {
- snprintf(buffer, buflen, "socket:[family=%u:type=%u:protocol=%u]",
- sk->sk_family, sk->sk_type, sk->sk_protocol);
- } else {
- snprintf(buffer, buflen, "socket:[unknown]");
- }
- return buffer;
-}
-
-/**
* tomoyo_realpath_from_path - Returns realpath(3) of the given pathname but ignores chroot'ed root.
*
* @path: Pointer to "struct path".
@@ -279,12 +254,7 @@ char *tomoyo_realpath_from_path(const struct path *path)
break;
/* To make sure that pos is '\0' terminated. */
buf[buf_len - 1] = '\0';
- /* Get better name for socket. */
- if (sb->s_magic == SOCKFS_MAGIC) {
- pos = tomoyo_get_socket_name(path, buf, buf_len - 1);
- goto encode;
- }
- /* For "pipe:[\$]". */
+ /* For "pipe:[\$]" and "socket:[\$]". */
if (dentry->d_op && dentry->d_op->d_dname) {
pos = dentry->d_op->d_dname(dentry, buf, buf_len - 1);
goto encode;
diff --git a/security/tomoyo/util.c b/security/tomoyo/util.c
index 52752e1a84ed..eba0b3395851 100644
--- a/security/tomoyo/util.c
+++ b/security/tomoyo/util.c
@@ -594,7 +594,8 @@ struct tomoyo_domain_info *tomoyo_find_domain(const char *domainname)
name.name = domainname;
tomoyo_fill_path_info(&name);
- list_for_each_entry_rcu(domain, &tomoyo_domain_list, list) {
+ list_for_each_entry_rcu(domain, &tomoyo_domain_list, list,
+ srcu_read_lock_held(&tomoyo_ss)) {
if (!domain->is_deleted &&
!tomoyo_pathcmp(&name, domain->domainname))
return domain;
@@ -1028,7 +1029,8 @@ bool tomoyo_domain_quota_is_ok(struct tomoyo_request_info *r)
return false;
if (!domain)
return true;
- list_for_each_entry_rcu(ptr, &domain->acl_info_list, list) {
+ list_for_each_entry_rcu(ptr, &domain->acl_info_list, list,
+ srcu_read_lock_held(&tomoyo_ss)) {
u16 perm;
u8 i;
diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c
index 1fe581167b7b..d083225344a0 100644
--- a/sound/core/pcm_native.c
+++ b/sound/core/pcm_native.c
@@ -739,6 +739,10 @@ static int snd_pcm_hw_params(struct snd_pcm_substream *substream,
while (runtime->boundary * 2 <= LONG_MAX - runtime->buffer_size)
runtime->boundary *= 2;
+ /* clear the buffer for avoiding possible kernel info leaks */
+ if (runtime->dma_area && !substream->ops->copy_user)
+ memset(runtime->dma_area, 0, runtime->dma_bytes);
+
snd_pcm_timer_resolution_change(substream);
snd_pcm_set_state(substream, SNDRV_PCM_STATE_SETUP);
diff --git a/sound/firewire/fireface/ff-pcm.c b/sound/firewire/fireface/ff-pcm.c
index 4e3bd9a2bec0..bd91c6ecb112 100644
--- a/sound/firewire/fireface/ff-pcm.c
+++ b/sound/firewire/fireface/ff-pcm.c
@@ -247,7 +247,7 @@ static int pcm_hw_params(struct snd_pcm_substream *substream,
mutex_unlock(&ff->mutex);
}
- return 0;
+ return err;
}
static int pcm_hw_free(struct snd_pcm_substream *substream)
diff --git a/sound/firewire/motu/motu-pcm.c b/sound/firewire/motu/motu-pcm.c
index 349b4d09e84f..005970931030 100644
--- a/sound/firewire/motu/motu-pcm.c
+++ b/sound/firewire/motu/motu-pcm.c
@@ -177,18 +177,14 @@ static int pcm_open(struct snd_pcm_substream *substream)
err = snd_pcm_hw_constraint_minmax(substream->runtime,
SNDRV_PCM_HW_PARAM_PERIOD_SIZE,
frames_per_period, frames_per_period);
- if (err < 0) {
- mutex_unlock(&motu->mutex);
+ if (err < 0)
goto err_locked;
- }
err = snd_pcm_hw_constraint_minmax(substream->runtime,
SNDRV_PCM_HW_PARAM_BUFFER_SIZE,
frames_per_buffer, frames_per_buffer);
- if (err < 0) {
- mutex_unlock(&motu->mutex);
+ if (err < 0)
goto err_locked;
- }
}
}
diff --git a/sound/firewire/oxfw/oxfw-pcm.c b/sound/firewire/oxfw/oxfw-pcm.c
index 9124603edabe..67fd3e844dd6 100644
--- a/sound/firewire/oxfw/oxfw-pcm.c
+++ b/sound/firewire/oxfw/oxfw-pcm.c
@@ -285,7 +285,7 @@ static int pcm_playback_hw_params(struct snd_pcm_substream *substream,
mutex_unlock(&oxfw->mutex);
}
- return 0;
+ return err;
}
static int pcm_capture_hw_free(struct snd_pcm_substream *substream)
diff --git a/sound/hda/hdac_stream.c b/sound/hda/hdac_stream.c
index f9707fb05efe..682ed39f79b0 100644
--- a/sound/hda/hdac_stream.c
+++ b/sound/hda/hdac_stream.c
@@ -120,10 +120,8 @@ void snd_hdac_stream_clear(struct hdac_stream *azx_dev)
snd_hdac_stream_updateb(azx_dev, SD_CTL,
SD_CTL_DMA_START | SD_INT_MASK, 0);
snd_hdac_stream_writeb(azx_dev, SD_STS, SD_INT_MASK); /* to be sure */
- if (azx_dev->stripe) {
+ if (azx_dev->stripe)
snd_hdac_stream_updateb(azx_dev, SD_CTL_3B, SD_CTL_STRIPE_MASK, 0);
- azx_dev->stripe = 0;
- }
azx_dev->running = false;
}
EXPORT_SYMBOL_GPL(snd_hdac_stream_clear);
diff --git a/sound/pci/echoaudio/echoaudio_dsp.c b/sound/pci/echoaudio/echoaudio_dsp.c
index 50d4a87a6bb3..f02f5b1568de 100644
--- a/sound/pci/echoaudio/echoaudio_dsp.c
+++ b/sound/pci/echoaudio/echoaudio_dsp.c
@@ -635,36 +635,30 @@ This function assumes there are no more than 16 in/out busses or pipes
Meters is an array [3][16][2] of long. */
static void get_audio_meters(struct echoaudio *chip, long *meters)
{
- int i, m, n;
+ unsigned int i, m, n;
- m = 0;
- n = 0;
- for (i = 0; i < num_busses_out(chip); i++, m++) {
+ for (i = 0 ; i < 96; i++)
+ meters[i] = 0;
+
+ for (m = 0, n = 0, i = 0; i < num_busses_out(chip); i++, m++) {
meters[n++] = chip->comm_page->vu_meter[m];
meters[n++] = chip->comm_page->peak_meter[m];
}
- for (; n < 32; n++)
- meters[n] = 0;
#ifdef ECHOCARD_ECHO3G
m = E3G_MAX_OUTPUTS; /* Skip unused meters */
#endif
- for (i = 0; i < num_busses_in(chip); i++, m++) {
+ for (n = 32, i = 0; i < num_busses_in(chip); i++, m++) {
meters[n++] = chip->comm_page->vu_meter[m];
meters[n++] = chip->comm_page->peak_meter[m];
}
- for (; n < 64; n++)
- meters[n] = 0;
-
#ifdef ECHOCARD_HAS_VMIXER
- for (i = 0; i < num_pipes_out(chip); i++, m++) {
+ for (n = 64, i = 0; i < num_pipes_out(chip); i++, m++) {
meters[n++] = chip->comm_page->vu_meter[m];
meters[n++] = chip->comm_page->peak_meter[m];
}
#endif
- for (; n < 96; n++)
- meters[n] = 0;
}
diff --git a/sound/pci/hda/hda_controller.c b/sound/pci/hda/hda_controller.c
index 2f3b7a35f2d9..ba56b59b3e17 100644
--- a/sound/pci/hda/hda_controller.c
+++ b/sound/pci/hda/hda_controller.c
@@ -883,7 +883,7 @@ static int azx_rirb_get_response(struct hdac_bus *bus, unsigned int addr,
return -EAGAIN; /* give a chance to retry */
}
- dev_WARN(chip->card->dev,
+ dev_err(chip->card->dev,
"azx_get_response timeout, switching to single_cmd mode: last cmd=0x%08x\n",
bus->last_cmd[addr]);
chip->single_cmd = 1;
diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index 35b4526f0d28..5b92f290cbb0 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -282,12 +282,13 @@ enum {
/* quirks for old Intel chipsets */
#define AZX_DCAPS_INTEL_ICH \
- (AZX_DCAPS_OLD_SSYNC | AZX_DCAPS_NO_ALIGN_BUFSIZE)
+ (AZX_DCAPS_OLD_SSYNC | AZX_DCAPS_NO_ALIGN_BUFSIZE |\
+ AZX_DCAPS_SYNC_WRITE)
/* quirks for Intel PCH */
#define AZX_DCAPS_INTEL_PCH_BASE \
(AZX_DCAPS_NO_ALIGN_BUFSIZE | AZX_DCAPS_COUNT_LPIB_DELAY |\
- AZX_DCAPS_SNOOP_TYPE(SCH))
+ AZX_DCAPS_SNOOP_TYPE(SCH) | AZX_DCAPS_SYNC_WRITE)
/* PCH up to IVB; no runtime PM; bind with i915 gfx */
#define AZX_DCAPS_INTEL_PCH_NOPM \
@@ -302,13 +303,13 @@ enum {
#define AZX_DCAPS_INTEL_HASWELL \
(/*AZX_DCAPS_ALIGN_BUFSIZE |*/ AZX_DCAPS_COUNT_LPIB_DELAY |\
AZX_DCAPS_PM_RUNTIME | AZX_DCAPS_I915_COMPONENT |\
- AZX_DCAPS_SNOOP_TYPE(SCH))
+ AZX_DCAPS_SNOOP_TYPE(SCH) | AZX_DCAPS_SYNC_WRITE)
/* Broadwell HDMI can't use position buffer reliably, force to use LPIB */
#define AZX_DCAPS_INTEL_BROADWELL \
(/*AZX_DCAPS_ALIGN_BUFSIZE |*/ AZX_DCAPS_POSFIX_LPIB |\
AZX_DCAPS_PM_RUNTIME | AZX_DCAPS_I915_COMPONENT |\
- AZX_DCAPS_SNOOP_TYPE(SCH))
+ AZX_DCAPS_SNOOP_TYPE(SCH) | AZX_DCAPS_SYNC_WRITE)
#define AZX_DCAPS_INTEL_BAYTRAIL \
(AZX_DCAPS_INTEL_PCH_BASE | AZX_DCAPS_I915_COMPONENT)
@@ -1410,7 +1411,17 @@ static bool atpx_present(void)
acpi_handle dhandle, atpx_handle;
acpi_status status;
- while ((pdev = pci_get_class(PCI_BASE_CLASS_DISPLAY << 16, pdev)) != NULL) {
+ while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, pdev)) != NULL) {
+ dhandle = ACPI_HANDLE(&pdev->dev);
+ if (dhandle) {
+ status = acpi_get_handle(dhandle, "ATPX", &atpx_handle);
+ if (!ACPI_FAILURE(status)) {
+ pci_dev_put(pdev);
+ return true;
+ }
+ }
+ }
+ while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_OTHER << 8, pdev)) != NULL) {
dhandle = ACPI_HANDLE(&pdev->dev);
if (dhandle) {
status = acpi_get_handle(dhandle, "ATPX", &atpx_handle);
@@ -1419,7 +1430,6 @@ static bool atpx_present(void)
return true;
}
}
- pci_dev_put(pdev);
}
return false;
}
diff --git a/sound/pci/hda/patch_ca0132.c b/sound/pci/hda/patch_ca0132.c
index b7a1abb3e231..32ed46464af7 100644
--- a/sound/pci/hda/patch_ca0132.c
+++ b/sound/pci/hda/patch_ca0132.c
@@ -1809,13 +1809,14 @@ struct scp_msg {
static void dspio_clear_response_queue(struct hda_codec *codec)
{
+ unsigned long timeout = jiffies + msecs_to_jiffies(1000);
unsigned int dummy = 0;
- int status = -1;
+ int status;
/* clear all from the response queue */
do {
status = dspio_read(codec, &dummy);
- } while (status == 0);
+ } while (status == 0 && time_before(jiffies, timeout));
}
static int dspio_get_response_data(struct hda_codec *codec)
@@ -7588,12 +7589,14 @@ static void ca0132_process_dsp_response(struct hda_codec *codec,
struct ca0132_spec *spec = codec->spec;
codec_dbg(codec, "ca0132_process_dsp_response\n");
+ snd_hda_power_up_pm(codec);
if (spec->wait_scp) {
if (dspio_get_response_data(codec) >= 0)
spec->wait_scp = 0;
}
dspio_clear_response_queue(codec);
+ snd_hda_power_down_pm(codec);
}
static void hp_callback(struct hda_codec *codec, struct hda_jack_callback *cb)
@@ -7604,11 +7607,10 @@ static void hp_callback(struct hda_codec *codec, struct hda_jack_callback *cb)
/* Delay enabling the HP amp, to let the mic-detection
* state machine run.
*/
- cancel_delayed_work(&spec->unsol_hp_work);
- schedule_delayed_work(&spec->unsol_hp_work, msecs_to_jiffies(500));
tbl = snd_hda_jack_tbl_get(codec, cb->nid);
if (tbl)
tbl->block_report = 1;
+ schedule_delayed_work(&spec->unsol_hp_work, msecs_to_jiffies(500));
}
static void amic_callback(struct hda_codec *codec, struct hda_jack_callback *cb)
@@ -8454,12 +8456,25 @@ static void ca0132_reboot_notify(struct hda_codec *codec)
codec->patch_ops.free(codec);
}
+#ifdef CONFIG_PM
+static int ca0132_suspend(struct hda_codec *codec)
+{
+ struct ca0132_spec *spec = codec->spec;
+
+ cancel_delayed_work_sync(&spec->unsol_hp_work);
+ return 0;
+}
+#endif
+
static const struct hda_codec_ops ca0132_patch_ops = {
.build_controls = ca0132_build_controls,
.build_pcms = ca0132_build_pcms,
.init = ca0132_init,
.free = ca0132_free,
.unsol_event = snd_hda_jack_unsol_event,
+#ifdef CONFIG_PM
+ .suspend = ca0132_suspend,
+#endif
.reboot_notify = ca0132_reboot_notify,
};
diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c
index 78647ee02339..630b1f5c276d 100644
--- a/sound/pci/hda/patch_hdmi.c
+++ b/sound/pci/hda/patch_hdmi.c
@@ -2021,6 +2021,8 @@ static int hdmi_pcm_close(struct hda_pcm_stream *hinfo,
per_cvt->assigned = 0;
hinfo->nid = 0;
+ azx_stream(get_azx_dev(substream))->stripe = 0;
+
mutex_lock(&spec->pcm_lock);
snd_hda_spdif_ctls_unassign(codec, pcm_idx);
clear_bit(pcm_idx, &spec->pcm_in_use);
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 6d6e34b3b3aa..1cd4906a67e1 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -501,6 +501,7 @@ static void alc_shutup_pins(struct hda_codec *codec)
struct alc_spec *spec = codec->spec;
switch (codec->core.vendor_id) {
+ case 0x10ec0283:
case 0x10ec0286:
case 0x10ec0288:
case 0x10ec0298:
@@ -5904,9 +5905,12 @@ enum {
ALC256_FIXUP_ASUS_HEADSET_MIC,
ALC256_FIXUP_ASUS_MIC_NO_PRESENCE,
ALC299_FIXUP_PREDATOR_SPK,
- ALC294_FIXUP_ASUS_INTSPK_HEADSET_MIC,
ALC256_FIXUP_MEDION_HEADSET_NO_PRESENCE,
- ALC294_FIXUP_ASUS_INTSPK_GPIO,
+ ALC289_FIXUP_DELL_SPK2,
+ ALC289_FIXUP_DUAL_SPK,
+ ALC294_FIXUP_SPK2_TO_DAC1,
+ ALC294_FIXUP_ASUS_DUAL_SPK,
+
};
static const struct hda_fixup alc269_fixups[] = {
@@ -6981,33 +6985,45 @@ static const struct hda_fixup alc269_fixups[] = {
{ }
}
},
- [ALC294_FIXUP_ASUS_INTSPK_HEADSET_MIC] = {
+ [ALC256_FIXUP_MEDION_HEADSET_NO_PRESENCE] = {
.type = HDA_FIXUP_PINS,
.v.pins = (const struct hda_pintbl[]) {
- { 0x14, 0x411111f0 }, /* disable confusing internal speaker */
- { 0x19, 0x04a11150 }, /* use as headset mic, without its own jack detect */
+ { 0x19, 0x04a11040 },
+ { 0x21, 0x04211020 },
{ }
},
.chained = true,
- .chain_id = ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC
+ .chain_id = ALC256_FIXUP_ASUS_HEADSET_MODE
},
- [ALC256_FIXUP_MEDION_HEADSET_NO_PRESENCE] = {
+ [ALC289_FIXUP_DELL_SPK2] = {
.type = HDA_FIXUP_PINS,
.v.pins = (const struct hda_pintbl[]) {
- { 0x19, 0x04a11040 },
- { 0x21, 0x04211020 },
+ { 0x17, 0x90170130 }, /* bass spk */
{ }
},
.chained = true,
- .chain_id = ALC256_FIXUP_ASUS_HEADSET_MODE
+ .chain_id = ALC269_FIXUP_DELL4_MIC_NO_PRESENCE
},
- [ALC294_FIXUP_ASUS_INTSPK_GPIO] = {
+ [ALC289_FIXUP_DUAL_SPK] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = alc285_fixup_speaker2_to_dac1,
+ .chained = true,
+ .chain_id = ALC289_FIXUP_DELL_SPK2
+ },
+ [ALC294_FIXUP_SPK2_TO_DAC1] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = alc285_fixup_speaker2_to_dac1,
+ .chained = true,
+ .chain_id = ALC294_FIXUP_ASUS_HEADSET_MIC
+ },
+ [ALC294_FIXUP_ASUS_DUAL_SPK] = {
.type = HDA_FIXUP_FUNC,
/* The GPIO must be pulled to initialize the AMP */
.v.func = alc_fixup_gpio4,
.chained = true,
- .chain_id = ALC294_FIXUP_ASUS_INTSPK_HEADSET_MIC
+ .chain_id = ALC294_FIXUP_SPK2_TO_DAC1
},
+
};
static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -7080,6 +7096,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1028, 0x08ad, "Dell WYSE AIO", ALC225_FIXUP_DELL_WYSE_AIO_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1028, 0x08ae, "Dell WYSE NB", ALC225_FIXUP_DELL1_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1028, 0x0935, "Dell", ALC274_FIXUP_DELL_AIO_LINEOUT_VERB),
+ SND_PCI_QUIRK(0x1028, 0x097e, "Dell Precision", ALC289_FIXUP_DUAL_SPK),
+ SND_PCI_QUIRK(0x1028, 0x097d, "Dell Precision", ALC289_FIXUP_DUAL_SPK),
SND_PCI_QUIRK(0x1028, 0x164a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1028, 0x164b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x103c, 0x1586, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC2),
@@ -7167,7 +7185,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1043, 0x1427, "Asus Zenbook UX31E", ALC269VB_FIXUP_ASUS_ZENBOOK),
SND_PCI_QUIRK(0x1043, 0x1517, "Asus Zenbook UX31A", ALC269VB_FIXUP_ASUS_ZENBOOK_UX31A),
SND_PCI_QUIRK(0x1043, 0x16e3, "ASUS UX50", ALC269_FIXUP_STEREO_DMIC),
- SND_PCI_QUIRK(0x1043, 0x17d1, "ASUS UX431FL", ALC294_FIXUP_ASUS_INTSPK_GPIO),
+ SND_PCI_QUIRK(0x1043, 0x17d1, "ASUS UX431FL", ALC294_FIXUP_ASUS_DUAL_SPK),
SND_PCI_QUIRK(0x1043, 0x18b1, "Asus MJ401TA", ALC256_FIXUP_ASUS_HEADSET_MIC),
SND_PCI_QUIRK(0x1043, 0x1a13, "Asus G73Jw", ALC269_FIXUP_ASUS_G73JW),
SND_PCI_QUIRK(0x1043, 0x1a30, "ASUS X705UD", ALC256_FIXUP_ASUS_MIC),
@@ -7643,11 +7661,6 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = {
{0x1a, 0x90a70130},
{0x1b, 0x90170110},
{0x21, 0x03211020}),
- SND_HDA_PIN_QUIRK(0x10ec0274, 0x1028, "Dell", ALC274_FIXUP_DELL_AIO_LINEOUT_VERB,
- {0x12, 0xb7a60130},
- {0x13, 0xb8a61140},
- {0x16, 0x90170110},
- {0x21, 0x04211020}),
SND_HDA_PIN_QUIRK(0x10ec0280, 0x103c, "HP", ALC280_FIXUP_HP_GPIO4,
{0x12, 0x90a60130},
{0x14, 0x90170110},
@@ -7841,6 +7854,9 @@ static const struct snd_hda_pin_quirk alc269_fallback_pin_fixup_tbl[] = {
SND_HDA_PIN_QUIRK(0x10ec0236, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
{0x19, 0x40000000},
{0x1a, 0x40000000}),
+ SND_HDA_PIN_QUIRK(0x10ec0274, 0x1028, "Dell", ALC274_FIXUP_DELL_AIO_LINEOUT_VERB,
+ {0x19, 0x40000000},
+ {0x1a, 0x40000000}),
{}
};
diff --git a/sound/pci/ice1712/ice1724.c b/sound/pci/ice1712/ice1724.c
index c80a16ee6e76..242542e23d28 100644
--- a/sound/pci/ice1712/ice1724.c
+++ b/sound/pci/ice1712/ice1724.c
@@ -647,6 +647,7 @@ static int snd_vt1724_set_pro_rate(struct snd_ice1712 *ice, unsigned int rate,
unsigned long flags;
unsigned char mclk_change;
unsigned int i, old_rate;
+ bool call_set_rate = false;
if (rate > ice->hw_rates->list[ice->hw_rates->count - 1])
return -EINVAL;
@@ -670,7 +671,7 @@ static int snd_vt1724_set_pro_rate(struct snd_ice1712 *ice, unsigned int rate,
* setting clock rate for internal clock mode */
old_rate = ice->get_rate(ice);
if (force || (old_rate != rate))
- ice->set_rate(ice, rate);
+ call_set_rate = true;
else if (rate == ice->cur_rate) {
spin_unlock_irqrestore(&ice->reg_lock, flags);
return 0;
@@ -678,12 +679,14 @@ static int snd_vt1724_set_pro_rate(struct snd_ice1712 *ice, unsigned int rate,
}
ice->cur_rate = rate;
+ spin_unlock_irqrestore(&ice->reg_lock, flags);
+
+ if (call_set_rate)
+ ice->set_rate(ice, rate);
/* setting master clock */
mclk_change = ice->set_mclk(ice, rate);
- spin_unlock_irqrestore(&ice->reg_lock, flags);
-
if (mclk_change && ice->gpio.i2s_mclk_changed)
ice->gpio.i2s_mclk_changed(ice);
if (ice->gpio.set_pro_rate)
diff --git a/sound/soc/amd/acp-da7219-max98357a.c b/sound/soc/amd/acp-da7219-max98357a.c
index f4ee6798154a..7a5621e5e233 100644
--- a/sound/soc/amd/acp-da7219-max98357a.c
+++ b/sound/soc/amd/acp-da7219-max98357a.c
@@ -96,14 +96,19 @@ static int cz_da7219_init(struct snd_soc_pcm_runtime *rtd)
return 0;
}
-static int da7219_clk_enable(struct snd_pcm_substream *substream,
- int wclk_rate, int bclk_rate)
+static int da7219_clk_enable(struct snd_pcm_substream *substream)
{
int ret = 0;
struct snd_soc_pcm_runtime *rtd = substream->private_data;
- clk_set_rate(da7219_dai_wclk, wclk_rate);
- clk_set_rate(da7219_dai_bclk, bclk_rate);
+ /*
+ * Set wclk to 48000 because the rate constraint of this driver is
+ * 48000. ADAU7002 spec: "The ADAU7002 requires a BCLK rate that is
+ * minimum of 64x the LRCLK sample rate." DA7219 is the only clk
+ * source so for all codecs we have to limit bclk to 64X lrclk.
+ */
+ clk_set_rate(da7219_dai_wclk, 48000);
+ clk_set_rate(da7219_dai_bclk, 48000 * 64);
ret = clk_prepare_enable(da7219_dai_bclk);
if (ret < 0) {
dev_err(rtd->dev, "can't enable master clock %d\n", ret);
@@ -156,7 +161,7 @@ static int cz_da7219_play_startup(struct snd_pcm_substream *substream)
&constraints_rates);
machine->play_i2s_instance = I2S_SP_INSTANCE;
- return 0;
+ return da7219_clk_enable(substream);
}
static int cz_da7219_cap_startup(struct snd_pcm_substream *substream)
@@ -178,7 +183,7 @@ static int cz_da7219_cap_startup(struct snd_pcm_substream *substream)
machine->cap_i2s_instance = I2S_SP_INSTANCE;
machine->capture_channel = CAP_CHANNEL1;
- return 0;
+ return da7219_clk_enable(substream);
}
static int cz_max_startup(struct snd_pcm_substream *substream)
@@ -199,7 +204,7 @@ static int cz_max_startup(struct snd_pcm_substream *substream)
&constraints_rates);
machine->play_i2s_instance = I2S_BT_INSTANCE;
- return 0;
+ return da7219_clk_enable(substream);
}
static int cz_dmic0_startup(struct snd_pcm_substream *substream)
@@ -220,7 +225,7 @@ static int cz_dmic0_startup(struct snd_pcm_substream *substream)
&constraints_rates);
machine->cap_i2s_instance = I2S_BT_INSTANCE;
- return 0;
+ return da7219_clk_enable(substream);
}
static int cz_dmic1_startup(struct snd_pcm_substream *substream)
@@ -242,25 +247,7 @@ static int cz_dmic1_startup(struct snd_pcm_substream *substream)
machine->cap_i2s_instance = I2S_SP_INSTANCE;
machine->capture_channel = CAP_CHANNEL0;
- return 0;
-}
-
-static int cz_da7219_params(struct snd_pcm_substream *substream,
- struct snd_pcm_hw_params *params)
-{
- int wclk, bclk;
-
- wclk = params_rate(params);
- bclk = wclk * params_channels(params) *
- snd_pcm_format_width(params_format(params));
- /* ADAU7002 spec: "The ADAU7002 requires a BCLK rate
- * that is minimum of 64x the LRCLK sample rate."
- * DA7219 is the only clk source so for all codecs
- * we have to limit bclk to 64X lrclk.
- */
- if (bclk < (wclk * 64))
- bclk = wclk * 64;
- return da7219_clk_enable(substream, wclk, bclk);
+ return da7219_clk_enable(substream);
}
static void cz_da7219_shutdown(struct snd_pcm_substream *substream)
@@ -271,31 +258,26 @@ static void cz_da7219_shutdown(struct snd_pcm_substream *substream)
static const struct snd_soc_ops cz_da7219_play_ops = {
.startup = cz_da7219_play_startup,
.shutdown = cz_da7219_shutdown,
- .hw_params = cz_da7219_params,
};
static const struct snd_soc_ops cz_da7219_cap_ops = {
.startup = cz_da7219_cap_startup,
.shutdown = cz_da7219_shutdown,
- .hw_params = cz_da7219_params,
};
static const struct snd_soc_ops cz_max_play_ops = {
.startup = cz_max_startup,
.shutdown = cz_da7219_shutdown,
- .hw_params = cz_da7219_params,
};
static const struct snd_soc_ops cz_dmic0_cap_ops = {
.startup = cz_dmic0_startup,
.shutdown = cz_da7219_shutdown,
- .hw_params = cz_da7219_params,
};
static const struct snd_soc_ops cz_dmic1_cap_ops = {
.startup = cz_dmic1_startup,
.shutdown = cz_da7219_shutdown,
- .hw_params = cz_da7219_params,
};
SND_SOC_DAILINK_DEF(designware1,
diff --git a/sound/soc/codecs/hdmi-codec.c b/sound/soc/codecs/hdmi-codec.c
index f8b5b960e597..4eaa2b5b20a5 100644
--- a/sound/soc/codecs/hdmi-codec.c
+++ b/sound/soc/codecs/hdmi-codec.c
@@ -292,7 +292,7 @@ static int hdmi_eld_ctl_info(struct snd_kcontrol *kcontrol,
struct snd_ctl_elem_info *uinfo)
{
uinfo->type = SNDRV_CTL_ELEM_TYPE_BYTES;
- uinfo->count = FIELD_SIZEOF(struct hdmi_codec_priv, eld);
+ uinfo->count = sizeof_field(struct hdmi_codec_priv, eld);
return 0;
}
diff --git a/sound/soc/codecs/max98090.c b/sound/soc/codecs/max98090.c
index f6bf4cfbea23..e46b6ada13b1 100644
--- a/sound/soc/codecs/max98090.c
+++ b/sound/soc/codecs/max98090.c
@@ -2103,26 +2103,40 @@ static void max98090_pll_det_disable_work(struct work_struct *work)
M98090_IULK_MASK, 0);
}
-static void max98090_pll_work(struct work_struct *work)
+static void max98090_pll_work(struct max98090_priv *max98090)
{
- struct max98090_priv *max98090 =
- container_of(work, struct max98090_priv, pll_work);
struct snd_soc_component *component = max98090->component;
+ unsigned int pll;
+ int i;
if (!snd_soc_component_is_active(component))
return;
dev_info_ratelimited(component->dev, "PLL unlocked\n");
+ /*
+ * As the datasheet suggested, the maximum PLL lock time should be
+ * 7 msec. The workaround resets the codec softly by toggling SHDN
+ * off and on if PLL failed to lock for 10 msec. Notably, there is
+ * no suggested hold time for SHDN off.
+ */
+
/* Toggle shutdown OFF then ON */
snd_soc_component_update_bits(component, M98090_REG_DEVICE_SHUTDOWN,
M98090_SHDNN_MASK, 0);
- msleep(10);
snd_soc_component_update_bits(component, M98090_REG_DEVICE_SHUTDOWN,
M98090_SHDNN_MASK, M98090_SHDNN_MASK);
- /* Give PLL time to lock */
- msleep(10);
+ for (i = 0; i < 10; ++i) {
+ /* Give PLL time to lock */
+ usleep_range(1000, 1200);
+
+ /* Check lock status */
+ pll = snd_soc_component_read32(
+ component, M98090_REG_DEVICE_STATUS);
+ if (!(pll & M98090_ULK_MASK))
+ break;
+ }
}
static void max98090_jack_work(struct work_struct *work)
@@ -2259,7 +2273,7 @@ static irqreturn_t max98090_interrupt(int irq, void *data)
if (active & M98090_ULK_MASK) {
dev_dbg(component->dev, "M98090_ULK_MASK\n");
- schedule_work(&max98090->pll_work);
+ max98090_pll_work(max98090);
}
if (active & M98090_JDET_MASK) {
@@ -2422,7 +2436,6 @@ static int max98090_probe(struct snd_soc_component *component)
max98090_pll_det_enable_work);
INIT_WORK(&max98090->pll_det_disable_work,
max98090_pll_det_disable_work);
- INIT_WORK(&max98090->pll_work, max98090_pll_work);
/* Enable jack detection */
snd_soc_component_write(component, M98090_REG_JACK_DETECT,
@@ -2475,7 +2488,6 @@ static void max98090_remove(struct snd_soc_component *component)
cancel_delayed_work_sync(&max98090->jack_work);
cancel_delayed_work_sync(&max98090->pll_det_enable_work);
cancel_work_sync(&max98090->pll_det_disable_work);
- cancel_work_sync(&max98090->pll_work);
max98090->component = NULL;
}
diff --git a/sound/soc/codecs/max98090.h b/sound/soc/codecs/max98090.h
index 57965cd678b4..a197114b0dad 100644
--- a/sound/soc/codecs/max98090.h
+++ b/sound/soc/codecs/max98090.h
@@ -1530,7 +1530,6 @@ struct max98090_priv {
struct delayed_work jack_work;
struct delayed_work pll_det_enable_work;
struct work_struct pll_det_disable_work;
- struct work_struct pll_work;
struct snd_soc_jack *jack;
unsigned int dai_fmt;
int tdm_slots;
diff --git a/sound/soc/codecs/rt5677-spi.h b/sound/soc/codecs/rt5677-spi.h
index 3af36ec928e9..088b77931727 100644
--- a/sound/soc/codecs/rt5677-spi.h
+++ b/sound/soc/codecs/rt5677-spi.h
@@ -9,9 +9,25 @@
#ifndef __RT5677_SPI_H__
#define __RT5677_SPI_H__
+#if IS_ENABLED(CONFIG_SND_SOC_RT5677_SPI)
int rt5677_spi_read(u32 addr, void *rxbuf, size_t len);
int rt5677_spi_write(u32 addr, const void *txbuf, size_t len);
int rt5677_spi_write_firmware(u32 addr, const struct firmware *fw);
void rt5677_spi_hotword_detected(void);
+#else
+static inline int rt5677_spi_read(u32 addr, void *rxbuf, size_t len)
+{
+ return -EINVAL;
+}
+static inline int rt5677_spi_write(u32 addr, const void *txbuf, size_t len)
+{
+ return -EINVAL;
+}
+static inline int rt5677_spi_write_firmware(u32 addr, const struct firmware *fw)
+{
+ return -EINVAL;
+}
+static inline void rt5677_spi_hotword_detected(void){}
+#endif
#endif /* __RT5677_SPI_H__ */
diff --git a/sound/soc/codecs/rt5682.c b/sound/soc/codecs/rt5682.c
index b1713fffa3eb..ae6f6121bc1b 100644
--- a/sound/soc/codecs/rt5682.c
+++ b/sound/soc/codecs/rt5682.c
@@ -73,6 +73,7 @@ struct rt5682_priv {
static const struct reg_sequence patch_list[] = {
{RT5682_HP_IMP_SENS_CTRL_19, 0x1000},
{RT5682_DAC_ADC_DIG_VOL1, 0xa020},
+ {RT5682_I2C_CTRL, 0x000f},
};
static const struct reg_default rt5682_reg[] = {
@@ -2474,6 +2475,7 @@ static void rt5682_calibrate(struct rt5682_priv *rt5682)
mutex_lock(&rt5682->calibrate_mutex);
rt5682_reset(rt5682->regmap);
+ regmap_write(rt5682->regmap, RT5682_I2C_CTRL, 0x000f);
regmap_write(rt5682->regmap, RT5682_PWR_ANLG_1, 0xa2af);
usleep_range(15000, 20000);
regmap_write(rt5682->regmap, RT5682_PWR_ANLG_1, 0xf2af);
diff --git a/sound/soc/codecs/wm8904.c b/sound/soc/codecs/wm8904.c
index 7d7ea15d73e0..5ffbaddd6e49 100644
--- a/sound/soc/codecs/wm8904.c
+++ b/sound/soc/codecs/wm8904.c
@@ -1806,6 +1806,12 @@ static int wm8904_set_sysclk(struct snd_soc_dai *dai, int clk_id,
switch (clk_id) {
case WM8904_CLK_AUTO:
+ /* We don't have any rate constraints, so just ignore the
+ * request to disable constraining.
+ */
+ if (!freq)
+ return 0;
+
mclk_freq = clk_get_rate(priv->mclk);
/* enable FLL if a different sysclk is desired */
if (mclk_freq != freq) {
diff --git a/sound/soc/codecs/wm8962.c b/sound/soc/codecs/wm8962.c
index 3e5c69fbc33a..d9d59f45833f 100644
--- a/sound/soc/codecs/wm8962.c
+++ b/sound/soc/codecs/wm8962.c
@@ -2788,7 +2788,7 @@ static int fll_factors(struct _fll_div *fll_div, unsigned int Fref,
if (target % Fref == 0) {
fll_div->theta = 0;
- fll_div->lambda = 0;
+ fll_div->lambda = 1;
} else {
gcd_fll = gcd(target, fratio * Fref);
@@ -2858,7 +2858,7 @@ static int wm8962_set_fll(struct snd_soc_component *component, int fll_id, int s
return -EINVAL;
}
- if (fll_div.theta || fll_div.lambda)
+ if (fll_div.theta)
fll1 |= WM8962_FLL_FRAC;
/* Stop the FLL while we reconfigure */
diff --git a/sound/soc/generic/simple-card.c b/sound/soc/generic/simple-card.c
index 10b82bf043d1..55e9f8800b3e 100644
--- a/sound/soc/generic/simple-card.c
+++ b/sound/soc/generic/simple-card.c
@@ -371,6 +371,7 @@ static int simple_for_each_link(struct asoc_simple_priv *priv,
do {
struct asoc_simple_data adata;
struct device_node *codec;
+ struct device_node *plat;
struct device_node *np;
int num = of_get_child_count(node);
@@ -381,6 +382,9 @@ static int simple_for_each_link(struct asoc_simple_priv *priv,
ret = -ENODEV;
goto error;
}
+ /* get platform */
+ plat = of_get_child_by_name(node, is_top ?
+ PREFIX "plat" : "plat");
/* get convert-xxx property */
memset(&adata, 0, sizeof(adata));
@@ -389,6 +393,8 @@ static int simple_for_each_link(struct asoc_simple_priv *priv,
/* loop for all CPU/Codec node */
for_each_child_of_node(node, np) {
+ if (plat == np)
+ continue;
/*
* It is DPCM
* if it has many CPUs,
diff --git a/sound/soc/intel/atom/sst/sst.c b/sound/soc/intel/atom/sst/sst.c
index fbecbb74350b..68bcec5241f7 100644
--- a/sound/soc/intel/atom/sst/sst.c
+++ b/sound/soc/intel/atom/sst/sst.c
@@ -14,6 +14,7 @@
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/interrupt.h>
+#include <linux/io.h>
#include <linux/firmware.h>
#include <linux/pm_runtime.h>
#include <linux/pm_qos.h>
diff --git a/sound/soc/intel/boards/bytcr_rt5640.c b/sound/soc/intel/boards/bytcr_rt5640.c
index dd2b5ad08659..243f683bc02a 100644
--- a/sound/soc/intel/boards/bytcr_rt5640.c
+++ b/sound/soc/intel/boards/bytcr_rt5640.c
@@ -707,13 +707,17 @@ static const struct dmi_system_id byt_rt5640_quirk_table[] = {
BYT_RT5640_MCLK_EN),
},
{
+ /* Teclast X89 */
.matches = {
DMI_MATCH(DMI_BOARD_VENDOR, "TECLAST"),
DMI_MATCH(DMI_BOARD_NAME, "tPAD"),
},
.driver_data = (void *)(BYT_RT5640_IN3_MAP |
- BYT_RT5640_MCLK_EN |
- BYT_RT5640_SSP0_AIF1),
+ BYT_RT5640_JD_SRC_JD1_IN4P |
+ BYT_RT5640_OVCD_TH_2000UA |
+ BYT_RT5640_OVCD_SF_1P0 |
+ BYT_RT5640_SSP0_AIF1 |
+ BYT_RT5640_MCLK_EN),
},
{ /* Toshiba Satellite Click Mini L9W-B */
.matches = {
diff --git a/sound/soc/intel/common/soc-acpi-intel-cml-match.c b/sound/soc/intel/common/soc-acpi-intel-cml-match.c
index 5d08ae066738..fb9ba8819706 100644
--- a/sound/soc/intel/common/soc-acpi-intel-cml-match.c
+++ b/sound/soc/intel/common/soc-acpi-intel-cml-match.c
@@ -9,45 +9,52 @@
#include <sound/soc-acpi.h>
#include <sound/soc-acpi-intel-match.h>
-static struct snd_soc_acpi_codecs cml_codecs = {
+static struct snd_soc_acpi_codecs rt1011_spk_codecs = {
.num_codecs = 1,
- .codecs = {"10EC5682"}
+ .codecs = {"10EC1011"}
};
-static struct snd_soc_acpi_codecs cml_spk_codecs = {
+static struct snd_soc_acpi_codecs max98357a_spk_codecs = {
.num_codecs = 1,
.codecs = {"MX98357A"}
};
+/*
+ * The order of the three entries with .id = "10EC5682" matters
+ * here, because DSDT tables expose an ACPI HID for the MAX98357A
+ * speaker amplifier which is not populated on the board.
+ */
struct snd_soc_acpi_mach snd_soc_acpi_intel_cml_machines[] = {
{
- .id = "DLGS7219",
- .drv_name = "cml_da7219_max98357a",
- .quirk_data = &cml_spk_codecs,
+ .id = "10EC5682",
+ .drv_name = "cml_rt1011_rt5682",
+ .machine_quirk = snd_soc_acpi_codec_list,
+ .quirk_data = &rt1011_spk_codecs,
.sof_fw_filename = "sof-cml.ri",
- .sof_tplg_filename = "sof-cml-da7219-max98357a.tplg",
+ .sof_tplg_filename = "sof-cml-rt1011-rt5682.tplg",
},
{
- .id = "MX98357A",
+ .id = "10EC5682",
.drv_name = "sof_rt5682",
- .quirk_data = &cml_codecs,
+ .machine_quirk = snd_soc_acpi_codec_list,
+ .quirk_data = &max98357a_spk_codecs,
.sof_fw_filename = "sof-cml.ri",
.sof_tplg_filename = "sof-cml-rt5682-max98357a.tplg",
},
{
- .id = "10EC1011",
- .drv_name = "cml_rt1011_rt5682",
- .quirk_data = &cml_codecs,
- .sof_fw_filename = "sof-cml.ri",
- .sof_tplg_filename = "sof-cml-rt1011-rt5682.tplg",
- },
- {
.id = "10EC5682",
.drv_name = "sof_rt5682",
.sof_fw_filename = "sof-cml.ri",
.sof_tplg_filename = "sof-cml-rt5682.tplg",
},
-
+ {
+ .id = "DLGS7219",
+ .drv_name = "cml_da7219_max98357a",
+ .machine_quirk = snd_soc_acpi_codec_list,
+ .quirk_data = &max98357a_spk_codecs,
+ .sof_fw_filename = "sof-cml.ri",
+ .sof_tplg_filename = "sof-cml-da7219-max98357a.tplg",
+ },
{},
};
EXPORT_SYMBOL_GPL(snd_soc_acpi_intel_cml_machines);
diff --git a/sound/soc/soc-compress.c b/sound/soc/soc-compress.c
index 61f230324164..6615ef64c7f5 100644
--- a/sound/soc/soc-compress.c
+++ b/sound/soc/soc-compress.c
@@ -214,10 +214,8 @@ be_err:
* This is to ensure there are no pops or clicks in between any music tracks
* due to DAPM power cycling.
*/
-static void close_delayed_work(struct work_struct *work)
+static void close_delayed_work(struct snd_soc_pcm_runtime *rtd)
{
- struct snd_soc_pcm_runtime *rtd =
- container_of(work, struct snd_soc_pcm_runtime, delayed_work.work);
struct snd_soc_dai *codec_dai = rtd->codec_dai;
mutex_lock_nested(&rtd->card->pcm_mutex, rtd->card->pcm_subclass);
@@ -929,7 +927,7 @@ int snd_soc_new_compress(struct snd_soc_pcm_runtime *rtd, int num)
}
/* DAPM dai link stream work */
- INIT_DELAYED_WORK(&rtd->delayed_work, close_delayed_work);
+ rtd->close_delayed_work_func = close_delayed_work;
rtd->compr = compr;
compr->private_data = rtd;
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index 062653ab03a3..1c84ff1a5bf9 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -419,7 +419,8 @@ static void soc_free_pcm_runtime(struct snd_soc_pcm_runtime *rtd)
list_del(&rtd->list);
- flush_delayed_work(&rtd->delayed_work);
+ if (delayed_work_pending(&rtd->delayed_work))
+ flush_delayed_work(&rtd->delayed_work);
snd_soc_pcm_component_free(rtd);
/*
@@ -435,6 +436,15 @@ static void soc_free_pcm_runtime(struct snd_soc_pcm_runtime *rtd)
device_unregister(rtd->dev);
}
+static void close_delayed_work(struct work_struct *work) {
+ struct snd_soc_pcm_runtime *rtd =
+ container_of(work, struct snd_soc_pcm_runtime,
+ delayed_work.work);
+
+ if (rtd->close_delayed_work_func)
+ rtd->close_delayed_work_func(rtd);
+}
+
static struct snd_soc_pcm_runtime *soc_new_pcm_runtime(
struct snd_soc_card *card, struct snd_soc_dai_link *dai_link)
{
@@ -470,6 +480,7 @@ static struct snd_soc_pcm_runtime *soc_new_pcm_runtime(
rtd->dev = dev;
dev_set_drvdata(dev, rtd);
+ INIT_DELAYED_WORK(&rtd->delayed_work, close_delayed_work);
/*
* for rtd->codec_dais
diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c
index 76b7ee637e86..01e7bc03d92f 100644
--- a/sound/soc/soc-pcm.c
+++ b/sound/soc/soc-pcm.c
@@ -637,10 +637,8 @@ out:
* This is to ensure there are no pops or clicks in between any music tracks
* due to DAPM power cycling.
*/
-static void close_delayed_work(struct work_struct *work)
+static void close_delayed_work(struct snd_soc_pcm_runtime *rtd)
{
- struct snd_soc_pcm_runtime *rtd =
- container_of(work, struct snd_soc_pcm_runtime, delayed_work.work);
struct snd_soc_dai *codec_dai = rtd->codec_dais[0];
mutex_lock_nested(&rtd->card->pcm_mutex, rtd->card->pcm_subclass);
@@ -660,7 +658,7 @@ static void close_delayed_work(struct work_struct *work)
mutex_unlock(&rtd->card->pcm_mutex);
}
-static void codec2codec_close_delayed_work(struct work_struct *work)
+static void codec2codec_close_delayed_work(struct snd_soc_pcm_runtime *rtd)
{
/*
* Currently nothing to do for c2c links
@@ -2974,10 +2972,9 @@ int soc_new_pcm(struct snd_soc_pcm_runtime *rtd, int num)
/* DAPM dai link stream work */
if (rtd->dai_link->params)
- INIT_DELAYED_WORK(&rtd->delayed_work,
- codec2codec_close_delayed_work);
+ rtd->close_delayed_work_func = codec2codec_close_delayed_work;
else
- INIT_DELAYED_WORK(&rtd->delayed_work, close_delayed_work);
+ rtd->close_delayed_work_func = close_delayed_work;
pcm->nonatomic = rtd->dai_link->nonatomic;
rtd->pcm = pcm;
diff --git a/sound/soc/soc-topology.c b/sound/soc/soc-topology.c
index 81d2af000a5c..b28613149b0c 100644
--- a/sound/soc/soc-topology.c
+++ b/sound/soc/soc-topology.c
@@ -1933,11 +1933,13 @@ static int soc_tplg_fe_link_create(struct soc_tplg *tplg,
ret = soc_tplg_dai_link_load(tplg, link, NULL);
if (ret < 0) {
dev_err(tplg->comp->dev, "ASoC: FE link loading failed\n");
- kfree(link->name);
- kfree(link->stream_name);
- kfree(link->cpus->dai_name);
- kfree(link);
- return ret;
+ goto err;
+ }
+
+ ret = snd_soc_add_dai_link(tplg->comp->card, link);
+ if (ret < 0) {
+ dev_err(tplg->comp->dev, "ASoC: adding FE link failed\n");
+ goto err;
}
link->dobj.index = tplg->index;
@@ -1945,8 +1947,13 @@ static int soc_tplg_fe_link_create(struct soc_tplg *tplg,
link->dobj.type = SND_SOC_DOBJ_DAI_LINK;
list_add(&link->dobj.list, &tplg->comp->dobj_list);
- snd_soc_add_dai_link(tplg->comp->card, link);
return 0;
+err:
+ kfree(link->name);
+ kfree(link->stream_name);
+ kfree(link->cpus->dai_name);
+ kfree(link);
+ return ret;
}
/* create a FE DAI and DAI link from the PCM object */
@@ -2039,6 +2046,7 @@ static int soc_tplg_pcm_elems_load(struct soc_tplg *tplg,
int size;
int i;
bool abi_match;
+ int ret;
count = le32_to_cpu(hdr->count);
@@ -2080,7 +2088,12 @@ static int soc_tplg_pcm_elems_load(struct soc_tplg *tplg,
}
/* create the FE DAIs and DAI links */
- soc_tplg_pcm_create(tplg, _pcm);
+ ret = soc_tplg_pcm_create(tplg, _pcm);
+ if (ret < 0) {
+ if (!abi_match)
+ kfree(_pcm);
+ return ret;
+ }
/* offset by version-specific struct size and
* real priv data size
diff --git a/sound/soc/sof/intel/byt.c b/sound/soc/sof/intel/byt.c
index 2abf80b3eb52..92ef6a796fd5 100644
--- a/sound/soc/sof/intel/byt.c
+++ b/sound/soc/sof/intel/byt.c
@@ -24,7 +24,8 @@
#define DRAM_OFFSET 0x100000
#define DRAM_SIZE (160 * 1024)
#define SHIM_OFFSET 0x140000
-#define SHIM_SIZE 0x100
+#define SHIM_SIZE_BYT 0x100
+#define SHIM_SIZE_CHT 0x118
#define MBOX_OFFSET 0x144000
#define MBOX_SIZE 0x1000
#define EXCEPT_OFFSET 0x800
@@ -75,7 +76,7 @@ static const struct snd_sof_debugfs_map byt_debugfs[] = {
SOF_DEBUGFS_ACCESS_D0_ONLY},
{"dram", BYT_DSP_BAR, DRAM_OFFSET, DRAM_SIZE,
SOF_DEBUGFS_ACCESS_D0_ONLY},
- {"shim", BYT_DSP_BAR, SHIM_OFFSET, SHIM_SIZE,
+ {"shim", BYT_DSP_BAR, SHIM_OFFSET, SHIM_SIZE_BYT,
SOF_DEBUGFS_ACCESS_ALWAYS},
};
@@ -102,7 +103,7 @@ static const struct snd_sof_debugfs_map cht_debugfs[] = {
SOF_DEBUGFS_ACCESS_D0_ONLY},
{"dram", BYT_DSP_BAR, DRAM_OFFSET, DRAM_SIZE,
SOF_DEBUGFS_ACCESS_D0_ONLY},
- {"shim", BYT_DSP_BAR, SHIM_OFFSET, SHIM_SIZE,
+ {"shim", BYT_DSP_BAR, SHIM_OFFSET, SHIM_SIZE_CHT,
SOF_DEBUGFS_ACCESS_ALWAYS},
};
@@ -145,33 +146,33 @@ static void byt_dump(struct snd_sof_dev *sdev, u32 flags)
struct sof_ipc_dsp_oops_xtensa xoops;
struct sof_ipc_panic_info panic_info;
u32 stack[BYT_STACK_DUMP_SIZE];
- u32 status, panic, imrd, imrx;
+ u64 status, panic, imrd, imrx;
/* now try generic SOF status messages */
- status = snd_sof_dsp_read(sdev, BYT_DSP_BAR, SHIM_IPCD);
- panic = snd_sof_dsp_read(sdev, BYT_DSP_BAR, SHIM_IPCX);
+ status = snd_sof_dsp_read64(sdev, BYT_DSP_BAR, SHIM_IPCD);
+ panic = snd_sof_dsp_read64(sdev, BYT_DSP_BAR, SHIM_IPCX);
byt_get_registers(sdev, &xoops, &panic_info, stack,
BYT_STACK_DUMP_SIZE);
snd_sof_get_status(sdev, status, panic, &xoops, &panic_info, stack,
BYT_STACK_DUMP_SIZE);
/* provide some context for firmware debug */
- imrx = snd_sof_dsp_read(sdev, BYT_DSP_BAR, SHIM_IMRX);
- imrd = snd_sof_dsp_read(sdev, BYT_DSP_BAR, SHIM_IMRD);
+ imrx = snd_sof_dsp_read64(sdev, BYT_DSP_BAR, SHIM_IMRX);
+ imrd = snd_sof_dsp_read64(sdev, BYT_DSP_BAR, SHIM_IMRD);
dev_err(sdev->dev,
- "error: ipc host -> DSP: pending %s complete %s raw 0x%8.8x\n",
+ "error: ipc host -> DSP: pending %s complete %s raw 0x%llx\n",
(panic & SHIM_IPCX_BUSY) ? "yes" : "no",
(panic & SHIM_IPCX_DONE) ? "yes" : "no", panic);
dev_err(sdev->dev,
- "error: mask host: pending %s complete %s raw 0x%8.8x\n",
+ "error: mask host: pending %s complete %s raw 0x%llx\n",
(imrx & SHIM_IMRX_BUSY) ? "yes" : "no",
(imrx & SHIM_IMRX_DONE) ? "yes" : "no", imrx);
dev_err(sdev->dev,
- "error: ipc DSP -> host: pending %s complete %s raw 0x%8.8x\n",
+ "error: ipc DSP -> host: pending %s complete %s raw 0x%llx\n",
(status & SHIM_IPCD_BUSY) ? "yes" : "no",
(status & SHIM_IPCD_DONE) ? "yes" : "no", status);
dev_err(sdev->dev,
- "error: mask DSP: pending %s complete %s raw 0x%8.8x\n",
+ "error: mask DSP: pending %s complete %s raw 0x%llx\n",
(imrd & SHIM_IMRD_BUSY) ? "yes" : "no",
(imrd & SHIM_IMRD_DONE) ? "yes" : "no", imrd);
diff --git a/sound/soc/sof/loader.c b/sound/soc/sof/loader.c
index 9a9a381a908d..432d12bd4937 100644
--- a/sound/soc/sof/loader.c
+++ b/sound/soc/sof/loader.c
@@ -50,8 +50,7 @@ int snd_sof_fw_parse_ext_data(struct snd_sof_dev *sdev, u32 bar, u32 offset)
while (ext_hdr->hdr.cmd == SOF_IPC_FW_READY) {
/* read in ext structure */
- offset += sizeof(*ext_hdr);
- snd_sof_dsp_block_read(sdev, bar, offset,
+ snd_sof_dsp_block_read(sdev, bar, offset + sizeof(*ext_hdr),
(void *)((u8 *)ext_data + sizeof(*ext_hdr)),
ext_hdr->hdr.size - sizeof(*ext_hdr));
@@ -61,11 +60,15 @@ int snd_sof_fw_parse_ext_data(struct snd_sof_dev *sdev, u32 bar, u32 offset)
/* process structure data */
switch (ext_hdr->type) {
case SOF_IPC_EXT_DMA_BUFFER:
+ ret = 0;
break;
case SOF_IPC_EXT_WINDOW:
ret = get_ext_windows(sdev, ext_hdr);
break;
default:
+ dev_warn(sdev->dev, "warning: unknown ext header type %d size 0x%x\n",
+ ext_hdr->type, ext_hdr->hdr.size);
+ ret = 0;
break;
}
diff --git a/sound/soc/sof/topology.c b/sound/soc/sof/topology.c
index d82ab981e840..e20b806ec80f 100644
--- a/sound/soc/sof/topology.c
+++ b/sound/soc/sof/topology.c
@@ -3132,7 +3132,9 @@ found:
case SOF_DAI_INTEL_SSP:
case SOF_DAI_INTEL_DMIC:
case SOF_DAI_INTEL_ALH:
- /* no resource needs to be released for SSP, DMIC and ALH */
+ case SOF_DAI_IMX_SAI:
+ case SOF_DAI_IMX_ESAI:
+ /* no resource needs to be released for all cases above */
break;
case SOF_DAI_INTEL_HDA:
ret = sof_link_hda_unload(sdev, link);
diff --git a/sound/usb/card.h b/sound/usb/card.h
index 2991b9986f66..395403a2d33f 100644
--- a/sound/usb/card.h
+++ b/sound/usb/card.h
@@ -145,6 +145,7 @@ struct snd_usb_substream {
struct snd_usb_endpoint *sync_endpoint;
unsigned long flags;
bool need_setup_ep; /* (re)configure EP at prepare? */
+ bool need_setup_fmt; /* (re)configure fmt after resume? */
unsigned int speed; /* USB_SPEED_XXX */
u64 formats; /* format bitmasks (all or'ed) */
diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c
index 9c8930bb00c8..a11c8150af58 100644
--- a/sound/usb/pcm.c
+++ b/sound/usb/pcm.c
@@ -506,15 +506,15 @@ static int set_format(struct snd_usb_substream *subs, struct audioformat *fmt)
if (WARN_ON(!iface))
return -EINVAL;
alts = usb_altnum_to_altsetting(iface, fmt->altsetting);
- altsd = get_iface_desc(alts);
- if (WARN_ON(altsd->bAlternateSetting != fmt->altsetting))
+ if (WARN_ON(!alts))
return -EINVAL;
+ altsd = get_iface_desc(alts);
- if (fmt == subs->cur_audiofmt)
+ if (fmt == subs->cur_audiofmt && !subs->need_setup_fmt)
return 0;
/* close the old interface */
- if (subs->interface >= 0 && subs->interface != fmt->iface) {
+ if (subs->interface >= 0 && (subs->interface != fmt->iface || subs->need_setup_fmt)) {
if (!subs->stream->chip->keep_iface) {
err = usb_set_interface(subs->dev, subs->interface, 0);
if (err < 0) {
@@ -528,6 +528,9 @@ static int set_format(struct snd_usb_substream *subs, struct audioformat *fmt)
subs->altset_idx = 0;
}
+ if (subs->need_setup_fmt)
+ subs->need_setup_fmt = false;
+
/* set interface */
if (iface->cur_altsetting != alts) {
err = snd_usb_select_mode_quirk(subs, fmt);
@@ -1728,6 +1731,13 @@ static int snd_usb_substream_playback_trigger(struct snd_pcm_substream *substrea
subs->data_endpoint->retire_data_urb = retire_playback_urb;
subs->running = 0;
return 0;
+ case SNDRV_PCM_TRIGGER_SUSPEND:
+ if (subs->stream->chip->setup_fmt_after_resume_quirk) {
+ stop_endpoints(subs, true);
+ subs->need_setup_fmt = true;
+ return 0;
+ }
+ break;
}
return -EINVAL;
@@ -1760,6 +1770,13 @@ static int snd_usb_substream_capture_trigger(struct snd_pcm_substream *substream
subs->data_endpoint->retire_data_urb = retire_capture_urb;
subs->running = 1;
return 0;
+ case SNDRV_PCM_TRIGGER_SUSPEND:
+ if (subs->stream->chip->setup_fmt_after_resume_quirk) {
+ stop_endpoints(subs, true);
+ subs->need_setup_fmt = true;
+ return 0;
+ }
+ break;
}
return -EINVAL;
diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h
index 70c338f3ae24..d187aa6d50db 100644
--- a/sound/usb/quirks-table.h
+++ b/sound/usb/quirks-table.h
@@ -3466,7 +3466,8 @@ AU0828_DEVICE(0x2040, 0x7270, "Hauppauge", "HVR-950Q"),
.vendor_name = "Dell",
.product_name = "WD19 Dock",
.profile_name = "Dell-WD15-Dock",
- .ifnum = QUIRK_NO_INTERFACE
+ .ifnum = QUIRK_ANY_INTERFACE,
+ .type = QUIRK_SETUP_FMT_AFTER_RESUME
}
},
/* MOTU Microbook II */
diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c
index 349e1e52996d..a81c2066499f 100644
--- a/sound/usb/quirks.c
+++ b/sound/usb/quirks.c
@@ -508,6 +508,16 @@ static int create_standard_mixer_quirk(struct snd_usb_audio *chip,
return snd_usb_create_mixer(chip, quirk->ifnum, 0);
}
+
+static int setup_fmt_after_resume_quirk(struct snd_usb_audio *chip,
+ struct usb_interface *iface,
+ struct usb_driver *driver,
+ const struct snd_usb_audio_quirk *quirk)
+{
+ chip->setup_fmt_after_resume_quirk = 1;
+ return 1; /* Continue with creating streams and mixer */
+}
+
/*
* audio-interface quirks
*
@@ -546,6 +556,7 @@ int snd_usb_create_quirk(struct snd_usb_audio *chip,
[QUIRK_AUDIO_EDIROL_UAXX] = create_uaxx_quirk,
[QUIRK_AUDIO_ALIGN_TRANSFER] = create_align_transfer_quirk,
[QUIRK_AUDIO_STANDARD_MIXER] = create_standard_mixer_quirk,
+ [QUIRK_SETUP_FMT_AFTER_RESUME] = setup_fmt_after_resume_quirk,
};
if (quirk->type < QUIRK_TYPE_COUNT) {
diff --git a/sound/usb/usbaudio.h b/sound/usb/usbaudio.h
index ff3cbf653de8..6fe3ab582ec6 100644
--- a/sound/usb/usbaudio.h
+++ b/sound/usb/usbaudio.h
@@ -33,7 +33,7 @@ struct snd_usb_audio {
wait_queue_head_t shutdown_wait;
unsigned int txfr_quirk:1; /* Subframe boundaries on transfers */
unsigned int tx_length_quirk:1; /* Put length specifier in transfers */
-
+ unsigned int setup_fmt_after_resume_quirk:1; /* setup the format to interface after resume */
int num_interfaces;
int num_suspended_intf;
int sample_rate_read_error;
@@ -98,6 +98,7 @@ enum quirk_type {
QUIRK_AUDIO_EDIROL_UAXX,
QUIRK_AUDIO_ALIGN_TRANSFER,
QUIRK_AUDIO_STANDARD_MIXER,
+ QUIRK_SETUP_FMT_AFTER_RESUME,
QUIRK_TYPE_COUNT
};
diff --git a/tools/arch/arm/include/uapi/asm/kvm.h b/tools/arch/arm/include/uapi/asm/kvm.h
index 2769360f195c..03cd7c19a683 100644
--- a/tools/arch/arm/include/uapi/asm/kvm.h
+++ b/tools/arch/arm/include/uapi/asm/kvm.h
@@ -131,8 +131,9 @@ struct kvm_vcpu_events {
struct {
__u8 serror_pending;
__u8 serror_has_esr;
+ __u8 ext_dabt_pending;
/* Align it to 8 bytes */
- __u8 pad[6];
+ __u8 pad[5];
__u64 serror_esr;
} exception;
__u32 reserved[12];
diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h
index 67c21f9bdbad..820e5751ada7 100644
--- a/tools/arch/arm64/include/uapi/asm/kvm.h
+++ b/tools/arch/arm64/include/uapi/asm/kvm.h
@@ -164,8 +164,9 @@ struct kvm_vcpu_events {
struct {
__u8 serror_pending;
__u8 serror_has_esr;
+ __u8 ext_dabt_pending;
/* Align it to 8 bytes */
- __u8 pad[6];
+ __u8 pad[5];
__u64 serror_esr;
} exception;
__u32 reserved[12];
@@ -323,6 +324,8 @@ struct kvm_vcpu_events {
#define KVM_ARM_VCPU_TIMER_CTRL 1
#define KVM_ARM_VCPU_TIMER_IRQ_VTIMER 0
#define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1
+#define KVM_ARM_VCPU_PVTIME_CTRL 2
+#define KVM_ARM_VCPU_PVTIME_IPA 0
/* KVM_IRQ_LINE irq field index values */
#define KVM_ARM_IRQ_VCPU2_SHIFT 28
diff --git a/tools/arch/powerpc/include/uapi/asm/kvm.h b/tools/arch/powerpc/include/uapi/asm/kvm.h
index b0f72dea8b11..264e266a85bf 100644
--- a/tools/arch/powerpc/include/uapi/asm/kvm.h
+++ b/tools/arch/powerpc/include/uapi/asm/kvm.h
@@ -667,6 +667,8 @@ struct kvm_ppc_cpu_char {
/* PPC64 eXternal Interrupt Controller Specification */
#define KVM_DEV_XICS_GRP_SOURCES 1 /* 64-bit source attributes */
+#define KVM_DEV_XICS_GRP_CTRL 2
+#define KVM_DEV_XICS_NR_SERVERS 1
/* Layout of 64-bit source attribute values */
#define KVM_XICS_DESTINATION_SHIFT 0
@@ -683,6 +685,7 @@ struct kvm_ppc_cpu_char {
#define KVM_DEV_XIVE_GRP_CTRL 1
#define KVM_DEV_XIVE_RESET 1
#define KVM_DEV_XIVE_EQ_SYNC 2
+#define KVM_DEV_XIVE_NR_SERVERS 3
#define KVM_DEV_XIVE_GRP_SOURCE 2 /* 64-bit source identifier */
#define KVM_DEV_XIVE_GRP_SOURCE_CONFIG 3 /* 64-bit source identifier */
#define KVM_DEV_XIVE_GRP_EQ_CONFIG 4 /* 64-bit EQ identifier */
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index 0652d3eed9bd..e9b62498fe75 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -292,6 +292,7 @@
#define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */
#define X86_FEATURE_IRPERF (13*32+ 1) /* Instructions Retired Count */
#define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* Always save/restore FP error pointers */
+#define X86_FEATURE_RDPRU (13*32+ 4) /* Read processor register at user level */
#define X86_FEATURE_WBNOINVD (13*32+ 9) /* WBNOINVD instruction */
#define X86_FEATURE_AMD_IBPB (13*32+12) /* "" Indirect Branch Prediction Barrier */
#define X86_FEATURE_AMD_IBRS (13*32+14) /* "" Indirect Branch Restricted Speculation */
@@ -399,5 +400,7 @@
#define X86_BUG_MDS X86_BUG(19) /* CPU is affected by Microarchitectural data sampling */
#define X86_BUG_MSBDS_ONLY X86_BUG(20) /* CPU is only affected by the MSDBS variant of BUG_MDS */
#define X86_BUG_SWAPGS X86_BUG(21) /* CPU is affected by speculation through SWAPGS */
+#define X86_BUG_TAA X86_BUG(22) /* CPU is affected by TSX Async Abort(TAA) */
+#define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */
#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h
index 20ce682a2540..084e98da04a7 100644
--- a/tools/arch/x86/include/asm/msr-index.h
+++ b/tools/arch/x86/include/asm/msr-index.h
@@ -93,6 +93,18 @@
* Microarchitectural Data
* Sampling (MDS) vulnerabilities.
*/
+#define ARCH_CAP_PSCHANGE_MC_NO BIT(6) /*
+ * The processor is not susceptible to a
+ * machine check error due to modifying the
+ * code page size along with either the
+ * physical address or cache type
+ * without TLB invalidation.
+ */
+#define ARCH_CAP_TSX_CTRL_MSR BIT(7) /* MSR for TSX control is available. */
+#define ARCH_CAP_TAA_NO BIT(8) /*
+ * Not susceptible to
+ * TSX Async Abort (TAA) vulnerabilities.
+ */
#define MSR_IA32_FLUSH_CMD 0x0000010b
#define L1D_FLUSH BIT(0) /*
@@ -103,6 +115,10 @@
#define MSR_IA32_BBL_CR_CTL 0x00000119
#define MSR_IA32_BBL_CR_CTL3 0x0000011e
+#define MSR_IA32_TSX_CTRL 0x00000122
+#define TSX_CTRL_RTM_DISABLE BIT(0) /* Disable RTM feature */
+#define TSX_CTRL_CPUID_CLEAR BIT(1) /* Disable TSX enumeration */
+
#define MSR_IA32_SYSENTER_CS 0x00000174
#define MSR_IA32_SYSENTER_ESP 0x00000175
#define MSR_IA32_SYSENTER_EIP 0x00000176
@@ -393,6 +409,8 @@
#define MSR_AMD_PSTATE_DEF_BASE 0xc0010064
#define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140
#define MSR_AMD64_OSVW_STATUS 0xc0010141
+#define MSR_AMD_PPIN_CTL 0xc00102f0
+#define MSR_AMD_PPIN 0xc00102f1
#define MSR_AMD64_LS_CFG 0xc0011020
#define MSR_AMD64_DC_CFG 0xc0011022
#define MSR_AMD64_BU_CFG2 0xc001102a
diff --git a/tools/arch/x86/lib/memcpy_64.S b/tools/arch/x86/lib/memcpy_64.S
index 92748660ba51..df767afc690f 100644
--- a/tools/arch/x86/lib/memcpy_64.S
+++ b/tools/arch/x86/lib/memcpy_64.S
@@ -28,8 +28,8 @@
* Output:
* rax original destination
*/
-ENTRY(__memcpy)
-ENTRY(memcpy)
+SYM_FUNC_START_ALIAS(__memcpy)
+SYM_FUNC_START_LOCAL(memcpy)
ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \
"jmp memcpy_erms", X86_FEATURE_ERMS
@@ -41,8 +41,8 @@ ENTRY(memcpy)
movl %edx, %ecx
rep movsb
ret
-ENDPROC(memcpy)
-ENDPROC(__memcpy)
+SYM_FUNC_END(memcpy)
+SYM_FUNC_END_ALIAS(__memcpy)
EXPORT_SYMBOL(memcpy)
EXPORT_SYMBOL(__memcpy)
@@ -50,14 +50,14 @@ EXPORT_SYMBOL(__memcpy)
* memcpy_erms() - enhanced fast string memcpy. This is faster and
* simpler than memcpy. Use memcpy_erms when possible.
*/
-ENTRY(memcpy_erms)
+SYM_FUNC_START(memcpy_erms)
movq %rdi, %rax
movq %rdx, %rcx
rep movsb
ret
-ENDPROC(memcpy_erms)
+SYM_FUNC_END(memcpy_erms)
-ENTRY(memcpy_orig)
+SYM_FUNC_START(memcpy_orig)
movq %rdi, %rax
cmpq $0x20, %rdx
@@ -182,7 +182,7 @@ ENTRY(memcpy_orig)
.Lend:
retq
-ENDPROC(memcpy_orig)
+SYM_FUNC_END(memcpy_orig)
#ifndef CONFIG_UML
@@ -193,7 +193,7 @@ MCSAFE_TEST_CTL
* Note that we only catch machine checks when reading the source addresses.
* Writes to target are posted and don't generate machine checks.
*/
-ENTRY(__memcpy_mcsafe)
+SYM_FUNC_START(__memcpy_mcsafe)
cmpl $8, %edx
/* Less than 8 bytes? Go to byte copy loop */
jb .L_no_whole_words
@@ -260,7 +260,7 @@ ENTRY(__memcpy_mcsafe)
xorl %eax, %eax
.L_done:
ret
-ENDPROC(__memcpy_mcsafe)
+SYM_FUNC_END(__memcpy_mcsafe)
EXPORT_SYMBOL_GPL(__memcpy_mcsafe)
.section .fixup, "ax"
diff --git a/tools/arch/x86/lib/memset_64.S b/tools/arch/x86/lib/memset_64.S
index f8f3dc0a6690..fd5d25a474b7 100644
--- a/tools/arch/x86/lib/memset_64.S
+++ b/tools/arch/x86/lib/memset_64.S
@@ -18,8 +18,8 @@
*
* rax original destination
*/
-ENTRY(memset)
-ENTRY(__memset)
+SYM_FUNC_START_ALIAS(memset)
+SYM_FUNC_START(__memset)
/*
* Some CPUs support enhanced REP MOVSB/STOSB feature. It is recommended
* to use it when possible. If not available, use fast string instructions.
@@ -42,8 +42,8 @@ ENTRY(__memset)
rep stosb
movq %r9,%rax
ret
-ENDPROC(memset)
-ENDPROC(__memset)
+SYM_FUNC_END(__memset)
+SYM_FUNC_END_ALIAS(memset)
/*
* ISO C memset - set a memory block to a byte value. This function uses
@@ -56,16 +56,16 @@ ENDPROC(__memset)
*
* rax original destination
*/
-ENTRY(memset_erms)
+SYM_FUNC_START(memset_erms)
movq %rdi,%r9
movb %sil,%al
movq %rdx,%rcx
rep stosb
movq %r9,%rax
ret
-ENDPROC(memset_erms)
+SYM_FUNC_END(memset_erms)
-ENTRY(memset_orig)
+SYM_FUNC_START(memset_orig)
movq %rdi,%r10
/* expand byte value */
@@ -136,4 +136,4 @@ ENTRY(memset_orig)
subq %r8,%rdx
jmp .Lafter_bad_alignment
.Lfinal:
-ENDPROC(memset_orig)
+SYM_FUNC_END(memset_orig)
diff --git a/tools/bpf/bpftool/Documentation/bpftool-gen.rst b/tools/bpf/bpftool/Documentation/bpftool-gen.rst
new file mode 100644
index 000000000000..86a87da97d0b
--- /dev/null
+++ b/tools/bpf/bpftool/Documentation/bpftool-gen.rst
@@ -0,0 +1,305 @@
+================
+bpftool-gen
+================
+-------------------------------------------------------------------------------
+tool for BPF code-generation
+-------------------------------------------------------------------------------
+
+:Manual section: 8
+
+SYNOPSIS
+========
+
+ **bpftool** [*OPTIONS*] **gen** *COMMAND*
+
+ *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] }
+
+ *COMMAND* := { **skeleton | **help** }
+
+GEN COMMANDS
+=============
+
+| **bpftool** **gen skeleton** *FILE*
+| **bpftool** **gen help**
+
+DESCRIPTION
+===========
+ **bpftool gen skeleton** *FILE*
+ Generate BPF skeleton C header file for a given *FILE*.
+
+ BPF skeleton is an alternative interface to existing libbpf
+ APIs for working with BPF objects. Skeleton code is intended
+ to significantly shorten and simplify code to load and work
+ with BPF programs from userspace side. Generated code is
+ tailored to specific input BPF object *FILE*, reflecting its
+ structure by listing out available maps, program, variables,
+ etc. Skeleton eliminates the need to lookup mentioned
+ components by name. Instead, if skeleton instantiation
+ succeeds, they are populated in skeleton structure as valid
+ libbpf types (e.g., struct bpf_map pointer) and can be
+ passed to existing generic libbpf APIs.
+
+ In addition to simple and reliable access to maps and
+ programs, skeleton provides a storage for BPF links (struct
+ bpf_link) for each BPF program within BPF object. When
+ requested, supported BPF programs will be automatically
+ attached and resulting BPF links stored for further use by
+ user in pre-allocated fields in skeleton struct. For BPF
+ programs that can't be automatically attached by libbpf,
+ user can attach them manually, but store resulting BPF link
+ in per-program link field. All such set up links will be
+ automatically destroyed on BPF skeleton destruction. This
+ eliminates the need for users to manage links manually and
+ rely on libbpf support to detach programs and free up
+ resources.
+
+ Another facility provided by BPF skeleton is an interface to
+ global variables of all supported kinds: mutable, read-only,
+ as well as extern ones. This interface allows to pre-setup
+ initial values of variables before BPF object is loaded and
+ verified by kernel. For non-read-only variables, the same
+ interface can be used to fetch values of global variables on
+ userspace side, even if they are modified by BPF code.
+
+ During skeleton generation, contents of source BPF object
+ *FILE* is embedded within generated code and is thus not
+ necessary to keep around. This ensures skeleton and BPF
+ object file are matching 1-to-1 and always stay in sync.
+ Generated code is dual-licensed under LGPL-2.1 and
+ BSD-2-Clause licenses.
+
+ It is a design goal and guarantee that skeleton interfaces
+ are interoperable with generic libbpf APIs. User should
+ always be able to use skeleton API to create and load BPF
+ object, and later use libbpf APIs to keep working with
+ specific maps, programs, etc.
+
+ As part of skeleton, few custom functions are generated.
+ Each of them is prefixed with object name, derived from
+ object file name. I.e., if BPF object file name is
+ **example.o**, BPF object name will be **example**. The
+ following custom functions are provided in such case:
+
+ - **example__open** and **example__open_opts**.
+ These functions are used to instantiate skeleton. It
+ corresponds to libbpf's **bpf_object__open()** API.
+ **_opts** variants accepts extra **bpf_object_open_opts**
+ options.
+
+ - **example__load**.
+ This function creates maps, loads and verifies BPF
+ programs, initializes global data maps. It corresponds to
+ libppf's **bpf_object__load** API.
+
+ - **example__open_and_load** combines **example__open** and
+ **example__load** invocations in one commonly used
+ operation.
+
+ - **example__attach** and **example__detach**
+ This pair of functions allow to attach and detach,
+ correspondingly, already loaded BPF object. Only BPF
+ programs of types supported by libbpf for auto-attachment
+ will be auto-attached and their corresponding BPF links
+ instantiated. For other BPF programs, user can manually
+ create a BPF link and assign it to corresponding fields in
+ skeleton struct. **example__detach** will detach both
+ links created automatically, as well as those populated by
+ user manually.
+
+ - **example__destroy**
+ Detach and unload BPF programs, free up all the resources
+ used by skeleton and BPF object.
+
+ If BPF object has global variables, corresponding structs
+ with memory layout corresponding to global data data section
+ layout will be created. Currently supported ones are: *.data*,
+ *.bss*, *.rodata*, and *.kconfig* structs/data sections.
+ These data sections/structs can be used to set up initial
+ values of variables, if set before **example__load**.
+ Afterwards, if target kernel supports memory-mapped BPF
+ arrays, same structs can be used to fetch and update
+ (non-read-only) data from userspace, with same simplicity
+ as for BPF side.
+
+ **bpftool gen help**
+ Print short help message.
+
+OPTIONS
+=======
+ -h, --help
+ Print short generic help message (similar to **bpftool help**).
+
+ -V, --version
+ Print version number (similar to **bpftool version**).
+
+ -j, --json
+ Generate JSON output. For commands that cannot produce JSON,
+ this option has no effect.
+
+ -p, --pretty
+ Generate human-readable JSON output. Implies **-j**.
+
+ -d, --debug
+ Print all logs available from libbpf, including debug-level
+ information.
+
+EXAMPLES
+========
+**$ cat example.c**
+::
+
+ #include <stdbool.h>
+ #include <linux/ptrace.h>
+ #include <linux/bpf.h>
+ #include "bpf_helpers.h"
+
+ const volatile int param1 = 42;
+ bool global_flag = true;
+ struct { int x; } data = {};
+
+ struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 128);
+ __type(key, int);
+ __type(value, long);
+ } my_map SEC(".maps");
+
+ SEC("raw_tp/sys_enter")
+ int handle_sys_enter(struct pt_regs *ctx)
+ {
+ static long my_static_var;
+ if (global_flag)
+ my_static_var++;
+ else
+ data.x += param1;
+ return 0;
+ }
+
+ SEC("raw_tp/sys_exit")
+ int handle_sys_exit(struct pt_regs *ctx)
+ {
+ int zero = 0;
+ bpf_map_lookup_elem(&my_map, &zero);
+ return 0;
+ }
+
+This is example BPF application with two BPF programs and a mix of BPF maps
+and global variables.
+
+**$ bpftool gen skeleton example.o**
+::
+
+ /* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+
+ /* THIS FILE IS AUTOGENERATED! */
+ #ifndef __EXAMPLE_SKEL_H__
+ #define __EXAMPLE_SKEL_H__
+
+ #include <stdlib.h>
+ #include <libbpf.h>
+
+ struct example {
+ struct bpf_object_skeleton *skeleton;
+ struct bpf_object *obj;
+ struct {
+ struct bpf_map *rodata;
+ struct bpf_map *data;
+ struct bpf_map *bss;
+ struct bpf_map *my_map;
+ } maps;
+ struct {
+ struct bpf_program *handle_sys_enter;
+ struct bpf_program *handle_sys_exit;
+ } progs;
+ struct {
+ struct bpf_link *handle_sys_enter;
+ struct bpf_link *handle_sys_exit;
+ } links;
+ struct example__bss {
+ struct {
+ int x;
+ } data;
+ } *bss;
+ struct example__data {
+ _Bool global_flag;
+ long int handle_sys_enter_my_static_var;
+ } *data;
+ struct example__rodata {
+ int param1;
+ } *rodata;
+ };
+
+ static void example__destroy(struct example *obj);
+ static inline struct example *example__open_opts(
+ const struct bpf_object_open_opts *opts);
+ static inline struct example *example__open();
+ static inline int example__load(struct example *obj);
+ static inline struct example *example__open_and_load();
+ static inline int example__attach(struct example *obj);
+ static inline void example__detach(struct example *obj);
+
+ #endif /* __EXAMPLE_SKEL_H__ */
+
+**$ cat example_user.c**
+::
+
+ #include "example.skel.h"
+
+ int main()
+ {
+ struct example *skel;
+ int err = 0;
+
+ skel = example__open();
+ if (!skel)
+ goto cleanup;
+
+ skel->rodata->param1 = 128;
+
+ err = example__load(skel);
+ if (err)
+ goto cleanup;
+
+ err = example__attach(skel);
+ if (err)
+ goto cleanup;
+
+ /* all libbpf APIs are usable */
+ printf("my_map name: %s\n", bpf_map__name(skel->maps.my_map));
+ printf("sys_enter prog FD: %d\n",
+ bpf_program__fd(skel->progs.handle_sys_enter));
+
+ /* detach and re-attach sys_exit program */
+ bpf_link__destroy(skel->links.handle_sys_exit);
+ skel->links.handle_sys_exit =
+ bpf_program__attach(skel->progs.handle_sys_exit);
+
+ printf("my_static_var: %ld\n",
+ skel->bss->handle_sys_enter_my_static_var);
+
+ cleanup:
+ example__destroy(skel);
+ return err;
+ }
+
+**# ./example_user**
+::
+
+ my_map name: my_map
+ sys_enter prog FD: 8
+ my_static_var: 7
+
+This is a stripped-out version of skeleton generated for above example code.
+
+SEE ALSO
+========
+ **bpf**\ (2),
+ **bpf-helpers**\ (7),
+ **bpftool**\ (8),
+ **bpftool-map**\ (8),
+ **bpftool-prog**\ (8),
+ **bpftool-cgroup**\ (8),
+ **bpftool-feature**\ (8),
+ **bpftool-net**\ (8),
+ **bpftool-perf**\ (8),
+ **bpftool-btf**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst
index 1c0f7146aab0..cdeae8ae90ba 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-map.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst
@@ -39,9 +39,9 @@ MAP COMMANDS
| **bpftool** **map freeze** *MAP*
| **bpftool** **map help**
|
-| *MAP* := { **id** *MAP_ID* | **pinned** *FILE* }
+| *MAP* := { **id** *MAP_ID* | **pinned** *FILE* | **name** *MAP_NAME* }
| *DATA* := { [**hex**] *BYTES* }
-| *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* }
+| *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* | **name** *PROG_NAME* }
| *VALUE* := { *DATA* | *MAP* | *PROG* }
| *UPDATE_FLAGS* := { **any** | **exist** | **noexist** }
| *TYPE* := { **hash** | **array** | **prog_array** | **perf_event_array** | **percpu_hash**
@@ -55,8 +55,9 @@ DESCRIPTION
===========
**bpftool map { show | list }** [*MAP*]
Show information about loaded maps. If *MAP* is specified
- show information only about given map, otherwise list all
- maps currently loaded on the system.
+ show information only about given maps, otherwise list all
+ maps currently loaded on the system. In case of **name**,
+ *MAP* may match several maps which will all be shown.
Output will start with map ID followed by map type and
zero or more named attributes (depending on kernel version).
@@ -66,7 +67,8 @@ DESCRIPTION
as *FILE*.
**bpftool map dump** *MAP*
- Dump all entries in a given *MAP*.
+ Dump all entries in a given *MAP*. In case of **name**,
+ *MAP* may match several maps which will all be dumped.
**bpftool map update** *MAP* [**key** *DATA*] [**value** *VALUE*] [*UPDATE_FLAGS*]
Update map entry for a given *KEY*.
diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
index 7a374b3c851d..64ddf8a4c518 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
@@ -33,7 +33,7 @@ PROG COMMANDS
| **bpftool** **prog help**
|
| *MAP* := { **id** *MAP_ID* | **pinned** *FILE* }
-| *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* }
+| *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* | **name** *PROG_NAME* }
| *TYPE* := {
| **socket** | **kprobe** | **kretprobe** | **classifier** | **action** |
| **tracepoint** | **raw_tracepoint** | **xdp** | **perf_event** | **cgroup/skb** |
@@ -53,8 +53,10 @@ DESCRIPTION
===========
**bpftool prog { show | list }** [*PROG*]
Show information about loaded programs. If *PROG* is
- specified show information only about given program, otherwise
- list all programs currently loaded on the system.
+ specified show information only about given programs,
+ otherwise list all programs currently loaded on the system.
+ In case of **tag** or **name**, *PROG* may match several
+ programs which will all be shown.
Output will start with program ID followed by program type and
zero or more named attributes (depending on kernel version).
@@ -68,11 +70,15 @@ DESCRIPTION
performed via the **kernel.bpf_stats_enabled** sysctl knob.
**bpftool prog dump xlated** *PROG* [{ **file** *FILE* | **opcodes** | **visual** | **linum** }]
- Dump eBPF instructions of the program from the kernel. By
+ Dump eBPF instructions of the programs from the kernel. By
default, eBPF will be disassembled and printed to standard
output in human-readable format. In this case, **opcodes**
controls if raw opcodes should be printed as well.
+ In case of **tag** or **name**, *PROG* may match several
+ programs which will all be dumped. However, if **file** or
+ **visual** is specified, *PROG* must match a single program.
+
If **file** is specified, the binary image will instead be
written to *FILE*.
@@ -80,15 +86,17 @@ DESCRIPTION
built instead, and eBPF instructions will be presented with
CFG in DOT format, on standard output.
- If the prog has line_info available, the source line will
+ If the programs have line_info available, the source line will
be displayed by default. If **linum** is specified,
the filename, line number and line column will also be
displayed on top of the source line.
**bpftool prog dump jited** *PROG* [{ **file** *FILE* | **opcodes** | **linum** }]
Dump jited image (host machine code) of the program.
+
If *FILE* is specified image will be written to a file,
otherwise it will be disassembled and printed to stdout.
+ *PROG* must match a single program when **file** is specified.
**opcodes** controls if raw opcodes will be printed.
diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst b/tools/bpf/bpftool/Documentation/bpftool.rst
index 6a9c52ef84a9..34239fda69ed 100644
--- a/tools/bpf/bpftool/Documentation/bpftool.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool.rst
@@ -81,4 +81,5 @@ SEE ALSO
**bpftool-feature**\ (8),
**bpftool-net**\ (8),
**bpftool-perf**\ (8),
- **bpftool-btf**\ (8)
+ **bpftool-btf**\ (8),
+ **bpftool-gen**\ (8),
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index 70493a6da206..754d8395e451 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -59,6 +59,21 @@ _bpftool_get_map_ids_for_type()
command sed -n 's/.*"id": \(.*\),$/\1/p' )" -- "$cur" ) )
}
+_bpftool_get_map_names()
+{
+ COMPREPLY+=( $( compgen -W "$( bpftool -jp map 2>&1 | \
+ command sed -n 's/.*"name": \(.*\),$/\1/p' )" -- "$cur" ) )
+}
+
+# Takes map type and adds matching map names to the list of suggestions.
+_bpftool_get_map_names_for_type()
+{
+ local type="$1"
+ COMPREPLY+=( $( compgen -W "$( bpftool -jp map 2>&1 | \
+ command grep -C2 "$type" | \
+ command sed -n 's/.*"name": \(.*\),$/\1/p' )" -- "$cur" ) )
+}
+
_bpftool_get_prog_ids()
{
COMPREPLY+=( $( compgen -W "$( bpftool -jp prog 2>&1 | \
@@ -71,6 +86,12 @@ _bpftool_get_prog_tags()
command sed -n 's/.*"tag": "\(.*\)",$/\1/p' )" -- "$cur" ) )
}
+_bpftool_get_prog_names()
+{
+ COMPREPLY+=( $( compgen -W "$( bpftool -jp prog 2>&1 | \
+ command sed -n 's/.*"name": "\(.*\)",$/\1/p' )" -- "$cur" ) )
+}
+
_bpftool_get_btf_ids()
{
COMPREPLY+=( $( compgen -W "$( bpftool -jp btf 2>&1 | \
@@ -180,6 +201,52 @@ _bpftool_map_update_get_id()
esac
}
+_bpftool_map_update_get_name()
+{
+ local command="$1"
+
+ # Is it the map to update, or a map to insert into the map to update?
+ # Search for "value" keyword.
+ local idx value
+ for (( idx=7; idx < ${#words[@]}-1; idx++ )); do
+ if [[ ${words[idx]} == "value" ]]; then
+ value=1
+ break
+ fi
+ done
+ if [[ $value -eq 0 ]]; then
+ case "$command" in
+ push)
+ _bpftool_get_map_names_for_type stack
+ ;;
+ enqueue)
+ _bpftool_get_map_names_for_type queue
+ ;;
+ *)
+ _bpftool_get_map_names
+ ;;
+ esac
+ return 0
+ fi
+
+ # Name to complete is for a value. It can be either prog name or map name. This
+ # depends on the type of the map to update.
+ local type=$(_bpftool_map_guess_map_type)
+ case $type in
+ array_of_maps|hash_of_maps)
+ _bpftool_get_map_names
+ return 0
+ ;;
+ prog_array)
+ _bpftool_get_prog_names
+ return 0
+ ;;
+ *)
+ return 0
+ ;;
+ esac
+}
+
_bpftool()
{
local cur prev words objword
@@ -251,7 +318,8 @@ _bpftool()
# Completion depends on object and command in use
case $object in
prog)
- # Complete id, only for subcommands that use prog (but no map) ids
+ # Complete id and name, only for subcommands that use prog (but no
+ # map) ids/names.
case $command in
show|list|dump|pin)
case $prev in
@@ -259,12 +327,16 @@ _bpftool()
_bpftool_get_prog_ids
return 0
;;
+ name)
+ _bpftool_get_prog_names
+ return 0
+ ;;
esac
;;
esac
- local PROG_TYPE='id pinned tag'
- local MAP_TYPE='id pinned'
+ local PROG_TYPE='id pinned tag name'
+ local MAP_TYPE='id pinned name'
case $command in
show|list)
[[ $prev != "$command" ]] && return 0
@@ -315,6 +387,9 @@ _bpftool()
id)
_bpftool_get_prog_ids
;;
+ name)
+ _bpftool_get_map_names
+ ;;
pinned)
_filedir
;;
@@ -335,6 +410,9 @@ _bpftool()
id)
_bpftool_get_map_ids
;;
+ name)
+ _bpftool_get_map_names
+ ;;
pinned)
_filedir
;;
@@ -399,6 +477,10 @@ _bpftool()
_bpftool_get_map_ids
return 0
;;
+ name)
+ _bpftool_get_map_names
+ return 0
+ ;;
pinned|pinmaps)
_filedir
return 0
@@ -447,7 +529,7 @@ _bpftool()
esac
;;
map)
- local MAP_TYPE='id pinned'
+ local MAP_TYPE='id pinned name'
case $command in
show|list|dump|peek|pop|dequeue|freeze)
case $prev in
@@ -473,6 +555,24 @@ _bpftool()
esac
return 0
;;
+ name)
+ case "$command" in
+ peek)
+ _bpftool_get_map_names_for_type stack
+ _bpftool_get_map_names_for_type queue
+ ;;
+ pop)
+ _bpftool_get_map_names_for_type stack
+ ;;
+ dequeue)
+ _bpftool_get_map_names_for_type queue
+ ;;
+ *)
+ _bpftool_get_map_names
+ ;;
+ esac
+ return 0
+ ;;
*)
return 0
;;
@@ -520,6 +620,10 @@ _bpftool()
_bpftool_get_map_ids
return 0
;;
+ name)
+ _bpftool_get_map_names
+ return 0
+ ;;
key)
COMPREPLY+=( $( compgen -W 'hex' -- "$cur" ) )
;;
@@ -545,6 +649,10 @@ _bpftool()
_bpftool_map_update_get_id $command
return 0
;;
+ name)
+ _bpftool_map_update_get_name $command
+ return 0
+ ;;
key)
COMPREPLY+=( $( compgen -W 'hex' -- "$cur" ) )
;;
@@ -553,13 +661,13 @@ _bpftool()
# map, depending on the type of the map to update.
case "$(_bpftool_map_guess_map_type)" in
array_of_maps|hash_of_maps)
- local MAP_TYPE='id pinned'
+ local MAP_TYPE='id pinned name'
COMPREPLY+=( $( compgen -W "$MAP_TYPE" \
-- "$cur" ) )
return 0
;;
prog_array)
- local PROG_TYPE='id pinned tag'
+ local PROG_TYPE='id pinned tag name'
COMPREPLY+=( $( compgen -W "$PROG_TYPE" \
-- "$cur" ) )
return 0
@@ -621,6 +729,10 @@ _bpftool()
_bpftool_get_map_ids_for_type perf_event_array
return 0
;;
+ name)
+ _bpftool_get_map_names_for_type perf_event_array
+ return 0
+ ;;
cpu)
return 0
;;
@@ -644,8 +756,8 @@ _bpftool()
esac
;;
btf)
- local PROG_TYPE='id pinned tag'
- local MAP_TYPE='id pinned'
+ local PROG_TYPE='id pinned tag name'
+ local MAP_TYPE='id pinned name'
case $command in
dump)
case $prev in
@@ -676,6 +788,17 @@ _bpftool()
esac
return 0
;;
+ name)
+ case $pprev in
+ prog)
+ _bpftool_get_prog_names
+ ;;
+ map)
+ _bpftool_get_map_names
+ ;;
+ esac
+ return 0
+ ;;
format)
COMPREPLY=( $( compgen -W "c raw" -- "$cur" ) )
;;
@@ -716,6 +839,17 @@ _bpftool()
;;
esac
;;
+ gen)
+ case $command in
+ skeleton)
+ _filedir
+ ;;
+ *)
+ [[ $prev == $object ]] && \
+ COMPREPLY=( $( compgen -W 'skeleton help' -- "$cur" ) )
+ ;;
+ esac
+ ;;
cgroup)
case $command in
show|list|tree)
@@ -735,7 +869,7 @@ _bpftool()
connect6 sendmsg4 sendmsg6 recvmsg4 recvmsg6 sysctl \
getsockopt setsockopt'
local ATTACH_FLAGS='multi override'
- local PROG_TYPE='id pinned tag'
+ local PROG_TYPE='id pinned tag name'
case $prev in
$command)
_filedir
@@ -760,7 +894,7 @@ _bpftool()
elif [[ "$command" == "attach" ]]; then
# We have an attach type on the command line,
# but it is not the previous word, or
- # "id|pinned|tag" (we already checked for
+ # "id|pinned|tag|name" (we already checked for
# that). This should only leave the case when
# we need attach flags for "attach" commamnd.
_bpftool_one_of_list "$ATTACH_FLAGS"
@@ -786,7 +920,7 @@ _bpftool()
esac
;;
net)
- local PROG_TYPE='id pinned tag'
+ local PROG_TYPE='id pinned tag name'
local ATTACH_TYPES='xdp xdpgeneric xdpdrv xdpoffload'
case $command in
show|list)
diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c
index 1ef45e55039e..2f017caa678d 100644
--- a/tools/bpf/bpftool/cgroup.c
+++ b/tools/bpf/bpftool/cgroup.c
@@ -117,6 +117,25 @@ static int count_attached_bpf_progs(int cgroup_fd, enum bpf_attach_type type)
return prog_cnt;
}
+static int cgroup_has_attached_progs(int cgroup_fd)
+{
+ enum bpf_attach_type type;
+ bool no_prog = true;
+
+ for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++) {
+ int count = count_attached_bpf_progs(cgroup_fd, type);
+
+ if (count < 0 && errno != EINVAL)
+ return -1;
+
+ if (count > 0) {
+ no_prog = false;
+ break;
+ }
+ }
+
+ return no_prog ? 0 : 1;
+}
static int show_attached_bpf_progs(int cgroup_fd, enum bpf_attach_type type,
int level)
{
@@ -161,6 +180,7 @@ static int show_attached_bpf_progs(int cgroup_fd, enum bpf_attach_type type,
static int do_show(int argc, char **argv)
{
enum bpf_attach_type type;
+ int has_attached_progs;
const char *path;
int cgroup_fd;
int ret = -1;
@@ -192,6 +212,16 @@ static int do_show(int argc, char **argv)
goto exit;
}
+ has_attached_progs = cgroup_has_attached_progs(cgroup_fd);
+ if (has_attached_progs < 0) {
+ p_err("can't query bpf programs attached to %s: %s",
+ path, strerror(errno));
+ goto exit_cgroup;
+ } else if (!has_attached_progs) {
+ ret = 0;
+ goto exit_cgroup;
+ }
+
if (json_output)
jsonw_start_array(json_wtr);
else
@@ -212,6 +242,7 @@ static int do_show(int argc, char **argv)
if (json_output)
jsonw_end_array(json_wtr);
+exit_cgroup:
close(cgroup_fd);
exit:
return ret;
@@ -228,7 +259,7 @@ static int do_show_tree_fn(const char *fpath, const struct stat *sb,
int typeflag, struct FTW *ftw)
{
enum bpf_attach_type type;
- bool skip = true;
+ int has_attached_progs;
int cgroup_fd;
if (typeflag != FTW_D)
@@ -240,22 +271,13 @@ static int do_show_tree_fn(const char *fpath, const struct stat *sb,
return SHOW_TREE_FN_ERR;
}
- for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++) {
- int count = count_attached_bpf_progs(cgroup_fd, type);
-
- if (count < 0 && errno != EINVAL) {
- p_err("can't query bpf programs attached to %s: %s",
- fpath, strerror(errno));
- close(cgroup_fd);
- return SHOW_TREE_FN_ERR;
- }
- if (count > 0) {
- skip = false;
- break;
- }
- }
-
- if (skip) {
+ has_attached_progs = cgroup_has_attached_progs(cgroup_fd);
+ if (has_attached_progs < 0) {
+ p_err("can't query bpf programs attached to %s: %s",
+ fpath, strerror(errno));
+ close(cgroup_fd);
+ return SHOW_TREE_FN_ERR;
+ } else if (!has_attached_progs) {
close(cgroup_fd);
return 0;
}
diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c
new file mode 100644
index 000000000000..7ce09a9a6999
--- /dev/null
+++ b/tools/bpf/bpftool/gen.c
@@ -0,0 +1,609 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2019 Facebook */
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/err.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <bpf.h>
+#include <libbpf.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+#include "btf.h"
+#include "libbpf_internal.h"
+#include "json_writer.h"
+#include "main.h"
+
+
+#define MAX_OBJ_NAME_LEN 64
+
+static void sanitize_identifier(char *name)
+{
+ int i;
+
+ for (i = 0; name[i]; i++)
+ if (!isalnum(name[i]) && name[i] != '_')
+ name[i] = '_';
+}
+
+static bool str_has_suffix(const char *str, const char *suffix)
+{
+ size_t i, n1 = strlen(str), n2 = strlen(suffix);
+
+ if (n1 < n2)
+ return false;
+
+ for (i = 0; i < n2; i++) {
+ if (str[n1 - i - 1] != suffix[n2 - i - 1])
+ return false;
+ }
+
+ return true;
+}
+
+static void get_obj_name(char *name, const char *file)
+{
+ /* Using basename() GNU version which doesn't modify arg. */
+ strncpy(name, basename(file), MAX_OBJ_NAME_LEN - 1);
+ name[MAX_OBJ_NAME_LEN - 1] = '\0';
+ if (str_has_suffix(name, ".o"))
+ name[strlen(name) - 2] = '\0';
+ sanitize_identifier(name);
+}
+
+static void get_header_guard(char *guard, const char *obj_name)
+{
+ int i;
+
+ sprintf(guard, "__%s_SKEL_H__", obj_name);
+ for (i = 0; guard[i]; i++)
+ guard[i] = toupper(guard[i]);
+}
+
+static const char *get_map_ident(const struct bpf_map *map)
+{
+ const char *name = bpf_map__name(map);
+
+ if (!bpf_map__is_internal(map))
+ return name;
+
+ if (str_has_suffix(name, ".data"))
+ return "data";
+ else if (str_has_suffix(name, ".rodata"))
+ return "rodata";
+ else if (str_has_suffix(name, ".bss"))
+ return "bss";
+ else if (str_has_suffix(name, ".kconfig"))
+ return "kconfig";
+ else
+ return NULL;
+}
+
+static void codegen_btf_dump_printf(void *ct, const char *fmt, va_list args)
+{
+ vprintf(fmt, args);
+}
+
+static int codegen_datasec_def(struct bpf_object *obj,
+ struct btf *btf,
+ struct btf_dump *d,
+ const struct btf_type *sec,
+ const char *obj_name)
+{
+ const char *sec_name = btf__name_by_offset(btf, sec->name_off);
+ const struct btf_var_secinfo *sec_var = btf_var_secinfos(sec);
+ int i, err, off = 0, pad_cnt = 0, vlen = btf_vlen(sec);
+ const char *sec_ident;
+ char var_ident[256];
+
+ if (strcmp(sec_name, ".data") == 0)
+ sec_ident = "data";
+ else if (strcmp(sec_name, ".bss") == 0)
+ sec_ident = "bss";
+ else if (strcmp(sec_name, ".rodata") == 0)
+ sec_ident = "rodata";
+ else if (strcmp(sec_name, ".kconfig") == 0)
+ sec_ident = "kconfig";
+ else
+ return 0;
+
+ printf(" struct %s__%s {\n", obj_name, sec_ident);
+ for (i = 0; i < vlen; i++, sec_var++) {
+ const struct btf_type *var = btf__type_by_id(btf, sec_var->type);
+ const char *var_name = btf__name_by_offset(btf, var->name_off);
+ DECLARE_LIBBPF_OPTS(btf_dump_emit_type_decl_opts, opts,
+ .field_name = var_ident,
+ .indent_level = 2,
+ );
+ int need_off = sec_var->offset, align_off, align;
+ __u32 var_type_id = var->type;
+ const struct btf_type *t;
+
+ t = btf__type_by_id(btf, var_type_id);
+ while (btf_is_mod(t)) {
+ var_type_id = t->type;
+ t = btf__type_by_id(btf, var_type_id);
+ }
+
+ if (off > need_off) {
+ p_err("Something is wrong for %s's variable #%d: need offset %d, already at %d.\n",
+ sec_name, i, need_off, off);
+ return -EINVAL;
+ }
+
+ align = btf__align_of(btf, var->type);
+ if (align <= 0) {
+ p_err("Failed to determine alignment of variable '%s': %d",
+ var_name, align);
+ return -EINVAL;
+ }
+
+ align_off = (off + align - 1) / align * align;
+ if (align_off != need_off) {
+ printf("\t\tchar __pad%d[%d];\n",
+ pad_cnt, need_off - off);
+ pad_cnt++;
+ }
+
+ /* sanitize variable name, e.g., for static vars inside
+ * a function, it's name is '<function name>.<variable name>',
+ * which we'll turn into a '<function name>_<variable name>'
+ */
+ var_ident[0] = '\0';
+ strncat(var_ident, var_name, sizeof(var_ident) - 1);
+ sanitize_identifier(var_ident);
+
+ printf("\t\t");
+ err = btf_dump__emit_type_decl(d, var_type_id, &opts);
+ if (err)
+ return err;
+ printf(";\n");
+
+ off = sec_var->offset + sec_var->size;
+ }
+ printf(" } *%s;\n", sec_ident);
+ return 0;
+}
+
+static int codegen_datasecs(struct bpf_object *obj, const char *obj_name)
+{
+ struct btf *btf = bpf_object__btf(obj);
+ int n = btf__get_nr_types(btf);
+ struct btf_dump *d;
+ int i, err = 0;
+
+ d = btf_dump__new(btf, NULL, NULL, codegen_btf_dump_printf);
+ if (IS_ERR(d))
+ return PTR_ERR(d);
+
+ for (i = 1; i <= n; i++) {
+ const struct btf_type *t = btf__type_by_id(btf, i);
+
+ if (!btf_is_datasec(t))
+ continue;
+
+ err = codegen_datasec_def(obj, btf, d, t, obj_name);
+ if (err)
+ goto out;
+ }
+out:
+ btf_dump__free(d);
+ return err;
+}
+
+static int codegen(const char *template, ...)
+{
+ const char *src, *end;
+ int skip_tabs = 0, n;
+ char *s, *dst;
+ va_list args;
+ char c;
+
+ n = strlen(template);
+ s = malloc(n + 1);
+ if (!s)
+ return -ENOMEM;
+ src = template;
+ dst = s;
+
+ /* find out "baseline" indentation to skip */
+ while ((c = *src++)) {
+ if (c == '\t') {
+ skip_tabs++;
+ } else if (c == '\n') {
+ break;
+ } else {
+ p_err("unrecognized character at pos %td in template '%s'",
+ src - template - 1, template);
+ return -EINVAL;
+ }
+ }
+
+ while (*src) {
+ /* skip baseline indentation tabs */
+ for (n = skip_tabs; n > 0; n--, src++) {
+ if (*src != '\t') {
+ p_err("not enough tabs at pos %td in template '%s'",
+ src - template - 1, template);
+ return -EINVAL;
+ }
+ }
+ /* trim trailing whitespace */
+ end = strchrnul(src, '\n');
+ for (n = end - src; n > 0 && isspace(src[n - 1]); n--)
+ ;
+ memcpy(dst, src, n);
+ dst += n;
+ if (*end)
+ *dst++ = '\n';
+ src = *end ? end + 1 : end;
+ }
+ *dst++ = '\0';
+
+ /* print out using adjusted template */
+ va_start(args, template);
+ n = vprintf(s, args);
+ va_end(args);
+
+ free(s);
+ return n;
+}
+
+static int do_skeleton(int argc, char **argv)
+{
+ char header_guard[MAX_OBJ_NAME_LEN + sizeof("__SKEL_H__")];
+ size_t i, map_cnt = 0, prog_cnt = 0, file_sz, mmap_sz;
+ DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts);
+ char obj_name[MAX_OBJ_NAME_LEN], *obj_data;
+ struct bpf_object *obj = NULL;
+ const char *file, *ident;
+ struct bpf_program *prog;
+ int fd, len, err = -1;
+ struct bpf_map *map;
+ struct btf *btf;
+ struct stat st;
+
+ if (!REQ_ARGS(1)) {
+ usage();
+ return -1;
+ }
+ file = GET_ARG();
+
+ if (argc) {
+ p_err("extra unknown arguments");
+ return -1;
+ }
+
+ if (stat(file, &st)) {
+ p_err("failed to stat() %s: %s", file, strerror(errno));
+ return -1;
+ }
+ file_sz = st.st_size;
+ mmap_sz = roundup(file_sz, sysconf(_SC_PAGE_SIZE));
+ fd = open(file, O_RDONLY);
+ if (fd < 0) {
+ p_err("failed to open() %s: %s", file, strerror(errno));
+ return -1;
+ }
+ obj_data = mmap(NULL, mmap_sz, PROT_READ, MAP_PRIVATE, fd, 0);
+ if (obj_data == MAP_FAILED) {
+ obj_data = NULL;
+ p_err("failed to mmap() %s: %s", file, strerror(errno));
+ goto out;
+ }
+ get_obj_name(obj_name, file);
+ opts.object_name = obj_name;
+ obj = bpf_object__open_mem(obj_data, file_sz, &opts);
+ if (IS_ERR(obj)) {
+ obj = NULL;
+ p_err("failed to open BPF object file: %ld", PTR_ERR(obj));
+ goto out;
+ }
+
+ bpf_object__for_each_map(map, obj) {
+ ident = get_map_ident(map);
+ if (!ident) {
+ p_err("ignoring unrecognized internal map '%s'...",
+ bpf_map__name(map));
+ continue;
+ }
+ map_cnt++;
+ }
+ bpf_object__for_each_program(prog, obj) {
+ prog_cnt++;
+ }
+
+ get_header_guard(header_guard, obj_name);
+ codegen("\
+ \n\
+ /* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ \n\
+ \n\
+ /* THIS FILE IS AUTOGENERATED! */ \n\
+ #ifndef %2$s \n\
+ #define %2$s \n\
+ \n\
+ #include <stdlib.h> \n\
+ #include <libbpf.h> \n\
+ \n\
+ struct %1$s { \n\
+ struct bpf_object_skeleton *skeleton; \n\
+ struct bpf_object *obj; \n\
+ ",
+ obj_name, header_guard
+ );
+
+ if (map_cnt) {
+ printf("\tstruct {\n");
+ bpf_object__for_each_map(map, obj) {
+ ident = get_map_ident(map);
+ if (!ident)
+ continue;
+ printf("\t\tstruct bpf_map *%s;\n", ident);
+ }
+ printf("\t} maps;\n");
+ }
+
+ if (prog_cnt) {
+ printf("\tstruct {\n");
+ bpf_object__for_each_program(prog, obj) {
+ printf("\t\tstruct bpf_program *%s;\n",
+ bpf_program__name(prog));
+ }
+ printf("\t} progs;\n");
+ printf("\tstruct {\n");
+ bpf_object__for_each_program(prog, obj) {
+ printf("\t\tstruct bpf_link *%s;\n",
+ bpf_program__name(prog));
+ }
+ printf("\t} links;\n");
+ }
+
+ btf = bpf_object__btf(obj);
+ if (btf) {
+ err = codegen_datasecs(obj, obj_name);
+ if (err)
+ goto out;
+ }
+
+ codegen("\
+ \n\
+ }; \n\
+ \n\
+ static void \n\
+ %1$s__destroy(struct %1$s *obj) \n\
+ { \n\
+ if (!obj) \n\
+ return; \n\
+ if (obj->skeleton) \n\
+ bpf_object__destroy_skeleton(obj->skeleton);\n\
+ free(obj); \n\
+ } \n\
+ \n\
+ static inline int \n\
+ %1$s__create_skeleton(struct %1$s *obj); \n\
+ \n\
+ static inline struct %1$s * \n\
+ %1$s__open_opts(const struct bpf_object_open_opts *opts) \n\
+ { \n\
+ struct %1$s *obj; \n\
+ \n\
+ obj = (typeof(obj))calloc(1, sizeof(*obj)); \n\
+ if (!obj) \n\
+ return NULL; \n\
+ if (%1$s__create_skeleton(obj)) \n\
+ goto err; \n\
+ if (bpf_object__open_skeleton(obj->skeleton, opts)) \n\
+ goto err; \n\
+ \n\
+ return obj; \n\
+ err: \n\
+ %1$s__destroy(obj); \n\
+ return NULL; \n\
+ } \n\
+ \n\
+ static inline struct %1$s * \n\
+ %1$s__open(void) \n\
+ { \n\
+ return %1$s__open_opts(NULL); \n\
+ } \n\
+ \n\
+ static inline int \n\
+ %1$s__load(struct %1$s *obj) \n\
+ { \n\
+ return bpf_object__load_skeleton(obj->skeleton); \n\
+ } \n\
+ \n\
+ static inline struct %1$s * \n\
+ %1$s__open_and_load(void) \n\
+ { \n\
+ struct %1$s *obj; \n\
+ \n\
+ obj = %1$s__open(); \n\
+ if (!obj) \n\
+ return NULL; \n\
+ if (%1$s__load(obj)) { \n\
+ %1$s__destroy(obj); \n\
+ return NULL; \n\
+ } \n\
+ return obj; \n\
+ } \n\
+ \n\
+ static inline int \n\
+ %1$s__attach(struct %1$s *obj) \n\
+ { \n\
+ return bpf_object__attach_skeleton(obj->skeleton); \n\
+ } \n\
+ \n\
+ static inline void \n\
+ %1$s__detach(struct %1$s *obj) \n\
+ { \n\
+ return bpf_object__detach_skeleton(obj->skeleton); \n\
+ } \n\
+ ",
+ obj_name
+ );
+
+ codegen("\
+ \n\
+ \n\
+ static inline int \n\
+ %1$s__create_skeleton(struct %1$s *obj) \n\
+ { \n\
+ struct bpf_object_skeleton *s; \n\
+ \n\
+ s = (typeof(s))calloc(1, sizeof(*s)); \n\
+ if (!s) \n\
+ return -1; \n\
+ obj->skeleton = s; \n\
+ \n\
+ s->sz = sizeof(*s); \n\
+ s->name = \"%1$s\"; \n\
+ s->obj = &obj->obj; \n\
+ ",
+ obj_name
+ );
+ if (map_cnt) {
+ codegen("\
+ \n\
+ \n\
+ /* maps */ \n\
+ s->map_cnt = %zu; \n\
+ s->map_skel_sz = sizeof(*s->maps); \n\
+ s->maps = (typeof(s->maps))calloc(s->map_cnt, s->map_skel_sz);\n\
+ if (!s->maps) \n\
+ goto err; \n\
+ ",
+ map_cnt
+ );
+ i = 0;
+ bpf_object__for_each_map(map, obj) {
+ ident = get_map_ident(map);
+
+ if (!ident)
+ continue;
+
+ codegen("\
+ \n\
+ \n\
+ s->maps[%zu].name = \"%s\"; \n\
+ s->maps[%zu].map = &obj->maps.%s; \n\
+ ",
+ i, bpf_map__name(map), i, ident);
+ /* memory-mapped internal maps */
+ if (bpf_map__is_internal(map) &&
+ (bpf_map__def(map)->map_flags & BPF_F_MMAPABLE)) {
+ printf("\ts->maps[%zu].mmaped = (void **)&obj->%s;\n",
+ i, ident);
+ }
+ i++;
+ }
+ }
+ if (prog_cnt) {
+ codegen("\
+ \n\
+ \n\
+ /* programs */ \n\
+ s->prog_cnt = %zu; \n\
+ s->prog_skel_sz = sizeof(*s->progs); \n\
+ s->progs = (typeof(s->progs))calloc(s->prog_cnt, s->prog_skel_sz);\n\
+ if (!s->progs) \n\
+ goto err; \n\
+ ",
+ prog_cnt
+ );
+ i = 0;
+ bpf_object__for_each_program(prog, obj) {
+ codegen("\
+ \n\
+ \n\
+ s->progs[%1$zu].name = \"%2$s\"; \n\
+ s->progs[%1$zu].prog = &obj->progs.%2$s;\n\
+ s->progs[%1$zu].link = &obj->links.%2$s;\n\
+ ",
+ i, bpf_program__name(prog));
+ i++;
+ }
+ }
+ codegen("\
+ \n\
+ \n\
+ s->data_sz = %d; \n\
+ s->data = (void *)\"\\ \n\
+ ",
+ file_sz);
+
+ /* embed contents of BPF object file */
+ for (i = 0, len = 0; i < file_sz; i++) {
+ int w = obj_data[i] ? 4 : 2;
+
+ len += w;
+ if (len > 78) {
+ printf("\\\n");
+ len = w;
+ }
+ if (!obj_data[i])
+ printf("\\0");
+ else
+ printf("\\x%02x", (unsigned char)obj_data[i]);
+ }
+
+ codegen("\
+ \n\
+ \"; \n\
+ \n\
+ return 0; \n\
+ err: \n\
+ bpf_object__destroy_skeleton(s); \n\
+ return -1; \n\
+ } \n\
+ \n\
+ #endif /* %s */ \n\
+ ",
+ header_guard);
+ err = 0;
+out:
+ bpf_object__close(obj);
+ if (obj_data)
+ munmap(obj_data, mmap_sz);
+ close(fd);
+ return err;
+}
+
+static int do_help(int argc, char **argv)
+{
+ if (json_output) {
+ jsonw_null(json_wtr);
+ return 0;
+ }
+
+ fprintf(stderr,
+ "Usage: %1$s gen skeleton FILE\n"
+ " %1$s gen help\n"
+ "\n"
+ " " HELP_SPEC_OPTIONS "\n"
+ "",
+ bin_name);
+
+ return 0;
+}
+
+static const struct cmd cmds[] = {
+ { "skeleton", do_skeleton },
+ { "help", do_help },
+ { 0 }
+};
+
+int do_gen(int argc, char **argv)
+{
+ return cmd_select(cmds, argc, argv, do_help);
+}
diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
index 4764581ff9ea..1fe91c558508 100644
--- a/tools/bpf/bpftool/main.c
+++ b/tools/bpf/bpftool/main.c
@@ -58,7 +58,7 @@ static int do_help(int argc, char **argv)
" %s batch file FILE\n"
" %s version\n"
"\n"
- " OBJECT := { prog | map | cgroup | perf | net | feature | btf }\n"
+ " OBJECT := { prog | map | cgroup | perf | net | feature | btf | gen }\n"
" " HELP_SPEC_OPTIONS "\n"
"",
bin_name, bin_name, bin_name);
@@ -227,6 +227,7 @@ static const struct cmd cmds[] = {
{ "net", do_net },
{ "feature", do_feature },
{ "btf", do_btf },
+ { "gen", do_gen },
{ "version", do_version },
{ 0 }
};
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index 2899095f8254..4e75b58d3989 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -42,12 +42,12 @@
#define BPF_TAG_FMT "%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx"
#define HELP_SPEC_PROGRAM \
- "PROG := { id PROG_ID | pinned FILE | tag PROG_TAG }"
+ "PROG := { id PROG_ID | pinned FILE | tag PROG_TAG | name PROG_NAME }"
#define HELP_SPEC_OPTIONS \
"OPTIONS := { {-j|--json} [{-p|--pretty}] | {-f|--bpffs} |\n" \
"\t {-m|--mapcompat} | {-n|--nomount} }"
#define HELP_SPEC_MAP \
- "MAP := { id MAP_ID | pinned FILE }"
+ "MAP := { id MAP_ID | pinned FILE | name MAP_NAME }"
static const char * const prog_type_name[] = {
[BPF_PROG_TYPE_UNSPEC] = "unspec",
@@ -155,6 +155,7 @@ int do_net(int argc, char **arg);
int do_tracelog(int argc, char **arg);
int do_feature(int argc, char **argv);
int do_btf(int argc, char **argv);
+int do_gen(int argc, char **argv);
int parse_u32_arg(int *argc, char ***argv, __u32 *val, const char *what);
int prog_parse_fd(int *argc, char ***argv);
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index de61d73b9030..c01f76fa6876 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -91,10 +91,66 @@ static void *alloc_value(struct bpf_map_info *info)
return malloc(info->value_size);
}
-int map_parse_fd(int *argc, char ***argv)
+static int map_fd_by_name(char *name, int **fds)
{
- int fd;
+ unsigned int id = 0;
+ int fd, nb_fds = 0;
+ void *tmp;
+ int err;
+
+ while (true) {
+ struct bpf_map_info info = {};
+ __u32 len = sizeof(info);
+
+ err = bpf_map_get_next_id(id, &id);
+ if (err) {
+ if (errno != ENOENT) {
+ p_err("%s", strerror(errno));
+ goto err_close_fds;
+ }
+ return nb_fds;
+ }
+
+ fd = bpf_map_get_fd_by_id(id);
+ if (fd < 0) {
+ p_err("can't get map by id (%u): %s",
+ id, strerror(errno));
+ goto err_close_fds;
+ }
+
+ err = bpf_obj_get_info_by_fd(fd, &info, &len);
+ if (err) {
+ p_err("can't get map info (%u): %s",
+ id, strerror(errno));
+ goto err_close_fd;
+ }
+
+ if (strncmp(name, info.name, BPF_OBJ_NAME_LEN)) {
+ close(fd);
+ continue;
+ }
+
+ if (nb_fds > 0) {
+ tmp = realloc(*fds, (nb_fds + 1) * sizeof(int));
+ if (!tmp) {
+ p_err("failed to realloc");
+ goto err_close_fd;
+ }
+ *fds = tmp;
+ }
+ (*fds)[nb_fds++] = fd;
+ }
+
+err_close_fd:
+ close(fd);
+err_close_fds:
+ while (--nb_fds >= 0)
+ close((*fds)[nb_fds]);
+ return -1;
+}
+static int map_parse_fds(int *argc, char ***argv, int **fds)
+{
if (is_prefix(**argv, "id")) {
unsigned int id;
char *endptr;
@@ -108,10 +164,25 @@ int map_parse_fd(int *argc, char ***argv)
}
NEXT_ARGP();
- fd = bpf_map_get_fd_by_id(id);
- if (fd < 0)
+ (*fds)[0] = bpf_map_get_fd_by_id(id);
+ if ((*fds)[0] < 0) {
p_err("get map by id (%u): %s", id, strerror(errno));
- return fd;
+ return -1;
+ }
+ return 1;
+ } else if (is_prefix(**argv, "name")) {
+ char *name;
+
+ NEXT_ARGP();
+
+ name = **argv;
+ if (strlen(name) > BPF_OBJ_NAME_LEN - 1) {
+ p_err("can't parse name");
+ return -1;
+ }
+ NEXT_ARGP();
+
+ return map_fd_by_name(name, fds);
} else if (is_prefix(**argv, "pinned")) {
char *path;
@@ -120,13 +191,43 @@ int map_parse_fd(int *argc, char ***argv)
path = **argv;
NEXT_ARGP();
- return open_obj_pinned_any(path, BPF_OBJ_MAP);
+ (*fds)[0] = open_obj_pinned_any(path, BPF_OBJ_MAP);
+ if ((*fds)[0] < 0)
+ return -1;
+ return 1;
}
- p_err("expected 'id' or 'pinned', got: '%s'?", **argv);
+ p_err("expected 'id', 'name' or 'pinned', got: '%s'?", **argv);
return -1;
}
+int map_parse_fd(int *argc, char ***argv)
+{
+ int *fds = NULL;
+ int nb_fds, fd;
+
+ fds = malloc(sizeof(int));
+ if (!fds) {
+ p_err("mem alloc failed");
+ return -1;
+ }
+ nb_fds = map_parse_fds(argc, argv, &fds);
+ if (nb_fds != 1) {
+ if (nb_fds > 1) {
+ p_err("several maps match this handle");
+ while (nb_fds--)
+ close(fds[nb_fds]);
+ }
+ fd = -1;
+ goto exit_free;
+ }
+
+ fd = fds[0];
+exit_free:
+ free(fds);
+ return fd;
+}
+
int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len)
{
int err;
@@ -479,6 +580,21 @@ static int parse_elem(char **argv, struct bpf_map_info *info,
return -1;
}
+static void show_map_header_json(struct bpf_map_info *info, json_writer_t *wtr)
+{
+ jsonw_uint_field(wtr, "id", info->id);
+ if (info->type < ARRAY_SIZE(map_type_name))
+ jsonw_string_field(wtr, "type", map_type_name[info->type]);
+ else
+ jsonw_uint_field(wtr, "type", info->type);
+
+ if (*info->name)
+ jsonw_string_field(wtr, "name", info->name);
+
+ jsonw_name(wtr, "flags");
+ jsonw_printf(wtr, "%d", info->map_flags);
+}
+
static int show_map_close_json(int fd, struct bpf_map_info *info)
{
char *memlock, *frozen_str;
@@ -489,18 +605,7 @@ static int show_map_close_json(int fd, struct bpf_map_info *info)
jsonw_start_object(json_wtr);
- jsonw_uint_field(json_wtr, "id", info->id);
- if (info->type < ARRAY_SIZE(map_type_name))
- jsonw_string_field(json_wtr, "type",
- map_type_name[info->type]);
- else
- jsonw_uint_field(json_wtr, "type", info->type);
-
- if (*info->name)
- jsonw_string_field(json_wtr, "name", info->name);
-
- jsonw_name(json_wtr, "flags");
- jsonw_printf(json_wtr, "%d", info->map_flags);
+ show_map_header_json(info, json_wtr);
print_dev_json(info->ifindex, info->netns_dev, info->netns_ino);
@@ -561,14 +666,8 @@ static int show_map_close_json(int fd, struct bpf_map_info *info)
return 0;
}
-static int show_map_close_plain(int fd, struct bpf_map_info *info)
+static void show_map_header_plain(struct bpf_map_info *info)
{
- char *memlock, *frozen_str;
- int frozen = 0;
-
- memlock = get_fdinfo(fd, "memlock");
- frozen_str = get_fdinfo(fd, "frozen");
-
printf("%u: ", info->id);
if (info->type < ARRAY_SIZE(map_type_name))
printf("%s ", map_type_name[info->type]);
@@ -581,6 +680,17 @@ static int show_map_close_plain(int fd, struct bpf_map_info *info)
printf("flags 0x%x", info->map_flags);
print_dev_plain(info->ifindex, info->netns_dev, info->netns_ino);
printf("\n");
+}
+
+static int show_map_close_plain(int fd, struct bpf_map_info *info)
+{
+ char *memlock, *frozen_str;
+ int frozen = 0;
+
+ memlock = get_fdinfo(fd, "memlock");
+ frozen_str = get_fdinfo(fd, "frozen");
+
+ show_map_header_plain(info);
printf("\tkey %uB value %uB max_entries %u",
info->key_size, info->value_size, info->max_entries);
@@ -642,6 +752,50 @@ static int show_map_close_plain(int fd, struct bpf_map_info *info)
return 0;
}
+static int do_show_subset(int argc, char **argv)
+{
+ struct bpf_map_info info = {};
+ __u32 len = sizeof(info);
+ int *fds = NULL;
+ int nb_fds, i;
+ int err = -1;
+
+ fds = malloc(sizeof(int));
+ if (!fds) {
+ p_err("mem alloc failed");
+ return -1;
+ }
+ nb_fds = map_parse_fds(&argc, &argv, &fds);
+ if (nb_fds < 1)
+ goto exit_free;
+
+ if (json_output && nb_fds > 1)
+ jsonw_start_array(json_wtr); /* root array */
+ for (i = 0; i < nb_fds; i++) {
+ err = bpf_obj_get_info_by_fd(fds[i], &info, &len);
+ if (err) {
+ p_err("can't get map info: %s",
+ strerror(errno));
+ for (; i < nb_fds; i++)
+ close(fds[i]);
+ break;
+ }
+
+ if (json_output)
+ show_map_close_json(fds[i], &info);
+ else
+ show_map_close_plain(fds[i], &info);
+
+ close(fds[i]);
+ }
+ if (json_output && nb_fds > 1)
+ jsonw_end_array(json_wtr); /* root array */
+
+exit_free:
+ free(fds);
+ return err;
+}
+
static int do_show(int argc, char **argv)
{
struct bpf_map_info info = {};
@@ -653,16 +807,8 @@ static int do_show(int argc, char **argv)
if (show_pinned)
build_pinned_obj_table(&map_table, BPF_OBJ_MAP);
- if (argc == 2) {
- fd = map_parse_fd_and_info(&argc, &argv, &info, &len);
- if (fd < 0)
- return -1;
-
- if (json_output)
- return show_map_close_json(fd, &info);
- else
- return show_map_close_plain(fd, &info);
- }
+ if (argc == 2)
+ return do_show_subset(argc, argv);
if (argc)
return BAD_ARG();
@@ -765,26 +911,49 @@ static int dump_map_elem(int fd, void *key, void *value,
return 0;
}
-static int do_dump(int argc, char **argv)
+static int maps_have_btf(int *fds, int nb_fds)
{
struct bpf_map_info info = {};
- void *key, *value, *prev_key;
- unsigned int num_elems = 0;
__u32 len = sizeof(info);
- json_writer_t *btf_wtr;
struct btf *btf = NULL;
- int err;
- int fd;
+ int err, i;
- if (argc != 2)
- usage();
+ for (i = 0; i < nb_fds; i++) {
+ err = bpf_obj_get_info_by_fd(fds[i], &info, &len);
+ if (err) {
+ p_err("can't get map info: %s", strerror(errno));
+ goto err_close;
+ }
- fd = map_parse_fd_and_info(&argc, &argv, &info, &len);
- if (fd < 0)
- return -1;
+ err = btf__get_from_id(info.btf_id, &btf);
+ if (err) {
+ p_err("failed to get btf");
+ goto err_close;
+ }
- key = malloc(info.key_size);
- value = alloc_value(&info);
+ if (!btf)
+ return 0;
+ }
+
+ return 1;
+
+err_close:
+ for (; i < nb_fds; i++)
+ close(fds[i]);
+ return -1;
+}
+
+static int
+map_dump(int fd, struct bpf_map_info *info, json_writer_t *wtr,
+ bool enable_btf, bool show_header)
+{
+ void *key, *value, *prev_key;
+ unsigned int num_elems = 0;
+ struct btf *btf = NULL;
+ int err;
+
+ key = malloc(info->key_size);
+ value = alloc_value(info);
if (!key || !value) {
p_err("mem alloc failed");
err = -1;
@@ -793,30 +962,32 @@ static int do_dump(int argc, char **argv)
prev_key = NULL;
- err = btf__get_from_id(info.btf_id, &btf);
- if (err) {
- p_err("failed to get btf");
- goto exit_free;
+ if (enable_btf) {
+ err = btf__get_from_id(info->btf_id, &btf);
+ if (err || !btf) {
+ /* enable_btf is true only if we've already checked
+ * that all maps have BTF information.
+ */
+ p_err("failed to get btf");
+ goto exit_free;
+ }
}
- if (json_output)
- jsonw_start_array(json_wtr);
- else
- if (btf) {
- btf_wtr = get_btf_writer();
- if (!btf_wtr) {
- p_info("failed to create json writer for btf. falling back to plain output");
- btf__free(btf);
- btf = NULL;
- } else {
- jsonw_start_array(btf_wtr);
- }
+ if (wtr) {
+ if (show_header) {
+ jsonw_start_object(wtr); /* map object */
+ show_map_header_json(info, wtr);
+ jsonw_name(wtr, "elements");
}
+ jsonw_start_array(wtr); /* elements */
+ } else if (show_header) {
+ show_map_header_plain(info);
+ }
- if (info.type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY &&
- info.value_size != 8)
+ if (info->type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY &&
+ info->value_size != 8)
p_info("Warning: cannot read values from %s map with value_size != 8",
- map_type_name[info.type]);
+ map_type_name[info->type]);
while (true) {
err = bpf_map_get_next_key(fd, prev_key, key);
if (err) {
@@ -824,15 +995,14 @@ static int do_dump(int argc, char **argv)
err = 0;
break;
}
- num_elems += dump_map_elem(fd, key, value, &info, btf, btf_wtr);
+ num_elems += dump_map_elem(fd, key, value, info, btf, wtr);
prev_key = key;
}
- if (json_output)
- jsonw_end_array(json_wtr);
- else if (btf) {
- jsonw_end_array(btf_wtr);
- jsonw_destroy(&btf_wtr);
+ if (wtr) {
+ jsonw_end_array(wtr); /* elements */
+ if (show_header)
+ jsonw_end_object(wtr); /* map object */
} else {
printf("Found %u element%s\n", num_elems,
num_elems != 1 ? "s" : "");
@@ -847,6 +1017,72 @@ exit_free:
return err;
}
+static int do_dump(int argc, char **argv)
+{
+ json_writer_t *wtr = NULL, *btf_wtr = NULL;
+ struct bpf_map_info info = {};
+ int nb_fds, i = 0, btf = 0;
+ __u32 len = sizeof(info);
+ int *fds = NULL;
+ int err = -1;
+
+ if (argc != 2)
+ usage();
+
+ fds = malloc(sizeof(int));
+ if (!fds) {
+ p_err("mem alloc failed");
+ return -1;
+ }
+ nb_fds = map_parse_fds(&argc, &argv, &fds);
+ if (nb_fds < 1)
+ goto exit_free;
+
+ if (json_output) {
+ wtr = json_wtr;
+ } else {
+ btf = maps_have_btf(fds, nb_fds);
+ if (btf < 0)
+ goto exit_close;
+ if (btf) {
+ btf_wtr = get_btf_writer();
+ if (btf_wtr) {
+ wtr = btf_wtr;
+ } else {
+ p_info("failed to create json writer for btf. falling back to plain output");
+ btf = 0;
+ }
+ }
+ }
+
+ if (wtr && nb_fds > 1)
+ jsonw_start_array(wtr); /* root array */
+ for (i = 0; i < nb_fds; i++) {
+ if (bpf_obj_get_info_by_fd(fds[i], &info, &len)) {
+ p_err("can't get map info: %s", strerror(errno));
+ break;
+ }
+ err = map_dump(fds[i], &info, wtr, btf, nb_fds > 1);
+ if (!wtr && i != nb_fds - 1)
+ printf("\n");
+
+ if (err)
+ break;
+ close(fds[i]);
+ }
+ if (wtr && nb_fds > 1)
+ jsonw_end_array(wtr); /* root array */
+
+ if (btf)
+ jsonw_destroy(&btf_wtr);
+exit_close:
+ for (; i < nb_fds; i++)
+ close(fds[i]);
+exit_free:
+ free(fds);
+ return err;
+}
+
static int alloc_key_value(struct bpf_map_info *info, void **key, void **value)
{
*key = NULL;
diff --git a/tools/bpf/bpftool/net.c b/tools/bpf/bpftool/net.c
index 4f52d3151616..d93bee298e54 100644
--- a/tools/bpf/bpftool/net.c
+++ b/tools/bpf/bpftool/net.c
@@ -18,6 +18,7 @@
#include <bpf.h>
#include <nlattr.h>
+#include "libbpf_internal.h"
#include "main.h"
#include "netlink_dumper.h"
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index 4535c863d2cd..47a61ac42dc0 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -25,6 +25,11 @@
#include "main.h"
#include "xlated_dumper.h"
+enum dump_mode {
+ DUMP_JITED,
+ DUMP_XLATED,
+};
+
static const char * const attach_type_strings[] = {
[BPF_SK_SKB_STREAM_PARSER] = "stream_parser",
[BPF_SK_SKB_STREAM_VERDICT] = "stream_verdict",
@@ -77,11 +82,12 @@ static void print_boot_time(__u64 nsecs, char *buf, unsigned int size)
strftime(buf, size, "%FT%T%z", &load_tm);
}
-static int prog_fd_by_tag(unsigned char *tag)
+static int prog_fd_by_nametag(void *nametag, int **fds, bool tag)
{
unsigned int id = 0;
+ int fd, nb_fds = 0;
+ void *tmp;
int err;
- int fd;
while (true) {
struct bpf_prog_info info = {};
@@ -89,36 +95,54 @@ static int prog_fd_by_tag(unsigned char *tag)
err = bpf_prog_get_next_id(id, &id);
if (err) {
- p_err("%s", strerror(errno));
- return -1;
+ if (errno != ENOENT) {
+ p_err("%s", strerror(errno));
+ goto err_close_fds;
+ }
+ return nb_fds;
}
fd = bpf_prog_get_fd_by_id(id);
if (fd < 0) {
p_err("can't get prog by id (%u): %s",
id, strerror(errno));
- return -1;
+ goto err_close_fds;
}
err = bpf_obj_get_info_by_fd(fd, &info, &len);
if (err) {
p_err("can't get prog info (%u): %s",
id, strerror(errno));
- close(fd);
- return -1;
+ goto err_close_fd;
}
- if (!memcmp(tag, info.tag, BPF_TAG_SIZE))
- return fd;
+ if ((tag && memcmp(nametag, info.tag, BPF_TAG_SIZE)) ||
+ (!tag && strncmp(nametag, info.name, BPF_OBJ_NAME_LEN))) {
+ close(fd);
+ continue;
+ }
- close(fd);
+ if (nb_fds > 0) {
+ tmp = realloc(*fds, (nb_fds + 1) * sizeof(int));
+ if (!tmp) {
+ p_err("failed to realloc");
+ goto err_close_fd;
+ }
+ *fds = tmp;
+ }
+ (*fds)[nb_fds++] = fd;
}
+
+err_close_fd:
+ close(fd);
+err_close_fds:
+ while (--nb_fds >= 0)
+ close((*fds)[nb_fds]);
+ return -1;
}
-int prog_parse_fd(int *argc, char ***argv)
+static int prog_parse_fds(int *argc, char ***argv, int **fds)
{
- int fd;
-
if (is_prefix(**argv, "id")) {
unsigned int id;
char *endptr;
@@ -132,10 +156,12 @@ int prog_parse_fd(int *argc, char ***argv)
}
NEXT_ARGP();
- fd = bpf_prog_get_fd_by_id(id);
- if (fd < 0)
+ (*fds)[0] = bpf_prog_get_fd_by_id(id);
+ if ((*fds)[0] < 0) {
p_err("get by id (%u): %s", id, strerror(errno));
- return fd;
+ return -1;
+ }
+ return 1;
} else if (is_prefix(**argv, "tag")) {
unsigned char tag[BPF_TAG_SIZE];
@@ -149,7 +175,20 @@ int prog_parse_fd(int *argc, char ***argv)
}
NEXT_ARGP();
- return prog_fd_by_tag(tag);
+ return prog_fd_by_nametag(tag, fds, true);
+ } else if (is_prefix(**argv, "name")) {
+ char *name;
+
+ NEXT_ARGP();
+
+ name = **argv;
+ if (strlen(name) > BPF_OBJ_NAME_LEN - 1) {
+ p_err("can't parse name");
+ return -1;
+ }
+ NEXT_ARGP();
+
+ return prog_fd_by_nametag(name, fds, false);
} else if (is_prefix(**argv, "pinned")) {
char *path;
@@ -158,13 +197,43 @@ int prog_parse_fd(int *argc, char ***argv)
path = **argv;
NEXT_ARGP();
- return open_obj_pinned_any(path, BPF_OBJ_PROG);
+ (*fds)[0] = open_obj_pinned_any(path, BPF_OBJ_PROG);
+ if ((*fds)[0] < 0)
+ return -1;
+ return 1;
}
- p_err("expected 'id', 'tag' or 'pinned', got: '%s'?", **argv);
+ p_err("expected 'id', 'tag', 'name' or 'pinned', got: '%s'?", **argv);
return -1;
}
+int prog_parse_fd(int *argc, char ***argv)
+{
+ int *fds = NULL;
+ int nb_fds, fd;
+
+ fds = malloc(sizeof(int));
+ if (!fds) {
+ p_err("mem alloc failed");
+ return -1;
+ }
+ nb_fds = prog_parse_fds(argc, argv, &fds);
+ if (nb_fds != 1) {
+ if (nb_fds > 1) {
+ p_err("several programs match this handle");
+ while (nb_fds--)
+ close(fds[nb_fds]);
+ }
+ fd = -1;
+ goto exit_free;
+ }
+
+ fd = fds[0];
+exit_free:
+ free(fds);
+ return fd;
+}
+
static void show_prog_maps(int fd, u32 num_maps)
{
struct bpf_prog_info info = {};
@@ -194,11 +263,8 @@ static void show_prog_maps(int fd, u32 num_maps)
}
}
-static void print_prog_json(struct bpf_prog_info *info, int fd)
+static void print_prog_header_json(struct bpf_prog_info *info)
{
- char *memlock;
-
- jsonw_start_object(json_wtr);
jsonw_uint_field(json_wtr, "id", info->id);
if (info->type < ARRAY_SIZE(prog_type_name))
jsonw_string_field(json_wtr, "type",
@@ -219,7 +285,14 @@ static void print_prog_json(struct bpf_prog_info *info, int fd)
jsonw_uint_field(json_wtr, "run_time_ns", info->run_time_ns);
jsonw_uint_field(json_wtr, "run_cnt", info->run_cnt);
}
+}
+static void print_prog_json(struct bpf_prog_info *info, int fd)
+{
+ char *memlock;
+
+ jsonw_start_object(json_wtr);
+ print_prog_header_json(info);
print_dev_json(info->ifindex, info->netns_dev, info->netns_ino);
if (info->load_time) {
@@ -268,10 +341,8 @@ static void print_prog_json(struct bpf_prog_info *info, int fd)
jsonw_end_object(json_wtr);
}
-static void print_prog_plain(struct bpf_prog_info *info, int fd)
+static void print_prog_header_plain(struct bpf_prog_info *info)
{
- char *memlock;
-
printf("%u: ", info->id);
if (info->type < ARRAY_SIZE(prog_type_name))
printf("%s ", prog_type_name[info->type]);
@@ -289,6 +360,13 @@ static void print_prog_plain(struct bpf_prog_info *info, int fd)
printf(" run_time_ns %lld run_cnt %lld",
info->run_time_ns, info->run_cnt);
printf("\n");
+}
+
+static void print_prog_plain(struct bpf_prog_info *info, int fd)
+{
+ char *memlock;
+
+ print_prog_header_plain(info);
if (info->load_time) {
char buf[32];
@@ -349,6 +427,40 @@ static int show_prog(int fd)
return 0;
}
+static int do_show_subset(int argc, char **argv)
+{
+ int *fds = NULL;
+ int nb_fds, i;
+ int err = -1;
+
+ fds = malloc(sizeof(int));
+ if (!fds) {
+ p_err("mem alloc failed");
+ return -1;
+ }
+ nb_fds = prog_parse_fds(&argc, &argv, &fds);
+ if (nb_fds < 1)
+ goto exit_free;
+
+ if (json_output && nb_fds > 1)
+ jsonw_start_array(json_wtr); /* root array */
+ for (i = 0; i < nb_fds; i++) {
+ err = show_prog(fds[i]);
+ if (err) {
+ for (; i < nb_fds; i++)
+ close(fds[i]);
+ break;
+ }
+ close(fds[i]);
+ }
+ if (json_output && nb_fds > 1)
+ jsonw_end_array(json_wtr); /* root array */
+
+exit_free:
+ free(fds);
+ return err;
+}
+
static int do_show(int argc, char **argv)
{
__u32 id = 0;
@@ -358,15 +470,8 @@ static int do_show(int argc, char **argv)
if (show_pinned)
build_pinned_obj_table(&prog_table, BPF_OBJ_PROG);
- if (argc == 2) {
- fd = prog_parse_fd(&argc, &argv);
- if (fd < 0)
- return -1;
-
- err = show_prog(fd);
- close(fd);
- return err;
- }
+ if (argc == 2)
+ return do_show_subset(argc, argv);
if (argc)
return BAD_ARG();
@@ -408,101 +513,32 @@ static int do_show(int argc, char **argv)
return err;
}
-static int do_dump(int argc, char **argv)
+static int
+prog_dump(struct bpf_prog_info *info, enum dump_mode mode,
+ char *filepath, bool opcodes, bool visual, bool linum)
{
- struct bpf_prog_info_linear *info_linear;
struct bpf_prog_linfo *prog_linfo = NULL;
- enum {DUMP_JITED, DUMP_XLATED} mode;
const char *disasm_opt = NULL;
- struct bpf_prog_info *info;
struct dump_data dd = {};
void *func_info = NULL;
struct btf *btf = NULL;
- char *filepath = NULL;
- bool opcodes = false;
- bool visual = false;
char func_sig[1024];
unsigned char *buf;
- bool linum = false;
__u32 member_len;
- __u64 arrays;
ssize_t n;
int fd;
- if (is_prefix(*argv, "jited")) {
- if (disasm_init())
- return -1;
- mode = DUMP_JITED;
- } else if (is_prefix(*argv, "xlated")) {
- mode = DUMP_XLATED;
- } else {
- p_err("expected 'xlated' or 'jited', got: %s", *argv);
- return -1;
- }
- NEXT_ARG();
-
- if (argc < 2)
- usage();
-
- fd = prog_parse_fd(&argc, &argv);
- if (fd < 0)
- return -1;
-
- if (is_prefix(*argv, "file")) {
- NEXT_ARG();
- if (!argc) {
- p_err("expected file path");
- return -1;
- }
-
- filepath = *argv;
- NEXT_ARG();
- } else if (is_prefix(*argv, "opcodes")) {
- opcodes = true;
- NEXT_ARG();
- } else if (is_prefix(*argv, "visual")) {
- visual = true;
- NEXT_ARG();
- } else if (is_prefix(*argv, "linum")) {
- linum = true;
- NEXT_ARG();
- }
-
- if (argc) {
- usage();
- return -1;
- }
-
- if (mode == DUMP_JITED)
- arrays = 1UL << BPF_PROG_INFO_JITED_INSNS;
- else
- arrays = 1UL << BPF_PROG_INFO_XLATED_INSNS;
-
- arrays |= 1UL << BPF_PROG_INFO_JITED_KSYMS;
- arrays |= 1UL << BPF_PROG_INFO_JITED_FUNC_LENS;
- arrays |= 1UL << BPF_PROG_INFO_FUNC_INFO;
- arrays |= 1UL << BPF_PROG_INFO_LINE_INFO;
- arrays |= 1UL << BPF_PROG_INFO_JITED_LINE_INFO;
-
- info_linear = bpf_program__get_prog_info_linear(fd, arrays);
- close(fd);
- if (IS_ERR_OR_NULL(info_linear)) {
- p_err("can't get prog info: %s", strerror(errno));
- return -1;
- }
-
- info = &info_linear->info;
if (mode == DUMP_JITED) {
- if (info->jited_prog_len == 0) {
+ if (info->jited_prog_len == 0 || !info->jited_prog_insns) {
p_info("no instructions returned");
- goto err_free;
+ return -1;
}
buf = (unsigned char *)(info->jited_prog_insns);
member_len = info->jited_prog_len;
} else { /* DUMP_XLATED */
if (info->xlated_prog_len == 0) {
p_err("error retrieving insn dump: kernel.kptr_restrict set?");
- goto err_free;
+ return -1;
}
buf = (unsigned char *)info->xlated_prog_insns;
member_len = info->xlated_prog_len;
@@ -510,7 +546,7 @@ static int do_dump(int argc, char **argv)
if (info->btf_id && btf__get_from_id(info->btf_id, &btf)) {
p_err("failed to get btf");
- goto err_free;
+ return -1;
}
func_info = (void *)info->func_info;
@@ -526,7 +562,7 @@ static int do_dump(int argc, char **argv)
if (fd < 0) {
p_err("can't open file %s: %s", filepath,
strerror(errno));
- goto err_free;
+ return -1;
}
n = write(fd, buf, member_len);
@@ -534,7 +570,7 @@ static int do_dump(int argc, char **argv)
if (n != member_len) {
p_err("error writing output file: %s",
n < 0 ? strerror(errno) : "short write");
- goto err_free;
+ return -1;
}
if (json_output)
@@ -548,7 +584,7 @@ static int do_dump(int argc, char **argv)
info->netns_ino,
&disasm_opt);
if (!name)
- goto err_free;
+ return -1;
}
if (info->nr_jited_func_lens && info->jited_func_lens) {
@@ -643,12 +679,130 @@ static int do_dump(int argc, char **argv)
kernel_syms_destroy(&dd);
}
- free(info_linear);
return 0;
+}
-err_free:
- free(info_linear);
- return -1;
+static int do_dump(int argc, char **argv)
+{
+ struct bpf_prog_info_linear *info_linear;
+ char *filepath = NULL;
+ bool opcodes = false;
+ bool visual = false;
+ enum dump_mode mode;
+ bool linum = false;
+ int *fds = NULL;
+ int nb_fds, i = 0;
+ int err = -1;
+ __u64 arrays;
+
+ if (is_prefix(*argv, "jited")) {
+ if (disasm_init())
+ return -1;
+ mode = DUMP_JITED;
+ } else if (is_prefix(*argv, "xlated")) {
+ mode = DUMP_XLATED;
+ } else {
+ p_err("expected 'xlated' or 'jited', got: %s", *argv);
+ return -1;
+ }
+ NEXT_ARG();
+
+ if (argc < 2)
+ usage();
+
+ fds = malloc(sizeof(int));
+ if (!fds) {
+ p_err("mem alloc failed");
+ return -1;
+ }
+ nb_fds = prog_parse_fds(&argc, &argv, &fds);
+ if (nb_fds < 1)
+ goto exit_free;
+
+ if (is_prefix(*argv, "file")) {
+ NEXT_ARG();
+ if (!argc) {
+ p_err("expected file path");
+ goto exit_close;
+ }
+ if (nb_fds > 1) {
+ p_err("several programs matched");
+ goto exit_close;
+ }
+
+ filepath = *argv;
+ NEXT_ARG();
+ } else if (is_prefix(*argv, "opcodes")) {
+ opcodes = true;
+ NEXT_ARG();
+ } else if (is_prefix(*argv, "visual")) {
+ if (nb_fds > 1) {
+ p_err("several programs matched");
+ goto exit_close;
+ }
+
+ visual = true;
+ NEXT_ARG();
+ } else if (is_prefix(*argv, "linum")) {
+ linum = true;
+ NEXT_ARG();
+ }
+
+ if (argc) {
+ usage();
+ goto exit_close;
+ }
+
+ if (mode == DUMP_JITED)
+ arrays = 1UL << BPF_PROG_INFO_JITED_INSNS;
+ else
+ arrays = 1UL << BPF_PROG_INFO_XLATED_INSNS;
+
+ arrays |= 1UL << BPF_PROG_INFO_JITED_KSYMS;
+ arrays |= 1UL << BPF_PROG_INFO_JITED_FUNC_LENS;
+ arrays |= 1UL << BPF_PROG_INFO_FUNC_INFO;
+ arrays |= 1UL << BPF_PROG_INFO_LINE_INFO;
+ arrays |= 1UL << BPF_PROG_INFO_JITED_LINE_INFO;
+
+ if (json_output && nb_fds > 1)
+ jsonw_start_array(json_wtr); /* root array */
+ for (i = 0; i < nb_fds; i++) {
+ info_linear = bpf_program__get_prog_info_linear(fds[i], arrays);
+ if (IS_ERR_OR_NULL(info_linear)) {
+ p_err("can't get prog info: %s", strerror(errno));
+ break;
+ }
+
+ if (json_output && nb_fds > 1) {
+ jsonw_start_object(json_wtr); /* prog object */
+ print_prog_header_json(&info_linear->info);
+ jsonw_name(json_wtr, "insns");
+ } else if (nb_fds > 1) {
+ print_prog_header_plain(&info_linear->info);
+ }
+
+ err = prog_dump(&info_linear->info, mode, filepath, opcodes,
+ visual, linum);
+
+ if (json_output && nb_fds > 1)
+ jsonw_end_object(json_wtr); /* prog object */
+ else if (i != nb_fds - 1 && nb_fds > 1)
+ printf("\n");
+
+ free(info_linear);
+ if (err)
+ break;
+ close(fds[i]);
+ }
+ if (json_output && nb_fds > 1)
+ jsonw_end_array(json_wtr); /* root array */
+
+exit_close:
+ for (; i < nb_fds; i++)
+ close(fds[i]);
+exit_free:
+ free(fds);
+ return err;
}
static int do_pin(int argc, char **argv)
diff --git a/tools/bpf/bpftool/xlated_dumper.c b/tools/bpf/bpftool/xlated_dumper.c
index 494d7ae3614d..5b91ee65a080 100644
--- a/tools/bpf/bpftool/xlated_dumper.c
+++ b/tools/bpf/bpftool/xlated_dumper.c
@@ -174,7 +174,7 @@ static const char *print_call(void *private_data,
struct kernel_sym *sym;
if (insn->src_reg == BPF_PSEUDO_CALL &&
- (__u32) insn->imm < dd->nr_jited_ksyms)
+ (__u32) insn->imm < dd->nr_jited_ksyms && dd->jited_ksyms)
address = dd->jited_ksyms[insn->imm];
sym = kernel_syms_search(dd, address);
diff --git a/tools/include/uapi/asm/bpf_perf_event.h b/tools/include/uapi/asm/bpf_perf_event.h
index 13a58531e6fa..39acc149d843 100644
--- a/tools/include/uapi/asm/bpf_perf_event.h
+++ b/tools/include/uapi/asm/bpf_perf_event.h
@@ -2,6 +2,8 @@
#include "../../arch/arm64/include/uapi/asm/bpf_perf_event.h"
#elif defined(__s390__)
#include "../../arch/s390/include/uapi/asm/bpf_perf_event.h"
+#elif defined(__riscv)
+#include "../../arch/riscv/include/uapi/asm/bpf_perf_event.h"
#else
#include <uapi/asm-generic/bpf_perf_event.h>
#endif
diff --git a/tools/include/uapi/drm/drm.h b/tools/include/uapi/drm/drm.h
index 8a5b2f8f8eb9..868bf7996c0f 100644
--- a/tools/include/uapi/drm/drm.h
+++ b/tools/include/uapi/drm/drm.h
@@ -778,11 +778,12 @@ struct drm_syncobj_array {
__u32 pad;
};
+#define DRM_SYNCOBJ_QUERY_FLAGS_LAST_SUBMITTED (1 << 0) /* last available point on timeline syncobj */
struct drm_syncobj_timeline_array {
__u64 handles;
__u64 points;
__u32 count_handles;
- __u32 pad;
+ __u32 flags;
};
diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h
index 469dc512cca3..5400d7e057f1 100644
--- a/tools/include/uapi/drm/i915_drm.h
+++ b/tools/include/uapi/drm/i915_drm.h
@@ -611,6 +611,13 @@ typedef struct drm_i915_irq_wait {
* See I915_EXEC_FENCE_OUT and I915_EXEC_FENCE_SUBMIT.
*/
#define I915_PARAM_HAS_EXEC_SUBMIT_FENCE 53
+
+/*
+ * Revision of the i915-perf uAPI. The value returned helps determine what
+ * i915-perf features are available. See drm_i915_perf_property_id.
+ */
+#define I915_PARAM_PERF_REVISION 54
+
/* Must be kept compact -- no holes and well documented */
typedef struct drm_i915_getparam {
@@ -1565,6 +1572,21 @@ struct drm_i915_gem_context_param {
* i915_context_engines_bond (I915_CONTEXT_ENGINES_EXT_BOND)
*/
#define I915_CONTEXT_PARAM_ENGINES 0xa
+
+/*
+ * I915_CONTEXT_PARAM_PERSISTENCE:
+ *
+ * Allow the context and active rendering to survive the process until
+ * completion. Persistence allows fire-and-forget clients to queue up a
+ * bunch of work, hand the output over to a display server and then quit.
+ * If the context is marked as not persistent, upon closing (either via
+ * an explicit DRM_I915_GEM_CONTEXT_DESTROY or implicitly from file closure
+ * or process termination), the context and any outstanding requests will be
+ * cancelled (and exported fences for cancelled requests marked as -EIO).
+ *
+ * By default, new contexts allow persistence.
+ */
+#define I915_CONTEXT_PARAM_PERSISTENCE 0xb
/* Must be kept compact -- no holes and well documented */
__u64 value;
@@ -1844,23 +1866,31 @@ enum drm_i915_perf_property_id {
* Open the stream for a specific context handle (as used with
* execbuffer2). A stream opened for a specific context this way
* won't typically require root privileges.
+ *
+ * This property is available in perf revision 1.
*/
DRM_I915_PERF_PROP_CTX_HANDLE = 1,
/**
* A value of 1 requests the inclusion of raw OA unit reports as
* part of stream samples.
+ *
+ * This property is available in perf revision 1.
*/
DRM_I915_PERF_PROP_SAMPLE_OA,
/**
* The value specifies which set of OA unit metrics should be
* be configured, defining the contents of any OA unit reports.
+ *
+ * This property is available in perf revision 1.
*/
DRM_I915_PERF_PROP_OA_METRICS_SET,
/**
* The value specifies the size and layout of OA unit reports.
+ *
+ * This property is available in perf revision 1.
*/
DRM_I915_PERF_PROP_OA_FORMAT,
@@ -1870,9 +1900,22 @@ enum drm_i915_perf_property_id {
* from this exponent as follows:
*
* 80ns * 2^(period_exponent + 1)
+ *
+ * This property is available in perf revision 1.
*/
DRM_I915_PERF_PROP_OA_EXPONENT,
+ /**
+ * Specifying this property is only valid when specify a context to
+ * filter with DRM_I915_PERF_PROP_CTX_HANDLE. Specifying this property
+ * will hold preemption of the particular context we want to gather
+ * performance data about. The execbuf2 submissions must include a
+ * drm_i915_gem_execbuffer_ext_perf parameter for this to apply.
+ *
+ * This property is available in perf revision 3.
+ */
+ DRM_I915_PERF_PROP_HOLD_PREEMPTION,
+
DRM_I915_PERF_PROP_MAX /* non-ABI */
};
@@ -1901,6 +1944,8 @@ struct drm_i915_perf_open_param {
* to close and re-open a stream with the same configuration.
*
* It's undefined whether any pending data for the stream will be lost.
+ *
+ * This ioctl is available in perf revision 1.
*/
#define I915_PERF_IOCTL_ENABLE _IO('i', 0x0)
@@ -1908,10 +1953,25 @@ struct drm_i915_perf_open_param {
* Disable data capture for a stream.
*
* It is an error to try and read a stream that is disabled.
+ *
+ * This ioctl is available in perf revision 1.
*/
#define I915_PERF_IOCTL_DISABLE _IO('i', 0x1)
/**
+ * Change metrics_set captured by a stream.
+ *
+ * If the stream is bound to a specific context, the configuration change
+ * will performed inline with that context such that it takes effect before
+ * the next execbuf submission.
+ *
+ * Returns the previously bound metrics set id, or a negative error code.
+ *
+ * This ioctl is available in perf revision 2.
+ */
+#define I915_PERF_IOCTL_CONFIG _IO('i', 0x2)
+
+/**
* Common to all i915 perf records
*/
struct drm_i915_perf_record_header {
@@ -1984,6 +2044,7 @@ struct drm_i915_query_item {
__u64 query_id;
#define DRM_I915_QUERY_TOPOLOGY_INFO 1
#define DRM_I915_QUERY_ENGINE_INFO 2
+#define DRM_I915_QUERY_PERF_CONFIG 3
/* Must be kept compact -- no holes and well documented */
/*
@@ -1995,9 +2056,18 @@ struct drm_i915_query_item {
__s32 length;
/*
- * Unused for now. Must be cleared to zero.
+ * When query_id == DRM_I915_QUERY_TOPOLOGY_INFO, must be 0.
+ *
+ * When query_id == DRM_I915_QUERY_PERF_CONFIG, must be one of the
+ * following :
+ * - DRM_I915_QUERY_PERF_CONFIG_LIST
+ * - DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID
+ * - DRM_I915_QUERY_PERF_CONFIG_FOR_UUID
*/
__u32 flags;
+#define DRM_I915_QUERY_PERF_CONFIG_LIST 1
+#define DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID 2
+#define DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_ID 3
/*
* Data will be written at the location pointed by data_ptr when the
@@ -2033,8 +2103,10 @@ struct drm_i915_query {
* (data[X / 8] >> (X % 8)) & 1
*
* - the subslice mask for each slice with one bit per subslice telling
- * whether a subslice is available. The availability of subslice Y in slice
- * X can be queried with the following formula :
+ * whether a subslice is available. Gen12 has dual-subslices, which are
+ * similar to two gen11 subslices. For gen12, this array represents dual-
+ * subslices. The availability of subslice Y in slice X can be queried
+ * with the following formula :
*
* (data[subslice_offset +
* X * subslice_stride +
@@ -2123,6 +2195,56 @@ struct drm_i915_query_engine_info {
struct drm_i915_engine_info engines[];
};
+/*
+ * Data written by the kernel with query DRM_I915_QUERY_PERF_CONFIG.
+ */
+struct drm_i915_query_perf_config {
+ union {
+ /*
+ * When query_item.flags == DRM_I915_QUERY_PERF_CONFIG_LIST, i915 sets
+ * this fields to the number of configurations available.
+ */
+ __u64 n_configs;
+
+ /*
+ * When query_id == DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_ID,
+ * i915 will use the value in this field as configuration
+ * identifier to decide what data to write into config_ptr.
+ */
+ __u64 config;
+
+ /*
+ * When query_id == DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID,
+ * i915 will use the value in this field as configuration
+ * identifier to decide what data to write into config_ptr.
+ *
+ * String formatted like "%08x-%04x-%04x-%04x-%012x"
+ */
+ char uuid[36];
+ };
+
+ /*
+ * Unused for now. Must be cleared to zero.
+ */
+ __u32 flags;
+
+ /*
+ * When query_item.flags == DRM_I915_QUERY_PERF_CONFIG_LIST, i915 will
+ * write an array of __u64 of configuration identifiers.
+ *
+ * When query_item.flags == DRM_I915_QUERY_PERF_CONFIG_DATA, i915 will
+ * write a struct drm_i915_perf_oa_config. If the following fields of
+ * drm_i915_perf_oa_config are set not set to 0, i915 will write into
+ * the associated pointers the values of submitted when the
+ * configuration was created :
+ *
+ * - n_mux_regs
+ * - n_boolean_regs
+ * - n_flex_regs
+ */
+ __u8 data[];
+};
+
#if defined(__cplusplus)
}
#endif
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index dbbcf0b02970..7df436da542d 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -231,6 +231,11 @@ enum bpf_attach_type {
* When children program makes decision (like picking TCP CA or sock bind)
* parent program has a chance to override it.
*
+ * With BPF_F_ALLOW_MULTI a new program is added to the end of the list of
+ * programs for a cgroup. Though it's possible to replace an old program at
+ * any position by also specifying BPF_F_REPLACE flag and position itself in
+ * replace_bpf_fd attribute. Old program at this position will be released.
+ *
* A cgroup with MULTI or OVERRIDE flag allows any attach flags in sub-cgroups.
* A cgroup with NONE doesn't allow any programs in sub-cgroups.
* Ex1:
@@ -249,6 +254,7 @@ enum bpf_attach_type {
*/
#define BPF_F_ALLOW_OVERRIDE (1U << 0)
#define BPF_F_ALLOW_MULTI (1U << 1)
+#define BPF_F_REPLACE (1U << 2)
/* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the
* verifier will perform strict alignment checking as if the kernel
@@ -442,6 +448,10 @@ union bpf_attr {
__u32 attach_bpf_fd; /* eBPF program to attach */
__u32 attach_type;
__u32 attach_flags;
+ __u32 replace_bpf_fd; /* previously attached eBPF
+ * program to replace if
+ * BPF_F_REPLACE is used
+ */
};
struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */
diff --git a/tools/include/uapi/linux/btf.h b/tools/include/uapi/linux/btf.h
index 63ae4a39e58b..1a2898c482ee 100644
--- a/tools/include/uapi/linux/btf.h
+++ b/tools/include/uapi/linux/btf.h
@@ -22,9 +22,9 @@ struct btf_header {
};
/* Max # of type identifier */
-#define BTF_MAX_TYPE 0x0000ffff
+#define BTF_MAX_TYPE 0x000fffff
/* Max offset into the string section */
-#define BTF_MAX_NAME_OFFSET 0x0000ffff
+#define BTF_MAX_NAME_OFFSET 0x00ffffff
/* Max # of struct/union/enum members or func args */
#define BTF_MAX_VLEN 0xffff
@@ -142,7 +142,8 @@ struct btf_param {
enum {
BTF_VAR_STATIC = 0,
- BTF_VAR_GLOBAL_ALLOCATED,
+ BTF_VAR_GLOBAL_ALLOCATED = 1,
+ BTF_VAR_GLOBAL_EXTERN = 2,
};
/* BTF_KIND_VAR is followed by a single "struct btf_var" to describe
diff --git a/tools/include/uapi/linux/fscrypt.h b/tools/include/uapi/linux/fscrypt.h
index 39ccfe9311c3..1beb174ad950 100644
--- a/tools/include/uapi/linux/fscrypt.h
+++ b/tools/include/uapi/linux/fscrypt.h
@@ -17,7 +17,8 @@
#define FSCRYPT_POLICY_FLAGS_PAD_32 0x03
#define FSCRYPT_POLICY_FLAGS_PAD_MASK 0x03
#define FSCRYPT_POLICY_FLAG_DIRECT_KEY 0x04
-#define FSCRYPT_POLICY_FLAGS_VALID 0x07
+#define FSCRYPT_POLICY_FLAG_IV_INO_LBLK_64 0x08
+#define FSCRYPT_POLICY_FLAGS_VALID 0x0F
/* Encryption algorithms */
#define FSCRYPT_MODE_AES_256_XTS 1
diff --git a/tools/include/uapi/linux/in.h b/tools/include/uapi/linux/in.h
index e7ad9d350a28..1521073b6348 100644
--- a/tools/include/uapi/linux/in.h
+++ b/tools/include/uapi/linux/in.h
@@ -76,6 +76,8 @@ enum {
#define IPPROTO_MPLS IPPROTO_MPLS
IPPROTO_RAW = 255, /* Raw IP packets */
#define IPPROTO_RAW IPPROTO_RAW
+ IPPROTO_MPTCP = 262, /* Multipath TCP connection */
+#define IPPROTO_MPTCP IPPROTO_MPTCP
IPPROTO_MAX
};
#endif
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index 52641d8ca9e8..f0a16b4adbbd 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -235,6 +235,7 @@ struct kvm_hyperv_exit {
#define KVM_EXIT_S390_STSI 25
#define KVM_EXIT_IOAPIC_EOI 26
#define KVM_EXIT_HYPERV 27
+#define KVM_EXIT_ARM_NISV 28
/* For KVM_EXIT_INTERNAL_ERROR */
/* Emulate instruction failed. */
@@ -394,6 +395,11 @@ struct kvm_run {
} eoi;
/* KVM_EXIT_HYPERV */
struct kvm_hyperv_exit hyperv;
+ /* KVM_EXIT_ARM_NISV */
+ struct {
+ __u64 esr_iss;
+ __u64 fault_ipa;
+ } arm_nisv;
/* Fix the size of the union. */
char padding[256];
};
@@ -1000,6 +1006,9 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_PMU_EVENT_FILTER 173
#define KVM_CAP_ARM_IRQ_LINE_LAYOUT_2 174
#define KVM_CAP_HYPERV_DIRECT_TLBFLUSH 175
+#define KVM_CAP_PPC_GUEST_DEBUG_SSTEP 176
+#define KVM_CAP_ARM_NISV_TO_USER 177
+#define KVM_CAP_ARM_INJECT_EXT_DABT 178
#ifdef KVM_CAP_IRQ_ROUTING
@@ -1227,6 +1236,8 @@ enum kvm_device_type {
#define KVM_DEV_TYPE_ARM_VGIC_ITS KVM_DEV_TYPE_ARM_VGIC_ITS
KVM_DEV_TYPE_XIVE,
#define KVM_DEV_TYPE_XIVE KVM_DEV_TYPE_XIVE
+ KVM_DEV_TYPE_ARM_PV_TIME,
+#define KVM_DEV_TYPE_ARM_PV_TIME KVM_DEV_TYPE_ARM_PV_TIME
KVM_DEV_TYPE_MAX,
};
@@ -1337,6 +1348,7 @@ struct kvm_s390_ucas_mapping {
#define KVM_PPC_GET_CPU_CHAR _IOR(KVMIO, 0xb1, struct kvm_ppc_cpu_char)
/* Available with KVM_CAP_PMU_EVENT_FILTER */
#define KVM_SET_PMU_EVENT_FILTER _IOW(KVMIO, 0xb2, struct kvm_pmu_event_filter)
+#define KVM_PPC_SVM_OFF _IO(KVMIO, 0xb3)
/* ioctl for vm fd */
#define KVM_CREATE_DEVICE _IOWR(KVMIO, 0xe0, struct kvm_create_device)
diff --git a/tools/include/uapi/linux/sched.h b/tools/include/uapi/linux/sched.h
index 99335e1f4a27..4a0217832464 100644
--- a/tools/include/uapi/linux/sched.h
+++ b/tools/include/uapi/linux/sched.h
@@ -33,27 +33,48 @@
#define CLONE_NEWNET 0x40000000 /* New network namespace */
#define CLONE_IO 0x80000000 /* Clone io context */
+/* Flags for the clone3() syscall. */
+#define CLONE_CLEAR_SIGHAND 0x100000000ULL /* Clear any signal handler and reset to SIG_DFL. */
+
#ifndef __ASSEMBLY__
/**
* struct clone_args - arguments for the clone3 syscall
- * @flags: Flags for the new process as listed above.
- * All flags are valid except for CSIGNAL and
- * CLONE_DETACHED.
- * @pidfd: If CLONE_PIDFD is set, a pidfd will be
- * returned in this argument.
- * @child_tid: If CLONE_CHILD_SETTID is set, the TID of the
- * child process will be returned in the child's
- * memory.
- * @parent_tid: If CLONE_PARENT_SETTID is set, the TID of
- * the child process will be returned in the
- * parent's memory.
- * @exit_signal: The exit_signal the parent process will be
- * sent when the child exits.
- * @stack: Specify the location of the stack for the
- * child process.
- * @stack_size: The size of the stack for the child process.
- * @tls: If CLONE_SETTLS is set, the tls descriptor
- * is set to tls.
+ * @flags: Flags for the new process as listed above.
+ * All flags are valid except for CSIGNAL and
+ * CLONE_DETACHED.
+ * @pidfd: If CLONE_PIDFD is set, a pidfd will be
+ * returned in this argument.
+ * @child_tid: If CLONE_CHILD_SETTID is set, the TID of the
+ * child process will be returned in the child's
+ * memory.
+ * @parent_tid: If CLONE_PARENT_SETTID is set, the TID of
+ * the child process will be returned in the
+ * parent's memory.
+ * @exit_signal: The exit_signal the parent process will be
+ * sent when the child exits.
+ * @stack: Specify the location of the stack for the
+ * child process.
+ * Note, @stack is expected to point to the
+ * lowest address. The stack direction will be
+ * determined by the kernel and set up
+ * appropriately based on @stack_size.
+ * @stack_size: The size of the stack for the child process.
+ * @tls: If CLONE_SETTLS is set, the tls descriptor
+ * is set to tls.
+ * @set_tid: Pointer to an array of type *pid_t. The size
+ * of the array is defined using @set_tid_size.
+ * This array is used to select PIDs/TIDs for
+ * newly created processes. The first element in
+ * this defines the PID in the most nested PID
+ * namespace. Each additional element in the array
+ * defines the PID in the parent PID namespace of
+ * the original PID namespace. If the array has
+ * less entries than the number of currently
+ * nested PID namespaces only the PIDs in the
+ * corresponding namespaces are set.
+ * @set_tid_size: This defines the size of the array referenced
+ * in @set_tid. This cannot be larger than the
+ * kernel's limit of nested PID namespaces.
*
* The structure is versioned by size and thus extensible.
* New struct members must go at the end of the struct and
@@ -68,10 +89,13 @@ struct clone_args {
__aligned_u64 stack;
__aligned_u64 stack_size;
__aligned_u64 tls;
+ __aligned_u64 set_tid;
+ __aligned_u64 set_tid_size;
};
#endif
#define CLONE_ARGS_SIZE_VER0 64 /* sizeof first published struct */
+#define CLONE_ARGS_SIZE_VER1 80 /* sizeof second published struct */
/*
* Scheduling policies
diff --git a/tools/include/uapi/linux/stat.h b/tools/include/uapi/linux/stat.h
index 7b35e98d3c58..ad80a5c885d5 100644
--- a/tools/include/uapi/linux/stat.h
+++ b/tools/include/uapi/linux/stat.h
@@ -167,8 +167,8 @@ struct statx {
#define STATX_ATTR_APPEND 0x00000020 /* [I] File is append-only */
#define STATX_ATTR_NODUMP 0x00000040 /* [I] File is not to be dumped */
#define STATX_ATTR_ENCRYPTED 0x00000800 /* [I] File requires key to decrypt in fs */
-
#define STATX_ATTR_AUTOMOUNT 0x00001000 /* Dir: Automount trigger */
+#define STATX_ATTR_VERITY 0x00100000 /* [I] Verity protected file */
#endif /* _UAPI_LINUX_STAT_H */
diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index defae23a0169..d87830e7ea63 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -56,8 +56,8 @@ ifndef VERBOSE
endif
FEATURE_USER = .libbpf
-FEATURE_TESTS = libelf libelf-mmap bpf reallocarray
-FEATURE_DISPLAY = libelf bpf
+FEATURE_TESTS = libelf libelf-mmap zlib bpf reallocarray
+FEATURE_DISPLAY = libelf zlib bpf
INCLUDES = -I. -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(ARCH)/include/uapi -I$(srctree)/tools/include/uapi
FEATURE_CHECK_CFLAGS-bpf = $(INCLUDES)
@@ -138,6 +138,7 @@ STATIC_OBJDIR := $(OUTPUT)staticobjs/
BPF_IN_SHARED := $(SHARED_OBJDIR)libbpf-in.o
BPF_IN_STATIC := $(STATIC_OBJDIR)libbpf-in.o
VERSION_SCRIPT := libbpf.map
+BPF_HELPER_DEFS := $(OUTPUT)bpf_helper_defs.h
LIB_TARGET := $(addprefix $(OUTPUT),$(LIB_TARGET))
LIB_FILE := $(addprefix $(OUTPUT),$(LIB_FILE))
@@ -147,6 +148,7 @@ TAGS_PROG := $(if $(shell which etags 2>/dev/null),etags,ctags)
GLOBAL_SYM_COUNT = $(shell readelf -s --wide $(BPF_IN_SHARED) | \
cut -d "@" -f1 | sed 's/_v[0-9]_[0-9]_[0-9].*//' | \
+ sed 's/\[.*\]//' | \
awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}' | \
sort -u | wc -l)
VERSIONED_SYM_COUNT = $(shell readelf -s --wide $(OUTPUT)libbpf.so | \
@@ -159,7 +161,7 @@ all: fixdep
all_cmd: $(CMD_TARGETS) check
-$(BPF_IN_SHARED): force elfdep bpfdep bpf_helper_defs.h
+$(BPF_IN_SHARED): force elfdep zdep bpfdep $(BPF_HELPER_DEFS)
@(test -f ../../include/uapi/linux/bpf.h -a -f ../../../include/uapi/linux/bpf.h && ( \
(diff -B ../../include/uapi/linux/bpf.h ../../../include/uapi/linux/bpf.h >/dev/null) || \
echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/bpf.h' differs from latest version at 'include/uapi/linux/bpf.h'" >&2 )) || true
@@ -177,19 +179,19 @@ $(BPF_IN_SHARED): force elfdep bpfdep bpf_helper_defs.h
echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/if_xdp.h' differs from latest version at 'include/uapi/linux/if_xdp.h'" >&2 )) || true
$(Q)$(MAKE) $(build)=libbpf OUTPUT=$(SHARED_OBJDIR) CFLAGS="$(CFLAGS) $(SHLIB_FLAGS)"
-$(BPF_IN_STATIC): force elfdep bpfdep bpf_helper_defs.h
+$(BPF_IN_STATIC): force elfdep zdep bpfdep $(BPF_HELPER_DEFS)
$(Q)$(MAKE) $(build)=libbpf OUTPUT=$(STATIC_OBJDIR)
-bpf_helper_defs.h: $(srctree)/tools/include/uapi/linux/bpf.h
+$(BPF_HELPER_DEFS): $(srctree)/tools/include/uapi/linux/bpf.h
$(Q)$(srctree)/scripts/bpf_helpers_doc.py --header \
- --file $(srctree)/tools/include/uapi/linux/bpf.h > bpf_helper_defs.h
+ --file $(srctree)/tools/include/uapi/linux/bpf.h > $(BPF_HELPER_DEFS)
$(OUTPUT)libbpf.so: $(OUTPUT)libbpf.so.$(LIBBPF_VERSION)
$(OUTPUT)libbpf.so.$(LIBBPF_VERSION): $(BPF_IN_SHARED)
$(QUIET_LINK)$(CC) $(LDFLAGS) \
--shared -Wl,-soname,libbpf.so.$(LIBBPF_MAJOR_VERSION) \
- -Wl,--version-script=$(VERSION_SCRIPT) $^ -lelf -o $@
+ -Wl,--version-script=$(VERSION_SCRIPT) $^ -lelf -lz -o $@
@ln -sf $(@F) $(OUTPUT)libbpf.so
@ln -sf $(@F) $(OUTPUT)libbpf.so.$(LIBBPF_MAJOR_VERSION)
@@ -213,6 +215,7 @@ check_abi: $(OUTPUT)libbpf.so
"versioned in $(VERSION_SCRIPT)." >&2; \
readelf -s --wide $(BPF_IN_SHARED) | \
cut -d "@" -f1 | sed 's/_v[0-9]_[0-9]_[0-9].*//' | \
+ sed 's/\[.*\]//' | \
awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}'| \
sort -u > $(OUTPUT)libbpf_global_syms.tmp; \
readelf -s --wide $(OUTPUT)libbpf.so | \
@@ -243,15 +246,16 @@ install_lib: all_cmd
$(call do_install_mkdir,$(libdir_SQ)); \
cp -fpR $(LIB_FILE) $(DESTDIR)$(libdir_SQ)
-install_headers: bpf_helper_defs.h
+install_headers: $(BPF_HELPER_DEFS)
$(call QUIET_INSTALL, headers) \
$(call do_install,bpf.h,$(prefix)/include/bpf,644); \
$(call do_install,libbpf.h,$(prefix)/include/bpf,644); \
$(call do_install,btf.h,$(prefix)/include/bpf,644); \
$(call do_install,libbpf_util.h,$(prefix)/include/bpf,644); \
+ $(call do_install,libbpf_common.h,$(prefix)/include/bpf,644); \
$(call do_install,xsk.h,$(prefix)/include/bpf,644); \
$(call do_install,bpf_helpers.h,$(prefix)/include/bpf,644); \
- $(call do_install,bpf_helper_defs.h,$(prefix)/include/bpf,644); \
+ $(call do_install,$(BPF_HELPER_DEFS),$(prefix)/include/bpf,644); \
$(call do_install,bpf_tracing.h,$(prefix)/include/bpf,644); \
$(call do_install,bpf_endian.h,$(prefix)/include/bpf,644); \
$(call do_install,bpf_core_read.h,$(prefix)/include/bpf,644);
@@ -271,18 +275,21 @@ config-clean:
clean:
$(call QUIET_CLEAN, libbpf) $(RM) -rf $(CMD_TARGETS) \
*.o *~ *.a *.so *.so.$(LIBBPF_MAJOR_VERSION) .*.d .*.cmd \
- *.pc LIBBPF-CFLAGS bpf_helper_defs.h \
+ *.pc LIBBPF-CFLAGS $(BPF_HELPER_DEFS) \
$(SHARED_OBJDIR) $(STATIC_OBJDIR)
$(call QUIET_CLEAN, core-gen) $(RM) $(OUTPUT)FEATURE-DUMP.libbpf
-PHONY += force elfdep bpfdep cscope tags
+PHONY += force elfdep zdep bpfdep cscope tags
force:
elfdep:
@if [ "$(feature-libelf)" != "1" ]; then echo "No libelf found"; exit 1 ; fi
+zdep:
+ @if [ "$(feature-zlib)" != "1" ]; then echo "No zlib found"; exit 1 ; fi
+
bpfdep:
@if [ "$(feature-bpf)" != "1" ]; then echo "BPF API too old"; exit 1 ; fi
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 98596e15390f..a787d53699c8 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -467,13 +467,28 @@ int bpf_obj_get(const char *pathname)
int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type,
unsigned int flags)
{
+ DECLARE_LIBBPF_OPTS(bpf_prog_attach_opts, opts,
+ .flags = flags,
+ );
+
+ return bpf_prog_attach_xattr(prog_fd, target_fd, type, &opts);
+}
+
+int bpf_prog_attach_xattr(int prog_fd, int target_fd,
+ enum bpf_attach_type type,
+ const struct bpf_prog_attach_opts *opts)
+{
union bpf_attr attr;
+ if (!OPTS_VALID(opts, bpf_prog_attach_opts))
+ return -EINVAL;
+
memset(&attr, 0, sizeof(attr));
attr.target_fd = target_fd;
attr.attach_bpf_fd = prog_fd;
attr.attach_type = type;
- attr.attach_flags = flags;
+ attr.attach_flags = OPTS_GET(opts, flags, 0);
+ attr.replace_bpf_fd = OPTS_GET(opts, replace_prog_fd, 0);
return sys_bpf(BPF_PROG_ATTACH, &attr, sizeof(attr));
}
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 3c791fa8e68e..f0ab8519986e 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -28,14 +28,12 @@
#include <stddef.h>
#include <stdint.h>
+#include "libbpf_common.h"
+
#ifdef __cplusplus
extern "C" {
#endif
-#ifndef LIBBPF_API
-#define LIBBPF_API __attribute__((visibility("default")))
-#endif
-
struct bpf_create_map_attr {
const char *name;
enum bpf_map_type map_type;
@@ -128,8 +126,19 @@ LIBBPF_API int bpf_map_get_next_key(int fd, const void *key, void *next_key);
LIBBPF_API int bpf_map_freeze(int fd);
LIBBPF_API int bpf_obj_pin(int fd, const char *pathname);
LIBBPF_API int bpf_obj_get(const char *pathname);
+
+struct bpf_prog_attach_opts {
+ size_t sz; /* size of this struct for forward/backward compatibility */
+ unsigned int flags;
+ int replace_prog_fd;
+};
+#define bpf_prog_attach_opts__last_field replace_prog_fd
+
LIBBPF_API int bpf_prog_attach(int prog_fd, int attachable_fd,
enum bpf_attach_type type, unsigned int flags);
+LIBBPF_API int bpf_prog_attach_xattr(int prog_fd, int attachable_fd,
+ enum bpf_attach_type type,
+ const struct bpf_prog_attach_opts *opts);
LIBBPF_API int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type);
LIBBPF_API int bpf_prog_detach2(int prog_fd, int attachable_fd,
enum bpf_attach_type type);
diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h
index 0c7d28292898..f69cc208778a 100644
--- a/tools/lib/bpf/bpf_helpers.h
+++ b/tools/lib/bpf/bpf_helpers.h
@@ -25,6 +25,9 @@
#ifndef __always_inline
#define __always_inline __attribute__((always_inline))
#endif
+#ifndef __weak
+#define __weak __attribute__((weak))
+#endif
/*
* Helper structure used by eBPF C program
@@ -44,4 +47,12 @@ enum libbpf_pin_type {
LIBBPF_PIN_BY_NAME,
};
+enum libbpf_tristate {
+ TRI_NO = 0,
+ TRI_YES = 1,
+ TRI_MODULE = 2,
+};
+
+#define __kconfig __attribute__((section(".kconfig")))
+
#endif
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 88efa2bb7137..5f04f56e1eb6 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -278,6 +278,45 @@ done:
return nelems * size;
}
+int btf__align_of(const struct btf *btf, __u32 id)
+{
+ const struct btf_type *t = btf__type_by_id(btf, id);
+ __u16 kind = btf_kind(t);
+
+ switch (kind) {
+ case BTF_KIND_INT:
+ case BTF_KIND_ENUM:
+ return min(sizeof(void *), t->size);
+ case BTF_KIND_PTR:
+ return sizeof(void *);
+ case BTF_KIND_TYPEDEF:
+ case BTF_KIND_VOLATILE:
+ case BTF_KIND_CONST:
+ case BTF_KIND_RESTRICT:
+ return btf__align_of(btf, t->type);
+ case BTF_KIND_ARRAY:
+ return btf__align_of(btf, btf_array(t)->type);
+ case BTF_KIND_STRUCT:
+ case BTF_KIND_UNION: {
+ const struct btf_member *m = btf_members(t);
+ __u16 vlen = btf_vlen(t);
+ int i, max_align = 1, align;
+
+ for (i = 0; i < vlen; i++, m++) {
+ align = btf__align_of(btf, m->type);
+ if (align <= 0)
+ return align;
+ max_align = max(max_align, align);
+ }
+
+ return max_align;
+ }
+ default:
+ pr_warn("unsupported BTF_KIND:%u\n", btf_kind(t));
+ return 0;
+ }
+}
+
int btf__resolve_type(const struct btf *btf, __u32 type_id)
{
const struct btf_type *t;
@@ -539,6 +578,12 @@ static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf,
return -ENOENT;
}
+ /* .extern datasec size and var offsets were set correctly during
+ * extern collection step, so just skip straight to sorting variables
+ */
+ if (t->size)
+ goto sort_vars;
+
ret = bpf_object__section_size(obj, name, &size);
if (ret || !size || (t->size && t->size != size)) {
pr_debug("Invalid size for section %s: %u bytes\n", name, size);
@@ -575,7 +620,8 @@ static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf,
vsi->offset = off;
}
- qsort(t + 1, vars, sizeof(*vsi), compare_vsi_off);
+sort_vars:
+ qsort(btf_var_secinfos(t), vars, sizeof(*vsi), compare_vsi_off);
return 0;
}
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index d9ac73a02cde..8d73f7f5551f 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -8,14 +8,12 @@
#include <linux/btf.h>
#include <linux/types.h>
+#include "libbpf_common.h"
+
#ifdef __cplusplus
extern "C" {
#endif
-#ifndef LIBBPF_API
-#define LIBBPF_API __attribute__((visibility("default")))
-#endif
-
#define BTF_ELF_SEC ".BTF"
#define BTF_EXT_ELF_SEC ".BTF.ext"
#define MAPS_ELF_SEC ".maps"
@@ -79,6 +77,7 @@ LIBBPF_API const struct btf_type *btf__type_by_id(const struct btf *btf,
__u32 id);
LIBBPF_API __s64 btf__resolve_size(const struct btf *btf, __u32 type_id);
LIBBPF_API int btf__resolve_type(const struct btf *btf, __u32 type_id);
+LIBBPF_API int btf__align_of(const struct btf *btf, __u32 id);
LIBBPF_API int btf__fd(const struct btf *btf);
LIBBPF_API const void *btf__get_raw_data(const struct btf *btf, __u32 *size);
LIBBPF_API const char *btf__name_by_offset(const struct btf *btf, __u32 offset);
@@ -127,6 +126,28 @@ LIBBPF_API void btf_dump__free(struct btf_dump *d);
LIBBPF_API int btf_dump__dump_type(struct btf_dump *d, __u32 id);
+struct btf_dump_emit_type_decl_opts {
+ /* size of this struct, for forward/backward compatiblity */
+ size_t sz;
+ /* optional field name for type declaration, e.g.:
+ * - struct my_struct <FNAME>
+ * - void (*<FNAME>)(int)
+ * - char (*<FNAME>)[123]
+ */
+ const char *field_name;
+ /* extra indentation level (in number of tabs) to emit for multi-line
+ * type declarations (e.g., anonymous struct); applies for lines
+ * starting from the second one (first line is assumed to have
+ * necessary indentation already
+ */
+ int indent_level;
+};
+#define btf_dump_emit_type_decl_opts__last_field indent_level
+
+LIBBPF_API int
+btf_dump__emit_type_decl(struct btf_dump *d, __u32 id,
+ const struct btf_dump_emit_type_decl_opts *opts);
+
/*
* A set of helpers for easier BTF types handling
*/
diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c
index cb126d8fcf75..e95f7710f210 100644
--- a/tools/lib/bpf/btf_dump.c
+++ b/tools/lib/bpf/btf_dump.c
@@ -116,6 +116,8 @@ static void btf_dump_printf(const struct btf_dump *d, const char *fmt, ...)
va_end(args);
}
+static int btf_dump_mark_referenced(struct btf_dump *d);
+
struct btf_dump *btf_dump__new(const struct btf *btf,
const struct btf_ext *btf_ext,
const struct btf_dump_opts *opts,
@@ -137,18 +139,39 @@ struct btf_dump *btf_dump__new(const struct btf *btf,
if (IS_ERR(d->type_names)) {
err = PTR_ERR(d->type_names);
d->type_names = NULL;
- btf_dump__free(d);
- return ERR_PTR(err);
}
d->ident_names = hashmap__new(str_hash_fn, str_equal_fn, NULL);
if (IS_ERR(d->ident_names)) {
err = PTR_ERR(d->ident_names);
d->ident_names = NULL;
- btf_dump__free(d);
- return ERR_PTR(err);
+ goto err;
+ }
+ d->type_states = calloc(1 + btf__get_nr_types(d->btf),
+ sizeof(d->type_states[0]));
+ if (!d->type_states) {
+ err = -ENOMEM;
+ goto err;
+ }
+ d->cached_names = calloc(1 + btf__get_nr_types(d->btf),
+ sizeof(d->cached_names[0]));
+ if (!d->cached_names) {
+ err = -ENOMEM;
+ goto err;
}
+ /* VOID is special */
+ d->type_states[0].order_state = ORDERED;
+ d->type_states[0].emit_state = EMITTED;
+
+ /* eagerly determine referenced types for anon enums */
+ err = btf_dump_mark_referenced(d);
+ if (err)
+ goto err;
+
return d;
+err:
+ btf_dump__free(d);
+ return ERR_PTR(err);
}
void btf_dump__free(struct btf_dump *d)
@@ -175,7 +198,6 @@ void btf_dump__free(struct btf_dump *d)
free(d);
}
-static int btf_dump_mark_referenced(struct btf_dump *d);
static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr);
static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id);
@@ -202,27 +224,6 @@ int btf_dump__dump_type(struct btf_dump *d, __u32 id)
if (id > btf__get_nr_types(d->btf))
return -EINVAL;
- /* type states are lazily allocated, as they might not be needed */
- if (!d->type_states) {
- d->type_states = calloc(1 + btf__get_nr_types(d->btf),
- sizeof(d->type_states[0]));
- if (!d->type_states)
- return -ENOMEM;
- d->cached_names = calloc(1 + btf__get_nr_types(d->btf),
- sizeof(d->cached_names[0]));
- if (!d->cached_names)
- return -ENOMEM;
-
- /* VOID is special */
- d->type_states[0].order_state = ORDERED;
- d->type_states[0].emit_state = EMITTED;
-
- /* eagerly determine referenced types for anon enums */
- err = btf_dump_mark_referenced(d);
- if (err)
- return err;
- }
-
d->emit_queue_cnt = 0;
err = btf_dump_order_type(d, id, false);
if (err < 0)
@@ -752,41 +753,6 @@ static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id)
}
}
-static int btf_align_of(const struct btf *btf, __u32 id)
-{
- const struct btf_type *t = btf__type_by_id(btf, id);
- __u16 kind = btf_kind(t);
-
- switch (kind) {
- case BTF_KIND_INT:
- case BTF_KIND_ENUM:
- return min(sizeof(void *), t->size);
- case BTF_KIND_PTR:
- return sizeof(void *);
- case BTF_KIND_TYPEDEF:
- case BTF_KIND_VOLATILE:
- case BTF_KIND_CONST:
- case BTF_KIND_RESTRICT:
- return btf_align_of(btf, t->type);
- case BTF_KIND_ARRAY:
- return btf_align_of(btf, btf_array(t)->type);
- case BTF_KIND_STRUCT:
- case BTF_KIND_UNION: {
- const struct btf_member *m = btf_members(t);
- __u16 vlen = btf_vlen(t);
- int i, align = 1;
-
- for (i = 0; i < vlen; i++, m++)
- align = max(align, btf_align_of(btf, m->type));
-
- return align;
- }
- default:
- pr_warn("unsupported BTF_KIND:%u\n", btf_kind(t));
- return 1;
- }
-}
-
static bool btf_is_struct_packed(const struct btf *btf, __u32 id,
const struct btf_type *t)
{
@@ -794,18 +760,18 @@ static bool btf_is_struct_packed(const struct btf *btf, __u32 id,
int align, i, bit_sz;
__u16 vlen;
- align = btf_align_of(btf, id);
+ align = btf__align_of(btf, id);
/* size of a non-packed struct has to be a multiple of its alignment*/
- if (t->size % align)
+ if (align && t->size % align)
return true;
m = btf_members(t);
vlen = btf_vlen(t);
/* all non-bitfield fields have to be naturally aligned */
for (i = 0; i < vlen; i++, m++) {
- align = btf_align_of(btf, m->type);
+ align = btf__align_of(btf, m->type);
bit_sz = btf_member_bitfield_size(t, i);
- if (bit_sz == 0 && m->offset % (8 * align) != 0)
+ if (align && bit_sz == 0 && m->offset % (8 * align) != 0)
return true;
}
@@ -889,7 +855,7 @@ static void btf_dump_emit_struct_def(struct btf_dump *d,
fname = btf_name_of(d, m->name_off);
m_sz = btf_member_bitfield_size(t, i);
m_off = btf_member_bit_offset(t, i);
- align = packed ? 1 : btf_align_of(d->btf, m->type);
+ align = packed ? 1 : btf__align_of(d->btf, m->type);
btf_dump_emit_bit_padding(d, off, m_off, m_sz, align, lvl + 1);
btf_dump_printf(d, "\n%s", pfx(lvl + 1));
@@ -907,7 +873,7 @@ static void btf_dump_emit_struct_def(struct btf_dump *d,
/* pad at the end, if necessary */
if (is_struct) {
- align = packed ? 1 : btf_align_of(d->btf, id);
+ align = packed ? 1 : btf__align_of(d->btf, id);
btf_dump_emit_bit_padding(d, off, t->size * 8, 0, align,
lvl + 1);
}
@@ -1051,6 +1017,21 @@ static int btf_dump_push_decl_stack_id(struct btf_dump *d, __u32 id)
* of a stack frame. Some care is required to "pop" stack frames after
* processing type declaration chain.
*/
+int btf_dump__emit_type_decl(struct btf_dump *d, __u32 id,
+ const struct btf_dump_emit_type_decl_opts *opts)
+{
+ const char *fname;
+ int lvl;
+
+ if (!OPTS_VALID(opts, btf_dump_emit_type_decl_opts))
+ return -EINVAL;
+
+ fname = OPTS_GET(opts, field_name, NULL);
+ lvl = OPTS_GET(opts, indent_level, 0);
+ btf_dump_emit_type_decl(d, id, fname, lvl);
+ return 0;
+}
+
static void btf_dump_emit_type_decl(struct btf_dump *d, __u32 id,
const char *fname, int lvl)
{
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 3f09772192f1..7513165b104f 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -18,6 +18,7 @@
#include <stdarg.h>
#include <libgen.h>
#include <inttypes.h>
+#include <limits.h>
#include <string.h>
#include <unistd.h>
#include <endian.h>
@@ -41,9 +42,11 @@
#include <sys/types.h>
#include <sys/vfs.h>
#include <sys/utsname.h>
+#include <sys/resource.h>
#include <tools/libc_compat.h>
#include <libelf.h>
#include <gelf.h>
+#include <zlib.h>
#include "libbpf.h"
#include "bpf.h"
@@ -99,14 +102,33 @@ void libbpf_print(enum libbpf_print_level level, const char *format, ...)
va_end(args);
}
-#define STRERR_BUFSIZE 128
+static void pr_perm_msg(int err)
+{
+ struct rlimit limit;
+ char buf[100];
+
+ if (err != -EPERM || geteuid() != 0)
+ return;
+
+ err = getrlimit(RLIMIT_MEMLOCK, &limit);
+ if (err)
+ return;
+
+ if (limit.rlim_cur == RLIM_INFINITY)
+ return;
+
+ if (limit.rlim_cur < 1024)
+ snprintf(buf, sizeof(buf), "%zu bytes", (size_t)limit.rlim_cur);
+ else if (limit.rlim_cur < 1024*1024)
+ snprintf(buf, sizeof(buf), "%.1f KiB", (double)limit.rlim_cur / 1024);
+ else
+ snprintf(buf, sizeof(buf), "%.1f MiB", (double)limit.rlim_cur / (1024*1024));
-#define CHECK_ERR(action, err, out) do { \
- err = action; \
- if (err) \
- goto out; \
-} while (0)
+ pr_warn("permission error while running as root; try raising 'ulimit -l'? current value: %s\n",
+ buf);
+}
+#define STRERR_BUFSIZE 128
/* Copied from tools/perf/util/util.h */
#ifndef zfree
@@ -146,6 +168,20 @@ struct bpf_capabilities {
__u32 array_mmap:1;
};
+enum reloc_type {
+ RELO_LD64,
+ RELO_CALL,
+ RELO_DATA,
+ RELO_EXTERN,
+};
+
+struct reloc_desc {
+ enum reloc_type type;
+ int insn_idx;
+ int map_idx;
+ int sym_off;
+};
+
/*
* bpf_prog should be a better name but it has been used in
* linux/filter.h.
@@ -164,16 +200,7 @@ struct bpf_program {
size_t insns_cnt, main_prog_cnt;
enum bpf_prog_type type;
- struct reloc_desc {
- enum {
- RELO_LD64,
- RELO_CALL,
- RELO_DATA,
- } type;
- int insn_idx;
- int map_idx;
- int sym_off;
- } *reloc_desc;
+ struct reloc_desc *reloc_desc;
int nr_reloc;
int log_level;
@@ -202,22 +229,29 @@ struct bpf_program {
__u32 prog_flags;
};
+#define DATA_SEC ".data"
+#define BSS_SEC ".bss"
+#define RODATA_SEC ".rodata"
+#define KCONFIG_SEC ".kconfig"
+
enum libbpf_map_type {
LIBBPF_MAP_UNSPEC,
LIBBPF_MAP_DATA,
LIBBPF_MAP_BSS,
LIBBPF_MAP_RODATA,
+ LIBBPF_MAP_KCONFIG,
};
static const char * const libbpf_type_to_btf_name[] = {
- [LIBBPF_MAP_DATA] = ".data",
- [LIBBPF_MAP_BSS] = ".bss",
- [LIBBPF_MAP_RODATA] = ".rodata",
+ [LIBBPF_MAP_DATA] = DATA_SEC,
+ [LIBBPF_MAP_BSS] = BSS_SEC,
+ [LIBBPF_MAP_RODATA] = RODATA_SEC,
+ [LIBBPF_MAP_KCONFIG] = KCONFIG_SEC,
};
struct bpf_map {
- int fd;
char *name;
+ int fd;
int sec_idx;
size_t sec_offset;
int map_ifindex;
@@ -228,14 +262,32 @@ struct bpf_map {
void *priv;
bpf_map_clear_priv_t clear_priv;
enum libbpf_map_type libbpf_type;
+ void *mmaped;
char *pin_path;
bool pinned;
bool reused;
};
-struct bpf_secdata {
- void *rodata;
- void *data;
+enum extern_type {
+ EXT_UNKNOWN,
+ EXT_CHAR,
+ EXT_BOOL,
+ EXT_INT,
+ EXT_TRISTATE,
+ EXT_CHAR_ARR,
+};
+
+struct extern_desc {
+ const char *name;
+ int sym_idx;
+ int btf_id;
+ enum extern_type type;
+ int sz;
+ int align;
+ int data_off;
+ bool is_signed;
+ bool is_weak;
+ bool is_set;
};
static LIST_HEAD(bpf_objects_list);
@@ -250,7 +302,11 @@ struct bpf_object {
struct bpf_map *maps;
size_t nr_maps;
size_t maps_cap;
- struct bpf_secdata sections;
+
+ char *kconfig;
+ struct extern_desc *externs;
+ int nr_extern;
+ int kconfig_map_idx;
bool loaded;
bool has_pseudo_calls;
@@ -279,6 +335,7 @@ struct bpf_object {
int maps_shndx;
int btf_maps_shndx;
int text_shndx;
+ int symbols_shndx;
int data_shndx;
int rodata_shndx;
int bss_shndx;
@@ -550,6 +607,7 @@ static struct bpf_object *bpf_object__new(const char *path,
obj->efile.data_shndx = -1;
obj->efile.rodata_shndx = -1;
obj->efile.bss_shndx = -1;
+ obj->kconfig_map_idx = -1;
obj->kern_version = get_kernel_version();
obj->loaded = false;
@@ -748,13 +806,13 @@ int bpf_object__section_size(const struct bpf_object *obj, const char *name,
*size = 0;
if (!name) {
return -EINVAL;
- } else if (!strcmp(name, ".data")) {
+ } else if (!strcmp(name, DATA_SEC)) {
if (obj->efile.data)
*size = obj->efile.data->d_size;
- } else if (!strcmp(name, ".bss")) {
+ } else if (!strcmp(name, BSS_SEC)) {
if (obj->efile.bss)
*size = obj->efile.bss->d_size;
- } else if (!strcmp(name, ".rodata")) {
+ } else if (!strcmp(name, RODATA_SEC)) {
if (obj->efile.rodata)
*size = obj->efile.rodata->d_size;
} else {
@@ -835,13 +893,38 @@ static struct bpf_map *bpf_object__add_map(struct bpf_object *obj)
return &obj->maps[obj->nr_maps++];
}
+static size_t bpf_map_mmap_sz(const struct bpf_map *map)
+{
+ long page_sz = sysconf(_SC_PAGE_SIZE);
+ size_t map_sz;
+
+ map_sz = roundup(map->def.value_size, 8) * map->def.max_entries;
+ map_sz = roundup(map_sz, page_sz);
+ return map_sz;
+}
+
+static char *internal_map_name(struct bpf_object *obj,
+ enum libbpf_map_type type)
+{
+ char map_name[BPF_OBJ_NAME_LEN];
+ const char *sfx = libbpf_type_to_btf_name[type];
+ int sfx_len = max((size_t)7, strlen(sfx));
+ int pfx_len = min((size_t)BPF_OBJ_NAME_LEN - sfx_len - 1,
+ strlen(obj->name));
+
+ snprintf(map_name, sizeof(map_name), "%.*s%.*s", pfx_len, obj->name,
+ sfx_len, libbpf_type_to_btf_name[type]);
+
+ return strdup(map_name);
+}
+
static int
bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
- int sec_idx, Elf_Data *data, void **data_buff)
+ int sec_idx, void *data, size_t data_sz)
{
- char map_name[BPF_OBJ_NAME_LEN];
struct bpf_map_def *def;
struct bpf_map *map;
+ int err;
map = bpf_object__add_map(obj);
if (IS_ERR(map))
@@ -850,9 +933,7 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
map->libbpf_type = type;
map->sec_idx = sec_idx;
map->sec_offset = 0;
- snprintf(map_name, sizeof(map_name), "%.8s%.7s", obj->name,
- libbpf_type_to_btf_name[type]);
- map->name = strdup(map_name);
+ map->name = internal_map_name(obj, type);
if (!map->name) {
pr_warn("failed to alloc map name\n");
return -ENOMEM;
@@ -861,25 +942,29 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
def = &map->def;
def->type = BPF_MAP_TYPE_ARRAY;
def->key_size = sizeof(int);
- def->value_size = data->d_size;
+ def->value_size = data_sz;
def->max_entries = 1;
- def->map_flags = type == LIBBPF_MAP_RODATA ? BPF_F_RDONLY_PROG : 0;
- if (obj->caps.array_mmap)
- def->map_flags |= BPF_F_MMAPABLE;
+ def->map_flags = type == LIBBPF_MAP_RODATA || type == LIBBPF_MAP_KCONFIG
+ ? BPF_F_RDONLY_PROG : 0;
+ def->map_flags |= BPF_F_MMAPABLE;
pr_debug("map '%s' (global data): at sec_idx %d, offset %zu, flags %x.\n",
- map_name, map->sec_idx, map->sec_offset, def->map_flags);
+ map->name, map->sec_idx, map->sec_offset, def->map_flags);
- if (data_buff) {
- *data_buff = malloc(data->d_size);
- if (!*data_buff) {
- zfree(&map->name);
- pr_warn("failed to alloc map content buffer\n");
- return -ENOMEM;
- }
- memcpy(*data_buff, data->d_buf, data->d_size);
+ map->mmaped = mmap(NULL, bpf_map_mmap_sz(map), PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+ if (map->mmaped == MAP_FAILED) {
+ err = -errno;
+ map->mmaped = NULL;
+ pr_warn("failed to alloc map '%s' content buffer: %d\n",
+ map->name, err);
+ zfree(&map->name);
+ return err;
}
+ if (data)
+ memcpy(map->mmaped, data, data_sz);
+
pr_debug("map %td is \"%s\"\n", map - obj->maps, map->name);
return 0;
}
@@ -888,37 +973,332 @@ static int bpf_object__init_global_data_maps(struct bpf_object *obj)
{
int err;
- if (!obj->caps.global_data)
- return 0;
/*
* Populate obj->maps with libbpf internal maps.
*/
if (obj->efile.data_shndx >= 0) {
err = bpf_object__init_internal_map(obj, LIBBPF_MAP_DATA,
obj->efile.data_shndx,
- obj->efile.data,
- &obj->sections.data);
+ obj->efile.data->d_buf,
+ obj->efile.data->d_size);
if (err)
return err;
}
if (obj->efile.rodata_shndx >= 0) {
err = bpf_object__init_internal_map(obj, LIBBPF_MAP_RODATA,
obj->efile.rodata_shndx,
- obj->efile.rodata,
- &obj->sections.rodata);
+ obj->efile.rodata->d_buf,
+ obj->efile.rodata->d_size);
if (err)
return err;
}
if (obj->efile.bss_shndx >= 0) {
err = bpf_object__init_internal_map(obj, LIBBPF_MAP_BSS,
obj->efile.bss_shndx,
- obj->efile.bss, NULL);
+ NULL,
+ obj->efile.bss->d_size);
if (err)
return err;
}
return 0;
}
+
+static struct extern_desc *find_extern_by_name(const struct bpf_object *obj,
+ const void *name)
+{
+ int i;
+
+ for (i = 0; i < obj->nr_extern; i++) {
+ if (strcmp(obj->externs[i].name, name) == 0)
+ return &obj->externs[i];
+ }
+ return NULL;
+}
+
+static int set_ext_value_tri(struct extern_desc *ext, void *ext_val,
+ char value)
+{
+ switch (ext->type) {
+ case EXT_BOOL:
+ if (value == 'm') {
+ pr_warn("extern %s=%c should be tristate or char\n",
+ ext->name, value);
+ return -EINVAL;
+ }
+ *(bool *)ext_val = value == 'y' ? true : false;
+ break;
+ case EXT_TRISTATE:
+ if (value == 'y')
+ *(enum libbpf_tristate *)ext_val = TRI_YES;
+ else if (value == 'm')
+ *(enum libbpf_tristate *)ext_val = TRI_MODULE;
+ else /* value == 'n' */
+ *(enum libbpf_tristate *)ext_val = TRI_NO;
+ break;
+ case EXT_CHAR:
+ *(char *)ext_val = value;
+ break;
+ case EXT_UNKNOWN:
+ case EXT_INT:
+ case EXT_CHAR_ARR:
+ default:
+ pr_warn("extern %s=%c should be bool, tristate, or char\n",
+ ext->name, value);
+ return -EINVAL;
+ }
+ ext->is_set = true;
+ return 0;
+}
+
+static int set_ext_value_str(struct extern_desc *ext, char *ext_val,
+ const char *value)
+{
+ size_t len;
+
+ if (ext->type != EXT_CHAR_ARR) {
+ pr_warn("extern %s=%s should char array\n", ext->name, value);
+ return -EINVAL;
+ }
+
+ len = strlen(value);
+ if (value[len - 1] != '"') {
+ pr_warn("extern '%s': invalid string config '%s'\n",
+ ext->name, value);
+ return -EINVAL;
+ }
+
+ /* strip quotes */
+ len -= 2;
+ if (len >= ext->sz) {
+ pr_warn("extern '%s': long string config %s of (%zu bytes) truncated to %d bytes\n",
+ ext->name, value, len, ext->sz - 1);
+ len = ext->sz - 1;
+ }
+ memcpy(ext_val, value + 1, len);
+ ext_val[len] = '\0';
+ ext->is_set = true;
+ return 0;
+}
+
+static int parse_u64(const char *value, __u64 *res)
+{
+ char *value_end;
+ int err;
+
+ errno = 0;
+ *res = strtoull(value, &value_end, 0);
+ if (errno) {
+ err = -errno;
+ pr_warn("failed to parse '%s' as integer: %d\n", value, err);
+ return err;
+ }
+ if (*value_end) {
+ pr_warn("failed to parse '%s' as integer completely\n", value);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static bool is_ext_value_in_range(const struct extern_desc *ext, __u64 v)
+{
+ int bit_sz = ext->sz * 8;
+
+ if (ext->sz == 8)
+ return true;
+
+ /* Validate that value stored in u64 fits in integer of `ext->sz`
+ * bytes size without any loss of information. If the target integer
+ * is signed, we rely on the following limits of integer type of
+ * Y bits and subsequent transformation:
+ *
+ * -2^(Y-1) <= X <= 2^(Y-1) - 1
+ * 0 <= X + 2^(Y-1) <= 2^Y - 1
+ * 0 <= X + 2^(Y-1) < 2^Y
+ *
+ * For unsigned target integer, check that all the (64 - Y) bits are
+ * zero.
+ */
+ if (ext->is_signed)
+ return v + (1ULL << (bit_sz - 1)) < (1ULL << bit_sz);
+ else
+ return (v >> bit_sz) == 0;
+}
+
+static int set_ext_value_num(struct extern_desc *ext, void *ext_val,
+ __u64 value)
+{
+ if (ext->type != EXT_INT && ext->type != EXT_CHAR) {
+ pr_warn("extern %s=%llu should be integer\n",
+ ext->name, (unsigned long long)value);
+ return -EINVAL;
+ }
+ if (!is_ext_value_in_range(ext, value)) {
+ pr_warn("extern %s=%llu value doesn't fit in %d bytes\n",
+ ext->name, (unsigned long long)value, ext->sz);
+ return -ERANGE;
+ }
+ switch (ext->sz) {
+ case 1: *(__u8 *)ext_val = value; break;
+ case 2: *(__u16 *)ext_val = value; break;
+ case 4: *(__u32 *)ext_val = value; break;
+ case 8: *(__u64 *)ext_val = value; break;
+ default:
+ return -EINVAL;
+ }
+ ext->is_set = true;
+ return 0;
+}
+
+static int bpf_object__process_kconfig_line(struct bpf_object *obj,
+ char *buf, void *data)
+{
+ struct extern_desc *ext;
+ char *sep, *value;
+ int len, err = 0;
+ void *ext_val;
+ __u64 num;
+
+ if (strncmp(buf, "CONFIG_", 7))
+ return 0;
+
+ sep = strchr(buf, '=');
+ if (!sep) {
+ pr_warn("failed to parse '%s': no separator\n", buf);
+ return -EINVAL;
+ }
+
+ /* Trim ending '\n' */
+ len = strlen(buf);
+ if (buf[len - 1] == '\n')
+ buf[len - 1] = '\0';
+ /* Split on '=' and ensure that a value is present. */
+ *sep = '\0';
+ if (!sep[1]) {
+ *sep = '=';
+ pr_warn("failed to parse '%s': no value\n", buf);
+ return -EINVAL;
+ }
+
+ ext = find_extern_by_name(obj, buf);
+ if (!ext || ext->is_set)
+ return 0;
+
+ ext_val = data + ext->data_off;
+ value = sep + 1;
+
+ switch (*value) {
+ case 'y': case 'n': case 'm':
+ err = set_ext_value_tri(ext, ext_val, *value);
+ break;
+ case '"':
+ err = set_ext_value_str(ext, ext_val, value);
+ break;
+ default:
+ /* assume integer */
+ err = parse_u64(value, &num);
+ if (err) {
+ pr_warn("extern %s=%s should be integer\n",
+ ext->name, value);
+ return err;
+ }
+ err = set_ext_value_num(ext, ext_val, num);
+ break;
+ }
+ if (err)
+ return err;
+ pr_debug("extern %s=%s\n", ext->name, value);
+ return 0;
+}
+
+static int bpf_object__read_kconfig_file(struct bpf_object *obj, void *data)
+{
+ char buf[PATH_MAX];
+ struct utsname uts;
+ int len, err = 0;
+ gzFile file;
+
+ uname(&uts);
+ len = snprintf(buf, PATH_MAX, "/boot/config-%s", uts.release);
+ if (len < 0)
+ return -EINVAL;
+ else if (len >= PATH_MAX)
+ return -ENAMETOOLONG;
+
+ /* gzopen also accepts uncompressed files. */
+ file = gzopen(buf, "r");
+ if (!file)
+ file = gzopen("/proc/config.gz", "r");
+
+ if (!file) {
+ pr_warn("failed to open system Kconfig\n");
+ return -ENOENT;
+ }
+
+ while (gzgets(file, buf, sizeof(buf))) {
+ err = bpf_object__process_kconfig_line(obj, buf, data);
+ if (err) {
+ pr_warn("error parsing system Kconfig line '%s': %d\n",
+ buf, err);
+ goto out;
+ }
+ }
+
+out:
+ gzclose(file);
+ return err;
+}
+
+static int bpf_object__read_kconfig_mem(struct bpf_object *obj,
+ const char *config, void *data)
+{
+ char buf[PATH_MAX];
+ int err = 0;
+ FILE *file;
+
+ file = fmemopen((void *)config, strlen(config), "r");
+ if (!file) {
+ err = -errno;
+ pr_warn("failed to open in-memory Kconfig: %d\n", err);
+ return err;
+ }
+
+ while (fgets(buf, sizeof(buf), file)) {
+ err = bpf_object__process_kconfig_line(obj, buf, data);
+ if (err) {
+ pr_warn("error parsing in-memory Kconfig line '%s': %d\n",
+ buf, err);
+ break;
+ }
+ }
+
+ fclose(file);
+ return err;
+}
+
+static int bpf_object__init_kconfig_map(struct bpf_object *obj)
+{
+ struct extern_desc *last_ext;
+ size_t map_sz;
+ int err;
+
+ if (obj->nr_extern == 0)
+ return 0;
+
+ last_ext = &obj->externs[obj->nr_extern - 1];
+ map_sz = last_ext->data_off + last_ext->sz;
+
+ err = bpf_object__init_internal_map(obj, LIBBPF_MAP_KCONFIG,
+ obj->efile.symbols_shndx,
+ NULL, map_sz);
+ if (err)
+ return err;
+
+ obj->kconfig_map_idx = obj->nr_maps - 1;
+
+ return 0;
+}
+
static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict)
{
Elf_Data *symbols = obj->efile.symbols;
@@ -1242,15 +1622,15 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,
}
sz = btf__resolve_size(obj->btf, t->type);
if (sz < 0) {
- pr_warn("map '%s': can't determine key size for type [%u]: %lld.\n",
- map_name, t->type, sz);
+ pr_warn("map '%s': can't determine key size for type [%u]: %zd.\n",
+ map_name, t->type, (ssize_t)sz);
return sz;
}
- pr_debug("map '%s': found key [%u], sz = %lld.\n",
- map_name, t->type, sz);
+ pr_debug("map '%s': found key [%u], sz = %zd.\n",
+ map_name, t->type, (ssize_t)sz);
if (map->def.key_size && map->def.key_size != sz) {
- pr_warn("map '%s': conflicting key size %u != %lld.\n",
- map_name, map->def.key_size, sz);
+ pr_warn("map '%s': conflicting key size %u != %zd.\n",
+ map_name, map->def.key_size, (ssize_t)sz);
return -EINVAL;
}
map->def.key_size = sz;
@@ -1285,15 +1665,15 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,
}
sz = btf__resolve_size(obj->btf, t->type);
if (sz < 0) {
- pr_warn("map '%s': can't determine value size for type [%u]: %lld.\n",
- map_name, t->type, sz);
+ pr_warn("map '%s': can't determine value size for type [%u]: %zd.\n",
+ map_name, t->type, (ssize_t)sz);
return sz;
}
- pr_debug("map '%s': found value [%u], sz = %lld.\n",
- map_name, t->type, sz);
+ pr_debug("map '%s': found value [%u], sz = %zd.\n",
+ map_name, t->type, (ssize_t)sz);
if (map->def.value_size && map->def.value_size != sz) {
- pr_warn("map '%s': conflicting value size %u != %lld.\n",
- map_name, map->def.value_size, sz);
+ pr_warn("map '%s': conflicting value size %u != %zd.\n",
+ map_name, map->def.value_size, (ssize_t)sz);
return -EINVAL;
}
map->def.value_size = sz;
@@ -1393,21 +1773,20 @@ static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict,
return 0;
}
-static int bpf_object__init_maps(struct bpf_object *obj, bool relaxed_maps,
- const char *pin_root_path)
+static int bpf_object__init_maps(struct bpf_object *obj,
+ const struct bpf_object_open_opts *opts)
{
- bool strict = !relaxed_maps;
+ const char *pin_root_path;
+ bool strict;
int err;
- err = bpf_object__init_user_maps(obj, strict);
- if (err)
- return err;
-
- err = bpf_object__init_user_btf_maps(obj, strict, pin_root_path);
- if (err)
- return err;
+ strict = !OPTS_GET(opts, relaxed_maps, false);
+ pin_root_path = OPTS_GET(opts, pin_root_path, NULL);
- err = bpf_object__init_global_data_maps(obj);
+ err = bpf_object__init_user_maps(obj, strict);
+ err = err ?: bpf_object__init_user_btf_maps(obj, strict, pin_root_path);
+ err = err ?: bpf_object__init_global_data_maps(obj);
+ err = err ?: bpf_object__init_kconfig_map(obj);
if (err)
return err;
@@ -1509,7 +1888,8 @@ static void bpf_object__sanitize_btf_ext(struct bpf_object *obj)
static bool bpf_object__is_btf_mandatory(const struct bpf_object *obj)
{
- return obj->efile.btf_maps_shndx >= 0;
+ return obj->efile.btf_maps_shndx >= 0 ||
+ obj->nr_extern > 0;
}
static int bpf_object__init_btf(struct bpf_object *obj,
@@ -1526,11 +1906,6 @@ static int bpf_object__init_btf(struct bpf_object *obj,
BTF_ELF_SEC, err);
goto out;
}
- err = btf__finalize_data(obj, obj->btf);
- if (err) {
- pr_warn("Error finalizing %s: %d.\n", BTF_ELF_SEC, err);
- goto out;
- }
}
if (btf_ext_data) {
if (!obj->btf) {
@@ -1564,6 +1939,30 @@ out:
return 0;
}
+static int bpf_object__finalize_btf(struct bpf_object *obj)
+{
+ int err;
+
+ if (!obj->btf)
+ return 0;
+
+ err = btf__finalize_data(obj, obj->btf);
+ if (!err)
+ return 0;
+
+ pr_warn("Error finalizing %s: %d.\n", BTF_ELF_SEC, err);
+ btf__free(obj->btf);
+ obj->btf = NULL;
+ btf_ext__free(obj->btf_ext);
+ obj->btf_ext = NULL;
+
+ if (bpf_object__is_btf_mandatory(obj)) {
+ pr_warn("BTF is required, but is missing or corrupted.\n");
+ return -ENOENT;
+ }
+ return 0;
+}
+
static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
{
int err = 0;
@@ -1592,8 +1991,7 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
return 0;
}
-static int bpf_object__elf_collect(struct bpf_object *obj, bool relaxed_maps,
- const char *pin_root_path)
+static int bpf_object__elf_collect(struct bpf_object *obj)
{
Elf *elf = obj->efile.elf;
GElf_Ehdr *ep = &obj->efile.ehdr;
@@ -1665,6 +2063,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj, bool relaxed_maps,
return -LIBBPF_ERRNO__FORMAT;
}
obj->efile.symbols = data;
+ obj->efile.symbols_shndx = idx;
obj->efile.strtabidx = sh.sh_link;
} else if (sh.sh_type == SHT_PROGBITS && data->d_size > 0) {
if (sh.sh_flags & SHF_EXECINSTR) {
@@ -1683,10 +2082,10 @@ static int bpf_object__elf_collect(struct bpf_object *obj, bool relaxed_maps,
name, obj->path, cp);
return err;
}
- } else if (strcmp(name, ".data") == 0) {
+ } else if (strcmp(name, DATA_SEC) == 0) {
obj->efile.data = data;
obj->efile.data_shndx = idx;
- } else if (strcmp(name, ".rodata") == 0) {
+ } else if (strcmp(name, RODATA_SEC) == 0) {
obj->efile.rodata = data;
obj->efile.rodata_shndx = idx;
} else {
@@ -1716,7 +2115,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj, bool relaxed_maps,
obj->efile.reloc_sects[nr_sects].shdr = sh;
obj->efile.reloc_sects[nr_sects].data = data;
- } else if (sh.sh_type == SHT_NOBITS && strcmp(name, ".bss") == 0) {
+ } else if (sh.sh_type == SHT_NOBITS &&
+ strcmp(name, BSS_SEC) == 0) {
obj->efile.bss = data;
obj->efile.bss_shndx = idx;
} else {
@@ -1728,14 +2128,217 @@ static int bpf_object__elf_collect(struct bpf_object *obj, bool relaxed_maps,
pr_warn("Corrupted ELF file: index of strtab invalid\n");
return -LIBBPF_ERRNO__FORMAT;
}
- err = bpf_object__init_btf(obj, btf_data, btf_ext_data);
- if (!err)
- err = bpf_object__init_maps(obj, relaxed_maps, pin_root_path);
- if (!err)
- err = bpf_object__sanitize_and_load_btf(obj);
- if (!err)
- err = bpf_object__init_prog_names(obj);
- return err;
+ return bpf_object__init_btf(obj, btf_data, btf_ext_data);
+}
+
+static bool sym_is_extern(const GElf_Sym *sym)
+{
+ int bind = GELF_ST_BIND(sym->st_info);
+ /* externs are symbols w/ type=NOTYPE, bind=GLOBAL|WEAK, section=UND */
+ return sym->st_shndx == SHN_UNDEF &&
+ (bind == STB_GLOBAL || bind == STB_WEAK) &&
+ GELF_ST_TYPE(sym->st_info) == STT_NOTYPE;
+}
+
+static int find_extern_btf_id(const struct btf *btf, const char *ext_name)
+{
+ const struct btf_type *t;
+ const char *var_name;
+ int i, n;
+
+ if (!btf)
+ return -ESRCH;
+
+ n = btf__get_nr_types(btf);
+ for (i = 1; i <= n; i++) {
+ t = btf__type_by_id(btf, i);
+
+ if (!btf_is_var(t))
+ continue;
+
+ var_name = btf__name_by_offset(btf, t->name_off);
+ if (strcmp(var_name, ext_name))
+ continue;
+
+ if (btf_var(t)->linkage != BTF_VAR_GLOBAL_EXTERN)
+ return -EINVAL;
+
+ return i;
+ }
+
+ return -ENOENT;
+}
+
+static enum extern_type find_extern_type(const struct btf *btf, int id,
+ bool *is_signed)
+{
+ const struct btf_type *t;
+ const char *name;
+
+ t = skip_mods_and_typedefs(btf, id, NULL);
+ name = btf__name_by_offset(btf, t->name_off);
+
+ if (is_signed)
+ *is_signed = false;
+ switch (btf_kind(t)) {
+ case BTF_KIND_INT: {
+ int enc = btf_int_encoding(t);
+
+ if (enc & BTF_INT_BOOL)
+ return t->size == 1 ? EXT_BOOL : EXT_UNKNOWN;
+ if (is_signed)
+ *is_signed = enc & BTF_INT_SIGNED;
+ if (t->size == 1)
+ return EXT_CHAR;
+ if (t->size < 1 || t->size > 8 || (t->size & (t->size - 1)))
+ return EXT_UNKNOWN;
+ return EXT_INT;
+ }
+ case BTF_KIND_ENUM:
+ if (t->size != 4)
+ return EXT_UNKNOWN;
+ if (strcmp(name, "libbpf_tristate"))
+ return EXT_UNKNOWN;
+ return EXT_TRISTATE;
+ case BTF_KIND_ARRAY:
+ if (btf_array(t)->nelems == 0)
+ return EXT_UNKNOWN;
+ if (find_extern_type(btf, btf_array(t)->type, NULL) != EXT_CHAR)
+ return EXT_UNKNOWN;
+ return EXT_CHAR_ARR;
+ default:
+ return EXT_UNKNOWN;
+ }
+}
+
+static int cmp_externs(const void *_a, const void *_b)
+{
+ const struct extern_desc *a = _a;
+ const struct extern_desc *b = _b;
+
+ /* descending order by alignment requirements */
+ if (a->align != b->align)
+ return a->align > b->align ? -1 : 1;
+ /* ascending order by size, within same alignment class */
+ if (a->sz != b->sz)
+ return a->sz < b->sz ? -1 : 1;
+ /* resolve ties by name */
+ return strcmp(a->name, b->name);
+}
+
+static int bpf_object__collect_externs(struct bpf_object *obj)
+{
+ const struct btf_type *t;
+ struct extern_desc *ext;
+ int i, n, off, btf_id;
+ struct btf_type *sec;
+ const char *ext_name;
+ Elf_Scn *scn;
+ GElf_Shdr sh;
+
+ if (!obj->efile.symbols)
+ return 0;
+
+ scn = elf_getscn(obj->efile.elf, obj->efile.symbols_shndx);
+ if (!scn)
+ return -LIBBPF_ERRNO__FORMAT;
+ if (gelf_getshdr(scn, &sh) != &sh)
+ return -LIBBPF_ERRNO__FORMAT;
+ n = sh.sh_size / sh.sh_entsize;
+
+ pr_debug("looking for externs among %d symbols...\n", n);
+ for (i = 0; i < n; i++) {
+ GElf_Sym sym;
+
+ if (!gelf_getsym(obj->efile.symbols, i, &sym))
+ return -LIBBPF_ERRNO__FORMAT;
+ if (!sym_is_extern(&sym))
+ continue;
+ ext_name = elf_strptr(obj->efile.elf, obj->efile.strtabidx,
+ sym.st_name);
+ if (!ext_name || !ext_name[0])
+ continue;
+
+ ext = obj->externs;
+ ext = reallocarray(ext, obj->nr_extern + 1, sizeof(*ext));
+ if (!ext)
+ return -ENOMEM;
+ obj->externs = ext;
+ ext = &ext[obj->nr_extern];
+ memset(ext, 0, sizeof(*ext));
+ obj->nr_extern++;
+
+ ext->btf_id = find_extern_btf_id(obj->btf, ext_name);
+ if (ext->btf_id <= 0) {
+ pr_warn("failed to find BTF for extern '%s': %d\n",
+ ext_name, ext->btf_id);
+ return ext->btf_id;
+ }
+ t = btf__type_by_id(obj->btf, ext->btf_id);
+ ext->name = btf__name_by_offset(obj->btf, t->name_off);
+ ext->sym_idx = i;
+ ext->is_weak = GELF_ST_BIND(sym.st_info) == STB_WEAK;
+ ext->sz = btf__resolve_size(obj->btf, t->type);
+ if (ext->sz <= 0) {
+ pr_warn("failed to resolve size of extern '%s': %d\n",
+ ext_name, ext->sz);
+ return ext->sz;
+ }
+ ext->align = btf__align_of(obj->btf, t->type);
+ if (ext->align <= 0) {
+ pr_warn("failed to determine alignment of extern '%s': %d\n",
+ ext_name, ext->align);
+ return -EINVAL;
+ }
+ ext->type = find_extern_type(obj->btf, t->type,
+ &ext->is_signed);
+ if (ext->type == EXT_UNKNOWN) {
+ pr_warn("extern '%s' type is unsupported\n", ext_name);
+ return -ENOTSUP;
+ }
+ }
+ pr_debug("collected %d externs total\n", obj->nr_extern);
+
+ if (!obj->nr_extern)
+ return 0;
+
+ /* sort externs by (alignment, size, name) and calculate their offsets
+ * within a map */
+ qsort(obj->externs, obj->nr_extern, sizeof(*ext), cmp_externs);
+ off = 0;
+ for (i = 0; i < obj->nr_extern; i++) {
+ ext = &obj->externs[i];
+ ext->data_off = roundup(off, ext->align);
+ off = ext->data_off + ext->sz;
+ pr_debug("extern #%d: symbol %d, off %u, name %s\n",
+ i, ext->sym_idx, ext->data_off, ext->name);
+ }
+
+ btf_id = btf__find_by_name(obj->btf, KCONFIG_SEC);
+ if (btf_id <= 0) {
+ pr_warn("no BTF info found for '%s' datasec\n", KCONFIG_SEC);
+ return -ESRCH;
+ }
+
+ sec = (struct btf_type *)btf__type_by_id(obj->btf, btf_id);
+ sec->size = off;
+ n = btf_vlen(sec);
+ for (i = 0; i < n; i++) {
+ struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i;
+
+ t = btf__type_by_id(obj->btf, vs->type);
+ ext_name = btf__name_by_offset(obj->btf, t->name_off);
+ ext = find_extern_by_name(obj, ext_name);
+ if (!ext) {
+ pr_warn("failed to find extern definition for BTF var '%s'\n",
+ ext_name);
+ return -ESRCH;
+ }
+ vs->offset = ext->data_off;
+ btf_var(t)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
+ }
+
+ return 0;
}
static struct bpf_program *
@@ -1765,6 +2368,19 @@ bpf_object__find_program_by_title(const struct bpf_object *obj,
return NULL;
}
+struct bpf_program *
+bpf_object__find_program_by_name(const struct bpf_object *obj,
+ const char *name)
+{
+ struct bpf_program *prog;
+
+ bpf_object__for_each_program(prog, obj) {
+ if (!strcmp(prog->name, name))
+ return prog;
+ }
+ return NULL;
+}
+
static bool bpf_object__shndx_is_data(const struct bpf_object *obj,
int shndx)
{
@@ -1789,6 +2405,8 @@ bpf_object__section_to_libbpf_map_type(const struct bpf_object *obj, int shndx)
return LIBBPF_MAP_BSS;
else if (shndx == obj->efile.rodata_shndx)
return LIBBPF_MAP_RODATA;
+ else if (shndx == obj->efile.symbols_shndx)
+ return LIBBPF_MAP_KCONFIG;
else
return LIBBPF_MAP_UNSPEC;
}
@@ -1817,7 +2435,8 @@ static int bpf_program__record_reloc(struct bpf_program *prog,
return -LIBBPF_ERRNO__RELOC;
}
if (sym->st_value % 8) {
- pr_warn("bad call relo offset: %llu\n", (__u64)sym->st_value);
+ pr_warn("bad call relo offset: %zu\n",
+ (size_t)sym->st_value);
return -LIBBPF_ERRNO__RELOC;
}
reloc_desc->type = RELO_CALL;
@@ -1832,6 +2451,30 @@ static int bpf_program__record_reloc(struct bpf_program *prog,
insn_idx, insn->code);
return -LIBBPF_ERRNO__RELOC;
}
+
+ if (sym_is_extern(sym)) {
+ int sym_idx = GELF_R_SYM(rel->r_info);
+ int i, n = obj->nr_extern;
+ struct extern_desc *ext;
+
+ for (i = 0; i < n; i++) {
+ ext = &obj->externs[i];
+ if (ext->sym_idx == sym_idx)
+ break;
+ }
+ if (i >= n) {
+ pr_warn("extern relo failed to find extern for sym %d\n",
+ sym_idx);
+ return -LIBBPF_ERRNO__RELOC;
+ }
+ pr_debug("found extern #%d '%s' (sym %d, off %u) for insn %u\n",
+ i, ext->name, ext->sym_idx, ext->data_off, insn_idx);
+ reloc_desc->type = RELO_EXTERN;
+ reloc_desc->insn_idx = insn_idx;
+ reloc_desc->sym_off = ext->data_off;
+ return 0;
+ }
+
if (!shdr_idx || shdr_idx >= SHN_LORESERVE) {
pr_warn("invalid relo for \'%s\' in special section 0x%x; forgot to initialize global var?..\n",
name, shdr_idx);
@@ -1859,8 +2502,8 @@ static int bpf_program__record_reloc(struct bpf_program *prog,
break;
}
if (map_idx >= nr_maps) {
- pr_warn("map relo failed to find map for sec %u, off %llu\n",
- shdr_idx, (__u64)sym->st_value);
+ pr_warn("map relo failed to find map for sec %u, off %zu\n",
+ shdr_idx, (size_t)sym->st_value);
return -LIBBPF_ERRNO__RELOC;
}
reloc_desc->type = RELO_LD64;
@@ -1875,11 +2518,6 @@ static int bpf_program__record_reloc(struct bpf_program *prog,
pr_warn("bad data relo against section %u\n", shdr_idx);
return -LIBBPF_ERRNO__RELOC;
}
- if (!obj->caps.global_data) {
- pr_warn("relocation: kernel does not support global \'%s\' variable access in insns[%d]\n",
- name, insn_idx);
- return -LIBBPF_ERRNO__RELOC;
- }
for (map_idx = 0; map_idx < nr_maps; map_idx++) {
map = &obj->maps[map_idx];
if (map->libbpf_type != type)
@@ -1941,9 +2579,9 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,
name = elf_strptr(obj->efile.elf, obj->efile.strtabidx,
sym.st_name) ? : "<?>";
- pr_debug("relo for shdr %u, symb %llu, value %llu, type %d, bind %d, name %d (\'%s\'), insn %u\n",
- (__u32)sym.st_shndx, (__u64)GELF_R_SYM(rel.r_info),
- (__u64)sym.st_value, GELF_ST_TYPE(sym.st_info),
+ pr_debug("relo for shdr %u, symb %zu, value %zu, type %d, bind %d, name %d (\'%s\'), insn %u\n",
+ (__u32)sym.st_shndx, (size_t)GELF_R_SYM(rel.r_info),
+ (size_t)sym.st_value, GELF_ST_TYPE(sym.st_info),
GELF_ST_BIND(sym.st_info), sym.st_name, name,
insn_idx);
@@ -2298,29 +2936,35 @@ bpf_object__reuse_map(struct bpf_map *map)
static int
bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
{
+ enum libbpf_map_type map_type = map->libbpf_type;
char *cp, errmsg[STRERR_BUFSIZE];
int err, zero = 0;
- __u8 *data;
- /* Nothing to do here since kernel already zero-initializes .bss map. */
- if (map->libbpf_type == LIBBPF_MAP_BSS)
+ /* kernel already zero-initializes .bss map. */
+ if (map_type == LIBBPF_MAP_BSS)
return 0;
- data = map->libbpf_type == LIBBPF_MAP_DATA ?
- obj->sections.data : obj->sections.rodata;
+ err = bpf_map_update_elem(map->fd, &zero, map->mmaped, 0);
+ if (err) {
+ err = -errno;
+ cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
+ pr_warn("Error setting initial map(%s) contents: %s\n",
+ map->name, cp);
+ return err;
+ }
- err = bpf_map_update_elem(map->fd, &zero, data, 0);
- /* Freeze .rodata map as read-only from syscall side. */
- if (!err && map->libbpf_type == LIBBPF_MAP_RODATA) {
+ /* Freeze .rodata and .kconfig map as read-only from syscall side. */
+ if (map_type == LIBBPF_MAP_RODATA || map_type == LIBBPF_MAP_KCONFIG) {
err = bpf_map_freeze(map->fd);
if (err) {
- cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
+ err = -errno;
+ cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
pr_warn("Error freezing map(%s) as read-only: %s\n",
map->name, cp);
- err = 0;
+ return err;
}
}
- return err;
+ return 0;
}
static int
@@ -2411,6 +3055,7 @@ err_out:
cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
pr_warn("failed to create map (name: '%s'): %s(%d)\n",
map->name, cp, err);
+ pr_perm_msg(err);
for (j = 0; j < i; j++)
zclose(obj->maps[j].fd);
return err;
@@ -2536,6 +3181,21 @@ static bool str_is_empty(const char *s)
return !s || !s[0];
}
+static bool is_flex_arr(const struct btf *btf,
+ const struct bpf_core_accessor *acc,
+ const struct btf_array *arr)
+{
+ const struct btf_type *t;
+
+ /* not a flexible array, if not inside a struct or has non-zero size */
+ if (!acc->name || arr->nelems > 0)
+ return false;
+
+ /* has to be the last member of enclosing struct */
+ t = btf__type_by_id(btf, acc->type_id);
+ return acc->idx == btf_vlen(t) - 1;
+}
+
/*
* Turn bpf_field_reloc into a low- and high-level spec representation,
* validating correctness along the way, as well as calculating resulting
@@ -2573,6 +3233,7 @@ static int bpf_core_spec_parse(const struct btf *btf,
struct bpf_core_spec *spec)
{
int access_idx, parsed_len, i;
+ struct bpf_core_accessor *acc;
const struct btf_type *t;
const char *name;
__u32 id;
@@ -2620,6 +3281,7 @@ static int bpf_core_spec_parse(const struct btf *btf,
return -EINVAL;
access_idx = spec->raw_spec[i];
+ acc = &spec->spec[spec->len];
if (btf_is_composite(t)) {
const struct btf_member *m;
@@ -2637,18 +3299,23 @@ static int bpf_core_spec_parse(const struct btf *btf,
if (str_is_empty(name))
return -EINVAL;
- spec->spec[spec->len].type_id = id;
- spec->spec[spec->len].idx = access_idx;
- spec->spec[spec->len].name = name;
+ acc->type_id = id;
+ acc->idx = access_idx;
+ acc->name = name;
spec->len++;
}
id = m->type;
} else if (btf_is_array(t)) {
const struct btf_array *a = btf_array(t);
+ bool flex;
t = skip_mods_and_typedefs(btf, a->type, &id);
- if (!t || access_idx >= a->nelems)
+ if (!t)
+ return -EINVAL;
+
+ flex = is_flex_arr(btf, acc - 1, a);
+ if (!flex && access_idx >= a->nelems)
return -EINVAL;
spec->spec[spec->len].type_id = id;
@@ -2953,12 +3620,14 @@ static int bpf_core_spec_match(struct bpf_core_spec *local_spec,
*/
if (i > 0) {
const struct btf_array *a;
+ bool flex;
if (!btf_is_array(targ_type))
return 0;
a = btf_array(targ_type);
- if (local_acc->idx >= a->nelems)
+ flex = is_flex_arr(targ_btf, targ_acc - 1, a);
+ if (!flex && local_acc->idx >= a->nelems)
return 0;
if (!skip_mods_and_typedefs(targ_btf, a->type,
&targ_id))
@@ -3142,11 +3811,13 @@ static int bpf_core_reloc_insn(struct bpf_program *prog,
insn = &prog->insns[insn_idx];
class = BPF_CLASS(insn->code);
- if (class == BPF_ALU || class == BPF_ALU64) {
+ switch (class) {
+ case BPF_ALU:
+ case BPF_ALU64:
if (BPF_SRC(insn->code) != BPF_K)
return -EINVAL;
if (!failed && validate && insn->imm != orig_val) {
- pr_warn("prog '%s': unexpected insn #%d value: got %u, exp %u -> %u\n",
+ pr_warn("prog '%s': unexpected insn #%d (ALU/ALU64) value: got %u, exp %u -> %u\n",
bpf_program__title(prog, false), insn_idx,
insn->imm, orig_val, new_val);
return -EINVAL;
@@ -3156,7 +3827,29 @@ static int bpf_core_reloc_insn(struct bpf_program *prog,
pr_debug("prog '%s': patched insn #%d (ALU/ALU64)%s imm %u -> %u\n",
bpf_program__title(prog, false), insn_idx,
failed ? " w/ failed reloc" : "", orig_val, new_val);
- } else {
+ break;
+ case BPF_LDX:
+ case BPF_ST:
+ case BPF_STX:
+ if (!failed && validate && insn->off != orig_val) {
+ pr_warn("prog '%s': unexpected insn #%d (LD/LDX/ST/STX) value: got %u, exp %u -> %u\n",
+ bpf_program__title(prog, false), insn_idx,
+ insn->off, orig_val, new_val);
+ return -EINVAL;
+ }
+ if (new_val > SHRT_MAX) {
+ pr_warn("prog '%s': insn #%d (LD/LDX/ST/STX) value too big: %u\n",
+ bpf_program__title(prog, false), insn_idx,
+ new_val);
+ return -ERANGE;
+ }
+ orig_val = insn->off;
+ insn->off = new_val;
+ pr_debug("prog '%s': patched insn #%d (LD/LDX/ST/STX)%s off %u -> %u\n",
+ bpf_program__title(prog, false), insn_idx,
+ failed ? " w/ failed reloc" : "", orig_val, new_val);
+ break;
+ default:
pr_warn("prog '%s': trying to relocate unrecognized insn #%d, code:%x, src:%x, dst:%x, off:%x, imm:%x\n",
bpf_program__title(prog, false),
insn_idx, insn->code, insn->src_reg, insn->dst_reg,
@@ -3559,9 +4252,6 @@ bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj,
size_t new_cnt;
int err;
- if (relo->type != RELO_CALL)
- return -LIBBPF_ERRNO__RELOC;
-
if (prog->idx == obj->efile.text_shndx) {
pr_warn("relo in .text insn %d into off %d (insn #%d)\n",
relo->insn_idx, relo->sym_off, relo->sym_off / 8);
@@ -3623,27 +4313,37 @@ bpf_program__relocate(struct bpf_program *prog, struct bpf_object *obj)
for (i = 0; i < prog->nr_reloc; i++) {
struct reloc_desc *relo = &prog->reloc_desc[i];
+ struct bpf_insn *insn = &prog->insns[relo->insn_idx];
- if (relo->type == RELO_LD64 || relo->type == RELO_DATA) {
- struct bpf_insn *insn = &prog->insns[relo->insn_idx];
-
- if (relo->insn_idx + 1 >= (int)prog->insns_cnt) {
- pr_warn("relocation out of range: '%s'\n",
- prog->section_name);
- return -LIBBPF_ERRNO__RELOC;
- }
+ if (relo->insn_idx + 1 >= (int)prog->insns_cnt) {
+ pr_warn("relocation out of range: '%s'\n",
+ prog->section_name);
+ return -LIBBPF_ERRNO__RELOC;
+ }
- if (relo->type != RELO_DATA) {
- insn[0].src_reg = BPF_PSEUDO_MAP_FD;
- } else {
- insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
- insn[1].imm = insn[0].imm + relo->sym_off;
- }
+ switch (relo->type) {
+ case RELO_LD64:
+ insn[0].src_reg = BPF_PSEUDO_MAP_FD;
+ insn[0].imm = obj->maps[relo->map_idx].fd;
+ break;
+ case RELO_DATA:
+ insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
+ insn[1].imm = insn[0].imm + relo->sym_off;
insn[0].imm = obj->maps[relo->map_idx].fd;
- } else if (relo->type == RELO_CALL) {
+ break;
+ case RELO_EXTERN:
+ insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
+ insn[0].imm = obj->maps[obj->kconfig_map_idx].fd;
+ insn[1].imm = relo->sym_off;
+ break;
+ case RELO_CALL:
err = bpf_program__reloc_text(prog, obj, relo);
if (err)
return err;
+ break;
+ default:
+ pr_warn("relo #%d: bad relo type %d\n", i, relo->type);
+ return -EINVAL;
}
}
@@ -3778,6 +4478,7 @@ retry_load:
ret = -errno;
cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
pr_warn("load bpf program failed: %s\n", cp);
+ pr_perm_msg(ret);
if (log_buf && log_buf[0] != '\0') {
ret = -LIBBPF_ERRNO__VERIFY;
@@ -3807,11 +4508,22 @@ out:
return ret;
}
-int
-bpf_program__load(struct bpf_program *prog,
- char *license, __u32 kern_version)
+static int libbpf_find_attach_btf_id(const char *name,
+ enum bpf_attach_type attach_type,
+ __u32 attach_prog_fd);
+
+int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver)
{
- int err = 0, fd, i;
+ int err = 0, fd, i, btf_id;
+
+ if (prog->type == BPF_PROG_TYPE_TRACING) {
+ btf_id = libbpf_find_attach_btf_id(prog->section_name,
+ prog->expected_attach_type,
+ prog->attach_prog_fd);
+ if (btf_id <= 0)
+ return btf_id;
+ prog->attach_btf_id = btf_id;
+ }
if (prog->instances.nr < 0 || !prog->instances.fds) {
if (prog->preprocessor) {
@@ -3835,7 +4547,7 @@ bpf_program__load(struct bpf_program *prog,
prog->section_name, prog->instances.nr);
}
err = load_program(prog, prog->insns, prog->insns_cnt,
- license, kern_version, &fd);
+ license, kern_ver, &fd);
if (!err)
prog->instances.fds[0] = fd;
goto out;
@@ -3864,9 +4576,7 @@ bpf_program__load(struct bpf_program *prog,
}
err = load_program(prog, result.new_insn_ptr,
- result.new_insn_cnt,
- license, kern_version, &fd);
-
+ result.new_insn_cnt, license, kern_ver, &fd);
if (err) {
pr_warn("Loading the %dth instance of program '%s' failed\n",
i, prog->section_name);
@@ -3910,20 +4620,14 @@ bpf_object__load_progs(struct bpf_object *obj, int log_level)
return 0;
}
-static int libbpf_find_attach_btf_id(const char *name,
- enum bpf_attach_type attach_type,
- __u32 attach_prog_fd);
static struct bpf_object *
__bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz,
- struct bpf_object_open_opts *opts)
+ const struct bpf_object_open_opts *opts)
{
- const char *pin_root_path;
+ const char *obj_name, *kconfig;
struct bpf_program *prog;
struct bpf_object *obj;
- const char *obj_name;
char tmp_name[64];
- bool relaxed_maps;
- __u32 attach_prog_fd;
int err;
if (elf_version(EV_CURRENT) == EV_NONE) {
@@ -3952,16 +4656,23 @@ __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz,
return obj;
obj->relaxed_core_relocs = OPTS_GET(opts, relaxed_core_relocs, false);
- relaxed_maps = OPTS_GET(opts, relaxed_maps, false);
- pin_root_path = OPTS_GET(opts, pin_root_path, NULL);
- attach_prog_fd = OPTS_GET(opts, attach_prog_fd, 0);
-
- CHECK_ERR(bpf_object__elf_init(obj), err, out);
- CHECK_ERR(bpf_object__check_endianness(obj), err, out);
- CHECK_ERR(bpf_object__probe_caps(obj), err, out);
- CHECK_ERR(bpf_object__elf_collect(obj, relaxed_maps, pin_root_path),
- err, out);
- CHECK_ERR(bpf_object__collect_reloc(obj), err, out);
+ kconfig = OPTS_GET(opts, kconfig, NULL);
+ if (kconfig) {
+ obj->kconfig = strdup(kconfig);
+ if (!obj->kconfig)
+ return ERR_PTR(-ENOMEM);
+ }
+
+ err = bpf_object__elf_init(obj);
+ err = err ? : bpf_object__check_endianness(obj);
+ err = err ? : bpf_object__elf_collect(obj);
+ err = err ? : bpf_object__collect_externs(obj);
+ err = err ? : bpf_object__finalize_btf(obj);
+ err = err ? : bpf_object__init_maps(obj, opts);
+ err = err ? : bpf_object__init_prog_names(obj);
+ err = err ? : bpf_object__collect_reloc(obj);
+ if (err)
+ goto out;
bpf_object__elf_finish(obj);
bpf_object__for_each_program(prog, obj) {
@@ -3978,15 +4689,8 @@ __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz,
bpf_program__set_type(prog, prog_type);
bpf_program__set_expected_attach_type(prog, attach_type);
- if (prog_type == BPF_PROG_TYPE_TRACING) {
- err = libbpf_find_attach_btf_id(prog->section_name,
- attach_type,
- attach_prog_fd);
- if (err <= 0)
- goto out;
- prog->attach_btf_id = err;
- prog->attach_prog_fd = attach_prog_fd;
- }
+ if (prog_type == BPF_PROG_TYPE_TRACING)
+ prog->attach_prog_fd = OPTS_GET(opts, attach_prog_fd, 0);
}
return obj;
@@ -4026,7 +4730,7 @@ struct bpf_object *bpf_object__open(const char *path)
}
struct bpf_object *
-bpf_object__open_file(const char *path, struct bpf_object_open_opts *opts)
+bpf_object__open_file(const char *path, const struct bpf_object_open_opts *opts)
{
if (!path)
return ERR_PTR(-EINVAL);
@@ -4038,7 +4742,7 @@ bpf_object__open_file(const char *path, struct bpf_object_open_opts *opts)
struct bpf_object *
bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz,
- struct bpf_object_open_opts *opts)
+ const struct bpf_object_open_opts *opts)
{
if (!obj_buf || obj_buf_sz == 0)
return ERR_PTR(-EINVAL);
@@ -4079,6 +4783,92 @@ int bpf_object__unload(struct bpf_object *obj)
return 0;
}
+static int bpf_object__sanitize_maps(struct bpf_object *obj)
+{
+ struct bpf_map *m;
+
+ bpf_object__for_each_map(m, obj) {
+ if (!bpf_map__is_internal(m))
+ continue;
+ if (!obj->caps.global_data) {
+ pr_warn("kernel doesn't support global data\n");
+ return -ENOTSUP;
+ }
+ if (!obj->caps.array_mmap)
+ m->def.map_flags ^= BPF_F_MMAPABLE;
+ }
+
+ return 0;
+}
+
+static int bpf_object__resolve_externs(struct bpf_object *obj,
+ const char *extra_kconfig)
+{
+ bool need_config = false;
+ struct extern_desc *ext;
+ int err, i;
+ void *data;
+
+ if (obj->nr_extern == 0)
+ return 0;
+
+ data = obj->maps[obj->kconfig_map_idx].mmaped;
+
+ for (i = 0; i < obj->nr_extern; i++) {
+ ext = &obj->externs[i];
+
+ if (strcmp(ext->name, "LINUX_KERNEL_VERSION") == 0) {
+ void *ext_val = data + ext->data_off;
+ __u32 kver = get_kernel_version();
+
+ if (!kver) {
+ pr_warn("failed to get kernel version\n");
+ return -EINVAL;
+ }
+ err = set_ext_value_num(ext, ext_val, kver);
+ if (err)
+ return err;
+ pr_debug("extern %s=0x%x\n", ext->name, kver);
+ } else if (strncmp(ext->name, "CONFIG_", 7) == 0) {
+ need_config = true;
+ } else {
+ pr_warn("unrecognized extern '%s'\n", ext->name);
+ return -EINVAL;
+ }
+ }
+ if (need_config && extra_kconfig) {
+ err = bpf_object__read_kconfig_mem(obj, extra_kconfig, data);
+ if (err)
+ return -EINVAL;
+ need_config = false;
+ for (i = 0; i < obj->nr_extern; i++) {
+ ext = &obj->externs[i];
+ if (!ext->is_set) {
+ need_config = true;
+ break;
+ }
+ }
+ }
+ if (need_config) {
+ err = bpf_object__read_kconfig_file(obj, data);
+ if (err)
+ return -EINVAL;
+ }
+ for (i = 0; i < obj->nr_extern; i++) {
+ ext = &obj->externs[i];
+
+ if (!ext->is_set && !ext->is_weak) {
+ pr_warn("extern %s (strong) not resolved\n", ext->name);
+ return -ESRCH;
+ } else if (!ext->is_set) {
+ pr_debug("extern %s (weak) not resolved, defaulting to zero\n",
+ ext->name);
+ }
+ }
+
+ return 0;
+}
+
int bpf_object__load_xattr(struct bpf_object_load_attr *attr)
{
struct bpf_object *obj;
@@ -4097,9 +4887,15 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr)
obj->loaded = true;
- CHECK_ERR(bpf_object__create_maps(obj), err, out);
- CHECK_ERR(bpf_object__relocate(obj, attr->target_btf_path), err, out);
- CHECK_ERR(bpf_object__load_progs(obj, attr->log_level), err, out);
+ err = bpf_object__probe_caps(obj);
+ err = err ? : bpf_object__resolve_externs(obj, obj->kconfig);
+ err = err ? : bpf_object__sanitize_and_load_btf(obj);
+ err = err ? : bpf_object__sanitize_maps(obj);
+ err = err ? : bpf_object__create_maps(obj);
+ err = err ? : bpf_object__relocate(obj, attr->target_btf_path);
+ err = err ? : bpf_object__load_progs(obj, attr->log_level);
+ if (err)
+ goto out;
return 0;
out:
@@ -4670,17 +5466,26 @@ void bpf_object__close(struct bpf_object *obj)
btf_ext__free(obj->btf_ext);
for (i = 0; i < obj->nr_maps; i++) {
- zfree(&obj->maps[i].name);
- zfree(&obj->maps[i].pin_path);
- if (obj->maps[i].clear_priv)
- obj->maps[i].clear_priv(&obj->maps[i],
- obj->maps[i].priv);
- obj->maps[i].priv = NULL;
- obj->maps[i].clear_priv = NULL;
+ struct bpf_map *map = &obj->maps[i];
+
+ if (map->clear_priv)
+ map->clear_priv(map, map->priv);
+ map->priv = NULL;
+ map->clear_priv = NULL;
+
+ if (map->mmaped) {
+ munmap(map->mmaped, bpf_map_mmap_sz(map));
+ map->mmaped = NULL;
+ }
+
+ zfree(&map->name);
+ zfree(&map->pin_path);
}
- zfree(&obj->sections.rodata);
- zfree(&obj->sections.data);
+ zfree(&obj->kconfig);
+ zfree(&obj->externs);
+ obj->nr_extern = 0;
+
zfree(&obj->maps);
obj->nr_maps = 0;
@@ -4820,6 +5625,11 @@ void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex)
prog->prog_ifindex = ifindex;
}
+const char *bpf_program__name(const struct bpf_program *prog)
+{
+ return prog->name;
+}
+
const char *bpf_program__title(const struct bpf_program *prog, bool needs_copy)
{
const char *title;
@@ -4972,7 +5782,28 @@ void bpf_program__set_expected_attach_type(struct bpf_program *prog,
*/
#define BPF_APROG_COMPAT(string, ptype) BPF_PROG_SEC(string, ptype)
-static const struct {
+#define SEC_DEF(sec_pfx, ptype, ...) { \
+ .sec = sec_pfx, \
+ .len = sizeof(sec_pfx) - 1, \
+ .prog_type = BPF_PROG_TYPE_##ptype, \
+ __VA_ARGS__ \
+}
+
+struct bpf_sec_def;
+
+typedef struct bpf_link *(*attach_fn_t)(const struct bpf_sec_def *sec,
+ struct bpf_program *prog);
+
+static struct bpf_link *attach_kprobe(const struct bpf_sec_def *sec,
+ struct bpf_program *prog);
+static struct bpf_link *attach_tp(const struct bpf_sec_def *sec,
+ struct bpf_program *prog);
+static struct bpf_link *attach_raw_tp(const struct bpf_sec_def *sec,
+ struct bpf_program *prog);
+static struct bpf_link *attach_trace(const struct bpf_sec_def *sec,
+ struct bpf_program *prog);
+
+struct bpf_sec_def {
const char *sec;
size_t len;
enum bpf_prog_type prog_type;
@@ -4980,24 +5811,40 @@ static const struct {
bool is_attachable;
bool is_attach_btf;
enum bpf_attach_type attach_type;
-} section_names[] = {
+ attach_fn_t attach_fn;
+};
+
+static const struct bpf_sec_def section_defs[] = {
BPF_PROG_SEC("socket", BPF_PROG_TYPE_SOCKET_FILTER),
- BPF_PROG_SEC("kprobe/", BPF_PROG_TYPE_KPROBE),
+ BPF_PROG_SEC("sk_reuseport", BPF_PROG_TYPE_SK_REUSEPORT),
+ SEC_DEF("kprobe/", KPROBE,
+ .attach_fn = attach_kprobe),
BPF_PROG_SEC("uprobe/", BPF_PROG_TYPE_KPROBE),
- BPF_PROG_SEC("kretprobe/", BPF_PROG_TYPE_KPROBE),
+ SEC_DEF("kretprobe/", KPROBE,
+ .attach_fn = attach_kprobe),
BPF_PROG_SEC("uretprobe/", BPF_PROG_TYPE_KPROBE),
BPF_PROG_SEC("classifier", BPF_PROG_TYPE_SCHED_CLS),
BPF_PROG_SEC("action", BPF_PROG_TYPE_SCHED_ACT),
- BPF_PROG_SEC("tracepoint/", BPF_PROG_TYPE_TRACEPOINT),
- BPF_PROG_SEC("tp/", BPF_PROG_TYPE_TRACEPOINT),
- BPF_PROG_SEC("raw_tracepoint/", BPF_PROG_TYPE_RAW_TRACEPOINT),
- BPF_PROG_SEC("raw_tp/", BPF_PROG_TYPE_RAW_TRACEPOINT),
- BPF_PROG_BTF("tp_btf/", BPF_PROG_TYPE_TRACING,
- BPF_TRACE_RAW_TP),
- BPF_PROG_BTF("fentry/", BPF_PROG_TYPE_TRACING,
- BPF_TRACE_FENTRY),
- BPF_PROG_BTF("fexit/", BPF_PROG_TYPE_TRACING,
- BPF_TRACE_FEXIT),
+ SEC_DEF("tracepoint/", TRACEPOINT,
+ .attach_fn = attach_tp),
+ SEC_DEF("tp/", TRACEPOINT,
+ .attach_fn = attach_tp),
+ SEC_DEF("raw_tracepoint/", RAW_TRACEPOINT,
+ .attach_fn = attach_raw_tp),
+ SEC_DEF("raw_tp/", RAW_TRACEPOINT,
+ .attach_fn = attach_raw_tp),
+ SEC_DEF("tp_btf/", TRACING,
+ .expected_attach_type = BPF_TRACE_RAW_TP,
+ .is_attach_btf = true,
+ .attach_fn = attach_trace),
+ SEC_DEF("fentry/", TRACING,
+ .expected_attach_type = BPF_TRACE_FENTRY,
+ .is_attach_btf = true,
+ .attach_fn = attach_trace),
+ SEC_DEF("fexit/", TRACING,
+ .expected_attach_type = BPF_TRACE_FEXIT,
+ .is_attach_btf = true,
+ .attach_fn = attach_trace),
BPF_PROG_SEC("xdp", BPF_PROG_TYPE_XDP),
BPF_PROG_SEC("perf_event", BPF_PROG_TYPE_PERF_EVENT),
BPF_PROG_SEC("lwt_in", BPF_PROG_TYPE_LWT_IN),
@@ -5059,12 +5906,26 @@ static const struct {
#undef BPF_APROG_SEC
#undef BPF_EAPROG_SEC
#undef BPF_APROG_COMPAT
+#undef SEC_DEF
#define MAX_TYPE_NAME_SIZE 32
+static const struct bpf_sec_def *find_sec_def(const char *sec_name)
+{
+ int i, n = ARRAY_SIZE(section_defs);
+
+ for (i = 0; i < n; i++) {
+ if (strncmp(sec_name,
+ section_defs[i].sec, section_defs[i].len))
+ continue;
+ return &section_defs[i];
+ }
+ return NULL;
+}
+
static char *libbpf_get_type_names(bool attach_type)
{
- int i, len = ARRAY_SIZE(section_names) * MAX_TYPE_NAME_SIZE;
+ int i, len = ARRAY_SIZE(section_defs) * MAX_TYPE_NAME_SIZE;
char *buf;
buf = malloc(len);
@@ -5073,16 +5934,16 @@ static char *libbpf_get_type_names(bool attach_type)
buf[0] = '\0';
/* Forge string buf with all available names */
- for (i = 0; i < ARRAY_SIZE(section_names); i++) {
- if (attach_type && !section_names[i].is_attachable)
+ for (i = 0; i < ARRAY_SIZE(section_defs); i++) {
+ if (attach_type && !section_defs[i].is_attachable)
continue;
- if (strlen(buf) + strlen(section_names[i].sec) + 2 > len) {
+ if (strlen(buf) + strlen(section_defs[i].sec) + 2 > len) {
free(buf);
return NULL;
}
strcat(buf, " ");
- strcat(buf, section_names[i].sec);
+ strcat(buf, section_defs[i].sec);
}
return buf;
@@ -5091,23 +5952,23 @@ static char *libbpf_get_type_names(bool attach_type)
int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,
enum bpf_attach_type *expected_attach_type)
{
+ const struct bpf_sec_def *sec_def;
char *type_names;
- int i;
if (!name)
return -EINVAL;
- for (i = 0; i < ARRAY_SIZE(section_names); i++) {
- if (strncmp(name, section_names[i].sec, section_names[i].len))
- continue;
- *prog_type = section_names[i].prog_type;
- *expected_attach_type = section_names[i].expected_attach_type;
+ sec_def = find_sec_def(name);
+ if (sec_def) {
+ *prog_type = sec_def->prog_type;
+ *expected_attach_type = sec_def->expected_attach_type;
return 0;
}
- pr_warn("failed to guess program type from ELF section '%s'\n", name);
+
+ pr_debug("failed to guess program type from ELF section '%s'\n", name);
type_names = libbpf_get_type_names(false);
if (type_names != NULL) {
- pr_info("supported section(type) names are:%s\n", type_names);
+ pr_debug("supported section(type) names are:%s\n", type_names);
free(type_names);
}
@@ -5186,16 +6047,16 @@ static int libbpf_find_attach_btf_id(const char *name,
if (!name)
return -EINVAL;
- for (i = 0; i < ARRAY_SIZE(section_names); i++) {
- if (!section_names[i].is_attach_btf)
+ for (i = 0; i < ARRAY_SIZE(section_defs); i++) {
+ if (!section_defs[i].is_attach_btf)
continue;
- if (strncmp(name, section_names[i].sec, section_names[i].len))
+ if (strncmp(name, section_defs[i].sec, section_defs[i].len))
continue;
if (attach_prog_fd)
- err = libbpf_find_prog_btf_id(name + section_names[i].len,
+ err = libbpf_find_prog_btf_id(name + section_defs[i].len,
attach_prog_fd);
else
- err = libbpf_find_vmlinux_btf_id(name + section_names[i].len,
+ err = libbpf_find_vmlinux_btf_id(name + section_defs[i].len,
attach_type);
if (err <= 0)
pr_warn("%s is not found in vmlinux BTF\n", name);
@@ -5214,18 +6075,18 @@ int libbpf_attach_type_by_name(const char *name,
if (!name)
return -EINVAL;
- for (i = 0; i < ARRAY_SIZE(section_names); i++) {
- if (strncmp(name, section_names[i].sec, section_names[i].len))
+ for (i = 0; i < ARRAY_SIZE(section_defs); i++) {
+ if (strncmp(name, section_defs[i].sec, section_defs[i].len))
continue;
- if (!section_names[i].is_attachable)
+ if (!section_defs[i].is_attachable)
return -EINVAL;
- *attach_type = section_names[i].attach_type;
+ *attach_type = section_defs[i].attach_type;
return 0;
}
- pr_warn("failed to guess attach type based on ELF section name '%s'\n", name);
+ pr_debug("failed to guess attach type based on ELF section name '%s'\n", name);
type_names = libbpf_get_type_names(true);
if (type_names != NULL) {
- pr_info("attachable section(type) names are:%s\n", type_names);
+ pr_debug("attachable section(type) names are:%s\n", type_names);
free(type_names);
}
@@ -5466,17 +6327,37 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
}
struct bpf_link {
+ int (*detach)(struct bpf_link *link);
int (*destroy)(struct bpf_link *link);
+ bool disconnected;
};
+/* Release "ownership" of underlying BPF resource (typically, BPF program
+ * attached to some BPF hook, e.g., tracepoint, kprobe, etc). Disconnected
+ * link, when destructed through bpf_link__destroy() call won't attempt to
+ * detach/unregisted that BPF resource. This is useful in situations where,
+ * say, attached BPF program has to outlive userspace program that attached it
+ * in the system. Depending on type of BPF program, though, there might be
+ * additional steps (like pinning BPF program in BPF FS) necessary to ensure
+ * exit of userspace program doesn't trigger automatic detachment and clean up
+ * inside the kernel.
+ */
+void bpf_link__disconnect(struct bpf_link *link)
+{
+ link->disconnected = true;
+}
+
int bpf_link__destroy(struct bpf_link *link)
{
- int err;
+ int err = 0;
if (!link)
return 0;
- err = link->destroy(link);
+ if (!link->disconnected && link->detach)
+ err = link->detach(link);
+ if (link->destroy)
+ link->destroy(link);
free(link);
return err;
@@ -5487,7 +6368,7 @@ struct bpf_link_fd {
int fd; /* hook FD */
};
-static int bpf_link__destroy_perf_event(struct bpf_link *link)
+static int bpf_link__detach_perf_event(struct bpf_link *link)
{
struct bpf_link_fd *l = (void *)link;
int err;
@@ -5519,10 +6400,10 @@ struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog,
return ERR_PTR(-EINVAL);
}
- link = malloc(sizeof(*link));
+ link = calloc(1, sizeof(*link));
if (!link)
return ERR_PTR(-ENOMEM);
- link->link.destroy = &bpf_link__destroy_perf_event;
+ link->link.detach = &bpf_link__detach_perf_event;
link->fd = pfd;
if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) {
@@ -5679,6 +6560,18 @@ struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog,
return link;
}
+static struct bpf_link *attach_kprobe(const struct bpf_sec_def *sec,
+ struct bpf_program *prog)
+{
+ const char *func_name;
+ bool retprobe;
+
+ func_name = bpf_program__title(prog, false) + sec->len;
+ retprobe = strcmp(sec->sec, "kretprobe/") == 0;
+
+ return bpf_program__attach_kprobe(prog, retprobe, func_name);
+}
+
struct bpf_link *bpf_program__attach_uprobe(struct bpf_program *prog,
bool retprobe, pid_t pid,
const char *binary_path,
@@ -5791,7 +6684,33 @@ struct bpf_link *bpf_program__attach_tracepoint(struct bpf_program *prog,
return link;
}
-static int bpf_link__destroy_fd(struct bpf_link *link)
+static struct bpf_link *attach_tp(const struct bpf_sec_def *sec,
+ struct bpf_program *prog)
+{
+ char *sec_name, *tp_cat, *tp_name;
+ struct bpf_link *link;
+
+ sec_name = strdup(bpf_program__title(prog, false));
+ if (!sec_name)
+ return ERR_PTR(-ENOMEM);
+
+ /* extract "tp/<category>/<name>" */
+ tp_cat = sec_name + sec->len;
+ tp_name = strchr(tp_cat, '/');
+ if (!tp_name) {
+ link = ERR_PTR(-EINVAL);
+ goto out;
+ }
+ *tp_name = '\0';
+ tp_name++;
+
+ link = bpf_program__attach_tracepoint(prog, tp_cat, tp_name);
+out:
+ free(sec_name);
+ return link;
+}
+
+static int bpf_link__detach_fd(struct bpf_link *link)
{
struct bpf_link_fd *l = (void *)link;
@@ -5812,10 +6731,10 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog,
return ERR_PTR(-EINVAL);
}
- link = malloc(sizeof(*link));
+ link = calloc(1, sizeof(*link));
if (!link)
return ERR_PTR(-ENOMEM);
- link->link.destroy = &bpf_link__destroy_fd;
+ link->link.detach = &bpf_link__detach_fd;
pfd = bpf_raw_tracepoint_open(tp_name, prog_fd);
if (pfd < 0) {
@@ -5830,6 +6749,14 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog,
return (struct bpf_link *)link;
}
+static struct bpf_link *attach_raw_tp(const struct bpf_sec_def *sec,
+ struct bpf_program *prog)
+{
+ const char *tp_name = bpf_program__title(prog, false) + sec->len;
+
+ return bpf_program__attach_raw_tracepoint(prog, tp_name);
+}
+
struct bpf_link *bpf_program__attach_trace(struct bpf_program *prog)
{
char errmsg[STRERR_BUFSIZE];
@@ -5843,10 +6770,10 @@ struct bpf_link *bpf_program__attach_trace(struct bpf_program *prog)
return ERR_PTR(-EINVAL);
}
- link = malloc(sizeof(*link));
+ link = calloc(1, sizeof(*link));
if (!link)
return ERR_PTR(-ENOMEM);
- link->link.destroy = &bpf_link__destroy_fd;
+ link->link.detach = &bpf_link__detach_fd;
pfd = bpf_raw_tracepoint_open(NULL, prog_fd);
if (pfd < 0) {
@@ -5861,6 +6788,23 @@ struct bpf_link *bpf_program__attach_trace(struct bpf_program *prog)
return (struct bpf_link *)link;
}
+static struct bpf_link *attach_trace(const struct bpf_sec_def *sec,
+ struct bpf_program *prog)
+{
+ return bpf_program__attach_trace(prog);
+}
+
+struct bpf_link *bpf_program__attach(struct bpf_program *prog)
+{
+ const struct bpf_sec_def *sec_def;
+
+ sec_def = find_sec_def(bpf_program__title(prog, false));
+ if (!sec_def || !sec_def->attach_fn)
+ return ERR_PTR(-ESRCH);
+
+ return sec_def->attach_fn(sec_def, prog);
+}
+
enum bpf_perf_event_ret
bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
void **copy_mem, size_t *copy_size,
@@ -5944,7 +6888,7 @@ struct perf_buffer {
size_t mmap_size;
struct perf_cpu_buf **cpu_bufs;
struct epoll_event *events;
- int cpu_cnt;
+ int cpu_cnt; /* number of allocated CPU buffers */
int epoll_fd; /* perf event FD */
int map_fd; /* BPF_MAP_TYPE_PERF_EVENT_ARRAY BPF map FD */
};
@@ -6078,11 +7022,13 @@ perf_buffer__new_raw(int map_fd, size_t page_cnt,
static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
struct perf_buffer_params *p)
{
+ const char *online_cpus_file = "/sys/devices/system/cpu/online";
struct bpf_map_info map = {};
char msg[STRERR_BUFSIZE];
struct perf_buffer *pb;
+ bool *online = NULL;
__u32 map_info_len;
- int err, i;
+ int err, i, j, n;
if (page_cnt & (page_cnt - 1)) {
pr_warn("page count should be power of two, but is %zu\n",
@@ -6151,20 +7097,32 @@ static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
goto error;
}
- for (i = 0; i < pb->cpu_cnt; i++) {
+ err = parse_cpu_mask_file(online_cpus_file, &online, &n);
+ if (err) {
+ pr_warn("failed to get online CPU mask: %d\n", err);
+ goto error;
+ }
+
+ for (i = 0, j = 0; i < pb->cpu_cnt; i++) {
struct perf_cpu_buf *cpu_buf;
int cpu, map_key;
cpu = p->cpu_cnt > 0 ? p->cpus[i] : i;
map_key = p->cpu_cnt > 0 ? p->map_keys[i] : i;
+ /* in case user didn't explicitly requested particular CPUs to
+ * be attached to, skip offline/not present CPUs
+ */
+ if (p->cpu_cnt <= 0 && (cpu >= n || !online[cpu]))
+ continue;
+
cpu_buf = perf_buffer__open_cpu_buf(pb, p->attr, cpu, map_key);
if (IS_ERR(cpu_buf)) {
err = PTR_ERR(cpu_buf);
goto error;
}
- pb->cpu_bufs[i] = cpu_buf;
+ pb->cpu_bufs[j] = cpu_buf;
err = bpf_map_update_elem(pb->map_fd, &map_key,
&cpu_buf->fd, 0);
@@ -6176,21 +7134,25 @@ static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
goto error;
}
- pb->events[i].events = EPOLLIN;
- pb->events[i].data.ptr = cpu_buf;
+ pb->events[j].events = EPOLLIN;
+ pb->events[j].data.ptr = cpu_buf;
if (epoll_ctl(pb->epoll_fd, EPOLL_CTL_ADD, cpu_buf->fd,
- &pb->events[i]) < 0) {
+ &pb->events[j]) < 0) {
err = -errno;
pr_warn("failed to epoll_ctl cpu #%d perf FD %d: %s\n",
cpu, cpu_buf->fd,
libbpf_strerror_r(err, msg, sizeof(msg)));
goto error;
}
+ j++;
}
+ pb->cpu_cnt = j;
+ free(online);
return pb;
error:
+ free(online);
if (pb)
perf_buffer__free(pb);
return ERR_PTR(err);
@@ -6521,62 +7483,267 @@ void bpf_program__bpil_offs_to_addr(struct bpf_prog_info_linear *info_linear)
}
}
-int libbpf_num_possible_cpus(void)
+int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz)
{
- static const char *fcpu = "/sys/devices/system/cpu/possible";
- int len = 0, n = 0, il = 0, ir = 0;
- unsigned int start = 0, end = 0;
- int tmp_cpus = 0;
- static int cpus;
- char buf[128];
- int error = 0;
- int fd = -1;
+ int err = 0, n, len, start, end = -1;
+ bool *tmp;
- tmp_cpus = READ_ONCE(cpus);
- if (tmp_cpus > 0)
- return tmp_cpus;
+ *mask = NULL;
+ *mask_sz = 0;
+
+ /* Each sub string separated by ',' has format \d+-\d+ or \d+ */
+ while (*s) {
+ if (*s == ',' || *s == '\n') {
+ s++;
+ continue;
+ }
+ n = sscanf(s, "%d%n-%d%n", &start, &len, &end, &len);
+ if (n <= 0 || n > 2) {
+ pr_warn("Failed to get CPU range %s: %d\n", s, n);
+ err = -EINVAL;
+ goto cleanup;
+ } else if (n == 1) {
+ end = start;
+ }
+ if (start < 0 || start > end) {
+ pr_warn("Invalid CPU range [%d,%d] in %s\n",
+ start, end, s);
+ err = -EINVAL;
+ goto cleanup;
+ }
+ tmp = realloc(*mask, end + 1);
+ if (!tmp) {
+ err = -ENOMEM;
+ goto cleanup;
+ }
+ *mask = tmp;
+ memset(tmp + *mask_sz, 0, start - *mask_sz);
+ memset(tmp + start, 1, end - start + 1);
+ *mask_sz = end + 1;
+ s += len;
+ }
+ if (!*mask_sz) {
+ pr_warn("Empty CPU range\n");
+ return -EINVAL;
+ }
+ return 0;
+cleanup:
+ free(*mask);
+ *mask = NULL;
+ return err;
+}
+
+int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz)
+{
+ int fd, err = 0, len;
+ char buf[128];
fd = open(fcpu, O_RDONLY);
if (fd < 0) {
- error = errno;
- pr_warn("Failed to open file %s: %s\n", fcpu, strerror(error));
- return -error;
+ err = -errno;
+ pr_warn("Failed to open cpu mask file %s: %d\n", fcpu, err);
+ return err;
}
len = read(fd, buf, sizeof(buf));
close(fd);
if (len <= 0) {
- error = len ? errno : EINVAL;
- pr_warn("Failed to read # of possible cpus from %s: %s\n",
- fcpu, strerror(error));
- return -error;
+ err = len ? -errno : -EINVAL;
+ pr_warn("Failed to read cpu mask from %s: %d\n", fcpu, err);
+ return err;
}
- if (len == sizeof(buf)) {
- pr_warn("File %s size overflow\n", fcpu);
- return -EOVERFLOW;
+ if (len >= sizeof(buf)) {
+ pr_warn("CPU mask is too big in file %s\n", fcpu);
+ return -E2BIG;
}
buf[len] = '\0';
- for (ir = 0, tmp_cpus = 0; ir <= len; ir++) {
- /* Each sub string separated by ',' has format \d+-\d+ or \d+ */
- if (buf[ir] == ',' || buf[ir] == '\0') {
- buf[ir] = '\0';
- n = sscanf(&buf[il], "%u-%u", &start, &end);
- if (n <= 0) {
- pr_warn("Failed to get # CPUs from %s\n",
- &buf[il]);
- return -EINVAL;
- } else if (n == 1) {
- end = start;
- }
- tmp_cpus += end - start + 1;
- il = ir + 1;
- }
- }
- if (tmp_cpus <= 0) {
- pr_warn("Invalid #CPUs %d from %s\n", tmp_cpus, fcpu);
- return -EINVAL;
+ return parse_cpu_mask_str(buf, mask, mask_sz);
+}
+
+int libbpf_num_possible_cpus(void)
+{
+ static const char *fcpu = "/sys/devices/system/cpu/possible";
+ static int cpus;
+ int err, n, i, tmp_cpus;
+ bool *mask;
+
+ tmp_cpus = READ_ONCE(cpus);
+ if (tmp_cpus > 0)
+ return tmp_cpus;
+
+ err = parse_cpu_mask_file(fcpu, &mask, &n);
+ if (err)
+ return err;
+
+ tmp_cpus = 0;
+ for (i = 0; i < n; i++) {
+ if (mask[i])
+ tmp_cpus++;
}
+ free(mask);
WRITE_ONCE(cpus, tmp_cpus);
return tmp_cpus;
}
+
+int bpf_object__open_skeleton(struct bpf_object_skeleton *s,
+ const struct bpf_object_open_opts *opts)
+{
+ DECLARE_LIBBPF_OPTS(bpf_object_open_opts, skel_opts,
+ .object_name = s->name,
+ );
+ struct bpf_object *obj;
+ int i;
+
+ /* Attempt to preserve opts->object_name, unless overriden by user
+ * explicitly. Overwriting object name for skeletons is discouraged,
+ * as it breaks global data maps, because they contain object name
+ * prefix as their own map name prefix. When skeleton is generated,
+ * bpftool is making an assumption that this name will stay the same.
+ */
+ if (opts) {
+ memcpy(&skel_opts, opts, sizeof(*opts));
+ if (!opts->object_name)
+ skel_opts.object_name = s->name;
+ }
+
+ obj = bpf_object__open_mem(s->data, s->data_sz, &skel_opts);
+ if (IS_ERR(obj)) {
+ pr_warn("failed to initialize skeleton BPF object '%s': %ld\n",
+ s->name, PTR_ERR(obj));
+ return PTR_ERR(obj);
+ }
+
+ *s->obj = obj;
+
+ for (i = 0; i < s->map_cnt; i++) {
+ struct bpf_map **map = s->maps[i].map;
+ const char *name = s->maps[i].name;
+ void **mmaped = s->maps[i].mmaped;
+
+ *map = bpf_object__find_map_by_name(obj, name);
+ if (!*map) {
+ pr_warn("failed to find skeleton map '%s'\n", name);
+ return -ESRCH;
+ }
+
+ /* externs shouldn't be pre-setup from user code */
+ if (mmaped && (*map)->libbpf_type != LIBBPF_MAP_KCONFIG)
+ *mmaped = (*map)->mmaped;
+ }
+
+ for (i = 0; i < s->prog_cnt; i++) {
+ struct bpf_program **prog = s->progs[i].prog;
+ const char *name = s->progs[i].name;
+
+ *prog = bpf_object__find_program_by_name(obj, name);
+ if (!*prog) {
+ pr_warn("failed to find skeleton program '%s'\n", name);
+ return -ESRCH;
+ }
+ }
+
+ return 0;
+}
+
+int bpf_object__load_skeleton(struct bpf_object_skeleton *s)
+{
+ int i, err;
+
+ err = bpf_object__load(*s->obj);
+ if (err) {
+ pr_warn("failed to load BPF skeleton '%s': %d\n", s->name, err);
+ return err;
+ }
+
+ for (i = 0; i < s->map_cnt; i++) {
+ struct bpf_map *map = *s->maps[i].map;
+ size_t mmap_sz = bpf_map_mmap_sz(map);
+ int prot, map_fd = bpf_map__fd(map);
+ void **mmaped = s->maps[i].mmaped;
+
+ if (!mmaped)
+ continue;
+
+ if (!(map->def.map_flags & BPF_F_MMAPABLE)) {
+ *mmaped = NULL;
+ continue;
+ }
+
+ if (map->def.map_flags & BPF_F_RDONLY_PROG)
+ prot = PROT_READ;
+ else
+ prot = PROT_READ | PROT_WRITE;
+
+ /* Remap anonymous mmap()-ed "map initialization image" as
+ * a BPF map-backed mmap()-ed memory, but preserving the same
+ * memory address. This will cause kernel to change process'
+ * page table to point to a different piece of kernel memory,
+ * but from userspace point of view memory address (and its
+ * contents, being identical at this point) will stay the
+ * same. This mapping will be released by bpf_object__close()
+ * as per normal clean up procedure, so we don't need to worry
+ * about it from skeleton's clean up perspective.
+ */
+ *mmaped = mmap(map->mmaped, mmap_sz, prot,
+ MAP_SHARED | MAP_FIXED, map_fd, 0);
+ if (*mmaped == MAP_FAILED) {
+ err = -errno;
+ *mmaped = NULL;
+ pr_warn("failed to re-mmap() map '%s': %d\n",
+ bpf_map__name(map), err);
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+int bpf_object__attach_skeleton(struct bpf_object_skeleton *s)
+{
+ int i;
+
+ for (i = 0; i < s->prog_cnt; i++) {
+ struct bpf_program *prog = *s->progs[i].prog;
+ struct bpf_link **link = s->progs[i].link;
+ const struct bpf_sec_def *sec_def;
+ const char *sec_name = bpf_program__title(prog, false);
+
+ sec_def = find_sec_def(sec_name);
+ if (!sec_def || !sec_def->attach_fn)
+ continue;
+
+ *link = sec_def->attach_fn(sec_def, prog);
+ if (IS_ERR(*link)) {
+ pr_warn("failed to auto-attach program '%s': %ld\n",
+ bpf_program__name(prog), PTR_ERR(*link));
+ return PTR_ERR(*link);
+ }
+ }
+
+ return 0;
+}
+
+void bpf_object__detach_skeleton(struct bpf_object_skeleton *s)
+{
+ int i;
+
+ for (i = 0; i < s->prog_cnt; i++) {
+ struct bpf_link **link = s->progs[i].link;
+
+ if (!IS_ERR_OR_NULL(*link))
+ bpf_link__destroy(*link);
+ *link = NULL;
+ }
+}
+
+void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s)
+{
+ if (s->progs)
+ bpf_object__detach_skeleton(s);
+ if (s->obj)
+ bpf_object__close(*s->obj);
+ free(s->maps);
+ free(s->progs);
+ free(s);
+}
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 0dbf4bfba0c4..fe592ef48f1b 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -17,14 +17,12 @@
#include <sys/types.h> // for size_t
#include <linux/bpf.h>
+#include "libbpf_common.h"
+
#ifdef __cplusplus
extern "C" {
#endif
-#ifndef LIBBPF_API
-#define LIBBPF_API __attribute__((visibility("default")))
-#endif
-
enum libbpf_errno {
__LIBBPF_ERRNO__START = 4000,
@@ -67,28 +65,6 @@ struct bpf_object_open_attr {
enum bpf_prog_type prog_type;
};
-/* Helper macro to declare and initialize libbpf options struct
- *
- * This dance with uninitialized declaration, followed by memset to zero,
- * followed by assignment using compound literal syntax is done to preserve
- * ability to use a nice struct field initialization syntax and **hopefully**
- * have all the padding bytes initialized to zero. It's not guaranteed though,
- * when copying literal, that compiler won't copy garbage in literal's padding
- * bytes, but that's the best way I've found and it seems to work in practice.
- *
- * Macro declares opts struct of given type and name, zero-initializes,
- * including any extra padding, it with memset() and then assigns initial
- * values provided by users in struct initializer-syntax as varargs.
- */
-#define DECLARE_LIBBPF_OPTS(TYPE, NAME, ...) \
- struct TYPE NAME = ({ \
- memset(&NAME, 0, sizeof(struct TYPE)); \
- (struct TYPE) { \
- .sz = sizeof(struct TYPE), \
- __VA_ARGS__ \
- }; \
- })
-
struct bpf_object_open_opts {
/* size of this struct, for forward/backward compatiblity */
size_t sz;
@@ -109,15 +85,19 @@ struct bpf_object_open_opts {
*/
const char *pin_root_path;
__u32 attach_prog_fd;
+ /* Additional kernel config content that augments and overrides
+ * system Kconfig for CONFIG_xxx externs.
+ */
+ const char *kconfig;
};
-#define bpf_object_open_opts__last_field attach_prog_fd
+#define bpf_object_open_opts__last_field kconfig
LIBBPF_API struct bpf_object *bpf_object__open(const char *path);
LIBBPF_API struct bpf_object *
-bpf_object__open_file(const char *path, struct bpf_object_open_opts *opts);
+bpf_object__open_file(const char *path, const struct bpf_object_open_opts *opts);
LIBBPF_API struct bpf_object *
bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz,
- struct bpf_object_open_opts *opts);
+ const struct bpf_object_open_opts *opts);
/* deprecated bpf_object__open variants */
LIBBPF_API struct bpf_object *
@@ -126,11 +106,6 @@ bpf_object__open_buffer(const void *obj_buf, size_t obj_buf_sz,
LIBBPF_API struct bpf_object *
bpf_object__open_xattr(struct bpf_object_open_attr *attr);
-int bpf_object__section_size(const struct bpf_object *obj, const char *name,
- __u32 *size);
-int bpf_object__variable_offset(const struct bpf_object *obj, const char *name,
- __u32 *off);
-
enum libbpf_pin_type {
LIBBPF_PIN_NONE,
/* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */
@@ -161,6 +136,7 @@ struct bpf_object_load_attr {
LIBBPF_API int bpf_object__load(struct bpf_object *obj);
LIBBPF_API int bpf_object__load_xattr(struct bpf_object_load_attr *attr);
LIBBPF_API int bpf_object__unload(struct bpf_object *obj);
+
LIBBPF_API const char *bpf_object__name(const struct bpf_object *obj);
LIBBPF_API unsigned int bpf_object__kversion(const struct bpf_object *obj);
@@ -171,6 +147,9 @@ LIBBPF_API int bpf_object__btf_fd(const struct bpf_object *obj);
LIBBPF_API struct bpf_program *
bpf_object__find_program_by_title(const struct bpf_object *obj,
const char *title);
+LIBBPF_API struct bpf_program *
+bpf_object__find_program_by_name(const struct bpf_object *obj,
+ const char *name);
LIBBPF_API struct bpf_object *bpf_object__next(struct bpf_object *prev);
#define bpf_object__for_each_safe(pos, tmp) \
@@ -214,6 +193,7 @@ LIBBPF_API void *bpf_program__priv(const struct bpf_program *prog);
LIBBPF_API void bpf_program__set_ifindex(struct bpf_program *prog,
__u32 ifindex);
+LIBBPF_API const char *bpf_program__name(const struct bpf_program *prog);
LIBBPF_API const char *bpf_program__title(const struct bpf_program *prog,
bool needs_copy);
@@ -235,9 +215,12 @@ LIBBPF_API void bpf_program__unload(struct bpf_program *prog);
struct bpf_link;
+LIBBPF_API void bpf_link__disconnect(struct bpf_link *link);
LIBBPF_API int bpf_link__destroy(struct bpf_link *link);
LIBBPF_API struct bpf_link *
+bpf_program__attach(struct bpf_program *prog);
+LIBBPF_API struct bpf_link *
bpf_program__attach_perf_event(struct bpf_program *prog, int pfd);
LIBBPF_API struct bpf_link *
bpf_program__attach_kprobe(struct bpf_program *prog, bool retprobe,
@@ -512,18 +495,6 @@ bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
void **copy_mem, size_t *copy_size,
bpf_perf_event_print_t fn, void *private_data);
-struct nlattr;
-typedef int (*libbpf_dump_nlmsg_t)(void *cookie, void *msg, struct nlattr **tb);
-int libbpf_netlink_open(unsigned int *nl_pid);
-int libbpf_nl_get_link(int sock, unsigned int nl_pid,
- libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie);
-int libbpf_nl_get_class(int sock, unsigned int nl_pid, int ifindex,
- libbpf_dump_nlmsg_t dump_class_nlmsg, void *cookie);
-int libbpf_nl_get_qdisc(int sock, unsigned int nl_pid, int ifindex,
- libbpf_dump_nlmsg_t dump_qdisc_nlmsg, void *cookie);
-int libbpf_nl_get_filter(int sock, unsigned int nl_pid, int ifindex, int handle,
- libbpf_dump_nlmsg_t dump_filter_nlmsg, void *cookie);
-
struct bpf_prog_linfo;
struct bpf_prog_info;
@@ -630,6 +601,50 @@ bpf_program__bpil_offs_to_addr(struct bpf_prog_info_linear *info_linear);
*/
LIBBPF_API int libbpf_num_possible_cpus(void);
+struct bpf_map_skeleton {
+ const char *name;
+ struct bpf_map **map;
+ void **mmaped;
+};
+
+struct bpf_prog_skeleton {
+ const char *name;
+ struct bpf_program **prog;
+ struct bpf_link **link;
+};
+
+struct bpf_object_skeleton {
+ size_t sz; /* size of this struct, for forward/backward compatibility */
+
+ const char *name;
+ void *data;
+ size_t data_sz;
+
+ struct bpf_object **obj;
+
+ int map_cnt;
+ int map_skel_sz; /* sizeof(struct bpf_skeleton_map) */
+ struct bpf_map_skeleton *maps;
+
+ int prog_cnt;
+ int prog_skel_sz; /* sizeof(struct bpf_skeleton_prog) */
+ struct bpf_prog_skeleton *progs;
+};
+
+LIBBPF_API int
+bpf_object__open_skeleton(struct bpf_object_skeleton *s,
+ const struct bpf_object_open_opts *opts);
+LIBBPF_API int bpf_object__load_skeleton(struct bpf_object_skeleton *s);
+LIBBPF_API int bpf_object__attach_skeleton(struct bpf_object_skeleton *s);
+LIBBPF_API void bpf_object__detach_skeleton(struct bpf_object_skeleton *s);
+LIBBPF_API void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s);
+
+enum libbpf_tristate {
+ TRI_NO = 0,
+ TRI_YES = 1,
+ TRI_MODULE = 2,
+};
+
#ifdef __cplusplus
} /* extern "C" */
#endif
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 8ddc2c40e482..e9713a574243 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -208,3 +208,19 @@ LIBBPF_0.0.6 {
btf__find_by_name_kind;
libbpf_find_vmlinux_btf_id;
} LIBBPF_0.0.5;
+
+LIBBPF_0.0.7 {
+ global:
+ btf_dump__emit_type_decl;
+ bpf_link__disconnect;
+ bpf_object__find_program_by_name;
+ bpf_object__attach_skeleton;
+ bpf_object__destroy_skeleton;
+ bpf_object__detach_skeleton;
+ bpf_object__load_skeleton;
+ bpf_object__open_skeleton;
+ bpf_prog_attach_xattr;
+ bpf_program__attach;
+ bpf_program__name;
+ btf__align_of;
+} LIBBPF_0.0.6;
diff --git a/tools/lib/bpf/libbpf.pc.template b/tools/lib/bpf/libbpf.pc.template
index ac17fcef2108..b45ed534bdfb 100644
--- a/tools/lib/bpf/libbpf.pc.template
+++ b/tools/lib/bpf/libbpf.pc.template
@@ -8,5 +8,5 @@ Name: libbpf
Description: BPF library
Version: @VERSION@
Libs: -L${libdir} -lbpf
-Requires.private: libelf
+Requires.private: libelf zlib
Cflags: -I${includedir}
diff --git a/tools/lib/bpf/libbpf_common.h b/tools/lib/bpf/libbpf_common.h
new file mode 100644
index 000000000000..a23ae1ac27eb
--- /dev/null
+++ b/tools/lib/bpf/libbpf_common.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+
+/*
+ * Common user-facing libbpf helpers.
+ *
+ * Copyright (c) 2019 Facebook
+ */
+
+#ifndef __LIBBPF_LIBBPF_COMMON_H
+#define __LIBBPF_LIBBPF_COMMON_H
+
+#include <string.h>
+
+#ifndef LIBBPF_API
+#define LIBBPF_API __attribute__((visibility("default")))
+#endif
+
+/* Helper macro to declare and initialize libbpf options struct
+ *
+ * This dance with uninitialized declaration, followed by memset to zero,
+ * followed by assignment using compound literal syntax is done to preserve
+ * ability to use a nice struct field initialization syntax and **hopefully**
+ * have all the padding bytes initialized to zero. It's not guaranteed though,
+ * when copying literal, that compiler won't copy garbage in literal's padding
+ * bytes, but that's the best way I've found and it seems to work in practice.
+ *
+ * Macro declares opts struct of given type and name, zero-initializes,
+ * including any extra padding, it with memset() and then assigns initial
+ * values provided by users in struct initializer-syntax as varargs.
+ */
+#define DECLARE_LIBBPF_OPTS(TYPE, NAME, ...) \
+ struct TYPE NAME = ({ \
+ memset(&NAME, 0, sizeof(struct TYPE)); \
+ (struct TYPE) { \
+ .sz = sizeof(struct TYPE), \
+ __VA_ARGS__ \
+ }; \
+ })
+
+#endif /* __LIBBPF_LIBBPF_COMMON_H */
diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
index 97ac17a64a58..8c3afbd97747 100644
--- a/tools/lib/bpf/libbpf_internal.h
+++ b/tools/lib/bpf/libbpf_internal.h
@@ -76,7 +76,7 @@ static inline bool libbpf_validate_opts(const char *opts,
for (i = opts_sz; i < user_sz; i++) {
if (opts[i]) {
- pr_warn("%s has non-zero extra bytes",
+ pr_warn("%s has non-zero extra bytes\n",
type_name);
return false;
}
@@ -95,9 +95,28 @@ static inline bool libbpf_validate_opts(const char *opts,
#define OPTS_GET(opts, field, fallback_value) \
(OPTS_HAS(opts, field) ? (opts)->field : fallback_value)
+int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz);
+int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz);
int libbpf__load_raw_btf(const char *raw_types, size_t types_len,
const char *str_sec, size_t str_len);
+int bpf_object__section_size(const struct bpf_object *obj, const char *name,
+ __u32 *size);
+int bpf_object__variable_offset(const struct bpf_object *obj, const char *name,
+ __u32 *off);
+
+struct nlattr;
+typedef int (*libbpf_dump_nlmsg_t)(void *cookie, void *msg, struct nlattr **tb);
+int libbpf_netlink_open(unsigned int *nl_pid);
+int libbpf_nl_get_link(int sock, unsigned int nl_pid,
+ libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie);
+int libbpf_nl_get_class(int sock, unsigned int nl_pid, int ifindex,
+ libbpf_dump_nlmsg_t dump_class_nlmsg, void *cookie);
+int libbpf_nl_get_qdisc(int sock, unsigned int nl_pid, int ifindex,
+ libbpf_dump_nlmsg_t dump_qdisc_nlmsg, void *cookie);
+int libbpf_nl_get_filter(int sock, unsigned int nl_pid, int ifindex, int handle,
+ libbpf_dump_nlmsg_t dump_filter_nlmsg, void *cookie);
+
struct btf_ext_info {
/*
* info points to the individual info section (e.g. func_info and
diff --git a/tools/lib/traceevent/Makefile b/tools/lib/traceevent/Makefile
index cbb429f55062..c874c017c636 100644
--- a/tools/lib/traceevent/Makefile
+++ b/tools/lib/traceevent/Makefile
@@ -39,11 +39,12 @@ DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))'
LP64 := $(shell echo __LP64__ | ${CC} ${CFLAGS} -E -x c - | tail -n 1)
ifeq ($(LP64), 1)
- libdir_relative = lib64
+ libdir_relative_temp = lib64
else
- libdir_relative = lib
+ libdir_relative_temp = lib
endif
+libdir_relative ?= $(libdir_relative_temp)
prefix ?= /usr/local
libdir = $(prefix)/$(libdir_relative)
man_dir = $(prefix)/share/man
@@ -97,6 +98,7 @@ EVENT_PARSE_VERSION = $(EP_VERSION).$(EP_PATCHLEVEL).$(EP_EXTRAVERSION)
LIB_TARGET = libtraceevent.a libtraceevent.so.$(EVENT_PARSE_VERSION)
LIB_INSTALL = libtraceevent.a libtraceevent.so*
+LIB_INSTALL := $(addprefix $(OUTPUT),$(LIB_INSTALL))
INCLUDES = -I. -I $(srctree)/tools/include $(CONFIG_INCLUDES)
@@ -207,10 +209,11 @@ define do_install
$(INSTALL) $(if $3,-m $3,) $1 '$(DESTDIR_SQ)$2'
endef
-PKG_CONFIG_FILE = libtraceevent.pc
+PKG_CONFIG_SOURCE_FILE = libtraceevent.pc
+PKG_CONFIG_FILE := $(addprefix $(OUTPUT),$(PKG_CONFIG_SOURCE_FILE))
define do_install_pkgconfig_file
if [ -n "${pkgconfig_dir}" ]; then \
- cp -f ${PKG_CONFIG_FILE}.template ${PKG_CONFIG_FILE}; \
+ cp -f ${PKG_CONFIG_SOURCE_FILE}.template ${PKG_CONFIG_FILE}; \
sed -i "s|INSTALL_PREFIX|${1}|g" ${PKG_CONFIG_FILE}; \
sed -i "s|LIB_VERSION|${EVENT_PARSE_VERSION}|g" ${PKG_CONFIG_FILE}; \
sed -i "s|LIB_DIR|${libdir}|g" ${PKG_CONFIG_FILE}; \
diff --git a/tools/lib/traceevent/plugins/Makefile b/tools/lib/traceevent/plugins/Makefile
index f440989fa55e..349bb81482ab 100644
--- a/tools/lib/traceevent/plugins/Makefile
+++ b/tools/lib/traceevent/plugins/Makefile
@@ -32,11 +32,12 @@ DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))'
LP64 := $(shell echo __LP64__ | ${CC} ${CFLAGS} -E -x c - | tail -n 1)
ifeq ($(LP64), 1)
- libdir_relative = lib64
+ libdir_relative_tmp = lib64
else
- libdir_relative = lib
+ libdir_relative_tmp = lib
endif
+libdir_relative ?= $(libdir_relative_tmp)
prefix ?= /usr/local
libdir = $(prefix)/$(libdir_relative)
diff --git a/tools/perf/Documentation/perf-kvm.txt b/tools/perf/Documentation/perf-kvm.txt
index 6a5bb2b17039..cf95baef7b61 100644
--- a/tools/perf/Documentation/perf-kvm.txt
+++ b/tools/perf/Documentation/perf-kvm.txt
@@ -68,10 +68,11 @@ OPTIONS
-------
-i::
--input=<path>::
- Input file name.
+ Input file name, for the 'report', 'diff' and 'buildid-list' subcommands.
-o::
--output=<path>::
- Output file name.
+ Output file name, for the 'record' subcommand. Doesn't work with 'report',
+ just redirect the output to a file when using 'report'.
--host::
Collect host side performance profile.
--guest::
diff --git a/tools/perf/arch/arm/tests/regs_load.S b/tools/perf/arch/arm/tests/regs_load.S
index 6e2495cc4517..4284307d7822 100644
--- a/tools/perf/arch/arm/tests/regs_load.S
+++ b/tools/perf/arch/arm/tests/regs_load.S
@@ -37,7 +37,7 @@
.text
.type perf_regs_load,%function
-ENTRY(perf_regs_load)
+SYM_FUNC_START(perf_regs_load)
str r0, [r0, #R0]
str r1, [r0, #R1]
str r2, [r0, #R2]
@@ -56,4 +56,4 @@ ENTRY(perf_regs_load)
str lr, [r0, #PC] // store pc as lr in order to skip the call
// to this function
mov pc, lr
-ENDPROC(perf_regs_load)
+SYM_FUNC_END(perf_regs_load)
diff --git a/tools/perf/arch/arm64/tests/regs_load.S b/tools/perf/arch/arm64/tests/regs_load.S
index 07042511dca9..d49de40b6818 100644
--- a/tools/perf/arch/arm64/tests/regs_load.S
+++ b/tools/perf/arch/arm64/tests/regs_load.S
@@ -7,7 +7,7 @@
#define LDR_REG(r) ldr x##r, [x0, 8 * r]
#define SP (8 * 31)
#define PC (8 * 32)
-ENTRY(perf_regs_load)
+SYM_FUNC_START(perf_regs_load)
STR_REG(0)
STR_REG(1)
STR_REG(2)
@@ -44,4 +44,4 @@ ENTRY(perf_regs_load)
str x30, [x0, #PC]
LDR_REG(1)
ret
-ENDPROC(perf_regs_load)
+SYM_FUNC_END(perf_regs_load)
diff --git a/tools/perf/arch/x86/tests/regs_load.S b/tools/perf/arch/x86/tests/regs_load.S
index bbe5a0d16e51..80f14f52e3f6 100644
--- a/tools/perf/arch/x86/tests/regs_load.S
+++ b/tools/perf/arch/x86/tests/regs_load.S
@@ -28,7 +28,7 @@
.text
#ifdef HAVE_ARCH_X86_64_SUPPORT
-ENTRY(perf_regs_load)
+SYM_FUNC_START(perf_regs_load)
movq %rax, AX(%rdi)
movq %rbx, BX(%rdi)
movq %rcx, CX(%rdi)
@@ -60,9 +60,9 @@ ENTRY(perf_regs_load)
movq %r14, R14(%rdi)
movq %r15, R15(%rdi)
ret
-ENDPROC(perf_regs_load)
+SYM_FUNC_END(perf_regs_load)
#else
-ENTRY(perf_regs_load)
+SYM_FUNC_START(perf_regs_load)
push %edi
movl 8(%esp), %edi
movl %eax, AX(%edi)
@@ -88,7 +88,7 @@ ENTRY(perf_regs_load)
movl $0, FS(%edi)
movl $0, GS(%edi)
ret
-ENDPROC(perf_regs_load)
+SYM_FUNC_END(perf_regs_load)
#endif
/*
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 9664a72a089d..7e124a7b8bfd 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -403,17 +403,6 @@ static int perf_event__repipe_tracing_data(struct perf_session *session,
return err;
}
-static int perf_event__repipe_id_index(struct perf_session *session,
- union perf_event *event)
-{
- int err;
-
- perf_event__repipe_synth(session->tool, event);
- err = perf_event__process_id_index(session, event);
-
- return err;
-}
-
static int dso__read_build_id(struct dso *dso)
{
if (dso->has_build_id)
@@ -651,7 +640,7 @@ static int __cmd_inject(struct perf_inject *inject)
inject->tool.comm = perf_event__repipe_comm;
inject->tool.namespaces = perf_event__repipe_namespaces;
inject->tool.exit = perf_event__repipe_exit;
- inject->tool.id_index = perf_event__repipe_id_index;
+ inject->tool.id_index = perf_event__process_id_index;
inject->tool.auxtrace_info = perf_event__process_auxtrace_info;
inject->tool.auxtrace = perf_event__process_auxtrace;
inject->tool.aux = perf_event__drop_aux;
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index b5063d3b6fd0..fb19ef63cc35 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -832,7 +832,7 @@ try_again:
if ((errno == EINVAL || errno == EBADF) &&
pos->leader != pos &&
pos->weak_group) {
- pos = perf_evlist__reset_weak_group(evlist, pos);
+ pos = perf_evlist__reset_weak_group(evlist, pos, true);
goto try_again;
}
rc = -errno;
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 830d563de889..387311c67264 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -388,6 +388,14 @@ static int report__setup_sample_type(struct report *rep)
}
}
+ if (sort__mode == SORT_MODE__MEMORY) {
+ if (!is_pipe && !(sample_type & PERF_SAMPLE_DATA_SRC)) {
+ ui__error("Selected --mem-mode but no mem data. "
+ "Did you call perf record without -d?\n");
+ return -1;
+ }
+ }
+
if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) {
if ((sample_type & PERF_SAMPLE_REGS_USER) &&
(sample_type & PERF_SAMPLE_STACK_USER)) {
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 0a15253b438c..a098c2ebf4ea 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -65,6 +65,7 @@
#include "util/target.h"
#include "util/time-utils.h"
#include "util/top.h"
+#include "util/affinity.h"
#include "asm/bug.h"
#include <linux/time64.h>
@@ -265,15 +266,10 @@ static int read_single_counter(struct evsel *counter, int cpu,
* Read out the results of a single counter:
* do not aggregate counts across CPUs in system-wide mode
*/
-static int read_counter(struct evsel *counter, struct timespec *rs)
+static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu)
{
int nthreads = perf_thread_map__nr(evsel_list->core.threads);
- int ncpus, cpu, thread;
-
- if (target__has_cpu(&target) && !target__has_per_thread(&target))
- ncpus = perf_evsel__nr_cpus(counter);
- else
- ncpus = 1;
+ int thread;
if (!counter->supported)
return -ENOENT;
@@ -282,40 +278,38 @@ static int read_counter(struct evsel *counter, struct timespec *rs)
nthreads = 1;
for (thread = 0; thread < nthreads; thread++) {
- for (cpu = 0; cpu < ncpus; cpu++) {
- struct perf_counts_values *count;
-
- count = perf_counts(counter->counts, cpu, thread);
-
- /*
- * The leader's group read loads data into its group members
- * (via perf_evsel__read_counter) and sets threir count->loaded.
- */
- if (!perf_counts__is_loaded(counter->counts, cpu, thread) &&
- read_single_counter(counter, cpu, thread, rs)) {
- counter->counts->scaled = -1;
- perf_counts(counter->counts, cpu, thread)->ena = 0;
- perf_counts(counter->counts, cpu, thread)->run = 0;
- return -1;
- }
+ struct perf_counts_values *count;
- perf_counts__set_loaded(counter->counts, cpu, thread, false);
+ count = perf_counts(counter->counts, cpu, thread);
- if (STAT_RECORD) {
- if (perf_evsel__write_stat_event(counter, cpu, thread, count)) {
- pr_err("failed to write stat event\n");
- return -1;
- }
- }
+ /*
+ * The leader's group read loads data into its group members
+ * (via perf_evsel__read_counter()) and sets their count->loaded.
+ */
+ if (!perf_counts__is_loaded(counter->counts, cpu, thread) &&
+ read_single_counter(counter, cpu, thread, rs)) {
+ counter->counts->scaled = -1;
+ perf_counts(counter->counts, cpu, thread)->ena = 0;
+ perf_counts(counter->counts, cpu, thread)->run = 0;
+ return -1;
+ }
+
+ perf_counts__set_loaded(counter->counts, cpu, thread, false);
- if (verbose > 1) {
- fprintf(stat_config.output,
- "%s: %d: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
- perf_evsel__name(counter),
- cpu,
- count->val, count->ena, count->run);
+ if (STAT_RECORD) {
+ if (perf_evsel__write_stat_event(counter, cpu, thread, count)) {
+ pr_err("failed to write stat event\n");
+ return -1;
}
}
+
+ if (verbose > 1) {
+ fprintf(stat_config.output,
+ "%s: %d: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
+ perf_evsel__name(counter),
+ cpu,
+ count->val, count->ena, count->run);
+ }
}
return 0;
@@ -324,15 +318,37 @@ static int read_counter(struct evsel *counter, struct timespec *rs)
static void read_counters(struct timespec *rs)
{
struct evsel *counter;
- int ret;
+ struct affinity affinity;
+ int i, ncpus, cpu;
+
+ if (affinity__setup(&affinity) < 0)
+ return;
+
+ ncpus = perf_cpu_map__nr(evsel_list->core.all_cpus);
+ if (!target__has_cpu(&target) || target__has_per_thread(&target))
+ ncpus = 1;
+ evlist__for_each_cpu(evsel_list, i, cpu) {
+ if (i >= ncpus)
+ break;
+ affinity__set(&affinity, cpu);
+
+ evlist__for_each_entry(evsel_list, counter) {
+ if (evsel__cpu_iter_skip(counter, cpu))
+ continue;
+ if (!counter->err) {
+ counter->err = read_counter_cpu(counter, rs,
+ counter->cpu_iter - 1);
+ }
+ }
+ }
+ affinity__cleanup(&affinity);
evlist__for_each_entry(evsel_list, counter) {
- ret = read_counter(counter, rs);
- if (ret)
+ if (counter->err)
pr_debug("failed to read counter %s\n", counter->name);
-
- if (ret == 0 && perf_stat_process_counter(&stat_config, counter))
+ if (counter->err == 0 && perf_stat_process_counter(&stat_config, counter))
pr_warning("failed to process counter %s\n", counter->name);
+ counter->err = 0;
}
}
@@ -420,6 +436,62 @@ static bool is_target_alive(struct target *_target,
return false;
}
+enum counter_recovery {
+ COUNTER_SKIP,
+ COUNTER_RETRY,
+ COUNTER_FATAL,
+};
+
+static enum counter_recovery stat_handle_error(struct evsel *counter)
+{
+ char msg[BUFSIZ];
+ /*
+ * PPC returns ENXIO for HW counters until 2.6.37
+ * (behavior changed with commit b0a873e).
+ */
+ if (errno == EINVAL || errno == ENOSYS ||
+ errno == ENOENT || errno == EOPNOTSUPP ||
+ errno == ENXIO) {
+ if (verbose > 0)
+ ui__warning("%s event is not supported by the kernel.\n",
+ perf_evsel__name(counter));
+ counter->supported = false;
+ /*
+ * errored is a sticky flag that means one of the counter's
+ * cpu event had a problem and needs to be reexamined.
+ */
+ counter->errored = true;
+
+ if ((counter->leader != counter) ||
+ !(counter->leader->core.nr_members > 1))
+ return COUNTER_SKIP;
+ } else if (perf_evsel__fallback(counter, errno, msg, sizeof(msg))) {
+ if (verbose > 0)
+ ui__warning("%s\n", msg);
+ return COUNTER_RETRY;
+ } else if (target__has_per_thread(&target) &&
+ evsel_list->core.threads &&
+ evsel_list->core.threads->err_thread != -1) {
+ /*
+ * For global --per-thread case, skip current
+ * error thread.
+ */
+ if (!thread_map__remove(evsel_list->core.threads,
+ evsel_list->core.threads->err_thread)) {
+ evsel_list->core.threads->err_thread = -1;
+ return COUNTER_RETRY;
+ }
+ }
+
+ perf_evsel__open_strerror(counter, &target,
+ errno, msg, sizeof(msg));
+ ui__error("%s\n", msg);
+
+ if (child_pid != -1)
+ kill(child_pid, SIGTERM);
+ return COUNTER_FATAL;
+}
+
static int __run_perf_stat(int argc, const char **argv, int run_idx)
{
int interval = stat_config.interval;
@@ -433,6 +505,9 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
int status = 0;
const bool forks = (argc > 0);
bool is_pipe = STAT_RECORD ? perf_stat.data.is_pipe : false;
+ struct affinity affinity;
+ int i, cpu;
+ bool second_pass = false;
if (interval) {
ts.tv_sec = interval / USEC_PER_MSEC;
@@ -457,61 +532,104 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
if (group)
perf_evlist__set_leader(evsel_list);
- evlist__for_each_entry(evsel_list, counter) {
+ if (affinity__setup(&affinity) < 0)
+ return -1;
+
+ evlist__for_each_cpu (evsel_list, i, cpu) {
+ affinity__set(&affinity, cpu);
+
+ evlist__for_each_entry(evsel_list, counter) {
+ if (evsel__cpu_iter_skip(counter, cpu))
+ continue;
+ if (counter->reset_group || counter->errored)
+ continue;
try_again:
- if (create_perf_stat_counter(counter, &stat_config, &target) < 0) {
-
- /* Weak group failed. Reset the group. */
- if ((errno == EINVAL || errno == EBADF) &&
- counter->leader != counter &&
- counter->weak_group) {
- counter = perf_evlist__reset_weak_group(evsel_list, counter);
- goto try_again;
- }
+ if (create_perf_stat_counter(counter, &stat_config, &target,
+ counter->cpu_iter - 1) < 0) {
- /*
- * PPC returns ENXIO for HW counters until 2.6.37
- * (behavior changed with commit b0a873e).
- */
- if (errno == EINVAL || errno == ENOSYS ||
- errno == ENOENT || errno == EOPNOTSUPP ||
- errno == ENXIO) {
- if (verbose > 0)
- ui__warning("%s event is not supported by the kernel.\n",
- perf_evsel__name(counter));
- counter->supported = false;
-
- if ((counter->leader != counter) ||
- !(counter->leader->core.nr_members > 1))
- continue;
- } else if (perf_evsel__fallback(counter, errno, msg, sizeof(msg))) {
- if (verbose > 0)
- ui__warning("%s\n", msg);
- goto try_again;
- } else if (target__has_per_thread(&target) &&
- evsel_list->core.threads &&
- evsel_list->core.threads->err_thread != -1) {
/*
- * For global --per-thread case, skip current
- * error thread.
+ * Weak group failed. We cannot just undo this here
+ * because earlier CPUs might be in group mode, and the kernel
+ * doesn't support mixing group and non group reads. Defer
+ * it to later.
+ * Don't close here because we're in the wrong affinity.
*/
- if (!thread_map__remove(evsel_list->core.threads,
- evsel_list->core.threads->err_thread)) {
- evsel_list->core.threads->err_thread = -1;
+ if ((errno == EINVAL || errno == EBADF) &&
+ counter->leader != counter &&
+ counter->weak_group) {
+ perf_evlist__reset_weak_group(evsel_list, counter, false);
+ assert(counter->reset_group);
+ second_pass = true;
+ continue;
+ }
+
+ switch (stat_handle_error(counter)) {
+ case COUNTER_FATAL:
+ return -1;
+ case COUNTER_RETRY:
goto try_again;
+ case COUNTER_SKIP:
+ continue;
+ default:
+ break;
}
+
}
+ counter->supported = true;
+ }
+ }
- perf_evsel__open_strerror(counter, &target,
- errno, msg, sizeof(msg));
- ui__error("%s\n", msg);
+ if (second_pass) {
+ /*
+ * Now redo all the weak group after closing them,
+ * and also close errored counters.
+ */
- if (child_pid != -1)
- kill(child_pid, SIGTERM);
+ evlist__for_each_cpu(evsel_list, i, cpu) {
+ affinity__set(&affinity, cpu);
+ /* First close errored or weak retry */
+ evlist__for_each_entry(evsel_list, counter) {
+ if (!counter->reset_group && !counter->errored)
+ continue;
+ if (evsel__cpu_iter_skip_no_inc(counter, cpu))
+ continue;
+ perf_evsel__close_cpu(&counter->core, counter->cpu_iter);
+ }
+ /* Now reopen weak */
+ evlist__for_each_entry(evsel_list, counter) {
+ if (!counter->reset_group && !counter->errored)
+ continue;
+ if (evsel__cpu_iter_skip(counter, cpu))
+ continue;
+ if (!counter->reset_group)
+ continue;
+try_again_reset:
+ pr_debug2("reopening weak %s\n", perf_evsel__name(counter));
+ if (create_perf_stat_counter(counter, &stat_config, &target,
+ counter->cpu_iter - 1) < 0) {
+
+ switch (stat_handle_error(counter)) {
+ case COUNTER_FATAL:
+ return -1;
+ case COUNTER_RETRY:
+ goto try_again_reset;
+ case COUNTER_SKIP:
+ continue;
+ default:
+ break;
+ }
+ }
+ counter->supported = true;
+ }
+ }
+ }
+ affinity__cleanup(&affinity);
- return -1;
+ evlist__for_each_entry(evsel_list, counter) {
+ if (!counter->supported) {
+ perf_evsel__free_fd(&counter->core);
+ continue;
}
- counter->supported = true;
l = strlen(counter->unit);
if (l > stat_config.unit_width)
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index dc80044bc46f..795e353de095 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -1568,9 +1568,13 @@ int cmd_top(int argc, const char **argv)
*/
status = perf_env__read_cpuid(&perf_env);
if (status) {
- pr_err("Couldn't read the cpuid for this machine: %s\n",
- str_error_r(errno, errbuf, sizeof(errbuf)));
- goto out_delete_evlist;
+ /*
+ * Some arches do not provide a get_cpuid(), so just use pr_debug, otherwise
+ * warn the user explicitely.
+ */
+ eprintf(status == ENOSYS ? 1 : 0, verbose,
+ "Couldn't read the cpuid for this machine: %s\n",
+ str_error_r(errno, errbuf, sizeof(errbuf)));
}
top.evlist->env = &perf_env;
diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh
index a1dc16724352..68039a96c1dc 100755
--- a/tools/perf/check-headers.sh
+++ b/tools/perf/check-headers.sh
@@ -110,8 +110,8 @@ for i in $FILES; do
done
# diff with extra ignore lines
-check arch/x86/lib/memcpy_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>"'
-check arch/x86/lib/memset_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>"'
+check arch/x86/lib/memcpy_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>" -I"^SYM_FUNC_START\(_LOCAL\)*(memcpy_\(erms\|orig\))"'
+check arch/x86/lib/memset_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>" -I"^SYM_FUNC_START\(_LOCAL\)*(memset_\(erms\|orig\))"'
check include/uapi/asm-generic/mman.h '-I "^#include <\(uapi/\)*asm-generic/mman-common\(-tools\)*.h>"'
check include/uapi/linux/mman.h '-I "^#include <\(uapi/\)*asm/mman.h>"'
check include/linux/ctype.h '-I "isdigit("'
diff --git a/tools/perf/lib/cpumap.c b/tools/perf/lib/cpumap.c
index 2ca1fafa620d..f93f4e703e4c 100644
--- a/tools/perf/lib/cpumap.c
+++ b/tools/perf/lib/cpumap.c
@@ -68,14 +68,28 @@ static struct perf_cpu_map *cpu_map__default_new(void)
return cpus;
}
+static int cmp_int(const void *a, const void *b)
+{
+ return *(const int *)a - *(const int*)b;
+}
+
static struct perf_cpu_map *cpu_map__trim_new(int nr_cpus, int *tmp_cpus)
{
size_t payload_size = nr_cpus * sizeof(int);
struct perf_cpu_map *cpus = malloc(sizeof(*cpus) + payload_size);
+ int i, j;
if (cpus != NULL) {
- cpus->nr = nr_cpus;
memcpy(cpus->map, tmp_cpus, payload_size);
+ qsort(cpus->map, nr_cpus, sizeof(int), cmp_int);
+ /* Remove dups */
+ j = 0;
+ for (i = 0; i < nr_cpus; i++) {
+ if (i == 0 || cpus->map[i] != cpus->map[i - 1])
+ cpus->map[j++] = cpus->map[i];
+ }
+ cpus->nr = j;
+ assert(j <= nr_cpus);
refcount_set(&cpus->refcnt, 1);
}
@@ -272,3 +286,60 @@ int perf_cpu_map__max(struct perf_cpu_map *map)
return max;
}
+
+/*
+ * Merge two cpumaps
+ *
+ * orig either gets freed and replaced with a new map, or reused
+ * with no reference count change (similar to "realloc")
+ * other has its reference count increased.
+ */
+
+struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig,
+ struct perf_cpu_map *other)
+{
+ int *tmp_cpus;
+ int tmp_len;
+ int i, j, k;
+ struct perf_cpu_map *merged;
+
+ if (!orig && !other)
+ return NULL;
+ if (!orig) {
+ perf_cpu_map__get(other);
+ return other;
+ }
+ if (!other)
+ return orig;
+ if (orig->nr == other->nr &&
+ !memcmp(orig->map, other->map, orig->nr * sizeof(int)))
+ return orig;
+
+ tmp_len = orig->nr + other->nr;
+ tmp_cpus = malloc(tmp_len * sizeof(int));
+ if (!tmp_cpus)
+ return NULL;
+
+ /* Standard merge algorithm from wikipedia */
+ i = j = k = 0;
+ while (i < orig->nr && j < other->nr) {
+ if (orig->map[i] <= other->map[j]) {
+ if (orig->map[i] == other->map[j])
+ j++;
+ tmp_cpus[k++] = orig->map[i++];
+ } else
+ tmp_cpus[k++] = other->map[j++];
+ }
+
+ while (i < orig->nr)
+ tmp_cpus[k++] = orig->map[i++];
+
+ while (j < other->nr)
+ tmp_cpus[k++] = other->map[j++];
+ assert(k <= tmp_len);
+
+ merged = cpu_map__trim_new(k, tmp_cpus);
+ free(tmp_cpus);
+ perf_cpu_map__put(orig);
+ return merged;
+}
diff --git a/tools/perf/lib/evlist.c b/tools/perf/lib/evlist.c
index 205ddbb80bc1..ae9e65aa2491 100644
--- a/tools/perf/lib/evlist.c
+++ b/tools/perf/lib/evlist.c
@@ -54,6 +54,7 @@ static void __perf_evlist__propagate_maps(struct perf_evlist *evlist,
perf_thread_map__put(evsel->threads);
evsel->threads = perf_thread_map__get(evlist->threads);
+ evlist->all_cpus = perf_cpu_map__merge(evlist->all_cpus, evsel->cpus);
}
static void perf_evlist__propagate_maps(struct perf_evlist *evlist)
diff --git a/tools/perf/lib/evsel.c b/tools/perf/lib/evsel.c
index 5a89857b0381..4dc06289f4c7 100644
--- a/tools/perf/lib/evsel.c
+++ b/tools/perf/lib/evsel.c
@@ -114,16 +114,23 @@ int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus,
return err;
}
+static void perf_evsel__close_fd_cpu(struct perf_evsel *evsel, int cpu)
+{
+ int thread;
+
+ for (thread = 0; thread < xyarray__max_y(evsel->fd); ++thread) {
+ if (FD(evsel, cpu, thread) >= 0)
+ close(FD(evsel, cpu, thread));
+ FD(evsel, cpu, thread) = -1;
+ }
+}
+
void perf_evsel__close_fd(struct perf_evsel *evsel)
{
- int cpu, thread;
+ int cpu;
for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++)
- for (thread = 0; thread < xyarray__max_y(evsel->fd); ++thread) {
- if (FD(evsel, cpu, thread) >= 0)
- close(FD(evsel, cpu, thread));
- FD(evsel, cpu, thread) = -1;
- }
+ perf_evsel__close_fd_cpu(evsel, cpu);
}
void perf_evsel__free_fd(struct perf_evsel *evsel)
@@ -141,6 +148,14 @@ void perf_evsel__close(struct perf_evsel *evsel)
perf_evsel__free_fd(evsel);
}
+void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu)
+{
+ if (evsel->fd == NULL)
+ return;
+
+ perf_evsel__close_fd_cpu(evsel, cpu);
+}
+
int perf_evsel__read_size(struct perf_evsel *evsel)
{
u64 read_format = evsel->attr.read_format;
@@ -183,38 +198,61 @@ int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread,
}
static int perf_evsel__run_ioctl(struct perf_evsel *evsel,
- int ioc, void *arg)
+ int ioc, void *arg,
+ int cpu)
{
- int cpu, thread;
+ int thread;
- for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++) {
- for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) {
- int fd = FD(evsel, cpu, thread),
- err = ioctl(fd, ioc, arg);
+ for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) {
+ int fd = FD(evsel, cpu, thread),
+ err = ioctl(fd, ioc, arg);
- if (err)
- return err;
- }
+ if (err)
+ return err;
}
return 0;
}
+int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu)
+{
+ return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_ENABLE, NULL, cpu);
+}
+
int perf_evsel__enable(struct perf_evsel *evsel)
{
- return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_ENABLE, 0);
+ int i;
+ int err = 0;
+
+ for (i = 0; i < xyarray__max_x(evsel->fd) && !err; i++)
+ err = perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_ENABLE, NULL, i);
+ return err;
+}
+
+int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu)
+{
+ return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_DISABLE, NULL, cpu);
}
int perf_evsel__disable(struct perf_evsel *evsel)
{
- return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_DISABLE, 0);
+ int i;
+ int err = 0;
+
+ for (i = 0; i < xyarray__max_x(evsel->fd) && !err; i++)
+ err = perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_DISABLE, NULL, i);
+ return err;
}
int perf_evsel__apply_filter(struct perf_evsel *evsel, const char *filter)
{
- return perf_evsel__run_ioctl(evsel,
+ int err = 0, i;
+
+ for (i = 0; i < evsel->cpus->nr && !err; i++)
+ err = perf_evsel__run_ioctl(evsel,
PERF_EVENT_IOC_SET_FILTER,
- (void *)filter);
+ (void *)filter, i);
+ return err;
}
struct perf_cpu_map *perf_evsel__cpus(struct perf_evsel *evsel)
diff --git a/tools/perf/lib/include/internal/evlist.h b/tools/perf/lib/include/internal/evlist.h
index a2fbccf1922f..74dc8c3f0b66 100644
--- a/tools/perf/lib/include/internal/evlist.h
+++ b/tools/perf/lib/include/internal/evlist.h
@@ -18,6 +18,7 @@ struct perf_evlist {
int nr_entries;
bool has_user_cpus;
struct perf_cpu_map *cpus;
+ struct perf_cpu_map *all_cpus;
struct perf_thread_map *threads;
int nr_mmaps;
size_t mmap_len;
diff --git a/tools/perf/lib/include/perf/cpumap.h b/tools/perf/lib/include/perf/cpumap.h
index ac9aa497f84a..6a17ad730cbc 100644
--- a/tools/perf/lib/include/perf/cpumap.h
+++ b/tools/perf/lib/include/perf/cpumap.h
@@ -12,6 +12,8 @@ LIBPERF_API struct perf_cpu_map *perf_cpu_map__dummy_new(void);
LIBPERF_API struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list);
LIBPERF_API struct perf_cpu_map *perf_cpu_map__read(FILE *file);
LIBPERF_API struct perf_cpu_map *perf_cpu_map__get(struct perf_cpu_map *map);
+LIBPERF_API struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig,
+ struct perf_cpu_map *other);
LIBPERF_API void perf_cpu_map__put(struct perf_cpu_map *map);
LIBPERF_API int perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx);
LIBPERF_API int perf_cpu_map__nr(const struct perf_cpu_map *cpus);
diff --git a/tools/perf/lib/include/perf/evsel.h b/tools/perf/lib/include/perf/evsel.h
index 557f5815a9c9..c82ec39a4ad0 100644
--- a/tools/perf/lib/include/perf/evsel.h
+++ b/tools/perf/lib/include/perf/evsel.h
@@ -26,10 +26,13 @@ LIBPERF_API void perf_evsel__delete(struct perf_evsel *evsel);
LIBPERF_API int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus,
struct perf_thread_map *threads);
LIBPERF_API void perf_evsel__close(struct perf_evsel *evsel);
+LIBPERF_API void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu);
LIBPERF_API int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread,
struct perf_counts_values *count);
LIBPERF_API int perf_evsel__enable(struct perf_evsel *evsel);
+LIBPERF_API int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu);
LIBPERF_API int perf_evsel__disable(struct perf_evsel *evsel);
+LIBPERF_API int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu);
LIBPERF_API struct perf_cpu_map *perf_evsel__cpus(struct perf_evsel *evsel);
LIBPERF_API struct perf_thread_map *perf_evsel__threads(struct perf_evsel *evsel);
LIBPERF_API struct perf_event_attr *perf_evsel__attr(struct perf_evsel *evsel);
diff --git a/tools/perf/pmu-events/arch/s390/cf_z13/extended.json b/tools/perf/pmu-events/arch/s390/cf_z13/extended.json
index 436ce33f1182..5da8296b667e 100644
--- a/tools/perf/pmu-events/arch/s390/cf_z13/extended.json
+++ b/tools/perf/pmu-events/arch/s390/cf_z13/extended.json
@@ -32,7 +32,7 @@
"EventCode": "132",
"EventName": "DTLB1_GPAGE_WRITES",
"BriefDescription": "DTLB1 Two-Gigabyte Page Writes",
- "PublicDescription": "Counter:132 Name:DTLB1_GPAGE_WRITES A translation entry has been written to the Level-1 Data Translation Lookaside Buffer for a two-gigabyte page."
+ "PublicDescription": "A translation entry has been written to the Level-1 Data Translation Lookaside Buffer for a two-gigabyte page."
},
{
"Unit": "CPU-M-CF",
diff --git a/tools/perf/pmu-events/arch/s390/cf_z14/extended.json b/tools/perf/pmu-events/arch/s390/cf_z14/extended.json
index 68618152ea2c..89e070727e1b 100644
--- a/tools/perf/pmu-events/arch/s390/cf_z14/extended.json
+++ b/tools/perf/pmu-events/arch/s390/cf_z14/extended.json
@@ -4,7 +4,7 @@
"EventCode": "128",
"EventName": "L1D_RO_EXCL_WRITES",
"BriefDescription": "L1D Read-only Exclusive Writes",
- "PublicDescription": "L1D_RO_EXCL_WRITES A directory write to the Level-1 Data cache where the line was originally in a Read-Only state in the cache but has been updated to be in the Exclusive state that allows stores to the cache line"
+ "PublicDescription": "A directory write to the Level-1 Data cache where the line was originally in a Read-Only state in the cache but has been updated to be in the Exclusive state that allows stores to the cache line"
},
{
"Unit": "CPU-M-CF",
diff --git a/tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json b/tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json
index bc7151d639d7..45a34ce4fe89 100644
--- a/tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json
@@ -297,7 +297,7 @@
},
{
"BriefDescription": "Fraction of cycles spent in Kernel mode",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC",
"MetricGroup": "Summary",
"MetricName": "Kernel_Utilization"
},
diff --git a/tools/perf/pmu-events/arch/x86/broadwellde/bdwde-metrics.json b/tools/perf/pmu-events/arch/x86/broadwellde/bdwde-metrics.json
index 49c5f123d811..961fe4395758 100644
--- a/tools/perf/pmu-events/arch/x86/broadwellde/bdwde-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/broadwellde/bdwde-metrics.json
@@ -115,7 +115,7 @@
},
{
"BriefDescription": "Fraction of cycles spent in Kernel mode",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC",
"MetricGroup": "Summary",
"MetricName": "Kernel_Utilization"
},
diff --git a/tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json b/tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json
index 113d19e92678..746734ce09be 100644
--- a/tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json
@@ -297,7 +297,7 @@
},
{
"BriefDescription": "Fraction of cycles spent in Kernel mode",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC",
"MetricGroup": "Summary",
"MetricName": "Kernel_Utilization"
},
diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json b/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json
index 2ba32af9bc36..f94653229dd4 100644
--- a/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json
@@ -315,7 +315,7 @@
},
{
"BriefDescription": "Fraction of cycles spent in Kernel mode",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC",
"MetricGroup": "Summary",
"MetricName": "Kernel_Utilization"
},
diff --git a/tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json b/tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json
index c80f16fde6d0..5402cd3120f9 100644
--- a/tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json
@@ -267,7 +267,7 @@
},
{
"BriefDescription": "Fraction of cycles spent in Kernel mode",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC",
"MetricGroup": "Summary",
"MetricName": "Kernel_Utilization"
},
diff --git a/tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json b/tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json
index e501729c3dd1..832f3cb40b34 100644
--- a/tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json
@@ -267,7 +267,7 @@
},
{
"BriefDescription": "Fraction of cycles spent in Kernel mode",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC",
"MetricGroup": "Summary",
"MetricName": "Kernel_Utilization"
},
diff --git a/tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json b/tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json
index e2446966b651..d69b2a8fc0bc 100644
--- a/tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json
@@ -285,7 +285,7 @@
},
{
"BriefDescription": "Fraction of cycles spent in Kernel mode",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC",
"MetricGroup": "Summary",
"MetricName": "Kernel_Utilization"
},
diff --git a/tools/perf/pmu-events/arch/x86/ivytown/ivt-metrics.json b/tools/perf/pmu-events/arch/x86/ivytown/ivt-metrics.json
index 9294769dec64..5f465fd81315 100644
--- a/tools/perf/pmu-events/arch/x86/ivytown/ivt-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/ivytown/ivt-metrics.json
@@ -285,7 +285,7 @@
},
{
"BriefDescription": "Fraction of cycles spent in Kernel mode",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC",
"MetricGroup": "Summary",
"MetricName": "Kernel_Utilization"
},
diff --git a/tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json b/tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json
index 603ff9c2e9a1..3e909b306003 100644
--- a/tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json
@@ -171,7 +171,7 @@
},
{
"BriefDescription": "Fraction of cycles spent in Kernel mode",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC",
"MetricGroup": "Summary",
"MetricName": "Kernel_Utilization"
},
diff --git a/tools/perf/pmu-events/arch/x86/sandybridge/snb-metrics.json b/tools/perf/pmu-events/arch/x86/sandybridge/snb-metrics.json
index c6b485b3a2cb..50c053235752 100644
--- a/tools/perf/pmu-events/arch/x86/sandybridge/snb-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/sandybridge/snb-metrics.json
@@ -171,7 +171,7 @@
},
{
"BriefDescription": "Fraction of cycles spent in Kernel mode",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC",
"MetricGroup": "Summary",
"MetricName": "Kernel_Utilization"
},
diff --git a/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json b/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json
index 0ca539bb60f6..e7feb60f9fa9 100644
--- a/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json
@@ -303,7 +303,7 @@
},
{
"BriefDescription": "Fraction of cycles spent in Kernel mode",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC",
"MetricGroup": "Summary",
"MetricName": "Kernel_Utilization"
},
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json
index 047d7e11aa6f..21d7a0c2c2e8 100644
--- a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json
@@ -315,7 +315,7 @@
},
{
"BriefDescription": "Fraction of cycles spent in Kernel mode",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC",
"MetricGroup": "Summary",
"MetricName": "Kernel_Utilization"
},
diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build
index a3c595fba943..1692529639b0 100644
--- a/tools/perf/tests/Build
+++ b/tools/perf/tests/Build
@@ -54,6 +54,7 @@ perf-y += unit_number__scnprintf.o
perf-y += mem2node.o
perf-y += maps.o
perf-y += time-utils-test.o
+perf-y += genelf.o
$(OUTPUT)tests/llvm-src-base.c: tests/bpf-script-example.c tests/Build
$(call rule_mkdir)
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index 7115aa32a51e..5f05db75cdd8 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -260,6 +260,11 @@ static struct test generic_tests[] = {
.func = test__cpu_map_print,
},
{
+ .desc = "Merge cpu map",
+ .func = test__cpu_map_merge,
+ },
+
+ {
.desc = "Probe SDT events",
.func = test__sdt_event,
},
@@ -297,6 +302,10 @@ static struct test generic_tests[] = {
.func = test__time_utils,
},
{
+ .desc = "Test jit_write_elf",
+ .func = test__jit_write_elf,
+ },
+ {
.desc = "maps__merge_in",
.func = test__maps__merge_in,
},
diff --git a/tools/perf/tests/cpumap.c b/tools/perf/tests/cpumap.c
index 8a0d236202b0..4ac56741ac5f 100644
--- a/tools/perf/tests/cpumap.c
+++ b/tools/perf/tests/cpumap.c
@@ -120,3 +120,19 @@ int test__cpu_map_print(struct test *test __maybe_unused, int subtest __maybe_un
TEST_ASSERT_VAL("failed to convert map", cpu_map_print("1-10,12-20,22-30,32-40"));
return 0;
}
+
+int test__cpu_map_merge(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+ struct perf_cpu_map *a = perf_cpu_map__new("4,2,1");
+ struct perf_cpu_map *b = perf_cpu_map__new("4,5,7");
+ struct perf_cpu_map *c = perf_cpu_map__merge(a, b);
+ char buf[100];
+
+ TEST_ASSERT_VAL("failed to merge map: bad nr", c->nr == 5);
+ cpu_map__snprint(c, buf, sizeof(buf));
+ TEST_ASSERT_VAL("failed to merge map: bad result", !strcmp(buf, "1-2,4-5,7"));
+ perf_cpu_map__put(a);
+ perf_cpu_map__put(b);
+ perf_cpu_map__put(c);
+ return 0;
+}
diff --git a/tools/perf/tests/event-times.c b/tools/perf/tests/event-times.c
index 1ee8704e2284..1e8a9f5c356d 100644
--- a/tools/perf/tests/event-times.c
+++ b/tools/perf/tests/event-times.c
@@ -125,7 +125,7 @@ static int attach__cpu_disabled(struct evlist *evlist)
evsel->core.attr.disabled = 1;
- err = perf_evsel__open_per_cpu(evsel, cpus);
+ err = perf_evsel__open_per_cpu(evsel, cpus, -1);
if (err) {
if (err == -EACCES)
return TEST_SKIP;
@@ -152,7 +152,7 @@ static int attach__cpu_enabled(struct evlist *evlist)
return -1;
}
- err = perf_evsel__open_per_cpu(evsel, cpus);
+ err = perf_evsel__open_per_cpu(evsel, cpus, -1);
if (err == -EACCES)
return TEST_SKIP;
diff --git a/tools/perf/tests/genelf.c b/tools/perf/tests/genelf.c
new file mode 100644
index 000000000000..f797f9823e89
--- /dev/null
+++ b/tools/perf/tests/genelf.c
@@ -0,0 +1,51 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <linux/compiler.h>
+
+#include "debug.h"
+#include "tests.h"
+
+#ifdef HAVE_JITDUMP
+#include <libelf.h>
+#include "../util/genelf.h"
+#endif
+
+#define TEMPL "/tmp/perf-test-XXXXXX"
+
+int test__jit_write_elf(struct test *test __maybe_unused,
+ int subtest __maybe_unused)
+{
+#ifdef HAVE_JITDUMP
+ static unsigned char x86_code[] = {
+ 0xBB, 0x2A, 0x00, 0x00, 0x00, /* movl $42, %ebx */
+ 0xB8, 0x01, 0x00, 0x00, 0x00, /* movl $1, %eax */
+ 0xCD, 0x80 /* int $0x80 */
+ };
+ char path[PATH_MAX];
+ int fd, ret;
+
+ strcpy(path, TEMPL);
+
+ fd = mkstemp(path);
+ if (fd < 0) {
+ perror("mkstemp failed");
+ return TEST_FAIL;
+ }
+
+ pr_info("Writing jit code to: %s\n", path);
+
+ ret = jit_write_elf(fd, 0, "main", x86_code, sizeof(x86_code),
+ NULL, 0, NULL, 0, 0);
+ close(fd);
+
+ unlink(path);
+
+ return ret ? TEST_FAIL : 0;
+#else
+ return TEST_SKIP;
+#endif
+}
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index 25aea387e2bf..9a160fef47c9 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -98,6 +98,7 @@ int test__event_update(struct test *test, int subtest);
int test__event_times(struct test *test, int subtest);
int test__backward_ring_buffer(struct test *test, int subtest);
int test__cpu_map_print(struct test *test, int subtest);
+int test__cpu_map_merge(struct test *test, int subtest);
int test__sdt_event(struct test *test, int subtest);
int test__is_printable_array(struct test *test, int subtest);
int test__bitmap_print(struct test *test, int subtest);
@@ -109,6 +110,7 @@ int test__unit_number__scnprint(struct test *test, int subtest);
int test__mem2node(struct test *t, int subtest);
int test__maps__merge_in(struct test *t, int subtest);
int test__time_utils(struct test *t, int subtest);
+int test__jit_write_elf(struct test *test, int subtest);
bool test__bp_signal_is_supported(void);
bool test__bp_account_is_supported(void);
diff --git a/tools/perf/trace/beauty/clone.c b/tools/perf/trace/beauty/clone.c
index 1a8d3be2030e..062ca849c8fd 100644
--- a/tools/perf/trace/beauty/clone.c
+++ b/tools/perf/trace/beauty/clone.c
@@ -45,6 +45,7 @@ static size_t clone__scnprintf_flags(unsigned long flags, char *bf, size_t size,
P_FLAG(NEWPID);
P_FLAG(NEWNET);
P_FLAG(IO);
+ P_FLAG(CLEAR_SIGHAND);
#undef P_FLAG
if (flags)
diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h
index 57943f3685f8..3a442f021468 100644
--- a/tools/perf/util/cpumap.h
+++ b/tools/perf/util/cpumap.h
@@ -63,4 +63,5 @@ int cpu_map__build_map(struct perf_cpu_map *cpus, struct perf_cpu_map **res,
int cpu_map__cpu(struct perf_cpu_map *cpus, int idx);
bool cpu_map__has(struct perf_cpu_map *cpus, int cpu);
+
#endif /* __PERF_CPUMAP_H */
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index fdce590d2278..1548237b6558 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -18,6 +18,7 @@
#include "debug.h"
#include "units.h"
#include <internal/lib.h> // page_size
+#include "affinity.h"
#include "../perf.h"
#include "asm/bug.h"
#include "bpf-event.h"
@@ -342,14 +343,63 @@ static int perf_evlist__nr_threads(struct evlist *evlist,
return perf_thread_map__nr(evlist->core.threads);
}
+void evlist__cpu_iter_start(struct evlist *evlist)
+{
+ struct evsel *pos;
+
+ /*
+ * Reset the per evsel cpu_iter. This is needed because
+ * each evsel's cpumap may have a different index space,
+ * and some operations need the index to modify
+ * the FD xyarray (e.g. open, close)
+ */
+ evlist__for_each_entry(evlist, pos)
+ pos->cpu_iter = 0;
+}
+
+bool evsel__cpu_iter_skip_no_inc(struct evsel *ev, int cpu)
+{
+ if (ev->cpu_iter >= ev->core.cpus->nr)
+ return true;
+ if (cpu >= 0 && ev->core.cpus->map[ev->cpu_iter] != cpu)
+ return true;
+ return false;
+}
+
+bool evsel__cpu_iter_skip(struct evsel *ev, int cpu)
+{
+ if (!evsel__cpu_iter_skip_no_inc(ev, cpu)) {
+ ev->cpu_iter++;
+ return false;
+ }
+ return true;
+}
+
void evlist__disable(struct evlist *evlist)
{
struct evsel *pos;
+ struct affinity affinity;
+ int cpu, i;
+
+ if (affinity__setup(&affinity) < 0)
+ return;
+ evlist__for_each_cpu(evlist, i, cpu) {
+ affinity__set(&affinity, cpu);
+
+ evlist__for_each_entry(evlist, pos) {
+ if (evsel__cpu_iter_skip(pos, cpu))
+ continue;
+ if (pos->disabled || !perf_evsel__is_group_leader(pos) || !pos->core.fd)
+ continue;
+ evsel__disable_cpu(pos, pos->cpu_iter - 1);
+ }
+ }
+ affinity__cleanup(&affinity);
evlist__for_each_entry(evlist, pos) {
- if (pos->disabled || !perf_evsel__is_group_leader(pos) || !pos->core.fd)
+ if (!perf_evsel__is_group_leader(pos) || !pos->core.fd)
continue;
- evsel__disable(pos);
+ pos->disabled = true;
}
evlist->enabled = false;
@@ -358,11 +408,28 @@ void evlist__disable(struct evlist *evlist)
void evlist__enable(struct evlist *evlist)
{
struct evsel *pos;
+ struct affinity affinity;
+ int cpu, i;
+
+ if (affinity__setup(&affinity) < 0)
+ return;
+ evlist__for_each_cpu(evlist, i, cpu) {
+ affinity__set(&affinity, cpu);
+
+ evlist__for_each_entry(evlist, pos) {
+ if (evsel__cpu_iter_skip(pos, cpu))
+ continue;
+ if (!perf_evsel__is_group_leader(pos) || !pos->core.fd)
+ continue;
+ evsel__enable_cpu(pos, pos->cpu_iter - 1);
+ }
+ }
+ affinity__cleanup(&affinity);
evlist__for_each_entry(evlist, pos) {
if (!perf_evsel__is_group_leader(pos) || !pos->core.fd)
continue;
- evsel__enable(pos);
+ pos->disabled = false;
}
evlist->enabled = true;
@@ -1137,9 +1204,35 @@ void perf_evlist__set_selected(struct evlist *evlist,
void evlist__close(struct evlist *evlist)
{
struct evsel *evsel;
+ struct affinity affinity;
+ int cpu, i;
- evlist__for_each_entry_reverse(evlist, evsel)
- evsel__close(evsel);
+ /*
+ * With perf record core.cpus is usually NULL.
+ * Use the old method to handle this for now.
+ */
+ if (!evlist->core.cpus) {
+ evlist__for_each_entry_reverse(evlist, evsel)
+ evsel__close(evsel);
+ return;
+ }
+
+ if (affinity__setup(&affinity) < 0)
+ return;
+ evlist__for_each_cpu(evlist, i, cpu) {
+ affinity__set(&affinity, cpu);
+
+ evlist__for_each_entry_reverse(evlist, evsel) {
+ if (evsel__cpu_iter_skip(evsel, cpu))
+ continue;
+ perf_evsel__close_cpu(&evsel->core, evsel->cpu_iter - 1);
+ }
+ }
+ affinity__cleanup(&affinity);
+ evlist__for_each_entry_reverse(evlist, evsel) {
+ perf_evsel__free_fd(&evsel->core);
+ perf_evsel__free_id(&evsel->core);
+ }
}
static int perf_evlist__create_syswide_maps(struct evlist *evlist)
@@ -1577,7 +1670,8 @@ void perf_evlist__force_leader(struct evlist *evlist)
}
struct evsel *perf_evlist__reset_weak_group(struct evlist *evsel_list,
- struct evsel *evsel)
+ struct evsel *evsel,
+ bool close)
{
struct evsel *c2, *leader;
bool is_open = true;
@@ -1594,10 +1688,15 @@ struct evsel *perf_evlist__reset_weak_group(struct evlist *evsel_list,
if (c2 == evsel)
is_open = false;
if (c2->leader == leader) {
- if (is_open)
+ if (is_open && close)
perf_evsel__close(&c2->core);
c2->leader = c2;
c2->core.nr_members = 0;
+ /*
+ * Set this for all former members of the group
+ * to indicate they get reopened.
+ */
+ c2->reset_group = true;
}
}
return leader;
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 3655b9ebb147..f5bd5c386df1 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -334,9 +334,17 @@ void perf_evlist__to_front(struct evlist *evlist,
#define evlist__for_each_entry_safe(evlist, tmp, evsel) \
__evlist__for_each_entry_safe(&(evlist)->core.entries, tmp, evsel)
+#define evlist__for_each_cpu(evlist, index, cpu) \
+ evlist__cpu_iter_start(evlist); \
+ perf_cpu_map__for_each_cpu (cpu, index, (evlist)->core.all_cpus)
+
void perf_evlist__set_tracking_event(struct evlist *evlist,
struct evsel *tracking_evsel);
+void evlist__cpu_iter_start(struct evlist *evlist);
+bool evsel__cpu_iter_skip(struct evsel *ev, int cpu);
+bool evsel__cpu_iter_skip_no_inc(struct evsel *ev, int cpu);
+
struct evsel *
perf_evlist__find_evsel_by_str(struct evlist *evlist, const char *str);
@@ -348,5 +356,6 @@ bool perf_evlist__exclude_kernel(struct evlist *evlist);
void perf_evlist__force_leader(struct evlist *evlist);
struct evsel *perf_evlist__reset_weak_group(struct evlist *evlist,
- struct evsel *evsel);
+ struct evsel *evsel,
+ bool close);
#endif /* __PERF_EVLIST_H */
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index f4dea055b080..a69e64236120 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1223,16 +1223,27 @@ int perf_evsel__append_addr_filter(struct evsel *evsel, const char *filter)
return perf_evsel__append_filter(evsel, "%s,%s", filter);
}
+/* Caller has to clear disabled after going through all CPUs. */
+int evsel__enable_cpu(struct evsel *evsel, int cpu)
+{
+ return perf_evsel__enable_cpu(&evsel->core, cpu);
+}
+
int evsel__enable(struct evsel *evsel)
{
int err = perf_evsel__enable(&evsel->core);
if (!err)
evsel->disabled = false;
-
return err;
}
+/* Caller has to set disabled after going through all CPUs. */
+int evsel__disable_cpu(struct evsel *evsel, int cpu)
+{
+ return perf_evsel__disable_cpu(&evsel->core, cpu);
+}
+
int evsel__disable(struct evsel *evsel)
{
int err = perf_evsel__disable(&evsel->core);
@@ -1587,8 +1598,9 @@ static int perf_event_open(struct evsel *evsel,
return fd;
}
-int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus,
- struct perf_thread_map *threads)
+static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus,
+ struct perf_thread_map *threads,
+ int start_cpu, int end_cpu)
{
int cpu, thread, nthreads;
unsigned long flags = PERF_FLAG_FD_CLOEXEC;
@@ -1665,7 +1677,7 @@ retry_sample_id:
display_attr(&evsel->core.attr);
- for (cpu = 0; cpu < cpus->nr; cpu++) {
+ for (cpu = start_cpu; cpu < end_cpu; cpu++) {
for (thread = 0; thread < nthreads; thread++) {
int fd, group_fd;
@@ -1843,6 +1855,12 @@ out_close:
return err;
}
+int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus,
+ struct perf_thread_map *threads)
+{
+ return evsel__open_cpu(evsel, cpus, threads, 0, cpus ? cpus->nr : 1);
+}
+
void evsel__close(struct evsel *evsel)
{
perf_evsel__close(&evsel->core);
@@ -1850,9 +1868,14 @@ void evsel__close(struct evsel *evsel)
}
int perf_evsel__open_per_cpu(struct evsel *evsel,
- struct perf_cpu_map *cpus)
+ struct perf_cpu_map *cpus,
+ int cpu)
{
- return evsel__open(evsel, cpus, NULL);
+ if (cpu == -1)
+ return evsel__open_cpu(evsel, cpus, NULL, 0,
+ cpus ? cpus->nr : 1);
+
+ return evsel__open_cpu(evsel, cpus, NULL, cpu, cpu + 1);
}
int perf_evsel__open_per_thread(struct evsel *evsel,
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index ddc5ee6f6592..dc14f4a823cd 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -86,6 +86,7 @@ struct evsel {
struct list_head config_terms;
struct bpf_object *bpf_obj;
int bpf_fd;
+ int err;
bool auto_merge_stats;
bool merged_stat;
const char * metric_expr;
@@ -94,7 +95,10 @@ struct evsel {
struct evsel *metric_leader;
bool collect_stat;
bool weak_group;
+ bool reset_group;
+ bool errored;
bool percore;
+ int cpu_iter;
const char *pmu_name;
struct {
perf_evsel__sb_cb_t *cb;
@@ -218,11 +222,14 @@ int perf_evsel__set_filter(struct evsel *evsel, const char *filter);
int perf_evsel__append_tp_filter(struct evsel *evsel, const char *filter);
int perf_evsel__append_addr_filter(struct evsel *evsel,
const char *filter);
+int evsel__enable_cpu(struct evsel *evsel, int cpu);
int evsel__enable(struct evsel *evsel);
int evsel__disable(struct evsel *evsel);
+int evsel__disable_cpu(struct evsel *evsel, int cpu);
int perf_evsel__open_per_cpu(struct evsel *evsel,
- struct perf_cpu_map *cpus);
+ struct perf_cpu_map *cpus,
+ int cpu);
int perf_evsel__open_per_thread(struct evsel *evsel,
struct perf_thread_map *threads);
int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus,
diff --git a/tools/perf/util/genelf.c b/tools/perf/util/genelf.c
index f9f18b8b1df9..aed49806a09b 100644
--- a/tools/perf/util/genelf.c
+++ b/tools/perf/util/genelf.c
@@ -8,15 +8,12 @@
*/
#include <sys/types.h>
-#include <stdio.h>
-#include <getopt.h>
#include <stddef.h>
#include <libelf.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <inttypes.h>
-#include <limits.h>
#include <fcntl.h>
#include <err.h>
#ifdef HAVE_DWARF_SUPPORT
@@ -31,8 +28,6 @@
#define NT_GNU_BUILD_ID 3
#endif
-#define JVMTI
-
#define BUILD_ID_URANDOM /* different uuid for each run */
#ifdef HAVE_LIBCRYPTO
@@ -511,44 +506,3 @@ error:
return retval;
}
-
-#ifndef JVMTI
-
-static unsigned char x86_code[] = {
- 0xBB, 0x2A, 0x00, 0x00, 0x00, /* movl $42, %ebx */
- 0xB8, 0x01, 0x00, 0x00, 0x00, /* movl $1, %eax */
- 0xCD, 0x80 /* int $0x80 */
-};
-
-static struct options options;
-
-int main(int argc, char **argv)
-{
- int c, fd, ret;
-
- while ((c = getopt(argc, argv, "o:h")) != -1) {
- switch (c) {
- case 'o':
- options.output = optarg;
- break;
- case 'h':
- printf("Usage: genelf -o output_file [-h]\n");
- return 0;
- default:
- errx(1, "unknown option");
- }
- }
-
- fd = open(options.output, O_CREAT|O_TRUNC|O_RDWR, 0666);
- if (fd == -1)
- err(1, "cannot create file %s", options.output);
-
- ret = jit_write_elf(fd, "main", x86_code, sizeof(x86_code));
- close(fd);
-
- if (ret != 0)
- unlink(options.output);
-
- return ret;
-}
-#endif
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index becc2d109423..93ad27830e2b 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -850,7 +850,7 @@ int __weak strcmp_cpuid_str(const char *mapcpuid, const char *cpuid)
*/
int __weak get_cpuid(char *buffer __maybe_unused, size_t sz __maybe_unused)
{
- return -1;
+ return ENOSYS; /* Not implemented */
}
static int write_cpuid(struct feat_fd *ff,
@@ -1089,21 +1089,18 @@ static void cpu_cache_level__fprintf(FILE *out, struct cpu_cache_level *c)
fprintf(out, "L%d %-15s %8s [%s]\n", c->level, c->type, c->size, c->map);
}
-static int build_caches(struct cpu_cache_level caches[], u32 size, u32 *cntp)
+#define MAX_CACHE_LVL 4
+
+static int build_caches(struct cpu_cache_level caches[], u32 *cntp)
{
u32 i, cnt = 0;
- long ncpus;
u32 nr, cpu;
u16 level;
- ncpus = sysconf(_SC_NPROCESSORS_CONF);
- if (ncpus < 0)
- return -1;
-
- nr = (u32)(ncpus & UINT_MAX);
+ nr = cpu__max_cpu();
for (cpu = 0; cpu < nr; cpu++) {
- for (level = 0; level < 10; level++) {
+ for (level = 0; level < MAX_CACHE_LVL; level++) {
struct cpu_cache_level c;
int err;
@@ -1123,18 +1120,12 @@ static int build_caches(struct cpu_cache_level caches[], u32 size, u32 *cntp)
caches[cnt++] = c;
else
cpu_cache_level__free(&c);
-
- if (WARN_ONCE(cnt == size, "way too many cpu caches.."))
- goto out;
}
}
- out:
*cntp = cnt;
return 0;
}
-#define MAX_CACHE_LVL 4
-
static int write_cache(struct feat_fd *ff,
struct evlist *evlist __maybe_unused)
{
@@ -1143,7 +1134,7 @@ static int write_cache(struct feat_fd *ff,
u32 cnt = 0, i, version = 1;
int ret;
- ret = build_caches(caches, max_caches, &cnt);
+ ret = build_caches(caches, &cnt);
if (ret)
goto out;
diff --git a/tools/perf/util/include/linux/linkage.h b/tools/perf/util/include/linux/linkage.h
index f01d48a8d707..b8a5159361b4 100644
--- a/tools/perf/util/include/linux/linkage.h
+++ b/tools/perf/util/include/linux/linkage.h
@@ -5,10 +5,93 @@
/* linkage.h ... for including arch/x86/lib/memcpy_64.S */
-#define ENTRY(name) \
- .globl name; \
+/* Some toolchains use other characters (e.g. '`') to mark new line in macro */
+#ifndef ASM_NL
+#define ASM_NL ;
+#endif
+
+#ifndef __ALIGN
+#define __ALIGN .align 4,0x90
+#define __ALIGN_STR ".align 4,0x90"
+#endif
+
+/* SYM_T_FUNC -- type used by assembler to mark functions */
+#ifndef SYM_T_FUNC
+#define SYM_T_FUNC STT_FUNC
+#endif
+
+/* SYM_A_* -- align the symbol? */
+#define SYM_A_ALIGN ALIGN
+
+/* SYM_L_* -- linkage of symbols */
+#define SYM_L_GLOBAL(name) .globl name
+#define SYM_L_LOCAL(name) /* nothing */
+
+#define ALIGN __ALIGN
+
+/* === generic annotations === */
+
+/* SYM_ENTRY -- use only if you have to for non-paired symbols */
+#ifndef SYM_ENTRY
+#define SYM_ENTRY(name, linkage, align...) \
+ linkage(name) ASM_NL \
+ align ASM_NL \
name:
+#endif
+
+/* SYM_START -- use only if you have to */
+#ifndef SYM_START
+#define SYM_START(name, linkage, align...) \
+ SYM_ENTRY(name, linkage, align)
+#endif
+
+/* SYM_END -- use only if you have to */
+#ifndef SYM_END
+#define SYM_END(name, sym_type) \
+ .type name sym_type ASM_NL \
+ .size name, .-name
+#endif
+
+/*
+ * SYM_FUNC_START_ALIAS -- use where there are two global names for one
+ * function
+ */
+#ifndef SYM_FUNC_START_ALIAS
+#define SYM_FUNC_START_ALIAS(name) \
+ SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN)
+#endif
+
+/* SYM_FUNC_START -- use for global functions */
+#ifndef SYM_FUNC_START
+/*
+ * The same as SYM_FUNC_START_ALIAS, but we will need to distinguish these two
+ * later.
+ */
+#define SYM_FUNC_START(name) \
+ SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN)
+#endif
+
+/* SYM_FUNC_START_LOCAL -- use for local functions */
+#ifndef SYM_FUNC_START_LOCAL
+/* the same as SYM_FUNC_START_LOCAL_ALIAS, see comment near SYM_FUNC_START */
+#define SYM_FUNC_START_LOCAL(name) \
+ SYM_START(name, SYM_L_LOCAL, SYM_A_ALIGN)
+#endif
+
+/* SYM_FUNC_END_ALIAS -- the end of LOCAL_ALIASed or ALIASed function */
+#ifndef SYM_FUNC_END_ALIAS
+#define SYM_FUNC_END_ALIAS(name) \
+ SYM_END(name, SYM_T_FUNC)
+#endif
-#define ENDPROC(name)
+/*
+ * SYM_FUNC_END -- the end of SYM_FUNC_START_LOCAL, SYM_FUNC_START,
+ * SYM_FUNC_START_WEAK, ...
+ */
+#ifndef SYM_FUNC_END
+/* the same as SYM_FUNC_END_ALIAS, see comment near SYM_FUNC_START */
+#define SYM_FUNC_END(name) \
+ SYM_END(name, SYM_T_FUNC)
+#endif
#endif /* PERF_LINUX_LINKAGE_H_ */
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 416d174d223c..c8c5410315e8 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -2446,6 +2446,7 @@ static int append_inlines(struct callchain_cursor *cursor, struct map_symbol *ms
list_for_each_entry(ilist, &inline_node->val, list) {
struct map_symbol ilist_ms = {
+ .maps = ms->maps,
.map = map,
.sym = ilist->symbol,
};
diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
index 6a4d350d5cdb..02aee946b6c1 100644
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -103,8 +103,11 @@ static struct evsel *find_evsel_group(struct evlist *perf_evlist,
if (!strcmp(ev->name, ids[i])) {
if (!metric_events[i])
metric_events[i] = ev;
+ i++;
+ if (i == idnum)
+ break;
} else {
- if (++i == idnum) {
+ if (i + 1 == idnum) {
/* Discard the whole match and start again */
i = 0;
memset(metric_events, 0,
@@ -124,7 +127,7 @@ static struct evsel *find_evsel_group(struct evlist *perf_evlist,
}
}
- if (i != idnum - 1) {
+ if (i != idnum) {
/* Not whole match */
return NULL;
}
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 345b5ccc90f6..9fcba2872130 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -2681,12 +2681,12 @@ static int setup_sort_list(struct perf_hpp_list *list, char *str,
ret = sort_dimension__add(list, tok, evlist, level);
if (ret == -EINVAL) {
if (!cacheline_size() && !strncasecmp(tok, "dcacheline", strlen(tok)))
- pr_err("The \"dcacheline\" --sort key needs to know the cacheline size and it couldn't be determined on this system");
+ ui__error("The \"dcacheline\" --sort key needs to know the cacheline size and it couldn't be determined on this system");
else
- pr_err("Invalid --sort key: `%s'", tok);
+ ui__error("Invalid --sort key: `%s'", tok);
break;
} else if (ret == -ESRCH) {
- pr_err("Unknown --sort key: `%s'", tok);
+ ui__error("Unknown --sort key: `%s'", tok);
break;
}
}
@@ -2743,7 +2743,7 @@ static int setup_sort_order(struct evlist *evlist)
return 0;
if (sort_order[1] == '\0') {
- pr_err("Invalid --sort key: `+'");
+ ui__error("Invalid --sort key: `+'");
return -EINVAL;
}
@@ -2959,6 +2959,9 @@ int output_field_add(struct perf_hpp_list *list, char *tok)
if (strncasecmp(tok, sd->name, strlen(tok)))
continue;
+ if (sort__mode != SORT_MODE__MEMORY)
+ return -EINVAL;
+
return __sort_dimension__add_output(list, sd);
}
@@ -2968,6 +2971,9 @@ int output_field_add(struct perf_hpp_list *list, char *tok)
if (strncasecmp(tok, sd->name, strlen(tok)))
continue;
+ if (sort__mode != SORT_MODE__BRANCH)
+ return -EINVAL;
+
return __sort_dimension__add_output(list, sd);
}
@@ -3034,7 +3040,7 @@ static int __setup_output_field(void)
strp++;
if (!strlen(strp)) {
- pr_err("Invalid --fields key: `+'");
+ ui__error("Invalid --fields key: `+'");
goto out;
}
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index 332cb730785b..5f26137b8d60 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -464,7 +464,8 @@ size_t perf_event__fprintf_stat_config(union perf_event *event, FILE *fp)
int create_perf_stat_counter(struct evsel *evsel,
struct perf_stat_config *config,
- struct target *target)
+ struct target *target,
+ int cpu)
{
struct perf_event_attr *attr = &evsel->core.attr;
struct evsel *leader = evsel->leader;
@@ -518,7 +519,7 @@ int create_perf_stat_counter(struct evsel *evsel,
}
if (target__has_cpu(target) && !target__has_per_thread(target))
- return perf_evsel__open_per_cpu(evsel, evsel__cpus(evsel));
+ return perf_evsel__open_per_cpu(evsel, evsel__cpus(evsel), cpu);
return perf_evsel__open_per_thread(evsel, evsel->core.threads);
}
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index bfa9aaf36ce6..fb990efa54a8 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -214,7 +214,8 @@ size_t perf_event__fprintf_stat_config(union perf_event *event, FILE *fp);
int create_perf_stat_counter(struct evsel *evsel,
struct perf_stat_config *config,
- struct target *target);
+ struct target *target,
+ int cpu);
void
perf_evlist__print_counters(struct evlist *evlist,
struct perf_stat_config *config,
diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py
index efe06d621983..e59eb9e7f923 100755
--- a/tools/testing/kunit/kunit.py
+++ b/tools/testing/kunit/kunit.py
@@ -31,15 +31,12 @@ class KunitStatus(Enum):
TEST_FAILURE = auto()
def create_default_kunitconfig():
- if not os.path.exists(kunit_kernel.KUNITCONFIG_PATH):
+ if not os.path.exists(kunit_kernel.kunitconfig_path):
shutil.copyfile('arch/um/configs/kunit_defconfig',
- kunit_kernel.KUNITCONFIG_PATH)
+ kunit_kernel.kunitconfig_path)
def run_tests(linux: kunit_kernel.LinuxSourceTree,
request: KunitRequest) -> KunitResult:
- if request.defconfig:
- create_default_kunitconfig()
-
config_start = time.time()
success = linux.build_reconfig(request.build_dir)
config_end = time.time()
@@ -108,15 +105,22 @@ def main(argv, linux=None):
run_parser.add_argument('--build_dir',
help='As in the make command, it specifies the build '
'directory.',
- type=str, default=None, metavar='build_dir')
+ type=str, default='', metavar='build_dir')
run_parser.add_argument('--defconfig',
- help='Uses a default kunitconfig.',
+ help='Uses a default .kunitconfig.',
action='store_true')
cli_args = parser.parse_args(argv)
if cli_args.subcommand == 'run':
+ if cli_args.build_dir:
+ if not os.path.exists(cli_args.build_dir):
+ os.mkdir(cli_args.build_dir)
+ kunit_kernel.kunitconfig_path = os.path.join(
+ cli_args.build_dir,
+ kunit_kernel.kunitconfig_path)
+
if cli_args.defconfig:
create_default_kunitconfig()
diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py
index bf3876835331..cc5d844ecca1 100644
--- a/tools/testing/kunit/kunit_kernel.py
+++ b/tools/testing/kunit/kunit_kernel.py
@@ -14,7 +14,7 @@ import os
import kunit_config
KCONFIG_PATH = '.config'
-KUNITCONFIG_PATH = 'kunitconfig'
+kunitconfig_path = '.kunitconfig'
class ConfigError(Exception):
"""Represents an error trying to configure the Linux kernel."""
@@ -82,7 +82,7 @@ class LinuxSourceTree(object):
def __init__(self):
self._kconfig = kunit_config.Kconfig()
- self._kconfig.read_from_file(KUNITCONFIG_PATH)
+ self._kconfig.read_from_file(kunitconfig_path)
self._ops = LinuxSourceTreeOperations()
def clean(self):
@@ -111,7 +111,7 @@ class LinuxSourceTree(object):
return True
def build_reconfig(self, build_dir):
- """Creates a new .config if it is not a subset of the kunitconfig."""
+ """Creates a new .config if it is not a subset of the .kunitconfig."""
kconfig_path = get_kconfig_path(build_dir)
if os.path.exists(kconfig_path):
existing_kconfig = kunit_config.Kconfig()
@@ -140,10 +140,10 @@ class LinuxSourceTree(object):
return False
return True
- def run_kernel(self, args=[], timeout=None, build_dir=None):
+ def run_kernel(self, args=[], timeout=None, build_dir=''):
args.extend(['mem=256M'])
process = self._ops.linux_bin(args, timeout, build_dir)
- with open('test.log', 'w') as f:
+ with open(os.path.join(build_dir, 'test.log'), 'w') as f:
for line in process.stdout:
f.write(line.rstrip().decode('ascii') + '\n')
yield line.rstrip().decode('ascii')
diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py
index 4a12baa0cd4e..cba97756ac4a 100755
--- a/tools/testing/kunit/kunit_tool_test.py
+++ b/tools/testing/kunit/kunit_tool_test.py
@@ -174,6 +174,7 @@ class KUnitMainTest(unittest.TestCase):
kunit.main(['run'], self.linux_source_mock)
assert self.linux_source_mock.build_reconfig.call_count == 1
assert self.linux_source_mock.run_kernel.call_count == 1
+ self.linux_source_mock.run_kernel.assert_called_once_with(build_dir='', timeout=300)
self.print_mock.assert_any_call(StrContains('Testing complete.'))
def test_run_passes_args_fail(self):
@@ -199,7 +200,14 @@ class KUnitMainTest(unittest.TestCase):
timeout = 3453
kunit.main(['run', '--timeout', str(timeout)], self.linux_source_mock)
assert self.linux_source_mock.build_reconfig.call_count == 1
- self.linux_source_mock.run_kernel.assert_called_once_with(timeout=timeout)
+ self.linux_source_mock.run_kernel.assert_called_once_with(build_dir='', timeout=timeout)
+ self.print_mock.assert_any_call(StrContains('Testing complete.'))
+
+ def test_run_builddir(self):
+ build_dir = '.kunit'
+ kunit.main(['run', '--build_dir', build_dir], self.linux_source_mock)
+ assert self.linux_source_mock.build_reconfig.call_count == 1
+ self.linux_source_mock.run_kernel.assert_called_once_with(build_dir=build_dir, timeout=300)
self.print_mock.assert_any_call(StrContains('Testing complete.'))
if __name__ == '__main__':
diff --git a/tools/testing/nvdimm/Kbuild b/tools/testing/nvdimm/Kbuild
index c4a9196d794c..6aca8d5be159 100644
--- a/tools/testing/nvdimm/Kbuild
+++ b/tools/testing/nvdimm/Kbuild
@@ -5,6 +5,7 @@ ldflags-y += --wrap=devm_ioremap_nocache
ldflags-y += --wrap=devm_memremap
ldflags-y += --wrap=devm_memunmap
ldflags-y += --wrap=ioremap_nocache
+ldflags-y += --wrap=ioremap
ldflags-y += --wrap=iounmap
ldflags-y += --wrap=memunmap
ldflags-y += --wrap=__devm_request_region
diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c
index 3f55f2f99112..6271ac757a4b 100644
--- a/tools/testing/nvdimm/test/iomap.c
+++ b/tools/testing/nvdimm/test/iomap.c
@@ -193,6 +193,12 @@ void __iomem *__wrap_ioremap_nocache(resource_size_t offset, unsigned long size)
}
EXPORT_SYMBOL(__wrap_ioremap_nocache);
+void __iomem *__wrap_ioremap(resource_size_t offset, unsigned long size)
+{
+ return __nfit_test_ioremap(offset, size, ioremap);
+}
+EXPORT_SYMBOL(__wrap_ioremap);
+
void __iomem *__wrap_ioremap_wc(resource_size_t offset, unsigned long size)
{
return __nfit_test_ioremap(offset, size, ioremap_wc);
diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index 419652458da4..301ac12d5d69 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -21,7 +21,6 @@ test_lirc_mode2_user
get_cgroup_id_user
test_skb_cgroup_id_user
test_socket_cookie
-test_cgroup_attach
test_cgroup_storage
test_select_reuseport
test_flow_dissector
@@ -38,5 +37,8 @@ test_hashmap
test_btf_dump
xdping
test_cpp
+*.skel.h
/no_alu32
/bpf_gcc
+/tools
+bpf_helper_defs.h
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index e0fe01d9ec33..f1f949cd8ed9 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -3,10 +3,12 @@ include ../../../../scripts/Kbuild.include
include ../../../scripts/Makefile.arch
CURDIR := $(abspath .)
-LIBDIR := $(abspath ../../../lib)
+TOOLSDIR := $(abspath ../../..)
+LIBDIR := $(TOOLSDIR)/lib
BPFDIR := $(LIBDIR)/bpf
-TOOLSDIR := $(abspath ../../../include)
-APIDIR := $(TOOLSDIR)/uapi
+TOOLSINCDIR := $(TOOLSDIR)/include
+BPFTOOLDIR := $(TOOLSDIR)/bpf/bpftool
+APIDIR := $(TOOLSINCDIR)/uapi
GENDIR := $(abspath ../../../../include/generated)
GENHDR := $(GENDIR)/autoconf.h
@@ -19,18 +21,18 @@ LLC ?= llc
LLVM_OBJCOPY ?= llvm-objcopy
BPF_GCC ?= $(shell command -v bpf-gcc;)
CFLAGS += -g -Wall -O2 $(GENFLAGS) -I$(APIDIR) -I$(LIBDIR) -I$(BPFDIR) \
- -I$(GENDIR) -I$(TOOLSDIR) -I$(CURDIR) \
+ -I$(GENDIR) -I$(TOOLSINCDIR) -I$(CURDIR) \
-Dbpf_prog_load=bpf_prog_test_load \
-Dbpf_load_program=bpf_test_load_program
-LDLIBS += -lcap -lelf -lrt -lpthread
+LDLIBS += -lcap -lelf -lz -lrt -lpthread
# Order correspond to 'make run_tests' order
TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \
test_sock test_btf test_sockmap get_cgroup_id_user test_socket_cookie \
- test_cgroup_storage test_select_reuseport \
+ test_cgroup_storage \
test_netcnt test_tcpnotify_user test_sock_fields test_sysctl test_hashmap \
- test_cgroup_attach test_progs-no_alu32
+ test_progs-no_alu32
# Also test bpf-gcc, if present
ifneq ($(BPF_GCC),)
@@ -75,6 +77,24 @@ TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \
TEST_CUSTOM_PROGS = urandom_read
+# Emit succinct information message describing current building step
+# $1 - generic step name (e.g., CC, LINK, etc);
+# $2 - optional "flavor" specifier; if provided, will be emitted as [flavor];
+# $3 - target (assumed to be file); only file name will be emitted;
+# $4 - optional extra arg, emitted as-is, if provided.
+ifeq ($(V),1)
+msg =
+else
+msg = @$(info $(1)$(if $(2), [$(2)]) $(notdir $(3)))$(if $(4), $(4))
+endif
+
+# override lib.mk's default rules
+OVERRIDE_TARGETS := 1
+override define CLEAN
+ $(call msg, CLEAN)
+ $(RM) -r $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(EXTRA_CLEAN)
+endef
+
include ../lib.mk
# Define simple and short `make test_progs`, `make test_sysctl`, etc targets
@@ -87,10 +107,16 @@ $(notdir $(TEST_GEN_PROGS) \
$(TEST_GEN_PROGS_EXTENDED) \
$(TEST_CUSTOM_PROGS)): %: $(OUTPUT)/% ;
+$(OUTPUT)/%:%.c
+ $(call msg, BINARY,,$@)
+ $(LINK.c) $^ $(LDLIBS) -o $@
+
$(OUTPUT)/urandom_read: urandom_read.c
+ $(call msg, BINARY,,$@)
$(CC) -o $@ $< -Wl,--build-id
$(OUTPUT)/test_stub.o: test_stub.c
+ $(call msg, CC,,$@)
$(CC) -c $(CFLAGS) -o $@ $<
BPFOBJ := $(OUTPUT)/libbpf.a
@@ -110,19 +136,24 @@ $(OUTPUT)/test_cgroup_storage: cgroup_helpers.c
$(OUTPUT)/test_netcnt: cgroup_helpers.c
$(OUTPUT)/test_sock_fields: cgroup_helpers.c
$(OUTPUT)/test_sysctl: cgroup_helpers.c
-$(OUTPUT)/test_cgroup_attach: cgroup_helpers.c
.PHONY: force
# force a rebuild of BPFOBJ when its dependencies are updated
force:
+DEFAULT_BPFTOOL := $(OUTPUT)/tools/usr/local/sbin/bpftool
+BPFTOOL ?= $(DEFAULT_BPFTOOL)
+
+$(DEFAULT_BPFTOOL): force
+ $(MAKE) -C $(BPFTOOLDIR) DESTDIR=$(OUTPUT)/tools install
+
$(BPFOBJ): force
$(MAKE) -C $(BPFDIR) OUTPUT=$(OUTPUT)/
-BPF_HELPERS := $(BPFDIR)/bpf_helper_defs.h $(wildcard $(BPFDIR)/bpf_*.h)
-$(BPFDIR)/bpf_helper_defs.h:
- $(MAKE) -C $(BPFDIR) OUTPUT=$(OUTPUT)/ bpf_helper_defs.h
+BPF_HELPERS := $(OUTPUT)/bpf_helper_defs.h $(wildcard $(BPFDIR)/bpf_*.h)
+$(OUTPUT)/bpf_helper_defs.h:
+ $(MAKE) -C $(BPFDIR) OUTPUT=$(OUTPUT)/ $(OUTPUT)/bpf_helper_defs.h
# Get Clang's default includes on this system, as opposed to those seen by
# '-target bpf'. This fixes "missing" files on some architectures/distros,
@@ -159,27 +190,33 @@ $(OUTPUT)/flow_dissector_load.o: flow_dissector_load.h
# $3 - CFLAGS
# $4 - LDFLAGS
define CLANG_BPF_BUILD_RULE
+ $(call msg, CLANG-LLC,$(TRUNNER_BINARY),$2)
($(CLANG) $3 -O2 -target bpf -emit-llvm \
-c $1 -o - || echo "BPF obj compilation failed") | \
$(LLC) -mattr=dwarfris -march=bpf -mcpu=probe $4 -filetype=obj -o $2
endef
# Similar to CLANG_BPF_BUILD_RULE, but with disabled alu32
define CLANG_NOALU32_BPF_BUILD_RULE
+ $(call msg, CLANG-LLC,$(TRUNNER_BINARY),$2)
($(CLANG) $3 -O2 -target bpf -emit-llvm \
-c $1 -o - || echo "BPF obj compilation failed") | \
$(LLC) -march=bpf -mcpu=v2 $4 -filetype=obj -o $2
endef
# Similar to CLANG_BPF_BUILD_RULE, but using native Clang and bpf LLC
define CLANG_NATIVE_BPF_BUILD_RULE
+ $(call msg, CLANG-BPF,$(TRUNNER_BINARY),$2)
($(CLANG) $3 -O2 -emit-llvm \
-c $1 -o - || echo "BPF obj compilation failed") | \
$(LLC) -march=bpf -mcpu=probe $4 -filetype=obj -o $2
endef
# Build BPF object using GCC
define GCC_BPF_BUILD_RULE
+ $(call msg, GCC-BPF,$(TRUNNER_BINARY),$2)
$(BPF_GCC) $3 $4 -O2 -c $1 -o $2
endef
+SKEL_BLACKLIST := btf__% test_pinning_invalid.c
+
# Set up extra TRUNNER_XXX "temporary" variables in the environment (relies on
# $eval()) and pass control to DEFINE_TEST_RUNNER_RULES.
# Parameters:
@@ -195,8 +232,11 @@ TRUNNER_EXTRA_OBJS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.o, \
$$(filter %.c,$(TRUNNER_EXTRA_SOURCES)))
TRUNNER_EXTRA_HDRS := $$(filter %.h,$(TRUNNER_EXTRA_SOURCES))
TRUNNER_TESTS_HDR := $(TRUNNER_TESTS_DIR)/tests.h
-TRUNNER_BPF_OBJS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.o, \
- $$(notdir $$(wildcard $(TRUNNER_BPF_PROGS_DIR)/*.c)))
+TRUNNER_BPF_SRCS := $$(notdir $$(wildcard $(TRUNNER_BPF_PROGS_DIR)/*.c))
+TRUNNER_BPF_OBJS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.o, $$(TRUNNER_BPF_SRCS))
+TRUNNER_BPF_SKELS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.skel.h, \
+ $$(filter-out $(SKEL_BLACKLIST), \
+ $$(TRUNNER_BPF_SRCS)))
# Evaluate rules now with extra TRUNNER_XXX variables above already defined
$$(eval $$(call DEFINE_TEST_RUNNER_RULES,$1,$2))
@@ -226,12 +266,19 @@ $(TRUNNER_BPF_OBJS): $(TRUNNER_OUTPUT)/%.o: \
$$(call $(TRUNNER_BPF_BUILD_RULE),$$<,$$@, \
$(TRUNNER_BPF_CFLAGS), \
$(TRUNNER_BPF_LDFLAGS))
+
+$(TRUNNER_BPF_SKELS): $(TRUNNER_OUTPUT)/%.skel.h: \
+ $(TRUNNER_OUTPUT)/%.o \
+ | $(BPFTOOL) $(TRUNNER_OUTPUT)
+ $$(call msg, GEN-SKEL,$(TRUNNER_BINARY),$$@)
+ $$(BPFTOOL) gen skeleton $$< > $$@
endif
# ensure we set up tests.h header generation rule just once
ifeq ($($(TRUNNER_TESTS_DIR)-tests-hdr),)
$(TRUNNER_TESTS_DIR)-tests-hdr := y
$(TRUNNER_TESTS_HDR): $(TRUNNER_TESTS_DIR)/*.c
+ $$(call msg, TEST-HDR,$(TRUNNER_BINARY),$$@)
$$(shell ( cd $(TRUNNER_TESTS_DIR); \
echo '/* Generated header, do not edit */'; \
ls *.c 2> /dev/null | \
@@ -245,7 +292,9 @@ $(TRUNNER_TEST_OBJS): $(TRUNNER_OUTPUT)/%.test.o: \
$(TRUNNER_TESTS_DIR)/%.c \
$(TRUNNER_EXTRA_HDRS) \
$(TRUNNER_BPF_OBJS) \
+ $(TRUNNER_BPF_SKELS) \
$$(BPFOBJ) | $(TRUNNER_OUTPUT)
+ $$(call msg, TEST-OBJ,$(TRUNNER_BINARY),$$@)
cd $$(@D) && $$(CC) $$(CFLAGS) -c $(CURDIR)/$$< $$(LDLIBS) -o $$(@F)
$(TRUNNER_EXTRA_OBJS): $(TRUNNER_OUTPUT)/%.o: \
@@ -253,17 +302,20 @@ $(TRUNNER_EXTRA_OBJS): $(TRUNNER_OUTPUT)/%.o: \
$(TRUNNER_EXTRA_HDRS) \
$(TRUNNER_TESTS_HDR) \
$$(BPFOBJ) | $(TRUNNER_OUTPUT)
+ $$(call msg, EXTRA-OBJ,$(TRUNNER_BINARY),$$@)
$$(CC) $$(CFLAGS) -c $$< $$(LDLIBS) -o $$@
+# only copy extra resources if in flavored build
$(TRUNNER_BINARY)-extras: $(TRUNNER_EXTRA_FILES) | $(TRUNNER_OUTPUT)
ifneq ($2,)
- # only copy extra resources if in flavored build
+ $$(call msg, EXTRAS-CP,$(TRUNNER_BINARY),$(TRUNNER_EXTRA_FILES))
cp -a $$^ $(TRUNNER_OUTPUT)/
endif
$(OUTPUT)/$(TRUNNER_BINARY): $(TRUNNER_TEST_OBJS) \
$(TRUNNER_EXTRA_OBJS) $$(BPFOBJ) \
| $(TRUNNER_BINARY)-extras
+ $$(call msg, BINARY,,$$@)
$$(CC) $$(CFLAGS) $$(filter %.a %.o,$$^) $$(LDLIBS) -o $$@
endef
@@ -315,12 +367,15 @@ verifier/tests.h: verifier/*.c
echo '#endif' \
) > verifier/tests.h)
$(OUTPUT)/test_verifier: test_verifier.c verifier/tests.h $(BPFOBJ) | $(OUTPUT)
+ $(call msg, BINARY,,$@)
$(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@
# Make sure we are able to include and link libbpf against c++.
-$(OUTPUT)/test_cpp: test_cpp.cpp $(BPFOBJ)
+$(OUTPUT)/test_cpp: test_cpp.cpp $(OUTPUT)/test_core_extern.skel.h $(BPFOBJ)
+ $(call msg, CXX,,$@)
$(CXX) $(CFLAGS) $^ $(LDLIBS) -o $@
EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) \
prog_tests/tests.h map_tests/tests.h verifier/tests.h \
- feature $(OUTPUT)/*.o $(OUTPUT)/no_alu32 $(OUTPUT)/bpf_gcc
+ feature $(OUTPUT)/*.o $(OUTPUT)/no_alu32 $(OUTPUT)/bpf_gcc \
+ tools *.skel.h
diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
index a83111a32d4a..a0ee87c8e1ea 100644
--- a/tools/testing/selftests/bpf/prog_tests/attach_probe.c
+++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
@@ -1,26 +1,9 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
-
-#define EMBED_FILE(NAME, PATH) \
-asm ( \
-" .pushsection \".rodata\", \"a\", @progbits \n" \
-" .global "#NAME"_data \n" \
-#NAME"_data: \n" \
-" .incbin \"" PATH "\" \n" \
-#NAME"_data_end: \n" \
-" .global "#NAME"_size \n" \
-" .type "#NAME"_size, @object \n" \
-" .size "#NAME"_size, 4 \n" \
-" .align 4, \n" \
-#NAME"_size: \n" \
-" .int "#NAME"_data_end - "#NAME"_data \n" \
-" .popsection \n" \
-); \
-extern char NAME##_data[]; \
-extern int NAME##_size;
+#include "test_attach_probe.skel.h"
ssize_t get_base_addr() {
- size_t start;
+ size_t start, offset;
char buf[256];
FILE *f;
@@ -28,10 +11,11 @@ ssize_t get_base_addr() {
if (!f)
return -errno;
- while (fscanf(f, "%zx-%*x %s %*s\n", &start, buf) == 2) {
+ while (fscanf(f, "%zx-%*x %s %zx %*[^\n]\n",
+ &start, buf, &offset) == 3) {
if (strcmp(buf, "r-xp") == 0) {
fclose(f);
- return start;
+ return start - offset;
}
}
@@ -39,30 +23,12 @@ ssize_t get_base_addr() {
return -EINVAL;
}
-EMBED_FILE(probe, "test_attach_probe.o");
-
void test_attach_probe(void)
{
- const char *kprobe_name = "kprobe/sys_nanosleep";
- const char *kretprobe_name = "kretprobe/sys_nanosleep";
- const char *uprobe_name = "uprobe/trigger_func";
- const char *uretprobe_name = "uretprobe/trigger_func";
- const int kprobe_idx = 0, kretprobe_idx = 1;
- const int uprobe_idx = 2, uretprobe_idx = 3;
- const char *obj_name = "attach_probe";
- DECLARE_LIBBPF_OPTS(bpf_object_open_opts, open_opts,
- .object_name = obj_name,
- .relaxed_maps = true,
- );
- struct bpf_program *kprobe_prog, *kretprobe_prog;
- struct bpf_program *uprobe_prog, *uretprobe_prog;
- struct bpf_object *obj;
- int err, duration = 0, res;
- struct bpf_link *kprobe_link = NULL;
- struct bpf_link *kretprobe_link = NULL;
- struct bpf_link *uprobe_link = NULL;
- struct bpf_link *uretprobe_link = NULL;
- int results_map_fd;
+ int duration = 0;
+ struct bpf_link *kprobe_link, *kretprobe_link;
+ struct bpf_link *uprobe_link, *uretprobe_link;
+ struct test_attach_probe* skel;
size_t uprobe_offset;
ssize_t base_addr;
@@ -72,123 +38,68 @@ void test_attach_probe(void)
return;
uprobe_offset = (size_t)&get_base_addr - base_addr;
- /* open object */
- obj = bpf_object__open_mem(probe_data, probe_size, &open_opts);
- if (CHECK(IS_ERR(obj), "obj_open_mem", "err %ld\n", PTR_ERR(obj)))
+ skel = test_attach_probe__open_and_load();
+ if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
return;
-
- if (CHECK(strcmp(bpf_object__name(obj), obj_name), "obj_name",
- "wrong obj name '%s', expected '%s'\n",
- bpf_object__name(obj), obj_name))
+ if (CHECK(!skel->bss, "check_bss", ".bss wasn't mmap()-ed\n"))
goto cleanup;
- kprobe_prog = bpf_object__find_program_by_title(obj, kprobe_name);
- if (CHECK(!kprobe_prog, "find_probe",
- "prog '%s' not found\n", kprobe_name))
- goto cleanup;
- kretprobe_prog = bpf_object__find_program_by_title(obj, kretprobe_name);
- if (CHECK(!kretprobe_prog, "find_probe",
- "prog '%s' not found\n", kretprobe_name))
- goto cleanup;
- uprobe_prog = bpf_object__find_program_by_title(obj, uprobe_name);
- if (CHECK(!uprobe_prog, "find_probe",
- "prog '%s' not found\n", uprobe_name))
- goto cleanup;
- uretprobe_prog = bpf_object__find_program_by_title(obj, uretprobe_name);
- if (CHECK(!uretprobe_prog, "find_probe",
- "prog '%s' not found\n", uretprobe_name))
- goto cleanup;
-
- /* create maps && load programs */
- err = bpf_object__load(obj);
- if (CHECK(err, "obj_load", "err %d\n", err))
- goto cleanup;
-
- /* load maps */
- results_map_fd = bpf_find_map(__func__, obj, "results_map");
- if (CHECK(results_map_fd < 0, "find_results_map",
- "err %d\n", results_map_fd))
- goto cleanup;
-
- kprobe_link = bpf_program__attach_kprobe(kprobe_prog,
+ kprobe_link = bpf_program__attach_kprobe(skel->progs.handle_kprobe,
false /* retprobe */,
SYS_NANOSLEEP_KPROBE_NAME);
if (CHECK(IS_ERR(kprobe_link), "attach_kprobe",
- "err %ld\n", PTR_ERR(kprobe_link))) {
- kprobe_link = NULL;
+ "err %ld\n", PTR_ERR(kprobe_link)))
goto cleanup;
- }
- kretprobe_link = bpf_program__attach_kprobe(kretprobe_prog,
+ skel->links.handle_kprobe = kprobe_link;
+
+ kretprobe_link = bpf_program__attach_kprobe(skel->progs.handle_kretprobe,
true /* retprobe */,
SYS_NANOSLEEP_KPROBE_NAME);
if (CHECK(IS_ERR(kretprobe_link), "attach_kretprobe",
- "err %ld\n", PTR_ERR(kretprobe_link))) {
- kretprobe_link = NULL;
+ "err %ld\n", PTR_ERR(kretprobe_link)))
goto cleanup;
- }
- uprobe_link = bpf_program__attach_uprobe(uprobe_prog,
+ skel->links.handle_kretprobe = kretprobe_link;
+
+ uprobe_link = bpf_program__attach_uprobe(skel->progs.handle_uprobe,
false /* retprobe */,
0 /* self pid */,
"/proc/self/exe",
uprobe_offset);
if (CHECK(IS_ERR(uprobe_link), "attach_uprobe",
- "err %ld\n", PTR_ERR(uprobe_link))) {
- uprobe_link = NULL;
+ "err %ld\n", PTR_ERR(uprobe_link)))
goto cleanup;
- }
- uretprobe_link = bpf_program__attach_uprobe(uretprobe_prog,
+ skel->links.handle_uprobe = uprobe_link;
+
+ uretprobe_link = bpf_program__attach_uprobe(skel->progs.handle_uretprobe,
true /* retprobe */,
-1 /* any pid */,
"/proc/self/exe",
uprobe_offset);
if (CHECK(IS_ERR(uretprobe_link), "attach_uretprobe",
- "err %ld\n", PTR_ERR(uretprobe_link))) {
- uretprobe_link = NULL;
+ "err %ld\n", PTR_ERR(uretprobe_link)))
goto cleanup;
- }
+ skel->links.handle_uretprobe = uretprobe_link;
/* trigger & validate kprobe && kretprobe */
usleep(1);
- err = bpf_map_lookup_elem(results_map_fd, &kprobe_idx, &res);
- if (CHECK(err, "get_kprobe_res",
- "failed to get kprobe res: %d\n", err))
+ if (CHECK(skel->bss->kprobe_res != 1, "check_kprobe_res",
+ "wrong kprobe res: %d\n", skel->bss->kprobe_res))
goto cleanup;
- if (CHECK(res != kprobe_idx + 1, "check_kprobe_res",
- "wrong kprobe res: %d\n", res))
- goto cleanup;
-
- err = bpf_map_lookup_elem(results_map_fd, &kretprobe_idx, &res);
- if (CHECK(err, "get_kretprobe_res",
- "failed to get kretprobe res: %d\n", err))
- goto cleanup;
- if (CHECK(res != kretprobe_idx + 1, "check_kretprobe_res",
- "wrong kretprobe res: %d\n", res))
+ if (CHECK(skel->bss->kretprobe_res != 2, "check_kretprobe_res",
+ "wrong kretprobe res: %d\n", skel->bss->kretprobe_res))
goto cleanup;
/* trigger & validate uprobe & uretprobe */
get_base_addr();
- err = bpf_map_lookup_elem(results_map_fd, &uprobe_idx, &res);
- if (CHECK(err, "get_uprobe_res",
- "failed to get uprobe res: %d\n", err))
- goto cleanup;
- if (CHECK(res != uprobe_idx + 1, "check_uprobe_res",
- "wrong uprobe res: %d\n", res))
- goto cleanup;
-
- err = bpf_map_lookup_elem(results_map_fd, &uretprobe_idx, &res);
- if (CHECK(err, "get_uretprobe_res",
- "failed to get uretprobe res: %d\n", err))
+ if (CHECK(skel->bss->uprobe_res != 3, "check_uprobe_res",
+ "wrong uprobe res: %d\n", skel->bss->uprobe_res))
goto cleanup;
- if (CHECK(res != uretprobe_idx + 1, "check_uretprobe_res",
- "wrong uretprobe res: %d\n", res))
+ if (CHECK(skel->bss->uretprobe_res != 4, "check_uretprobe_res",
+ "wrong uretprobe res: %d\n", skel->bss->uretprobe_res))
goto cleanup;
cleanup:
- bpf_link__destroy(kprobe_link);
- bpf_link__destroy(kretprobe_link);
- bpf_link__destroy(uprobe_link);
- bpf_link__destroy(uretprobe_link);
- bpf_object__close(obj);
+ test_attach_probe__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c
new file mode 100644
index 000000000000..5b13f2c6c402
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+
+#include "cgroup_helpers.h"
+
+#define PING_CMD "ping -q -c1 -w1 127.0.0.1 > /dev/null"
+
+char bpf_log_buf[BPF_LOG_BUF_SIZE];
+
+static int prog_load(void)
+{
+ struct bpf_insn prog[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 1), /* r0 = 1 */
+ BPF_EXIT_INSN(),
+ };
+ size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn);
+
+ return bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB,
+ prog, insns_cnt, "GPL", 0,
+ bpf_log_buf, BPF_LOG_BUF_SIZE);
+}
+
+void test_cgroup_attach_autodetach(void)
+{
+ __u32 duration = 0, prog_cnt = 4, attach_flags;
+ int allow_prog[2] = {-1};
+ __u32 prog_ids[2] = {0};
+ void *ptr = NULL;
+ int cg = 0, i;
+ int attempts;
+
+ for (i = 0; i < ARRAY_SIZE(allow_prog); i++) {
+ allow_prog[i] = prog_load();
+ if (CHECK(allow_prog[i] < 0, "prog_load",
+ "verifier output:\n%s\n-------\n", bpf_log_buf))
+ goto err;
+ }
+
+ if (CHECK_FAIL(setup_cgroup_environment()))
+ goto err;
+
+ /* create a cgroup, attach two programs and remember their ids */
+ cg = create_and_get_cgroup("/cg_autodetach");
+ if (CHECK_FAIL(cg < 0))
+ goto err;
+
+ if (CHECK_FAIL(join_cgroup("/cg_autodetach")))
+ goto err;
+
+ for (i = 0; i < ARRAY_SIZE(allow_prog); i++)
+ if (CHECK(bpf_prog_attach(allow_prog[i], cg,
+ BPF_CGROUP_INET_EGRESS,
+ BPF_F_ALLOW_MULTI),
+ "prog_attach", "prog[%d], errno=%d\n", i, errno))
+ goto err;
+
+ /* make sure that programs are attached and run some traffic */
+ if (CHECK(bpf_prog_query(cg, BPF_CGROUP_INET_EGRESS, 0, &attach_flags,
+ prog_ids, &prog_cnt),
+ "prog_query", "errno=%d\n", errno))
+ goto err;
+ if (CHECK_FAIL(system(PING_CMD)))
+ goto err;
+
+ /* allocate some memory (4Mb) to pin the original cgroup */
+ ptr = malloc(4 * (1 << 20));
+ if (CHECK_FAIL(!ptr))
+ goto err;
+
+ /* close programs and cgroup fd */
+ for (i = 0; i < ARRAY_SIZE(allow_prog); i++) {
+ close(allow_prog[i]);
+ allow_prog[i] = -1;
+ }
+
+ close(cg);
+ cg = 0;
+
+ /* leave the cgroup and remove it. don't detach programs */
+ cleanup_cgroup_environment();
+
+ /* wait for the asynchronous auto-detachment.
+ * wait for no more than 5 sec and give up.
+ */
+ for (i = 0; i < ARRAY_SIZE(prog_ids); i++) {
+ for (attempts = 5; attempts >= 0; attempts--) {
+ int fd = bpf_prog_get_fd_by_id(prog_ids[i]);
+
+ if (fd < 0)
+ break;
+
+ /* don't leave the fd open */
+ close(fd);
+
+ if (CHECK_FAIL(!attempts))
+ goto err;
+
+ sleep(1);
+ }
+ }
+
+err:
+ for (i = 0; i < ARRAY_SIZE(allow_prog); i++)
+ if (allow_prog[i] >= 0)
+ close(allow_prog[i]);
+ if (cg)
+ close(cg);
+ free(ptr);
+ cleanup_cgroup_environment();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c
new file mode 100644
index 000000000000..2ff21dbce179
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c
@@ -0,0 +1,285 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+
+#include "cgroup_helpers.h"
+
+#define PING_CMD "ping -q -c1 -w1 127.0.0.1 > /dev/null"
+
+char bpf_log_buf[BPF_LOG_BUF_SIZE];
+
+static int map_fd = -1;
+
+static int prog_load_cnt(int verdict, int val)
+{
+ int cgroup_storage_fd, percpu_cgroup_storage_fd;
+
+ if (map_fd < 0)
+ map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, 4, 8, 1, 0);
+ if (map_fd < 0) {
+ printf("failed to create map '%s'\n", strerror(errno));
+ return -1;
+ }
+
+ cgroup_storage_fd = bpf_create_map(BPF_MAP_TYPE_CGROUP_STORAGE,
+ sizeof(struct bpf_cgroup_storage_key), 8, 0, 0);
+ if (cgroup_storage_fd < 0) {
+ printf("failed to create map '%s'\n", strerror(errno));
+ return -1;
+ }
+
+ percpu_cgroup_storage_fd = bpf_create_map(
+ BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
+ sizeof(struct bpf_cgroup_storage_key), 8, 0, 0);
+ if (percpu_cgroup_storage_fd < 0) {
+ printf("failed to create map '%s'\n", strerror(errno));
+ return -1;
+ }
+
+ struct bpf_insn prog[] = {
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), /* *(u32 *)(fp - 4) = r0 */
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), /* r2 = fp - 4 */
+ BPF_LD_MAP_FD(BPF_REG_1, map_fd),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_1, val), /* r1 = 1 */
+ BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */
+
+ BPF_LD_MAP_FD(BPF_REG_1, cgroup_storage_fd),
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage),
+ BPF_MOV64_IMM(BPF_REG_1, val),
+ BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_W, BPF_REG_0, BPF_REG_1, 0, 0),
+
+ BPF_LD_MAP_FD(BPF_REG_1, percpu_cgroup_storage_fd),
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 0x1),
+ BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_3, 0),
+
+ BPF_MOV64_IMM(BPF_REG_0, verdict), /* r0 = verdict */
+ BPF_EXIT_INSN(),
+ };
+ size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn);
+ int ret;
+
+ ret = bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB,
+ prog, insns_cnt, "GPL", 0,
+ bpf_log_buf, BPF_LOG_BUF_SIZE);
+
+ close(cgroup_storage_fd);
+ return ret;
+}
+
+void test_cgroup_attach_multi(void)
+{
+ __u32 prog_ids[4], prog_cnt = 0, attach_flags, saved_prog_id;
+ int cg1 = 0, cg2 = 0, cg3 = 0, cg4 = 0, cg5 = 0, key = 0;
+ DECLARE_LIBBPF_OPTS(bpf_prog_attach_opts, attach_opts);
+ int allow_prog[7] = {-1};
+ unsigned long long value;
+ __u32 duration = 0;
+ int i = 0;
+
+ for (i = 0; i < ARRAY_SIZE(allow_prog); i++) {
+ allow_prog[i] = prog_load_cnt(1, 1 << i);
+ if (CHECK(allow_prog[i] < 0, "prog_load",
+ "verifier output:\n%s\n-------\n", bpf_log_buf))
+ goto err;
+ }
+
+ if (CHECK_FAIL(setup_cgroup_environment()))
+ goto err;
+
+ cg1 = create_and_get_cgroup("/cg1");
+ if (CHECK_FAIL(cg1 < 0))
+ goto err;
+ cg2 = create_and_get_cgroup("/cg1/cg2");
+ if (CHECK_FAIL(cg2 < 0))
+ goto err;
+ cg3 = create_and_get_cgroup("/cg1/cg2/cg3");
+ if (CHECK_FAIL(cg3 < 0))
+ goto err;
+ cg4 = create_and_get_cgroup("/cg1/cg2/cg3/cg4");
+ if (CHECK_FAIL(cg4 < 0))
+ goto err;
+ cg5 = create_and_get_cgroup("/cg1/cg2/cg3/cg4/cg5");
+ if (CHECK_FAIL(cg5 < 0))
+ goto err;
+
+ if (CHECK_FAIL(join_cgroup("/cg1/cg2/cg3/cg4/cg5")))
+ goto err;
+
+ if (CHECK(bpf_prog_attach(allow_prog[0], cg1, BPF_CGROUP_INET_EGRESS,
+ BPF_F_ALLOW_MULTI),
+ "prog0_attach_to_cg1_multi", "errno=%d\n", errno))
+ goto err;
+
+ if (CHECK(!bpf_prog_attach(allow_prog[0], cg1, BPF_CGROUP_INET_EGRESS,
+ BPF_F_ALLOW_MULTI),
+ "fail_same_prog_attach_to_cg1", "unexpected success\n"))
+ goto err;
+
+ if (CHECK(bpf_prog_attach(allow_prog[1], cg1, BPF_CGROUP_INET_EGRESS,
+ BPF_F_ALLOW_MULTI),
+ "prog1_attach_to_cg1_multi", "errno=%d\n", errno))
+ goto err;
+
+ if (CHECK(bpf_prog_attach(allow_prog[2], cg2, BPF_CGROUP_INET_EGRESS,
+ BPF_F_ALLOW_OVERRIDE),
+ "prog2_attach_to_cg2_override", "errno=%d\n", errno))
+ goto err;
+
+ if (CHECK(bpf_prog_attach(allow_prog[3], cg3, BPF_CGROUP_INET_EGRESS,
+ BPF_F_ALLOW_MULTI),
+ "prog3_attach_to_cg3_multi", "errno=%d\n", errno))
+ goto err;
+
+ if (CHECK(bpf_prog_attach(allow_prog[4], cg4, BPF_CGROUP_INET_EGRESS,
+ BPF_F_ALLOW_OVERRIDE),
+ "prog4_attach_to_cg4_override", "errno=%d\n", errno))
+ goto err;
+
+ if (CHECK(bpf_prog_attach(allow_prog[5], cg5, BPF_CGROUP_INET_EGRESS, 0),
+ "prog5_attach_to_cg5_none", "errno=%d\n", errno))
+ goto err;
+
+ CHECK_FAIL(system(PING_CMD));
+ CHECK_FAIL(bpf_map_lookup_elem(map_fd, &key, &value));
+ CHECK_FAIL(value != 1 + 2 + 8 + 32);
+
+ /* query the number of effective progs in cg5 */
+ CHECK_FAIL(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS,
+ BPF_F_QUERY_EFFECTIVE, NULL, NULL, &prog_cnt));
+ CHECK_FAIL(prog_cnt != 4);
+ /* retrieve prog_ids of effective progs in cg5 */
+ CHECK_FAIL(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS,
+ BPF_F_QUERY_EFFECTIVE, &attach_flags,
+ prog_ids, &prog_cnt));
+ CHECK_FAIL(prog_cnt != 4);
+ CHECK_FAIL(attach_flags != 0);
+ saved_prog_id = prog_ids[0];
+ /* check enospc handling */
+ prog_ids[0] = 0;
+ prog_cnt = 2;
+ CHECK_FAIL(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS,
+ BPF_F_QUERY_EFFECTIVE, &attach_flags,
+ prog_ids, &prog_cnt) != -1);
+ CHECK_FAIL(errno != ENOSPC);
+ CHECK_FAIL(prog_cnt != 4);
+ /* check that prog_ids are returned even when buffer is too small */
+ CHECK_FAIL(prog_ids[0] != saved_prog_id);
+ /* retrieve prog_id of single attached prog in cg5 */
+ prog_ids[0] = 0;
+ CHECK_FAIL(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, 0, NULL,
+ prog_ids, &prog_cnt));
+ CHECK_FAIL(prog_cnt != 1);
+ CHECK_FAIL(prog_ids[0] != saved_prog_id);
+
+ /* detach bottom program and ping again */
+ if (CHECK(bpf_prog_detach2(-1, cg5, BPF_CGROUP_INET_EGRESS),
+ "prog_detach_from_cg5", "errno=%d\n", errno))
+ goto err;
+
+ value = 0;
+ CHECK_FAIL(bpf_map_update_elem(map_fd, &key, &value, 0));
+ CHECK_FAIL(system(PING_CMD));
+ CHECK_FAIL(bpf_map_lookup_elem(map_fd, &key, &value));
+ CHECK_FAIL(value != 1 + 2 + 8 + 16);
+
+ /* test replace */
+
+ attach_opts.flags = BPF_F_ALLOW_OVERRIDE | BPF_F_REPLACE;
+ attach_opts.replace_prog_fd = allow_prog[0];
+ if (CHECK(!bpf_prog_attach_xattr(allow_prog[6], cg1,
+ BPF_CGROUP_INET_EGRESS, &attach_opts),
+ "fail_prog_replace_override", "unexpected success\n"))
+ goto err;
+ CHECK_FAIL(errno != EINVAL);
+
+ attach_opts.flags = BPF_F_REPLACE;
+ if (CHECK(!bpf_prog_attach_xattr(allow_prog[6], cg1,
+ BPF_CGROUP_INET_EGRESS, &attach_opts),
+ "fail_prog_replace_no_multi", "unexpected success\n"))
+ goto err;
+ CHECK_FAIL(errno != EINVAL);
+
+ attach_opts.flags = BPF_F_ALLOW_MULTI | BPF_F_REPLACE;
+ attach_opts.replace_prog_fd = -1;
+ if (CHECK(!bpf_prog_attach_xattr(allow_prog[6], cg1,
+ BPF_CGROUP_INET_EGRESS, &attach_opts),
+ "fail_prog_replace_bad_fd", "unexpected success\n"))
+ goto err;
+ CHECK_FAIL(errno != EBADF);
+
+ /* replacing a program that is not attached to cgroup should fail */
+ attach_opts.replace_prog_fd = allow_prog[3];
+ if (CHECK(!bpf_prog_attach_xattr(allow_prog[6], cg1,
+ BPF_CGROUP_INET_EGRESS, &attach_opts),
+ "fail_prog_replace_no_ent", "unexpected success\n"))
+ goto err;
+ CHECK_FAIL(errno != ENOENT);
+
+ /* replace 1st from the top program */
+ attach_opts.replace_prog_fd = allow_prog[0];
+ if (CHECK(bpf_prog_attach_xattr(allow_prog[6], cg1,
+ BPF_CGROUP_INET_EGRESS, &attach_opts),
+ "prog_replace", "errno=%d\n", errno))
+ goto err;
+
+ value = 0;
+ CHECK_FAIL(bpf_map_update_elem(map_fd, &key, &value, 0));
+ CHECK_FAIL(system(PING_CMD));
+ CHECK_FAIL(bpf_map_lookup_elem(map_fd, &key, &value));
+ CHECK_FAIL(value != 64 + 2 + 8 + 16);
+
+ /* detach 3rd from bottom program and ping again */
+ if (CHECK(!bpf_prog_detach2(0, cg3, BPF_CGROUP_INET_EGRESS),
+ "fail_prog_detach_from_cg3", "unexpected success\n"))
+ goto err;
+
+ if (CHECK(bpf_prog_detach2(allow_prog[3], cg3, BPF_CGROUP_INET_EGRESS),
+ "prog3_detach_from_cg3", "errno=%d\n", errno))
+ goto err;
+
+ value = 0;
+ CHECK_FAIL(bpf_map_update_elem(map_fd, &key, &value, 0));
+ CHECK_FAIL(system(PING_CMD));
+ CHECK_FAIL(bpf_map_lookup_elem(map_fd, &key, &value));
+ CHECK_FAIL(value != 64 + 2 + 16);
+
+ /* detach 2nd from bottom program and ping again */
+ if (CHECK(bpf_prog_detach2(-1, cg4, BPF_CGROUP_INET_EGRESS),
+ "prog_detach_from_cg4", "errno=%d\n", errno))
+ goto err;
+
+ value = 0;
+ CHECK_FAIL(bpf_map_update_elem(map_fd, &key, &value, 0));
+ CHECK_FAIL(system(PING_CMD));
+ CHECK_FAIL(bpf_map_lookup_elem(map_fd, &key, &value));
+ CHECK_FAIL(value != 64 + 2 + 4);
+
+ prog_cnt = 4;
+ CHECK_FAIL(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS,
+ BPF_F_QUERY_EFFECTIVE, &attach_flags,
+ prog_ids, &prog_cnt));
+ CHECK_FAIL(prog_cnt != 3);
+ CHECK_FAIL(attach_flags != 0);
+ CHECK_FAIL(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, 0, NULL,
+ prog_ids, &prog_cnt));
+ CHECK_FAIL(prog_cnt != 0);
+
+err:
+ for (i = 0; i < ARRAY_SIZE(allow_prog); i++)
+ if (allow_prog[i] >= 0)
+ close(allow_prog[i]);
+ close(cg1);
+ close(cg2);
+ close(cg3);
+ close(cg4);
+ close(cg5);
+ cleanup_cgroup_environment();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c
new file mode 100644
index 000000000000..9d8cb48b99de
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c
@@ -0,0 +1,148 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+
+#include "cgroup_helpers.h"
+
+#define FOO "/foo"
+#define BAR "/foo/bar/"
+#define PING_CMD "ping -q -c1 -w1 127.0.0.1 > /dev/null"
+
+char bpf_log_buf[BPF_LOG_BUF_SIZE];
+
+static int prog_load(int verdict)
+{
+ struct bpf_insn prog[] = {
+ BPF_MOV64_IMM(BPF_REG_0, verdict), /* r0 = verdict */
+ BPF_EXIT_INSN(),
+ };
+ size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn);
+
+ return bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB,
+ prog, insns_cnt, "GPL", 0,
+ bpf_log_buf, BPF_LOG_BUF_SIZE);
+}
+
+void test_cgroup_attach_override(void)
+{
+ int drop_prog = -1, allow_prog = -1, foo = -1, bar = -1;
+ __u32 duration = 0;
+
+ allow_prog = prog_load(1);
+ if (CHECK(allow_prog < 0, "prog_load_allow",
+ "verifier output:\n%s\n-------\n", bpf_log_buf))
+ goto err;
+
+ drop_prog = prog_load(0);
+ if (CHECK(drop_prog < 0, "prog_load_drop",
+ "verifier output:\n%s\n-------\n", bpf_log_buf))
+ goto err;
+
+ foo = test__join_cgroup(FOO);
+ if (CHECK(foo < 0, "cgroup_join_foo", "cgroup setup failed\n"))
+ goto err;
+
+ if (CHECK(bpf_prog_attach(drop_prog, foo, BPF_CGROUP_INET_EGRESS,
+ BPF_F_ALLOW_OVERRIDE),
+ "prog_attach_drop_foo_override",
+ "attach prog to %s failed, errno=%d\n", FOO, errno))
+ goto err;
+
+ if (CHECK(!system(PING_CMD), "ping_fail",
+ "ping unexpectedly succeeded\n"))
+ goto err;
+
+ bar = test__join_cgroup(BAR);
+ if (CHECK(bar < 0, "cgroup_join_bar", "cgroup setup failed\n"))
+ goto err;
+
+ if (CHECK(!system(PING_CMD), "ping_fail",
+ "ping unexpectedly succeeded\n"))
+ goto err;
+
+ if (CHECK(bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS,
+ BPF_F_ALLOW_OVERRIDE),
+ "prog_attach_allow_bar_override",
+ "attach prog to %s failed, errno=%d\n", BAR, errno))
+ goto err;
+
+ if (CHECK(system(PING_CMD), "ping_ok", "ping failed\n"))
+ goto err;
+
+ if (CHECK(bpf_prog_detach(bar, BPF_CGROUP_INET_EGRESS),
+ "prog_detach_bar",
+ "detach prog from %s failed, errno=%d\n", BAR, errno))
+ goto err;
+
+ if (CHECK(!system(PING_CMD), "ping_fail",
+ "ping unexpectedly succeeded\n"))
+ goto err;
+
+ if (CHECK(bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS,
+ BPF_F_ALLOW_OVERRIDE),
+ "prog_attach_allow_bar_override",
+ "attach prog to %s failed, errno=%d\n", BAR, errno))
+ goto err;
+
+ if (CHECK(bpf_prog_detach(foo, BPF_CGROUP_INET_EGRESS),
+ "prog_detach_foo",
+ "detach prog from %s failed, errno=%d\n", FOO, errno))
+ goto err;
+
+ if (CHECK(system(PING_CMD), "ping_ok", "ping failed\n"))
+ goto err;
+
+ if (CHECK(bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS,
+ BPF_F_ALLOW_OVERRIDE),
+ "prog_attach_allow_bar_override",
+ "attach prog to %s failed, errno=%d\n", BAR, errno))
+ goto err;
+
+ if (CHECK(!bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 0),
+ "fail_prog_attach_allow_bar_none",
+ "attach prog to %s unexpectedly succeeded\n", BAR))
+ goto err;
+
+ if (CHECK(bpf_prog_detach(bar, BPF_CGROUP_INET_EGRESS),
+ "prog_detach_bar",
+ "detach prog from %s failed, errno=%d\n", BAR, errno))
+ goto err;
+
+ if (CHECK(!bpf_prog_detach(foo, BPF_CGROUP_INET_EGRESS),
+ "fail_prog_detach_foo",
+ "double detach from %s unexpectedly succeeded\n", FOO))
+ goto err;
+
+ if (CHECK(bpf_prog_attach(allow_prog, foo, BPF_CGROUP_INET_EGRESS, 0),
+ "prog_attach_allow_foo_none",
+ "attach prog to %s failed, errno=%d\n", FOO, errno))
+ goto err;
+
+ if (CHECK(!bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 0),
+ "fail_prog_attach_allow_bar_none",
+ "attach prog to %s unexpectedly succeeded\n", BAR))
+ goto err;
+
+ if (CHECK(!bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS,
+ BPF_F_ALLOW_OVERRIDE),
+ "fail_prog_attach_allow_bar_override",
+ "attach prog to %s unexpectedly succeeded\n", BAR))
+ goto err;
+
+ if (CHECK(!bpf_prog_attach(allow_prog, foo, BPF_CGROUP_INET_EGRESS,
+ BPF_F_ALLOW_OVERRIDE),
+ "fail_prog_attach_allow_foo_override",
+ "attach prog to %s unexpectedly succeeded\n", FOO))
+ goto err;
+
+ if (CHECK(bpf_prog_attach(drop_prog, foo, BPF_CGROUP_INET_EGRESS, 0),
+ "prog_attach_drop_foo_none",
+ "attach prog to %s failed, errno=%d\n", FOO, errno))
+ goto err;
+
+err:
+ close(foo);
+ close(bar);
+ close(allow_prog);
+ close(drop_prog);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/core_extern.c b/tools/testing/selftests/bpf/prog_tests/core_extern.c
new file mode 100644
index 000000000000..b093787e9448
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/core_extern.c
@@ -0,0 +1,169 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 Facebook */
+
+#include <test_progs.h>
+#include <sys/mman.h>
+#include <sys/utsname.h>
+#include <linux/version.h>
+#include "test_core_extern.skel.h"
+
+static uint32_t get_kernel_version(void)
+{
+ uint32_t major, minor, patch;
+ struct utsname info;
+
+ uname(&info);
+ if (sscanf(info.release, "%u.%u.%u", &major, &minor, &patch) != 3)
+ return 0;
+ return KERNEL_VERSION(major, minor, patch);
+}
+
+#define CFG "CONFIG_BPF_SYSCALL=n\n"
+
+static struct test_case {
+ const char *name;
+ const char *cfg;
+ bool fails;
+ struct test_core_extern__data data;
+} test_cases[] = {
+ { .name = "default search path", .data = { .bpf_syscall = true } },
+ {
+ .name = "custom values",
+ .cfg = "CONFIG_BPF_SYSCALL=n\n"
+ "CONFIG_TRISTATE=m\n"
+ "CONFIG_BOOL=y\n"
+ "CONFIG_CHAR=100\n"
+ "CONFIG_USHORT=30000\n"
+ "CONFIG_INT=123456\n"
+ "CONFIG_ULONG=0xDEADBEEFC0DE\n"
+ "CONFIG_STR=\"abracad\"\n"
+ "CONFIG_MISSING=0",
+ .data = {
+ .bpf_syscall = false,
+ .tristate_val = TRI_MODULE,
+ .bool_val = true,
+ .char_val = 100,
+ .ushort_val = 30000,
+ .int_val = 123456,
+ .ulong_val = 0xDEADBEEFC0DE,
+ .str_val = "abracad",
+ },
+ },
+ /* TRISTATE */
+ { .name = "tristate (y)", .cfg = CFG"CONFIG_TRISTATE=y\n",
+ .data = { .tristate_val = TRI_YES } },
+ { .name = "tristate (n)", .cfg = CFG"CONFIG_TRISTATE=n\n",
+ .data = { .tristate_val = TRI_NO } },
+ { .name = "tristate (m)", .cfg = CFG"CONFIG_TRISTATE=m\n",
+ .data = { .tristate_val = TRI_MODULE } },
+ { .name = "tristate (int)", .fails = 1, .cfg = CFG"CONFIG_TRISTATE=1" },
+ { .name = "tristate (bad)", .fails = 1, .cfg = CFG"CONFIG_TRISTATE=M" },
+ /* BOOL */
+ { .name = "bool (y)", .cfg = CFG"CONFIG_BOOL=y\n",
+ .data = { .bool_val = true } },
+ { .name = "bool (n)", .cfg = CFG"CONFIG_BOOL=n\n",
+ .data = { .bool_val = false } },
+ { .name = "bool (tristate)", .fails = 1, .cfg = CFG"CONFIG_BOOL=m" },
+ { .name = "bool (int)", .fails = 1, .cfg = CFG"CONFIG_BOOL=1" },
+ /* CHAR */
+ { .name = "char (tristate)", .cfg = CFG"CONFIG_CHAR=m\n",
+ .data = { .char_val = 'm' } },
+ { .name = "char (bad)", .fails = 1, .cfg = CFG"CONFIG_CHAR=q\n" },
+ { .name = "char (empty)", .fails = 1, .cfg = CFG"CONFIG_CHAR=\n" },
+ { .name = "char (str)", .fails = 1, .cfg = CFG"CONFIG_CHAR=\"y\"\n" },
+ /* STRING */
+ { .name = "str (empty)", .cfg = CFG"CONFIG_STR=\"\"\n",
+ .data = { .str_val = "\0\0\0\0\0\0\0" } },
+ { .name = "str (padded)", .cfg = CFG"CONFIG_STR=\"abra\"\n",
+ .data = { .str_val = "abra\0\0\0" } },
+ { .name = "str (too long)", .cfg = CFG"CONFIG_STR=\"abracada\"\n",
+ .data = { .str_val = "abracad" } },
+ { .name = "str (no value)", .fails = 1, .cfg = CFG"CONFIG_STR=\n" },
+ { .name = "str (bad value)", .fails = 1, .cfg = CFG"CONFIG_STR=bla\n" },
+ /* INTEGERS */
+ {
+ .name = "integer forms",
+ .cfg = CFG
+ "CONFIG_CHAR=0xA\n"
+ "CONFIG_USHORT=0462\n"
+ "CONFIG_INT=-100\n"
+ "CONFIG_ULONG=+1000000000000",
+ .data = {
+ .char_val = 0xA,
+ .ushort_val = 0462,
+ .int_val = -100,
+ .ulong_val = 1000000000000,
+ },
+ },
+ { .name = "int (bad)", .fails = 1, .cfg = CFG"CONFIG_INT=abc" },
+ { .name = "int (str)", .fails = 1, .cfg = CFG"CONFIG_INT=\"abc\"" },
+ { .name = "int (empty)", .fails = 1, .cfg = CFG"CONFIG_INT=" },
+ { .name = "int (mixed)", .fails = 1, .cfg = CFG"CONFIG_INT=123abc" },
+ { .name = "int (max)", .cfg = CFG"CONFIG_INT=2147483647",
+ .data = { .int_val = 2147483647 } },
+ { .name = "int (min)", .cfg = CFG"CONFIG_INT=-2147483648",
+ .data = { .int_val = -2147483648 } },
+ { .name = "int (max+1)", .fails = 1, .cfg = CFG"CONFIG_INT=2147483648" },
+ { .name = "int (min-1)", .fails = 1, .cfg = CFG"CONFIG_INT=-2147483649" },
+ { .name = "ushort (max)", .cfg = CFG"CONFIG_USHORT=65535",
+ .data = { .ushort_val = 65535 } },
+ { .name = "ushort (min)", .cfg = CFG"CONFIG_USHORT=0",
+ .data = { .ushort_val = 0 } },
+ { .name = "ushort (max+1)", .fails = 1, .cfg = CFG"CONFIG_USHORT=65536" },
+ { .name = "ushort (min-1)", .fails = 1, .cfg = CFG"CONFIG_USHORT=-1" },
+ { .name = "u64 (max)", .cfg = CFG"CONFIG_ULONG=0xffffffffffffffff",
+ .data = { .ulong_val = 0xffffffffffffffff } },
+ { .name = "u64 (min)", .cfg = CFG"CONFIG_ULONG=0",
+ .data = { .ulong_val = 0 } },
+ { .name = "u64 (max+1)", .fails = 1, .cfg = CFG"CONFIG_ULONG=0x10000000000000000" },
+};
+
+void test_core_extern(void)
+{
+ const uint32_t kern_ver = get_kernel_version();
+ int err, duration = 0, i, j;
+ struct test_core_extern *skel = NULL;
+ uint64_t *got, *exp;
+ int n = sizeof(*skel->data) / sizeof(uint64_t);
+
+ for (i = 0; i < ARRAY_SIZE(test_cases); i++) {
+ struct test_case *t = &test_cases[i];
+ DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
+ .kconfig = t->cfg,
+ );
+
+ if (!test__start_subtest(t->name))
+ continue;
+
+ skel = test_core_extern__open_opts(&opts);
+ if (CHECK(!skel, "skel_open", "skeleton open failed\n"))
+ goto cleanup;
+ err = test_core_extern__load(skel);
+ if (t->fails) {
+ CHECK(!err, "skel_load",
+ "shouldn't succeed open/load of skeleton\n");
+ goto cleanup;
+ } else if (CHECK(err, "skel_load",
+ "failed to open/load skeleton\n")) {
+ goto cleanup;
+ }
+ err = test_core_extern__attach(skel);
+ if (CHECK(err, "attach_raw_tp", "failed attach: %d\n", err))
+ goto cleanup;
+
+ usleep(1);
+
+ t->data.kern_ver = kern_ver;
+ t->data.missing_val = 0xDEADC0DE;
+ got = (uint64_t *)skel->data;
+ exp = (uint64_t *)&t->data;
+ for (j = 0; j < n; j++) {
+ CHECK(got[j] != exp[j], "check_res",
+ "result #%d: expected %lx, but got %lx\n",
+ j, exp[j], got[j]);
+ }
+cleanup:
+ test_core_extern__destroy(skel);
+ skel = NULL;
+ }
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
index 05fe85281ff7..31e177adbdf1 100644
--- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c
+++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
@@ -74,6 +74,7 @@
.b123 = 2, \
.c1c = 3, \
.d00d = 4, \
+ .f10c = 0, \
}, \
.output_len = sizeof(struct core_reloc_arrays_output) \
}
@@ -308,12 +309,15 @@ static struct core_reloc_test_case test_cases[] = {
ARRAYS_CASE(arrays),
ARRAYS_CASE(arrays___diff_arr_dim),
ARRAYS_CASE(arrays___diff_arr_val_sz),
+ ARRAYS_CASE(arrays___equiv_zero_sz_arr),
+ ARRAYS_CASE(arrays___fixed_arr),
ARRAYS_ERR_CASE(arrays___err_too_small),
ARRAYS_ERR_CASE(arrays___err_too_shallow),
ARRAYS_ERR_CASE(arrays___err_non_array),
ARRAYS_ERR_CASE(arrays___err_wrong_val_type1),
ARRAYS_ERR_CASE(arrays___err_wrong_val_type2),
+ ARRAYS_ERR_CASE(arrays___err_bad_zero_sz_arr),
/* enum/ptr/int handling scenarios */
PRIMITIVES_CASE(primitives),
diff --git a/tools/testing/selftests/bpf/prog_tests/cpu_mask.c b/tools/testing/selftests/bpf/prog_tests/cpu_mask.c
new file mode 100644
index 000000000000..1fa1bdbaffa9
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cpu_mask.c
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <bpf/btf.h>
+#include "libbpf_internal.h"
+
+static int duration = 0;
+
+static void validate_mask(int case_nr, const char *exp, bool *mask, int n)
+{
+ int i;
+
+ for (i = 0; exp[i]; i++) {
+ if (exp[i] == '1') {
+ if (CHECK(i + 1 > n, "mask_short",
+ "case #%d: mask too short, got n=%d, need at least %d\n",
+ case_nr, n, i + 1))
+ return;
+ CHECK(!mask[i], "cpu_not_set",
+ "case #%d: mask differs, expected cpu#%d SET\n",
+ case_nr, i);
+ } else {
+ CHECK(i < n && mask[i], "cpu_set",
+ "case #%d: mask differs, expected cpu#%d UNSET\n",
+ case_nr, i);
+ }
+ }
+ CHECK(i < n, "mask_long",
+ "case #%d: mask too long, got n=%d, expected at most %d\n",
+ case_nr, n, i);
+}
+
+static struct {
+ const char *cpu_mask;
+ const char *expect;
+ bool fails;
+} test_cases[] = {
+ { "0\n", "1", false },
+ { "0,2\n", "101", false },
+ { "0-2\n", "111", false },
+ { "0-2,3-4\n", "11111", false },
+ { "0", "1", false },
+ { "0-2", "111", false },
+ { "0,2", "101", false },
+ { "0,1-3", "1111", false },
+ { "0,1,2,3", "1111", false },
+ { "0,2-3,5", "101101", false },
+ { "3-3", "0001", false },
+ { "2-4,6,9-10", "00111010011", false },
+ /* failure cases */
+ { "", "", true },
+ { "0-", "", true },
+ { "0 ", "", true },
+ { "0_1", "", true },
+ { "1-0", "", true },
+ { "-1", "", true },
+};
+
+void test_cpu_mask()
+{
+ int i, err, n;
+ bool *mask;
+
+ for (i = 0; i < ARRAY_SIZE(test_cases); i++) {
+ mask = NULL;
+ err = parse_cpu_mask_str(test_cases[i].cpu_mask, &mask, &n);
+ if (test_cases[i].fails) {
+ CHECK(!err, "should_fail",
+ "case #%d: parsing should fail!\n", i + 1);
+ } else {
+ if (CHECK(err, "parse_err",
+ "case #%d: cpu mask parsing failed: %d\n",
+ i + 1, err))
+ continue;
+ validate_mask(i + 1, test_cases[i].expect, mask, n);
+ }
+ free(mask);
+ }
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c
index 40bcff2cc274..235ac4f67f5b 100644
--- a/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c
+++ b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c
@@ -1,90 +1,55 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook */
#include <test_progs.h>
+#include "test_pkt_access.skel.h"
+#include "fentry_test.skel.h"
+#include "fexit_test.skel.h"
void test_fentry_fexit(void)
{
- struct bpf_prog_load_attr attr_fentry = {
- .file = "./fentry_test.o",
- };
- struct bpf_prog_load_attr attr_fexit = {
- .file = "./fexit_test.o",
- };
-
- struct bpf_object *obj_fentry = NULL, *obj_fexit = NULL, *pkt_obj;
- struct bpf_map *data_map_fentry, *data_map_fexit;
- char fentry_name[] = "fentry/bpf_fentry_testX";
- char fexit_name[] = "fexit/bpf_fentry_testX";
- int err, pkt_fd, kfree_skb_fd, i;
- struct bpf_link *link[12] = {};
- struct bpf_program *prog[12];
- __u32 duration, retval;
- const int zero = 0;
- u64 result[12];
-
- err = bpf_prog_load("./test_pkt_access.o", BPF_PROG_TYPE_SCHED_CLS,
- &pkt_obj, &pkt_fd);
- if (CHECK(err, "prog_load sched cls", "err %d errno %d\n", err, errno))
+ struct test_pkt_access *pkt_skel = NULL;
+ struct fentry_test *fentry_skel = NULL;
+ struct fexit_test *fexit_skel = NULL;
+ __u64 *fentry_res, *fexit_res;
+ __u32 duration = 0, retval;
+ int err, pkt_fd, i;
+
+ pkt_skel = test_pkt_access__open_and_load();
+ if (CHECK(!pkt_skel, "pkt_skel_load", "pkt_access skeleton failed\n"))
return;
- err = bpf_prog_load_xattr(&attr_fentry, &obj_fentry, &kfree_skb_fd);
- if (CHECK(err, "prog_load fail", "err %d errno %d\n", err, errno))
+ fentry_skel = fentry_test__open_and_load();
+ if (CHECK(!fentry_skel, "fentry_skel_load", "fentry skeleton failed\n"))
goto close_prog;
- err = bpf_prog_load_xattr(&attr_fexit, &obj_fexit, &kfree_skb_fd);
- if (CHECK(err, "prog_load fail", "err %d errno %d\n", err, errno))
+ fexit_skel = fexit_test__open_and_load();
+ if (CHECK(!fexit_skel, "fexit_skel_load", "fexit skeleton failed\n"))
goto close_prog;
- for (i = 0; i < 6; i++) {
- fentry_name[sizeof(fentry_name) - 2] = '1' + i;
- prog[i] = bpf_object__find_program_by_title(obj_fentry, fentry_name);
- if (CHECK(!prog[i], "find_prog", "prog %s not found\n", fentry_name))
- goto close_prog;
- link[i] = bpf_program__attach_trace(prog[i]);
- if (CHECK(IS_ERR(link[i]), "attach_trace", "failed to link\n"))
- goto close_prog;
- }
- data_map_fentry = bpf_object__find_map_by_name(obj_fentry, "fentry_t.bss");
- if (CHECK(!data_map_fentry, "find_data_map", "data map not found\n"))
+ err = fentry_test__attach(fentry_skel);
+ if (CHECK(err, "fentry_attach", "fentry attach failed: %d\n", err))
goto close_prog;
-
- for (i = 6; i < 12; i++) {
- fexit_name[sizeof(fexit_name) - 2] = '1' + i - 6;
- prog[i] = bpf_object__find_program_by_title(obj_fexit, fexit_name);
- if (CHECK(!prog[i], "find_prog", "prog %s not found\n", fexit_name))
- goto close_prog;
- link[i] = bpf_program__attach_trace(prog[i]);
- if (CHECK(IS_ERR(link[i]), "attach_trace", "failed to link\n"))
- goto close_prog;
- }
- data_map_fexit = bpf_object__find_map_by_name(obj_fexit, "fexit_te.bss");
- if (CHECK(!data_map_fexit, "find_data_map", "data map not found\n"))
+ err = fexit_test__attach(fexit_skel);
+ if (CHECK(err, "fexit_attach", "fexit attach failed: %d\n", err))
goto close_prog;
+ pkt_fd = bpf_program__fd(pkt_skel->progs.test_pkt_access);
err = bpf_prog_test_run(pkt_fd, 1, &pkt_v6, sizeof(pkt_v6),
NULL, NULL, &retval, &duration);
CHECK(err || retval, "ipv6",
"err %d errno %d retval %d duration %d\n",
err, errno, retval, duration);
- err = bpf_map_lookup_elem(bpf_map__fd(data_map_fentry), &zero, &result);
- if (CHECK(err, "get_result",
- "failed to get output data: %d\n", err))
- goto close_prog;
-
- err = bpf_map_lookup_elem(bpf_map__fd(data_map_fexit), &zero, result + 6);
- if (CHECK(err, "get_result",
- "failed to get output data: %d\n", err))
- goto close_prog;
-
- for (i = 0; i < 12; i++)
- if (CHECK(result[i] != 1, "result", "bpf_fentry_test%d failed err %ld\n",
- i % 6 + 1, result[i]))
- goto close_prog;
+ fentry_res = (__u64 *)fentry_skel->bss;
+ fexit_res = (__u64 *)fexit_skel->bss;
+ printf("%lld\n", fentry_skel->bss->test1_result);
+ for (i = 0; i < 6; i++) {
+ CHECK(fentry_res[i] != 1, "result",
+ "fentry_test%d failed err %lld\n", i + 1, fentry_res[i]);
+ CHECK(fexit_res[i] != 1, "result",
+ "fexit_test%d failed err %lld\n", i + 1, fexit_res[i]);
+ }
close_prog:
- for (i = 0; i < 12; i++)
- if (!IS_ERR_OR_NULL(link[i]))
- bpf_link__destroy(link[i]);
- bpf_object__close(obj_fentry);
- bpf_object__close(obj_fexit);
- bpf_object__close(pkt_obj);
+ test_pkt_access__destroy(pkt_skel);
+ fentry_test__destroy(fentry_skel);
+ fexit_test__destroy(fexit_skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_test.c b/tools/testing/selftests/bpf/prog_tests/fentry_test.c
index 9fb103193878..e1a379f5f7d2 100644
--- a/tools/testing/selftests/bpf/prog_tests/fentry_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/fentry_test.c
@@ -1,64 +1,43 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook */
#include <test_progs.h>
+#include "test_pkt_access.skel.h"
+#include "fentry_test.skel.h"
void test_fentry_test(void)
{
- struct bpf_prog_load_attr attr = {
- .file = "./fentry_test.o",
- };
-
- char prog_name[] = "fentry/bpf_fentry_testX";
- struct bpf_object *obj = NULL, *pkt_obj;
- int err, pkt_fd, kfree_skb_fd, i;
- struct bpf_link *link[6] = {};
- struct bpf_program *prog[6];
+ struct test_pkt_access *pkt_skel = NULL;
+ struct fentry_test *fentry_skel = NULL;
+ int err, pkt_fd, i;
__u32 duration, retval;
- struct bpf_map *data_map;
- const int zero = 0;
- u64 result[6];
+ __u64 *result;
- err = bpf_prog_load("./test_pkt_access.o", BPF_PROG_TYPE_SCHED_CLS,
- &pkt_obj, &pkt_fd);
- if (CHECK(err, "prog_load sched cls", "err %d errno %d\n", err, errno))
+ pkt_skel = test_pkt_access__open_and_load();
+ if (CHECK(!pkt_skel, "pkt_skel_load", "pkt_access skeleton failed\n"))
return;
- err = bpf_prog_load_xattr(&attr, &obj, &kfree_skb_fd);
- if (CHECK(err, "prog_load fail", "err %d errno %d\n", err, errno))
- goto close_prog;
+ fentry_skel = fentry_test__open_and_load();
+ if (CHECK(!fentry_skel, "fentry_skel_load", "fentry skeleton failed\n"))
+ goto cleanup;
- for (i = 0; i < 6; i++) {
- prog_name[sizeof(prog_name) - 2] = '1' + i;
- prog[i] = bpf_object__find_program_by_title(obj, prog_name);
- if (CHECK(!prog[i], "find_prog", "prog %s not found\n", prog_name))
- goto close_prog;
- link[i] = bpf_program__attach_trace(prog[i]);
- if (CHECK(IS_ERR(link[i]), "attach_trace", "failed to link\n"))
- goto close_prog;
- }
- data_map = bpf_object__find_map_by_name(obj, "fentry_t.bss");
- if (CHECK(!data_map, "find_data_map", "data map not found\n"))
- goto close_prog;
+ err = fentry_test__attach(fentry_skel);
+ if (CHECK(err, "fentry_attach", "fentry attach failed: %d\n", err))
+ goto cleanup;
+ pkt_fd = bpf_program__fd(pkt_skel->progs.test_pkt_access);
err = bpf_prog_test_run(pkt_fd, 1, &pkt_v6, sizeof(pkt_v6),
NULL, NULL, &retval, &duration);
CHECK(err || retval, "ipv6",
"err %d errno %d retval %d duration %d\n",
err, errno, retval, duration);
- err = bpf_map_lookup_elem(bpf_map__fd(data_map), &zero, &result);
- if (CHECK(err, "get_result",
- "failed to get output data: %d\n", err))
- goto close_prog;
-
- for (i = 0; i < 6; i++)
- if (CHECK(result[i] != 1, "result", "bpf_fentry_test%d failed err %ld\n",
- i + 1, result[i]))
- goto close_prog;
+ result = (__u64 *)fentry_skel->bss;
+ for (i = 0; i < 6; i++) {
+ if (CHECK(result[i] != 1, "result",
+ "fentry_test%d failed err %lld\n", i + 1, result[i]))
+ goto cleanup;
+ }
-close_prog:
- for (i = 0; i < 6; i++)
- if (!IS_ERR_OR_NULL(link[i]))
- bpf_link__destroy(link[i]);
- bpf_object__close(obj);
- bpf_object__close(pkt_obj);
+cleanup:
+ fentry_test__destroy(fentry_skel);
+ test_pkt_access__destroy(pkt_skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/mmap.c b/tools/testing/selftests/bpf/prog_tests/mmap.c
index 051a6d48762c..16a814eb4d64 100644
--- a/tools/testing/selftests/bpf/prog_tests/mmap.c
+++ b/tools/testing/selftests/bpf/prog_tests/mmap.c
@@ -1,16 +1,12 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
#include <sys/mman.h>
+#include "test_mmap.skel.h"
struct map_data {
__u64 val[512 * 4];
};
-struct bss_data {
- __u64 in_val;
- __u64 out_val;
-};
-
static size_t roundup_page(size_t sz)
{
long page_size = sysconf(_SC_PAGE_SIZE);
@@ -19,41 +15,25 @@ static size_t roundup_page(size_t sz)
void test_mmap(void)
{
- const char *file = "test_mmap.o";
- const char *probe_name = "raw_tracepoint/sys_enter";
- const char *tp_name = "sys_enter";
- const size_t bss_sz = roundup_page(sizeof(struct bss_data));
+ const size_t bss_sz = roundup_page(sizeof(struct test_mmap__bss));
const size_t map_sz = roundup_page(sizeof(struct map_data));
const int zero = 0, one = 1, two = 2, far = 1500;
const long page_size = sysconf(_SC_PAGE_SIZE);
int err, duration = 0, i, data_map_fd;
- struct bpf_program *prog;
- struct bpf_object *obj;
- struct bpf_link *link = NULL;
struct bpf_map *data_map, *bss_map;
void *bss_mmaped = NULL, *map_mmaped = NULL, *tmp1, *tmp2;
- volatile struct bss_data *bss_data;
- volatile struct map_data *map_data;
+ struct test_mmap__bss *bss_data;
+ struct map_data *map_data;
+ struct test_mmap *skel;
__u64 val = 0;
- obj = bpf_object__open_file("test_mmap.o", NULL);
- if (CHECK(IS_ERR(obj), "obj_open", "failed to open '%s': %ld\n",
- file, PTR_ERR(obj)))
+
+ skel = test_mmap__open_and_load();
+ if (CHECK(!skel, "skel_open_and_load", "skeleton open/load failed\n"))
return;
- prog = bpf_object__find_program_by_title(obj, probe_name);
- if (CHECK(!prog, "find_probe", "prog '%s' not found\n", probe_name))
- goto cleanup;
- err = bpf_object__load(obj);
- if (CHECK(err, "obj_load", "failed to load prog '%s': %d\n",
- probe_name, err))
- goto cleanup;
- bss_map = bpf_object__find_map_by_name(obj, "test_mma.bss");
- if (CHECK(!bss_map, "find_bss_map", ".bss map not found\n"))
- goto cleanup;
- data_map = bpf_object__find_map_by_name(obj, "data_map");
- if (CHECK(!data_map, "find_data_map", "data_map map not found\n"))
- goto cleanup;
+ bss_map = skel->maps.bss;
+ data_map = skel->maps.data_map;
data_map_fd = bpf_map__fd(data_map);
bss_mmaped = mmap(NULL, bss_sz, PROT_READ | PROT_WRITE, MAP_SHARED,
@@ -77,13 +57,15 @@ void test_mmap(void)
CHECK_FAIL(bss_data->in_val);
CHECK_FAIL(bss_data->out_val);
+ CHECK_FAIL(skel->bss->in_val);
+ CHECK_FAIL(skel->bss->out_val);
CHECK_FAIL(map_data->val[0]);
CHECK_FAIL(map_data->val[1]);
CHECK_FAIL(map_data->val[2]);
CHECK_FAIL(map_data->val[far]);
- link = bpf_program__attach_raw_tracepoint(prog, tp_name);
- if (CHECK(IS_ERR(link), "attach_raw_tp", "err %ld\n", PTR_ERR(link)))
+ err = test_mmap__attach(skel);
+ if (CHECK(err, "attach_raw_tp", "err %d\n", err))
goto cleanup;
bss_data->in_val = 123;
@@ -94,6 +76,8 @@ void test_mmap(void)
CHECK_FAIL(bss_data->in_val != 123);
CHECK_FAIL(bss_data->out_val != 123);
+ CHECK_FAIL(skel->bss->in_val != 123);
+ CHECK_FAIL(skel->bss->out_val != 123);
CHECK_FAIL(map_data->val[0] != 111);
CHECK_FAIL(map_data->val[1] != 222);
CHECK_FAIL(map_data->val[2] != 123);
@@ -160,6 +144,8 @@ void test_mmap(void)
usleep(1);
CHECK_FAIL(bss_data->in_val != 321);
CHECK_FAIL(bss_data->out_val != 321);
+ CHECK_FAIL(skel->bss->in_val != 321);
+ CHECK_FAIL(skel->bss->out_val != 321);
CHECK_FAIL(map_data->val[0] != 111);
CHECK_FAIL(map_data->val[1] != 222);
CHECK_FAIL(map_data->val[2] != 321);
@@ -203,6 +189,8 @@ void test_mmap(void)
map_data = tmp2;
CHECK_FAIL(bss_data->in_val != 321);
CHECK_FAIL(bss_data->out_val != 321);
+ CHECK_FAIL(skel->bss->in_val != 321);
+ CHECK_FAIL(skel->bss->out_val != 321);
CHECK_FAIL(map_data->val[0] != 111);
CHECK_FAIL(map_data->val[1] != 222);
CHECK_FAIL(map_data->val[2] != 321);
@@ -214,7 +202,5 @@ cleanup:
CHECK_FAIL(munmap(bss_mmaped, bss_sz));
if (map_mmaped)
CHECK_FAIL(munmap(map_mmaped, map_sz));
- if (!IS_ERR_OR_NULL(link))
- bpf_link__destroy(link);
- bpf_object__close(obj);
+ test_mmap__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c
index 3003fddc0613..cf6c87936c69 100644
--- a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c
+++ b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c
@@ -4,6 +4,7 @@
#include <sched.h>
#include <sys/socket.h>
#include <test_progs.h>
+#include "libbpf_internal.h"
static void on_sample(void *ctx, int cpu, void *data, __u32 size)
{
@@ -19,7 +20,7 @@ static void on_sample(void *ctx, int cpu, void *data, __u32 size)
void test_perf_buffer(void)
{
- int err, prog_fd, nr_cpus, i, duration = 0;
+ int err, prog_fd, on_len, nr_on_cpus = 0, nr_cpus, i, duration = 0;
const char *prog_name = "kprobe/sys_nanosleep";
const char *file = "./test_perf_buffer.o";
struct perf_buffer_opts pb_opts = {};
@@ -29,15 +30,27 @@ void test_perf_buffer(void)
struct bpf_object *obj;
struct perf_buffer *pb;
struct bpf_link *link;
+ bool *online;
nr_cpus = libbpf_num_possible_cpus();
if (CHECK(nr_cpus < 0, "nr_cpus", "err %d\n", nr_cpus))
return;
+ err = parse_cpu_mask_file("/sys/devices/system/cpu/online",
+ &online, &on_len);
+ if (CHECK(err, "nr_on_cpus", "err %d\n", err))
+ return;
+
+ for (i = 0; i < on_len; i++)
+ if (online[i])
+ nr_on_cpus++;
+
/* load program */
err = bpf_prog_load(file, BPF_PROG_TYPE_KPROBE, &obj, &prog_fd);
- if (CHECK(err, "obj_load", "err %d errno %d\n", err, errno))
- return;
+ if (CHECK(err, "obj_load", "err %d errno %d\n", err, errno)) {
+ obj = NULL;
+ goto out_close;
+ }
prog = bpf_object__find_program_by_title(obj, prog_name);
if (CHECK(!prog, "find_probe", "prog '%s' not found\n", prog_name))
@@ -64,6 +77,11 @@ void test_perf_buffer(void)
/* trigger kprobe on every CPU */
CPU_ZERO(&cpu_seen);
for (i = 0; i < nr_cpus; i++) {
+ if (i >= on_len || !online[i]) {
+ printf("skipping offline CPU #%d\n", i);
+ continue;
+ }
+
CPU_ZERO(&cpu_set);
CPU_SET(i, &cpu_set);
@@ -81,8 +99,8 @@ void test_perf_buffer(void)
if (CHECK(err < 0, "perf_buffer__poll", "err %d\n", err))
goto out_free_pb;
- if (CHECK(CPU_COUNT(&cpu_seen) != nr_cpus, "seen_cpu_cnt",
- "expect %d, seen %d\n", nr_cpus, CPU_COUNT(&cpu_seen)))
+ if (CHECK(CPU_COUNT(&cpu_seen) != nr_on_cpus, "seen_cpu_cnt",
+ "expect %d, seen %d\n", nr_on_cpus, CPU_COUNT(&cpu_seen)))
goto out_free_pb;
out_free_pb:
@@ -91,4 +109,5 @@ out_detach:
bpf_link__destroy(link);
out_close:
bpf_object__close(obj);
+ free(online);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/probe_user.c b/tools/testing/selftests/bpf/prog_tests/probe_user.c
index 8a3187dec048..7aecfd9e87d1 100644
--- a/tools/testing/selftests/bpf/prog_tests/probe_user.c
+++ b/tools/testing/selftests/bpf/prog_tests/probe_user.c
@@ -3,8 +3,7 @@
void test_probe_user(void)
{
-#define kprobe_name "__sys_connect"
- const char *prog_name = "kprobe/" kprobe_name;
+ const char *prog_name = "kprobe/__sys_connect";
const char *obj_file = "./test_probe_user.o";
DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts, );
int err, results_map_fd, sock_fd, duration = 0;
@@ -33,8 +32,7 @@ void test_probe_user(void)
"err %d\n", results_map_fd))
goto cleanup;
- kprobe_link = bpf_program__attach_kprobe(kprobe_prog, false,
- kprobe_name);
+ kprobe_link = bpf_program__attach(kprobe_prog);
if (CHECK(IS_ERR(kprobe_link), "attach_kprobe",
"err %ld\n", PTR_ERR(kprobe_link))) {
kprobe_link = NULL;
diff --git a/tools/testing/selftests/bpf/prog_tests/rdonly_maps.c b/tools/testing/selftests/bpf/prog_tests/rdonly_maps.c
index d90acc13d1ec..563e12120e77 100644
--- a/tools/testing/selftests/bpf/prog_tests/rdonly_maps.c
+++ b/tools/testing/selftests/bpf/prog_tests/rdonly_maps.c
@@ -16,14 +16,11 @@ struct rdonly_map_subtest {
void test_rdonly_maps(void)
{
- const char *prog_name_skip_loop = "raw_tracepoint/sys_enter:skip_loop";
- const char *prog_name_part_loop = "raw_tracepoint/sys_enter:part_loop";
- const char *prog_name_full_loop = "raw_tracepoint/sys_enter:full_loop";
const char *file = "test_rdonly_maps.o";
struct rdonly_map_subtest subtests[] = {
- { "skip loop", prog_name_skip_loop, 0, 0 },
- { "part loop", prog_name_part_loop, 3, 2 + 3 + 4 },
- { "full loop", prog_name_full_loop, 4, 2 + 3 + 4 + 5 },
+ { "skip loop", "skip_loop", 0, 0 },
+ { "part loop", "part_loop", 3, 2 + 3 + 4 },
+ { "full loop", "full_loop", 4, 2 + 3 + 4 + 5 },
};
int i, err, zero = 0, duration = 0;
struct bpf_link *link = NULL;
@@ -50,7 +47,7 @@ void test_rdonly_maps(void)
if (!test__start_subtest(t->subtest_name))
continue;
- prog = bpf_object__find_program_by_title(obj, t->prog_name);
+ prog = bpf_object__find_program_by_name(obj, t->prog_name);
if (CHECK(!prog, "find_prog", "prog '%s' not found\n",
t->prog_name))
goto cleanup;
diff --git a/tools/testing/selftests/bpf/test_select_reuseport.c b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c
index 7566c13eb51a..2c37ae7dc214 100644
--- a/tools/testing/selftests/bpf/test_select_reuseport.c
+++ b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c
@@ -20,8 +20,11 @@
#include <bpf/libbpf.h>
#include "bpf_rlimit.h"
#include "bpf_util.h"
+
+#include "test_progs.h"
#include "test_select_reuseport_common.h"
+#define MAX_TEST_NAME 80
#define MIN_TCPHDR_LEN 20
#define UDPHDR_LEN 8
@@ -32,11 +35,11 @@
static int result_map, tmp_index_ovr_map, linum_map, data_check_map;
static enum result expected_results[NR_RESULTS];
static int sk_fds[REUSEPORT_ARRAY_SIZE];
-static int reuseport_array, outer_map;
+static int reuseport_array = -1, outer_map = -1;
static int select_by_skb_data_prog;
-static int saved_tcp_syncookie;
+static int saved_tcp_syncookie = -1;
static struct bpf_object *obj;
-static int saved_tcp_fo;
+static int saved_tcp_fo = -1;
static __u32 index_zero;
static int epfd;
@@ -46,16 +49,21 @@ static union sa46 {
sa_family_t family;
} srv_sa;
-#define CHECK(condition, tag, format...) ({ \
- int __ret = !!(condition); \
- if (__ret) { \
- printf("%s(%d):FAIL:%s ", __func__, __LINE__, tag); \
- printf(format); \
- exit(-1); \
+#define RET_IF(condition, tag, format...) ({ \
+ if (CHECK_FAIL(condition)) { \
+ printf(tag " " format); \
+ return; \
+ } \
+})
+
+#define RET_ERR(condition, tag, format...) ({ \
+ if (CHECK_FAIL(condition)) { \
+ printf(tag " " format); \
+ return -1; \
} \
})
-static void create_maps(void)
+static int create_maps(void)
{
struct bpf_create_map_attr attr = {};
@@ -67,8 +75,8 @@ static void create_maps(void)
attr.max_entries = REUSEPORT_ARRAY_SIZE;
reuseport_array = bpf_create_map_xattr(&attr);
- CHECK(reuseport_array == -1, "creating reuseport_array",
- "reuseport_array:%d errno:%d\n", reuseport_array, errno);
+ RET_ERR(reuseport_array == -1, "creating reuseport_array",
+ "reuseport_array:%d errno:%d\n", reuseport_array, errno);
/* Creating outer_map */
attr.name = "outer_map";
@@ -78,63 +86,61 @@ static void create_maps(void)
attr.max_entries = 1;
attr.inner_map_fd = reuseport_array;
outer_map = bpf_create_map_xattr(&attr);
- CHECK(outer_map == -1, "creating outer_map",
- "outer_map:%d errno:%d\n", outer_map, errno);
+ RET_ERR(outer_map == -1, "creating outer_map",
+ "outer_map:%d errno:%d\n", outer_map, errno);
+
+ return 0;
}
-static void prepare_bpf_obj(void)
+static int prepare_bpf_obj(void)
{
struct bpf_program *prog;
struct bpf_map *map;
int err;
- struct bpf_object_open_attr attr = {
- .file = "test_select_reuseport_kern.o",
- .prog_type = BPF_PROG_TYPE_SK_REUSEPORT,
- };
- obj = bpf_object__open_xattr(&attr);
- CHECK(IS_ERR_OR_NULL(obj), "open test_select_reuseport_kern.o",
- "obj:%p PTR_ERR(obj):%ld\n", obj, PTR_ERR(obj));
-
- prog = bpf_program__next(NULL, obj);
- CHECK(!prog, "get first bpf_program", "!prog\n");
- bpf_program__set_type(prog, attr.prog_type);
+ obj = bpf_object__open("test_select_reuseport_kern.o");
+ RET_ERR(IS_ERR_OR_NULL(obj), "open test_select_reuseport_kern.o",
+ "obj:%p PTR_ERR(obj):%ld\n", obj, PTR_ERR(obj));
map = bpf_object__find_map_by_name(obj, "outer_map");
- CHECK(!map, "find outer_map", "!map\n");
+ RET_ERR(!map, "find outer_map", "!map\n");
err = bpf_map__reuse_fd(map, outer_map);
- CHECK(err, "reuse outer_map", "err:%d\n", err);
+ RET_ERR(err, "reuse outer_map", "err:%d\n", err);
err = bpf_object__load(obj);
- CHECK(err, "load bpf_object", "err:%d\n", err);
+ RET_ERR(err, "load bpf_object", "err:%d\n", err);
+ prog = bpf_program__next(NULL, obj);
+ RET_ERR(!prog, "get first bpf_program", "!prog\n");
select_by_skb_data_prog = bpf_program__fd(prog);
- CHECK(select_by_skb_data_prog == -1, "get prog fd",
- "select_by_skb_data_prog:%d\n", select_by_skb_data_prog);
+ RET_ERR(select_by_skb_data_prog == -1, "get prog fd",
+ "select_by_skb_data_prog:%d\n", select_by_skb_data_prog);
map = bpf_object__find_map_by_name(obj, "result_map");
- CHECK(!map, "find result_map", "!map\n");
+ RET_ERR(!map, "find result_map", "!map\n");
result_map = bpf_map__fd(map);
- CHECK(result_map == -1, "get result_map fd",
- "result_map:%d\n", result_map);
+ RET_ERR(result_map == -1, "get result_map fd",
+ "result_map:%d\n", result_map);
map = bpf_object__find_map_by_name(obj, "tmp_index_ovr_map");
- CHECK(!map, "find tmp_index_ovr_map", "!map\n");
+ RET_ERR(!map, "find tmp_index_ovr_map\n", "!map");
tmp_index_ovr_map = bpf_map__fd(map);
- CHECK(tmp_index_ovr_map == -1, "get tmp_index_ovr_map fd",
- "tmp_index_ovr_map:%d\n", tmp_index_ovr_map);
+ RET_ERR(tmp_index_ovr_map == -1, "get tmp_index_ovr_map fd",
+ "tmp_index_ovr_map:%d\n", tmp_index_ovr_map);
map = bpf_object__find_map_by_name(obj, "linum_map");
- CHECK(!map, "find linum_map", "!map\n");
+ RET_ERR(!map, "find linum_map", "!map\n");
linum_map = bpf_map__fd(map);
- CHECK(linum_map == -1, "get linum_map fd",
- "linum_map:%d\n", linum_map);
+ RET_ERR(linum_map == -1, "get linum_map fd",
+ "linum_map:%d\n", linum_map);
map = bpf_object__find_map_by_name(obj, "data_check_map");
- CHECK(!map, "find data_check_map", "!map\n");
+ RET_ERR(!map, "find data_check_map", "!map\n");
data_check_map = bpf_map__fd(map);
- CHECK(data_check_map == -1, "get data_check_map fd",
- "data_check_map:%d\n", data_check_map);
+ RET_ERR(data_check_map == -1, "get data_check_map fd",
+ "data_check_map:%d\n", data_check_map);
+
+ return 0;
}
static void sa46_init_loopback(union sa46 *sa, sa_family_t family)
@@ -163,65 +169,73 @@ static int read_int_sysctl(const char *sysctl)
int fd, ret;
fd = open(sysctl, 0);
- CHECK(fd == -1, "open(sysctl)", "sysctl:%s fd:%d errno:%d\n",
- sysctl, fd, errno);
+ RET_ERR(fd == -1, "open(sysctl)",
+ "sysctl:%s fd:%d errno:%d\n", sysctl, fd, errno);
ret = read(fd, buf, sizeof(buf));
- CHECK(ret <= 0, "read(sysctl)", "sysctl:%s ret:%d errno:%d\n",
- sysctl, ret, errno);
- close(fd);
+ RET_ERR(ret <= 0, "read(sysctl)",
+ "sysctl:%s ret:%d errno:%d\n", sysctl, ret, errno);
+ close(fd);
return atoi(buf);
}
-static void write_int_sysctl(const char *sysctl, int v)
+static int write_int_sysctl(const char *sysctl, int v)
{
int fd, ret, size;
char buf[16];
fd = open(sysctl, O_RDWR);
- CHECK(fd == -1, "open(sysctl)", "sysctl:%s fd:%d errno:%d\n",
- sysctl, fd, errno);
+ RET_ERR(fd == -1, "open(sysctl)",
+ "sysctl:%s fd:%d errno:%d\n", sysctl, fd, errno);
size = snprintf(buf, sizeof(buf), "%d", v);
ret = write(fd, buf, size);
- CHECK(ret != size, "write(sysctl)",
- "sysctl:%s ret:%d size:%d errno:%d\n", sysctl, ret, size, errno);
+ RET_ERR(ret != size, "write(sysctl)",
+ "sysctl:%s ret:%d size:%d errno:%d\n",
+ sysctl, ret, size, errno);
+
close(fd);
+ return 0;
}
static void restore_sysctls(void)
{
- write_int_sysctl(TCP_FO_SYSCTL, saved_tcp_fo);
- write_int_sysctl(TCP_SYNCOOKIE_SYSCTL, saved_tcp_syncookie);
+ if (saved_tcp_fo != -1)
+ write_int_sysctl(TCP_FO_SYSCTL, saved_tcp_fo);
+ if (saved_tcp_syncookie != -1)
+ write_int_sysctl(TCP_SYNCOOKIE_SYSCTL, saved_tcp_syncookie);
}
-static void enable_fastopen(void)
+static int enable_fastopen(void)
{
int fo;
fo = read_int_sysctl(TCP_FO_SYSCTL);
- write_int_sysctl(TCP_FO_SYSCTL, fo | 7);
+ if (fo < 0)
+ return -1;
+
+ return write_int_sysctl(TCP_FO_SYSCTL, fo | 7);
}
-static void enable_syncookie(void)
+static int enable_syncookie(void)
{
- write_int_sysctl(TCP_SYNCOOKIE_SYSCTL, 2);
+ return write_int_sysctl(TCP_SYNCOOKIE_SYSCTL, 2);
}
-static void disable_syncookie(void)
+static int disable_syncookie(void)
{
- write_int_sysctl(TCP_SYNCOOKIE_SYSCTL, 0);
+ return write_int_sysctl(TCP_SYNCOOKIE_SYSCTL, 0);
}
-static __u32 get_linum(void)
+static long get_linum(void)
{
__u32 linum;
int err;
err = bpf_map_lookup_elem(linum_map, &index_zero, &linum);
- CHECK(err == -1, "lookup_elem(linum_map)", "err:%d errno:%d\n",
- err, errno);
+ RET_ERR(err == -1, "lookup_elem(linum_map)", "err:%d errno:%d\n",
+ err, errno);
return linum;
}
@@ -237,12 +251,12 @@ static void check_data(int type, sa_family_t family, const struct cmd *cmd,
addrlen = sizeof(cli_sa);
err = getsockname(cli_fd, (struct sockaddr *)&cli_sa,
&addrlen);
- CHECK(err == -1, "getsockname(cli_fd)", "err:%d errno:%d\n",
- err, errno);
+ RET_IF(err == -1, "getsockname(cli_fd)", "err:%d errno:%d\n",
+ err, errno);
err = bpf_map_lookup_elem(data_check_map, &index_zero, &result);
- CHECK(err == -1, "lookup_elem(data_check_map)", "err:%d errno:%d\n",
- err, errno);
+ RET_IF(err == -1, "lookup_elem(data_check_map)", "err:%d errno:%d\n",
+ err, errno);
if (type == SOCK_STREAM) {
expected.len = MIN_TCPHDR_LEN;
@@ -284,22 +298,22 @@ static void check_data(int type, sa_family_t family, const struct cmd *cmd,
printf("expected: (0x%x, %u, %u)\n",
expected.eth_protocol, expected.ip_protocol,
expected.bind_inany);
- CHECK(1, "data_check result != expected",
- "bpf_prog_linum:%u\n", get_linum());
+ RET_IF(1, "data_check result != expected",
+ "bpf_prog_linum:%ld\n", get_linum());
}
- CHECK(!result.hash, "data_check result.hash empty",
- "result.hash:%u", result.hash);
+ RET_IF(!result.hash, "data_check result.hash empty",
+ "result.hash:%u", result.hash);
expected.len += cmd ? sizeof(*cmd) : 0;
if (type == SOCK_STREAM)
- CHECK(expected.len > result.len, "expected.len > result.len",
- "expected.len:%u result.len:%u bpf_prog_linum:%u\n",
- expected.len, result.len, get_linum());
+ RET_IF(expected.len > result.len, "expected.len > result.len",
+ "expected.len:%u result.len:%u bpf_prog_linum:%ld\n",
+ expected.len, result.len, get_linum());
else
- CHECK(expected.len != result.len, "expected.len != result.len",
- "expected.len:%u result.len:%u bpf_prog_linum:%u\n",
- expected.len, result.len, get_linum());
+ RET_IF(expected.len != result.len, "expected.len != result.len",
+ "expected.len:%u result.len:%u bpf_prog_linum:%ld\n",
+ expected.len, result.len, get_linum());
}
static void check_results(void)
@@ -310,8 +324,8 @@ static void check_results(void)
for (i = 0; i < NR_RESULTS; i++) {
err = bpf_map_lookup_elem(result_map, &i, &results[i]);
- CHECK(err == -1, "lookup_elem(result_map)",
- "i:%u err:%d errno:%d\n", i, err, errno);
+ RET_IF(err == -1, "lookup_elem(result_map)",
+ "i:%u err:%d errno:%d\n", i, err, errno);
}
for (i = 0; i < NR_RESULTS; i++) {
@@ -337,10 +351,10 @@ static void check_results(void)
printf(", %u", expected_results[i]);
printf("]\n");
- CHECK(expected_results[broken] != results[broken],
- "unexpected result",
- "expected_results[%u] != results[%u] bpf_prog_linum:%u\n",
- broken, broken, get_linum());
+ RET_IF(expected_results[broken] != results[broken],
+ "unexpected result",
+ "expected_results[%u] != results[%u] bpf_prog_linum:%ld\n",
+ broken, broken, get_linum());
}
static int send_data(int type, sa_family_t family, void *data, size_t len,
@@ -350,17 +364,17 @@ static int send_data(int type, sa_family_t family, void *data, size_t len,
int fd, err;
fd = socket(family, type, 0);
- CHECK(fd == -1, "socket()", "fd:%d errno:%d\n", fd, errno);
+ RET_ERR(fd == -1, "socket()", "fd:%d errno:%d\n", fd, errno);
sa46_init_loopback(&cli_sa, family);
err = bind(fd, (struct sockaddr *)&cli_sa, sizeof(cli_sa));
- CHECK(fd == -1, "bind(cli_sa)", "err:%d errno:%d\n", err, errno);
+ RET_ERR(fd == -1, "bind(cli_sa)", "err:%d errno:%d\n", err, errno);
err = sendto(fd, data, len, MSG_FASTOPEN, (struct sockaddr *)&srv_sa,
sizeof(srv_sa));
- CHECK(err != len && expected >= PASS,
- "sendto()", "family:%u err:%d errno:%d expected:%d\n",
- family, err, errno, expected);
+ RET_ERR(err != len && expected >= PASS,
+ "sendto()", "family:%u err:%d errno:%d expected:%d\n",
+ family, err, errno, expected);
return fd;
}
@@ -375,47 +389,49 @@ static void do_test(int type, sa_family_t family, struct cmd *cmd,
cli_fd = send_data(type, family, cmd, cmd ? sizeof(*cmd) : 0,
expected);
+ if (cli_fd < 0)
+ return;
nev = epoll_wait(epfd, &ev, 1, expected >= PASS ? 5 : 0);
- CHECK((nev <= 0 && expected >= PASS) ||
- (nev > 0 && expected < PASS),
- "nev <> expected",
- "nev:%d expected:%d type:%d family:%d data:(%d, %d)\n",
- nev, expected, type, family,
- cmd ? cmd->reuseport_index : -1,
- cmd ? cmd->pass_on_failure : -1);
+ RET_IF((nev <= 0 && expected >= PASS) ||
+ (nev > 0 && expected < PASS),
+ "nev <> expected",
+ "nev:%d expected:%d type:%d family:%d data:(%d, %d)\n",
+ nev, expected, type, family,
+ cmd ? cmd->reuseport_index : -1,
+ cmd ? cmd->pass_on_failure : -1);
check_results();
check_data(type, family, cmd, cli_fd);
if (expected < PASS)
return;
- CHECK(expected != PASS_ERR_SK_SELECT_REUSEPORT &&
- cmd->reuseport_index != ev.data.u32,
- "check cmd->reuseport_index",
- "cmd:(%u, %u) ev.data.u32:%u\n",
- cmd->pass_on_failure, cmd->reuseport_index, ev.data.u32);
+ RET_IF(expected != PASS_ERR_SK_SELECT_REUSEPORT &&
+ cmd->reuseport_index != ev.data.u32,
+ "check cmd->reuseport_index",
+ "cmd:(%u, %u) ev.data.u32:%u\n",
+ cmd->pass_on_failure, cmd->reuseport_index, ev.data.u32);
srv_fd = sk_fds[ev.data.u32];
if (type == SOCK_STREAM) {
int new_fd = accept(srv_fd, NULL, 0);
- CHECK(new_fd == -1, "accept(srv_fd)",
- "ev.data.u32:%u new_fd:%d errno:%d\n",
- ev.data.u32, new_fd, errno);
+ RET_IF(new_fd == -1, "accept(srv_fd)",
+ "ev.data.u32:%u new_fd:%d errno:%d\n",
+ ev.data.u32, new_fd, errno);
nread = recv(new_fd, &rcv_cmd, sizeof(rcv_cmd), MSG_DONTWAIT);
- CHECK(nread != sizeof(rcv_cmd),
- "recv(new_fd)",
- "ev.data.u32:%u nread:%zd sizeof(rcv_cmd):%zu errno:%d\n",
- ev.data.u32, nread, sizeof(rcv_cmd), errno);
+ RET_IF(nread != sizeof(rcv_cmd),
+ "recv(new_fd)",
+ "ev.data.u32:%u nread:%zd sizeof(rcv_cmd):%zu errno:%d\n",
+ ev.data.u32, nread, sizeof(rcv_cmd), errno);
close(new_fd);
} else {
nread = recv(srv_fd, &rcv_cmd, sizeof(rcv_cmd), MSG_DONTWAIT);
- CHECK(nread != sizeof(rcv_cmd),
- "recv(sk_fds)",
- "ev.data.u32:%u nread:%zd sizeof(rcv_cmd):%zu errno:%d\n",
- ev.data.u32, nread, sizeof(rcv_cmd), errno);
+ RET_IF(nread != sizeof(rcv_cmd),
+ "recv(sk_fds)",
+ "ev.data.u32:%u nread:%zd sizeof(rcv_cmd):%zu errno:%d\n",
+ ev.data.u32, nread, sizeof(rcv_cmd), errno);
}
close(cli_fd);
@@ -428,18 +444,14 @@ static void test_err_inner_map(int type, sa_family_t family)
.pass_on_failure = 0,
};
- printf("%s: ", __func__);
expected_results[DROP_ERR_INNER_MAP]++;
do_test(type, family, &cmd, DROP_ERR_INNER_MAP);
- printf("OK\n");
}
static void test_err_skb_data(int type, sa_family_t family)
{
- printf("%s: ", __func__);
expected_results[DROP_ERR_SKB_DATA]++;
do_test(type, family, NULL, DROP_ERR_SKB_DATA);
- printf("OK\n");
}
static void test_err_sk_select_port(int type, sa_family_t family)
@@ -449,10 +461,8 @@ static void test_err_sk_select_port(int type, sa_family_t family)
.pass_on_failure = 0,
};
- printf("%s: ", __func__);
expected_results[DROP_ERR_SK_SELECT_REUSEPORT]++;
do_test(type, family, &cmd, DROP_ERR_SK_SELECT_REUSEPORT);
- printf("OK\n");
}
static void test_pass(int type, sa_family_t family)
@@ -460,14 +470,12 @@ static void test_pass(int type, sa_family_t family)
struct cmd cmd;
int i;
- printf("%s: ", __func__);
cmd.pass_on_failure = 0;
for (i = 0; i < REUSEPORT_ARRAY_SIZE; i++) {
expected_results[PASS]++;
cmd.reuseport_index = i;
do_test(type, family, &cmd, PASS);
}
- printf("OK\n");
}
static void test_syncookie(int type, sa_family_t family)
@@ -481,7 +489,6 @@ static void test_syncookie(int type, sa_family_t family)
if (type != SOCK_STREAM)
return;
- printf("%s: ", __func__);
/*
* +1 for TCP-SYN and
* +1 for the TCP-ACK (ack the syncookie)
@@ -497,17 +504,16 @@ static void test_syncookie(int type, sa_family_t family)
*/
err = bpf_map_update_elem(tmp_index_ovr_map, &index_zero,
&tmp_index, BPF_ANY);
- CHECK(err == -1, "update_elem(tmp_index_ovr_map, 0, 1)",
- "err:%d errno:%d\n", err, errno);
+ RET_IF(err == -1, "update_elem(tmp_index_ovr_map, 0, 1)",
+ "err:%d errno:%d\n", err, errno);
do_test(type, family, &cmd, PASS);
err = bpf_map_lookup_elem(tmp_index_ovr_map, &index_zero,
&tmp_index);
- CHECK(err == -1 || tmp_index != -1,
- "lookup_elem(tmp_index_ovr_map)",
- "err:%d errno:%d tmp_index:%d\n",
- err, errno, tmp_index);
+ RET_IF(err == -1 || tmp_index != -1,
+ "lookup_elem(tmp_index_ovr_map)",
+ "err:%d errno:%d tmp_index:%d\n",
+ err, errno, tmp_index);
disable_syncookie();
- printf("OK\n");
}
static void test_pass_on_err(int type, sa_family_t family)
@@ -517,10 +523,8 @@ static void test_pass_on_err(int type, sa_family_t family)
.pass_on_failure = 1,
};
- printf("%s: ", __func__);
expected_results[PASS_ERR_SK_SELECT_REUSEPORT] += 1;
do_test(type, family, &cmd, PASS_ERR_SK_SELECT_REUSEPORT);
- printf("OK\n");
}
static void test_detach_bpf(int type, sa_family_t family)
@@ -532,46 +536,47 @@ static void test_detach_bpf(int type, sa_family_t family)
struct cmd cmd = {};
int optvalue = 0;
- printf("%s: ", __func__);
err = setsockopt(sk_fds[0], SOL_SOCKET, SO_DETACH_REUSEPORT_BPF,
&optvalue, sizeof(optvalue));
- CHECK(err == -1, "setsockopt(SO_DETACH_REUSEPORT_BPF)",
- "err:%d errno:%d\n", err, errno);
+ RET_IF(err == -1, "setsockopt(SO_DETACH_REUSEPORT_BPF)",
+ "err:%d errno:%d\n", err, errno);
err = setsockopt(sk_fds[1], SOL_SOCKET, SO_DETACH_REUSEPORT_BPF,
&optvalue, sizeof(optvalue));
- CHECK(err == 0 || errno != ENOENT, "setsockopt(SO_DETACH_REUSEPORT_BPF)",
- "err:%d errno:%d\n", err, errno);
+ RET_IF(err == 0 || errno != ENOENT,
+ "setsockopt(SO_DETACH_REUSEPORT_BPF)",
+ "err:%d errno:%d\n", err, errno);
for (i = 0; i < NR_RESULTS; i++) {
err = bpf_map_lookup_elem(result_map, &i, &tmp);
- CHECK(err == -1, "lookup_elem(result_map)",
- "i:%u err:%d errno:%d\n", i, err, errno);
+ RET_IF(err == -1, "lookup_elem(result_map)",
+ "i:%u err:%d errno:%d\n", i, err, errno);
nr_run_before += tmp;
}
cli_fd = send_data(type, family, &cmd, sizeof(cmd), PASS);
+ if (cli_fd < 0)
+ return;
nev = epoll_wait(epfd, &ev, 1, 5);
- CHECK(nev <= 0, "nev <= 0",
- "nev:%d expected:1 type:%d family:%d data:(0, 0)\n",
- nev, type, family);
+ RET_IF(nev <= 0, "nev <= 0",
+ "nev:%d expected:1 type:%d family:%d data:(0, 0)\n",
+ nev, type, family);
for (i = 0; i < NR_RESULTS; i++) {
err = bpf_map_lookup_elem(result_map, &i, &tmp);
- CHECK(err == -1, "lookup_elem(result_map)",
- "i:%u err:%d errno:%d\n", i, err, errno);
+ RET_IF(err == -1, "lookup_elem(result_map)",
+ "i:%u err:%d errno:%d\n", i, err, errno);
nr_run_after += tmp;
}
- CHECK(nr_run_before != nr_run_after,
- "nr_run_before != nr_run_after",
- "nr_run_before:%u nr_run_after:%u\n",
- nr_run_before, nr_run_after);
+ RET_IF(nr_run_before != nr_run_after,
+ "nr_run_before != nr_run_after",
+ "nr_run_before:%u nr_run_after:%u\n",
+ nr_run_before, nr_run_after);
- printf("OK\n");
close(cli_fd);
#else
- printf("%s: SKIP\n", __func__);
+ test__skip();
#endif
}
@@ -594,73 +599,83 @@ static void prepare_sk_fds(int type, sa_family_t family, bool inany)
*/
for (i = first; i >= 0; i--) {
sk_fds[i] = socket(family, type, 0);
- CHECK(sk_fds[i] == -1, "socket()", "sk_fds[%d]:%d errno:%d\n",
- i, sk_fds[i], errno);
+ RET_IF(sk_fds[i] == -1, "socket()", "sk_fds[%d]:%d errno:%d\n",
+ i, sk_fds[i], errno);
err = setsockopt(sk_fds[i], SOL_SOCKET, SO_REUSEPORT,
&optval, sizeof(optval));
- CHECK(err == -1, "setsockopt(SO_REUSEPORT)",
- "sk_fds[%d] err:%d errno:%d\n",
- i, err, errno);
+ RET_IF(err == -1, "setsockopt(SO_REUSEPORT)",
+ "sk_fds[%d] err:%d errno:%d\n",
+ i, err, errno);
if (i == first) {
err = setsockopt(sk_fds[i], SOL_SOCKET,
SO_ATTACH_REUSEPORT_EBPF,
&select_by_skb_data_prog,
sizeof(select_by_skb_data_prog));
- CHECK(err == -1, "setsockopt(SO_ATTACH_REUEPORT_EBPF)",
- "err:%d errno:%d\n", err, errno);
+ RET_IF(err == -1, "setsockopt(SO_ATTACH_REUEPORT_EBPF)",
+ "err:%d errno:%d\n", err, errno);
}
err = bind(sk_fds[i], (struct sockaddr *)&srv_sa, addrlen);
- CHECK(err == -1, "bind()", "sk_fds[%d] err:%d errno:%d\n",
- i, err, errno);
+ RET_IF(err == -1, "bind()", "sk_fds[%d] err:%d errno:%d\n",
+ i, err, errno);
if (type == SOCK_STREAM) {
err = listen(sk_fds[i], 10);
- CHECK(err == -1, "listen()",
- "sk_fds[%d] err:%d errno:%d\n",
- i, err, errno);
+ RET_IF(err == -1, "listen()",
+ "sk_fds[%d] err:%d errno:%d\n",
+ i, err, errno);
}
err = bpf_map_update_elem(reuseport_array, &i, &sk_fds[i],
BPF_NOEXIST);
- CHECK(err == -1, "update_elem(reuseport_array)",
- "sk_fds[%d] err:%d errno:%d\n", i, err, errno);
+ RET_IF(err == -1, "update_elem(reuseport_array)",
+ "sk_fds[%d] err:%d errno:%d\n", i, err, errno);
if (i == first) {
socklen_t addrlen = sizeof(srv_sa);
err = getsockname(sk_fds[i], (struct sockaddr *)&srv_sa,
&addrlen);
- CHECK(err == -1, "getsockname()",
- "sk_fds[%d] err:%d errno:%d\n", i, err, errno);
+ RET_IF(err == -1, "getsockname()",
+ "sk_fds[%d] err:%d errno:%d\n", i, err, errno);
}
}
epfd = epoll_create(1);
- CHECK(epfd == -1, "epoll_create(1)",
- "epfd:%d errno:%d\n", epfd, errno);
+ RET_IF(epfd == -1, "epoll_create(1)",
+ "epfd:%d errno:%d\n", epfd, errno);
ev.events = EPOLLIN;
for (i = 0; i < REUSEPORT_ARRAY_SIZE; i++) {
ev.data.u32 = i;
err = epoll_ctl(epfd, EPOLL_CTL_ADD, sk_fds[i], &ev);
- CHECK(err, "epoll_ctl(EPOLL_CTL_ADD)", "sk_fds[%d]\n", i);
+ RET_IF(err, "epoll_ctl(EPOLL_CTL_ADD)", "sk_fds[%d]\n", i);
}
}
-static void setup_per_test(int type, unsigned short family, bool inany)
+static void setup_per_test(int type, sa_family_t family, bool inany,
+ bool no_inner_map)
{
int ovr = -1, err;
prepare_sk_fds(type, family, inany);
err = bpf_map_update_elem(tmp_index_ovr_map, &index_zero, &ovr,
BPF_ANY);
- CHECK(err == -1, "update_elem(tmp_index_ovr_map, 0, -1)",
- "err:%d errno:%d\n", err, errno);
+ RET_IF(err == -1, "update_elem(tmp_index_ovr_map, 0, -1)",
+ "err:%d errno:%d\n", err, errno);
+
+ /* Install reuseport_array to outer_map? */
+ if (no_inner_map)
+ return;
+
+ err = bpf_map_update_elem(outer_map, &index_zero, &reuseport_array,
+ BPF_ANY);
+ RET_IF(err == -1, "update_elem(outer_map, 0, reuseport_array)",
+ "err:%d errno:%d\n", err, errno);
}
-static void cleanup_per_test(void)
+static void cleanup_per_test(bool no_inner_map)
{
int i, err;
@@ -668,75 +683,124 @@ static void cleanup_per_test(void)
close(sk_fds[i]);
close(epfd);
+ /* Delete reuseport_array from outer_map? */
+ if (no_inner_map)
+ return;
+
err = bpf_map_delete_elem(outer_map, &index_zero);
- CHECK(err == -1, "delete_elem(outer_map)",
- "err:%d errno:%d\n", err, errno);
+ RET_IF(err == -1, "delete_elem(outer_map)",
+ "err:%d errno:%d\n", err, errno);
}
static void cleanup(void)
{
- close(outer_map);
- close(reuseport_array);
- bpf_object__close(obj);
+ if (outer_map != -1)
+ close(outer_map);
+ if (reuseport_array != -1)
+ close(reuseport_array);
+ if (obj)
+ bpf_object__close(obj);
}
-static void test_all(void)
+static const char *family_str(sa_family_t family)
{
- /* Extra SOCK_STREAM to test bind_inany==true */
- const int types[] = { SOCK_STREAM, SOCK_DGRAM, SOCK_STREAM };
- const char * const type_strings[] = { "TCP", "UDP", "TCP" };
- const char * const family_strings[] = { "IPv6", "IPv4" };
- const unsigned short families[] = { AF_INET6, AF_INET };
- const bool bind_inany[] = { false, false, true };
- int t, f, err;
-
- for (f = 0; f < ARRAY_SIZE(families); f++) {
- unsigned short family = families[f];
-
- for (t = 0; t < ARRAY_SIZE(types); t++) {
- bool inany = bind_inany[t];
- int type = types[t];
-
- printf("######## %s/%s %s ########\n",
- family_strings[f], type_strings[t],
- inany ? " INANY " : "LOOPBACK");
-
- setup_per_test(type, family, inany);
-
- test_err_inner_map(type, family);
-
- /* Install reuseport_array to the outer_map */
- err = bpf_map_update_elem(outer_map, &index_zero,
- &reuseport_array, BPF_ANY);
- CHECK(err == -1, "update_elem(outer_map)",
- "err:%d errno:%d\n", err, errno);
-
- test_err_skb_data(type, family);
- test_err_sk_select_port(type, family);
- test_pass(type, family);
- test_syncookie(type, family);
- test_pass_on_err(type, family);
- /* Must be the last test */
- test_detach_bpf(type, family);
-
- cleanup_per_test();
- printf("\n");
- }
+ switch (family) {
+ case AF_INET:
+ return "IPv4";
+ case AF_INET6:
+ return "IPv6";
+ default:
+ return "unknown";
+ }
+}
+
+static const char *sotype_str(int sotype)
+{
+ switch (sotype) {
+ case SOCK_STREAM:
+ return "TCP";
+ case SOCK_DGRAM:
+ return "UDP";
+ default:
+ return "unknown";
+ }
+}
+
+#define TEST_INIT(fn, ...) { fn, #fn, __VA_ARGS__ }
+
+static void test_config(int sotype, sa_family_t family, bool inany)
+{
+ const struct test {
+ void (*fn)(int sotype, sa_family_t family);
+ const char *name;
+ bool no_inner_map;
+ } tests[] = {
+ TEST_INIT(test_err_inner_map, true /* no_inner_map */),
+ TEST_INIT(test_err_skb_data),
+ TEST_INIT(test_err_sk_select_port),
+ TEST_INIT(test_pass),
+ TEST_INIT(test_syncookie),
+ TEST_INIT(test_pass_on_err),
+ TEST_INIT(test_detach_bpf),
+ };
+ char s[MAX_TEST_NAME];
+ const struct test *t;
+
+ for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
+ snprintf(s, sizeof(s), "%s/%s %s %s",
+ family_str(family), sotype_str(sotype),
+ inany ? "INANY" : "LOOPBACK", t->name);
+
+ if (!test__start_subtest(s))
+ continue;
+
+ setup_per_test(sotype, family, inany, t->no_inner_map);
+ t->fn(sotype, family);
+ cleanup_per_test(t->no_inner_map);
}
}
-int main(int argc, const char **argv)
+#define BIND_INANY true
+
+static void test_all(void)
{
- create_maps();
- prepare_bpf_obj();
+ const struct config {
+ int sotype;
+ sa_family_t family;
+ bool inany;
+ } configs[] = {
+ { SOCK_STREAM, AF_INET },
+ { SOCK_STREAM, AF_INET, BIND_INANY },
+ { SOCK_STREAM, AF_INET6 },
+ { SOCK_STREAM, AF_INET6, BIND_INANY },
+ { SOCK_DGRAM, AF_INET },
+ { SOCK_DGRAM, AF_INET6 },
+ };
+ const struct config *c;
+
+ for (c = configs; c < configs + ARRAY_SIZE(configs); c++)
+ test_config(c->sotype, c->family, c->inany);
+}
+
+void test_select_reuseport(void)
+{
+ if (create_maps())
+ goto out;
+ if (prepare_bpf_obj())
+ goto out;
+
saved_tcp_fo = read_int_sysctl(TCP_FO_SYSCTL);
saved_tcp_syncookie = read_int_sysctl(TCP_SYNCOOKIE_SYSCTL);
- enable_fastopen();
- disable_syncookie();
- atexit(restore_sysctls);
+ if (saved_tcp_syncookie < 0 || saved_tcp_syncookie < 0)
+ goto out;
- test_all();
+ if (enable_fastopen())
+ goto out;
+ if (disable_syncookie())
+ goto out;
+ test_all();
+out:
cleanup();
- return 0;
+ restore_sysctls();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c
index a2eb8db8dafb..c6d6b685a946 100644
--- a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c
+++ b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c
@@ -11,6 +11,9 @@ void test_skb_ctx(void)
.cb[4] = 5,
.priority = 6,
.tstamp = 7,
+ .wire_len = 100,
+ .gso_segs = 8,
+ .mark = 9,
};
struct bpf_prog_test_run_attr tattr = {
.data_in = &pkt_v4,
@@ -91,4 +94,8 @@ void test_skb_ctx(void)
"ctx_out_tstamp",
"skb->tstamp == %lld, expected %d\n",
skb.tstamp, 8);
+ CHECK_ATTR(skb.mark != 10,
+ "ctx_out_mark",
+ "skb->mark == %u, expected %d\n",
+ skb.mark, 10);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/skeleton.c b/tools/testing/selftests/bpf/prog_tests/skeleton.c
new file mode 100644
index 000000000000..9264a2736018
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/skeleton.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 Facebook */
+
+#include <test_progs.h>
+
+struct s {
+ int a;
+ long long b;
+} __attribute__((packed));
+
+#include "test_skeleton.skel.h"
+
+void test_skeleton(void)
+{
+ int duration = 0, err;
+ struct test_skeleton* skel;
+ struct test_skeleton__bss *bss;
+ struct test_skeleton__kconfig *kcfg;
+
+ skel = test_skeleton__open();
+ if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
+ return;
+
+ if (CHECK(skel->kconfig, "skel_kconfig", "kconfig is mmaped()!\n"))
+ goto cleanup;
+
+ err = test_skeleton__load(skel);
+ if (CHECK(err, "skel_load", "failed to load skeleton: %d\n", err))
+ goto cleanup;
+
+ bss = skel->bss;
+ bss->in1 = 1;
+ bss->in2 = 2;
+ bss->in3 = 3;
+ bss->in4 = 4;
+ bss->in5.a = 5;
+ bss->in5.b = 6;
+ kcfg = skel->kconfig;
+
+ err = test_skeleton__attach(skel);
+ if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
+ goto cleanup;
+
+ /* trigger tracepoint */
+ usleep(1);
+
+ CHECK(bss->out1 != 1, "res1", "got %d != exp %d\n", bss->out1, 1);
+ CHECK(bss->out2 != 2, "res2", "got %lld != exp %d\n", bss->out2, 2);
+ CHECK(bss->out3 != 3, "res3", "got %d != exp %d\n", (int)bss->out3, 3);
+ CHECK(bss->out4 != 4, "res4", "got %lld != exp %d\n", bss->out4, 4);
+ CHECK(bss->handler_out5.a != 5, "res5", "got %d != exp %d\n",
+ bss->handler_out5.a, 5);
+ CHECK(bss->handler_out5.b != 6, "res6", "got %lld != exp %d\n",
+ bss->handler_out5.b, 6);
+
+ CHECK(bss->bpf_syscall != kcfg->CONFIG_BPF_SYSCALL, "ext1",
+ "got %d != exp %d\n", bss->bpf_syscall, kcfg->CONFIG_BPF_SYSCALL);
+ CHECK(bss->kern_ver != kcfg->LINUX_KERNEL_VERSION, "ext2",
+ "got %d != exp %d\n", bss->kern_ver, kcfg->LINUX_KERNEL_VERSION);
+
+cleanup:
+ test_skeleton__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c
index d841dced971f..e8399ae50e77 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c
@@ -1,16 +1,14 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
+#include "test_stacktrace_build_id.skel.h"
void test_stacktrace_build_id(void)
{
+
int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd;
- const char *prog_name = "tracepoint/random/urandom_read";
- const char *file = "./test_stacktrace_build_id.o";
- int err, prog_fd, stack_trace_len;
+ struct test_stacktrace_build_id *skel;
+ int err, stack_trace_len;
__u32 key, previous_key, val, duration = 0;
- struct bpf_program *prog;
- struct bpf_object *obj;
- struct bpf_link *link = NULL;
char buf[256];
int i, j;
struct bpf_stack_build_id id_offs[PERF_MAX_STACK_DEPTH];
@@ -18,43 +16,24 @@ void test_stacktrace_build_id(void)
int retry = 1;
retry:
- err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
- if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno))
+ skel = test_stacktrace_build_id__open_and_load();
+ if (CHECK(!skel, "skel_open_and_load", "skeleton open/load failed\n"))
return;
- prog = bpf_object__find_program_by_title(obj, prog_name);
- if (CHECK(!prog, "find_prog", "prog '%s' not found\n", prog_name))
- goto close_prog;
-
- link = bpf_program__attach_tracepoint(prog, "random", "urandom_read");
- if (CHECK(IS_ERR(link), "attach_tp", "err %ld\n", PTR_ERR(link)))
- goto close_prog;
+ err = test_stacktrace_build_id__attach(skel);
+ if (CHECK(err, "attach_tp", "err %d\n", err))
+ goto cleanup;
/* find map fds */
- control_map_fd = bpf_find_map(__func__, obj, "control_map");
- if (CHECK(control_map_fd < 0, "bpf_find_map control_map",
- "err %d errno %d\n", err, errno))
- goto disable_pmu;
-
- stackid_hmap_fd = bpf_find_map(__func__, obj, "stackid_hmap");
- if (CHECK(stackid_hmap_fd < 0, "bpf_find_map stackid_hmap",
- "err %d errno %d\n", err, errno))
- goto disable_pmu;
-
- stackmap_fd = bpf_find_map(__func__, obj, "stackmap");
- if (CHECK(stackmap_fd < 0, "bpf_find_map stackmap", "err %d errno %d\n",
- err, errno))
- goto disable_pmu;
-
- stack_amap_fd = bpf_find_map(__func__, obj, "stack_amap");
- if (CHECK(stack_amap_fd < 0, "bpf_find_map stack_amap",
- "err %d errno %d\n", err, errno))
- goto disable_pmu;
+ control_map_fd = bpf_map__fd(skel->maps.control_map);
+ stackid_hmap_fd = bpf_map__fd(skel->maps.stackid_hmap);
+ stackmap_fd = bpf_map__fd(skel->maps.stackmap);
+ stack_amap_fd = bpf_map__fd(skel->maps.stack_amap);
if (CHECK_FAIL(system("dd if=/dev/urandom of=/dev/zero count=4 2> /dev/null")))
- goto disable_pmu;
+ goto cleanup;
if (CHECK_FAIL(system("./urandom_read")))
- goto disable_pmu;
+ goto cleanup;
/* disable stack trace collection */
key = 0;
val = 1;
@@ -66,23 +45,23 @@ retry:
err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap",
"err %d errno %d\n", err, errno))
- goto disable_pmu;
+ goto cleanup;
err = compare_map_keys(stackmap_fd, stackid_hmap_fd);
if (CHECK(err, "compare_map_keys stackmap vs. stackid_hmap",
"err %d errno %d\n", err, errno))
- goto disable_pmu;
+ goto cleanup;
err = extract_build_id(buf, 256);
if (CHECK(err, "get build_id with readelf",
"err %d errno %d\n", err, errno))
- goto disable_pmu;
+ goto cleanup;
err = bpf_map_get_next_key(stackmap_fd, NULL, &key);
if (CHECK(err, "get_next_key from stackmap",
"err %d, errno %d\n", err, errno))
- goto disable_pmu;
+ goto cleanup;
do {
char build_id[64];
@@ -90,7 +69,7 @@ retry:
err = bpf_map_lookup_elem(stackmap_fd, &key, id_offs);
if (CHECK(err, "lookup_elem from stackmap",
"err %d, errno %d\n", err, errno))
- goto disable_pmu;
+ goto cleanup;
for (i = 0; i < PERF_MAX_STACK_DEPTH; ++i)
if (id_offs[i].status == BPF_STACK_BUILD_ID_VALID &&
id_offs[i].offset != 0) {
@@ -108,8 +87,7 @@ retry:
* try it one more time.
*/
if (build_id_matches < 1 && retry--) {
- bpf_link__destroy(link);
- bpf_object__close(obj);
+ test_stacktrace_build_id__destroy(skel);
printf("%s:WARN:Didn't find expected build ID from the map, retrying\n",
__func__);
goto retry;
@@ -117,17 +95,14 @@ retry:
if (CHECK(build_id_matches < 1, "build id match",
"Didn't find expected build ID from the map\n"))
- goto disable_pmu;
+ goto cleanup;
- stack_trace_len = PERF_MAX_STACK_DEPTH
- * sizeof(struct bpf_stack_build_id);
+ stack_trace_len = PERF_MAX_STACK_DEPTH *
+ sizeof(struct bpf_stack_build_id);
err = compare_stack_ips(stackmap_fd, stack_amap_fd, stack_trace_len);
CHECK(err, "compare_stack_ips stackmap vs. stack_amap",
"err %d errno %d\n", err, errno);
-disable_pmu:
- bpf_link__destroy(link);
-
-close_prog:
- bpf_object__close(obj);
+cleanup:
+ test_stacktrace_build_id__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
index f62aa0eb959b..8974450a4bdb 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
+#include "test_stacktrace_build_id.skel.h"
static __u64 read_perf_max_sample_freq(void)
{
@@ -16,19 +17,15 @@ static __u64 read_perf_max_sample_freq(void)
void test_stacktrace_build_id_nmi(void)
{
- int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd;
- const char *prog_name = "tracepoint/random/urandom_read";
- const char *file = "./test_stacktrace_build_id.o";
- int err, pmu_fd, prog_fd;
+ int control_map_fd, stackid_hmap_fd, stackmap_fd;
+ struct test_stacktrace_build_id *skel;
+ int err, pmu_fd;
struct perf_event_attr attr = {
.freq = 1,
.type = PERF_TYPE_HARDWARE,
.config = PERF_COUNT_HW_CPU_CYCLES,
};
__u32 key, previous_key, val, duration = 0;
- struct bpf_program *prog;
- struct bpf_object *obj;
- struct bpf_link *link;
char buf[256];
int i, j;
struct bpf_stack_build_id id_offs[PERF_MAX_STACK_DEPTH];
@@ -38,13 +35,16 @@ void test_stacktrace_build_id_nmi(void)
attr.sample_freq = read_perf_max_sample_freq();
retry:
- err = bpf_prog_load(file, BPF_PROG_TYPE_PERF_EVENT, &obj, &prog_fd);
- if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno))
+ skel = test_stacktrace_build_id__open();
+ if (CHECK(!skel, "skel_open", "skeleton open failed\n"))
return;
- prog = bpf_object__find_program_by_title(obj, prog_name);
- if (CHECK(!prog, "find_prog", "prog '%s' not found\n", prog_name))
- goto close_prog;
+ /* override program type */
+ bpf_program__set_perf_event(skel->progs.oncpu);
+
+ err = test_stacktrace_build_id__load(skel);
+ if (CHECK(err, "skel_load", "skeleton load failed: %d\n", err))
+ goto cleanup;
pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
0 /* cpu 0 */, -1 /* group id */,
@@ -52,40 +52,25 @@ retry:
if (CHECK(pmu_fd < 0, "perf_event_open",
"err %d errno %d. Does the test host support PERF_COUNT_HW_CPU_CYCLES?\n",
pmu_fd, errno))
- goto close_prog;
+ goto cleanup;
- link = bpf_program__attach_perf_event(prog, pmu_fd);
- if (CHECK(IS_ERR(link), "attach_perf_event",
- "err %ld\n", PTR_ERR(link))) {
+ skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu,
+ pmu_fd);
+ if (CHECK(IS_ERR(skel->links.oncpu), "attach_perf_event",
+ "err %ld\n", PTR_ERR(skel->links.oncpu))) {
close(pmu_fd);
- goto close_prog;
+ goto cleanup;
}
/* find map fds */
- control_map_fd = bpf_find_map(__func__, obj, "control_map");
- if (CHECK(control_map_fd < 0, "bpf_find_map control_map",
- "err %d errno %d\n", err, errno))
- goto disable_pmu;
-
- stackid_hmap_fd = bpf_find_map(__func__, obj, "stackid_hmap");
- if (CHECK(stackid_hmap_fd < 0, "bpf_find_map stackid_hmap",
- "err %d errno %d\n", err, errno))
- goto disable_pmu;
-
- stackmap_fd = bpf_find_map(__func__, obj, "stackmap");
- if (CHECK(stackmap_fd < 0, "bpf_find_map stackmap", "err %d errno %d\n",
- err, errno))
- goto disable_pmu;
-
- stack_amap_fd = bpf_find_map(__func__, obj, "stack_amap");
- if (CHECK(stack_amap_fd < 0, "bpf_find_map stack_amap",
- "err %d errno %d\n", err, errno))
- goto disable_pmu;
+ control_map_fd = bpf_map__fd(skel->maps.control_map);
+ stackid_hmap_fd = bpf_map__fd(skel->maps.stackid_hmap);
+ stackmap_fd = bpf_map__fd(skel->maps.stackmap);
if (CHECK_FAIL(system("dd if=/dev/urandom of=/dev/zero count=4 2> /dev/null")))
- goto disable_pmu;
+ goto cleanup;
if (CHECK_FAIL(system("taskset 0x1 ./urandom_read 100000")))
- goto disable_pmu;
+ goto cleanup;
/* disable stack trace collection */
key = 0;
val = 1;
@@ -97,23 +82,23 @@ retry:
err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap",
"err %d errno %d\n", err, errno))
- goto disable_pmu;
+ goto cleanup;
err = compare_map_keys(stackmap_fd, stackid_hmap_fd);
if (CHECK(err, "compare_map_keys stackmap vs. stackid_hmap",
"err %d errno %d\n", err, errno))
- goto disable_pmu;
+ goto cleanup;
err = extract_build_id(buf, 256);
if (CHECK(err, "get build_id with readelf",
"err %d errno %d\n", err, errno))
- goto disable_pmu;
+ goto cleanup;
err = bpf_map_get_next_key(stackmap_fd, NULL, &key);
if (CHECK(err, "get_next_key from stackmap",
"err %d, errno %d\n", err, errno))
- goto disable_pmu;
+ goto cleanup;
do {
char build_id[64];
@@ -121,7 +106,7 @@ retry:
err = bpf_map_lookup_elem(stackmap_fd, &key, id_offs);
if (CHECK(err, "lookup_elem from stackmap",
"err %d, errno %d\n", err, errno))
- goto disable_pmu;
+ goto cleanup;
for (i = 0; i < PERF_MAX_STACK_DEPTH; ++i)
if (id_offs[i].status == BPF_STACK_BUILD_ID_VALID &&
id_offs[i].offset != 0) {
@@ -139,8 +124,7 @@ retry:
* try it one more time.
*/
if (build_id_matches < 1 && retry--) {
- bpf_link__destroy(link);
- bpf_object__close(obj);
+ test_stacktrace_build_id__destroy(skel);
printf("%s:WARN:Didn't find expected build ID from the map, retrying\n",
__func__);
goto retry;
@@ -148,7 +132,7 @@ retry:
if (CHECK(build_id_matches < 1, "build id match",
"Didn't find expected build ID from the map\n"))
- goto disable_pmu;
+ goto cleanup;
/*
* We intentionally skip compare_stack_ips(). This is because we
@@ -157,8 +141,6 @@ retry:
* BPF_STACK_BUILD_ID_IP;
*/
-disable_pmu:
- bpf_link__destroy(link);
-close_prog:
- bpf_object__close(obj);
+cleanup:
+ test_stacktrace_build_id__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_perf.c b/tools/testing/selftests/bpf/prog_tests/xdp_perf.c
new file mode 100644
index 000000000000..7185bee16fe4
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_perf.c
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+
+void test_xdp_perf(void)
+{
+ const char *file = "./xdp_dummy.o";
+ __u32 duration, retval, size;
+ struct bpf_object *obj;
+ char in[128], out[128];
+ int err, prog_fd;
+
+ err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
+ if (CHECK_FAIL(err))
+ return;
+
+ err = bpf_prog_test_run(prog_fd, 1000000, &in[0], 128,
+ out, &size, &retval, &duration);
+
+ CHECK(err || retval != XDP_PASS || size != 128,
+ "xdp-perf",
+ "err %d errno %d retval %d size %d\n",
+ err, errno, retval, size);
+
+ bpf_object__close(obj);
+}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___equiv_zero_sz_arr.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___equiv_zero_sz_arr.c
new file mode 100644
index 000000000000..65eac371b061
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___equiv_zero_sz_arr.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_arrays___equiv_zero_sz_arr x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_bad_zero_sz_arr.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_bad_zero_sz_arr.c
new file mode 100644
index 000000000000..ecda2b545ac2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_bad_zero_sz_arr.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_arrays___err_bad_zero_sz_arr x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___fixed_arr.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___fixed_arr.c
new file mode 100644
index 000000000000..fe1d01232c22
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___fixed_arr.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_arrays___fixed_arr x) {}
diff --git a/tools/testing/selftests/bpf/progs/core_reloc_types.h b/tools/testing/selftests/bpf/progs/core_reloc_types.h
index 9311489e14b2..6d598cfbdb3e 100644
--- a/tools/testing/selftests/bpf/progs/core_reloc_types.h
+++ b/tools/testing/selftests/bpf/progs/core_reloc_types.h
@@ -327,6 +327,7 @@ struct core_reloc_arrays_output {
char b123;
int c1c;
int d00d;
+ int f10c;
};
struct core_reloc_arrays_substruct {
@@ -339,6 +340,7 @@ struct core_reloc_arrays {
char b[2][3][4];
struct core_reloc_arrays_substruct c[3];
struct core_reloc_arrays_substruct d[1][2];
+ struct core_reloc_arrays_substruct f[][2];
};
/* bigger array dimensions */
@@ -347,6 +349,7 @@ struct core_reloc_arrays___diff_arr_dim {
char b[3][4][5];
struct core_reloc_arrays_substruct c[4];
struct core_reloc_arrays_substruct d[2][3];
+ struct core_reloc_arrays_substruct f[1][3];
};
/* different size of array's value (struct) */
@@ -363,6 +366,29 @@ struct core_reloc_arrays___diff_arr_val_sz {
int d;
int __padding2;
} d[1][2];
+ struct {
+ int __padding1;
+ int c;
+ int __padding2;
+ } f[][2];
+};
+
+struct core_reloc_arrays___equiv_zero_sz_arr {
+ int a[5];
+ char b[2][3][4];
+ struct core_reloc_arrays_substruct c[3];
+ struct core_reloc_arrays_substruct d[1][2];
+ /* equivalent to flexible array */
+ struct core_reloc_arrays_substruct f[0][2];
+};
+
+struct core_reloc_arrays___fixed_arr {
+ int a[5];
+ char b[2][3][4];
+ struct core_reloc_arrays_substruct c[3];
+ struct core_reloc_arrays_substruct d[1][2];
+ /* not a flexible array anymore, but within access bounds */
+ struct core_reloc_arrays_substruct f[1][2];
};
struct core_reloc_arrays___err_too_small {
@@ -370,6 +396,7 @@ struct core_reloc_arrays___err_too_small {
char b[2][3][4];
struct core_reloc_arrays_substruct c[3];
struct core_reloc_arrays_substruct d[1][2];
+ struct core_reloc_arrays_substruct f[][2];
};
struct core_reloc_arrays___err_too_shallow {
@@ -377,6 +404,7 @@ struct core_reloc_arrays___err_too_shallow {
char b[2][3]; /* this one lacks one dimension */
struct core_reloc_arrays_substruct c[3];
struct core_reloc_arrays_substruct d[1][2];
+ struct core_reloc_arrays_substruct f[][2];
};
struct core_reloc_arrays___err_non_array {
@@ -384,6 +412,7 @@ struct core_reloc_arrays___err_non_array {
char b[2][3][4];
struct core_reloc_arrays_substruct c[3];
struct core_reloc_arrays_substruct d[1][2];
+ struct core_reloc_arrays_substruct f[][2];
};
struct core_reloc_arrays___err_wrong_val_type {
@@ -391,6 +420,16 @@ struct core_reloc_arrays___err_wrong_val_type {
char b[2][3][4];
int c[3]; /* value is not a struct */
struct core_reloc_arrays_substruct d[1][2];
+ struct core_reloc_arrays_substruct f[][2];
+};
+
+struct core_reloc_arrays___err_bad_zero_sz_arr {
+ /* zero-sized array, but not at the end */
+ struct core_reloc_arrays_substruct f[0][2];
+ int a[5];
+ char b[2][3][4];
+ struct core_reloc_arrays_substruct c[3];
+ struct core_reloc_arrays_substruct d[1][2];
};
/*
diff --git a/tools/testing/selftests/bpf/progs/test_attach_probe.c b/tools/testing/selftests/bpf/progs/test_attach_probe.c
index 534621e38906..221b69700625 100644
--- a/tools/testing/selftests/bpf/progs/test_attach_probe.c
+++ b/tools/testing/selftests/bpf/progs/test_attach_probe.c
@@ -5,46 +5,36 @@
#include <linux/bpf.h>
#include "bpf_helpers.h"
-struct {
- __uint(type, BPF_MAP_TYPE_ARRAY);
- __uint(max_entries, 4);
- __type(key, int);
- __type(value, int);
-} results_map SEC(".maps");
+int kprobe_res = 0;
+int kretprobe_res = 0;
+int uprobe_res = 0;
+int uretprobe_res = 0;
SEC("kprobe/sys_nanosleep")
-int handle_sys_nanosleep_entry(struct pt_regs *ctx)
+int handle_kprobe(struct pt_regs *ctx)
{
- const int key = 0, value = 1;
-
- bpf_map_update_elem(&results_map, &key, &value, 0);
+ kprobe_res = 1;
return 0;
}
SEC("kretprobe/sys_nanosleep")
-int handle_sys_getpid_return(struct pt_regs *ctx)
+int handle_kretprobe(struct pt_regs *ctx)
{
- const int key = 1, value = 2;
-
- bpf_map_update_elem(&results_map, &key, &value, 0);
+ kretprobe_res = 2;
return 0;
}
SEC("uprobe/trigger_func")
-int handle_uprobe_entry(struct pt_regs *ctx)
+int handle_uprobe(struct pt_regs *ctx)
{
- const int key = 2, value = 3;
-
- bpf_map_update_elem(&results_map, &key, &value, 0);
+ uprobe_res = 3;
return 0;
}
SEC("uretprobe/trigger_func")
-int handle_uprobe_return(struct pt_regs *ctx)
+int handle_uretprobe(struct pt_regs *ctx)
{
- const int key = 3, value = 4;
-
- bpf_map_update_elem(&results_map, &key, &value, 0);
+ uretprobe_res = 4;
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/test_core_extern.c b/tools/testing/selftests/bpf/progs/test_core_extern.c
new file mode 100644
index 000000000000..9bfc91d9d004
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_core_extern.c
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 Facebook */
+
+#include <stdint.h>
+#include <stdbool.h>
+#include <linux/ptrace.h>
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+/* non-existing BPF helper, to test dead code elimination */
+static int (*bpf_missing_helper)(const void *arg1, int arg2) = (void *) 999;
+
+extern int LINUX_KERNEL_VERSION __kconfig;
+extern bool CONFIG_BPF_SYSCALL __kconfig; /* strong */
+extern enum libbpf_tristate CONFIG_TRISTATE __kconfig __weak;
+extern bool CONFIG_BOOL __kconfig __weak;
+extern char CONFIG_CHAR __kconfig __weak;
+extern uint16_t CONFIG_USHORT __kconfig __weak;
+extern int CONFIG_INT __kconfig __weak;
+extern uint64_t CONFIG_ULONG __kconfig __weak;
+extern const char CONFIG_STR[8] __kconfig __weak;
+extern uint64_t CONFIG_MISSING __kconfig __weak;
+
+uint64_t kern_ver = -1;
+uint64_t bpf_syscall = -1;
+uint64_t tristate_val = -1;
+uint64_t bool_val = -1;
+uint64_t char_val = -1;
+uint64_t ushort_val = -1;
+uint64_t int_val = -1;
+uint64_t ulong_val = -1;
+char str_val[8] = {-1, -1, -1, -1, -1, -1, -1, -1};
+uint64_t missing_val = -1;
+
+SEC("raw_tp/sys_enter")
+int handle_sys_enter(struct pt_regs *ctx)
+{
+ int i;
+
+ kern_ver = LINUX_KERNEL_VERSION;
+ bpf_syscall = CONFIG_BPF_SYSCALL;
+ tristate_val = CONFIG_TRISTATE;
+ bool_val = CONFIG_BOOL;
+ char_val = CONFIG_CHAR;
+ ushort_val = CONFIG_USHORT;
+ int_val = CONFIG_INT;
+ ulong_val = CONFIG_ULONG;
+
+ for (i = 0; i < sizeof(CONFIG_STR); i++) {
+ str_val[i] = CONFIG_STR[i];
+ }
+
+ if (CONFIG_MISSING)
+ /* invalid, but dead code - never executed */
+ missing_val = bpf_missing_helper(ctx, 123);
+ else
+ missing_val = 0xDEADC0DE;
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_arrays.c b/tools/testing/selftests/bpf/progs/test_core_reloc_arrays.c
index 89951b684282..053b86f6b53f 100644
--- a/tools/testing/selftests/bpf/progs/test_core_reloc_arrays.c
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_arrays.c
@@ -18,6 +18,7 @@ struct core_reloc_arrays_output {
char b123;
int c1c;
int d00d;
+ int f01c;
};
struct core_reloc_arrays_substruct {
@@ -30,6 +31,7 @@ struct core_reloc_arrays {
char b[2][3][4];
struct core_reloc_arrays_substruct c[3];
struct core_reloc_arrays_substruct d[1][2];
+ struct core_reloc_arrays_substruct f[][2];
};
#define CORE_READ(dst, src) bpf_core_read(dst, sizeof(*(dst)), src)
@@ -40,18 +42,16 @@ int test_core_arrays(void *ctx)
struct core_reloc_arrays *in = (void *)&data.in;
struct core_reloc_arrays_output *out = (void *)&data.out;
- /* in->a[2] */
if (CORE_READ(&out->a2, &in->a[2]))
return 1;
- /* in->b[1][2][3] */
if (CORE_READ(&out->b123, &in->b[1][2][3]))
return 1;
- /* in->c[1].c */
if (CORE_READ(&out->c1c, &in->c[1].c))
return 1;
- /* in->d[0][0].d */
if (CORE_READ(&out->d00d, &in->d[0][0].d))
return 1;
+ if (CORE_READ(&out->f01c, &in->f[0][1].c))
+ return 1;
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c b/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c
index ea7d84f01235..b1f09f5bb1cf 100644
--- a/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c
@@ -62,7 +62,7 @@ struct {
goto done; \
})
-SEC("select_by_skb_data")
+SEC("sk_reuseport")
int _select_by_skb_data(struct sk_reuseport_md *reuse_md)
{
__u32 linum, index = 0, flags = 0, index_zero = 0;
diff --git a/tools/testing/selftests/bpf/progs/test_skb_ctx.c b/tools/testing/selftests/bpf/progs/test_skb_ctx.c
index 2a9f4c736ebc..e18da87fe84f 100644
--- a/tools/testing/selftests/bpf/progs/test_skb_ctx.c
+++ b/tools/testing/selftests/bpf/progs/test_skb_ctx.c
@@ -17,6 +17,12 @@ int process(struct __sk_buff *skb)
}
skb->priority++;
skb->tstamp++;
+ skb->mark++;
+
+ if (skb->wire_len != 100)
+ return 1;
+ if (skb->gso_segs != 8)
+ return 1;
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/test_skeleton.c b/tools/testing/selftests/bpf/progs/test_skeleton.c
new file mode 100644
index 000000000000..4f69aac5635f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_skeleton.c
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 Facebook */
+
+#include <stdbool.h>
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+struct s {
+ int a;
+ long long b;
+} __attribute__((packed));
+
+int in1 = 0;
+long long in2 = 0;
+char in3 = '\0';
+long long in4 __attribute__((aligned(64))) = 0;
+struct s in5 = {};
+
+long long out2 = 0;
+char out3 = 0;
+long long out4 = 0;
+int out1 = 0;
+
+extern bool CONFIG_BPF_SYSCALL __kconfig;
+extern int LINUX_KERNEL_VERSION __kconfig;
+bool bpf_syscall = 0;
+int kern_ver = 0;
+
+SEC("raw_tp/sys_enter")
+int handler(const void *ctx)
+{
+ static volatile struct s out5;
+
+ out1 = in1;
+ out2 = in2;
+ out3 = in3;
+ out4 = in4;
+ out5 = in5;
+
+ bpf_syscall = CONFIG_BPF_SYSCALL;
+ kern_ver = LINUX_KERNEL_VERSION;
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_cgroup_attach.c b/tools/testing/selftests/bpf/test_cgroup_attach.c
deleted file mode 100644
index 7671909ee1cb..000000000000
--- a/tools/testing/selftests/bpf/test_cgroup_attach.c
+++ /dev/null
@@ -1,571 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-/* eBPF example program:
- *
- * - Creates arraymap in kernel with 4 bytes keys and 8 byte values
- *
- * - Loads eBPF program
- *
- * The eBPF program accesses the map passed in to store two pieces of
- * information. The number of invocations of the program, which maps
- * to the number of packets received, is stored to key 0. Key 1 is
- * incremented on each iteration by the number of bytes stored in
- * the skb. The program also stores the number of received bytes
- * in the cgroup storage.
- *
- * - Attaches the new program to a cgroup using BPF_PROG_ATTACH
- *
- * - Every second, reads map[0] and map[1] to see how many bytes and
- * packets were seen on any socket of tasks in the given cgroup.
- */
-
-#define _GNU_SOURCE
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <assert.h>
-#include <sys/resource.h>
-#include <sys/time.h>
-#include <unistd.h>
-#include <linux/filter.h>
-
-#include <linux/bpf.h>
-#include <bpf/bpf.h>
-
-#include "bpf_util.h"
-#include "bpf_rlimit.h"
-#include "cgroup_helpers.h"
-
-#define FOO "/foo"
-#define BAR "/foo/bar/"
-#define PING_CMD "ping -q -c1 -w1 127.0.0.1 > /dev/null"
-
-char bpf_log_buf[BPF_LOG_BUF_SIZE];
-
-#ifdef DEBUG
-#define debug(args...) printf(args)
-#else
-#define debug(args...)
-#endif
-
-static int prog_load(int verdict)
-{
- int ret;
- struct bpf_insn prog[] = {
- BPF_MOV64_IMM(BPF_REG_0, verdict), /* r0 = verdict */
- BPF_EXIT_INSN(),
- };
- size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn);
-
- ret = bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB,
- prog, insns_cnt, "GPL", 0,
- bpf_log_buf, BPF_LOG_BUF_SIZE);
-
- if (ret < 0) {
- log_err("Loading program");
- printf("Output from verifier:\n%s\n-------\n", bpf_log_buf);
- return 0;
- }
- return ret;
-}
-
-static int test_foo_bar(void)
-{
- int drop_prog, allow_prog, foo = 0, bar = 0, rc = 0;
-
- allow_prog = prog_load(1);
- if (!allow_prog)
- goto err;
-
- drop_prog = prog_load(0);
- if (!drop_prog)
- goto err;
-
- if (setup_cgroup_environment())
- goto err;
-
- /* Create cgroup /foo, get fd, and join it */
- foo = create_and_get_cgroup(FOO);
- if (foo < 0)
- goto err;
-
- if (join_cgroup(FOO))
- goto err;
-
- if (bpf_prog_attach(drop_prog, foo, BPF_CGROUP_INET_EGRESS,
- BPF_F_ALLOW_OVERRIDE)) {
- log_err("Attaching prog to /foo");
- goto err;
- }
-
- debug("Attached DROP prog. This ping in cgroup /foo should fail...\n");
- assert(system(PING_CMD) != 0);
-
- /* Create cgroup /foo/bar, get fd, and join it */
- bar = create_and_get_cgroup(BAR);
- if (bar < 0)
- goto err;
-
- if (join_cgroup(BAR))
- goto err;
-
- debug("Attached DROP prog. This ping in cgroup /foo/bar should fail...\n");
- assert(system(PING_CMD) != 0);
-
- if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS,
- BPF_F_ALLOW_OVERRIDE)) {
- log_err("Attaching prog to /foo/bar");
- goto err;
- }
-
- debug("Attached PASS prog. This ping in cgroup /foo/bar should pass...\n");
- assert(system(PING_CMD) == 0);
-
- if (bpf_prog_detach(bar, BPF_CGROUP_INET_EGRESS)) {
- log_err("Detaching program from /foo/bar");
- goto err;
- }
-
- debug("Detached PASS from /foo/bar while DROP is attached to /foo.\n"
- "This ping in cgroup /foo/bar should fail...\n");
- assert(system(PING_CMD) != 0);
-
- if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS,
- BPF_F_ALLOW_OVERRIDE)) {
- log_err("Attaching prog to /foo/bar");
- goto err;
- }
-
- if (bpf_prog_detach(foo, BPF_CGROUP_INET_EGRESS)) {
- log_err("Detaching program from /foo");
- goto err;
- }
-
- debug("Attached PASS from /foo/bar and detached DROP from /foo.\n"
- "This ping in cgroup /foo/bar should pass...\n");
- assert(system(PING_CMD) == 0);
-
- if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS,
- BPF_F_ALLOW_OVERRIDE)) {
- log_err("Attaching prog to /foo/bar");
- goto err;
- }
-
- if (!bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 0)) {
- errno = 0;
- log_err("Unexpected success attaching prog to /foo/bar");
- goto err;
- }
-
- if (bpf_prog_detach(bar, BPF_CGROUP_INET_EGRESS)) {
- log_err("Detaching program from /foo/bar");
- goto err;
- }
-
- if (!bpf_prog_detach(foo, BPF_CGROUP_INET_EGRESS)) {
- errno = 0;
- log_err("Unexpected success in double detach from /foo");
- goto err;
- }
-
- if (bpf_prog_attach(allow_prog, foo, BPF_CGROUP_INET_EGRESS, 0)) {
- log_err("Attaching non-overridable prog to /foo");
- goto err;
- }
-
- if (!bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 0)) {
- errno = 0;
- log_err("Unexpected success attaching non-overridable prog to /foo/bar");
- goto err;
- }
-
- if (!bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS,
- BPF_F_ALLOW_OVERRIDE)) {
- errno = 0;
- log_err("Unexpected success attaching overridable prog to /foo/bar");
- goto err;
- }
-
- if (!bpf_prog_attach(allow_prog, foo, BPF_CGROUP_INET_EGRESS,
- BPF_F_ALLOW_OVERRIDE)) {
- errno = 0;
- log_err("Unexpected success attaching overridable prog to /foo");
- goto err;
- }
-
- if (bpf_prog_attach(drop_prog, foo, BPF_CGROUP_INET_EGRESS, 0)) {
- log_err("Attaching different non-overridable prog to /foo");
- goto err;
- }
-
- goto out;
-
-err:
- rc = 1;
-
-out:
- close(foo);
- close(bar);
- cleanup_cgroup_environment();
- if (!rc)
- printf("#override:PASS\n");
- else
- printf("#override:FAIL\n");
- return rc;
-}
-
-static int map_fd = -1;
-
-static int prog_load_cnt(int verdict, int val)
-{
- int cgroup_storage_fd, percpu_cgroup_storage_fd;
-
- if (map_fd < 0)
- map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, 4, 8, 1, 0);
- if (map_fd < 0) {
- printf("failed to create map '%s'\n", strerror(errno));
- return -1;
- }
-
- cgroup_storage_fd = bpf_create_map(BPF_MAP_TYPE_CGROUP_STORAGE,
- sizeof(struct bpf_cgroup_storage_key), 8, 0, 0);
- if (cgroup_storage_fd < 0) {
- printf("failed to create map '%s'\n", strerror(errno));
- return -1;
- }
-
- percpu_cgroup_storage_fd = bpf_create_map(
- BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
- sizeof(struct bpf_cgroup_storage_key), 8, 0, 0);
- if (percpu_cgroup_storage_fd < 0) {
- printf("failed to create map '%s'\n", strerror(errno));
- return -1;
- }
-
- struct bpf_insn prog[] = {
- BPF_MOV32_IMM(BPF_REG_0, 0),
- BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), /* *(u32 *)(fp - 4) = r0 */
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), /* r2 = fp - 4 */
- BPF_LD_MAP_FD(BPF_REG_1, map_fd),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
- BPF_MOV64_IMM(BPF_REG_1, val), /* r1 = 1 */
- BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */
-
- BPF_LD_MAP_FD(BPF_REG_1, cgroup_storage_fd),
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage),
- BPF_MOV64_IMM(BPF_REG_1, val),
- BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_W, BPF_REG_0, BPF_REG_1, 0, 0),
-
- BPF_LD_MAP_FD(BPF_REG_1, percpu_cgroup_storage_fd),
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 0x1),
- BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_3, 0),
-
- BPF_MOV64_IMM(BPF_REG_0, verdict), /* r0 = verdict */
- BPF_EXIT_INSN(),
- };
- size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn);
- int ret;
-
- ret = bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB,
- prog, insns_cnt, "GPL", 0,
- bpf_log_buf, BPF_LOG_BUF_SIZE);
-
- if (ret < 0) {
- log_err("Loading program");
- printf("Output from verifier:\n%s\n-------\n", bpf_log_buf);
- return 0;
- }
- close(cgroup_storage_fd);
- return ret;
-}
-
-
-static int test_multiprog(void)
-{
- __u32 prog_ids[4], prog_cnt = 0, attach_flags, saved_prog_id;
- int cg1 = 0, cg2 = 0, cg3 = 0, cg4 = 0, cg5 = 0, key = 0;
- int drop_prog, allow_prog[6] = {}, rc = 0;
- unsigned long long value;
- int i = 0;
-
- for (i = 0; i < 6; i++) {
- allow_prog[i] = prog_load_cnt(1, 1 << i);
- if (!allow_prog[i])
- goto err;
- }
- drop_prog = prog_load_cnt(0, 1);
- if (!drop_prog)
- goto err;
-
- if (setup_cgroup_environment())
- goto err;
-
- cg1 = create_and_get_cgroup("/cg1");
- if (cg1 < 0)
- goto err;
- cg2 = create_and_get_cgroup("/cg1/cg2");
- if (cg2 < 0)
- goto err;
- cg3 = create_and_get_cgroup("/cg1/cg2/cg3");
- if (cg3 < 0)
- goto err;
- cg4 = create_and_get_cgroup("/cg1/cg2/cg3/cg4");
- if (cg4 < 0)
- goto err;
- cg5 = create_and_get_cgroup("/cg1/cg2/cg3/cg4/cg5");
- if (cg5 < 0)
- goto err;
-
- if (join_cgroup("/cg1/cg2/cg3/cg4/cg5"))
- goto err;
-
- if (bpf_prog_attach(allow_prog[0], cg1, BPF_CGROUP_INET_EGRESS,
- BPF_F_ALLOW_MULTI)) {
- log_err("Attaching prog to cg1");
- goto err;
- }
- if (!bpf_prog_attach(allow_prog[0], cg1, BPF_CGROUP_INET_EGRESS,
- BPF_F_ALLOW_MULTI)) {
- log_err("Unexpected success attaching the same prog to cg1");
- goto err;
- }
- if (bpf_prog_attach(allow_prog[1], cg1, BPF_CGROUP_INET_EGRESS,
- BPF_F_ALLOW_MULTI)) {
- log_err("Attaching prog2 to cg1");
- goto err;
- }
- if (bpf_prog_attach(allow_prog[2], cg2, BPF_CGROUP_INET_EGRESS,
- BPF_F_ALLOW_OVERRIDE)) {
- log_err("Attaching prog to cg2");
- goto err;
- }
- if (bpf_prog_attach(allow_prog[3], cg3, BPF_CGROUP_INET_EGRESS,
- BPF_F_ALLOW_MULTI)) {
- log_err("Attaching prog to cg3");
- goto err;
- }
- if (bpf_prog_attach(allow_prog[4], cg4, BPF_CGROUP_INET_EGRESS,
- BPF_F_ALLOW_OVERRIDE)) {
- log_err("Attaching prog to cg4");
- goto err;
- }
- if (bpf_prog_attach(allow_prog[5], cg5, BPF_CGROUP_INET_EGRESS, 0)) {
- log_err("Attaching prog to cg5");
- goto err;
- }
- assert(system(PING_CMD) == 0);
- assert(bpf_map_lookup_elem(map_fd, &key, &value) == 0);
- assert(value == 1 + 2 + 8 + 32);
-
- /* query the number of effective progs in cg5 */
- assert(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, BPF_F_QUERY_EFFECTIVE,
- NULL, NULL, &prog_cnt) == 0);
- assert(prog_cnt == 4);
- /* retrieve prog_ids of effective progs in cg5 */
- assert(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, BPF_F_QUERY_EFFECTIVE,
- &attach_flags, prog_ids, &prog_cnt) == 0);
- assert(prog_cnt == 4);
- assert(attach_flags == 0);
- saved_prog_id = prog_ids[0];
- /* check enospc handling */
- prog_ids[0] = 0;
- prog_cnt = 2;
- assert(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, BPF_F_QUERY_EFFECTIVE,
- &attach_flags, prog_ids, &prog_cnt) == -1 &&
- errno == ENOSPC);
- assert(prog_cnt == 4);
- /* check that prog_ids are returned even when buffer is too small */
- assert(prog_ids[0] == saved_prog_id);
- /* retrieve prog_id of single attached prog in cg5 */
- prog_ids[0] = 0;
- assert(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, 0,
- NULL, prog_ids, &prog_cnt) == 0);
- assert(prog_cnt == 1);
- assert(prog_ids[0] == saved_prog_id);
-
- /* detach bottom program and ping again */
- if (bpf_prog_detach2(-1, cg5, BPF_CGROUP_INET_EGRESS)) {
- log_err("Detaching prog from cg5");
- goto err;
- }
- value = 0;
- assert(bpf_map_update_elem(map_fd, &key, &value, 0) == 0);
- assert(system(PING_CMD) == 0);
- assert(bpf_map_lookup_elem(map_fd, &key, &value) == 0);
- assert(value == 1 + 2 + 8 + 16);
-
- /* detach 3rd from bottom program and ping again */
- errno = 0;
- if (!bpf_prog_detach2(0, cg3, BPF_CGROUP_INET_EGRESS)) {
- log_err("Unexpected success on detach from cg3");
- goto err;
- }
- if (bpf_prog_detach2(allow_prog[3], cg3, BPF_CGROUP_INET_EGRESS)) {
- log_err("Detaching from cg3");
- goto err;
- }
- value = 0;
- assert(bpf_map_update_elem(map_fd, &key, &value, 0) == 0);
- assert(system(PING_CMD) == 0);
- assert(bpf_map_lookup_elem(map_fd, &key, &value) == 0);
- assert(value == 1 + 2 + 16);
-
- /* detach 2nd from bottom program and ping again */
- if (bpf_prog_detach2(-1, cg4, BPF_CGROUP_INET_EGRESS)) {
- log_err("Detaching prog from cg4");
- goto err;
- }
- value = 0;
- assert(bpf_map_update_elem(map_fd, &key, &value, 0) == 0);
- assert(system(PING_CMD) == 0);
- assert(bpf_map_lookup_elem(map_fd, &key, &value) == 0);
- assert(value == 1 + 2 + 4);
-
- prog_cnt = 4;
- assert(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, BPF_F_QUERY_EFFECTIVE,
- &attach_flags, prog_ids, &prog_cnt) == 0);
- assert(prog_cnt == 3);
- assert(attach_flags == 0);
- assert(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, 0,
- NULL, prog_ids, &prog_cnt) == 0);
- assert(prog_cnt == 0);
- goto out;
-err:
- rc = 1;
-
-out:
- for (i = 0; i < 6; i++)
- if (allow_prog[i] > 0)
- close(allow_prog[i]);
- close(cg1);
- close(cg2);
- close(cg3);
- close(cg4);
- close(cg5);
- cleanup_cgroup_environment();
- if (!rc)
- printf("#multi:PASS\n");
- else
- printf("#multi:FAIL\n");
- return rc;
-}
-
-static int test_autodetach(void)
-{
- __u32 prog_cnt = 4, attach_flags;
- int allow_prog[2] = {0};
- __u32 prog_ids[2] = {0};
- int cg = 0, i, rc = -1;
- void *ptr = NULL;
- int attempts;
-
- for (i = 0; i < ARRAY_SIZE(allow_prog); i++) {
- allow_prog[i] = prog_load_cnt(1, 1 << i);
- if (!allow_prog[i])
- goto err;
- }
-
- if (setup_cgroup_environment())
- goto err;
-
- /* create a cgroup, attach two programs and remember their ids */
- cg = create_and_get_cgroup("/cg_autodetach");
- if (cg < 0)
- goto err;
-
- if (join_cgroup("/cg_autodetach"))
- goto err;
-
- for (i = 0; i < ARRAY_SIZE(allow_prog); i++) {
- if (bpf_prog_attach(allow_prog[i], cg, BPF_CGROUP_INET_EGRESS,
- BPF_F_ALLOW_MULTI)) {
- log_err("Attaching prog[%d] to cg:egress", i);
- goto err;
- }
- }
-
- /* make sure that programs are attached and run some traffic */
- assert(bpf_prog_query(cg, BPF_CGROUP_INET_EGRESS, 0, &attach_flags,
- prog_ids, &prog_cnt) == 0);
- assert(system(PING_CMD) == 0);
-
- /* allocate some memory (4Mb) to pin the original cgroup */
- ptr = malloc(4 * (1 << 20));
- if (!ptr)
- goto err;
-
- /* close programs and cgroup fd */
- for (i = 0; i < ARRAY_SIZE(allow_prog); i++) {
- close(allow_prog[i]);
- allow_prog[i] = 0;
- }
-
- close(cg);
- cg = 0;
-
- /* leave the cgroup and remove it. don't detach programs */
- cleanup_cgroup_environment();
-
- /* wait for the asynchronous auto-detachment.
- * wait for no more than 5 sec and give up.
- */
- for (i = 0; i < ARRAY_SIZE(prog_ids); i++) {
- for (attempts = 5; attempts >= 0; attempts--) {
- int fd = bpf_prog_get_fd_by_id(prog_ids[i]);
-
- if (fd < 0)
- break;
-
- /* don't leave the fd open */
- close(fd);
-
- if (!attempts)
- goto err;
-
- sleep(1);
- }
- }
-
- rc = 0;
-err:
- for (i = 0; i < ARRAY_SIZE(allow_prog); i++)
- if (allow_prog[i] > 0)
- close(allow_prog[i]);
- if (cg)
- close(cg);
- free(ptr);
- cleanup_cgroup_environment();
- if (!rc)
- printf("#autodetach:PASS\n");
- else
- printf("#autodetach:FAIL\n");
- return rc;
-}
-
-int main(void)
-{
- int (*tests[])(void) = {
- test_foo_bar,
- test_multiprog,
- test_autodetach,
- };
- int errors = 0;
- int i;
-
- for (i = 0; i < ARRAY_SIZE(tests); i++)
- if (tests[i]())
- errors++;
-
- if (errors)
- printf("test_cgroup_attach:FAIL\n");
- else
- printf("test_cgroup_attach:PASS\n");
-
- return errors ? EXIT_FAILURE : EXIT_SUCCESS;
-}
diff --git a/tools/testing/selftests/bpf/test_cpp.cpp b/tools/testing/selftests/bpf/test_cpp.cpp
index f0eb2727b766..6fe23a10d48a 100644
--- a/tools/testing/selftests/bpf/test_cpp.cpp
+++ b/tools/testing/selftests/bpf/test_cpp.cpp
@@ -1,12 +1,16 @@
/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+#include <iostream>
#include "libbpf.h"
#include "bpf.h"
#include "btf.h"
+#include "test_core_extern.skel.h"
/* do nothing, just make sure we can link successfully */
int main(int argc, char *argv[])
{
+ struct test_core_extern *skel;
+
/* libbpf.h */
libbpf_set_print(NULL);
@@ -16,5 +20,11 @@ int main(int argc, char *argv[])
/* btf.h */
btf__new(NULL, 0);
+ /* BPF skeleton */
+ skel = test_core_extern__open_and_load();
+ test_core_extern__destroy(skel);
+
+ std::cout << "DONE!" << std::endl;
+
return 0;
}
diff --git a/tools/testing/selftests/bpf/test_ftrace.sh b/tools/testing/selftests/bpf/test_ftrace.sh
new file mode 100755
index 000000000000..20de7bb873bc
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_ftrace.sh
@@ -0,0 +1,39 @@
+#!/bin/bash
+
+TR=/sys/kernel/debug/tracing/
+clear_trace() { # reset trace output
+ echo > $TR/trace
+}
+
+disable_tracing() { # stop trace recording
+ echo 0 > $TR/tracing_on
+}
+
+enable_tracing() { # start trace recording
+ echo 1 > $TR/tracing_on
+}
+
+reset_tracer() { # reset the current tracer
+ echo nop > $TR/current_tracer
+}
+
+disable_tracing
+clear_trace
+
+echo "" > $TR/set_ftrace_filter
+echo '*printk* *console* *wake* *serial* *lock*' > $TR/set_ftrace_notrace
+
+echo "bpf_prog_test*" > $TR/set_graph_function
+echo "" > $TR/set_graph_notrace
+
+echo function_graph > $TR/current_tracer
+
+enable_tracing
+./test_progs -t fentry
+./test_progs -t fexit
+disable_tracing
+clear_trace
+
+reset_tracer
+
+exit 0
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index 8477df835979..de1fdaa4e7b4 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -100,6 +100,7 @@ extern struct ipv6_packet pkt_v6;
#define _CHECK(condition, tag, duration, format...) ({ \
int __ret = !!(condition); \
+ int __save_errno = errno; \
if (__ret) { \
test__fail(); \
printf("%s:FAIL:%s ", __func__, tag); \
@@ -108,15 +109,18 @@ extern struct ipv6_packet pkt_v6;
printf("%s:PASS:%s %d nsec\n", \
__func__, tag, duration); \
} \
+ errno = __save_errno; \
__ret; \
})
#define CHECK_FAIL(condition) ({ \
int __ret = !!(condition); \
+ int __save_errno = errno; \
if (__ret) { \
test__fail(); \
printf("%s:FAIL:%d\n", __func__, __LINE__); \
} \
+ errno = __save_errno; \
__ret; \
})
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index d27fd929abb9..87eaa49609a0 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -408,10 +408,10 @@ static void update_map(int fd, int index)
assert(!bpf_map_update_elem(fd, &index, &value, 0));
}
-static int create_prog_dummy1(enum bpf_prog_type prog_type)
+static int create_prog_dummy_simple(enum bpf_prog_type prog_type, int ret)
{
struct bpf_insn prog[] = {
- BPF_MOV64_IMM(BPF_REG_0, 42),
+ BPF_MOV64_IMM(BPF_REG_0, ret),
BPF_EXIT_INSN(),
};
@@ -419,14 +419,15 @@ static int create_prog_dummy1(enum bpf_prog_type prog_type)
ARRAY_SIZE(prog), "GPL", 0, NULL, 0);
}
-static int create_prog_dummy2(enum bpf_prog_type prog_type, int mfd, int idx)
+static int create_prog_dummy_loop(enum bpf_prog_type prog_type, int mfd,
+ int idx, int ret)
{
struct bpf_insn prog[] = {
BPF_MOV64_IMM(BPF_REG_3, idx),
BPF_LD_MAP_FD(BPF_REG_2, mfd),
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
BPF_FUNC_tail_call),
- BPF_MOV64_IMM(BPF_REG_0, 41),
+ BPF_MOV64_IMM(BPF_REG_0, ret),
BPF_EXIT_INSN(),
};
@@ -435,10 +436,9 @@ static int create_prog_dummy2(enum bpf_prog_type prog_type, int mfd, int idx)
}
static int create_prog_array(enum bpf_prog_type prog_type, uint32_t max_elem,
- int p1key)
+ int p1key, int p2key, int p3key)
{
- int p2key = 1;
- int mfd, p1fd, p2fd;
+ int mfd, p1fd, p2fd, p3fd;
mfd = bpf_create_map(BPF_MAP_TYPE_PROG_ARRAY, sizeof(int),
sizeof(int), max_elem, 0);
@@ -449,23 +449,24 @@ static int create_prog_array(enum bpf_prog_type prog_type, uint32_t max_elem,
return -1;
}
- p1fd = create_prog_dummy1(prog_type);
- p2fd = create_prog_dummy2(prog_type, mfd, p2key);
- if (p1fd < 0 || p2fd < 0)
- goto out;
+ p1fd = create_prog_dummy_simple(prog_type, 42);
+ p2fd = create_prog_dummy_loop(prog_type, mfd, p2key, 41);
+ p3fd = create_prog_dummy_simple(prog_type, 24);
+ if (p1fd < 0 || p2fd < 0 || p3fd < 0)
+ goto err;
if (bpf_map_update_elem(mfd, &p1key, &p1fd, BPF_ANY) < 0)
- goto out;
+ goto err;
if (bpf_map_update_elem(mfd, &p2key, &p2fd, BPF_ANY) < 0)
- goto out;
+ goto err;
+ if (bpf_map_update_elem(mfd, &p3key, &p3fd, BPF_ANY) < 0) {
+err:
+ close(mfd);
+ mfd = -1;
+ }
+ close(p3fd);
close(p2fd);
close(p1fd);
-
return mfd;
-out:
- close(p2fd);
- close(p1fd);
- close(mfd);
- return -1;
}
static int create_map_in_map(void)
@@ -684,7 +685,7 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
}
if (*fixup_prog1) {
- map_fds[4] = create_prog_array(prog_type, 4, 0);
+ map_fds[4] = create_prog_array(prog_type, 4, 0, 1, 2);
do {
prog[*fixup_prog1].imm = map_fds[4];
fixup_prog1++;
@@ -692,7 +693,7 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
}
if (*fixup_prog2) {
- map_fds[5] = create_prog_array(prog_type, 8, 7);
+ map_fds[5] = create_prog_array(prog_type, 8, 7, 1, 2);
do {
prog[*fixup_prog2].imm = map_fds[5];
fixup_prog2++;
diff --git a/tools/testing/selftests/bpf/verifier/ref_tracking.c b/tools/testing/selftests/bpf/verifier/ref_tracking.c
index ebcbf154c460..604b46151736 100644
--- a/tools/testing/selftests/bpf/verifier/ref_tracking.c
+++ b/tools/testing/selftests/bpf/verifier/ref_tracking.c
@@ -455,7 +455,7 @@
BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 7),
/* bpf_tail_call() */
- BPF_MOV64_IMM(BPF_REG_3, 2),
+ BPF_MOV64_IMM(BPF_REG_3, 3),
BPF_LD_MAP_FD(BPF_REG_2, 0),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call),
@@ -478,7 +478,7 @@
BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
BPF_EMIT_CALL(BPF_FUNC_sk_release),
/* bpf_tail_call() */
- BPF_MOV64_IMM(BPF_REG_3, 2),
+ BPF_MOV64_IMM(BPF_REG_3, 3),
BPF_LD_MAP_FD(BPF_REG_2, 0),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call),
@@ -497,7 +497,7 @@
BPF_SK_LOOKUP(sk_lookup_tcp),
/* bpf_tail_call() */
BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_3, 2),
+ BPF_MOV64_IMM(BPF_REG_3, 3),
BPF_LD_MAP_FD(BPF_REG_2, 0),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call),
diff --git a/tools/testing/selftests/bpf/verifier/runtime_jit.c b/tools/testing/selftests/bpf/verifier/runtime_jit.c
index a9a8f620e71c..94c399d1faca 100644
--- a/tools/testing/selftests/bpf/verifier/runtime_jit.c
+++ b/tools/testing/selftests/bpf/verifier/runtime_jit.c
@@ -27,6 +27,19 @@
{
"runtime/jit: tail_call within bounds, no prog",
.insns = {
+ BPF_MOV64_IMM(BPF_REG_3, 3),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_prog1 = { 1 },
+ .result = ACCEPT,
+ .retval = 1,
+},
+{
+ "runtime/jit: tail_call within bounds, key 2",
+ .insns = {
BPF_MOV64_IMM(BPF_REG_3, 2),
BPF_LD_MAP_FD(BPF_REG_2, 0),
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call),
@@ -35,9 +48,147 @@
},
.fixup_prog1 = { 1 },
.result = ACCEPT,
+ .retval = 24,
+},
+{
+ "runtime/jit: tail_call within bounds, key 2 / key 2, first branch",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 13),
+ BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 13, 4),
+ BPF_MOV64_IMM(BPF_REG_3, 2),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 3),
+ BPF_MOV64_IMM(BPF_REG_3, 2),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_prog1 = { 5, 9 },
+ .result = ACCEPT,
+ .retval = 24,
+},
+{
+ "runtime/jit: tail_call within bounds, key 2 / key 2, second branch",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 14),
+ BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 13, 4),
+ BPF_MOV64_IMM(BPF_REG_3, 2),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 3),
+ BPF_MOV64_IMM(BPF_REG_3, 2),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_prog1 = { 5, 9 },
+ .result = ACCEPT,
+ .retval = 24,
+},
+{
+ "runtime/jit: tail_call within bounds, key 0 / key 2, first branch",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 13),
+ BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 13, 4),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 3),
+ BPF_MOV64_IMM(BPF_REG_3, 2),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_prog1 = { 5, 9 },
+ .result = ACCEPT,
+ .retval = 24,
+},
+{
+ "runtime/jit: tail_call within bounds, key 0 / key 2, second branch",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 14),
+ BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 13, 4),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 3),
+ BPF_MOV64_IMM(BPF_REG_3, 2),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_prog1 = { 5, 9 },
+ .result = ACCEPT,
+ .retval = 42,
+},
+{
+ "runtime/jit: tail_call within bounds, different maps, first branch",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 13),
+ BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 13, 4),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 3),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_prog1 = { 5 },
+ .fixup_prog2 = { 9 },
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "tail_call abusing map_ptr",
+ .result = ACCEPT,
.retval = 1,
},
{
+ "runtime/jit: tail_call within bounds, different maps, second branch",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 14),
+ BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 13, 4),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 3),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_prog1 = { 5 },
+ .fixup_prog2 = { 9 },
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "tail_call abusing map_ptr",
+ .result = ACCEPT,
+ .retval = 42,
+},
+{
"runtime/jit: tail_call out of bounds",
.insns = {
BPF_MOV64_IMM(BPF_REG_3, 256),
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_defprio.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_defprio.sh
new file mode 100755
index 000000000000..eff6393ce974
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_defprio.sh
@@ -0,0 +1,176 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test for port-default priority. Non-IP packets ingress $swp1 and are
+# prioritized according to the default priority specified at the port.
+# rx_octets_prio_* counters are used to verify the prioritization.
+#
+# +-----------------------+
+# | H1 |
+# | + $h1 |
+# | | 192.0.2.1/28 |
+# +----|------------------+
+# |
+# +----|------------------+
+# | SW | |
+# | + $swp1 |
+# | 192.0.2.2/28 |
+# | APP=<prio>,1,0 |
+# +-----------------------+
+
+ALL_TESTS="
+ ping_ipv4
+ test_defprio
+"
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+NUM_NETIFS=2
+: ${HIT_TIMEOUT:=1000} # ms
+source $lib_dir/lib.sh
+
+declare -a APP
+
+defprio_install()
+{
+ local dev=$1; shift
+ local prio=$1; shift
+ local app="app=$prio,1,0"
+
+ lldptool -T -i $dev -V APP $app >/dev/null
+ lldpad_app_wait_set $dev
+ APP[$prio]=$app
+}
+
+defprio_uninstall()
+{
+ local dev=$1; shift
+ local prio=$1; shift
+ local app=${APP[$prio]}
+
+ lldptool -T -i $dev -V APP -d $app >/dev/null
+ lldpad_app_wait_del
+ unset APP[$prio]
+}
+
+defprio_flush()
+{
+ local dev=$1; shift
+ local prio
+
+ if ((${#APP[@]})); then
+ lldptool -T -i $dev -V APP -d ${APP[@]} >/dev/null
+ fi
+ lldpad_app_wait_del
+ APP=()
+}
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/28
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/28
+}
+
+switch_create()
+{
+ ip link set dev $swp1 up
+ ip addr add dev $swp1 192.0.2.2/28
+}
+
+switch_destroy()
+{
+ defprio_flush $swp1
+ ip addr del dev $swp1 192.0.2.2/28
+ ip link set dev $swp1 down
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ vrf_prepare
+
+ h1_create
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+ping_ipv4()
+{
+ ping_test $h1 192.0.2.2
+}
+
+wait_for_packets()
+{
+ local t0=$1; shift
+ local prio_observe=$1; shift
+
+ local t1=$(ethtool_stats_get $swp1 rx_frames_prio_$prio_observe)
+ local delta=$((t1 - t0))
+ echo $delta
+ ((delta >= 10))
+}
+
+__test_defprio()
+{
+ local prio_install=$1; shift
+ local prio_observe=$1; shift
+ local delta
+ local key
+ local i
+
+ RET=0
+
+ defprio_install $swp1 $prio_install
+
+ local t0=$(ethtool_stats_get $swp1 rx_frames_prio_$prio_observe)
+ mausezahn -q $h1 -d 100m -c 10 -t arp reply
+ delta=$(busywait "$HIT_TIMEOUT" wait_for_packets $t0 $prio_observe)
+
+ check_err $? "Default priority $prio_install/$prio_observe: Expected to capture 10 packets, got $delta."
+ log_test "Default priority $prio_install/$prio_observe"
+
+ defprio_uninstall $swp1 $prio_install
+}
+
+test_defprio()
+{
+ local prio
+
+ for prio in {0..7}; do
+ __test_defprio $prio $prio
+ done
+
+ defprio_install $swp1 3
+ __test_defprio 0 3
+ __test_defprio 1 3
+ __test_defprio 2 3
+ __test_defprio 4 4
+ __test_defprio 5 5
+ __test_defprio 6 6
+ __test_defprio 7 7
+ defprio_uninstall $swp1 3
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh
index e80be65799ad..a5937069ac16 100644
--- a/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh
@@ -24,24 +24,6 @@ rate()
echo $((8 * (t1 - t0) / interval))
}
-start_traffic()
-{
- local h_in=$1; shift # Where the traffic egresses the host
- local sip=$1; shift
- local dip=$1; shift
- local dmac=$1; shift
-
- $MZ $h_in -p 8000 -A $sip -B $dip -c 0 \
- -a own -b $dmac -t udp -q &
- sleep 1
-}
-
-stop_traffic()
-{
- # Suppress noise from killing mausezahn.
- { kill %% && wait %%; } 2>/dev/null
-}
-
check_rate()
{
local rate=$1; shift
@@ -96,3 +78,31 @@ measure_rate()
echo $ir $er
return $ret
}
+
+bail_on_lldpad()
+{
+ if systemctl is-active --quiet lldpad; then
+
+ cat >/dev/stderr <<-EOF
+ WARNING: lldpad is running
+
+ lldpad will likely configure DCB, and this test will
+ configure Qdiscs. mlxsw does not support both at the
+ same time, one of them is arbitrarily going to overwrite
+ the other. That will cause spurious failures (or,
+ unlikely, passes) of this test.
+ EOF
+
+ if [[ -z $ALLOW_LLDPAD ]]; then
+ cat >/dev/stderr <<-EOF
+
+ If you want to run the test anyway, please set
+ an environment variable ALLOW_LLDPAD to a
+ non-empty string.
+ EOF
+ exit 1
+ else
+ return
+ fi
+ fi
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh
new file mode 100755
index 000000000000..c9fc4d4885c1
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh
@@ -0,0 +1,67 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# A driver for the ETS selftest that implements testing in offloaded datapath.
+lib_dir=$(dirname $0)/../../../net/forwarding
+source $lib_dir/sch_ets_core.sh
+source $lib_dir/devlink_lib.sh
+source qos_lib.sh
+
+ALL_TESTS="
+ ping_ipv4
+ priomap_mode
+ ets_test_strict
+ ets_test_mixed
+ ets_test_dwrr
+"
+
+switch_create()
+{
+ ets_switch_create
+
+ # Create a bottleneck so that the DWRR process can kick in.
+ ethtool -s $h2 speed 1000 autoneg off
+ ethtool -s $swp2 speed 1000 autoneg off
+
+ # Set the ingress quota high and use the three egress TCs to limit the
+ # amount of traffic that is admitted to the shared buffers. This makes
+ # sure that there is always enough traffic of all types to select from
+ # for the DWRR process.
+ devlink_port_pool_th_set $swp1 0 12
+ devlink_tc_bind_pool_th_set $swp1 0 ingress 0 12
+ devlink_port_pool_th_set $swp2 4 12
+ devlink_tc_bind_pool_th_set $swp2 7 egress 4 5
+ devlink_tc_bind_pool_th_set $swp2 6 egress 4 5
+ devlink_tc_bind_pool_th_set $swp2 5 egress 4 5
+
+ # Note: sch_ets_core.sh uses VLAN ingress-qos-map to assign packet
+ # priorities at $swp1 based on their 802.1p headers. ingress-qos-map is
+ # not offloaded by mlxsw as of this writing, but the mapping used is
+ # 1:1, which is the mapping currently hard-coded by the driver.
+}
+
+switch_destroy()
+{
+ devlink_tc_bind_pool_th_restore $swp2 5 egress
+ devlink_tc_bind_pool_th_restore $swp2 6 egress
+ devlink_tc_bind_pool_th_restore $swp2 7 egress
+ devlink_port_pool_th_restore $swp2 4
+ devlink_tc_bind_pool_th_restore $swp1 0 ingress
+ devlink_port_pool_th_restore $swp1 0
+
+ ethtool -s $swp2 autoneg on
+ ethtool -s $h2 autoneg on
+
+ ets_switch_destroy
+}
+
+# Callback from sch_ets_tests.sh
+get_stats()
+{
+ local band=$1; shift
+
+ ethtool_stats_get "$h2" rx_octets_prio_$band
+}
+
+bail_on_lldpad
+ets_run
diff --git a/tools/testing/selftests/filesystems/epoll/Makefile b/tools/testing/selftests/filesystems/epoll/Makefile
index e62f3d4f68da..78ae4aaf7141 100644
--- a/tools/testing/selftests/filesystems/epoll/Makefile
+++ b/tools/testing/selftests/filesystems/epoll/Makefile
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
CFLAGS += -I../../../../../usr/include/
-LDFLAGS += -lpthread
+LDLIBS += -lpthread
TEST_GEN_PROGS := epoll_wakeup_test
include ../../lib.mk
diff --git a/tools/testing/selftests/firmware/fw_lib.sh b/tools/testing/selftests/firmware/fw_lib.sh
index b879305a766d..5b8c0fedee76 100755
--- a/tools/testing/selftests/firmware/fw_lib.sh
+++ b/tools/testing/selftests/firmware/fw_lib.sh
@@ -34,6 +34,12 @@ test_modprobe()
check_mods()
{
+ local uid=$(id -u)
+ if [ $uid -ne 0 ]; then
+ echo "skip all tests: must be run as root" >&2
+ exit $ksft_skip
+ fi
+
trap "test_modprobe" EXIT
if [ ! -d $DIR ]; then
modprobe test_firmware
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc
index 36fb59f886ea..1a52f2883fe0 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc
@@ -3,6 +3,8 @@
# description: ftrace - stacktrace filter command
# flags: instance
+[ ! -f set_ftrace_filter ] && exit_unsupported
+
echo _do_fork:stacktrace >> set_ftrace_filter
grep -q "_do_fork:stacktrace:unlimited" set_ftrace_filter
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_cpumask.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_cpumask.tc
index 86a1f07ef2ca..71fa3f49e35e 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/func_cpumask.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_cpumask.tc
@@ -15,6 +15,11 @@ if [ $NP -eq 1 ] ;then
exit_unresolved
fi
+if ! grep -q "function" available_tracers ; then
+ echo "Function trace is not enabled"
+ exit_unsupported
+fi
+
ORIG_CPUMASK=`cat tracing_cpumask`
do_reset() {
diff --git a/tools/testing/selftests/ftrace/test.d/functions b/tools/testing/selftests/ftrace/test.d/functions
index 86986c4bba54..5d4550591ff9 100644
--- a/tools/testing/selftests/ftrace/test.d/functions
+++ b/tools/testing/selftests/ftrace/test.d/functions
@@ -46,6 +46,9 @@ reset_events_filter() { # reset all current setting filters
}
reset_ftrace_filter() { # reset all triggers in set_ftrace_filter
+ if [ ! -f set_ftrace_filter ]; then
+ return 0
+ fi
echo > set_ftrace_filter
grep -v '^#' set_ftrace_filter | while read t; do
tr=`echo $t | cut -d: -f2`
@@ -93,7 +96,7 @@ initialize_ftrace() { # Reset ftrace to initial-state
disable_events
[ -f set_event_pid ] && echo > set_event_pid
[ -f set_ftrace_pid ] && echo > set_ftrace_pid
- [ -f set_ftrace_filter ] && echo | tee set_ftrace_*
+ [ -f set_ftrace_notrace ] && echo > set_ftrace_notrace
[ -f set_graph_function ] && echo | tee set_graph_*
[ -f stack_trace_filter ] && echo > stack_trace_filter
[ -f kprobe_events ] && echo > kprobe_events
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc b/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc
index 5862eee91e1d..6e3dbe5f96b7 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc
@@ -20,9 +20,9 @@ while read i; do
test $N -eq 256 && break
done
-L=`wc -l kprobe_events`
-if [ $L -ne $N ]; then
- echo "The number of kprobes events ($L) is not $N"
+L=`cat kprobe_events | wc -l`
+if [ $L -ne 256 ]; then
+ echo "The number of kprobes events ($L) is not 256"
exit_fail
fi
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-action-hist-xfail.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-action-hist-xfail.tc
index 1221240f8cf6..3f2aee115f6e 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-action-hist-xfail.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-action-hist-xfail.tc
@@ -21,10 +21,10 @@ grep -q "snapshot()" README || exit_unsupported # version issue
echo "Test expected snapshot action failure"
-echo 'hist:keys=comm:onmatch(sched.sched_wakeup).snapshot()' >> /sys/kernel/debug/tracing/events/sched/sched_waking/trigger && exit_fail
+echo 'hist:keys=comm:onmatch(sched.sched_wakeup).snapshot()' >> events/sched/sched_waking/trigger && exit_fail
echo "Test expected save action failure"
-echo 'hist:keys=comm:onmatch(sched.sched_wakeup).save(comm,prio)' >> /sys/kernel/debug/tracing/events/sched/sched_waking/trigger && exit_fail
+echo 'hist:keys=comm:onmatch(sched.sched_wakeup).save(comm,prio)' >> events/sched/sched_waking/trigger && exit_fail
exit_xfail
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onchange-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onchange-action-hist.tc
index 064a284e4e75..c80007aa9f86 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onchange-action-hist.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onchange-action-hist.tc
@@ -16,7 +16,7 @@ grep -q "onchange(var)" README || exit_unsupported # version issue
echo "Test onchange action"
-echo 'hist:keys=comm:newprio=prio:onchange($newprio).save(comm,prio) if comm=="ping"' >> /sys/kernel/debug/tracing/events/sched/sched_waking/trigger
+echo 'hist:keys=comm:newprio=prio:onchange($newprio).save(comm,prio) if comm=="ping"' >> events/sched/sched_waking/trigger
ping $LOCALHOST -c 3
nice -n 1 ping $LOCALHOST -c 3
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-snapshot-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-snapshot-action-hist.tc
index 18fff69fc433..f546c1b66a9b 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-snapshot-action-hist.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-snapshot-action-hist.tc
@@ -23,9 +23,9 @@ grep -q "snapshot()" README || exit_unsupported # version issue
echo "Test snapshot action"
-echo 1 > /sys/kernel/debug/tracing/events/sched/enable
+echo 1 > events/sched/enable
-echo 'hist:keys=comm:newprio=prio:onchange($newprio).save(comm,prio):onchange($newprio).snapshot() if comm=="ping"' >> /sys/kernel/debug/tracing/events/sched/sched_waking/trigger
+echo 'hist:keys=comm:newprio=prio:onchange($newprio).save(comm,prio):onchange($newprio).snapshot() if comm=="ping"' >> events/sched/sched_waking/trigger
ping $LOCALHOST -c 3
nice -n 1 ping $LOCALHOST -c 3
diff --git a/tools/testing/selftests/kselftest/module.sh b/tools/testing/selftests/kselftest/module.sh
index 18e1c7992d30..fb4733faff12 100755
--- a/tools/testing/selftests/kselftest/module.sh
+++ b/tools/testing/selftests/kselftest/module.sh
@@ -9,7 +9,7 @@
#
# #!/bin/sh
# SPDX-License-Identifier: GPL-2.0+
-# $(dirname $0)/../kselftest_module.sh "description" module_name
+# $(dirname $0)/../kselftest/module.sh "description" module_name
#
# Example: tools/testing/selftests/lib/printf.sh
diff --git a/tools/testing/selftests/kselftest/prefix.pl b/tools/testing/selftests/kselftest/prefix.pl
index ec7e48118183..31f7c2a0a8bd 100755
--- a/tools/testing/selftests/kselftest/prefix.pl
+++ b/tools/testing/selftests/kselftest/prefix.pl
@@ -3,6 +3,7 @@
# Prefix all lines with "# ", unbuffered. Command being piped in may need
# to have unbuffering forced with "stdbuf -i0 -o0 -e0 $cmd".
use strict;
+use IO::Handle;
binmode STDIN;
binmode STDOUT;
diff --git a/tools/testing/selftests/kselftest/runner.sh b/tools/testing/selftests/kselftest/runner.sh
index 84de7bc74f2c..a8d20cbb711c 100644
--- a/tools/testing/selftests/kselftest/runner.sh
+++ b/tools/testing/selftests/kselftest/runner.sh
@@ -79,6 +79,7 @@ run_one()
if [ $rc -eq $skip_rc ]; then \
echo "not ok $test_num $TEST_HDR_MSG # SKIP"
elif [ $rc -eq $timeout_rc ]; then \
+ echo "#"
echo "not ok $test_num $TEST_HDR_MSG # TIMEOUT"
else
echo "not ok $test_num $TEST_HDR_MSG # exit=$rc"
diff --git a/tools/testing/selftests/livepatch/functions.sh b/tools/testing/selftests/livepatch/functions.sh
index 31eb09e38729..a6e3d5517a6f 100644
--- a/tools/testing/selftests/livepatch/functions.sh
+++ b/tools/testing/selftests/livepatch/functions.sh
@@ -7,6 +7,9 @@
MAX_RETRIES=600
RETRY_INTERVAL=".1" # seconds
+# Kselftest framework requirement - SKIP code is 4
+ksft_skip=4
+
# log(msg) - write message to kernel log
# msg - insightful words
function log() {
@@ -18,7 +21,16 @@ function log() {
function skip() {
log "SKIP: $1"
echo "SKIP: $1" >&2
- exit 4
+ exit $ksft_skip
+}
+
+# root test
+function is_root() {
+ uid=$(id -u)
+ if [ $uid -ne 0 ]; then
+ echo "skip all tests: must be run as root" >&2
+ exit $ksft_skip
+ fi
}
# die(msg) - game over, man
@@ -62,6 +74,7 @@ function set_ftrace_enabled() {
# for verbose livepatching output and turn on
# the ftrace_enabled sysctl.
function setup_config() {
+ is_root
push_config
set_dynamic_debug
set_ftrace_enabled 1
diff --git a/tools/testing/selftests/livepatch/test-state.sh b/tools/testing/selftests/livepatch/test-state.sh
index dc2908c22c26..a08212708115 100755
--- a/tools/testing/selftests/livepatch/test-state.sh
+++ b/tools/testing/selftests/livepatch/test-state.sh
@@ -8,8 +8,7 @@ MOD_LIVEPATCH=test_klp_state
MOD_LIVEPATCH2=test_klp_state2
MOD_LIVEPATCH3=test_klp_state3
-set_dynamic_debug
-
+setup_config
# TEST: Loading and removing a module that modifies the system state
diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh
index 9fd3a0b97f0d..fb5c55dd6df8 100755
--- a/tools/testing/selftests/net/fcnal-test.sh
+++ b/tools/testing/selftests/net/fcnal-test.sh
@@ -32,12 +32,17 @@
# lo2: 127.0.0.1/8, ::1/128
# 172.16.2.2/32, 2001:db8:2::2/128
#
+# ns-A to ns-C connection - only for VRF and same config
+# as ns-A to ns-B
+#
# server / client nomenclature relative to ns-A
VERBOSE=0
NSA_DEV=eth1
+NSA_DEV2=eth2
NSB_DEV=eth1
+NSC_DEV=eth2
VRF=red
VRF_TABLE=1101
@@ -45,17 +50,22 @@ VRF_TABLE=1101
NSA_IP=172.16.1.1
NSB_IP=172.16.1.2
VRF_IP=172.16.3.1
+NS_NET=172.16.1.0/24
# IPv6 config
NSA_IP6=2001:db8:1::1
NSB_IP6=2001:db8:1::2
VRF_IP6=2001:db8:3::1
+NS_NET6=2001:db8:1::/120
NSA_LO_IP=172.16.2.1
NSB_LO_IP=172.16.2.2
NSA_LO_IP6=2001:db8:2::1
NSB_LO_IP6=2001:db8:2::2
+MD5_PW=abc123
+MD5_WRONG_PW=abc1234
+
MCAST=ff02::1
# set after namespace create
NSA_LINKIP6=
@@ -63,9 +73,11 @@ NSB_LINKIP6=
NSA=ns-A
NSB=ns-B
+NSC=ns-C
NSA_CMD="ip netns exec ${NSA}"
NSB_CMD="ip netns exec ${NSB}"
+NSC_CMD="ip netns exec ${NSC}"
which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping)
@@ -195,6 +207,11 @@ run_cmd_nsb()
do_run_cmd ${NSB_CMD} $*
}
+run_cmd_nsc()
+{
+ do_run_cmd ${NSC_CMD} $*
+}
+
setup_cmd()
{
local cmd="$*"
@@ -401,6 +418,7 @@ cleanup()
fi
ip netns del ${NSB}
+ ip netns del ${NSC} >/dev/null 2>&1
}
setup()
@@ -432,6 +450,12 @@ setup()
ip -netns ${NSB} ro add ${VRF_IP}/32 via ${NSA_IP} dev ${NSB_DEV}
ip -netns ${NSB} -6 ro add ${VRF_IP6}/128 via ${NSA_IP6} dev ${NSB_DEV}
+
+ # some VRF tests use ns-C which has the same config as
+ # ns-B but for a device NOT in the VRF
+ create_ns ${NSC} "-" "-"
+ connect_ns ${NSA} ${NSA_DEV2} ${NSA_IP}/24 ${NSA_IP6}/64 \
+ ${NSC} ${NSC_DEV} ${NSB_IP}/24 ${NSB_IP6}/64
else
ip -netns ${NSA} ro add ${NSB_LO_IP}/32 via ${NSB_IP} dev ${NSA_DEV}
ip -netns ${NSA} ro add ${NSB_LO_IP6}/128 via ${NSB_IP6} dev ${NSA_DEV}
@@ -714,6 +738,218 @@ ipv4_ping()
################################################################################
# IPv4 TCP
+#
+# MD5 tests without VRF
+#
+ipv4_tcp_md5_novrf()
+{
+ #
+ # single address
+ #
+
+ # basic use case
+ log_start
+ run_cmd nettest -s -M ${MD5_PW} -r ${NSB_IP} &
+ sleep 1
+ run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_PW}
+ log_test $? 0 "MD5: Single address config"
+
+ # client sends MD5, server not configured
+ log_start
+ show_hint "Should timeout due to MD5 mismatch"
+ run_cmd nettest -s &
+ sleep 1
+ run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_PW}
+ log_test $? 2 "MD5: Server no config, client uses password"
+
+ # wrong password
+ log_start
+ show_hint "Should timeout since client uses wrong password"
+ run_cmd nettest -s -M ${MD5_PW} -r ${NSB_IP} &
+ sleep 1
+ run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_WRONG_PW}
+ log_test $? 2 "MD5: Client uses wrong password"
+
+ # client from different address
+ log_start
+ show_hint "Should timeout due to MD5 mismatch"
+ run_cmd nettest -s -M ${MD5_PW} -r ${NSB_LO_IP} &
+ sleep 1
+ run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_PW}
+ log_test $? 2 "MD5: Client address does not match address configured with password"
+
+ #
+ # MD5 extension - prefix length
+ #
+
+ # client in prefix
+ log_start
+ run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} &
+ sleep 1
+ run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_PW}
+ log_test $? 0 "MD5: Prefix config"
+
+ # client in prefix, wrong password
+ log_start
+ show_hint "Should timeout since client uses wrong password"
+ run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} &
+ sleep 1
+ run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_WRONG_PW}
+ log_test $? 2 "MD5: Prefix config, client uses wrong password"
+
+ # client outside of prefix
+ log_start
+ show_hint "Should timeout due to MD5 mismatch"
+ run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} &
+ sleep 1
+ run_cmd_nsb nettest -l ${NSB_LO_IP} -r ${NSA_IP} -M ${MD5_PW}
+ log_test $? 2 "MD5: Prefix config, client address not in configured prefix"
+}
+
+#
+# MD5 tests with VRF
+#
+ipv4_tcp_md5()
+{
+ #
+ # single address
+ #
+
+ # basic use case
+ log_start
+ run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP} &
+ sleep 1
+ run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_PW}
+ log_test $? 0 "MD5: VRF: Single address config"
+
+ # client sends MD5, server not configured
+ log_start
+ show_hint "Should timeout since server does not have MD5 auth"
+ run_cmd nettest -s -d ${VRF} &
+ sleep 1
+ run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_PW}
+ log_test $? 2 "MD5: VRF: Server no config, client uses password"
+
+ # wrong password
+ log_start
+ show_hint "Should timeout since client uses wrong password"
+ run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP} &
+ sleep 1
+ run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_WRONG_PW}
+ log_test $? 2 "MD5: VRF: Client uses wrong password"
+
+ # client from different address
+ log_start
+ show_hint "Should timeout since server config differs from client"
+ run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -r ${NSB_LO_IP} &
+ sleep 1
+ run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_PW}
+ log_test $? 2 "MD5: VRF: Client address does not match address configured with password"
+
+ #
+ # MD5 extension - prefix length
+ #
+
+ # client in prefix
+ log_start
+ run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET} &
+ sleep 1
+ run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_PW}
+ log_test $? 0 "MD5: VRF: Prefix config"
+
+ # client in prefix, wrong password
+ log_start
+ show_hint "Should timeout since client uses wrong password"
+ run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET} &
+ sleep 1
+ run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_WRONG_PW}
+ log_test $? 2 "MD5: VRF: Prefix config, client uses wrong password"
+
+ # client outside of prefix
+ log_start
+ show_hint "Should timeout since client address is outside of prefix"
+ run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET} &
+ sleep 1
+ run_cmd_nsb nettest -l ${NSB_LO_IP} -r ${NSA_IP} -M ${MD5_PW}
+ log_test $? 2 "MD5: VRF: Prefix config, client address not in configured prefix"
+
+ #
+ # duplicate config between default VRF and a VRF
+ #
+
+ log_start
+ run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP} &
+ run_cmd nettest -s -M ${MD5_WRONG_PW} -r ${NSB_IP} &
+ sleep 1
+ run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_PW}
+ log_test $? 0 "MD5: VRF: Single address config in default VRF and VRF, conn in VRF"
+
+ log_start
+ run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP} &
+ run_cmd nettest -s -M ${MD5_WRONG_PW} -r ${NSB_IP} &
+ sleep 1
+ run_cmd_nsc nettest -r ${NSA_IP} -M ${MD5_WRONG_PW}
+ log_test $? 0 "MD5: VRF: Single address config in default VRF and VRF, conn in default VRF"
+
+ log_start
+ show_hint "Should timeout since client in default VRF uses VRF password"
+ run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP} &
+ run_cmd nettest -s -M ${MD5_WRONG_PW} -r ${NSB_IP} &
+ sleep 1
+ run_cmd_nsc nettest -r ${NSA_IP} -M ${MD5_PW}
+ log_test $? 2 "MD5: VRF: Single address config in default VRF and VRF, conn in default VRF with VRF pw"
+
+ log_start
+ show_hint "Should timeout since client in VRF uses default VRF password"
+ run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP} &
+ run_cmd nettest -s -M ${MD5_WRONG_PW} -r ${NSB_IP} &
+ sleep 1
+ run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_WRONG_PW}
+ log_test $? 2 "MD5: VRF: Single address config in default VRF and VRF, conn in VRF with default VRF pw"
+
+ log_start
+ run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET} &
+ run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NS_NET} &
+ sleep 1
+ run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_PW}
+ log_test $? 0 "MD5: VRF: Prefix config in default VRF and VRF, conn in VRF"
+
+ log_start
+ run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET} &
+ run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NS_NET} &
+ sleep 1
+ run_cmd_nsc nettest -r ${NSA_IP} -M ${MD5_WRONG_PW}
+ log_test $? 0 "MD5: VRF: Prefix config in default VRF and VRF, conn in default VRF"
+
+ log_start
+ show_hint "Should timeout since client in default VRF uses VRF password"
+ run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET} &
+ run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NS_NET} &
+ sleep 1
+ run_cmd_nsc nettest -r ${NSA_IP} -M ${MD5_PW}
+ log_test $? 2 "MD5: VRF: Prefix config in default VRF and VRF, conn in default VRF with VRF pw"
+
+ log_start
+ show_hint "Should timeout since client in VRF uses default VRF password"
+ run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET} &
+ run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NS_NET} &
+ sleep 1
+ run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_WRONG_PW}
+ log_test $? 2 "MD5: VRF: Prefix config in default VRF and VRF, conn in VRF with default VRF pw"
+
+ #
+ # negative tests
+ #
+ log_start
+ run_cmd nettest -s -d ${NSA_DEV} -M ${MD5_PW} -r ${NSB_IP}
+ log_test $? 1 "MD5: VRF: Device must be a VRF - single address"
+
+ log_start
+ run_cmd nettest -s -d ${NSA_DEV} -M ${MD5_PW} -m ${NS_NET}
+ log_test $? 1 "MD5: VRF: Device must be a VRF - prefix"
+
+}
+
ipv4_tcp_novrf()
{
local a
@@ -831,6 +1067,8 @@ ipv4_tcp_novrf()
show_hint "Should fail 'Connection refused'"
run_cmd nettest -d ${NSA_DEV} -r ${a}
log_test_addr ${a} $? 1 "No server, device client, local conn"
+
+ ipv4_tcp_md5_novrf
}
ipv4_tcp_vrf()
@@ -883,6 +1121,9 @@ ipv4_tcp_vrf()
run_cmd nettest -r ${a} -d ${NSA_DEV}
log_test_addr ${a} $? 1 "Global server, local connection"
+ # run MD5 tests
+ ipv4_tcp_md5
+
#
# enable VRF global server
#
@@ -924,8 +1165,8 @@ ipv4_tcp_vrf()
for a in ${NSA_IP} ${VRF_IP}
do
log_start
- show_hint "Should fail 'No route to host' since client is not bound to VRF"
- run_cmd nettest -s -2 ${VRF} &
+ show_hint "Should fail 'Connection refused' since client is not bound to VRF"
+ run_cmd nettest -s -d ${VRF} &
sleep 1
run_cmd nettest -r ${a}
log_test_addr ${a} $? 1 "Global server, local connection"
@@ -1961,6 +2202,218 @@ ipv6_ping()
################################################################################
# IPv6 TCP
+#
+# MD5 tests without VRF
+#
+ipv6_tcp_md5_novrf()
+{
+ #
+ # single address
+ #
+
+ # basic use case
+ log_start
+ run_cmd nettest -6 -s -M ${MD5_PW} -r ${NSB_IP6} &
+ sleep 1
+ run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_PW}
+ log_test $? 0 "MD5: Single address config"
+
+ # client sends MD5, server not configured
+ log_start
+ show_hint "Should timeout due to MD5 mismatch"
+ run_cmd nettest -6 -s &
+ sleep 1
+ run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_PW}
+ log_test $? 2 "MD5: Server no config, client uses password"
+
+ # wrong password
+ log_start
+ show_hint "Should timeout since client uses wrong password"
+ run_cmd nettest -6 -s -M ${MD5_PW} -r ${NSB_IP6} &
+ sleep 1
+ run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_WRONG_PW}
+ log_test $? 2 "MD5: Client uses wrong password"
+
+ # client from different address
+ log_start
+ show_hint "Should timeout due to MD5 mismatch"
+ run_cmd nettest -6 -s -M ${MD5_PW} -r ${NSB_LO_IP6} &
+ sleep 1
+ run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_PW}
+ log_test $? 2 "MD5: Client address does not match address configured with password"
+
+ #
+ # MD5 extension - prefix length
+ #
+
+ # client in prefix
+ log_start
+ run_cmd nettest -6 -s -M ${MD5_PW} -m ${NS_NET6} &
+ sleep 1
+ run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_PW}
+ log_test $? 0 "MD5: Prefix config"
+
+ # client in prefix, wrong password
+ log_start
+ show_hint "Should timeout since client uses wrong password"
+ run_cmd nettest -6 -s -M ${MD5_PW} -m ${NS_NET6} &
+ sleep 1
+ run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_WRONG_PW}
+ log_test $? 2 "MD5: Prefix config, client uses wrong password"
+
+ # client outside of prefix
+ log_start
+ show_hint "Should timeout due to MD5 mismatch"
+ run_cmd nettest -6 -s -M ${MD5_PW} -m ${NS_NET6} &
+ sleep 1
+ run_cmd_nsb nettest -6 -l ${NSB_LO_IP6} -r ${NSA_IP6} -M ${MD5_PW}
+ log_test $? 2 "MD5: Prefix config, client address not in configured prefix"
+}
+
+#
+# MD5 tests with VRF
+#
+ipv6_tcp_md5()
+{
+ #
+ # single address
+ #
+
+ # basic use case
+ log_start
+ run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP6} &
+ sleep 1
+ run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_PW}
+ log_test $? 0 "MD5: VRF: Single address config"
+
+ # client sends MD5, server not configured
+ log_start
+ show_hint "Should timeout since server does not have MD5 auth"
+ run_cmd nettest -6 -s -d ${VRF} &
+ sleep 1
+ run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_PW}
+ log_test $? 2 "MD5: VRF: Server no config, client uses password"
+
+ # wrong password
+ log_start
+ show_hint "Should timeout since client uses wrong password"
+ run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP6} &
+ sleep 1
+ run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_WRONG_PW}
+ log_test $? 2 "MD5: VRF: Client uses wrong password"
+
+ # client from different address
+ log_start
+ show_hint "Should timeout since server config differs from client"
+ run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -r ${NSB_LO_IP6} &
+ sleep 1
+ run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_PW}
+ log_test $? 2 "MD5: VRF: Client address does not match address configured with password"
+
+ #
+ # MD5 extension - prefix length
+ #
+
+ # client in prefix
+ log_start
+ run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET6} &
+ sleep 1
+ run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_PW}
+ log_test $? 0 "MD5: VRF: Prefix config"
+
+ # client in prefix, wrong password
+ log_start
+ show_hint "Should timeout since client uses wrong password"
+ run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET6} &
+ sleep 1
+ run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_WRONG_PW}
+ log_test $? 2 "MD5: VRF: Prefix config, client uses wrong password"
+
+ # client outside of prefix
+ log_start
+ show_hint "Should timeout since client address is outside of prefix"
+ run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET6} &
+ sleep 1
+ run_cmd_nsb nettest -6 -l ${NSB_LO_IP6} -r ${NSA_IP6} -M ${MD5_PW}
+ log_test $? 2 "MD5: VRF: Prefix config, client address not in configured prefix"
+
+ #
+ # duplicate config between default VRF and a VRF
+ #
+
+ log_start
+ run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP6} &
+ run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -r ${NSB_IP6} &
+ sleep 1
+ run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_PW}
+ log_test $? 0 "MD5: VRF: Single address config in default VRF and VRF, conn in VRF"
+
+ log_start
+ run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP6} &
+ run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -r ${NSB_IP6} &
+ sleep 1
+ run_cmd_nsc nettest -6 -r ${NSA_IP6} -M ${MD5_WRONG_PW}
+ log_test $? 0 "MD5: VRF: Single address config in default VRF and VRF, conn in default VRF"
+
+ log_start
+ show_hint "Should timeout since client in default VRF uses VRF password"
+ run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP6} &
+ run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -r ${NSB_IP6} &
+ sleep 1
+ run_cmd_nsc nettest -6 -r ${NSA_IP6} -M ${MD5_PW}
+ log_test $? 2 "MD5: VRF: Single address config in default VRF and VRF, conn in default VRF with VRF pw"
+
+ log_start
+ show_hint "Should timeout since client in VRF uses default VRF password"
+ run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP6} &
+ run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -r ${NSB_IP6} &
+ sleep 1
+ run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_WRONG_PW}
+ log_test $? 2 "MD5: VRF: Single address config in default VRF and VRF, conn in VRF with default VRF pw"
+
+ log_start
+ run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET6} &
+ run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NS_NET6} &
+ sleep 1
+ run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_PW}
+ log_test $? 0 "MD5: VRF: Prefix config in default VRF and VRF, conn in VRF"
+
+ log_start
+ run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET6} &
+ run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NS_NET6} &
+ sleep 1
+ run_cmd_nsc nettest -6 -r ${NSA_IP6} -M ${MD5_WRONG_PW}
+ log_test $? 0 "MD5: VRF: Prefix config in default VRF and VRF, conn in default VRF"
+
+ log_start
+ show_hint "Should timeout since client in default VRF uses VRF password"
+ run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET6} &
+ run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NS_NET6} &
+ sleep 1
+ run_cmd_nsc nettest -6 -r ${NSA_IP6} -M ${MD5_PW}
+ log_test $? 2 "MD5: VRF: Prefix config in default VRF and VRF, conn in default VRF with VRF pw"
+
+ log_start
+ show_hint "Should timeout since client in VRF uses default VRF password"
+ run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET6} &
+ run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NS_NET6} &
+ sleep 1
+ run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_WRONG_PW}
+ log_test $? 2 "MD5: VRF: Prefix config in default VRF and VRF, conn in VRF with default VRF pw"
+
+ #
+ # negative tests
+ #
+ log_start
+ run_cmd nettest -6 -s -d ${NSA_DEV} -M ${MD5_PW} -r ${NSB_IP6}
+ log_test $? 1 "MD5: VRF: Device must be a VRF - single address"
+
+ log_start
+ run_cmd nettest -6 -s -d ${NSA_DEV} -M ${MD5_PW} -m ${NS_NET6}
+ log_test $? 1 "MD5: VRF: Device must be a VRF - prefix"
+
+}
+
ipv6_tcp_novrf()
{
local a
@@ -2077,6 +2530,8 @@ ipv6_tcp_novrf()
run_cmd nettest -6 -d ${NSA_DEV} -r ${a}
log_test_addr ${a} $? 1 "No server, device client, local conn"
done
+
+ ipv6_tcp_md5_novrf
}
ipv6_tcp_vrf()
@@ -2145,6 +2600,9 @@ ipv6_tcp_vrf()
run_cmd nettest -6 -r ${a} -d ${NSA_DEV}
log_test_addr ${a} $? 1 "Global server, local connection"
+ # run MD5 tests
+ ipv6_tcp_md5
+
#
# enable VRF global server
#
@@ -2205,8 +2663,8 @@ ipv6_tcp_vrf()
for a in ${NSA_IP6} ${VRF_IP6}
do
log_start
- show_hint "Fails 'No route to host' since client is not in VRF"
- run_cmd nettest -6 -s -2 ${VRF} &
+ show_hint "Fails 'Connection refused' since client is not in VRF"
+ run_cmd nettest -6 -s -d ${VRF} &
sleep 1
run_cmd nettest -6 -r ${a}
log_test_addr ${a} $? 1 "Global server, local connection"
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 1f64e7348f69..8dc5fac98cbc 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -225,6 +225,29 @@ log_info()
echo "INFO: $msg"
}
+busywait()
+{
+ local timeout=$1; shift
+
+ local start_time="$(date -u +%s%3N)"
+ while true
+ do
+ local out
+ out=$("$@")
+ local ret=$?
+ if ((!ret)); then
+ echo -n "$out"
+ return 0
+ fi
+
+ local current_time="$(date -u +%s%3N)"
+ if ((current_time - start_time > timeout)); then
+ echo -n "$out"
+ return 1
+ fi
+ done
+}
+
setup_wait_dev()
{
local dev=$1; shift
@@ -1065,3 +1088,21 @@ flood_test()
flood_unicast_test $br_port $host1_if $host2_if
flood_multicast_test $br_port $host1_if $host2_if
}
+
+start_traffic()
+{
+ local h_in=$1; shift # Where the traffic egresses the host
+ local sip=$1; shift
+ local dip=$1; shift
+ local dmac=$1; shift
+
+ $MZ $h_in -p 8000 -A $sip -B $dip -c 0 \
+ -a own -b $dmac -t udp -q &
+ sleep 1
+}
+
+stop_traffic()
+{
+ # Suppress noise from killing mausezahn.
+ { kill %% && wait %%; } 2>/dev/null
+}
diff --git a/tools/testing/selftests/net/forwarding/loopback.sh b/tools/testing/selftests/net/forwarding/loopback.sh
index 6e4626ae71b0..8f4057310b5b 100755
--- a/tools/testing/selftests/net/forwarding/loopback.sh
+++ b/tools/testing/selftests/net/forwarding/loopback.sh
@@ -1,6 +1,9 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
ALL_TESTS="loopback_test"
NUM_NETIFS=2
source tc_common.sh
@@ -72,6 +75,11 @@ setup_prepare()
h1_create
h2_create
+
+ if ethtool -k $h1 | grep loopback | grep -q fixed; then
+ log_test "SKIP: dev $h1 does not support loopback feature"
+ exit $ksft_skip
+ fi
}
cleanup()
diff --git a/tools/testing/selftests/net/forwarding/router.sh b/tools/testing/selftests/net/forwarding/router.sh
index a75cb51cc5bd..057f91b05098 100755
--- a/tools/testing/selftests/net/forwarding/router.sh
+++ b/tools/testing/selftests/net/forwarding/router.sh
@@ -1,9 +1,23 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-ALL_TESTS="ping_ipv4 ping_ipv6"
+ALL_TESTS="
+ ping_ipv4
+ ping_ipv6
+ sip_in_class_e
+ mc_mac_mismatch
+ ipv4_sip_equal_dip
+ ipv6_sip_equal_dip
+ ipv4_dip_link_local
+"
+
NUM_NETIFS=4
source lib.sh
+source tc_common.sh
+
+require_command $MCD
+require_command $MC_CLI
+table_name=selftests
h1_create()
{
@@ -64,6 +78,8 @@ router_create()
ip link set dev $rp1 up
ip link set dev $rp2 up
+ tc qdisc add dev $rp2 clsact
+
ip address add 192.0.2.1/24 dev $rp1
ip address add 2001:db8:1::1/64 dev $rp1
@@ -79,10 +95,31 @@ router_destroy()
ip address del 2001:db8:1::1/64 dev $rp1
ip address del 192.0.2.1/24 dev $rp1
+ tc qdisc del dev $rp2 clsact
+
ip link set dev $rp2 down
ip link set dev $rp1 down
}
+start_mcd()
+{
+ SMCROUTEDIR="$(mktemp -d)"
+
+ for ((i = 1; i <= $NUM_NETIFS; ++i)); do
+ echo "phyint ${NETIFS[p$i]} enable" >> \
+ $SMCROUTEDIR/$table_name.conf
+ done
+
+ $MCD -N -I $table_name -f $SMCROUTEDIR/$table_name.conf \
+ -P $SMCROUTEDIR/$table_name.pid
+}
+
+kill_mcd()
+{
+ pkill $MCD
+ rm -rf $SMCROUTEDIR
+}
+
setup_prepare()
{
h1=${NETIFS[p1]}
@@ -91,6 +128,10 @@ setup_prepare()
rp2=${NETIFS[p3]}
h2=${NETIFS[p4]}
+ rp1mac=$(mac_get $rp1)
+
+ start_mcd
+
vrf_prepare
h1_create
@@ -113,6 +154,8 @@ cleanup()
h1_destroy
vrf_cleanup
+
+ kill_mcd
}
ping_ipv4()
@@ -125,6 +168,150 @@ ping_ipv6()
ping6_test $h1 2001:db8:2::2
}
+sip_in_class_e()
+{
+ RET=0
+
+ # Disable rpfilter to prevent packets to be dropped because of it.
+ sysctl_set net.ipv4.conf.all.rp_filter 0
+ sysctl_set net.ipv4.conf.$rp1.rp_filter 0
+
+ tc filter add dev $rp2 egress protocol ip pref 1 handle 101 \
+ flower src_ip 240.0.0.1 ip_proto udp action pass
+
+ $MZ $h1 -t udp "sp=54321,dp=12345" -c 5 -d 1msec \
+ -A 240.0.0.1 -b $rp1mac -B 198.51.100.2 -q
+
+ tc_check_packets "dev $rp2 egress" 101 5
+ check_err $? "Packets were dropped"
+
+ log_test "Source IP in class E"
+
+ tc filter del dev $rp2 egress protocol ip pref 1 handle 101 flower
+ sysctl_restore net.ipv4.conf.$rp1.rp_filter
+ sysctl_restore net.ipv4.conf.all.rp_filter
+}
+
+create_mcast_sg()
+{
+ local if_name=$1; shift
+ local s_addr=$1; shift
+ local mcast=$1; shift
+ local dest_ifs=${@}
+
+ $MC_CLI -I $table_name add $if_name $s_addr $mcast $dest_ifs
+}
+
+delete_mcast_sg()
+{
+ local if_name=$1; shift
+ local s_addr=$1; shift
+ local mcast=$1; shift
+ local dest_ifs=${@}
+
+ $MC_CLI -I $table_name remove $if_name $s_addr $mcast $dest_ifs
+}
+
+__mc_mac_mismatch()
+{
+ local desc=$1; shift
+ local proto=$1; shift
+ local sip=$1; shift
+ local dip=$1; shift
+ local flags=${1:-""}; shift
+ local dmac=01:02:03:04:05:06
+
+ RET=0
+
+ tc filter add dev $rp2 egress protocol $proto pref 1 handle 101 \
+ flower dst_ip $dip action pass
+
+ create_mcast_sg $rp1 $sip $dip $rp2
+
+ $MZ $flags $h1 -t udp "sp=54321,dp=12345" -c 5 -d 1msec -b $dmac \
+ -B $dip -q
+
+ tc_check_packets "dev $rp2 egress" 101 5
+ check_err $? "Packets were dropped"
+
+ log_test "Multicast MAC mismatch: $desc"
+
+ delete_mcast_sg $rp1 $sip $dip $rp2
+ tc filter del dev $rp2 egress protocol $proto pref 1 handle 101 flower
+}
+
+mc_mac_mismatch()
+{
+ __mc_mac_mismatch "IPv4" "ip" 192.0.2.2 225.1.2.3
+ __mc_mac_mismatch "IPv6" "ipv6" 2001:db8:1::2 ff0e::3 "-6"
+}
+
+ipv4_sip_equal_dip()
+{
+ RET=0
+
+ # Disable rpfilter to prevent packets to be dropped because of it.
+ sysctl_set net.ipv4.conf.all.rp_filter 0
+ sysctl_set net.ipv4.conf.$rp1.rp_filter 0
+
+ tc filter add dev $rp2 egress protocol ip pref 1 handle 101 \
+ flower src_ip 198.51.100.2 action pass
+
+ $MZ $h1 -t udp "sp=54321,dp=12345" -c 5 -d 1msec \
+ -A 198.51.100.2 -b $rp1mac -B 198.51.100.2 -q
+
+ tc_check_packets "dev $rp2 egress" 101 5
+ check_err $? "Packets were dropped"
+
+ log_test "Source IP is equal to destination IP: IPv4"
+
+ tc filter del dev $rp2 egress protocol ip pref 1 handle 101 flower
+ sysctl_restore net.ipv4.conf.$rp1.rp_filter
+ sysctl_restore net.ipv4.conf.all.rp_filter
+}
+
+ipv6_sip_equal_dip()
+{
+ RET=0
+
+ tc filter add dev $rp2 egress protocol ipv6 pref 1 handle 101 \
+ flower src_ip 2001:db8:2::2 action pass
+
+ $MZ -6 $h1 -t udp "sp=54321,dp=12345" -c 5 -d 1msec \
+ -A 2001:db8:2::2 -b $rp1mac -B 2001:db8:2::2 -q
+
+ tc_check_packets "dev $rp2 egress" 101 5
+ check_err $? "Packets were dropped"
+
+ log_test "Source IP is equal to destination IP: IPv6"
+
+ tc filter del dev $rp2 egress protocol ipv6 pref 1 handle 101 flower
+}
+
+ipv4_dip_link_local()
+{
+ local dip=169.254.1.1
+
+ RET=0
+
+ tc filter add dev $rp2 egress protocol ip pref 1 handle 101 \
+ flower dst_ip $dip action pass
+
+ ip neigh add 169.254.1.1 lladdr 00:11:22:33:44:55 dev $rp2
+ ip route add 169.254.1.0/24 dev $rp2
+
+ $MZ $h1 -t udp "sp=54321,dp=12345" -c 5 -d 1msec -b $rp1mac -B $dip -q
+
+ tc_check_packets "dev $rp2 egress" 101 5
+ check_err $? "Packets were dropped"
+
+ log_test "IPv4 destination IP is link-local"
+
+ ip route del 169.254.1.0/24 dev $rp2
+ ip neigh del 169.254.1.1 lladdr 00:11:22:33:44:55 dev $rp2
+ tc filter del dev $rp2 egress protocol ip pref 1 handle 101 flower
+}
+
trap cleanup EXIT
setup_prepare
diff --git a/tools/testing/selftests/net/forwarding/router_bridge_vlan.sh b/tools/testing/selftests/net/forwarding/router_bridge_vlan.sh
index fef88eb4b873..fa6a88c50750 100755
--- a/tools/testing/selftests/net/forwarding/router_bridge_vlan.sh
+++ b/tools/testing/selftests/net/forwarding/router_bridge_vlan.sh
@@ -36,7 +36,7 @@ h2_destroy()
{
ip -6 route del 2001:db8:1::/64 vrf v$h2
ip -4 route del 192.0.2.0/28 vrf v$h2
- simple_if_fini $h2 192.0.2.130/28
+ simple_if_fini $h2 192.0.2.130/28 2001:db8:2::2/64
}
router_create()
diff --git a/tools/testing/selftests/net/forwarding/sch_ets.sh b/tools/testing/selftests/net/forwarding/sch_ets.sh
new file mode 100755
index 000000000000..40e0ad1bc4f2
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/sch_ets.sh
@@ -0,0 +1,44 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# A driver for the ETS selftest that implements testing in slowpath.
+lib_dir=.
+source sch_ets_core.sh
+
+ALL_TESTS="
+ ping_ipv4
+ priomap_mode
+ ets_test_strict
+ ets_test_mixed
+ ets_test_dwrr
+ classifier_mode
+ ets_test_strict
+ ets_test_mixed
+ ets_test_dwrr
+"
+
+switch_create()
+{
+ ets_switch_create
+
+ # Create a bottleneck so that the DWRR process can kick in.
+ tc qdisc add dev $swp2 root handle 1: tbf \
+ rate 1Gbit burst 1Mbit latency 100ms
+ PARENT="parent 1:"
+}
+
+switch_destroy()
+{
+ ets_switch_destroy
+ tc qdisc del dev $swp2 root
+}
+
+# Callback from sch_ets_tests.sh
+get_stats()
+{
+ local stream=$1; shift
+
+ link_stats_get $h2.1$stream rx bytes
+}
+
+ets_run
diff --git a/tools/testing/selftests/net/forwarding/sch_ets_core.sh b/tools/testing/selftests/net/forwarding/sch_ets_core.sh
new file mode 100644
index 000000000000..f906fcc66572
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/sch_ets_core.sh
@@ -0,0 +1,300 @@
+# SPDX-License-Identifier: GPL-2.0
+
+# This is a template for ETS Qdisc test.
+#
+# This test sends from H1 several traffic streams with 802.1p-tagged packets.
+# The tags are used at $swp1 to prioritize the traffic. Each stream is then
+# queued at a different ETS band according to the assigned priority. After
+# runnig for a while, counters at H2 are consulted to determine whether the
+# traffic scheduling was according to the ETS configuration.
+#
+# This template is supposed to be embedded by a test driver, which implements
+# statistics collection, any HW-specific stuff, and prominently configures the
+# system to assure that there is overcommitment at $swp2. That is necessary so
+# that the ETS traffic selection algorithm kicks in and has to schedule some
+# traffic at the expense of other.
+#
+# A driver for veth-based testing is in sch_ets.sh, an example of a driver for
+# an offloaded data path is in selftests/drivers/net/mlxsw/sch_ets.sh.
+#
+# +---------------------------------------------------------------------+
+# | H1 |
+# | + $h1.10 + $h1.11 + $h1.12 |
+# | | 192.0.2.1/28 | 192.0.2.17/28 | 192.0.2.33/28 |
+# | | egress-qos-map | egress-qos-map | egress-qos-map |
+# | | 0:0 | 0:1 | 0:2 |
+# | \____________________ | ____________________/ |
+# | \|/ |
+# | + $h1 |
+# +---------------------------|-----------------------------------------+
+# |
+# +---------------------------|-----------------------------------------+
+# | SW + $swp1 |
+# | | >1Gbps |
+# | ____________________/|\____________________ |
+# | / | \ |
+# | +--|----------------+ +--|----------------+ +--|----------------+ |
+# | | + $swp1.10 | | + $swp1.11 | | + $swp1.12 | |
+# | | ingress-qos-map| | ingress-qos-map| | ingress-qos-map| |
+# | | 0:0 1:1 2:2 | | 0:0 1:1 2:2 | | 0:0 1:1 2:2 | |
+# | | | | | | | |
+# | | BR10 | | BR11 | | BR12 | |
+# | | | | | | | |
+# | | + $swp2.10 | | + $swp2.11 | | + $swp2.12 | |
+# | +--|----------------+ +--|----------------+ +--|----------------+ |
+# | \____________________ | ____________________/ |
+# | \|/ |
+# | + $swp2 |
+# | | 1Gbps (ethtool or HTB qdisc) |
+# | | qdisc ets quanta $W0 $W1 $W2 |
+# | | priomap 0 1 2 |
+# +---------------------------|-----------------------------------------+
+# |
+# +---------------------------|-----------------------------------------+
+# | H2 + $h2 |
+# | ____________________/|\____________________ |
+# | / | \ |
+# | + $h2.10 + $h2.11 + $h2.12 |
+# | 192.0.2.2/28 192.0.2.18/28 192.0.2.34/28 |
+# +---------------------------------------------------------------------+
+
+NUM_NETIFS=4
+CHECK_TC=yes
+source $lib_dir/lib.sh
+source $lib_dir/sch_ets_tests.sh
+
+PARENT=root
+QDISC_DEV=
+
+sip()
+{
+ echo 192.0.2.$((16 * $1 + 1))
+}
+
+dip()
+{
+ echo 192.0.2.$((16 * $1 + 2))
+}
+
+# Callback from sch_ets_tests.sh
+ets_start_traffic()
+{
+ local dst_mac=$(mac_get $h2)
+ local i=$1; shift
+
+ start_traffic $h1.1$i $(sip $i) $(dip $i) $dst_mac
+}
+
+ETS_CHANGE_QDISC=
+
+priomap_mode()
+{
+ echo "Running in priomap mode"
+ ets_delete_qdisc
+ ETS_CHANGE_QDISC=ets_change_qdisc_priomap
+}
+
+classifier_mode()
+{
+ echo "Running in classifier mode"
+ ets_delete_qdisc
+ ETS_CHANGE_QDISC=ets_change_qdisc_classifier
+}
+
+ets_change_qdisc_priomap()
+{
+ local dev=$1; shift
+ local nstrict=$1; shift
+ local priomap=$1; shift
+ local quanta=("${@}")
+
+ local op=$(if [[ -n $QDISC_DEV ]]; then echo change; else echo add; fi)
+
+ tc qdisc $op dev $dev $PARENT handle 10: ets \
+ $(if ((nstrict)); then echo strict $nstrict; fi) \
+ $(if ((${#quanta[@]})); then echo quanta ${quanta[@]}; fi) \
+ priomap $priomap
+ QDISC_DEV=$dev
+}
+
+ets_change_qdisc_classifier()
+{
+ local dev=$1; shift
+ local nstrict=$1; shift
+ local priomap=$1; shift
+ local quanta=("${@}")
+
+ local op=$(if [[ -n $QDISC_DEV ]]; then echo change; else echo add; fi)
+
+ tc qdisc $op dev $dev $PARENT handle 10: ets \
+ $(if ((nstrict)); then echo strict $nstrict; fi) \
+ $(if ((${#quanta[@]})); then echo quanta ${quanta[@]}; fi)
+
+ if [[ $op == add ]]; then
+ local prio=0
+ local band
+
+ for band in $priomap; do
+ tc filter add dev $dev parent 10: basic \
+ match "meta(priority eq $prio)" \
+ flowid 10:$((band + 1))
+ ((prio++))
+ done
+ fi
+ QDISC_DEV=$dev
+}
+
+# Callback from sch_ets_tests.sh
+ets_change_qdisc()
+{
+ if [[ -z "$ETS_CHANGE_QDISC" ]]; then
+ exit 1
+ fi
+ $ETS_CHANGE_QDISC "$@"
+}
+
+ets_delete_qdisc()
+{
+ if [[ -n $QDISC_DEV ]]; then
+ tc qdisc del dev $QDISC_DEV $PARENT
+ QDISC_DEV=
+ fi
+}
+
+h1_create()
+{
+ local i;
+
+ simple_if_init $h1
+ mtu_set $h1 9900
+ for i in {0..2}; do
+ vlan_create $h1 1$i v$h1 $(sip $i)/28
+ ip link set dev $h1.1$i type vlan egress 0:$i
+ done
+}
+
+h1_destroy()
+{
+ local i
+
+ for i in {0..2}; do
+ vlan_destroy $h1 1$i
+ done
+ mtu_restore $h1
+ simple_if_fini $h1
+}
+
+h2_create()
+{
+ local i
+
+ simple_if_init $h2
+ mtu_set $h2 9900
+ for i in {0..2}; do
+ vlan_create $h2 1$i v$h2 $(dip $i)/28
+ done
+}
+
+h2_destroy()
+{
+ local i
+
+ for i in {0..2}; do
+ vlan_destroy $h2 1$i
+ done
+ mtu_restore $h2
+ simple_if_fini $h2
+}
+
+ets_switch_create()
+{
+ local i
+
+ ip link set dev $swp1 up
+ mtu_set $swp1 9900
+
+ ip link set dev $swp2 up
+ mtu_set $swp2 9900
+
+ for i in {0..2}; do
+ vlan_create $swp1 1$i
+ ip link set dev $swp1.1$i type vlan ingress 0:0 1:1 2:2
+
+ vlan_create $swp2 1$i
+
+ ip link add dev br1$i type bridge
+ ip link set dev $swp1.1$i master br1$i
+ ip link set dev $swp2.1$i master br1$i
+
+ ip link set dev br1$i up
+ ip link set dev $swp1.1$i up
+ ip link set dev $swp2.1$i up
+ done
+}
+
+ets_switch_destroy()
+{
+ local i
+
+ ets_delete_qdisc
+
+ for i in {0..2}; do
+ ip link del dev br1$i
+ vlan_destroy $swp2 1$i
+ vlan_destroy $swp1 1$i
+ done
+
+ mtu_restore $swp2
+ ip link set dev $swp2 down
+
+ mtu_restore $swp1
+ ip link set dev $swp1 down
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ put=$swp2
+ hut=$h2
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+ping_ipv4()
+{
+ ping_test $h1.10 $(dip 0) " vlan 10"
+ ping_test $h1.11 $(dip 1) " vlan 11"
+ ping_test $h1.12 $(dip 2) " vlan 12"
+}
+
+ets_run()
+{
+ trap cleanup EXIT
+
+ setup_prepare
+ setup_wait
+
+ tests_run
+
+ exit $EXIT_STATUS
+}
diff --git a/tools/testing/selftests/net/forwarding/sch_ets_tests.sh b/tools/testing/selftests/net/forwarding/sch_ets_tests.sh
new file mode 100644
index 000000000000..3c3b204d47e8
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/sch_ets_tests.sh
@@ -0,0 +1,227 @@
+# SPDX-License-Identifier: GPL-2.0
+
+# Global interface:
+# $put -- port under test (e.g. $swp2)
+# get_stats($band) -- A function to collect stats for band
+# ets_start_traffic($band) -- Start traffic for this band
+# ets_change_qdisc($op, $dev, $nstrict, $quanta...) -- Add or change qdisc
+
+# WS describes the Qdisc configuration. It has one value per band (so the
+# number of array elements indicates the number of bands). If the value is
+# 0, it is a strict band, otherwise the it's a DRR band and the value is
+# that band's quantum.
+declare -a WS
+
+qdisc_describe()
+{
+ local nbands=${#WS[@]}
+ local nstrict=0
+ local i
+
+ for ((i = 0; i < nbands; i++)); do
+ if ((!${WS[$i]})); then
+ : $((nstrict++))
+ fi
+ done
+
+ echo -n "ets bands $nbands"
+ if ((nstrict)); then
+ echo -n " strict $nstrict"
+ fi
+ if ((nstrict < nbands)); then
+ echo -n " quanta"
+ for ((i = nstrict; i < nbands; i++)); do
+ echo -n " ${WS[$i]}"
+ done
+ fi
+}
+
+__strict_eval()
+{
+ local desc=$1; shift
+ local d=$1; shift
+ local total=$1; shift
+ local above=$1; shift
+
+ RET=0
+
+ if ((! total)); then
+ check_err 1 "No traffic observed"
+ log_test "$desc"
+ return
+ fi
+
+ local ratio=$(echo "scale=2; 100 * $d / $total" | bc -l)
+ if ((above)); then
+ test $(echo "$ratio > 95.0" | bc -l) -eq 1
+ check_err $? "Not enough traffic"
+ log_test "$desc"
+ log_info "Expected ratio >95% Measured ratio $ratio"
+ else
+ test $(echo "$ratio < 5" | bc -l) -eq 1
+ check_err $? "Too much traffic"
+ log_test "$desc"
+ log_info "Expected ratio <5% Measured ratio $ratio"
+ fi
+}
+
+strict_eval()
+{
+ __strict_eval "$@" 1
+}
+
+notraf_eval()
+{
+ __strict_eval "$@" 0
+}
+
+__ets_dwrr_test()
+{
+ local -a streams=("$@")
+
+ local low_stream=${streams[0]}
+ local seen_strict=0
+ local -a t0 t1 d
+ local stream
+ local total
+ local i
+
+ echo "Testing $(qdisc_describe), streams ${streams[@]}"
+
+ for stream in ${streams[@]}; do
+ ets_start_traffic $stream
+ done
+
+ sleep 10
+
+ t0=($(for stream in ${streams[@]}; do
+ get_stats $stream
+ done))
+
+ sleep 10
+
+ t1=($(for stream in ${streams[@]}; do
+ get_stats $stream
+ done))
+ d=($(for ((i = 0; i < ${#streams[@]}; i++)); do
+ echo $((${t1[$i]} - ${t0[$i]}))
+ done))
+ total=$(echo ${d[@]} | sed 's/ /+/g' | bc)
+
+ for ((i = 0; i < ${#streams[@]}; i++)); do
+ local stream=${streams[$i]}
+ if ((seen_strict)); then
+ notraf_eval "band $stream" ${d[$i]} $total
+ elif ((${WS[$stream]} == 0)); then
+ strict_eval "band $stream" ${d[$i]} $total
+ seen_strict=1
+ elif ((stream == low_stream)); then
+ # Low stream is used as DWRR evaluation reference.
+ continue
+ else
+ multipath_eval "bands $low_stream:$stream" \
+ ${WS[$low_stream]} ${WS[$stream]} \
+ ${d[0]} ${d[$i]}
+ fi
+ done
+
+ for stream in ${streams[@]}; do
+ stop_traffic
+ done
+}
+
+ets_dwrr_test_012()
+{
+ __ets_dwrr_test 0 1 2
+}
+
+ets_dwrr_test_01()
+{
+ __ets_dwrr_test 0 1
+}
+
+ets_dwrr_test_12()
+{
+ __ets_dwrr_test 1 2
+}
+
+ets_qdisc_setup()
+{
+ local dev=$1; shift
+ local nstrict=$1; shift
+ local -a quanta=("$@")
+
+ local ndwrr=${#quanta[@]}
+ local nbands=$((nstrict + ndwrr))
+ local nstreams=$(if ((nbands > 3)); then echo 3; else echo $nbands; fi)
+ local priomap=$(seq 0 $((nstreams - 1)))
+ local i
+
+ WS=($(
+ for ((i = 0; i < nstrict; i++)); do
+ echo 0
+ done
+ for ((i = 0; i < ndwrr; i++)); do
+ echo ${quanta[$i]}
+ done
+ ))
+
+ ets_change_qdisc $dev $nstrict "$priomap" ${quanta[@]}
+}
+
+ets_set_dwrr_uniform()
+{
+ ets_qdisc_setup $put 0 3300 3300 3300
+}
+
+ets_set_dwrr_varying()
+{
+ ets_qdisc_setup $put 0 5000 3500 1500
+}
+
+ets_set_strict()
+{
+ ets_qdisc_setup $put 3
+}
+
+ets_set_mixed()
+{
+ ets_qdisc_setup $put 1 5000 2500 1500
+}
+
+ets_change_quantum()
+{
+ tc class change dev $put classid 10:2 ets quantum 8000
+ WS[1]=8000
+}
+
+ets_set_dwrr_two_bands()
+{
+ ets_qdisc_setup $put 0 5000 2500
+}
+
+ets_test_strict()
+{
+ ets_set_strict
+ ets_dwrr_test_01
+ ets_dwrr_test_12
+}
+
+ets_test_mixed()
+{
+ ets_set_mixed
+ ets_dwrr_test_01
+ ets_dwrr_test_12
+}
+
+ets_test_dwrr()
+{
+ ets_set_dwrr_uniform
+ ets_dwrr_test_012
+ ets_set_dwrr_varying
+ ets_dwrr_test_012
+ ets_change_quantum
+ ets_dwrr_test_012
+ ets_set_dwrr_two_bands
+ ets_dwrr_test_01
+}
diff --git a/tools/testing/selftests/net/nettest.c b/tools/testing/selftests/net/nettest.c
index c08f4db8330d..93208caacbe6 100644
--- a/tools/testing/selftests/net/nettest.c
+++ b/tools/testing/selftests/net/nettest.c
@@ -74,7 +74,14 @@ struct sock_args {
int use_cmsg;
const char *dev;
int ifindex;
+
const char *password;
+ /* prefix for MD5 password */
+ union {
+ struct sockaddr_in v4;
+ struct sockaddr_in6 v6;
+ } md5_prefix;
+ unsigned int prefix_len;
/* expected addresses and device index for connection */
int expected_ifindex;
@@ -200,20 +207,33 @@ static void log_address(const char *desc, struct sockaddr *sa)
fflush(stdout);
}
-static int tcp_md5sig(int sd, void *addr, socklen_t alen, const char *password)
+static int tcp_md5sig(int sd, void *addr, socklen_t alen, struct sock_args *args)
{
- struct tcp_md5sig md5sig;
- int keylen = password ? strlen(password) : 0;
+ int keylen = strlen(args->password);
+ struct tcp_md5sig md5sig = {};
+ int opt = TCP_MD5SIG;
int rc;
- memset(&md5sig, 0, sizeof(md5sig));
- memcpy(&md5sig.tcpm_addr, addr, alen);
md5sig.tcpm_keylen = keylen;
+ memcpy(md5sig.tcpm_key, args->password, keylen);
+
+ if (args->prefix_len) {
+ opt = TCP_MD5SIG_EXT;
+ md5sig.tcpm_flags |= TCP_MD5SIG_FLAG_PREFIX;
+
+ md5sig.tcpm_prefixlen = args->prefix_len;
+ addr = &args->md5_prefix;
+ }
+ memcpy(&md5sig.tcpm_addr, addr, alen);
+
+ if (args->ifindex) {
+ opt = TCP_MD5SIG_EXT;
+ md5sig.tcpm_flags |= TCP_MD5SIG_FLAG_IFINDEX;
- if (keylen)
- memcpy(md5sig.tcpm_key, password, keylen);
+ md5sig.tcpm_ifindex = args->ifindex;
+ }
- rc = setsockopt(sd, IPPROTO_TCP, TCP_MD5SIG, &md5sig, sizeof(md5sig));
+ rc = setsockopt(sd, IPPROTO_TCP, opt, &md5sig, sizeof(md5sig));
if (rc < 0) {
/* ENOENT is harmless. Returned when a password is cleared */
if (errno == ENOENT)
@@ -254,7 +274,7 @@ static int tcp_md5_remote(int sd, struct sock_args *args)
exit(1);
}
- if (tcp_md5sig(sd, addr, alen, args->password))
+ if (tcp_md5sig(sd, addr, alen, args))
return -1;
return 0;
@@ -1194,7 +1214,7 @@ static int do_server(struct sock_args *args)
if (args->password && tcp_md5_remote(lsd, args)) {
close(lsd);
- return -1;
+ return 1;
}
while (1) {
@@ -1313,7 +1333,7 @@ static int connectsock(void *addr, socklen_t alen, struct sock_args *args)
if (args->type != SOCK_STREAM)
goto out;
- if (args->password && tcp_md5sig(sd, addr, alen, args->password))
+ if (args->password && tcp_md5sig(sd, addr, alen, args))
goto err;
if (args->bind_test_only)
@@ -1405,16 +1425,18 @@ enum addr_type {
ADDR_TYPE_MCAST,
ADDR_TYPE_EXPECTED_LOCAL,
ADDR_TYPE_EXPECTED_REMOTE,
+ ADDR_TYPE_MD5_PREFIX,
};
static int convert_addr(struct sock_args *args, const char *_str,
enum addr_type atype)
{
+ int pfx_len_max = args->version == AF_INET6 ? 128 : 32;
int family = args->version;
+ char *str, *dev, *sep;
struct in6_addr *in6;
struct in_addr *in;
const char *desc;
- char *str, *dev;
void *addr;
int rc = 0;
@@ -1443,6 +1465,30 @@ static int convert_addr(struct sock_args *args, const char *_str,
desc = "expected remote";
addr = &args->expected_raddr;
break;
+ case ADDR_TYPE_MD5_PREFIX:
+ desc = "md5 prefix";
+ if (family == AF_INET) {
+ args->md5_prefix.v4.sin_family = AF_INET;
+ addr = &args->md5_prefix.v4.sin_addr;
+ } else if (family == AF_INET6) {
+ args->md5_prefix.v6.sin6_family = AF_INET6;
+ addr = &args->md5_prefix.v6.sin6_addr;
+ } else
+ return 1;
+
+ sep = strchr(str, '/');
+ if (sep) {
+ *sep = '\0';
+ sep++;
+ if (str_to_uint(sep, 1, pfx_len_max,
+ &args->prefix_len) != 0) {
+ fprintf(stderr, "Invalid port\n");
+ return 1;
+ }
+ } else {
+ args->prefix_len = pfx_len_max;
+ }
+ break;
default:
log_error("unknown address type");
exit(1);
@@ -1522,7 +1568,7 @@ static char *random_msg(int len)
return m;
}
-#define GETOPT_STR "sr:l:p:t:g:P:DRn:M:d:SCi6L:0:1:2:Fbq"
+#define GETOPT_STR "sr:l:p:t:g:P:DRn:M:m:d:SCi6L:0:1:2:Fbq"
static void print_usage(char *prog)
{
@@ -1551,6 +1597,7 @@ static void print_usage(char *prog)
" -n num number of times to send message\n"
"\n"
" -M password use MD5 sum protection\n"
+ " -m prefix/len prefix and length to use for MD5 key\n"
" -g grp multicast group (e.g., 239.1.1.1)\n"
" -i interactive mode (default is echo and terminate)\n"
"\n"
@@ -1642,6 +1689,10 @@ int main(int argc, char *argv[])
case 'M':
args.password = optarg;
break;
+ case 'm':
+ if (convert_addr(&args, optarg, ADDR_TYPE_MD5_PREFIX) < 0)
+ return 1;
+ break;
case 'S':
args.use_setsockopt = 1;
break;
@@ -1706,11 +1757,16 @@ int main(int argc, char *argv[])
}
if (args.password &&
- (!args.has_remote_ip || args.type != SOCK_STREAM)) {
+ ((!args.has_remote_ip && !args.prefix_len) || args.type != SOCK_STREAM)) {
log_error("MD5 passwords apply to TCP only and require a remote ip for the password\n");
return 1;
}
+ if (args.prefix_len && !args.password) {
+ log_error("Prefix range for MD5 protection specified without a password\n");
+ return 1;
+ }
+
if ((args.use_setsockopt || args.use_cmsg) && !args.ifindex) {
fprintf(stderr, "Device binding not specified\n");
return 1;
diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index d697815d2785..71a62e7e35b1 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -11,9 +11,9 @@
# R1 and R2 (also implemented with namespaces), with different MTUs:
#
# segment a_r1 segment b_r1 a_r1: 2000
-# .--------------R1--------------. a_r2: 1500
-# A B a_r3: 2000
-# '--------------R2--------------' a_r4: 1400
+# .--------------R1--------------. b_r1: 1400
+# A B a_r2: 2000
+# '--------------R2--------------' b_r2: 1500
# segment a_r2 segment b_r2
#
# Check that PMTU exceptions with the correct PMTU are created. Then
diff --git a/tools/testing/selftests/net/so_txtime.c b/tools/testing/selftests/net/so_txtime.c
index 34df4c8882af..383bac05ac32 100644
--- a/tools/testing/selftests/net/so_txtime.c
+++ b/tools/testing/selftests/net/so_txtime.c
@@ -12,7 +12,11 @@
#include <arpa/inet.h>
#include <error.h>
#include <errno.h>
+#include <inttypes.h>
#include <linux/net_tstamp.h>
+#include <linux/errqueue.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
#include <stdbool.h>
#include <stdlib.h>
#include <stdio.h>
@@ -28,7 +32,7 @@ static int cfg_clockid = CLOCK_TAI;
static bool cfg_do_ipv4;
static bool cfg_do_ipv6;
static uint16_t cfg_port = 8000;
-static int cfg_variance_us = 2000;
+static int cfg_variance_us = 4000;
static uint64_t glob_tstart;
@@ -43,6 +47,9 @@ static struct timed_send cfg_in[MAX_NUM_PKT];
static struct timed_send cfg_out[MAX_NUM_PKT];
static int cfg_num_pkt;
+static int cfg_errq_level;
+static int cfg_errq_type;
+
static uint64_t gettime_ns(void)
{
struct timespec ts;
@@ -90,13 +97,15 @@ static void do_send_one(int fdt, struct timed_send *ts)
}
-static void do_recv_one(int fdr, struct timed_send *ts)
+static bool do_recv_one(int fdr, struct timed_send *ts)
{
int64_t tstop, texpect;
char rbuf[2];
int ret;
ret = recv(fdr, rbuf, sizeof(rbuf), 0);
+ if (ret == -1 && errno == EAGAIN)
+ return true;
if (ret == -1)
error(1, errno, "read");
if (ret != 1)
@@ -113,6 +122,8 @@ static void do_recv_one(int fdr, struct timed_send *ts)
if (labs(tstop - texpect) > cfg_variance_us)
error(1, 0, "exceeds variance (%d us)", cfg_variance_us);
+
+ return false;
}
static void do_recv_verify_empty(int fdr)
@@ -125,12 +136,70 @@ static void do_recv_verify_empty(int fdr)
error(1, 0, "recv: not empty as expected (%d, %d)", ret, errno);
}
+static void do_recv_errqueue_timeout(int fdt)
+{
+ char control[CMSG_SPACE(sizeof(struct sock_extended_err)) +
+ CMSG_SPACE(sizeof(struct sockaddr_in6))] = {0};
+ char data[sizeof(struct ipv6hdr) +
+ sizeof(struct tcphdr) + 1];
+ struct sock_extended_err *err;
+ struct msghdr msg = {0};
+ struct iovec iov = {0};
+ struct cmsghdr *cm;
+ int64_t tstamp = 0;
+ int ret;
+
+ iov.iov_base = data;
+ iov.iov_len = sizeof(data);
+
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+
+ msg.msg_control = control;
+ msg.msg_controllen = sizeof(control);
+
+ while (1) {
+ ret = recvmsg(fdt, &msg, MSG_ERRQUEUE);
+ if (ret == -1 && errno == EAGAIN)
+ break;
+ if (ret == -1)
+ error(1, errno, "errqueue");
+ if (msg.msg_flags != MSG_ERRQUEUE)
+ error(1, 0, "errqueue: flags 0x%x\n", msg.msg_flags);
+
+ cm = CMSG_FIRSTHDR(&msg);
+ if (cm->cmsg_level != cfg_errq_level ||
+ cm->cmsg_type != cfg_errq_type)
+ error(1, 0, "errqueue: type 0x%x.0x%x\n",
+ cm->cmsg_level, cm->cmsg_type);
+
+ err = (struct sock_extended_err *)CMSG_DATA(cm);
+ if (err->ee_origin != SO_EE_ORIGIN_TXTIME)
+ error(1, 0, "errqueue: origin 0x%x\n", err->ee_origin);
+ if (err->ee_code != ECANCELED)
+ error(1, 0, "errqueue: code 0x%x\n", err->ee_code);
+
+ tstamp = ((int64_t) err->ee_data) << 32 | err->ee_info;
+ tstamp -= (int64_t) glob_tstart;
+ tstamp /= 1000 * 1000;
+ fprintf(stderr, "send: pkt %c at %" PRId64 "ms dropped\n",
+ data[ret - 1], tstamp);
+
+ msg.msg_flags = 0;
+ msg.msg_controllen = sizeof(control);
+ }
+
+ error(1, 0, "recv: timeout");
+}
+
static void setsockopt_txtime(int fd)
{
struct sock_txtime so_txtime_val = { .clockid = cfg_clockid };
struct sock_txtime so_txtime_val_read = { 0 };
socklen_t vallen = sizeof(so_txtime_val);
+ so_txtime_val.flags = SOF_TXTIME_REPORT_ERRORS;
+
if (setsockopt(fd, SOL_SOCKET, SO_TXTIME,
&so_txtime_val, sizeof(so_txtime_val)))
error(1, errno, "setsockopt txtime");
@@ -194,7 +263,8 @@ static void do_test(struct sockaddr *addr, socklen_t alen)
for (i = 0; i < cfg_num_pkt; i++)
do_send_one(fdt, &cfg_in[i]);
for (i = 0; i < cfg_num_pkt; i++)
- do_recv_one(fdr, &cfg_out[i]);
+ if (do_recv_one(fdr, &cfg_out[i]))
+ do_recv_errqueue_timeout(fdt);
do_recv_verify_empty(fdr);
@@ -280,6 +350,10 @@ int main(int argc, char **argv)
addr6.sin6_family = AF_INET6;
addr6.sin6_port = htons(cfg_port);
addr6.sin6_addr = in6addr_loopback;
+
+ cfg_errq_level = SOL_IPV6;
+ cfg_errq_type = IPV6_RECVERR;
+
do_test((void *)&addr6, sizeof(addr6));
}
@@ -289,6 +363,10 @@ int main(int argc, char **argv)
addr4.sin_family = AF_INET;
addr4.sin_port = htons(cfg_port);
addr4.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+
+ cfg_errq_level = SOL_IP;
+ cfg_errq_type = IP_RECVERR;
+
do_test((void *)&addr4, sizeof(addr4));
}
diff --git a/tools/testing/selftests/net/so_txtime.sh b/tools/testing/selftests/net/so_txtime.sh
index 5aa519328a5b..3f7800eaecb1 100755
--- a/tools/testing/selftests/net/so_txtime.sh
+++ b/tools/testing/selftests/net/so_txtime.sh
@@ -5,7 +5,12 @@
# Run in network namespace
if [[ $# -eq 0 ]]; then
- ./in_netns.sh $0 __subprocess
+ if ! ./in_netns.sh $0 __subprocess; then
+ # test is time sensitive, can be flaky
+ echo "test failed: retry once"
+ ./in_netns.sh $0 __subprocess
+ fi
+
exit $?
fi
@@ -18,7 +23,7 @@ tc qdisc add dev lo root fq
./so_txtime -4 -6 -c mono a,10,b,20 a,10,b,20
./so_txtime -4 -6 -c mono a,20,b,10 b,20,a,20
-if tc qdisc replace dev lo root etf clockid CLOCK_TAI delta 200000; then
+if tc qdisc replace dev lo root etf clockid CLOCK_TAI delta 400000; then
! ./so_txtime -4 -6 -c tai a,-1 a,-1
! ./so_txtime -4 -6 -c tai a,0 a,0
./so_txtime -4 -6 -c tai a,10 a,10
diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c
index 13e5ef615026..0ea44d975b6c 100644
--- a/tools/testing/selftests/net/tls.c
+++ b/tools/testing/selftests/net/tls.c
@@ -722,34 +722,6 @@ TEST_F(tls, recv_lowat)
EXPECT_EQ(memcmp(send_mem, recv_mem + 10, 5), 0);
}
-TEST_F(tls, recv_rcvbuf)
-{
- char send_mem[4096];
- char recv_mem[4096];
- int rcv_buf = 1024;
-
- memset(send_mem, 0x1c, sizeof(send_mem));
-
- EXPECT_EQ(setsockopt(self->cfd, SOL_SOCKET, SO_RCVBUF,
- &rcv_buf, sizeof(rcv_buf)), 0);
-
- EXPECT_EQ(send(self->fd, send_mem, 512, 0), 512);
- memset(recv_mem, 0, sizeof(recv_mem));
- EXPECT_EQ(recv(self->cfd, recv_mem, sizeof(recv_mem), 0), 512);
- EXPECT_EQ(memcmp(send_mem, recv_mem, 512), 0);
-
- if (self->notls)
- return;
-
- EXPECT_EQ(send(self->fd, send_mem, 4096, 0), 4096);
- memset(recv_mem, 0, sizeof(recv_mem));
- EXPECT_EQ(recv(self->cfd, recv_mem, sizeof(recv_mem), 0), -1);
- EXPECT_EQ(errno, EMSGSIZE);
-
- EXPECT_EQ(recv(self->cfd, recv_mem, sizeof(recv_mem), 0), -1);
- EXPECT_EQ(errno, EMSGSIZE);
-}
-
TEST_F(tls, bidir)
{
char const *test_str = "test_read";
diff --git a/tools/testing/selftests/netfilter/nft_flowtable.sh b/tools/testing/selftests/netfilter/nft_flowtable.sh
index 16571ac1dab4..d3e0809ab368 100755
--- a/tools/testing/selftests/netfilter/nft_flowtable.sh
+++ b/tools/testing/selftests/netfilter/nft_flowtable.sh
@@ -226,17 +226,19 @@ check_transfer()
return 0
}
-test_tcp_forwarding()
+test_tcp_forwarding_ip()
{
local nsa=$1
local nsb=$2
+ local dstip=$3
+ local dstport=$4
local lret=0
ip netns exec $nsb nc -w 5 -l -p 12345 < "$ns2in" > "$ns2out" &
lpid=$!
sleep 1
- ip netns exec $nsa nc -w 4 10.0.2.99 12345 < "$ns1in" > "$ns1out" &
+ ip netns exec $nsa nc -w 4 "$dstip" "$dstport" < "$ns1in" > "$ns1out" &
cpid=$!
sleep 3
@@ -258,6 +260,28 @@ test_tcp_forwarding()
return $lret
}
+test_tcp_forwarding()
+{
+ test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345
+
+ return $?
+}
+
+test_tcp_forwarding_nat()
+{
+ local lret
+
+ test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345
+ lret=$?
+
+ if [ $lret -eq 0 ] ; then
+ test_tcp_forwarding_ip "$1" "$2" 10.6.6.6 1666
+ lret=$?
+ fi
+
+ return $lret
+}
+
make_file "$ns1in" "ns1"
make_file "$ns2in" "ns2"
@@ -283,14 +307,19 @@ ip -net ns2 route add 192.168.10.1 via 10.0.2.1
# Same, but with NAT enabled.
ip netns exec nsr1 nft -f - <<EOF
table ip nat {
+ chain prerouting {
+ type nat hook prerouting priority 0; policy accept;
+ meta iif "veth0" ip daddr 10.6.6.6 tcp dport 1666 counter dnat ip to 10.0.2.99:12345
+ }
+
chain postrouting {
type nat hook postrouting priority 0; policy accept;
- meta oifname "veth1" masquerade
+ meta oifname "veth1" counter masquerade
}
}
EOF
-test_tcp_forwarding ns1 ns2
+test_tcp_forwarding_nat ns1 ns2
if [ $? -eq 0 ] ;then
echo "PASS: flow offloaded for ns1/ns2 with NAT"
@@ -313,7 +342,7 @@ fi
ip netns exec ns1 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
ip netns exec ns2 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
-test_tcp_forwarding ns1 ns2
+test_tcp_forwarding_nat ns1 ns2
if [ $? -eq 0 ] ;then
echo "PASS: flow offloaded for ns1/ns2 with NAT and pmtu discovery"
else
diff --git a/tools/testing/selftests/netfilter/nft_nat.sh b/tools/testing/selftests/netfilter/nft_nat.sh
index 1be55e705780..d7e07f4c3d7f 100755
--- a/tools/testing/selftests/netfilter/nft_nat.sh
+++ b/tools/testing/selftests/netfilter/nft_nat.sh
@@ -8,9 +8,14 @@ ksft_skip=4
ret=0
test_inet_nat=true
+sfx=$(mktemp -u "XXXXXXXX")
+ns0="ns0-$sfx"
+ns1="ns1-$sfx"
+ns2="ns2-$sfx"
+
cleanup()
{
- for i in 0 1 2; do ip netns del ns$i;done
+ for i in 0 1 2; do ip netns del ns$i-"$sfx";done
}
nft --version > /dev/null 2>&1
@@ -25,40 +30,49 @@ if [ $? -ne 0 ];then
exit $ksft_skip
fi
-ip netns add ns0
+ip netns add "$ns0"
if [ $? -ne 0 ];then
- echo "SKIP: Could not create net namespace"
+ echo "SKIP: Could not create net namespace $ns0"
exit $ksft_skip
fi
trap cleanup EXIT
-ip netns add ns1
-ip netns add ns2
+ip netns add "$ns1"
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not create net namespace $ns1"
+ exit $ksft_skip
+fi
+
+ip netns add "$ns2"
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not create net namespace $ns2"
+ exit $ksft_skip
+fi
-ip link add veth0 netns ns0 type veth peer name eth0 netns ns1 > /dev/null 2>&1
+ip link add veth0 netns "$ns0" type veth peer name eth0 netns "$ns1" > /dev/null 2>&1
if [ $? -ne 0 ];then
echo "SKIP: No virtual ethernet pair device support in kernel"
exit $ksft_skip
fi
-ip link add veth1 netns ns0 type veth peer name eth0 netns ns2
+ip link add veth1 netns "$ns0" type veth peer name eth0 netns "$ns2"
-ip -net ns0 link set lo up
-ip -net ns0 link set veth0 up
-ip -net ns0 addr add 10.0.1.1/24 dev veth0
-ip -net ns0 addr add dead:1::1/64 dev veth0
+ip -net "$ns0" link set lo up
+ip -net "$ns0" link set veth0 up
+ip -net "$ns0" addr add 10.0.1.1/24 dev veth0
+ip -net "$ns0" addr add dead:1::1/64 dev veth0
-ip -net ns0 link set veth1 up
-ip -net ns0 addr add 10.0.2.1/24 dev veth1
-ip -net ns0 addr add dead:2::1/64 dev veth1
+ip -net "$ns0" link set veth1 up
+ip -net "$ns0" addr add 10.0.2.1/24 dev veth1
+ip -net "$ns0" addr add dead:2::1/64 dev veth1
for i in 1 2; do
- ip -net ns$i link set lo up
- ip -net ns$i link set eth0 up
- ip -net ns$i addr add 10.0.$i.99/24 dev eth0
- ip -net ns$i route add default via 10.0.$i.1
- ip -net ns$i addr add dead:$i::99/64 dev eth0
- ip -net ns$i route add default via dead:$i::1
+ ip -net ns$i-$sfx link set lo up
+ ip -net ns$i-$sfx link set eth0 up
+ ip -net ns$i-$sfx addr add 10.0.$i.99/24 dev eth0
+ ip -net ns$i-$sfx route add default via 10.0.$i.1
+ ip -net ns$i-$sfx addr add dead:$i::99/64 dev eth0
+ ip -net ns$i-$sfx route add default via dead:$i::1
done
bad_counter()
@@ -66,8 +80,9 @@ bad_counter()
local ns=$1
local counter=$2
local expect=$3
+ local tag=$4
- echo "ERROR: $counter counter in $ns has unexpected value (expected $expect)" 1>&2
+ echo "ERROR: $counter counter in $ns has unexpected value (expected $expect) at $tag" 1>&2
ip netns exec $ns nft list counter inet filter $counter 1>&2
}
@@ -78,24 +93,24 @@ check_counters()
cnt=$(ip netns exec $ns nft list counter inet filter ns0in | grep -q "packets 1 bytes 84")
if [ $? -ne 0 ]; then
- bad_counter $ns ns0in "packets 1 bytes 84"
+ bad_counter $ns ns0in "packets 1 bytes 84" "check_counters 1"
lret=1
fi
cnt=$(ip netns exec $ns nft list counter inet filter ns0out | grep -q "packets 1 bytes 84")
if [ $? -ne 0 ]; then
- bad_counter $ns ns0out "packets 1 bytes 84"
+ bad_counter $ns ns0out "packets 1 bytes 84" "check_counters 2"
lret=1
fi
expect="packets 1 bytes 104"
cnt=$(ip netns exec $ns nft list counter inet filter ns0in6 | grep -q "$expect")
if [ $? -ne 0 ]; then
- bad_counter $ns ns0in6 "$expect"
+ bad_counter $ns ns0in6 "$expect" "check_counters 3"
lret=1
fi
cnt=$(ip netns exec $ns nft list counter inet filter ns0out6 | grep -q "$expect")
if [ $? -ne 0 ]; then
- bad_counter $ns ns0out6 "$expect"
+ bad_counter $ns ns0out6 "$expect" "check_counters 4"
lret=1
fi
@@ -107,41 +122,41 @@ check_ns0_counters()
local ns=$1
local lret=0
- cnt=$(ip netns exec ns0 nft list counter inet filter ns0in | grep -q "packets 0 bytes 0")
+ cnt=$(ip netns exec "$ns0" nft list counter inet filter ns0in | grep -q "packets 0 bytes 0")
if [ $? -ne 0 ]; then
- bad_counter ns0 ns0in "packets 0 bytes 0"
+ bad_counter "$ns0" ns0in "packets 0 bytes 0" "check_ns0_counters 1"
lret=1
fi
- cnt=$(ip netns exec ns0 nft list counter inet filter ns0in6 | grep -q "packets 0 bytes 0")
+ cnt=$(ip netns exec "$ns0" nft list counter inet filter ns0in6 | grep -q "packets 0 bytes 0")
if [ $? -ne 0 ]; then
- bad_counter ns0 ns0in6 "packets 0 bytes 0"
+ bad_counter "$ns0" ns0in6 "packets 0 bytes 0"
lret=1
fi
- cnt=$(ip netns exec ns0 nft list counter inet filter ns0out | grep -q "packets 0 bytes 0")
+ cnt=$(ip netns exec "$ns0" nft list counter inet filter ns0out | grep -q "packets 0 bytes 0")
if [ $? -ne 0 ]; then
- bad_counter ns0 ns0out "packets 0 bytes 0"
+ bad_counter "$ns0" ns0out "packets 0 bytes 0" "check_ns0_counters 2"
lret=1
fi
- cnt=$(ip netns exec ns0 nft list counter inet filter ns0out6 | grep -q "packets 0 bytes 0")
+ cnt=$(ip netns exec "$ns0" nft list counter inet filter ns0out6 | grep -q "packets 0 bytes 0")
if [ $? -ne 0 ]; then
- bad_counter ns0 ns0out6 "packets 0 bytes 0"
+ bad_counter "$ns0" ns0out6 "packets 0 bytes 0" "check_ns0_counters3 "
lret=1
fi
for dir in "in" "out" ; do
expect="packets 1 bytes 84"
- cnt=$(ip netns exec ns0 nft list counter inet filter ${ns}${dir} | grep -q "$expect")
+ cnt=$(ip netns exec "$ns0" nft list counter inet filter ${ns}${dir} | grep -q "$expect")
if [ $? -ne 0 ]; then
- bad_counter ns0 $ns$dir "$expect"
+ bad_counter "$ns0" $ns$dir "$expect" "check_ns0_counters 4"
lret=1
fi
expect="packets 1 bytes 104"
- cnt=$(ip netns exec ns0 nft list counter inet filter ${ns}${dir}6 | grep -q "$expect")
+ cnt=$(ip netns exec "$ns0" nft list counter inet filter ${ns}${dir}6 | grep -q "$expect")
if [ $? -ne 0 ]; then
- bad_counter ns0 $ns$dir6 "$expect"
+ bad_counter "$ns0" $ns$dir6 "$expect" "check_ns0_counters 5"
lret=1
fi
done
@@ -152,7 +167,7 @@ check_ns0_counters()
reset_counters()
{
for i in 0 1 2;do
- ip netns exec ns$i nft reset counters inet > /dev/null
+ ip netns exec ns$i-$sfx nft reset counters inet > /dev/null
done
}
@@ -166,7 +181,7 @@ test_local_dnat6()
IPF="ip6"
fi
-ip netns exec ns0 nft -f - <<EOF
+ip netns exec "$ns0" nft -f /dev/stdin <<EOF
table $family nat {
chain output {
type nat hook output priority 0; policy accept;
@@ -180,7 +195,7 @@ EOF
fi
# ping netns1, expect rewrite to netns2
- ip netns exec ns0 ping -q -c 1 dead:1::99 > /dev/null
+ ip netns exec "$ns0" ping -q -c 1 dead:1::99 > /dev/null
if [ $? -ne 0 ]; then
lret=1
echo "ERROR: ping6 failed"
@@ -189,18 +204,18 @@ EOF
expect="packets 0 bytes 0"
for dir in "in6" "out6" ; do
- cnt=$(ip netns exec ns0 nft list counter inet filter ns1${dir} | grep -q "$expect")
+ cnt=$(ip netns exec "$ns0" nft list counter inet filter ns1${dir} | grep -q "$expect")
if [ $? -ne 0 ]; then
- bad_counter ns0 ns1$dir "$expect"
+ bad_counter "$ns0" ns1$dir "$expect" "test_local_dnat6 1"
lret=1
fi
done
expect="packets 1 bytes 104"
for dir in "in6" "out6" ; do
- cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect")
+ cnt=$(ip netns exec "$ns0" nft list counter inet filter ns2${dir} | grep -q "$expect")
if [ $? -ne 0 ]; then
- bad_counter ns0 ns2$dir "$expect"
+ bad_counter "$ns0" ns2$dir "$expect" "test_local_dnat6 2"
lret=1
fi
done
@@ -208,9 +223,9 @@ EOF
# expect 0 count in ns1
expect="packets 0 bytes 0"
for dir in "in6" "out6" ; do
- cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect")
+ cnt=$(ip netns exec "$ns1" nft list counter inet filter ns0${dir} | grep -q "$expect")
if [ $? -ne 0 ]; then
- bad_counter ns1 ns0$dir "$expect"
+ bad_counter "$ns1" ns0$dir "$expect" "test_local_dnat6 3"
lret=1
fi
done
@@ -218,15 +233,15 @@ EOF
# expect 1 packet in ns2
expect="packets 1 bytes 104"
for dir in "in6" "out6" ; do
- cnt=$(ip netns exec ns2 nft list counter inet filter ns0${dir} | grep -q "$expect")
+ cnt=$(ip netns exec "$ns2" nft list counter inet filter ns0${dir} | grep -q "$expect")
if [ $? -ne 0 ]; then
- bad_counter ns2 ns0$dir "$expect"
+ bad_counter "$ns2" ns0$dir "$expect" "test_local_dnat6 4"
lret=1
fi
done
- test $lret -eq 0 && echo "PASS: ipv6 ping to ns1 was $family NATted to ns2"
- ip netns exec ns0 nft flush chain ip6 nat output
+ test $lret -eq 0 && echo "PASS: ipv6 ping to $ns1 was $family NATted to $ns2"
+ ip netns exec "$ns0" nft flush chain ip6 nat output
return $lret
}
@@ -241,7 +256,7 @@ test_local_dnat()
IPF="ip"
fi
-ip netns exec ns0 nft -f - <<EOF 2>/dev/null
+ip netns exec "$ns0" nft -f /dev/stdin <<EOF 2>/dev/null
table $family nat {
chain output {
type nat hook output priority 0; policy accept;
@@ -260,7 +275,7 @@ EOF
fi
# ping netns1, expect rewrite to netns2
- ip netns exec ns0 ping -q -c 1 10.0.1.99 > /dev/null
+ ip netns exec "$ns0" ping -q -c 1 10.0.1.99 > /dev/null
if [ $? -ne 0 ]; then
lret=1
echo "ERROR: ping failed"
@@ -269,18 +284,18 @@ EOF
expect="packets 0 bytes 0"
for dir in "in" "out" ; do
- cnt=$(ip netns exec ns0 nft list counter inet filter ns1${dir} | grep -q "$expect")
+ cnt=$(ip netns exec "$ns0" nft list counter inet filter ns1${dir} | grep -q "$expect")
if [ $? -ne 0 ]; then
- bad_counter ns0 ns1$dir "$expect"
+ bad_counter "$ns0" ns1$dir "$expect" "test_local_dnat 1"
lret=1
fi
done
expect="packets 1 bytes 84"
for dir in "in" "out" ; do
- cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect")
+ cnt=$(ip netns exec "$ns0" nft list counter inet filter ns2${dir} | grep -q "$expect")
if [ $? -ne 0 ]; then
- bad_counter ns0 ns2$dir "$expect"
+ bad_counter "$ns0" ns2$dir "$expect" "test_local_dnat 2"
lret=1
fi
done
@@ -288,9 +303,9 @@ EOF
# expect 0 count in ns1
expect="packets 0 bytes 0"
for dir in "in" "out" ; do
- cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect")
+ cnt=$(ip netns exec "$ns1" nft list counter inet filter ns0${dir} | grep -q "$expect")
if [ $? -ne 0 ]; then
- bad_counter ns1 ns0$dir "$expect"
+ bad_counter "$ns1" ns0$dir "$expect" "test_local_dnat 3"
lret=1
fi
done
@@ -298,19 +313,19 @@ EOF
# expect 1 packet in ns2
expect="packets 1 bytes 84"
for dir in "in" "out" ; do
- cnt=$(ip netns exec ns2 nft list counter inet filter ns0${dir} | grep -q "$expect")
+ cnt=$(ip netns exec "$ns2" nft list counter inet filter ns0${dir} | grep -q "$expect")
if [ $? -ne 0 ]; then
- bad_counter ns2 ns0$dir "$expect"
+ bad_counter "$ns2" ns0$dir "$expect" "test_local_dnat 4"
lret=1
fi
done
- test $lret -eq 0 && echo "PASS: ping to ns1 was $family NATted to ns2"
+ test $lret -eq 0 && echo "PASS: ping to $ns1 was $family NATted to $ns2"
- ip netns exec ns0 nft flush chain $family nat output
+ ip netns exec "$ns0" nft flush chain $family nat output
reset_counters
- ip netns exec ns0 ping -q -c 1 10.0.1.99 > /dev/null
+ ip netns exec "$ns0" ping -q -c 1 10.0.1.99 > /dev/null
if [ $? -ne 0 ]; then
lret=1
echo "ERROR: ping failed"
@@ -319,17 +334,17 @@ EOF
expect="packets 1 bytes 84"
for dir in "in" "out" ; do
- cnt=$(ip netns exec ns0 nft list counter inet filter ns1${dir} | grep -q "$expect")
+ cnt=$(ip netns exec "$ns0" nft list counter inet filter ns1${dir} | grep -q "$expect")
if [ $? -ne 0 ]; then
- bad_counter ns1 ns1$dir "$expect"
+ bad_counter "$ns1" ns1$dir "$expect" "test_local_dnat 5"
lret=1
fi
done
expect="packets 0 bytes 0"
for dir in "in" "out" ; do
- cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect")
+ cnt=$(ip netns exec "$ns0" nft list counter inet filter ns2${dir} | grep -q "$expect")
if [ $? -ne 0 ]; then
- bad_counter ns0 ns2$dir "$expect"
+ bad_counter "$ns0" ns2$dir "$expect" "test_local_dnat 6"
lret=1
fi
done
@@ -337,9 +352,9 @@ EOF
# expect 1 count in ns1
expect="packets 1 bytes 84"
for dir in "in" "out" ; do
- cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect")
+ cnt=$(ip netns exec "$ns1" nft list counter inet filter ns0${dir} | grep -q "$expect")
if [ $? -ne 0 ]; then
- bad_counter ns0 ns0$dir "$expect"
+ bad_counter "$ns0" ns0$dir "$expect" "test_local_dnat 7"
lret=1
fi
done
@@ -347,14 +362,14 @@ EOF
# expect 0 packet in ns2
expect="packets 0 bytes 0"
for dir in "in" "out" ; do
- cnt=$(ip netns exec ns2 nft list counter inet filter ns0${dir} | grep -q "$expect")
+ cnt=$(ip netns exec "$ns2" nft list counter inet filter ns0${dir} | grep -q "$expect")
if [ $? -ne 0 ]; then
- bad_counter ns2 ns2$dir "$expect"
+ bad_counter "$ns2" ns0$dir "$expect" "test_local_dnat 8"
lret=1
fi
done
- test $lret -eq 0 && echo "PASS: ping to ns1 OK after $family nat output chain flush"
+ test $lret -eq 0 && echo "PASS: ping to $ns1 OK after $family nat output chain flush"
return $lret
}
@@ -366,26 +381,26 @@ test_masquerade6()
local natflags=$2
local lret=0
- ip netns exec ns0 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+ ip netns exec "$ns0" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
- ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1
+ ip netns exec "$ns2" ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1
if [ $? -ne 0 ] ; then
- echo "ERROR: cannot ping ns1 from ns2 via ipv6"
+ echo "ERROR: cannot ping $ns1 from $ns2 via ipv6"
return 1
lret=1
fi
expect="packets 1 bytes 104"
for dir in "in6" "out6" ; do
- cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect")
+ cnt=$(ip netns exec "$ns1" nft list counter inet filter ns2${dir} | grep -q "$expect")
if [ $? -ne 0 ]; then
- bad_counter ns1 ns2$dir "$expect"
+ bad_counter "$ns1" ns2$dir "$expect" "test_masquerade6 1"
lret=1
fi
- cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect")
+ cnt=$(ip netns exec "$ns2" nft list counter inet filter ns1${dir} | grep -q "$expect")
if [ $? -ne 0 ]; then
- bad_counter ns2 ns1$dir "$expect"
+ bad_counter "$ns2" ns1$dir "$expect" "test_masquerade6 2"
lret=1
fi
done
@@ -393,7 +408,7 @@ test_masquerade6()
reset_counters
# add masquerading rule
-ip netns exec ns0 nft -f - <<EOF
+ip netns exec "$ns0" nft -f /dev/stdin <<EOF
table $family nat {
chain postrouting {
type nat hook postrouting priority 0; policy accept;
@@ -406,24 +421,24 @@ EOF
return $ksft_skip
fi
- ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1
+ ip netns exec "$ns2" ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1
if [ $? -ne 0 ] ; then
- echo "ERROR: cannot ping ns1 from ns2 with active $family masquerade $natflags"
+ echo "ERROR: cannot ping $ns1 from $ns2 with active $family masquerade $natflags"
lret=1
fi
# ns1 should have seen packets from ns0, due to masquerade
expect="packets 1 bytes 104"
for dir in "in6" "out6" ; do
- cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect")
+ cnt=$(ip netns exec "$ns1" nft list counter inet filter ns0${dir} | grep -q "$expect")
if [ $? -ne 0 ]; then
- bad_counter ns1 ns0$dir "$expect"
+ bad_counter "$ns1" ns0$dir "$expect" "test_masquerade6 3"
lret=1
fi
- cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect")
+ cnt=$(ip netns exec "$ns2" nft list counter inet filter ns1${dir} | grep -q "$expect")
if [ $? -ne 0 ]; then
- bad_counter ns2 ns1$dir "$expect"
+ bad_counter "$ns2" ns1$dir "$expect" "test_masquerade6 4"
lret=1
fi
done
@@ -431,32 +446,32 @@ EOF
# ns1 should not have seen packets from ns2, due to masquerade
expect="packets 0 bytes 0"
for dir in "in6" "out6" ; do
- cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect")
+ cnt=$(ip netns exec "$ns1" nft list counter inet filter ns2${dir} | grep -q "$expect")
if [ $? -ne 0 ]; then
- bad_counter ns1 ns0$dir "$expect"
+ bad_counter "$ns1" ns0$dir "$expect" "test_masquerade6 5"
lret=1
fi
- cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect")
+ cnt=$(ip netns exec "$ns0" nft list counter inet filter ns1${dir} | grep -q "$expect")
if [ $? -ne 0 ]; then
- bad_counter ns2 ns1$dir "$expect"
+ bad_counter "$ns0" ns1$dir "$expect" "test_masquerade6 6"
lret=1
fi
done
- ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1
+ ip netns exec "$ns2" ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1
if [ $? -ne 0 ] ; then
- echo "ERROR: cannot ping ns1 from ns2 with active ipv6 masquerade $natflags (attempt 2)"
+ echo "ERROR: cannot ping $ns1 from $ns2 with active ipv6 masquerade $natflags (attempt 2)"
lret=1
fi
- ip netns exec ns0 nft flush chain $family nat postrouting
+ ip netns exec "$ns0" nft flush chain $family nat postrouting
if [ $? -ne 0 ]; then
echo "ERROR: Could not flush $family nat postrouting" 1>&2
lret=1
fi
- test $lret -eq 0 && echo "PASS: $family IPv6 masquerade $natflags for ns2"
+ test $lret -eq 0 && echo "PASS: $family IPv6 masquerade $natflags for $ns2"
return $lret
}
@@ -467,26 +482,26 @@ test_masquerade()
local natflags=$2
local lret=0
- ip netns exec ns0 sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
- ip netns exec ns0 sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
+ ip netns exec "$ns0" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
+ ip netns exec "$ns0" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
- ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1
+ ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1
if [ $? -ne 0 ] ; then
- echo "ERROR: cannot ping ns1 from ns2 $natflags"
+ echo "ERROR: cannot ping $ns1 from "$ns2" $natflags"
lret=1
fi
expect="packets 1 bytes 84"
for dir in "in" "out" ; do
- cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect")
+ cnt=$(ip netns exec "$ns1" nft list counter inet filter ns2${dir} | grep -q "$expect")
if [ $? -ne 0 ]; then
- bad_counter ns1 ns2$dir "$expect"
+ bad_counter "$ns1" ns2$dir "$expect" "test_masquerade 1"
lret=1
fi
- cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect")
+ cnt=$(ip netns exec "$ns2" nft list counter inet filter ns1${dir} | grep -q "$expect")
if [ $? -ne 0 ]; then
- bad_counter ns2 ns1$dir "$expect"
+ bad_counter "$ns2" ns1$dir "$expect" "test_masquerade 2"
lret=1
fi
done
@@ -494,7 +509,7 @@ test_masquerade()
reset_counters
# add masquerading rule
-ip netns exec ns0 nft -f - <<EOF
+ip netns exec "$ns0" nft -f /dev/stdin <<EOF
table $family nat {
chain postrouting {
type nat hook postrouting priority 0; policy accept;
@@ -507,24 +522,24 @@ EOF
return $ksft_skip
fi
- ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1
+ ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1
if [ $? -ne 0 ] ; then
- echo "ERROR: cannot ping ns1 from ns2 with active $family masquerade $natflags"
+ echo "ERROR: cannot ping $ns1 from $ns2 with active $family masquerade $natflags"
lret=1
fi
# ns1 should have seen packets from ns0, due to masquerade
expect="packets 1 bytes 84"
for dir in "in" "out" ; do
- cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect")
+ cnt=$(ip netns exec "$ns1" nft list counter inet filter ns0${dir} | grep -q "$expect")
if [ $? -ne 0 ]; then
- bad_counter ns1 ns0$dir "$expect"
+ bad_counter "$ns1" ns0$dir "$expect" "test_masquerade 3"
lret=1
fi
- cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect")
+ cnt=$(ip netns exec "$ns2" nft list counter inet filter ns1${dir} | grep -q "$expect")
if [ $? -ne 0 ]; then
- bad_counter ns2 ns1$dir "$expect"
+ bad_counter "$ns2" ns1$dir "$expect" "test_masquerade 4"
lret=1
fi
done
@@ -532,32 +547,32 @@ EOF
# ns1 should not have seen packets from ns2, due to masquerade
expect="packets 0 bytes 0"
for dir in "in" "out" ; do
- cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect")
+ cnt=$(ip netns exec "$ns1" nft list counter inet filter ns2${dir} | grep -q "$expect")
if [ $? -ne 0 ]; then
- bad_counter ns1 ns0$dir "$expect"
+ bad_counter "$ns1" ns0$dir "$expect" "test_masquerade 5"
lret=1
fi
- cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect")
+ cnt=$(ip netns exec "$ns0" nft list counter inet filter ns1${dir} | grep -q "$expect")
if [ $? -ne 0 ]; then
- bad_counter ns2 ns1$dir "$expect"
+ bad_counter "$ns0" ns1$dir "$expect" "test_masquerade 6"
lret=1
fi
done
- ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1
+ ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1
if [ $? -ne 0 ] ; then
- echo "ERROR: cannot ping ns1 from ns2 with active ip masquerade $natflags (attempt 2)"
+ echo "ERROR: cannot ping $ns1 from $ns2 with active ip masquerade $natflags (attempt 2)"
lret=1
fi
- ip netns exec ns0 nft flush chain $family nat postrouting
+ ip netns exec "$ns0" nft flush chain $family nat postrouting
if [ $? -ne 0 ]; then
echo "ERROR: Could not flush $family nat postrouting" 1>&2
lret=1
fi
- test $lret -eq 0 && echo "PASS: $family IP masquerade $natflags for ns2"
+ test $lret -eq 0 && echo "PASS: $family IP masquerade $natflags for $ns2"
return $lret
}
@@ -567,25 +582,25 @@ test_redirect6()
local family=$1
local lret=0
- ip netns exec ns0 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+ ip netns exec "$ns0" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
- ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1
+ ip netns exec "$ns2" ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1
if [ $? -ne 0 ] ; then
- echo "ERROR: cannnot ping ns1 from ns2 via ipv6"
+ echo "ERROR: cannnot ping $ns1 from $ns2 via ipv6"
lret=1
fi
expect="packets 1 bytes 104"
for dir in "in6" "out6" ; do
- cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect")
+ cnt=$(ip netns exec "$ns1" nft list counter inet filter ns2${dir} | grep -q "$expect")
if [ $? -ne 0 ]; then
- bad_counter ns1 ns2$dir "$expect"
+ bad_counter "$ns1" ns2$dir "$expect" "test_redirect6 1"
lret=1
fi
- cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect")
+ cnt=$(ip netns exec "$ns2" nft list counter inet filter ns1${dir} | grep -q "$expect")
if [ $? -ne 0 ]; then
- bad_counter ns2 ns1$dir "$expect"
+ bad_counter "$ns2" ns1$dir "$expect" "test_redirect6 2"
lret=1
fi
done
@@ -593,7 +608,7 @@ test_redirect6()
reset_counters
# add redirect rule
-ip netns exec ns0 nft -f - <<EOF
+ip netns exec "$ns0" nft -f /dev/stdin <<EOF
table $family nat {
chain prerouting {
type nat hook prerouting priority 0; policy accept;
@@ -606,18 +621,18 @@ EOF
return $ksft_skip
fi
- ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1
+ ip netns exec "$ns2" ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1
if [ $? -ne 0 ] ; then
- echo "ERROR: cannot ping ns1 from ns2 via ipv6 with active $family redirect"
+ echo "ERROR: cannot ping $ns1 from $ns2 via ipv6 with active $family redirect"
lret=1
fi
# ns1 should have seen no packets from ns2, due to redirection
expect="packets 0 bytes 0"
for dir in "in6" "out6" ; do
- cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect")
+ cnt=$(ip netns exec "$ns1" nft list counter inet filter ns2${dir} | grep -q "$expect")
if [ $? -ne 0 ]; then
- bad_counter ns1 ns0$dir "$expect"
+ bad_counter "$ns1" ns0$dir "$expect" "test_redirect6 3"
lret=1
fi
done
@@ -625,20 +640,20 @@ EOF
# ns0 should have seen packets from ns2, due to masquerade
expect="packets 1 bytes 104"
for dir in "in6" "out6" ; do
- cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect")
+ cnt=$(ip netns exec "$ns0" nft list counter inet filter ns2${dir} | grep -q "$expect")
if [ $? -ne 0 ]; then
- bad_counter ns1 ns0$dir "$expect"
+ bad_counter "$ns1" ns0$dir "$expect" "test_redirect6 4"
lret=1
fi
done
- ip netns exec ns0 nft delete table $family nat
+ ip netns exec "$ns0" nft delete table $family nat
if [ $? -ne 0 ]; then
echo "ERROR: Could not delete $family nat table" 1>&2
lret=1
fi
- test $lret -eq 0 && echo "PASS: $family IPv6 redirection for ns2"
+ test $lret -eq 0 && echo "PASS: $family IPv6 redirection for $ns2"
return $lret
}
@@ -648,26 +663,26 @@ test_redirect()
local family=$1
local lret=0
- ip netns exec ns0 sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
- ip netns exec ns0 sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
+ ip netns exec "$ns0" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
+ ip netns exec "$ns0" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
- ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1
+ ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1
if [ $? -ne 0 ] ; then
- echo "ERROR: cannot ping ns1 from ns2"
+ echo "ERROR: cannot ping $ns1 from $ns2"
lret=1
fi
expect="packets 1 bytes 84"
for dir in "in" "out" ; do
- cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect")
+ cnt=$(ip netns exec "$ns1" nft list counter inet filter ns2${dir} | grep -q "$expect")
if [ $? -ne 0 ]; then
- bad_counter ns1 ns2$dir "$expect"
+ bad_counter "$ns1" $ns2$dir "$expect" "test_redirect 1"
lret=1
fi
- cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect")
+ cnt=$(ip netns exec "$ns2" nft list counter inet filter ns1${dir} | grep -q "$expect")
if [ $? -ne 0 ]; then
- bad_counter ns2 ns1$dir "$expect"
+ bad_counter "$ns2" ns1$dir "$expect" "test_redirect 2"
lret=1
fi
done
@@ -675,7 +690,7 @@ test_redirect()
reset_counters
# add redirect rule
-ip netns exec ns0 nft -f - <<EOF
+ip netns exec "$ns0" nft -f /dev/stdin <<EOF
table $family nat {
chain prerouting {
type nat hook prerouting priority 0; policy accept;
@@ -688,9 +703,9 @@ EOF
return $ksft_skip
fi
- ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1
+ ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1
if [ $? -ne 0 ] ; then
- echo "ERROR: cannot ping ns1 from ns2 with active $family ip redirect"
+ echo "ERROR: cannot ping $ns1 from $ns2 with active $family ip redirect"
lret=1
fi
@@ -698,9 +713,9 @@ EOF
expect="packets 0 bytes 0"
for dir in "in" "out" ; do
- cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect")
+ cnt=$(ip netns exec "$ns1" nft list counter inet filter ns2${dir} | grep -q "$expect")
if [ $? -ne 0 ]; then
- bad_counter ns1 ns0$dir "$expect"
+ bad_counter "$ns1" ns0$dir "$expect" "test_redirect 3"
lret=1
fi
done
@@ -708,28 +723,28 @@ EOF
# ns0 should have seen packets from ns2, due to masquerade
expect="packets 1 bytes 84"
for dir in "in" "out" ; do
- cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect")
+ cnt=$(ip netns exec "$ns0" nft list counter inet filter ns2${dir} | grep -q "$expect")
if [ $? -ne 0 ]; then
- bad_counter ns1 ns0$dir "$expect"
+ bad_counter "$ns0" ns0$dir "$expect" "test_redirect 4"
lret=1
fi
done
- ip netns exec ns0 nft delete table $family nat
+ ip netns exec "$ns0" nft delete table $family nat
if [ $? -ne 0 ]; then
echo "ERROR: Could not delete $family nat table" 1>&2
lret=1
fi
- test $lret -eq 0 && echo "PASS: $family IP redirection for ns2"
+ test $lret -eq 0 && echo "PASS: $family IP redirection for $ns2"
return $lret
}
-# ip netns exec ns0 ping -c 1 -q 10.0.$i.99
+# ip netns exec "$ns0" ping -c 1 -q 10.0.$i.99
for i in 0 1 2; do
-ip netns exec ns$i nft -f - <<EOF
+ip netns exec ns$i-$sfx nft -f /dev/stdin <<EOF
table inet filter {
counter ns0in {}
counter ns1in {}
@@ -796,18 +811,18 @@ done
sleep 3
# test basic connectivity
for i in 1 2; do
- ip netns exec ns0 ping -c 1 -q 10.0.$i.99 > /dev/null
+ ip netns exec "$ns0" ping -c 1 -q 10.0.$i.99 > /dev/null
if [ $? -ne 0 ];then
echo "ERROR: Could not reach other namespace(s)" 1>&2
ret=1
fi
- ip netns exec ns0 ping -c 1 -q dead:$i::99 > /dev/null
+ ip netns exec "$ns0" ping -c 1 -q dead:$i::99 > /dev/null
if [ $? -ne 0 ];then
echo "ERROR: Could not reach other namespace(s) via ipv6" 1>&2
ret=1
fi
- check_counters ns$i
+ check_counters ns$i-$sfx
if [ $? -ne 0 ]; then
ret=1
fi
@@ -820,7 +835,7 @@ for i in 1 2; do
done
if [ $ret -eq 0 ];then
- echo "PASS: netns routing/connectivity: ns0 can reach ns1 and ns2"
+ echo "PASS: netns routing/connectivity: $ns0 can reach $ns1 and $ns2"
fi
reset_counters
@@ -846,4 +861,9 @@ reset_counters
$test_inet_nat && test_redirect inet
$test_inet_nat && test_redirect6 inet
+if [ $ret -ne 0 ];then
+ echo -n "FAIL: "
+ nft --version
+fi
+
exit $ret
diff --git a/tools/testing/selftests/rseq/param_test.c b/tools/testing/selftests/rseq/param_test.c
index eec2663261f2..e8a657a5f48a 100644
--- a/tools/testing/selftests/rseq/param_test.c
+++ b/tools/testing/selftests/rseq/param_test.c
@@ -15,7 +15,7 @@
#include <errno.h>
#include <stddef.h>
-static inline pid_t gettid(void)
+static inline pid_t rseq_gettid(void)
{
return syscall(__NR_gettid);
}
@@ -373,11 +373,12 @@ void *test_percpu_spinlock_thread(void *arg)
rseq_percpu_unlock(&data->lock, cpu);
#ifndef BENCHMARK
if (i != 0 && !(i % (reps / 10)))
- printf_verbose("tid %d: count %lld\n", (int) gettid(), i);
+ printf_verbose("tid %d: count %lld\n",
+ (int) rseq_gettid(), i);
#endif
}
printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
- (int) gettid(), nr_abort, signals_delivered);
+ (int) rseq_gettid(), nr_abort, signals_delivered);
if (!opt_disable_rseq && thread_data->reg &&
rseq_unregister_current_thread())
abort();
@@ -454,11 +455,12 @@ void *test_percpu_inc_thread(void *arg)
} while (rseq_unlikely(ret));
#ifndef BENCHMARK
if (i != 0 && !(i % (reps / 10)))
- printf_verbose("tid %d: count %lld\n", (int) gettid(), i);
+ printf_verbose("tid %d: count %lld\n",
+ (int) rseq_gettid(), i);
#endif
}
printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
- (int) gettid(), nr_abort, signals_delivered);
+ (int) rseq_gettid(), nr_abort, signals_delivered);
if (!opt_disable_rseq && thread_data->reg &&
rseq_unregister_current_thread())
abort();
@@ -605,7 +607,7 @@ void *test_percpu_list_thread(void *arg)
}
printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
- (int) gettid(), nr_abort, signals_delivered);
+ (int) rseq_gettid(), nr_abort, signals_delivered);
if (!opt_disable_rseq && rseq_unregister_current_thread())
abort();
@@ -796,7 +798,7 @@ void *test_percpu_buffer_thread(void *arg)
}
printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
- (int) gettid(), nr_abort, signals_delivered);
+ (int) rseq_gettid(), nr_abort, signals_delivered);
if (!opt_disable_rseq && rseq_unregister_current_thread())
abort();
@@ -1011,7 +1013,7 @@ void *test_percpu_memcpy_buffer_thread(void *arg)
}
printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
- (int) gettid(), nr_abort, signals_delivered);
+ (int) rseq_gettid(), nr_abort, signals_delivered);
if (!opt_disable_rseq && rseq_unregister_current_thread())
abort();
diff --git a/tools/testing/selftests/rseq/rseq.h b/tools/testing/selftests/rseq/rseq.h
index d40d60e7499e..3f63eb362b92 100644
--- a/tools/testing/selftests/rseq/rseq.h
+++ b/tools/testing/selftests/rseq/rseq.h
@@ -149,11 +149,13 @@ static inline void rseq_clear_rseq_cs(void)
/*
* rseq_prepare_unload() should be invoked by each thread executing a rseq
* critical section at least once between their last critical section and
- * library unload of the library defining the rseq critical section
- * (struct rseq_cs). This also applies to use of rseq in code generated by
- * JIT: rseq_prepare_unload() should be invoked at least once by each
- * thread executing a rseq critical section before reclaim of the memory
- * holding the struct rseq_cs.
+ * library unload of the library defining the rseq critical section (struct
+ * rseq_cs) or the code referred to by the struct rseq_cs start_ip and
+ * post_commit_offset fields. This also applies to use of rseq in code
+ * generated by JIT: rseq_prepare_unload() should be invoked at least once by
+ * each thread executing a rseq critical section before reclaim of the memory
+ * holding the struct rseq_cs or reclaim of the code pointed to by struct
+ * rseq_cs start_ip and post_commit_offset fields.
*/
static inline void rseq_prepare_unload(void)
{
diff --git a/tools/testing/selftests/rseq/settings b/tools/testing/selftests/rseq/settings
new file mode 100644
index 000000000000..e7b9417537fb
--- /dev/null
+++ b/tools/testing/selftests/rseq/settings
@@ -0,0 +1 @@
+timeout=0
diff --git a/tools/testing/selftests/safesetid/Makefile b/tools/testing/selftests/safesetid/Makefile
index 98da7a504737..fa02c4d5ec13 100644
--- a/tools/testing/selftests/safesetid/Makefile
+++ b/tools/testing/selftests/safesetid/Makefile
@@ -1,8 +1,9 @@
# SPDX-License-Identifier: GPL-2.0
# Makefile for mount selftests.
-CFLAGS = -Wall -lcap -O2
+CFLAGS = -Wall -O2
+LDLIBS = -lcap
-TEST_PROGS := run_tests.sh
+TEST_PROGS := safesetid-test.sh
TEST_GEN_FILES := safesetid-test
include ../lib.mk
diff --git a/tools/testing/selftests/safesetid/safesetid-test.c b/tools/testing/selftests/safesetid/safesetid-test.c
index 8f40c6ecdad1..0c4d50644c13 100644
--- a/tools/testing/selftests/safesetid/safesetid-test.c
+++ b/tools/testing/selftests/safesetid/safesetid-test.c
@@ -213,7 +213,8 @@ static void test_setuid(uid_t child_uid, bool expect_success)
}
if (cpid == 0) { /* Code executed by child */
- setuid(child_uid);
+ if (setuid(child_uid) < 0)
+ exit(EXIT_FAILURE);
if (getuid() == child_uid)
exit(EXIT_SUCCESS);
else
@@ -291,8 +292,10 @@ int main(int argc, char **argv)
// First test to make sure we can write userns mappings from a user
// that doesn't have any restrictions (as long as it has CAP_SETUID);
- setuid(NO_POLICY_USER);
- setgid(NO_POLICY_USER);
+ if (setuid(NO_POLICY_USER) < 0)
+ die("Error with set uid(%d)\n", NO_POLICY_USER);
+ if (setgid(NO_POLICY_USER) < 0)
+ die("Error with set gid(%d)\n", NO_POLICY_USER);
// Take away all but setid caps
drop_caps(true);
@@ -306,8 +309,10 @@ int main(int argc, char **argv)
die("test_userns failed when it should work\n");
}
- setuid(RESTRICTED_PARENT);
- setgid(RESTRICTED_PARENT);
+ if (setuid(RESTRICTED_PARENT) < 0)
+ die("Error with set uid(%d)\n", RESTRICTED_PARENT);
+ if (setgid(RESTRICTED_PARENT) < 0)
+ die("Error with set gid(%d)\n", RESTRICTED_PARENT);
test_setuid(ROOT_USER, false);
test_setuid(ALLOWED_CHILD1, true);
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 6944b898bb53..ee1b727ede04 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -3158,7 +3158,18 @@ TEST(user_notification_basic)
EXPECT_GT(poll(&pollfd, 1, -1), 0);
EXPECT_EQ(pollfd.revents, POLLIN);
- EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
+ /* Test that we can't pass garbage to the kernel. */
+ memset(&req, 0, sizeof(req));
+ req.pid = -1;
+ errno = 0;
+ ret = ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(EINVAL, errno);
+
+ if (ret) {
+ req.pid = 0;
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
+ }
pollfd.fd = listener;
pollfd.events = POLLIN | POLLOUT;
@@ -3278,6 +3289,7 @@ TEST(user_notification_signal)
close(sk_pair[1]);
+ memset(&req, 0, sizeof(req));
EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
EXPECT_EQ(kill(pid, SIGUSR1), 0);
@@ -3296,6 +3308,7 @@ TEST(user_notification_signal)
EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), -1);
EXPECT_EQ(errno, ENOENT);
+ memset(&req, 0, sizeof(req));
EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
resp.id = req.id;
diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/basic.json b/tools/testing/selftests/tc-testing/tc-tests/filters/basic.json
index 76ae03a64506..2e361cea63bc 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/filters/basic.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/filters/basic.json
@@ -152,7 +152,7 @@
]
},
{
- "id": "6f5e",
+ "id": "b99c",
"name": "Add basic filter with cmp ematch u8/transport layer and default action",
"category": [
"filter",
diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json
index 0f89cd50a94b..8877f7b2b809 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json
@@ -1,27 +1,5 @@
[
{
- "id": "e9a3",
- "name": "Add u32 with source match",
- "category": [
- "filter",
- "u32"
- ],
- "plugins": {
- "requires": "nsPlugin"
- },
- "setup": [
- "$TC qdisc add dev $DEV1 ingress"
- ],
- "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: protocol ip prio 1 u32 match ip src 127.0.0.1/32 flowid 1:1 action ok",
- "expExitCode": "0",
- "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
- "matchPattern": "match 7f000001/ffffffff at 12",
- "matchCount": "1",
- "teardown": [
- "$TC qdisc del dev $DEV1 ingress"
- ]
- },
- {
"id": "2638",
"name": "Add matchall and try to get it",
"category": [
diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json b/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json
new file mode 100644
index 000000000000..e09d3c0e307f
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json
@@ -0,0 +1,205 @@
+[
+ {
+ "id": "afa9",
+ "name": "Add u32 with source match",
+ "category": [
+ "filter",
+ "u32"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 ingress protocol ip prio 1 u32 match ip src 127.0.0.1/32 flowid 1:1 action ok",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show dev $DEV1 ingress",
+ "matchPattern": "filter protocol ip pref 1 u32 chain (0[ ]+$|0 fh 800: ht divisor 1|0 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:1.*match 7f000001/ffffffff at 12)",
+ "matchCount": "3",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "6aa7",
+ "name": "Add/Replace u32 with source match and invalid indev",
+ "category": [
+ "filter",
+ "u32"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter replace dev $DEV1 ingress protocol ip prio 1 u32 match ip src 127.0.0.1/32 indev notexist20 flowid 1:1 action ok",
+ "expExitCode": "2",
+ "verifyCmd": "$TC filter show dev $DEV1 ingress",
+ "matchPattern": "filter protocol ip pref 1 u32 chain 0",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "bc4d",
+ "name": "Replace valid u32 with source match and invalid indev",
+ "category": [
+ "filter",
+ "u32"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress",
+ "$TC filter add dev $DEV1 ingress protocol ip prio 1 u32 match ip src 127.0.0.3/32 flowid 1:3 action ok"
+ ],
+ "cmdUnderTest": "$TC filter replace dev $DEV1 ingress protocol ip prio 1 u32 match ip src 127.0.0.2/32 indev notexist20 flowid 1:2 action ok",
+ "expExitCode": "2",
+ "verifyCmd": "$TC filter show dev $DEV1 ingress",
+ "matchPattern": "filter protocol ip pref 1 u32 chain (0[ ]+$|0 fh 800: ht divisor 1|0 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:3.*match 7f000003/ffffffff at 12)",
+ "matchCount": "3",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "648b",
+ "name": "Add u32 with custom hash table",
+ "category": [
+ "filter",
+ "u32"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 ingress prio 99 handle 42: u32 divisor 256",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show dev $DEV1 ingress",
+ "matchPattern": "pref 99 u32 chain (0[ ]+$|0 fh 42: ht divisor 256|0 fh 800: ht divisor 1)",
+ "matchCount": "3",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "6658",
+ "name": "Add/Replace u32 with custom hash table and invalid handle",
+ "category": [
+ "filter",
+ "u32"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter replace dev $DEV1 ingress prio 99 handle 42:42 u32 divisor 256",
+ "expExitCode": "2",
+ "verifyCmd": "$TC filter show dev $DEV1 ingress",
+ "matchPattern": "pref 99 u32 chain 0",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "9d0a",
+ "name": "Replace valid u32 with custom hash table and invalid handle",
+ "category": [
+ "filter",
+ "u32"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress",
+ "$TC filter add dev $DEV1 ingress prio 99 handle 42: u32 divisor 256"
+ ],
+ "cmdUnderTest": "$TC filter replace dev $DEV1 ingress prio 99 handle 42:42 u32 divisor 128",
+ "expExitCode": "2",
+ "verifyCmd": "$TC filter show dev $DEV1 ingress",
+ "matchPattern": "pref 99 u32 chain (0[ ]+$|0 fh 42: ht divisor 256|0 fh 800: ht divisor 1)",
+ "matchCount": "3",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "1644",
+ "name": "Add u32 filter that links to a custom hash table",
+ "category": [
+ "filter",
+ "u32"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress",
+ "$TC filter add dev $DEV1 ingress prio 99 handle 43: u32 divisor 256"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 ingress protocol ip prio 98 u32 link 43: hashkey mask 0x0000ff00 at 12 match ip src 192.168.0.0/16",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show dev $DEV1 ingress",
+ "matchPattern": "filter protocol ip pref 98 u32 chain (0[ ]+$|0 fh 801: ht divisor 1|0 fh 801::800 order 2048 key ht 801 bkt 0 link 43:.*match c0a80000/ffff0000 at 12.*hash mask 0000ff00 at 12)",
+ "matchCount": "3",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "74c2",
+ "name": "Add/Replace u32 filter with invalid hash table id",
+ "category": [
+ "filter",
+ "u32"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter replace dev $DEV1 ingress protocol ip prio 20 u32 ht 47:47 action drop",
+ "expExitCode": "2",
+ "verifyCmd": "$TC filter show dev $DEV1 ingress",
+ "matchPattern": "filter protocol ip pref 20 u32 chain 0",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "1fe6",
+ "name": "Replace valid u32 filter with invalid hash table id",
+ "category": [
+ "filter",
+ "u32"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress",
+ "$TC filter add dev $DEV1 ingress protocol ip prio 99 handle 43: u32 divisor 1",
+ "$TC filter add dev $DEV1 ingress protocol ip prio 98 u32 ht 43: match tcp src 22 FFFF classid 1:3"
+ ],
+ "cmdUnderTest": "$TC filter replace dev $DEV1 ingress protocol ip prio 98 u32 ht 43:1 match tcp src 23 FFFF classid 1:4",
+ "expExitCode": "2",
+ "verifyCmd": "$TC filter show dev $DEV1 ingress",
+ "matchPattern": "filter protocol ip pref 99 u32 chain (0[ ]+$|0 fh (43|800): ht divisor 1|0 fh 43::800 order 2048 key ht 43 bkt 0 flowid 1:3.*match 00160000/ffff0000 at nexthdr\\+0)",
+ "matchCount": "4",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/ets.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/ets.json
new file mode 100644
index 000000000000..180593010675
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/ets.json
@@ -0,0 +1,940 @@
+[
+ {
+ "id": "e90e",
+ "name": "Add ETS qdisc using bands",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 2",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets 1: root .* bands 2",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "b059",
+ "name": "Add ETS qdisc using quanta",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets quanta 1000 900 800 700",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets 1: root .*bands 4 quanta 1000 900 800 700",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "e8e7",
+ "name": "Add ETS qdisc using strict",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets strict 3",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets 1: root .*bands 3 strict 3",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "233c",
+ "name": "Add ETS qdisc using bands + quanta",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 4 quanta 1000 900 800 700",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets 1: root .*bands 4 quanta 1000 900 800 700 priomap",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "3d35",
+ "name": "Add ETS qdisc using bands + strict",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 3 strict 3",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets 1: root .*bands 3 strict 3 priomap",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "7f3b",
+ "name": "Add ETS qdisc using strict + quanta",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets strict 3 quanta 1500 750",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets 1: root .*bands 5 strict 3 quanta 1500 750 priomap",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "4593",
+ "name": "Add ETS qdisc using strict 0 + quanta",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets strict 0 quanta 1500 750",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets 1: root .*bands 2 quanta 1500 750 priomap",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "8938",
+ "name": "Add ETS qdisc using bands + strict + quanta",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 5 strict 3 quanta 1500 750",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets 1: root .*bands 5 .*strict 3 quanta 1500 750 priomap",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "0782",
+ "name": "Add ETS qdisc with more bands than quanta",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 2 quanta 1000",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets 1: root .*bands 2 .*quanta 1000 [1-9][0-9]* priomap",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "501b",
+ "name": "Add ETS qdisc with more bands than strict",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 3 strict 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets 1: root .*bands 3 strict 1 quanta ([1-9][0-9]* ){2}priomap",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "671a",
+ "name": "Add ETS qdisc with more bands than strict + quanta",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 3 strict 1 quanta 1000",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets 1: root .*bands 3 strict 1 quanta 1000 [1-9][0-9]* priomap",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "2a23",
+ "name": "Add ETS qdisc with 16 bands",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 16",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets 1: root .* bands 16",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "8daf",
+ "name": "Add ETS qdisc with 17 bands",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 17",
+ "expExitCode": "1",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets",
+ "matchCount": "0",
+ "teardown": [
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "7f95",
+ "name": "Add ETS qdisc with 17 strict",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets strict 17",
+ "expExitCode": "1",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets",
+ "matchCount": "0",
+ "teardown": [
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "837a",
+ "name": "Add ETS qdisc with 16 quanta",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets quanta 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets 1: root .* bands 16",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "65b6",
+ "name": "Add ETS qdisc with 17 quanta",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets quanta 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17",
+ "expExitCode": "2",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets",
+ "matchCount": "0",
+ "teardown": [
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "b9e9",
+ "name": "Add ETS qdisc with 16 strict + quanta",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets strict 8 quanta 1 2 3 4 5 6 7 8",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets 1: root .* bands 16",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "9877",
+ "name": "Add ETS qdisc with 17 strict + quanta",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets strict 9 quanta 1 2 3 4 5 6 7 8",
+ "expExitCode": "2",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets",
+ "matchCount": "0",
+ "teardown": [
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "c696",
+ "name": "Add ETS qdisc with priomap",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 5 priomap 0 0 1 0 1 2 0 1 2 3 0 1 2 3 4 0",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets 1: root .*priomap 0 0 1 0 1 2 0 1 2 3 0 1 2 3 4 0",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "30c4",
+ "name": "Add ETS qdisc with quanta + priomap",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets quanta 1000 2000 3000 4000 5000 priomap 0 0 1 0 1 2 0 1 2 3 0 1 2 3 4 0",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets 1: root .*quanta 1000 2000 3000 4000 5000 priomap 0 0 1 0 1 2 0 1 2 3 0 1 2 3 4 0",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "e8ac",
+ "name": "Add ETS qdisc with strict + priomap",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets strict 5 priomap 0 0 1 0 1 2 0 1 2 3 0 1 2 3 4 0",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets 1: root .*bands 5 strict 5 priomap 0 0 1 0 1 2 0 1 2 3 0 1 2 3 4 0",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "5a7e",
+ "name": "Add ETS qdisc with quanta + strict + priomap",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets strict 2 quanta 1000 2000 3000 priomap 0 0 1 0 1 2 0 1 2 3 0 1 2 3 4 0",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets 1: root .*strict 2 quanta 1000 2000 3000 priomap 0 0 1 0 1 2 0 1 2 3 0 1 2 3 4 0",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "cb8b",
+ "name": "Show ETS class :1",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets quanta 4000 3000 2000",
+ "expExitCode": "0",
+ "verifyCmd": "$TC class show dev $DUMMY classid 1:1",
+ "matchPattern": "class ets 1:1 root quantum 4000",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "1b4e",
+ "name": "Show ETS class :2",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets quanta 4000 3000 2000",
+ "expExitCode": "0",
+ "verifyCmd": "$TC class show dev $DUMMY classid 1:2",
+ "matchPattern": "class ets 1:2 root quantum 3000",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "f642",
+ "name": "Show ETS class :3",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets quanta 4000 3000 2000",
+ "expExitCode": "0",
+ "verifyCmd": "$TC class show dev $DUMMY classid 1:3",
+ "matchPattern": "class ets 1:3 root quantum 2000",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "0a5f",
+ "name": "Show ETS strict class",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets strict 3",
+ "expExitCode": "0",
+ "verifyCmd": "$TC class show dev $DUMMY classid 1:1",
+ "matchPattern": "class ets 1:1 root $",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "f7c8",
+ "name": "Add ETS qdisc with too many quanta",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 2 quanta 1000 2000 3000",
+ "expExitCode": "1",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets",
+ "matchCount": "0",
+ "teardown": [
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "2389",
+ "name": "Add ETS qdisc with too many strict",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 2 strict 3",
+ "expExitCode": "1",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets",
+ "matchCount": "0",
+ "teardown": [
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "fe3c",
+ "name": "Add ETS qdisc with too many strict + quanta",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 4 strict 2 quanta 1000 2000 3000",
+ "expExitCode": "1",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets",
+ "matchCount": "0",
+ "teardown": [
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "cb04",
+ "name": "Add ETS qdisc with excess priomap elements",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 5 priomap 0 0 1 0 1 2 0 1 2 3 0 1 2 3 4 0 1 2",
+ "expExitCode": "1",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets",
+ "matchCount": "0",
+ "teardown": [
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "c32e",
+ "name": "Add ETS qdisc with priomap above bands",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 2 priomap 0 1 2",
+ "expExitCode": "1",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets",
+ "matchCount": "0",
+ "teardown": [
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "744c",
+ "name": "Add ETS qdisc with priomap above quanta",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets quanta 1000 500 priomap 0 1 2",
+ "expExitCode": "1",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets",
+ "matchCount": "0",
+ "teardown": [
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "7b33",
+ "name": "Add ETS qdisc with priomap above strict",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets strict 2 priomap 0 1 2",
+ "expExitCode": "1",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets",
+ "matchCount": "0",
+ "teardown": [
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "dbe6",
+ "name": "Add ETS qdisc with priomap above strict + quanta",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets strict 1 quanta 1000 500 priomap 0 1 2 3",
+ "expExitCode": "1",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets",
+ "matchCount": "0",
+ "teardown": [
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "bdb2",
+ "name": "Add ETS qdisc with priomap within bands with strict + quanta",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 4 strict 1 quanta 1000 500 priomap 0 1 2 3",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets",
+ "matchCount": "1",
+ "teardown": [
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "39a3",
+ "name": "Add ETS qdisc with priomap above bands with strict + quanta",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 4 strict 1 quanta 1000 500 priomap 0 1 2 3 4",
+ "expExitCode": "1",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets",
+ "matchCount": "0",
+ "teardown": [
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "557c",
+ "name": "Unset priorities default to the last band",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 4 priomap 0 0 0 0",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets .*priomap 0 0 0 0 3 3 3 3 3 3 3 3 3 3 3 3",
+ "matchCount": "1",
+ "teardown": [
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "a347",
+ "name": "Unset priorities default to the last band -- no priomap",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 4",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets .*priomap 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3",
+ "matchCount": "1",
+ "teardown": [
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "39c4",
+ "name": "Add ETS qdisc with too few bands",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 0",
+ "expExitCode": "1",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets",
+ "matchCount": "0",
+ "teardown": [
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "930b",
+ "name": "Add ETS qdisc with too many bands",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 17",
+ "expExitCode": "1",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets",
+ "matchCount": "0",
+ "teardown": [
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "406a",
+ "name": "Add ETS qdisc without parameters",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets",
+ "expExitCode": "1",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets",
+ "matchCount": "0",
+ "teardown": [
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "e51a",
+ "name": "Zero element in quanta",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets quanta 1000 0 800 700",
+ "expExitCode": "1",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets",
+ "matchCount": "0",
+ "teardown": [
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "e7f2",
+ "name": "Sole zero element in quanta",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets quanta 0",
+ "expExitCode": "1",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets",
+ "matchCount": "0",
+ "teardown": [
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "d6e6",
+ "name": "No values after the quanta keyword",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets quanta",
+ "expExitCode": "255",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets",
+ "matchCount": "0",
+ "teardown": [
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "28c6",
+ "name": "Change ETS band quantum",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true",
+ "$TC qdisc add dev $DUMMY handle 1: root ets quanta 1000 2000 3000"
+ ],
+ "cmdUnderTest": "$TC class change dev $DUMMY classid 1:1 ets quantum 1500",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets 1: root .*quanta 1500 2000 3000 priomap ",
+ "matchCount": "1",
+ "teardown": [
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "4714",
+ "name": "Change ETS band without quantum",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true",
+ "$TC qdisc add dev $DUMMY handle 1: root ets quanta 1000 2000 3000"
+ ],
+ "cmdUnderTest": "$TC class change dev $DUMMY classid 1:1 ets",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets 1: root .*quanta 1000 2000 3000 priomap ",
+ "matchCount": "1",
+ "teardown": [
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "6979",
+ "name": "Change quantum of a strict ETS band",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true",
+ "$TC qdisc add dev $DUMMY handle 1: root ets strict 5"
+ ],
+ "cmdUnderTest": "$TC class change dev $DUMMY classid 1:2 ets quantum 1500",
+ "expExitCode": "2",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets .*bands 5 .*strict 5",
+ "matchCount": "1",
+ "teardown": [
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "9a7d",
+ "name": "Change ETS strict band without quantum",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true",
+ "$TC qdisc add dev $DUMMY handle 1: root ets strict 5"
+ ],
+ "cmdUnderTest": "$TC class change dev $DUMMY classid 1:2 ets",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc ets .*bands 5 .*strict 5",
+ "matchCount": "1",
+ "teardown": [
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ }
+]
diff --git a/tools/testing/selftests/tpm2/test_smoke.sh b/tools/testing/selftests/tpm2/test_smoke.sh
index 80521d46220c..8155c2ea7ccb 100755
--- a/tools/testing/selftests/tpm2/test_smoke.sh
+++ b/tools/testing/selftests/tpm2/test_smoke.sh
@@ -2,3 +2,9 @@
# SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
python -m unittest -v tpm2_tests.SmokeTest
+python -m unittest -v tpm2_tests.AsyncTest
+
+CLEAR_CMD=$(which tpm2_clear)
+if [ -n $CLEAR_CMD ]; then
+ tpm2_clear -T device
+fi
diff --git a/tools/testing/selftests/tpm2/tpm2.py b/tools/testing/selftests/tpm2/tpm2.py
index 828c18584624..d0fcb66a88a6 100644
--- a/tools/testing/selftests/tpm2/tpm2.py
+++ b/tools/testing/selftests/tpm2/tpm2.py
@@ -6,8 +6,8 @@ import socket
import struct
import sys
import unittest
-from fcntl import ioctl
-
+import fcntl
+import select
TPM2_ST_NO_SESSIONS = 0x8001
TPM2_ST_SESSIONS = 0x8002
@@ -352,6 +352,7 @@ def hex_dump(d):
class Client:
FLAG_DEBUG = 0x01
FLAG_SPACE = 0x02
+ FLAG_NONBLOCK = 0x04
TPM_IOC_NEW_SPACE = 0xa200
def __init__(self, flags = 0):
@@ -362,13 +363,27 @@ class Client:
else:
self.tpm = open('/dev/tpmrm0', 'r+b', buffering=0)
+ if (self.flags & Client.FLAG_NONBLOCK):
+ flags = fcntl.fcntl(self.tpm, fcntl.F_GETFL)
+ flags |= os.O_NONBLOCK
+ fcntl.fcntl(self.tpm, fcntl.F_SETFL, flags)
+ self.tpm_poll = select.poll()
+
def close(self):
self.tpm.close()
def send_cmd(self, cmd):
self.tpm.write(cmd)
+
+ if (self.flags & Client.FLAG_NONBLOCK):
+ self.tpm_poll.register(self.tpm, select.POLLIN)
+ self.tpm_poll.poll(10000)
+
rsp = self.tpm.read()
+ if (self.flags & Client.FLAG_NONBLOCK):
+ self.tpm_poll.unregister(self.tpm)
+
if (self.flags & Client.FLAG_DEBUG) != 0:
sys.stderr.write('cmd' + os.linesep)
sys.stderr.write(hex_dump(cmd) + os.linesep)
diff --git a/tools/testing/selftests/tpm2/tpm2_tests.py b/tools/testing/selftests/tpm2/tpm2_tests.py
index d4973be53493..728be7c69b76 100644
--- a/tools/testing/selftests/tpm2/tpm2_tests.py
+++ b/tools/testing/selftests/tpm2/tpm2_tests.py
@@ -288,3 +288,16 @@ class SpaceTest(unittest.TestCase):
self.assertEqual(rc, tpm2.TPM2_RC_COMMAND_CODE |
tpm2.TSS2_RESMGR_TPM_RC_LAYER)
+
+class AsyncTest(unittest.TestCase):
+ def setUp(self):
+ logging.basicConfig(filename='AsyncTest.log', level=logging.DEBUG)
+
+ def test_async(self):
+ log = logging.getLogger(__name__)
+ log.debug(sys._getframe().f_code.co_name)
+
+ async_client = tpm2.Client(tpm2.Client.FLAG_NONBLOCK)
+ log.debug("Calling get_cap in a NON_BLOCKING mode")
+ async_client.get_cap(tpm2.TPM2_CAP_HANDLES, tpm2.HR_LOADED_SESSION)
+ async_client.close()
diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh
new file mode 100755
index 000000000000..d5c85c7494f2
--- /dev/null
+++ b/tools/testing/selftests/wireguard/netns.sh
@@ -0,0 +1,534 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (C) 2015-2019 Jason A. Donenfeld <[email protected]>. All Rights Reserved.
+#
+# This script tests the below topology:
+#
+# ┌─────────────────────┐ ┌──────────────────────────────────┐ ┌─────────────────────┐
+# │ $ns1 namespace │ │ $ns0 namespace │ │ $ns2 namespace │
+# │ │ │ │ │ │
+# │┌────────┐ │ │ ┌────────┐ │ │ ┌────────┐│
+# ││ wg0 │───────────┼───┼────────────│ lo │────────────┼───┼───────────│ wg0 ││
+# │├────────┴──────────┐│ │ ┌───────┴────────┴────────┐ │ │┌──────────┴────────┤│
+# ││192.168.241.1/24 ││ │ │(ns1) (ns2) │ │ ││192.168.241.2/24 ││
+# ││fd00::1/24 ││ │ │127.0.0.1:1 127.0.0.1:2│ │ ││fd00::2/24 ││
+# │└───────────────────┘│ │ │[::]:1 [::]:2 │ │ │└───────────────────┘│
+# └─────────────────────┘ │ └─────────────────────────┘ │ └─────────────────────┘
+# └──────────────────────────────────┘
+#
+# After the topology is prepared we run a series of TCP/UDP iperf3 tests between the
+# wireguard peers in $ns1 and $ns2. Note that $ns0 is the endpoint for the wg0
+# interfaces in $ns1 and $ns2. See https://www.wireguard.com/netns/ for further
+# details on how this is accomplished.
+set -e
+
+exec 3>&1
+export WG_HIDE_KEYS=never
+netns0="wg-test-$$-0"
+netns1="wg-test-$$-1"
+netns2="wg-test-$$-2"
+pretty() { echo -e "\x1b[32m\x1b[1m[+] ${1:+NS$1: }${2}\x1b[0m" >&3; }
+pp() { pretty "" "$*"; "$@"; }
+maybe_exec() { if [[ $BASHPID -eq $$ ]]; then "$@"; else exec "$@"; fi; }
+n0() { pretty 0 "$*"; maybe_exec ip netns exec $netns0 "$@"; }
+n1() { pretty 1 "$*"; maybe_exec ip netns exec $netns1 "$@"; }
+n2() { pretty 2 "$*"; maybe_exec ip netns exec $netns2 "$@"; }
+ip0() { pretty 0 "ip $*"; ip -n $netns0 "$@"; }
+ip1() { pretty 1 "ip $*"; ip -n $netns1 "$@"; }
+ip2() { pretty 2 "ip $*"; ip -n $netns2 "$@"; }
+sleep() { read -t "$1" -N 1 || true; }
+waitiperf() { pretty "${1//*-}" "wait for iperf:5201"; while [[ $(ss -N "$1" -tlp 'sport = 5201') != *iperf3* ]]; do sleep 0.1; done; }
+waitncatudp() { pretty "${1//*-}" "wait for udp:1111"; while [[ $(ss -N "$1" -ulp 'sport = 1111') != *ncat* ]]; do sleep 0.1; done; }
+waitncattcp() { pretty "${1//*-}" "wait for tcp:1111"; while [[ $(ss -N "$1" -tlp 'sport = 1111') != *ncat* ]]; do sleep 0.1; done; }
+waitiface() { pretty "${1//*-}" "wait for $2 to come up"; ip netns exec "$1" bash -c "while [[ \$(< \"/sys/class/net/$2/operstate\") != up ]]; do read -t .1 -N 0 || true; done;"; }
+
+cleanup() {
+ set +e
+ exec 2>/dev/null
+ printf "$orig_message_cost" > /proc/sys/net/core/message_cost
+ ip0 link del dev wg0
+ ip1 link del dev wg0
+ ip2 link del dev wg0
+ local to_kill="$(ip netns pids $netns0) $(ip netns pids $netns1) $(ip netns pids $netns2)"
+ [[ -n $to_kill ]] && kill $to_kill
+ pp ip netns del $netns1
+ pp ip netns del $netns2
+ pp ip netns del $netns0
+ exit
+}
+
+orig_message_cost="$(< /proc/sys/net/core/message_cost)"
+trap cleanup EXIT
+printf 0 > /proc/sys/net/core/message_cost
+
+ip netns del $netns0 2>/dev/null || true
+ip netns del $netns1 2>/dev/null || true
+ip netns del $netns2 2>/dev/null || true
+pp ip netns add $netns0
+pp ip netns add $netns1
+pp ip netns add $netns2
+ip0 link set up dev lo
+
+ip0 link add dev wg0 type wireguard
+ip0 link set wg0 netns $netns1
+ip0 link add dev wg0 type wireguard
+ip0 link set wg0 netns $netns2
+key1="$(pp wg genkey)"
+key2="$(pp wg genkey)"
+key3="$(pp wg genkey)"
+pub1="$(pp wg pubkey <<<"$key1")"
+pub2="$(pp wg pubkey <<<"$key2")"
+pub3="$(pp wg pubkey <<<"$key3")"
+psk="$(pp wg genpsk)"
+[[ -n $key1 && -n $key2 && -n $psk ]]
+
+configure_peers() {
+ ip1 addr add 192.168.241.1/24 dev wg0
+ ip1 addr add fd00::1/24 dev wg0
+
+ ip2 addr add 192.168.241.2/24 dev wg0
+ ip2 addr add fd00::2/24 dev wg0
+
+ n1 wg set wg0 \
+ private-key <(echo "$key1") \
+ listen-port 1 \
+ peer "$pub2" \
+ preshared-key <(echo "$psk") \
+ allowed-ips 192.168.241.2/32,fd00::2/128
+ n2 wg set wg0 \
+ private-key <(echo "$key2") \
+ listen-port 2 \
+ peer "$pub1" \
+ preshared-key <(echo "$psk") \
+ allowed-ips 192.168.241.1/32,fd00::1/128
+
+ ip1 link set up dev wg0
+ ip2 link set up dev wg0
+}
+configure_peers
+
+tests() {
+ # Ping over IPv4
+ n2 ping -c 10 -f -W 1 192.168.241.1
+ n1 ping -c 10 -f -W 1 192.168.241.2
+
+ # Ping over IPv6
+ n2 ping6 -c 10 -f -W 1 fd00::1
+ n1 ping6 -c 10 -f -W 1 fd00::2
+
+ # TCP over IPv4
+ n2 iperf3 -s -1 -B 192.168.241.2 &
+ waitiperf $netns2
+ n1 iperf3 -Z -t 3 -c 192.168.241.2
+
+ # TCP over IPv6
+ n1 iperf3 -s -1 -B fd00::1 &
+ waitiperf $netns1
+ n2 iperf3 -Z -t 3 -c fd00::1
+
+ # UDP over IPv4
+ n1 iperf3 -s -1 -B 192.168.241.1 &
+ waitiperf $netns1
+ n2 iperf3 -Z -t 3 -b 0 -u -c 192.168.241.1
+
+ # UDP over IPv6
+ n2 iperf3 -s -1 -B fd00::2 &
+ waitiperf $netns2
+ n1 iperf3 -Z -t 3 -b 0 -u -c fd00::2
+}
+
+[[ $(ip1 link show dev wg0) =~ mtu\ ([0-9]+) ]] && orig_mtu="${BASH_REMATCH[1]}"
+big_mtu=$(( 34816 - 1500 + $orig_mtu ))
+
+# Test using IPv4 as outer transport
+n1 wg set wg0 peer "$pub2" endpoint 127.0.0.1:2
+n2 wg set wg0 peer "$pub1" endpoint 127.0.0.1:1
+# Before calling tests, we first make sure that the stats counters and timestamper are working
+n2 ping -c 10 -f -W 1 192.168.241.1
+{ read _; read _; read _; read rx_bytes _; read _; read tx_bytes _; } < <(ip2 -stats link show dev wg0)
+(( rx_bytes == 1372 && (tx_bytes == 1428 || tx_bytes == 1460) ))
+{ read _; read _; read _; read rx_bytes _; read _; read tx_bytes _; } < <(ip1 -stats link show dev wg0)
+(( tx_bytes == 1372 && (rx_bytes == 1428 || rx_bytes == 1460) ))
+read _ rx_bytes tx_bytes < <(n2 wg show wg0 transfer)
+(( rx_bytes == 1372 && (tx_bytes == 1428 || tx_bytes == 1460) ))
+read _ rx_bytes tx_bytes < <(n1 wg show wg0 transfer)
+(( tx_bytes == 1372 && (rx_bytes == 1428 || rx_bytes == 1460) ))
+read _ timestamp < <(n1 wg show wg0 latest-handshakes)
+(( timestamp != 0 ))
+
+tests
+ip1 link set wg0 mtu $big_mtu
+ip2 link set wg0 mtu $big_mtu
+tests
+
+ip1 link set wg0 mtu $orig_mtu
+ip2 link set wg0 mtu $orig_mtu
+
+# Test using IPv6 as outer transport
+n1 wg set wg0 peer "$pub2" endpoint [::1]:2
+n2 wg set wg0 peer "$pub1" endpoint [::1]:1
+tests
+ip1 link set wg0 mtu $big_mtu
+ip2 link set wg0 mtu $big_mtu
+tests
+
+# Test that route MTUs work with the padding
+ip1 link set wg0 mtu 1300
+ip2 link set wg0 mtu 1300
+n1 wg set wg0 peer "$pub2" endpoint 127.0.0.1:2
+n2 wg set wg0 peer "$pub1" endpoint 127.0.0.1:1
+n0 iptables -A INPUT -m length --length 1360 -j DROP
+n1 ip route add 192.168.241.2/32 dev wg0 mtu 1299
+n2 ip route add 192.168.241.1/32 dev wg0 mtu 1299
+n2 ping -c 1 -W 1 -s 1269 192.168.241.1
+n2 ip route delete 192.168.241.1/32 dev wg0 mtu 1299
+n1 ip route delete 192.168.241.2/32 dev wg0 mtu 1299
+n0 iptables -F INPUT
+
+ip1 link set wg0 mtu $orig_mtu
+ip2 link set wg0 mtu $orig_mtu
+
+# Test using IPv4 that roaming works
+ip0 -4 addr del 127.0.0.1/8 dev lo
+ip0 -4 addr add 127.212.121.99/8 dev lo
+n1 wg set wg0 listen-port 9999
+n1 wg set wg0 peer "$pub2" endpoint 127.0.0.1:2
+n1 ping6 -W 1 -c 1 fd00::2
+[[ $(n2 wg show wg0 endpoints) == "$pub1 127.212.121.99:9999" ]]
+
+# Test using IPv6 that roaming works
+n1 wg set wg0 listen-port 9998
+n1 wg set wg0 peer "$pub2" endpoint [::1]:2
+n1 ping -W 1 -c 1 192.168.241.2
+[[ $(n2 wg show wg0 endpoints) == "$pub1 [::1]:9998" ]]
+
+# Test that crypto-RP filter works
+n1 wg set wg0 peer "$pub2" allowed-ips 192.168.241.0/24
+exec 4< <(n1 ncat -l -u -p 1111)
+ncat_pid=$!
+waitncatudp $netns1
+n2 ncat -u 192.168.241.1 1111 <<<"X"
+read -r -N 1 -t 1 out <&4 && [[ $out == "X" ]]
+kill $ncat_pid
+more_specific_key="$(pp wg genkey | pp wg pubkey)"
+n1 wg set wg0 peer "$more_specific_key" allowed-ips 192.168.241.2/32
+n2 wg set wg0 listen-port 9997
+exec 4< <(n1 ncat -l -u -p 1111)
+ncat_pid=$!
+waitncatudp $netns1
+n2 ncat -u 192.168.241.1 1111 <<<"X"
+! read -r -N 1 -t 1 out <&4 || false
+kill $ncat_pid
+n1 wg set wg0 peer "$more_specific_key" remove
+[[ $(n1 wg show wg0 endpoints) == "$pub2 [::1]:9997" ]]
+
+# Test that we can change private keys keys and immediately handshake
+n1 wg set wg0 private-key <(echo "$key1") peer "$pub2" preshared-key <(echo "$psk") allowed-ips 192.168.241.2/32 endpoint 127.0.0.1:2
+n2 wg set wg0 private-key <(echo "$key2") listen-port 2 peer "$pub1" preshared-key <(echo "$psk") allowed-ips 192.168.241.1/32
+n1 ping -W 1 -c 1 192.168.241.2
+n1 wg set wg0 private-key <(echo "$key3")
+n2 wg set wg0 peer "$pub3" preshared-key <(echo "$psk") allowed-ips 192.168.241.1/32 peer "$pub1" remove
+n1 ping -W 1 -c 1 192.168.241.2
+
+ip1 link del wg0
+ip2 link del wg0
+
+# Test using NAT. We now change the topology to this:
+# ┌────────────────────────────────────────┐ ┌────────────────────────────────────────────────┐ ┌────────────────────────────────────────┐
+# │ $ns1 namespace │ │ $ns0 namespace │ │ $ns2 namespace │
+# │ │ │ │ │ │
+# │ ┌─────┐ ┌─────┐ │ │ ┌──────┐ ┌──────┐ │ │ ┌─────┐ ┌─────┐ │
+# │ │ wg0 │─────────────│vethc│───────────┼────┼────│vethrc│ │vethrs│──────────────┼─────┼──│veths│────────────│ wg0 │ │
+# │ ├─────┴──────────┐ ├─────┴──────────┐│ │ ├──────┴─────────┐ ├──────┴────────────┐ │ │ ├─────┴──────────┐ ├─────┴──────────┐ │
+# │ │192.168.241.1/24│ │192.168.1.100/24││ │ │192.168.1.1/24 │ │10.0.0.1/24 │ │ │ │10.0.0.100/24 │ │192.168.241.2/24│ │
+# │ │fd00::1/24 │ │ ││ │ │ │ │SNAT:192.168.1.0/24│ │ │ │ │ │fd00::2/24 │ │
+# │ └────────────────┘ └────────────────┘│ │ └────────────────┘ └───────────────────┘ │ │ └────────────────┘ └────────────────┘ │
+# └────────────────────────────────────────┘ └────────────────────────────────────────────────┘ └────────────────────────────────────────┘
+
+ip1 link add dev wg0 type wireguard
+ip2 link add dev wg0 type wireguard
+configure_peers
+
+ip0 link add vethrc type veth peer name vethc
+ip0 link add vethrs type veth peer name veths
+ip0 link set vethc netns $netns1
+ip0 link set veths netns $netns2
+ip0 link set vethrc up
+ip0 link set vethrs up
+ip0 addr add 192.168.1.1/24 dev vethrc
+ip0 addr add 10.0.0.1/24 dev vethrs
+ip1 addr add 192.168.1.100/24 dev vethc
+ip1 link set vethc up
+ip1 route add default via 192.168.1.1
+ip2 addr add 10.0.0.100/24 dev veths
+ip2 link set veths up
+waitiface $netns0 vethrc
+waitiface $netns0 vethrs
+waitiface $netns1 vethc
+waitiface $netns2 veths
+
+n0 bash -c 'printf 1 > /proc/sys/net/ipv4/ip_forward'
+n0 bash -c 'printf 2 > /proc/sys/net/netfilter/nf_conntrack_udp_timeout'
+n0 bash -c 'printf 2 > /proc/sys/net/netfilter/nf_conntrack_udp_timeout_stream'
+n0 iptables -t nat -A POSTROUTING -s 192.168.1.0/24 -d 10.0.0.0/24 -j SNAT --to 10.0.0.1
+
+n1 wg set wg0 peer "$pub2" endpoint 10.0.0.100:2 persistent-keepalive 1
+n1 ping -W 1 -c 1 192.168.241.2
+n2 ping -W 1 -c 1 192.168.241.1
+[[ $(n2 wg show wg0 endpoints) == "$pub1 10.0.0.1:1" ]]
+# Demonstrate n2 can still send packets to n1, since persistent-keepalive will prevent connection tracking entry from expiring (to see entries: `n0 conntrack -L`).
+pp sleep 3
+n2 ping -W 1 -c 1 192.168.241.1
+n1 wg set wg0 peer "$pub2" persistent-keepalive 0
+
+# Do a wg-quick(8)-style policy routing for the default route, making sure vethc has a v6 address to tease out bugs.
+ip1 -6 addr add fc00::9/96 dev vethc
+ip1 -6 route add default via fc00::1
+ip2 -4 addr add 192.168.99.7/32 dev wg0
+ip2 -6 addr add abab::1111/128 dev wg0
+n1 wg set wg0 fwmark 51820 peer "$pub2" allowed-ips 192.168.99.7,abab::1111
+ip1 -6 route add default dev wg0 table 51820
+ip1 -6 rule add not fwmark 51820 table 51820
+ip1 -6 rule add table main suppress_prefixlength 0
+ip1 -4 route add default dev wg0 table 51820
+ip1 -4 rule add not fwmark 51820 table 51820
+ip1 -4 rule add table main suppress_prefixlength 0
+# Flood the pings instead of sending just one, to trigger routing table reference counting bugs.
+n1 ping -W 1 -c 100 -f 192.168.99.7
+n1 ping -W 1 -c 100 -f abab::1111
+
+n0 iptables -t nat -F
+ip0 link del vethrc
+ip0 link del vethrs
+ip1 link del wg0
+ip2 link del wg0
+
+# Test that saddr routing is sticky but not too sticky, changing to this topology:
+# ┌────────────────────────────────────────┐ ┌────────────────────────────────────────┐
+# │ $ns1 namespace │ │ $ns2 namespace │
+# │ │ │ │
+# │ ┌─────┐ ┌─────┐ │ │ ┌─────┐ ┌─────┐ │
+# │ │ wg0 │─────────────│veth1│───────────┼────┼──│veth2│────────────│ wg0 │ │
+# │ ├─────┴──────────┐ ├─────┴──────────┐│ │ ├─────┴──────────┐ ├─────┴──────────┐ │
+# │ │192.168.241.1/24│ │10.0.0.1/24 ││ │ │10.0.0.2/24 │ │192.168.241.2/24│ │
+# │ │fd00::1/24 │ │fd00:aa::1/96 ││ │ │fd00:aa::2/96 │ │fd00::2/24 │ │
+# │ └────────────────┘ └────────────────┘│ │ └────────────────┘ └────────────────┘ │
+# └────────────────────────────────────────┘ └────────────────────────────────────────┘
+
+ip1 link add dev wg0 type wireguard
+ip2 link add dev wg0 type wireguard
+configure_peers
+ip1 link add veth1 type veth peer name veth2
+ip1 link set veth2 netns $netns2
+n1 bash -c 'printf 0 > /proc/sys/net/ipv6/conf/all/accept_dad'
+n2 bash -c 'printf 0 > /proc/sys/net/ipv6/conf/all/accept_dad'
+n1 bash -c 'printf 0 > /proc/sys/net/ipv6/conf/veth1/accept_dad'
+n2 bash -c 'printf 0 > /proc/sys/net/ipv6/conf/veth2/accept_dad'
+n1 bash -c 'printf 1 > /proc/sys/net/ipv4/conf/veth1/promote_secondaries'
+
+# First we check that we aren't overly sticky and can fall over to new IPs when old ones are removed
+ip1 addr add 10.0.0.1/24 dev veth1
+ip1 addr add fd00:aa::1/96 dev veth1
+ip2 addr add 10.0.0.2/24 dev veth2
+ip2 addr add fd00:aa::2/96 dev veth2
+ip1 link set veth1 up
+ip2 link set veth2 up
+waitiface $netns1 veth1
+waitiface $netns2 veth2
+n1 wg set wg0 peer "$pub2" endpoint 10.0.0.2:2
+n1 ping -W 1 -c 1 192.168.241.2
+ip1 addr add 10.0.0.10/24 dev veth1
+ip1 addr del 10.0.0.1/24 dev veth1
+n1 ping -W 1 -c 1 192.168.241.2
+n1 wg set wg0 peer "$pub2" endpoint [fd00:aa::2]:2
+n1 ping -W 1 -c 1 192.168.241.2
+ip1 addr add fd00:aa::10/96 dev veth1
+ip1 addr del fd00:aa::1/96 dev veth1
+n1 ping -W 1 -c 1 192.168.241.2
+
+# Now we show that we can successfully do reply to sender routing
+ip1 link set veth1 down
+ip2 link set veth2 down
+ip1 addr flush dev veth1
+ip2 addr flush dev veth2
+ip1 addr add 10.0.0.1/24 dev veth1
+ip1 addr add 10.0.0.2/24 dev veth1
+ip1 addr add fd00:aa::1/96 dev veth1
+ip1 addr add fd00:aa::2/96 dev veth1
+ip2 addr add 10.0.0.3/24 dev veth2
+ip2 addr add fd00:aa::3/96 dev veth2
+ip1 link set veth1 up
+ip2 link set veth2 up
+waitiface $netns1 veth1
+waitiface $netns2 veth2
+n2 wg set wg0 peer "$pub1" endpoint 10.0.0.1:1
+n2 ping -W 1 -c 1 192.168.241.1
+[[ $(n2 wg show wg0 endpoints) == "$pub1 10.0.0.1:1" ]]
+n2 wg set wg0 peer "$pub1" endpoint [fd00:aa::1]:1
+n2 ping -W 1 -c 1 192.168.241.1
+[[ $(n2 wg show wg0 endpoints) == "$pub1 [fd00:aa::1]:1" ]]
+n2 wg set wg0 peer "$pub1" endpoint 10.0.0.2:1
+n2 ping -W 1 -c 1 192.168.241.1
+[[ $(n2 wg show wg0 endpoints) == "$pub1 10.0.0.2:1" ]]
+n2 wg set wg0 peer "$pub1" endpoint [fd00:aa::2]:1
+n2 ping -W 1 -c 1 192.168.241.1
+[[ $(n2 wg show wg0 endpoints) == "$pub1 [fd00:aa::2]:1" ]]
+
+# What happens if the inbound destination address belongs to a different interface as the default route?
+ip1 link add dummy0 type dummy
+ip1 addr add 10.50.0.1/24 dev dummy0
+ip1 link set dummy0 up
+ip2 route add 10.50.0.0/24 dev veth2
+n2 wg set wg0 peer "$pub1" endpoint 10.50.0.1:1
+n2 ping -W 1 -c 1 192.168.241.1
+[[ $(n2 wg show wg0 endpoints) == "$pub1 10.50.0.1:1" ]]
+
+ip1 link del dummy0
+ip1 addr flush dev veth1
+ip2 addr flush dev veth2
+ip1 route flush dev veth1
+ip2 route flush dev veth2
+
+# Now we see what happens if another interface route takes precedence over an ongoing one
+ip1 link add veth3 type veth peer name veth4
+ip1 link set veth4 netns $netns2
+ip1 addr add 10.0.0.1/24 dev veth1
+ip2 addr add 10.0.0.2/24 dev veth2
+ip1 addr add 10.0.0.3/24 dev veth3
+ip1 link set veth1 up
+ip2 link set veth2 up
+ip1 link set veth3 up
+ip2 link set veth4 up
+waitiface $netns1 veth1
+waitiface $netns2 veth2
+waitiface $netns1 veth3
+waitiface $netns2 veth4
+ip1 route flush dev veth1
+ip1 route flush dev veth3
+ip1 route add 10.0.0.0/24 dev veth1 src 10.0.0.1 metric 2
+n1 wg set wg0 peer "$pub2" endpoint 10.0.0.2:2
+n1 ping -W 1 -c 1 192.168.241.2
+[[ $(n2 wg show wg0 endpoints) == "$pub1 10.0.0.1:1" ]]
+ip1 route add 10.0.0.0/24 dev veth3 src 10.0.0.3 metric 1
+n1 bash -c 'printf 0 > /proc/sys/net/ipv4/conf/veth1/rp_filter'
+n2 bash -c 'printf 0 > /proc/sys/net/ipv4/conf/veth4/rp_filter'
+n1 bash -c 'printf 0 > /proc/sys/net/ipv4/conf/all/rp_filter'
+n2 bash -c 'printf 0 > /proc/sys/net/ipv4/conf/all/rp_filter'
+n1 ping -W 1 -c 1 192.168.241.2
+[[ $(n2 wg show wg0 endpoints) == "$pub1 10.0.0.3:1" ]]
+
+ip1 link del veth1
+ip1 link del veth3
+ip1 link del wg0
+ip2 link del wg0
+
+# We test that Netlink/IPC is working properly by doing things that usually cause split responses
+ip0 link add dev wg0 type wireguard
+config=( "[Interface]" "PrivateKey=$(wg genkey)" "[Peer]" "PublicKey=$(wg genkey)" )
+for a in {1..255}; do
+ for b in {0..255}; do
+ config+=( "AllowedIPs=$a.$b.0.0/16,$a::$b/128" )
+ done
+done
+n0 wg setconf wg0 <(printf '%s\n' "${config[@]}")
+i=0
+for ip in $(n0 wg show wg0 allowed-ips); do
+ ((++i))
+done
+((i == 255*256*2+1))
+ip0 link del wg0
+ip0 link add dev wg0 type wireguard
+config=( "[Interface]" "PrivateKey=$(wg genkey)" )
+for a in {1..40}; do
+ config+=( "[Peer]" "PublicKey=$(wg genkey)" )
+ for b in {1..52}; do
+ config+=( "AllowedIPs=$a.$b.0.0/16" )
+ done
+done
+n0 wg setconf wg0 <(printf '%s\n' "${config[@]}")
+i=0
+while read -r line; do
+ j=0
+ for ip in $line; do
+ ((++j))
+ done
+ ((j == 53))
+ ((++i))
+done < <(n0 wg show wg0 allowed-ips)
+((i == 40))
+ip0 link del wg0
+ip0 link add wg0 type wireguard
+config=( )
+for i in {1..29}; do
+ config+=( "[Peer]" "PublicKey=$(wg genkey)" )
+done
+config+=( "[Peer]" "PublicKey=$(wg genkey)" "AllowedIPs=255.2.3.4/32,abcd::255/128" )
+n0 wg setconf wg0 <(printf '%s\n' "${config[@]}")
+n0 wg showconf wg0 > /dev/null
+ip0 link del wg0
+
+allowedips=( )
+for i in {1..197}; do
+ allowedips+=( abcd::$i )
+done
+saved_ifs="$IFS"
+IFS=,
+allowedips="${allowedips[*]}"
+IFS="$saved_ifs"
+ip0 link add wg0 type wireguard
+n0 wg set wg0 peer "$pub1"
+n0 wg set wg0 peer "$pub2" allowed-ips "$allowedips"
+{
+ read -r pub allowedips
+ [[ $pub == "$pub1" && $allowedips == "(none)" ]]
+ read -r pub allowedips
+ [[ $pub == "$pub2" ]]
+ i=0
+ for _ in $allowedips; do
+ ((++i))
+ done
+ ((i == 197))
+} < <(n0 wg show wg0 allowed-ips)
+ip0 link del wg0
+
+! n0 wg show doesnotexist || false
+
+ip0 link add wg0 type wireguard
+n0 wg set wg0 private-key <(echo "$key1") peer "$pub2" preshared-key <(echo "$psk")
+[[ $(n0 wg show wg0 private-key) == "$key1" ]]
+[[ $(n0 wg show wg0 preshared-keys) == "$pub2 $psk" ]]
+n0 wg set wg0 private-key /dev/null peer "$pub2" preshared-key /dev/null
+[[ $(n0 wg show wg0 private-key) == "(none)" ]]
+[[ $(n0 wg show wg0 preshared-keys) == "$pub2 (none)" ]]
+n0 wg set wg0 peer "$pub2"
+n0 wg set wg0 private-key <(echo "$key2")
+[[ $(n0 wg show wg0 public-key) == "$pub2" ]]
+[[ -z $(n0 wg show wg0 peers) ]]
+n0 wg set wg0 peer "$pub2"
+[[ -z $(n0 wg show wg0 peers) ]]
+n0 wg set wg0 private-key <(echo "$key1")
+n0 wg set wg0 peer "$pub2"
+[[ $(n0 wg show wg0 peers) == "$pub2" ]]
+n0 wg set wg0 private-key <(echo "/${key1:1}")
+[[ $(n0 wg show wg0 private-key) == "+${key1:1}" ]]
+n0 wg set wg0 peer "$pub2" allowed-ips 0.0.0.0/0,10.0.0.0/8,100.0.0.0/10,172.16.0.0/12,192.168.0.0/16
+n0 wg set wg0 peer "$pub2" allowed-ips 0.0.0.0/0
+n0 wg set wg0 peer "$pub2" allowed-ips ::/0,1700::/111,5000::/4,e000::/37,9000::/75
+n0 wg set wg0 peer "$pub2" allowed-ips ::/0
+ip0 link del wg0
+
+declare -A objects
+while read -t 0.1 -r line 2>/dev/null || [[ $? -ne 142 ]]; do
+ [[ $line =~ .*(wg[0-9]+:\ [A-Z][a-z]+\ [0-9]+)\ .*(created|destroyed).* ]] || continue
+ objects["${BASH_REMATCH[1]}"]+="${BASH_REMATCH[2]}"
+done < /dev/kmsg
+alldeleted=1
+for object in "${!objects[@]}"; do
+ if [[ ${objects["$object"]} != *createddestroyed ]]; then
+ echo "Error: $object: merely ${objects["$object"]}" >&3
+ alldeleted=0
+ fi
+done
+[[ $alldeleted -eq 1 ]]
+pretty "" "Objects that were created were also destroyed."
diff --git a/tools/testing/selftests/wireguard/qemu/.gitignore b/tools/testing/selftests/wireguard/qemu/.gitignore
new file mode 100644
index 000000000000..415b542a9d59
--- /dev/null
+++ b/tools/testing/selftests/wireguard/qemu/.gitignore
@@ -0,0 +1,2 @@
+build/
+distfiles/
diff --git a/tools/testing/selftests/wireguard/qemu/Makefile b/tools/testing/selftests/wireguard/qemu/Makefile
new file mode 100644
index 000000000000..f10aa3590adc
--- /dev/null
+++ b/tools/testing/selftests/wireguard/qemu/Makefile
@@ -0,0 +1,387 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (C) 2015-2019 Jason A. Donenfeld <[email protected]>. All Rights Reserved.
+
+PWD := $(shell pwd)
+
+CHOST := $(shell gcc -dumpmachine)
+HOST_ARCH := $(firstword $(subst -, ,$(CHOST)))
+ifneq (,$(ARCH))
+CBUILD := $(subst -gcc,,$(lastword $(subst /, ,$(firstword $(wildcard $(foreach bindir,$(subst :, ,$(PATH)),$(bindir)/$(ARCH)-*-gcc))))))
+ifeq (,$(CBUILD))
+$(error The toolchain for $(ARCH) is not installed)
+endif
+else
+CBUILD := $(CHOST)
+ARCH := $(firstword $(subst -, ,$(CBUILD)))
+endif
+
+# Set these from the environment to override
+KERNEL_PATH ?= $(PWD)/../../../../..
+BUILD_PATH ?= $(PWD)/build/$(ARCH)
+DISTFILES_PATH ?= $(PWD)/distfiles
+NR_CPUS ?= 4
+
+MIRROR := https://download.wireguard.com/qemu-test/distfiles/
+
+default: qemu
+
+# variable name, tarball project name, version, tarball extension, default URI base
+define tar_download =
+$(1)_VERSION := $(3)
+$(1)_NAME := $(2)-$$($(1)_VERSION)
+$(1)_TAR := $(DISTFILES_PATH)/$$($(1)_NAME)$(4)
+$(1)_PATH := $(BUILD_PATH)/$$($(1)_NAME)
+$(call file_download,$$($(1)_NAME)$(4),$(5),$(6))
+endef
+
+define file_download =
+$(DISTFILES_PATH)/$(1):
+ mkdir -p $(DISTFILES_PATH)
+ flock -x [email protected] -c '[ -f $$@ ] && exit 0; wget -O [email protected] $(MIRROR)$(1) || wget -O [email protected] $(2)$(1) || rm -f [email protected]'
+ if echo "$(3) [email protected]" | sha256sum -c -; then mv [email protected] $$@; else rm -f [email protected]; exit 71; fi
+endef
+
+$(eval $(call tar_download,MUSL,musl,1.1.24,.tar.gz,https://www.musl-libc.org/releases/,1370c9a812b2cf2a7d92802510cca0058cc37e66a7bedd70051f0a34015022a3))
+$(eval $(call tar_download,LIBMNL,libmnl,1.0.4,.tar.bz2,https://www.netfilter.org/projects/libmnl/files/,171f89699f286a5854b72b91d06e8f8e3683064c5901fb09d954a9ab6f551f81))
+$(eval $(call tar_download,IPERF,iperf,3.7,.tar.gz,https://downloads.es.net/pub/iperf/,d846040224317caf2f75c843d309a950a7db23f9b44b94688ccbe557d6d1710c))
+$(eval $(call tar_download,BASH,bash,5.0,.tar.gz,https://ftp.gnu.org/gnu/bash/,b4a80f2ac66170b2913efbfb9f2594f1f76c7b1afd11f799e22035d63077fb4d))
+$(eval $(call tar_download,IPROUTE2,iproute2,5.4.0,.tar.xz,https://www.kernel.org/pub/linux/utils/net/iproute2/,fe97aa60a0d4c5ac830be18937e18dc3400ca713a33a89ad896ff1e3d46086ae))
+$(eval $(call tar_download,IPTABLES,iptables,1.8.4,.tar.bz2,https://www.netfilter.org/projects/iptables/files/,993a3a5490a544c2cbf2ef15cf7e7ed21af1845baf228318d5c36ef8827e157c))
+$(eval $(call tar_download,NMAP,nmap,7.80,.tar.bz2,https://nmap.org/dist/,fcfa5a0e42099e12e4bf7a68ebe6fde05553383a682e816a7ec9256ab4773faa))
+$(eval $(call tar_download,IPUTILS,iputils,s20190709,.tar.gz,https://github.com/iputils/iputils/archive/s20190709.tar.gz/#,a15720dd741d7538dd2645f9f516d193636ae4300ff7dbc8bfca757bf166490a))
+$(eval $(call tar_download,WIREGUARD_TOOLS,wireguard-tools,1.0.20191226,.tar.xz,https://git.zx2c4.com/wireguard-tools/snapshot/,aa8af0fdc9872d369d8c890a84dbc2a2466b55795dccd5b47721b2d97644b04f))
+
+KERNEL_BUILD_PATH := $(BUILD_PATH)/kernel$(if $(findstring yes,$(DEBUG_KERNEL)),-debug)
+rwildcard=$(foreach d,$(wildcard $1*),$(call rwildcard,$d/,$2) $(filter $(subst *,%,$2),$d))
+WIREGUARD_SOURCES := $(call rwildcard,$(KERNEL_PATH)/drivers/net/wireguard/,*)
+
+export CFLAGS ?= -O3 -pipe
+export LDFLAGS ?=
+export CPPFLAGS := -I$(BUILD_PATH)/include
+
+ifeq ($(HOST_ARCH),$(ARCH))
+CROSS_COMPILE_FLAG := --host=$(CHOST)
+CFLAGS += -march=native
+STRIP := strip
+else
+$(info Cross compilation: building for $(CBUILD) using $(CHOST))
+CROSS_COMPILE_FLAG := --build=$(CBUILD) --host=$(CHOST)
+export CROSS_COMPILE=$(CBUILD)-
+STRIP := $(CBUILD)-strip
+endif
+ifeq ($(ARCH),aarch64)
+QEMU_ARCH := aarch64
+KERNEL_ARCH := arm64
+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm64/boot/Image
+ifeq ($(HOST_ARCH),$(ARCH))
+QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm
+else
+QEMU_MACHINE := -cpu cortex-a53 -machine virt
+CFLAGS += -march=armv8-a -mtune=cortex-a53
+endif
+else ifeq ($(ARCH),aarch64_be)
+QEMU_ARCH := aarch64
+KERNEL_ARCH := arm64
+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm64/boot/Image
+ifeq ($(HOST_ARCH),$(ARCH))
+QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm
+else
+QEMU_MACHINE := -cpu cortex-a53 -machine virt
+CFLAGS += -march=armv8-a -mtune=cortex-a53
+endif
+else ifeq ($(ARCH),arm)
+QEMU_ARCH := arm
+KERNEL_ARCH := arm
+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm/boot/zImage
+ifeq ($(HOST_ARCH),$(ARCH))
+QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm
+else
+QEMU_MACHINE := -cpu cortex-a15 -machine virt
+CFLAGS += -march=armv7-a -mtune=cortex-a15 -mabi=aapcs-linux
+endif
+else ifeq ($(ARCH),armeb)
+QEMU_ARCH := arm
+KERNEL_ARCH := arm
+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm/boot/zImage
+ifeq ($(HOST_ARCH),$(ARCH))
+QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm
+else
+QEMU_MACHINE := -cpu cortex-a15 -machine virt
+CFLAGS += -march=armv7-a -mabi=aapcs-linux # We don't pass -mtune=cortex-a15 due to a compiler bug on big endian.
+LDFLAGS += -Wl,--be8
+endif
+else ifeq ($(ARCH),x86_64)
+QEMU_ARCH := x86_64
+KERNEL_ARCH := x86_64
+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/x86/boot/bzImage
+ifeq ($(HOST_ARCH),$(ARCH))
+QEMU_MACHINE := -cpu host -machine q35,accel=kvm
+else
+QEMU_MACHINE := -cpu Skylake-Server -machine q35
+CFLAGS += -march=skylake-avx512
+endif
+else ifeq ($(ARCH),i686)
+QEMU_ARCH := i386
+KERNEL_ARCH := x86
+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/x86/boot/bzImage
+ifeq ($(subst x86_64,i686,$(HOST_ARCH)),$(ARCH))
+QEMU_MACHINE := -cpu host -machine q35,accel=kvm
+else
+QEMU_MACHINE := -cpu coreduo -machine q35
+CFLAGS += -march=prescott
+endif
+else ifeq ($(ARCH),mips64)
+QEMU_ARCH := mips64
+KERNEL_ARCH := mips
+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
+ifeq ($(HOST_ARCH),$(ARCH))
+QEMU_MACHINE := -cpu host -machine malta,accel=kvm
+CFLAGS += -EB
+else
+QEMU_MACHINE := -cpu MIPS64R2-generic -machine malta -smp 1
+CFLAGS += -march=mips64r2 -EB
+endif
+else ifeq ($(ARCH),mips64el)
+QEMU_ARCH := mips64el
+KERNEL_ARCH := mips
+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
+ifeq ($(HOST_ARCH),$(ARCH))
+QEMU_MACHINE := -cpu host -machine malta,accel=kvm
+CFLAGS += -EL
+else
+QEMU_MACHINE := -cpu MIPS64R2-generic -machine malta -smp 1
+CFLAGS += -march=mips64r2 -EL
+endif
+else ifeq ($(ARCH),mips)
+QEMU_ARCH := mips
+KERNEL_ARCH := mips
+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
+ifeq ($(HOST_ARCH),$(ARCH))
+QEMU_MACHINE := -cpu host -machine malta,accel=kvm
+CFLAGS += -EB
+else
+QEMU_MACHINE := -cpu 24Kf -machine malta -smp 1
+CFLAGS += -march=mips32r2 -EB
+endif
+else ifeq ($(ARCH),mipsel)
+QEMU_ARCH := mipsel
+KERNEL_ARCH := mips
+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
+ifeq ($(HOST_ARCH),$(ARCH))
+QEMU_MACHINE := -cpu host -machine malta,accel=kvm
+CFLAGS += -EL
+else
+QEMU_MACHINE := -cpu 24Kf -machine malta -smp 1
+CFLAGS += -march=mips32r2 -EL
+endif
+else ifeq ($(ARCH),powerpc64le)
+QEMU_ARCH := ppc64
+KERNEL_ARCH := powerpc
+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
+ifeq ($(HOST_ARCH),$(ARCH))
+QEMU_MACHINE := -cpu host,accel=kvm -machine pseries
+else
+QEMU_MACHINE := -machine pseries
+endif
+CFLAGS += -mcpu=powerpc64le -mlong-double-64
+else ifeq ($(ARCH),powerpc)
+QEMU_ARCH := ppc
+KERNEL_ARCH := powerpc
+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/powerpc/boot/uImage
+ifeq ($(HOST_ARCH),$(ARCH))
+QEMU_MACHINE := -cpu host,accel=kvm -machine ppce500
+else
+QEMU_MACHINE := -machine ppce500
+endif
+CFLAGS += -mcpu=powerpc -mlong-double-64 -msecure-plt
+else ifeq ($(ARCH),m68k)
+QEMU_ARCH := m68k
+KERNEL_ARCH := m68k
+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
+KERNEL_CMDLINE := $(shell sed -n 's/CONFIG_CMDLINE=\(.*\)/\1/p' arch/m68k.config)
+ifeq ($(HOST_ARCH),$(ARCH))
+QEMU_MACHINE := -cpu host,accel=kvm -machine q800 -smp 1 -append $(KERNEL_CMDLINE)
+else
+QEMU_MACHINE := -machine q800 -smp 1 -append $(KERNEL_CMDLINE)
+endif
+else
+$(error I only build: x86_64, i686, arm, armeb, aarch64, aarch64_be, mips, mipsel, mips64, mips64el, powerpc64le, powerpc, m68k)
+endif
+
+REAL_CC := $(CBUILD)-gcc
+MUSL_CC := $(BUILD_PATH)/musl-gcc
+export CC := $(MUSL_CC)
+USERSPACE_DEPS := $(MUSL_CC) $(BUILD_PATH)/include/.installed $(BUILD_PATH)/include/linux/.installed
+
+build: $(KERNEL_BZIMAGE)
+qemu: $(KERNEL_BZIMAGE)
+ rm -f $(BUILD_PATH)/result
+ timeout --foreground 20m qemu-system-$(QEMU_ARCH) \
+ -nodefaults \
+ -nographic \
+ -smp $(NR_CPUS) \
+ $(QEMU_MACHINE) \
+ -m $$(grep -q CONFIG_DEBUG_KMEMLEAK=y $(KERNEL_BUILD_PATH)/.config && echo 1G || echo 256M) \
+ -serial stdio \
+ -serial file:$(BUILD_PATH)/result \
+ -no-reboot \
+ -monitor none \
+ -kernel $<
+ grep -Fq success $(BUILD_PATH)/result
+
+$(BUILD_PATH)/init-cpio-spec.txt:
+ mkdir -p $(BUILD_PATH)
+ echo "file /init $(BUILD_PATH)/init 755 0 0" > $@
+ echo "file /init.sh $(PWD)/../netns.sh 755 0 0" >> $@
+ echo "dir /dev 755 0 0" >> $@
+ echo "nod /dev/console 644 0 0 c 5 1" >> $@
+ echo "dir /bin 755 0 0" >> $@
+ echo "file /bin/iperf3 $(IPERF_PATH)/src/iperf3 755 0 0" >> $@
+ echo "file /bin/wg $(WIREGUARD_TOOLS_PATH)/src/wg 755 0 0" >> $@
+ echo "file /bin/bash $(BASH_PATH)/bash 755 0 0" >> $@
+ echo "file /bin/ip $(IPROUTE2_PATH)/ip/ip 755 0 0" >> $@
+ echo "file /bin/ss $(IPROUTE2_PATH)/misc/ss 755 0 0" >> $@
+ echo "file /bin/ping $(IPUTILS_PATH)/ping 755 0 0" >> $@
+ echo "file /bin/ncat $(NMAP_PATH)/ncat/ncat 755 0 0" >> $@
+ echo "file /bin/xtables-legacy-multi $(IPTABLES_PATH)/iptables/xtables-legacy-multi 755 0 0" >> $@
+ echo "slink /bin/iptables xtables-legacy-multi 777 0 0" >> $@
+ echo "slink /bin/ping6 ping 777 0 0" >> $@
+ echo "dir /lib 755 0 0" >> $@
+ echo "file /lib/libc.so $(MUSL_PATH)/lib/libc.so 755 0 0" >> $@
+ echo "slink /lib/ld-linux.so.1 libc.so 777 0 0" >> $@
+
+$(KERNEL_BUILD_PATH)/.config: kernel.config arch/$(ARCH).config
+ mkdir -p $(KERNEL_BUILD_PATH)
+ cp kernel.config $(KERNEL_BUILD_PATH)/minimal.config
+ printf 'CONFIG_NR_CPUS=$(NR_CPUS)\nCONFIG_INITRAMFS_SOURCE="$(BUILD_PATH)/init-cpio-spec.txt"\n' >> $(KERNEL_BUILD_PATH)/minimal.config
+ cat arch/$(ARCH).config >> $(KERNEL_BUILD_PATH)/minimal.config
+ $(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) ARCH=$(KERNEL_ARCH) allnoconfig
+ cd $(KERNEL_BUILD_PATH) && ARCH=$(KERNEL_ARCH) $(KERNEL_PATH)/scripts/kconfig/merge_config.sh -n $(KERNEL_BUILD_PATH)/.config $(KERNEL_BUILD_PATH)/minimal.config
+ $(if $(findstring yes,$(DEBUG_KERNEL)),cp debug.config $(KERNEL_BUILD_PATH) && cd $(KERNEL_BUILD_PATH) && ARCH=$(KERNEL_ARCH) $(KERNEL_PATH)/scripts/kconfig/merge_config.sh -n $(KERNEL_BUILD_PATH)/.config debug.config,)
+
+$(KERNEL_BZIMAGE): $(KERNEL_BUILD_PATH)/.config $(BUILD_PATH)/init-cpio-spec.txt $(MUSL_PATH)/lib/libc.so $(IPERF_PATH)/src/iperf3 $(IPUTILS_PATH)/ping $(BASH_PATH)/bash $(IPROUTE2_PATH)/misc/ss $(IPROUTE2_PATH)/ip/ip $(IPTABLES_PATH)/iptables/xtables-legacy-multi $(NMAP_PATH)/ncat/ncat $(WIREGUARD_TOOLS_PATH)/src/wg $(BUILD_PATH)/init ../netns.sh $(WIREGUARD_SOURCES)
+ $(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE)
+
+$(BUILD_PATH)/include/linux/.installed: | $(KERNEL_BUILD_PATH)/.config
+ $(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) INSTALL_HDR_PATH=$(BUILD_PATH) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE) headers_install
+ touch $@
+
+$(MUSL_PATH)/lib/libc.so: $(MUSL_TAR)
+ mkdir -p $(BUILD_PATH)
+ flock -s $<.lock tar -C $(BUILD_PATH) -xf $<
+ cd $(MUSL_PATH) && CC=$(REAL_CC) ./configure --prefix=/ --disable-static --build=$(CBUILD)
+ $(MAKE) -C $(MUSL_PATH)
+ $(STRIP) -s $@
+
+$(BUILD_PATH)/include/.installed: $(MUSL_PATH)/lib/libc.so
+ $(MAKE) -C $(MUSL_PATH) DESTDIR=$(BUILD_PATH) install-headers
+ touch $@
+
+$(MUSL_CC): $(MUSL_PATH)/lib/libc.so
+ sh $(MUSL_PATH)/tools/musl-gcc.specs.sh $(BUILD_PATH)/include $(MUSL_PATH)/lib /lib/ld-linux.so.1 > $(BUILD_PATH)/musl-gcc.specs
+ printf '#!/bin/sh\nexec "$(REAL_CC)" --specs="$(BUILD_PATH)/musl-gcc.specs" "$$@"\n' > $(BUILD_PATH)/musl-gcc
+ chmod +x $(BUILD_PATH)/musl-gcc
+
+$(IPERF_PATH)/.installed: $(IPERF_TAR)
+ mkdir -p $(BUILD_PATH)
+ flock -s $<.lock tar -C $(BUILD_PATH) -xf $<
+ sed -i '1s/^/#include <stdint.h>/' $(IPERF_PATH)/src/cjson.h $(IPERF_PATH)/src/timer.h
+ sed -i -r 's/-p?g//g' $(IPERF_PATH)/src/Makefile*
+ touch $@
+
+$(IPERF_PATH)/src/iperf3: | $(IPERF_PATH)/.installed $(USERSPACE_DEPS)
+ cd $(IPERF_PATH) && CFLAGS="$(CFLAGS) -D_GNU_SOURCE" ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared --with-openssl=no
+ $(MAKE) -C $(IPERF_PATH)
+ $(STRIP) -s $@
+
+$(LIBMNL_PATH)/.installed: $(LIBMNL_TAR)
+ flock -s $<.lock tar -C $(BUILD_PATH) -xf $<
+ touch $@
+
+$(LIBMNL_PATH)/src/.libs/libmnl.a: | $(LIBMNL_PATH)/.installed $(USERSPACE_DEPS)
+ cd $(LIBMNL_PATH) && ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared
+ $(MAKE) -C $(LIBMNL_PATH)
+ sed -i 's:prefix=.*:prefix=$(LIBMNL_PATH):' $(LIBMNL_PATH)/libmnl.pc
+
+$(WIREGUARD_TOOLS_PATH)/.installed: $(WIREGUARD_TOOLS_TAR)
+ flock -s $<.lock tar -C $(BUILD_PATH) -xf $<
+ touch $@
+
+$(WIREGUARD_TOOLS_PATH)/src/wg: | $(WIREGUARD_TOOLS_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS)
+ LDFLAGS="$(LDFLAGS) -L$(LIBMNL_PATH)/src/.libs" $(MAKE) -C $(WIREGUARD_TOOLS_PATH)/src LIBMNL_CFLAGS="-I$(LIBMNL_PATH)/include" LIBMNL_LDLIBS="-lmnl" wg
+ $(STRIP) -s $@
+
+$(BUILD_PATH)/init: init.c | $(USERSPACE_DEPS)
+ mkdir -p $(BUILD_PATH)
+ $(MUSL_CC) -o $@ $(CFLAGS) $(LDFLAGS) -std=gnu11 $<
+ $(STRIP) -s $@
+
+$(IPUTILS_PATH)/.installed: $(IPUTILS_TAR)
+ mkdir -p $(BUILD_PATH)
+ flock -s $<.lock tar -C $(BUILD_PATH) -xf $<
+ touch $@
+
+$(IPUTILS_PATH)/ping: | $(IPUTILS_PATH)/.installed $(USERSPACE_DEPS)
+ sed -i /atexit/d $(IPUTILS_PATH)/ping.c
+ cd $(IPUTILS_PATH) && $(CC) $(CFLAGS) -std=c99 -o $@ ping.c ping_common.c ping6_common.c iputils_common.c -D_GNU_SOURCE -D'IPUTILS_VERSION(f)=f' -lresolv $(LDFLAGS)
+ $(STRIP) -s $@
+
+$(BASH_PATH)/.installed: $(BASH_TAR)
+ mkdir -p $(BUILD_PATH)
+ flock -s $<.lock tar -C $(BUILD_PATH) -xf $<
+ touch $@
+
+$(BASH_PATH)/bash: | $(BASH_PATH)/.installed $(USERSPACE_DEPS)
+ cd $(BASH_PATH) && ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --without-bash-malloc --disable-debugger --disable-help-builtin --disable-history --disable-multibyte --disable-progcomp --disable-readline --disable-mem-scramble
+ $(MAKE) -C $(BASH_PATH)
+ $(STRIP) -s $@
+
+$(IPROUTE2_PATH)/.installed: $(IPROUTE2_TAR)
+ mkdir -p $(BUILD_PATH)
+ flock -s $<.lock tar -C $(BUILD_PATH) -xf $<
+ printf 'CC:=$(CC)\nPKG_CONFIG:=pkg-config\nTC_CONFIG_XT:=n\nTC_CONFIG_ATM:=n\nTC_CONFIG_IPSET:=n\nIP_CONFIG_SETNS:=y\nHAVE_ELF:=n\nHAVE_MNL:=y\nHAVE_BERKELEY_DB:=n\nHAVE_LATEX:=n\nHAVE_PDFLATEX:=n\nCFLAGS+=-DHAVE_SETNS -DHAVE_LIBMNL -I$(LIBMNL_PATH)/include\nLDLIBS+=-lmnl' > $(IPROUTE2_PATH)/config.mk
+ printf 'lib: snapshot\n\t$$(MAKE) -C lib\nip/ip: lib\n\t$$(MAKE) -C ip ip\nmisc/ss: lib\n\t$$(MAKE) -C misc ss\n' >> $(IPROUTE2_PATH)/Makefile
+ touch $@
+
+$(IPROUTE2_PATH)/ip/ip: | $(IPROUTE2_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS)
+ LDFLAGS="$(LDFLAGS) -L$(LIBMNL_PATH)/src/.libs" PKG_CONFIG_LIBDIR="$(LIBMNL_PATH)" $(MAKE) -C $(IPROUTE2_PATH) PREFIX=/ ip/ip
+ $(STRIP) -s $(IPROUTE2_PATH)/ip/ip
+
+$(IPROUTE2_PATH)/misc/ss: | $(IPROUTE2_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS)
+ LDFLAGS="$(LDFLAGS) -L$(LIBMNL_PATH)/src/.libs" PKG_CONFIG_LIBDIR="$(LIBMNL_PATH)" $(MAKE) -C $(IPROUTE2_PATH) PREFIX=/ misc/ss
+ $(STRIP) -s $(IPROUTE2_PATH)/misc/ss
+
+$(IPTABLES_PATH)/.installed: $(IPTABLES_TAR)
+ mkdir -p $(BUILD_PATH)
+ flock -s $<.lock tar -C $(BUILD_PATH) -xf $<
+ sed -i -e "/nfnetlink=[01]/s:=[01]:=0:" -e "/nfconntrack=[01]/s:=[01]:=0:" $(IPTABLES_PATH)/configure
+ touch $@
+
+$(IPTABLES_PATH)/iptables/xtables-legacy-multi: | $(IPTABLES_PATH)/.installed $(LIBMNL_PATH)/src/.libs/libmnl.a $(USERSPACE_DEPS)
+ cd $(IPTABLES_PATH) && PKG_CONFIG_LIBDIR="$(LIBMNL_PATH)" ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared --disable-nftables --disable-bpf-compiler --disable-nfsynproxy --disable-libipq --with-kernel=$(BUILD_PATH)/include
+ $(MAKE) -C $(IPTABLES_PATH)
+ $(STRIP) -s $@
+
+$(NMAP_PATH)/.installed: $(NMAP_TAR)
+ mkdir -p $(BUILD_PATH)
+ flock -s $<.lock tar -C $(BUILD_PATH) -xf $<
+ touch $@
+
+$(NMAP_PATH)/ncat/ncat: | $(NMAP_PATH)/.installed $(USERSPACE_DEPS)
+ cd $(NMAP_PATH) && ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared --without-ndiff --without-zenmap --without-nping --with-libpcap=included --with-libpcre=included --with-libdnet=included --without-liblua --with-liblinear=included --without-nmap-update --without-openssl --with-pcap=linux --without-libssh
+ $(MAKE) -C $(NMAP_PATH)/libpcap
+ $(MAKE) -C $(NMAP_PATH)/ncat
+ $(STRIP) -s $@
+
+clean:
+ rm -rf $(BUILD_PATH)
+
+distclean: clean
+ rm -rf $(DISTFILES_PATH)
+
+menuconfig: $(KERNEL_BUILD_PATH)/.config
+ $(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE) menuconfig
+
+.PHONY: qemu build clean distclean menuconfig
+.DELETE_ON_ERROR:
diff --git a/tools/testing/selftests/wireguard/qemu/arch/aarch64.config b/tools/testing/selftests/wireguard/qemu/arch/aarch64.config
new file mode 100644
index 000000000000..3d063bb247bb
--- /dev/null
+++ b/tools/testing/selftests/wireguard/qemu/arch/aarch64.config
@@ -0,0 +1,5 @@
+CONFIG_SERIAL_AMBA_PL011=y
+CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
+CONFIG_CMDLINE_BOOL=y
+CONFIG_CMDLINE="console=ttyAMA0 wg.success=ttyAMA1"
+CONFIG_FRAME_WARN=1280
diff --git a/tools/testing/selftests/wireguard/qemu/arch/aarch64_be.config b/tools/testing/selftests/wireguard/qemu/arch/aarch64_be.config
new file mode 100644
index 000000000000..dbdc7e406a7b
--- /dev/null
+++ b/tools/testing/selftests/wireguard/qemu/arch/aarch64_be.config
@@ -0,0 +1,6 @@
+CONFIG_CPU_BIG_ENDIAN=y
+CONFIG_SERIAL_AMBA_PL011=y
+CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
+CONFIG_CMDLINE_BOOL=y
+CONFIG_CMDLINE="console=ttyAMA0 wg.success=ttyAMA1"
+CONFIG_FRAME_WARN=1280
diff --git a/tools/testing/selftests/wireguard/qemu/arch/arm.config b/tools/testing/selftests/wireguard/qemu/arch/arm.config
new file mode 100644
index 000000000000..148f49905418
--- /dev/null
+++ b/tools/testing/selftests/wireguard/qemu/arch/arm.config
@@ -0,0 +1,9 @@
+CONFIG_MMU=y
+CONFIG_ARCH_MULTI_V7=y
+CONFIG_ARCH_VIRT=y
+CONFIG_THUMB2_KERNEL=n
+CONFIG_SERIAL_AMBA_PL011=y
+CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
+CONFIG_CMDLINE_BOOL=y
+CONFIG_CMDLINE="console=ttyAMA0 wg.success=ttyAMA1"
+CONFIG_FRAME_WARN=1024
diff --git a/tools/testing/selftests/wireguard/qemu/arch/armeb.config b/tools/testing/selftests/wireguard/qemu/arch/armeb.config
new file mode 100644
index 000000000000..bd76b07d00a2
--- /dev/null
+++ b/tools/testing/selftests/wireguard/qemu/arch/armeb.config
@@ -0,0 +1,10 @@
+CONFIG_MMU=y
+CONFIG_ARCH_MULTI_V7=y
+CONFIG_ARCH_VIRT=y
+CONFIG_THUMB2_KERNEL=n
+CONFIG_SERIAL_AMBA_PL011=y
+CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
+CONFIG_CMDLINE_BOOL=y
+CONFIG_CMDLINE="console=ttyAMA0 wg.success=ttyAMA1"
+CONFIG_CPU_BIG_ENDIAN=y
+CONFIG_FRAME_WARN=1024
diff --git a/tools/testing/selftests/wireguard/qemu/arch/i686.config b/tools/testing/selftests/wireguard/qemu/arch/i686.config
new file mode 100644
index 000000000000..a85025d7206e
--- /dev/null
+++ b/tools/testing/selftests/wireguard/qemu/arch/i686.config
@@ -0,0 +1,5 @@
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_CMDLINE_BOOL=y
+CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1"
+CONFIG_FRAME_WARN=1024
diff --git a/tools/testing/selftests/wireguard/qemu/arch/m68k.config b/tools/testing/selftests/wireguard/qemu/arch/m68k.config
new file mode 100644
index 000000000000..62a15bdb877e
--- /dev/null
+++ b/tools/testing/selftests/wireguard/qemu/arch/m68k.config
@@ -0,0 +1,9 @@
+CONFIG_MMU=y
+CONFIG_M68KCLASSIC=y
+CONFIG_M68040=y
+CONFIG_MAC=y
+CONFIG_SERIAL_PMACZILOG=y
+CONFIG_SERIAL_PMACZILOG_TTYS=y
+CONFIG_SERIAL_PMACZILOG_CONSOLE=y
+CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1"
+CONFIG_FRAME_WARN=1024
diff --git a/tools/testing/selftests/wireguard/qemu/arch/mips.config b/tools/testing/selftests/wireguard/qemu/arch/mips.config
new file mode 100644
index 000000000000..df71d6b95546
--- /dev/null
+++ b/tools/testing/selftests/wireguard/qemu/arch/mips.config
@@ -0,0 +1,11 @@
+CONFIG_CPU_MIPS32_R2=y
+CONFIG_MIPS_MALTA=y
+CONFIG_MIPS_CPS=y
+CONFIG_MIPS_FP_SUPPORT=y
+CONFIG_POWER_RESET=y
+CONFIG_POWER_RESET_SYSCON=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_CMDLINE_BOOL=y
+CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1"
+CONFIG_FRAME_WARN=1024
diff --git a/tools/testing/selftests/wireguard/qemu/arch/mips64.config b/tools/testing/selftests/wireguard/qemu/arch/mips64.config
new file mode 100644
index 000000000000..90c783f725c4
--- /dev/null
+++ b/tools/testing/selftests/wireguard/qemu/arch/mips64.config
@@ -0,0 +1,14 @@
+CONFIG_64BIT=y
+CONFIG_CPU_MIPS64_R2=y
+CONFIG_MIPS32_N32=y
+CONFIG_CPU_HAS_MSA=y
+CONFIG_MIPS_MALTA=y
+CONFIG_MIPS_CPS=y
+CONFIG_MIPS_FP_SUPPORT=y
+CONFIG_POWER_RESET=y
+CONFIG_POWER_RESET_SYSCON=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_CMDLINE_BOOL=y
+CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1"
+CONFIG_FRAME_WARN=1280
diff --git a/tools/testing/selftests/wireguard/qemu/arch/mips64el.config b/tools/testing/selftests/wireguard/qemu/arch/mips64el.config
new file mode 100644
index 000000000000..435b0b43e00c
--- /dev/null
+++ b/tools/testing/selftests/wireguard/qemu/arch/mips64el.config
@@ -0,0 +1,15 @@
+CONFIG_64BIT=y
+CONFIG_CPU_MIPS64_R2=y
+CONFIG_MIPS32_N32=y
+CONFIG_CPU_HAS_MSA=y
+CONFIG_MIPS_MALTA=y
+CONFIG_CPU_LITTLE_ENDIAN=y
+CONFIG_MIPS_CPS=y
+CONFIG_MIPS_FP_SUPPORT=y
+CONFIG_POWER_RESET=y
+CONFIG_POWER_RESET_SYSCON=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_CMDLINE_BOOL=y
+CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1"
+CONFIG_FRAME_WARN=1280
diff --git a/tools/testing/selftests/wireguard/qemu/arch/mipsel.config b/tools/testing/selftests/wireguard/qemu/arch/mipsel.config
new file mode 100644
index 000000000000..62bb50c4a85f
--- /dev/null
+++ b/tools/testing/selftests/wireguard/qemu/arch/mipsel.config
@@ -0,0 +1,12 @@
+CONFIG_CPU_MIPS32_R2=y
+CONFIG_MIPS_MALTA=y
+CONFIG_CPU_LITTLE_ENDIAN=y
+CONFIG_MIPS_CPS=y
+CONFIG_MIPS_FP_SUPPORT=y
+CONFIG_POWER_RESET=y
+CONFIG_POWER_RESET_SYSCON=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_CMDLINE_BOOL=y
+CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1"
+CONFIG_FRAME_WARN=1024
diff --git a/tools/testing/selftests/wireguard/qemu/arch/powerpc.config b/tools/testing/selftests/wireguard/qemu/arch/powerpc.config
new file mode 100644
index 000000000000..57957093b71b
--- /dev/null
+++ b/tools/testing/selftests/wireguard/qemu/arch/powerpc.config
@@ -0,0 +1,10 @@
+CONFIG_PPC_QEMU_E500=y
+CONFIG_FSL_SOC_BOOKE=y
+CONFIG_PPC_85xx=y
+CONFIG_PHYS_64BIT=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_MATH_EMULATION=y
+CONFIG_CMDLINE_BOOL=y
+CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1"
+CONFIG_FRAME_WARN=1024
diff --git a/tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config b/tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config
new file mode 100644
index 000000000000..990c510a9cfa
--- /dev/null
+++ b/tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config
@@ -0,0 +1,12 @@
+CONFIG_PPC64=y
+CONFIG_PPC_PSERIES=y
+CONFIG_ALTIVEC=y
+CONFIG_VSX=y
+CONFIG_PPC_OF_BOOT_TRAMPOLINE=y
+CONFIG_PPC_RADIX_MMU=y
+CONFIG_HVC_CONSOLE=y
+CONFIG_CPU_LITTLE_ENDIAN=y
+CONFIG_CMDLINE_BOOL=y
+CONFIG_CMDLINE="console=hvc0 wg.success=hvc1"
+CONFIG_SECTION_MISMATCH_WARN_ONLY=y
+CONFIG_FRAME_WARN=1280
diff --git a/tools/testing/selftests/wireguard/qemu/arch/x86_64.config b/tools/testing/selftests/wireguard/qemu/arch/x86_64.config
new file mode 100644
index 000000000000..00a1ef4869d5
--- /dev/null
+++ b/tools/testing/selftests/wireguard/qemu/arch/x86_64.config
@@ -0,0 +1,5 @@
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_CMDLINE_BOOL=y
+CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1"
+CONFIG_FRAME_WARN=1280
diff --git a/tools/testing/selftests/wireguard/qemu/debug.config b/tools/testing/selftests/wireguard/qemu/debug.config
new file mode 100644
index 000000000000..b9c72706fe4d
--- /dev/null
+++ b/tools/testing/selftests/wireguard/qemu/debug.config
@@ -0,0 +1,67 @@
+CONFIG_LOCALVERSION="-debug"
+CONFIG_ENABLE_WARN_DEPRECATED=y
+CONFIG_ENABLE_MUST_CHECK=y
+CONFIG_FRAME_POINTER=y
+CONFIG_STACK_VALIDATION=y
+CONFIG_DEBUG_KERNEL=y
+CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_INFO_DWARF4=y
+CONFIG_PAGE_EXTENSION=y
+CONFIG_PAGE_POISONING=y
+CONFIG_DEBUG_OBJECTS=y
+CONFIG_DEBUG_OBJECTS_FREE=y
+CONFIG_DEBUG_OBJECTS_TIMERS=y
+CONFIG_DEBUG_OBJECTS_WORK=y
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=y
+CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER=y
+CONFIG_DEBUG_OBJECTS_ENABLE_DEFAULT=1
+CONFIG_SLUB_DEBUG_ON=y
+CONFIG_DEBUG_VM=y
+CONFIG_DEBUG_MEMORY_INIT=y
+CONFIG_HAVE_DEBUG_STACKOVERFLOW=y
+CONFIG_DEBUG_STACKOVERFLOW=y
+CONFIG_HAVE_ARCH_KMEMCHECK=y
+CONFIG_HAVE_ARCH_KASAN=y
+CONFIG_KASAN=y
+CONFIG_KASAN_INLINE=y
+CONFIG_UBSAN=y
+CONFIG_UBSAN_SANITIZE_ALL=y
+CONFIG_UBSAN_NO_ALIGNMENT=y
+CONFIG_UBSAN_NULL=y
+CONFIG_DEBUG_KMEMLEAK=y
+CONFIG_DEBUG_KMEMLEAK_EARLY_LOG_SIZE=8192
+CONFIG_DEBUG_STACK_USAGE=y
+CONFIG_DEBUG_SHIRQ=y
+CONFIG_WQ_WATCHDOG=y
+CONFIG_SCHED_DEBUG=y
+CONFIG_SCHED_INFO=y
+CONFIG_SCHEDSTATS=y
+CONFIG_SCHED_STACK_END_CHECK=y
+CONFIG_DEBUG_TIMEKEEPING=y
+CONFIG_TIMER_STATS=y
+CONFIG_DEBUG_PREEMPT=y
+CONFIG_DEBUG_RT_MUTEXES=y
+CONFIG_DEBUG_SPINLOCK=y
+CONFIG_DEBUG_MUTEXES=y
+CONFIG_DEBUG_LOCK_ALLOC=y
+CONFIG_PROVE_LOCKING=y
+CONFIG_LOCKDEP=y
+CONFIG_DEBUG_ATOMIC_SLEEP=y
+CONFIG_TRACE_IRQFLAGS=y
+CONFIG_DEBUG_BUGVERBOSE=y
+CONFIG_DEBUG_LIST=y
+CONFIG_DEBUG_PI_LIST=y
+CONFIG_PROVE_RCU=y
+CONFIG_SPARSE_RCU_POINTER=y
+CONFIG_RCU_CPU_STALL_TIMEOUT=21
+CONFIG_RCU_TRACE=y
+CONFIG_RCU_EQS_DEBUG=y
+CONFIG_USER_STACKTRACE_SUPPORT=y
+CONFIG_DEBUG_SG=y
+CONFIG_DEBUG_NOTIFIERS=y
+CONFIG_DOUBLEFAULT=y
+CONFIG_X86_DEBUG_FPU=y
+CONFIG_DEBUG_SECTION_MISMATCH=y
+CONFIG_DEBUG_PAGEALLOC=y
+CONFIG_DEBUG_PAGEALLOC_ENABLE_DEFAULT=y
+CONFIG_DEBUG_WW_MUTEX_SLOWPATH=y
diff --git a/tools/testing/selftests/wireguard/qemu/init.c b/tools/testing/selftests/wireguard/qemu/init.c
new file mode 100644
index 000000000000..90bc9813cadc
--- /dev/null
+++ b/tools/testing/selftests/wireguard/qemu/init.c
@@ -0,0 +1,285 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <[email protected]>. All Rights Reserved.
+ */
+
+#define _GNU_SOURCE
+#include <unistd.h>
+#include <errno.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <fcntl.h>
+#include <sys/wait.h>
+#include <sys/mount.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/io.h>
+#include <sys/ioctl.h>
+#include <sys/reboot.h>
+#include <sys/utsname.h>
+#include <sys/sendfile.h>
+#include <sys/sysmacros.h>
+#include <linux/random.h>
+#include <linux/version.h>
+
+__attribute__((noreturn)) static void poweroff(void)
+{
+ fflush(stdout);
+ fflush(stderr);
+ reboot(RB_AUTOBOOT);
+ sleep(30);
+ fprintf(stderr, "\x1b[37m\x1b[41m\x1b[1mFailed to power off!!!\x1b[0m\n");
+ exit(1);
+}
+
+static void panic(const char *what)
+{
+ fprintf(stderr, "\n\n\x1b[37m\x1b[41m\x1b[1mSOMETHING WENT HORRIBLY WRONG\x1b[0m\n\n \x1b[31m\x1b[1m%s: %s\x1b[0m\n\n\x1b[37m\x1b[44m\x1b[1mPower off...\x1b[0m\n\n", what, strerror(errno));
+ poweroff();
+}
+
+#define pretty_message(msg) puts("\x1b[32m\x1b[1m" msg "\x1b[0m")
+
+static void print_banner(void)
+{
+ struct utsname utsname;
+ int len;
+
+ if (uname(&utsname) < 0)
+ panic("uname");
+
+ len = strlen(" WireGuard Test Suite on ") + strlen(utsname.sysname) + strlen(utsname.release) + strlen(utsname.machine);
+ printf("\x1b[45m\x1b[33m\x1b[1m%*.s\x1b[0m\n\x1b[45m\x1b[33m\x1b[1m WireGuard Test Suite on %s %s %s \x1b[0m\n\x1b[45m\x1b[33m\x1b[1m%*.s\x1b[0m\n\n", len, "", utsname.sysname, utsname.release, utsname.machine, len, "");
+}
+
+static void seed_rng(void)
+{
+ int fd;
+ struct {
+ int entropy_count;
+ int buffer_size;
+ unsigned char buffer[256];
+ } entropy = {
+ .entropy_count = sizeof(entropy.buffer) * 8,
+ .buffer_size = sizeof(entropy.buffer),
+ .buffer = "Adding real entropy is not actually important for these tests. Don't try this at home, kids!"
+ };
+
+ if (mknod("/dev/urandom", S_IFCHR | 0644, makedev(1, 9)))
+ panic("mknod(/dev/urandom)");
+ fd = open("/dev/urandom", O_WRONLY);
+ if (fd < 0)
+ panic("open(urandom)");
+ for (int i = 0; i < 256; ++i) {
+ if (ioctl(fd, RNDADDENTROPY, &entropy) < 0)
+ panic("ioctl(urandom)");
+ }
+ close(fd);
+}
+
+static void mount_filesystems(void)
+{
+ pretty_message("[+] Mounting filesystems...");
+ mkdir("/dev", 0755);
+ mkdir("/proc", 0755);
+ mkdir("/sys", 0755);
+ mkdir("/tmp", 0755);
+ mkdir("/run", 0755);
+ mkdir("/var", 0755);
+ if (mount("none", "/dev", "devtmpfs", 0, NULL))
+ panic("devtmpfs mount");
+ if (mount("none", "/proc", "proc", 0, NULL))
+ panic("procfs mount");
+ if (mount("none", "/sys", "sysfs", 0, NULL))
+ panic("sysfs mount");
+ if (mount("none", "/tmp", "tmpfs", 0, NULL))
+ panic("tmpfs mount");
+ if (mount("none", "/run", "tmpfs", 0, NULL))
+ panic("tmpfs mount");
+ if (mount("none", "/sys/kernel/debug", "debugfs", 0, NULL))
+ ; /* Not a problem if it fails.*/
+ if (symlink("/run", "/var/run"))
+ panic("run symlink");
+ if (symlink("/proc/self/fd", "/dev/fd"))
+ panic("fd symlink");
+}
+
+static void enable_logging(void)
+{
+ int fd;
+ pretty_message("[+] Enabling logging...");
+ fd = open("/proc/sys/kernel/printk", O_WRONLY);
+ if (fd >= 0) {
+ if (write(fd, "9\n", 2) != 2)
+ panic("write(printk)");
+ close(fd);
+ }
+ fd = open("/proc/sys/debug/exception-trace", O_WRONLY);
+ if (fd >= 0) {
+ if (write(fd, "1\n", 2) != 2)
+ panic("write(exception-trace)");
+ close(fd);
+ }
+ fd = open("/proc/sys/kernel/panic_on_warn", O_WRONLY);
+ if (fd >= 0) {
+ if (write(fd, "1\n", 2) != 2)
+ panic("write(panic_on_warn)");
+ close(fd);
+ }
+}
+
+static void kmod_selftests(void)
+{
+ FILE *file;
+ char line[2048], *start, *pass;
+ bool success = true;
+ pretty_message("[+] Module self-tests:");
+ file = fopen("/proc/kmsg", "r");
+ if (!file)
+ panic("fopen(kmsg)");
+ if (fcntl(fileno(file), F_SETFL, O_NONBLOCK) < 0)
+ panic("fcntl(kmsg, nonblock)");
+ while (fgets(line, sizeof(line), file)) {
+ start = strstr(line, "wireguard: ");
+ if (!start)
+ continue;
+ start += 11;
+ *strchrnul(start, '\n') = '\0';
+ if (strstr(start, "www.wireguard.com"))
+ break;
+ pass = strstr(start, ": pass");
+ if (!pass || pass[6] != '\0') {
+ success = false;
+ printf(" \x1b[31m* %s\x1b[0m\n", start);
+ } else
+ printf(" \x1b[32m* %s\x1b[0m\n", start);
+ }
+ fclose(file);
+ if (!success) {
+ puts("\x1b[31m\x1b[1m[-] Tests failed! \u2639\x1b[0m");
+ poweroff();
+ }
+}
+
+static void launch_tests(void)
+{
+ char cmdline[4096], *success_dev;
+ int status, fd;
+ pid_t pid;
+
+ pretty_message("[+] Launching tests...");
+ pid = fork();
+ if (pid == -1)
+ panic("fork");
+ else if (pid == 0) {
+ execl("/init.sh", "init", NULL);
+ panic("exec");
+ }
+ if (waitpid(pid, &status, 0) < 0)
+ panic("waitpid");
+ if (WIFEXITED(status) && WEXITSTATUS(status) == 0) {
+ pretty_message("[+] Tests successful! :-)");
+ fd = open("/proc/cmdline", O_RDONLY);
+ if (fd < 0)
+ panic("open(/proc/cmdline)");
+ if (read(fd, cmdline, sizeof(cmdline) - 1) <= 0)
+ panic("read(/proc/cmdline)");
+ cmdline[sizeof(cmdline) - 1] = '\0';
+ for (success_dev = strtok(cmdline, " \n"); success_dev; success_dev = strtok(NULL, " \n")) {
+ if (strncmp(success_dev, "wg.success=", 11))
+ continue;
+ memcpy(success_dev + 11 - 5, "/dev/", 5);
+ success_dev += 11 - 5;
+ break;
+ }
+ if (!success_dev || !strlen(success_dev))
+ panic("Unable to find success device");
+
+ fd = open(success_dev, O_WRONLY);
+ if (fd < 0)
+ panic("open(success_dev)");
+ if (write(fd, "success\n", 8) != 8)
+ panic("write(success_dev)");
+ close(fd);
+ } else {
+ const char *why = "unknown cause";
+ int what = -1;
+
+ if (WIFEXITED(status)) {
+ why = "exit code";
+ what = WEXITSTATUS(status);
+ } else if (WIFSIGNALED(status)) {
+ why = "signal";
+ what = WTERMSIG(status);
+ }
+ printf("\x1b[31m\x1b[1m[-] Tests failed with %s %d! \u2639\x1b[0m\n", why, what);
+ }
+}
+
+static void ensure_console(void)
+{
+ for (unsigned int i = 0; i < 1000; ++i) {
+ int fd = open("/dev/console", O_RDWR);
+ if (fd < 0) {
+ usleep(50000);
+ continue;
+ }
+ dup2(fd, 0);
+ dup2(fd, 1);
+ dup2(fd, 2);
+ close(fd);
+ if (write(1, "\0\0\0\0\n", 5) == 5)
+ return;
+ }
+ panic("Unable to open console device");
+}
+
+static void clear_leaks(void)
+{
+ int fd;
+
+ fd = open("/sys/kernel/debug/kmemleak", O_WRONLY);
+ if (fd < 0)
+ return;
+ pretty_message("[+] Starting memory leak detection...");
+ write(fd, "clear\n", 5);
+ close(fd);
+}
+
+static void check_leaks(void)
+{
+ int fd;
+
+ fd = open("/sys/kernel/debug/kmemleak", O_WRONLY);
+ if (fd < 0)
+ return;
+ pretty_message("[+] Scanning for memory leaks...");
+ sleep(2); /* Wait for any grace periods. */
+ write(fd, "scan\n", 5);
+ close(fd);
+
+ fd = open("/sys/kernel/debug/kmemleak", O_RDONLY);
+ if (fd < 0)
+ return;
+ if (sendfile(1, fd, NULL, 0x7ffff000) > 0)
+ panic("Memory leaks encountered");
+ close(fd);
+}
+
+int main(int argc, char *argv[])
+{
+ seed_rng();
+ ensure_console();
+ print_banner();
+ mount_filesystems();
+ kmod_selftests();
+ enable_logging();
+ clear_leaks();
+ launch_tests();
+ check_leaks();
+ poweroff();
+ return 1;
+}
diff --git a/tools/testing/selftests/wireguard/qemu/kernel.config b/tools/testing/selftests/wireguard/qemu/kernel.config
new file mode 100644
index 000000000000..af9323a0b6e0
--- /dev/null
+++ b/tools/testing/selftests/wireguard/qemu/kernel.config
@@ -0,0 +1,88 @@
+CONFIG_LOCALVERSION=""
+CONFIG_NET=y
+CONFIG_NETDEVICES=y
+CONFIG_NET_CORE=y
+CONFIG_NET_IPIP=y
+CONFIG_DUMMY=y
+CONFIG_VETH=y
+CONFIG_MULTIUSER=y
+CONFIG_NAMESPACES=y
+CONFIG_NET_NS=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IPV6=y
+CONFIG_NETFILTER=y
+CONFIG_NETFILTER_ADVANCED=y
+CONFIG_NF_CONNTRACK=y
+CONFIG_NF_NAT=y
+CONFIG_NETFILTER_XTABLES=y
+CONFIG_NETFILTER_XT_NAT=y
+CONFIG_NETFILTER_XT_MATCH_LENGTH=y
+CONFIG_NF_CONNTRACK_IPV4=y
+CONFIG_NF_NAT_IPV4=y
+CONFIG_IP_NF_IPTABLES=y
+CONFIG_IP_NF_FILTER=y
+CONFIG_IP_NF_NAT=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_TTY=y
+CONFIG_BINFMT_ELF=y
+CONFIG_BINFMT_SCRIPT=y
+CONFIG_VDSO=y
+CONFIG_VIRTUALIZATION=y
+CONFIG_HYPERVISOR_GUEST=y
+CONFIG_PARAVIRT=y
+CONFIG_KVM_GUEST=y
+CONFIG_PARAVIRT_SPINLOCKS=y
+CONFIG_PRINTK=y
+CONFIG_KALLSYMS=y
+CONFIG_BUG=y
+CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE=y
+CONFIG_JUMP_LABEL=y
+CONFIG_EMBEDDED=n
+CONFIG_BASE_FULL=y
+CONFIG_FUTEX=y
+CONFIG_SHMEM=y
+CONFIG_SLUB=y
+CONFIG_SPARSEMEM_VMEMMAP=y
+CONFIG_SMP=y
+CONFIG_SCHED_SMT=y
+CONFIG_SCHED_MC=y
+CONFIG_NUMA=y
+CONFIG_PREEMPT=y
+CONFIG_NO_HZ=y
+CONFIG_NO_HZ_IDLE=y
+CONFIG_NO_HZ_FULL=n
+CONFIG_HZ_PERIODIC=n
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_COMPAT_32BIT_TIME=y
+CONFIG_ARCH_RANDOM=y
+CONFIG_FILE_LOCKING=y
+CONFIG_POSIX_TIMERS=y
+CONFIG_DEVTMPFS=y
+CONFIG_PROC_FS=y
+CONFIG_PROC_SYSCTL=y
+CONFIG_SYSFS=y
+CONFIG_TMPFS=y
+CONFIG_CONSOLE_LOGLEVEL_DEFAULT=15
+CONFIG_PRINTK_TIME=y
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_LEGACY_VSYSCALL_NONE=y
+CONFIG_KERNEL_GZIP=y
+CONFIG_PANIC_ON_OOPS=y
+CONFIG_BUG_ON_DATA_CORRUPTION=y
+CONFIG_LOCKUP_DETECTOR=y
+CONFIG_SOFTLOCKUP_DETECTOR=y
+CONFIG_HARDLOCKUP_DETECTOR=y
+CONFIG_WQ_WATCHDOG=y
+CONFIG_DETECT_HUNG_TASK=y
+CONFIG_BOOTPARAM_HARDLOCKUP_PANIC=y
+CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y
+CONFIG_BOOTPARAM_HUNG_TASK_PANIC=y
+CONFIG_PANIC_TIMEOUT=-1
+CONFIG_STACKTRACE=y
+CONFIG_EARLY_PRINTK=y
+CONFIG_GDB_SCRIPTS=y
+CONFIG_WIREGUARD=y
+CONFIG_WIREGUARD_DEBUG=y
diff --git a/tools/testing/vsock/.gitignore b/tools/testing/vsock/.gitignore
index dc5f11faf530..7f7a2ccc30c4 100644
--- a/tools/testing/vsock/.gitignore
+++ b/tools/testing/vsock/.gitignore
@@ -1,2 +1,3 @@
*.d
+vsock_test
vsock_diag_test
diff --git a/tools/testing/vsock/Makefile b/tools/testing/vsock/Makefile
index 5be687b1e16c..f8293c6910c9 100644
--- a/tools/testing/vsock/Makefile
+++ b/tools/testing/vsock/Makefile
@@ -1,10 +1,11 @@
# SPDX-License-Identifier: GPL-2.0-only
all: test
-test: vsock_diag_test
-vsock_diag_test: vsock_diag_test.o timeout.o control.o
+test: vsock_test vsock_diag_test
+vsock_test: vsock_test.o timeout.o control.o util.o
+vsock_diag_test: vsock_diag_test.o timeout.o control.o util.o
-CFLAGS += -g -O2 -Werror -Wall -I. -I../../include/uapi -I../../include -Wno-pointer-sign -fno-strict-overflow -fno-strict-aliasing -fno-common -MMD -U_FORTIFY_SOURCE -D_GNU_SOURCE
+CFLAGS += -g -O2 -Werror -Wall -I. -I../../include -I../../../usr/include -Wno-pointer-sign -fno-strict-overflow -fno-strict-aliasing -fno-common -MMD -U_FORTIFY_SOURCE -D_GNU_SOURCE
.PHONY: all test clean
clean:
- ${RM} *.o *.d vsock_diag_test
+ ${RM} *.o *.d vsock_test vsock_diag_test
-include *.d
diff --git a/tools/testing/vsock/README b/tools/testing/vsock/README
index 2cc6d7302db6..4d5045e7d2c3 100644
--- a/tools/testing/vsock/README
+++ b/tools/testing/vsock/README
@@ -5,12 +5,13 @@ Hyper-V.
The following tests are available:
+ * vsock_test - core AF_VSOCK socket functionality
* vsock_diag_test - vsock_diag.ko module for listing open sockets
The following prerequisite steps are not automated and must be performed prior
to running tests:
-1. Build the kernel and these tests.
+1. Build the kernel, make headers_install, and build these tests.
2. Install the kernel and tests on the host.
3. Install the kernel and tests inside the guest.
4. Boot the guest and ensure that the AF_VSOCK transport is enabled.
diff --git a/tools/testing/vsock/control.c b/tools/testing/vsock/control.c
index 45f328c6ff23..4874872fc5a3 100644
--- a/tools/testing/vsock/control.c
+++ b/tools/testing/vsock/control.c
@@ -205,11 +205,22 @@ void control_expectln(const char *str)
char *line;
line = control_readln();
- if (strcmp(str, line) != 0) {
+
+ control_cmpln(line, str, true);
+
+ free(line);
+}
+
+bool control_cmpln(char *line, const char *str, bool fail)
+{
+ if (strcmp(str, line) == 0)
+ return true;
+
+ if (fail) {
fprintf(stderr, "expected \"%s\" on control socket, got \"%s\"\n",
str, line);
exit(EXIT_FAILURE);
}
- free(line);
+ return false;
}
diff --git a/tools/testing/vsock/control.h b/tools/testing/vsock/control.h
index 54a07efd267c..51814b4f9ac1 100644
--- a/tools/testing/vsock/control.h
+++ b/tools/testing/vsock/control.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
#ifndef CONTROL_H
#define CONTROL_H
@@ -9,5 +10,6 @@ void control_cleanup(void);
void control_writeln(const char *str);
char *control_readln(void);
void control_expectln(const char *str);
+bool control_cmpln(char *line, const char *str, bool fail);
#endif /* CONTROL_H */
diff --git a/tools/testing/vsock/timeout.h b/tools/testing/vsock/timeout.h
index 77db9ce9860a..ecb7c840e65a 100644
--- a/tools/testing/vsock/timeout.h
+++ b/tools/testing/vsock/timeout.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
#ifndef TIMEOUT_H
#define TIMEOUT_H
diff --git a/tools/testing/vsock/util.c b/tools/testing/vsock/util.c
new file mode 100644
index 000000000000..93cbd6f603f9
--- /dev/null
+++ b/tools/testing/vsock/util.c
@@ -0,0 +1,375 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * vsock test utilities
+ *
+ * Copyright (C) 2017 Red Hat, Inc.
+ *
+ * Author: Stefan Hajnoczi <[email protected]>
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <unistd.h>
+#include <assert.h>
+#include <sys/epoll.h>
+
+#include "timeout.h"
+#include "control.h"
+#include "util.h"
+
+/* Install signal handlers */
+void init_signals(void)
+{
+ struct sigaction act = {
+ .sa_handler = sigalrm,
+ };
+
+ sigaction(SIGALRM, &act, NULL);
+ signal(SIGPIPE, SIG_IGN);
+}
+
+/* Parse a CID in string representation */
+unsigned int parse_cid(const char *str)
+{
+ char *endptr = NULL;
+ unsigned long n;
+
+ errno = 0;
+ n = strtoul(str, &endptr, 10);
+ if (errno || *endptr != '\0') {
+ fprintf(stderr, "malformed CID \"%s\"\n", str);
+ exit(EXIT_FAILURE);
+ }
+ return n;
+}
+
+/* Wait for the remote to close the connection */
+void vsock_wait_remote_close(int fd)
+{
+ struct epoll_event ev;
+ int epollfd, nfds;
+
+ epollfd = epoll_create1(0);
+ if (epollfd == -1) {
+ perror("epoll_create1");
+ exit(EXIT_FAILURE);
+ }
+
+ ev.events = EPOLLRDHUP | EPOLLHUP;
+ ev.data.fd = fd;
+ if (epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &ev) == -1) {
+ perror("epoll_ctl");
+ exit(EXIT_FAILURE);
+ }
+
+ nfds = epoll_wait(epollfd, &ev, 1, TIMEOUT * 1000);
+ if (nfds == -1) {
+ perror("epoll_wait");
+ exit(EXIT_FAILURE);
+ }
+
+ if (nfds == 0) {
+ fprintf(stderr, "epoll_wait timed out\n");
+ exit(EXIT_FAILURE);
+ }
+
+ assert(nfds == 1);
+ assert(ev.events & (EPOLLRDHUP | EPOLLHUP));
+ assert(ev.data.fd == fd);
+
+ close(epollfd);
+}
+
+/* Connect to <cid, port> and return the file descriptor. */
+int vsock_stream_connect(unsigned int cid, unsigned int port)
+{
+ union {
+ struct sockaddr sa;
+ struct sockaddr_vm svm;
+ } addr = {
+ .svm = {
+ .svm_family = AF_VSOCK,
+ .svm_port = port,
+ .svm_cid = cid,
+ },
+ };
+ int ret;
+ int fd;
+
+ control_expectln("LISTENING");
+
+ fd = socket(AF_VSOCK, SOCK_STREAM, 0);
+
+ timeout_begin(TIMEOUT);
+ do {
+ ret = connect(fd, &addr.sa, sizeof(addr.svm));
+ timeout_check("connect");
+ } while (ret < 0 && errno == EINTR);
+ timeout_end();
+
+ if (ret < 0) {
+ int old_errno = errno;
+
+ close(fd);
+ fd = -1;
+ errno = old_errno;
+ }
+ return fd;
+}
+
+/* Listen on <cid, port> and return the first incoming connection. The remote
+ * address is stored to clientaddrp. clientaddrp may be NULL.
+ */
+int vsock_stream_accept(unsigned int cid, unsigned int port,
+ struct sockaddr_vm *clientaddrp)
+{
+ union {
+ struct sockaddr sa;
+ struct sockaddr_vm svm;
+ } addr = {
+ .svm = {
+ .svm_family = AF_VSOCK,
+ .svm_port = port,
+ .svm_cid = cid,
+ },
+ };
+ union {
+ struct sockaddr sa;
+ struct sockaddr_vm svm;
+ } clientaddr;
+ socklen_t clientaddr_len = sizeof(clientaddr.svm);
+ int fd;
+ int client_fd;
+ int old_errno;
+
+ fd = socket(AF_VSOCK, SOCK_STREAM, 0);
+
+ if (bind(fd, &addr.sa, sizeof(addr.svm)) < 0) {
+ perror("bind");
+ exit(EXIT_FAILURE);
+ }
+
+ if (listen(fd, 1) < 0) {
+ perror("listen");
+ exit(EXIT_FAILURE);
+ }
+
+ control_writeln("LISTENING");
+
+ timeout_begin(TIMEOUT);
+ do {
+ client_fd = accept(fd, &clientaddr.sa, &clientaddr_len);
+ timeout_check("accept");
+ } while (client_fd < 0 && errno == EINTR);
+ timeout_end();
+
+ old_errno = errno;
+ close(fd);
+ errno = old_errno;
+
+ if (client_fd < 0)
+ return client_fd;
+
+ if (clientaddr_len != sizeof(clientaddr.svm)) {
+ fprintf(stderr, "unexpected addrlen from accept(2), %zu\n",
+ (size_t)clientaddr_len);
+ exit(EXIT_FAILURE);
+ }
+ if (clientaddr.sa.sa_family != AF_VSOCK) {
+ fprintf(stderr, "expected AF_VSOCK from accept(2), got %d\n",
+ clientaddr.sa.sa_family);
+ exit(EXIT_FAILURE);
+ }
+
+ if (clientaddrp)
+ *clientaddrp = clientaddr.svm;
+ return client_fd;
+}
+
+/* Transmit one byte and check the return value.
+ *
+ * expected_ret:
+ * <0 Negative errno (for testing errors)
+ * 0 End-of-file
+ * 1 Success
+ */
+void send_byte(int fd, int expected_ret, int flags)
+{
+ const uint8_t byte = 'A';
+ ssize_t nwritten;
+
+ timeout_begin(TIMEOUT);
+ do {
+ nwritten = send(fd, &byte, sizeof(byte), flags);
+ timeout_check("write");
+ } while (nwritten < 0 && errno == EINTR);
+ timeout_end();
+
+ if (expected_ret < 0) {
+ if (nwritten != -1) {
+ fprintf(stderr, "bogus send(2) return value %zd\n",
+ nwritten);
+ exit(EXIT_FAILURE);
+ }
+ if (errno != -expected_ret) {
+ perror("write");
+ exit(EXIT_FAILURE);
+ }
+ return;
+ }
+
+ if (nwritten < 0) {
+ perror("write");
+ exit(EXIT_FAILURE);
+ }
+ if (nwritten == 0) {
+ if (expected_ret == 0)
+ return;
+
+ fprintf(stderr, "unexpected EOF while sending byte\n");
+ exit(EXIT_FAILURE);
+ }
+ if (nwritten != sizeof(byte)) {
+ fprintf(stderr, "bogus send(2) return value %zd\n", nwritten);
+ exit(EXIT_FAILURE);
+ }
+}
+
+/* Receive one byte and check the return value.
+ *
+ * expected_ret:
+ * <0 Negative errno (for testing errors)
+ * 0 End-of-file
+ * 1 Success
+ */
+void recv_byte(int fd, int expected_ret, int flags)
+{
+ uint8_t byte;
+ ssize_t nread;
+
+ timeout_begin(TIMEOUT);
+ do {
+ nread = recv(fd, &byte, sizeof(byte), flags);
+ timeout_check("read");
+ } while (nread < 0 && errno == EINTR);
+ timeout_end();
+
+ if (expected_ret < 0) {
+ if (nread != -1) {
+ fprintf(stderr, "bogus recv(2) return value %zd\n",
+ nread);
+ exit(EXIT_FAILURE);
+ }
+ if (errno != -expected_ret) {
+ perror("read");
+ exit(EXIT_FAILURE);
+ }
+ return;
+ }
+
+ if (nread < 0) {
+ perror("read");
+ exit(EXIT_FAILURE);
+ }
+ if (nread == 0) {
+ if (expected_ret == 0)
+ return;
+
+ fprintf(stderr, "unexpected EOF while receiving byte\n");
+ exit(EXIT_FAILURE);
+ }
+ if (nread != sizeof(byte)) {
+ fprintf(stderr, "bogus recv(2) return value %zd\n", nread);
+ exit(EXIT_FAILURE);
+ }
+ if (byte != 'A') {
+ fprintf(stderr, "unexpected byte read %c\n", byte);
+ exit(EXIT_FAILURE);
+ }
+}
+
+/* Run test cases. The program terminates if a failure occurs. */
+void run_tests(const struct test_case *test_cases,
+ const struct test_opts *opts)
+{
+ int i;
+
+ for (i = 0; test_cases[i].name; i++) {
+ void (*run)(const struct test_opts *opts);
+ char *line;
+
+ printf("%d - %s...", i, test_cases[i].name);
+ fflush(stdout);
+
+ /* Full barrier before executing the next test. This
+ * ensures that client and server are executing the
+ * same test case. In particular, it means whoever is
+ * faster will not see the peer still executing the
+ * last test. This is important because port numbers
+ * can be used by multiple test cases.
+ */
+ if (test_cases[i].skip)
+ control_writeln("SKIP");
+ else
+ control_writeln("NEXT");
+
+ line = control_readln();
+ if (control_cmpln(line, "SKIP", false) || test_cases[i].skip) {
+
+ printf("skipped\n");
+
+ free(line);
+ continue;
+ }
+
+ control_cmpln(line, "NEXT", true);
+ free(line);
+
+ if (opts->mode == TEST_MODE_CLIENT)
+ run = test_cases[i].run_client;
+ else
+ run = test_cases[i].run_server;
+
+ if (run)
+ run(opts);
+
+ printf("ok\n");
+ }
+}
+
+void list_tests(const struct test_case *test_cases)
+{
+ int i;
+
+ printf("ID\tTest name\n");
+
+ for (i = 0; test_cases[i].name; i++)
+ printf("%d\t%s\n", i, test_cases[i].name);
+
+ exit(EXIT_FAILURE);
+}
+
+void skip_test(struct test_case *test_cases, size_t test_cases_len,
+ const char *test_id_str)
+{
+ unsigned long test_id;
+ char *endptr = NULL;
+
+ errno = 0;
+ test_id = strtoul(test_id_str, &endptr, 10);
+ if (errno || *endptr != '\0') {
+ fprintf(stderr, "malformed test ID \"%s\"\n", test_id_str);
+ exit(EXIT_FAILURE);
+ }
+
+ if (test_id >= test_cases_len) {
+ fprintf(stderr, "test ID (%lu) larger than the max allowed (%lu)\n",
+ test_id, test_cases_len - 1);
+ exit(EXIT_FAILURE);
+ }
+
+ test_cases[test_id].skip = true;
+}
diff --git a/tools/testing/vsock/util.h b/tools/testing/vsock/util.h
new file mode 100644
index 000000000000..e53dd09d26d9
--- /dev/null
+++ b/tools/testing/vsock/util.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef UTIL_H
+#define UTIL_H
+
+#include <sys/socket.h>
+#include <linux/vm_sockets.h>
+
+/* Tests can either run as the client or the server */
+enum test_mode {
+ TEST_MODE_UNSET,
+ TEST_MODE_CLIENT,
+ TEST_MODE_SERVER
+};
+
+/* Test runner options */
+struct test_opts {
+ enum test_mode mode;
+ unsigned int peer_cid;
+};
+
+/* A test case definition. Test functions must print failures to stderr and
+ * terminate with exit(EXIT_FAILURE).
+ */
+struct test_case {
+ const char *name; /* human-readable name */
+
+ /* Called when test mode is TEST_MODE_CLIENT */
+ void (*run_client)(const struct test_opts *opts);
+
+ /* Called when test mode is TEST_MODE_SERVER */
+ void (*run_server)(const struct test_opts *opts);
+
+ bool skip;
+};
+
+void init_signals(void);
+unsigned int parse_cid(const char *str);
+int vsock_stream_connect(unsigned int cid, unsigned int port);
+int vsock_stream_accept(unsigned int cid, unsigned int port,
+ struct sockaddr_vm *clientaddrp);
+void vsock_wait_remote_close(int fd);
+void send_byte(int fd, int expected_ret, int flags);
+void recv_byte(int fd, int expected_ret, int flags);
+void run_tests(const struct test_case *test_cases,
+ const struct test_opts *opts);
+void list_tests(const struct test_case *test_cases);
+void skip_test(struct test_case *test_cases, size_t test_cases_len,
+ const char *test_id_str);
+#endif /* UTIL_H */
diff --git a/tools/testing/vsock/vsock_diag_test.c b/tools/testing/vsock/vsock_diag_test.c
index c481101364a4..cec6f5a738e1 100644
--- a/tools/testing/vsock/vsock_diag_test.c
+++ b/tools/testing/vsock/vsock_diag_test.c
@@ -9,32 +9,22 @@
#include <getopt.h>
#include <stdio.h>
-#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
-#include <signal.h>
-#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <linux/list.h>
#include <linux/net.h>
#include <linux/netlink.h>
#include <linux/sock_diag.h>
+#include <linux/vm_sockets_diag.h>
#include <netinet/tcp.h>
-#include "../../../include/uapi/linux/vm_sockets.h"
-#include "../../../include/uapi/linux/vm_sockets_diag.h"
-
#include "timeout.h"
#include "control.h"
-
-enum test_mode {
- TEST_MODE_UNSET,
- TEST_MODE_CLIENT,
- TEST_MODE_SERVER
-};
+#include "util.h"
/* Per-socket status */
struct vsock_stat {
@@ -335,7 +325,7 @@ static void free_sock_stat(struct list_head *sockets)
free(st);
}
-static void test_no_sockets(unsigned int peer_cid)
+static void test_no_sockets(const struct test_opts *opts)
{
LIST_HEAD(sockets);
@@ -346,7 +336,7 @@ static void test_no_sockets(unsigned int peer_cid)
free_sock_stat(&sockets);
}
-static void test_listen_socket_server(unsigned int peer_cid)
+static void test_listen_socket_server(const struct test_opts *opts)
{
union {
struct sockaddr sa;
@@ -384,35 +374,14 @@ static void test_listen_socket_server(unsigned int peer_cid)
free_sock_stat(&sockets);
}
-static void test_connect_client(unsigned int peer_cid)
+static void test_connect_client(const struct test_opts *opts)
{
- union {
- struct sockaddr sa;
- struct sockaddr_vm svm;
- } addr = {
- .svm = {
- .svm_family = AF_VSOCK,
- .svm_port = 1234,
- .svm_cid = peer_cid,
- },
- };
int fd;
- int ret;
LIST_HEAD(sockets);
struct vsock_stat *st;
- control_expectln("LISTENING");
-
- fd = socket(AF_VSOCK, SOCK_STREAM, 0);
-
- timeout_begin(TIMEOUT);
- do {
- ret = connect(fd, &addr.sa, sizeof(addr.svm));
- timeout_check("connect");
- } while (ret < 0 && errno == EINTR);
- timeout_end();
-
- if (ret < 0) {
+ fd = vsock_stream_connect(opts->peer_cid, 1234);
+ if (fd < 0) {
perror("connect");
exit(EXIT_FAILURE);
}
@@ -430,68 +399,21 @@ static void test_connect_client(unsigned int peer_cid)
free_sock_stat(&sockets);
}
-static void test_connect_server(unsigned int peer_cid)
+static void test_connect_server(const struct test_opts *opts)
{
- union {
- struct sockaddr sa;
- struct sockaddr_vm svm;
- } addr = {
- .svm = {
- .svm_family = AF_VSOCK,
- .svm_port = 1234,
- .svm_cid = VMADDR_CID_ANY,
- },
- };
- union {
- struct sockaddr sa;
- struct sockaddr_vm svm;
- } clientaddr;
- socklen_t clientaddr_len = sizeof(clientaddr.svm);
- LIST_HEAD(sockets);
struct vsock_stat *st;
- int fd;
+ LIST_HEAD(sockets);
int client_fd;
- fd = socket(AF_VSOCK, SOCK_STREAM, 0);
-
- if (bind(fd, &addr.sa, sizeof(addr.svm)) < 0) {
- perror("bind");
- exit(EXIT_FAILURE);
- }
-
- if (listen(fd, 1) < 0) {
- perror("listen");
- exit(EXIT_FAILURE);
- }
-
- control_writeln("LISTENING");
-
- timeout_begin(TIMEOUT);
- do {
- client_fd = accept(fd, &clientaddr.sa, &clientaddr_len);
- timeout_check("accept");
- } while (client_fd < 0 && errno == EINTR);
- timeout_end();
-
+ client_fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL);
if (client_fd < 0) {
perror("accept");
exit(EXIT_FAILURE);
}
- if (clientaddr.sa.sa_family != AF_VSOCK) {
- fprintf(stderr, "expected AF_VSOCK from accept(2), got %d\n",
- clientaddr.sa.sa_family);
- exit(EXIT_FAILURE);
- }
- if (clientaddr.svm.svm_cid != peer_cid) {
- fprintf(stderr, "expected peer CID %u from accept(2), got %u\n",
- peer_cid, clientaddr.svm.svm_cid);
- exit(EXIT_FAILURE);
- }
read_vsock_stat(&sockets);
- check_num_sockets(&sockets, 2);
- find_vsock_stat(&sockets, fd);
+ check_num_sockets(&sockets, 1);
st = find_vsock_stat(&sockets, client_fd);
check_socket_state(st, TCP_ESTABLISHED);
@@ -499,15 +421,10 @@ static void test_connect_server(unsigned int peer_cid)
control_expectln("DONE");
close(client_fd);
- close(fd);
free_sock_stat(&sockets);
}
-static struct {
- const char *name;
- void (*run_client)(unsigned int peer_cid);
- void (*run_server)(unsigned int peer_cid);
-} test_cases[] = {
+static struct test_case test_cases[] = {
{
.name = "No sockets",
.run_server = test_no_sockets,
@@ -524,30 +441,6 @@ static struct {
{},
};
-static void init_signals(void)
-{
- struct sigaction act = {
- .sa_handler = sigalrm,
- };
-
- sigaction(SIGALRM, &act, NULL);
- signal(SIGPIPE, SIG_IGN);
-}
-
-static unsigned int parse_cid(const char *str)
-{
- char *endptr = NULL;
- unsigned long int n;
-
- errno = 0;
- n = strtoul(str, &endptr, 10);
- if (errno || *endptr != '\0') {
- fprintf(stderr, "malformed CID \"%s\"\n", str);
- exit(EXIT_FAILURE);
- }
- return n;
-}
-
static const char optstring[] = "";
static const struct option longopts[] = {
{
@@ -571,6 +464,16 @@ static const struct option longopts[] = {
.val = 'p',
},
{
+ .name = "list",
+ .has_arg = no_argument,
+ .val = 'l',
+ },
+ {
+ .name = "skip",
+ .has_arg = required_argument,
+ .val = 's',
+ },
+ {
.name = "help",
.has_arg = no_argument,
.val = '?',
@@ -580,7 +483,7 @@ static const struct option longopts[] = {
static void usage(void)
{
- fprintf(stderr, "Usage: vsock_diag_test [--help] [--control-host=<host>] --control-port=<port> --mode=client|server --peer-cid=<cid>\n"
+ fprintf(stderr, "Usage: vsock_diag_test [--help] [--control-host=<host>] --control-port=<port> --mode=client|server --peer-cid=<cid> [--list] [--skip=<test_id>]\n"
"\n"
" Server: vsock_diag_test --control-port=1234 --mode=server --peer-cid=3\n"
" Client: vsock_diag_test --control-host=192.168.0.1 --control-port=1234 --mode=client --peer-cid=2\n"
@@ -594,7 +497,18 @@ static void usage(void)
"listen address and the client requires an address to\n"
"connect to.\n"
"\n"
- "The CID of the other side must be given with --peer-cid=<cid>.\n");
+ "The CID of the other side must be given with --peer-cid=<cid>.\n"
+ "\n"
+ "Options:\n"
+ " --help This help message\n"
+ " --control-host <host> Server IP address to connect to\n"
+ " --control-port <port> Server port to listen on/connect to\n"
+ " --mode client|server Server or client mode\n"
+ " --peer-cid <cid> CID of the other side\n"
+ " --list List of tests that will be executed\n"
+ " --skip <test_id> Test ID to skip;\n"
+ " use multiple --skip options to skip more tests\n"
+ );
exit(EXIT_FAILURE);
}
@@ -602,9 +516,10 @@ int main(int argc, char **argv)
{
const char *control_host = NULL;
const char *control_port = NULL;
- int mode = TEST_MODE_UNSET;
- unsigned int peer_cid = VMADDR_CID_ANY;
- int i;
+ struct test_opts opts = {
+ .mode = TEST_MODE_UNSET,
+ .peer_cid = VMADDR_CID_ANY,
+ };
init_signals();
@@ -620,20 +535,27 @@ int main(int argc, char **argv)
break;
case 'm':
if (strcmp(optarg, "client") == 0)
- mode = TEST_MODE_CLIENT;
+ opts.mode = TEST_MODE_CLIENT;
else if (strcmp(optarg, "server") == 0)
- mode = TEST_MODE_SERVER;
+ opts.mode = TEST_MODE_SERVER;
else {
fprintf(stderr, "--mode must be \"client\" or \"server\"\n");
return EXIT_FAILURE;
}
break;
case 'p':
- peer_cid = parse_cid(optarg);
+ opts.peer_cid = parse_cid(optarg);
break;
case 'P':
control_port = optarg;
break;
+ case 'l':
+ list_tests(test_cases);
+ break;
+ case 's':
+ skip_test(test_cases, ARRAY_SIZE(test_cases) - 1,
+ optarg);
+ break;
case '?':
default:
usage();
@@ -642,35 +564,21 @@ int main(int argc, char **argv)
if (!control_port)
usage();
- if (mode == TEST_MODE_UNSET)
+ if (opts.mode == TEST_MODE_UNSET)
usage();
- if (peer_cid == VMADDR_CID_ANY)
+ if (opts.peer_cid == VMADDR_CID_ANY)
usage();
if (!control_host) {
- if (mode != TEST_MODE_SERVER)
+ if (opts.mode != TEST_MODE_SERVER)
usage();
control_host = "0.0.0.0";
}
- control_init(control_host, control_port, mode == TEST_MODE_SERVER);
+ control_init(control_host, control_port,
+ opts.mode == TEST_MODE_SERVER);
- for (i = 0; test_cases[i].name; i++) {
- void (*run)(unsigned int peer_cid);
-
- printf("%s...", test_cases[i].name);
- fflush(stdout);
-
- if (mode == TEST_MODE_CLIENT)
- run = test_cases[i].run_client;
- else
- run = test_cases[i].run_server;
-
- if (run)
- run(peer_cid);
-
- printf("ok\n");
- }
+ run_tests(test_cases, &opts);
control_cleanup();
return EXIT_SUCCESS;
diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c
new file mode 100644
index 000000000000..1d8b93f1af31
--- /dev/null
+++ b/tools/testing/vsock/vsock_test.c
@@ -0,0 +1,379 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * vsock_test - vsock.ko test suite
+ *
+ * Copyright (C) 2017 Red Hat, Inc.
+ *
+ * Author: Stefan Hajnoczi <[email protected]>
+ */
+
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <linux/kernel.h>
+
+#include "timeout.h"
+#include "control.h"
+#include "util.h"
+
+static void test_stream_connection_reset(const struct test_opts *opts)
+{
+ union {
+ struct sockaddr sa;
+ struct sockaddr_vm svm;
+ } addr = {
+ .svm = {
+ .svm_family = AF_VSOCK,
+ .svm_port = 1234,
+ .svm_cid = opts->peer_cid,
+ },
+ };
+ int ret;
+ int fd;
+
+ fd = socket(AF_VSOCK, SOCK_STREAM, 0);
+
+ timeout_begin(TIMEOUT);
+ do {
+ ret = connect(fd, &addr.sa, sizeof(addr.svm));
+ timeout_check("connect");
+ } while (ret < 0 && errno == EINTR);
+ timeout_end();
+
+ if (ret != -1) {
+ fprintf(stderr, "expected connect(2) failure, got %d\n", ret);
+ exit(EXIT_FAILURE);
+ }
+ if (errno != ECONNRESET) {
+ fprintf(stderr, "unexpected connect(2) errno %d\n", errno);
+ exit(EXIT_FAILURE);
+ }
+
+ close(fd);
+}
+
+static void test_stream_client_close_client(const struct test_opts *opts)
+{
+ int fd;
+
+ fd = vsock_stream_connect(opts->peer_cid, 1234);
+ if (fd < 0) {
+ perror("connect");
+ exit(EXIT_FAILURE);
+ }
+
+ send_byte(fd, 1, 0);
+ close(fd);
+}
+
+static void test_stream_client_close_server(const struct test_opts *opts)
+{
+ int fd;
+
+ fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL);
+ if (fd < 0) {
+ perror("accept");
+ exit(EXIT_FAILURE);
+ }
+
+ /* Wait for the remote to close the connection, before check
+ * -EPIPE error on send.
+ */
+ vsock_wait_remote_close(fd);
+
+ send_byte(fd, -EPIPE, 0);
+ recv_byte(fd, 1, 0);
+ recv_byte(fd, 0, 0);
+ close(fd);
+}
+
+static void test_stream_server_close_client(const struct test_opts *opts)
+{
+ int fd;
+
+ fd = vsock_stream_connect(opts->peer_cid, 1234);
+ if (fd < 0) {
+ perror("connect");
+ exit(EXIT_FAILURE);
+ }
+
+ /* Wait for the remote to close the connection, before check
+ * -EPIPE error on send.
+ */
+ vsock_wait_remote_close(fd);
+
+ send_byte(fd, -EPIPE, 0);
+ recv_byte(fd, 1, 0);
+ recv_byte(fd, 0, 0);
+ close(fd);
+}
+
+static void test_stream_server_close_server(const struct test_opts *opts)
+{
+ int fd;
+
+ fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL);
+ if (fd < 0) {
+ perror("accept");
+ exit(EXIT_FAILURE);
+ }
+
+ send_byte(fd, 1, 0);
+ close(fd);
+}
+
+/* With the standard socket sizes, VMCI is able to support about 100
+ * concurrent stream connections.
+ */
+#define MULTICONN_NFDS 100
+
+static void test_stream_multiconn_client(const struct test_opts *opts)
+{
+ int fds[MULTICONN_NFDS];
+ int i;
+
+ for (i = 0; i < MULTICONN_NFDS; i++) {
+ fds[i] = vsock_stream_connect(opts->peer_cid, 1234);
+ if (fds[i] < 0) {
+ perror("connect");
+ exit(EXIT_FAILURE);
+ }
+ }
+
+ for (i = 0; i < MULTICONN_NFDS; i++) {
+ if (i % 2)
+ recv_byte(fds[i], 1, 0);
+ else
+ send_byte(fds[i], 1, 0);
+ }
+
+ for (i = 0; i < MULTICONN_NFDS; i++)
+ close(fds[i]);
+}
+
+static void test_stream_multiconn_server(const struct test_opts *opts)
+{
+ int fds[MULTICONN_NFDS];
+ int i;
+
+ for (i = 0; i < MULTICONN_NFDS; i++) {
+ fds[i] = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL);
+ if (fds[i] < 0) {
+ perror("accept");
+ exit(EXIT_FAILURE);
+ }
+ }
+
+ for (i = 0; i < MULTICONN_NFDS; i++) {
+ if (i % 2)
+ send_byte(fds[i], 1, 0);
+ else
+ recv_byte(fds[i], 1, 0);
+ }
+
+ for (i = 0; i < MULTICONN_NFDS; i++)
+ close(fds[i]);
+}
+
+static void test_stream_msg_peek_client(const struct test_opts *opts)
+{
+ int fd;
+
+ fd = vsock_stream_connect(opts->peer_cid, 1234);
+ if (fd < 0) {
+ perror("connect");
+ exit(EXIT_FAILURE);
+ }
+
+ send_byte(fd, 1, 0);
+ close(fd);
+}
+
+static void test_stream_msg_peek_server(const struct test_opts *opts)
+{
+ int fd;
+
+ fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL);
+ if (fd < 0) {
+ perror("accept");
+ exit(EXIT_FAILURE);
+ }
+
+ recv_byte(fd, 1, MSG_PEEK);
+ recv_byte(fd, 1, 0);
+ close(fd);
+}
+
+static struct test_case test_cases[] = {
+ {
+ .name = "SOCK_STREAM connection reset",
+ .run_client = test_stream_connection_reset,
+ },
+ {
+ .name = "SOCK_STREAM client close",
+ .run_client = test_stream_client_close_client,
+ .run_server = test_stream_client_close_server,
+ },
+ {
+ .name = "SOCK_STREAM server close",
+ .run_client = test_stream_server_close_client,
+ .run_server = test_stream_server_close_server,
+ },
+ {
+ .name = "SOCK_STREAM multiple connections",
+ .run_client = test_stream_multiconn_client,
+ .run_server = test_stream_multiconn_server,
+ },
+ {
+ .name = "SOCK_STREAM MSG_PEEK",
+ .run_client = test_stream_msg_peek_client,
+ .run_server = test_stream_msg_peek_server,
+ },
+ {},
+};
+
+static const char optstring[] = "";
+static const struct option longopts[] = {
+ {
+ .name = "control-host",
+ .has_arg = required_argument,
+ .val = 'H',
+ },
+ {
+ .name = "control-port",
+ .has_arg = required_argument,
+ .val = 'P',
+ },
+ {
+ .name = "mode",
+ .has_arg = required_argument,
+ .val = 'm',
+ },
+ {
+ .name = "peer-cid",
+ .has_arg = required_argument,
+ .val = 'p',
+ },
+ {
+ .name = "list",
+ .has_arg = no_argument,
+ .val = 'l',
+ },
+ {
+ .name = "skip",
+ .has_arg = required_argument,
+ .val = 's',
+ },
+ {
+ .name = "help",
+ .has_arg = no_argument,
+ .val = '?',
+ },
+ {},
+};
+
+static void usage(void)
+{
+ fprintf(stderr, "Usage: vsock_test [--help] [--control-host=<host>] --control-port=<port> --mode=client|server --peer-cid=<cid> [--list] [--skip=<test_id>]\n"
+ "\n"
+ " Server: vsock_test --control-port=1234 --mode=server --peer-cid=3\n"
+ " Client: vsock_test --control-host=192.168.0.1 --control-port=1234 --mode=client --peer-cid=2\n"
+ "\n"
+ "Run vsock.ko tests. Must be launched in both guest\n"
+ "and host. One side must use --mode=client and\n"
+ "the other side must use --mode=server.\n"
+ "\n"
+ "A TCP control socket connection is used to coordinate tests\n"
+ "between the client and the server. The server requires a\n"
+ "listen address and the client requires an address to\n"
+ "connect to.\n"
+ "\n"
+ "The CID of the other side must be given with --peer-cid=<cid>.\n"
+ "\n"
+ "Options:\n"
+ " --help This help message\n"
+ " --control-host <host> Server IP address to connect to\n"
+ " --control-port <port> Server port to listen on/connect to\n"
+ " --mode client|server Server or client mode\n"
+ " --peer-cid <cid> CID of the other side\n"
+ " --list List of tests that will be executed\n"
+ " --skip <test_id> Test ID to skip;\n"
+ " use multiple --skip options to skip more tests\n"
+ );
+ exit(EXIT_FAILURE);
+}
+
+int main(int argc, char **argv)
+{
+ const char *control_host = NULL;
+ const char *control_port = NULL;
+ struct test_opts opts = {
+ .mode = TEST_MODE_UNSET,
+ .peer_cid = VMADDR_CID_ANY,
+ };
+
+ init_signals();
+
+ for (;;) {
+ int opt = getopt_long(argc, argv, optstring, longopts, NULL);
+
+ if (opt == -1)
+ break;
+
+ switch (opt) {
+ case 'H':
+ control_host = optarg;
+ break;
+ case 'm':
+ if (strcmp(optarg, "client") == 0)
+ opts.mode = TEST_MODE_CLIENT;
+ else if (strcmp(optarg, "server") == 0)
+ opts.mode = TEST_MODE_SERVER;
+ else {
+ fprintf(stderr, "--mode must be \"client\" or \"server\"\n");
+ return EXIT_FAILURE;
+ }
+ break;
+ case 'p':
+ opts.peer_cid = parse_cid(optarg);
+ break;
+ case 'P':
+ control_port = optarg;
+ break;
+ case 'l':
+ list_tests(test_cases);
+ break;
+ case 's':
+ skip_test(test_cases, ARRAY_SIZE(test_cases) - 1,
+ optarg);
+ break;
+ case '?':
+ default:
+ usage();
+ }
+ }
+
+ if (!control_port)
+ usage();
+ if (opts.mode == TEST_MODE_UNSET)
+ usage();
+ if (opts.peer_cid == VMADDR_CID_ANY)
+ usage();
+
+ if (!control_host) {
+ if (opts.mode != TEST_MODE_SERVER)
+ usage();
+ control_host = "0.0.0.0";
+ }
+
+ control_init(control_host, control_port,
+ opts.mode == TEST_MODE_SERVER);
+
+ run_tests(test_cases, &opts);
+
+ control_cleanup();
+ return EXIT_SUCCESS;
+}
diff --git a/usr/gen_initramfs_list.sh b/usr/gen_initramfs_list.sh
index 0aad760fcd8c..2bbac73e6477 100755
--- a/usr/gen_initramfs_list.sh
+++ b/usr/gen_initramfs_list.sh
@@ -128,7 +128,7 @@ parse() {
str="${ftype} ${name} ${location} ${str}"
;;
"nod")
- local dev=`LC_ALL=C ls -l "${location}"`
+ local dev="`LC_ALL=C ls -l "${location}"`"
local maj=`field 5 ${dev}`
local min=`field 6 ${dev}`
maj=${maj%,}
diff --git a/usr/include/Makefile b/usr/include/Makefile
index 4a753a48767b..84598469e6ff 100644
--- a/usr/include/Makefile
+++ b/usr/include/Makefile
@@ -91,7 +91,7 @@ endif
# asm-generic/*.h is used by asm/*.h, and should not be included directly
header-test- += asm-generic/%
-extra-y := $(patsubst $(obj)/%.h,%.hdrtest, $(shell find $(obj) -name '*.h'))
+extra-y := $(patsubst $(obj)/%.h,%.hdrtest, $(shell find $(obj) -name '*.h' 2>/dev/null))
quiet_cmd_hdrtest = HDRTEST $<
cmd_hdrtest = \
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index 12e0280291ce..8de4daf25097 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -1352,7 +1352,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
}
}
-static void cpu_init_hyp_mode(void *dummy)
+static void cpu_init_hyp_mode(void)
{
phys_addr_t pgd_ptr;
unsigned long hyp_stack_ptr;
@@ -1386,7 +1386,7 @@ static void cpu_hyp_reinit(void)
if (is_kernel_in_hyp_mode())
kvm_timer_init_vhe();
else
- cpu_init_hyp_mode(NULL);
+ cpu_init_hyp_mode();
kvm_arm_init_debug();
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index 38b4c910b6c3..0b32a904a1bb 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -38,6 +38,11 @@ static unsigned long io_map_base;
#define KVM_S2PTE_FLAG_IS_IOMAP (1UL << 0)
#define KVM_S2_FLAG_LOGGING_ACTIVE (1UL << 1)
+static bool is_iomap(unsigned long flags)
+{
+ return flags & KVM_S2PTE_FLAG_IS_IOMAP;
+}
+
static bool memslot_is_logging(struct kvm_memory_slot *memslot)
{
return memslot->dirty_bitmap && !(memslot->flags & KVM_MEM_READONLY);
@@ -1698,6 +1703,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
vma_pagesize = vma_kernel_pagesize(vma);
if (logging_active ||
+ (vma->vm_flags & VM_PFNMAP) ||
!fault_supports_stage2_huge_mapping(memslot, hva, vma_pagesize)) {
force_pte = true;
vma_pagesize = PAGE_SIZE;
@@ -1760,6 +1766,9 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
writable = false;
}
+ if (exec_fault && is_iomap(flags))
+ return -ENOEXEC;
+
spin_lock(&kvm->mmu_lock);
if (mmu_notifier_retry(kvm, mmu_seq))
goto out_unlock;
@@ -1781,7 +1790,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
if (writable)
kvm_set_pfn_dirty(pfn);
- if (fault_status != FSC_PERM)
+ if (fault_status != FSC_PERM && !is_iomap(flags))
clean_dcache_guest_page(pfn, vma_pagesize);
if (exec_fault)
@@ -1948,9 +1957,8 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
if (kvm_is_error_hva(hva) || (write_fault && !writable)) {
if (is_iabt) {
/* Prefetch Abort on I/O address */
- kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu));
- ret = 1;
- goto out_unlock;
+ ret = -ENOEXEC;
+ goto out;
}
/*
@@ -1992,6 +2000,11 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status);
if (ret == 0)
ret = 1;
+out:
+ if (ret == -ENOEXEC) {
+ kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu));
+ ret = 1;
+ }
out_unlock:
srcu_read_unlock(&vcpu->kvm->srcu, idx);
return ret;
@@ -2302,15 +2315,6 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
break;
/*
- * Mapping a read-only VMA is only allowed if the
- * memory region is configured as read-only.
- */
- if (writable && !(vma->vm_flags & VM_WRITE)) {
- ret = -EPERM;
- break;
- }
-
- /*
* Take the intersection of this VMA with the memory region
*/
vm_start = max(hva, vma->vm_start);
diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c
index b3c5de48064c..a963b9d766b7 100644
--- a/virt/kvm/arm/vgic/vgic-init.c
+++ b/virt/kvm/arm/vgic/vgic-init.c
@@ -70,7 +70,7 @@ void kvm_vgic_early_init(struct kvm *kvm)
*/
int kvm_vgic_create(struct kvm *kvm, u32 type)
{
- int i, vcpu_lock_idx = -1, ret;
+ int i, ret;
struct kvm_vcpu *vcpu;
if (irqchip_in_kernel(kvm))
@@ -86,17 +86,9 @@ int kvm_vgic_create(struct kvm *kvm, u32 type)
!kvm_vgic_global_state.can_emulate_gicv2)
return -ENODEV;
- /*
- * Any time a vcpu is run, vcpu_load is called which tries to grab the
- * vcpu->mutex. By grabbing the vcpu->mutex of all VCPUs we ensure
- * that no other VCPUs are run while we create the vgic.
- */
ret = -EBUSY;
- kvm_for_each_vcpu(i, vcpu, kvm) {
- if (!mutex_trylock(&vcpu->mutex))
- goto out_unlock;
- vcpu_lock_idx = i;
- }
+ if (!lock_all_vcpus(kvm))
+ return ret;
kvm_for_each_vcpu(i, vcpu, kvm) {
if (vcpu->arch.has_run_once)
@@ -125,10 +117,7 @@ int kvm_vgic_create(struct kvm *kvm, u32 type)
INIT_LIST_HEAD(&kvm->arch.vgic.rd_regions);
out_unlock:
- for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) {
- vcpu = kvm_get_vcpu(kvm, vcpu_lock_idx);
- mutex_unlock(&vcpu->mutex);
- }
+ unlock_all_vcpus(kvm);
return ret;
}
@@ -177,6 +166,7 @@ static int kvm_vgic_dist_init(struct kvm *kvm, unsigned int nr_spis)
break;
default:
kfree(dist->spis);
+ dist->spis = NULL;
return -EINVAL;
}
}